From b59deb49149b4f128b3f5a2ec9b48c0e6263ad8a Mon Sep 17 00:00:00 2001 From: Morten Torkildsen Date: Wed, 7 May 2025 21:51:00 +0000 Subject: [PATCH] DRA: Fix failure to allocate large number of devices --- .../structured/allocator.go | 7 +++--- .../structured/allocator_test.go | 23 +++++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/staging/src/k8s.io/dynamic-resource-allocation/structured/allocator.go b/staging/src/k8s.io/dynamic-resource-allocation/structured/allocator.go index 37a192008d2..a126667bdf9 100644 --- a/staging/src/k8s.io/dynamic-resource-allocation/structured/allocator.go +++ b/staging/src/k8s.io/dynamic-resource-allocation/structured/allocator.go @@ -745,9 +745,10 @@ func (alloc *allocator) allocateOne(r deviceIndices, allocateSubRequest bool) (b return alloc.allocateOne(deviceIndices{claimIndex: r.claimIndex, requestIndex: r.requestIndex + 1}, false) } - // Before trying to allocate devices, check if allocating the devices - // in the current request will put us over the threshold. - numDevicesAfterAlloc := len(alloc.result[r.claimIndex].devices) + requestData.numDevices + // Check whether allocating the devices in the current request would put us over the threshold: + // add the number of already allocated devices to the number of devices in the current request, + // then subtract the deviceIndex so devices already allocated for this request are not counted twice. + numDevicesAfterAlloc := len(alloc.result[r.claimIndex].devices) + requestData.numDevices - r.deviceIndex if numDevicesAfterAlloc > resourceapi.AllocationResultsMaxSize { // Don't return an error here since we want to keep searching for // a solution that works. 
diff --git a/staging/src/k8s.io/dynamic-resource-allocation/structured/allocator_test.go b/staging/src/k8s.io/dynamic-resource-allocation/structured/allocator_test.go index 8a988950151..9509f862a20 100644 --- a/staging/src/k8s.io/dynamic-resource-allocation/structured/allocator_test.go +++ b/staging/src/k8s.io/dynamic-resource-allocation/structured/allocator_test.go @@ -395,6 +395,14 @@ func deviceAllocationResult(request, driver, pool, device string, adminAccess bo return r } +func multipleDeviceAllocationResults(request, driver, pool string, count, startIndex int) []resourceapi.DeviceRequestAllocationResult { + var results []resourceapi.DeviceRequestAllocationResult + for i := startIndex; i < startIndex+count; i++ { + results = append(results, deviceAllocationResult(request, driver, pool, fmt.Sprintf("device-%d", i), false)) + } + return results +} + // nodeLabelSelector creates a node selector with a label match for "key" in "values". func nodeLabelSelector(key string, values ...string) *v1.NodeSelector { requirements := []v1.NodeSelectorRequirement{{ @@ -3024,6 +3032,21 @@ func TestAllocator(t *testing.T) { deviceAllocationResult(req0, driverA, pool1, device1, false), )}, }, + "max-number-devices": { + claimsToAllocate: objects( + claimWithRequests( + claim0, nil, request(req0, classA, resourceapi.AllocationResultsMaxSize), + ), + ), + classes: objects(class(classA, driverA)), + slices: objects(sliceWithMultipleDevices(slice1, node1, pool1, driverA, resourceapi.AllocationResultsMaxSize)), + node: node(node1, region1), + + expectResults: []any{allocationResult( + localNodeSelector(node1), + multipleDeviceAllocationResults(req0, driverA, pool1, resourceapi.AllocationResultsMaxSize, 0)..., + )}, + }, } for name, tc := range testcases {