DRA scheduler: fix incorrect allocation of "all" devices
The code which pre-determined the set of "all" devices when using "allocationMode: all" accidentally ignored the selector of the device class. As a result, allocation worked correctly only when a node had only devices matching the intended device class. When there were additional devices, things went wrong:
- Unrelated devices were allocated for a request.
- Claim allocation failed completely.
commit 1a34d4840b
parent e456fbfaa6
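
To make the failure mode concrete, here is a minimal, self-contained Go sketch of the principle involved (the names Device, classSelector and selectAll are hypothetical, not the allocator's real types): when a request uses "allocationMode: all", every candidate device still has to pass the device class selector, and skipping that check is what lets devices of an unrelated class slip into the allocation.

package main

import "fmt"

// Device is a stand-in for a device advertised in a resource pool.
type Device struct {
	Driver string
	Pool   string
	Name   string
}

// classSelector stands in for a DeviceClass selector (a CEL expression in
// the real API); here it is just a predicate.
type classSelector func(Device) bool

// selectAll expands an "all devices" request: only devices matching the
// class selector may be included. Dropping the selector check reproduces
// the behavior described in the commit message.
func selectAll(devices []Device, matches classSelector) []Device {
	var selected []Device
	for _, d := range devices {
		if matches(d) {
			selected = append(selected, d)
		}
	}
	return selected
}

func main() {
	devices := []Device{
		{Driver: "driver-a", Pool: "pool-1", Name: "device-1"},
		{Driver: "driver-b", Pool: "pool-1", Name: "device-1"},
	}
	onlyDriverA := func(d Device) bool { return d.Driver == "driver-a" }

	// With the selector applied, only driver-a's device is eligible.
	fmt.Println(selectAll(devices, onlyDriverA))
}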
@@ -169,9 +169,13 @@ func (a *Allocator) Allocate(ctx context.Context, node *v1.Node) (finalResult []
 				return nil, fmt.Errorf("claim %s, request %s: could not retrieve device class %s: %w", klog.KObj(claim), request.Name, request.DeviceClassName, err)
 			}
 
 			// Start collecting information about the request.
+			// The class must be set and stored before calling isSelectable.
 			requestData := requestData{
 				class: class,
 			}
+			requestKey := requestIndices{claimIndex: claimIndex, requestIndex: requestIndex}
+			alloc.requestData[requestKey] = requestData
+
 			switch request.AllocationMode {
 			case resourceapi.DeviceAllocationModeExactCount:
@@ -190,7 +194,7 @@ func (a *Allocator) Allocate(ctx context.Context, node *v1.Node) (finalResult []
 
 				for _, slice := range pool.Slices {
 					for deviceIndex := range slice.Spec.Devices {
-						selectable, err := alloc.isSelectable(requestIndices{claimIndex: claimIndex, requestIndex: requestIndex}, slice, deviceIndex)
+						selectable, err := alloc.isSelectable(requestKey, slice, deviceIndex)
 						if err != nil {
 							return nil, err
 						}
@@ -205,7 +209,7 @@ func (a *Allocator) Allocate(ctx context.Context, node *v1.Node) (finalResult []
 			default:
 				return nil, fmt.Errorf("claim %s, request %s: unsupported count mode %s", klog.KObj(claim), request.Name, request.AllocationMode)
 			}
-			alloc.requestData[requestIndices{claimIndex: claimIndex, requestIndex: requestIndex}] = requestData
+			alloc.requestData[requestKey] = requestData
 			numDevices += requestData.numDevices
 		}
 		alloc.logger.V(6).Info("Checked claim", "claim", klog.KObj(claim), "numDevices", numDevices)
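The ordering in the hunks above is the crux of the fix: the per-request data, including the device class, is now stored under requestKey before the "all devices" loop calls isSelectable, so the selector check can see which class the request asked for. Previously the map entry was only written after the switch statement, i.e. after isSelectable had already run. The following sketch uses hypothetical names (requestKey/requestData here are simplified stand-ins, not the allocator's actual types) to show why the write has to come first.

package main

import "fmt"

// Simplified stand-ins for the allocator's requestIndices / requestData.
type requestKey struct{ claimIndex, requestIndex int }

type requestData struct{ className string }

func main() {
	perRequest := map[requestKey]requestData{}
	key := requestKey{claimIndex: 0, requestIndex: 0}

	// Fixed ordering: record the request's class first ...
	perRequest[key] = requestData{className: "class-a"}

	// ... so the "all devices" expansion can honour its class.
	deviceClass := map[string]string{ // device -> class it matches
		"driver-a/pool-1/device-1": "class-a",
		"driver-b/pool-1/device-1": "class-b",
	}
	for device, class := range deviceClass {
		if class == perRequest[key].className {
			fmt.Println("selectable:", device)
		}
	}
	// In the pre-fix ordering the map write happened only after this loop,
	// so the lookup above would have seen the zero value; in the real
	// allocator that meant the device class selector was not applied.
}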
@@ -619,6 +619,41 @@ func TestAllocator(t *testing.T) {
 			expectResults: nil,
 			expectError: gomega.MatchError(gomega.ContainSubstring("claim claim-0, request req-0: asks for all devices, but resource pool driver-a/pool-1 is currently being updated")),
 		},
+		"all-devices-plus-another": {
+			claimsToAllocate: objects(
+				claimWithRequests(claim0, nil, resourceapi.DeviceRequest{
+					Name:            req0,
+					AllocationMode:  resourceapi.DeviceAllocationModeAll,
+					DeviceClassName: classA,
+				}),
+				claimWithRequests(claim1, nil, resourceapi.DeviceRequest{
+					Name:            req0,
+					AllocationMode:  resourceapi.DeviceAllocationModeExactCount,
+					Count:           1,
+					DeviceClassName: classB,
+				}),
+			),
+			classes: objects(
+				class(classA, driverA),
+				class(classB, driverB),
+			),
+			slices: objects(
+				sliceWithOneDevice(slice1, node1, pool1, driverA),
+				sliceWithOneDevice(slice1, node1, pool1, driverB),
+			),
+			node: node(node1, region1),
+
+			expectResults: []any{
+				allocationResult(
+					localNodeSelector(node1),
+					deviceAllocationResult(req0, driverA, pool1, device1),
+				),
+				allocationResult(
+					localNodeSelector(node1),
+					deviceAllocationResult(req0, driverB, pool1, device1),
+				),
+			},
+		},
 		"network-attached-device": {
 			claimsToAllocate: objects(claim(claim0, req0, classA)),
 			classes: objects(class(classA, driverA)),