Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-08-13 13:55:41 +00:00
DRA scheduler: fix aborting of allocation with mode "all"
The internal "stop allocation" error was sometimes erroneously (pun intended) returned as the result of the allocation and then shown to users. Instead, no error and no results should have been returned, which is then shown as "allocation not possible". Aborting the allocation early is only correct if the device which cannot be allocated is one of the "all" devices which were requested.
This commit is contained in:
parent 1a34d4840b
commit d2ab00cf55
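The fix follows the common Go sentinel-error pattern: an internal sentinel such as errStop may flow through the recursive search, but the public entry point must translate it into "no results, no error" rather than hand it to callers. Below is a minimal, self-contained sketch of that pattern, not the allocator itself; only the names errStop and Allocate are taken from the diff, everything else is illustrative:

	package main

	import (
		"errors"
		"fmt"
	)

	// errStop is an internal sentinel: "there is provably no solution,
	// stop searching". It must never escape to users of the package.
	var errStop = errors.New("stop allocation")

	// allocate stands in for the recursive search (allocateOne in the diff).
	func allocate(feasible bool) ([]string, error) {
		if !feasible {
			return nil, errStop
		}
		return []string{"device-1"}, nil
	}

	// Allocate is the public entry point: it swallows errStop and reports
	// "allocation not possible" as (nil, nil), which is what the fix does.
	func Allocate(feasible bool) ([]string, error) {
		results, err := allocate(feasible)
		if errors.Is(err, errStop) {
			return nil, nil // no results, no error => "allocation not possible"
		}
		if err != nil {
			return nil, err // real errors still propagate to the caller
		}
		return results, nil
	}

	func main() {
		fmt.Println(Allocate(true))  // [device-1] <nil>
		fmt.Println(Allocate(false)) // [] <nil>
	}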
@@ -294,10 +294,13 @@ func (a *Allocator) Allocate(ctx context.Context, node *v1.Node) (finalResult []
 	// All errors get created such that they can be returned by Allocate
 	// without further wrapping.
 	done, err := alloc.allocateOne(deviceIndices{})
+	if errors.Is(err, errStop) {
+		return nil, nil
+	}
 	if err != nil {
 		return nil, err
 	}
-	if errors.Is(err, errStop) || !done {
+	if !done {
 		return nil, nil
 	}
 
@@ -538,20 +541,23 @@ func (alloc *allocator) allocateOne(r deviceIndices) (bool, error) {
 		// For "all" devices we already know which ones we need. We
 		// just need to check whether we can use them.
 		deviceWithID := requestData.allDevices[r.deviceIndex]
-		_, _, err := alloc.allocateDevice(r, deviceWithID.device, deviceWithID.DeviceID, true)
+		success, _, err := alloc.allocateDevice(r, deviceWithID.device, deviceWithID.DeviceID, true)
 		if err != nil {
 			return false, err
 		}
+		if !success {
+			// The order in which we allocate "all" devices doesn't matter,
+			// so we only try with the one which was up next. If we couldn't
+			// get all of them, then there is no solution and we have to stop.
+			return false, errStop
+		}
 		done, err := alloc.allocateOne(deviceIndices{claimIndex: r.claimIndex, requestIndex: r.requestIndex, deviceIndex: r.deviceIndex + 1})
 		if err != nil {
 			return false, err
 		}
-
-		// The order in which we allocate "all" devices doesn't matter,
-		// so we only try with the one which was up next. If we couldn't
-		// get all of them, then there is no solution and we have to stop.
 		if !done {
-			return false, errStop
+			// Backtrack.
+			return false, nil
 		}
 		return done, nil
 	}
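The hunk above restores the distinction between two failure outcomes in allocateOne: an unusable device in an "all" request proves that no solution exists (hard stop via errStop), while a failed recursive attempt further down only means the current branch is a dead end and the caller should backtrack, signalled by returning no error. A toy sketch of that two-outcome convention, with a hypothetical search helper that is not part of the allocator:

	package main

	import (
		"errors"
		"fmt"
	)

	var errStop = errors.New("stop allocation")

	// search mimics allocateOne's contract: (true, nil) on success,
	// (false, nil) to ask the caller to backtrack and try another branch,
	// (false, errStop) when no solution can exist at all.
	func search(devices []string, usable map[string]bool, all bool) (bool, error) {
		for _, d := range devices {
			if usable[d] {
				continue
			}
			if all {
				// Mode "all" requires every listed device, so a single
				// unusable device proves there is no solution: hard stop.
				return false, errStop
			}
			// Just one failed candidate: backtrack, the caller may pick
			// a different device instead.
			return false, nil
		}
		return true, nil
	}

	func main() {
		usable := map[string]bool{"gpu-0": true, "gpu-1": false}
		fmt.Println(search([]string{"gpu-0"}, usable, true))          // true <nil>
		fmt.Println(search([]string{"gpu-0", "gpu-1"}, usable, true)) // false stop allocation
	}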
@@ -582,11 +582,10 @@ func TestAllocator(t *testing.T) {
 
 			expectResults: nil,
 		},
-		"all-devices": {
+		"all-devices-single": {
 			claimsToAllocate: objects(claimWithRequests(claim0, nil, resourceapi.DeviceRequest{
 				Name:            req0,
 				AllocationMode:  resourceapi.DeviceAllocationModeAll,
-				Count:           1,
 				DeviceClassName: classA,
 			})),
 			classes: objects(class(classA, driverA)),
@@ -598,6 +597,25 @@ func TestAllocator(t *testing.T) {
 				deviceAllocationResult(req0, driverA, pool1, device1),
 			)},
 		},
+		"all-devices-many": {
+			claimsToAllocate: objects(claimWithRequests(claim0, nil, resourceapi.DeviceRequest{
+				Name:            req0,
+				AllocationMode:  resourceapi.DeviceAllocationModeAll,
+				DeviceClassName: classA,
+			})),
+			classes: objects(class(classA, driverA)),
+			slices: objects(
+				sliceWithOneDevice(slice1, node1, pool1, driverA),
+				sliceWithOneDevice(slice1, node1, pool2, driverA),
+			),
+			node: node(node1, region1),
+
+			expectResults: []any{allocationResult(
+				localNodeSelector(node1),
+				deviceAllocationResult(req0, driverA, pool1, device1),
+				deviceAllocationResult(req0, driverA, pool2, device1),
+			)},
+		},
 		"all-devices-of-the-incomplete-pool": {
 			claimsToAllocate: objects(claimWithRequests(claim0, nil, resourceapi.DeviceRequest{
 				Name:            req0,
@@ -654,6 +672,165 @@ func TestAllocator(t *testing.T) {
 				),
 			},
 		},
+		"all-devices-plus-another-reversed": {
+			claimsToAllocate: objects(
+				claimWithRequests(claim1, nil, resourceapi.DeviceRequest{
+					Name:            req0,
+					AllocationMode:  resourceapi.DeviceAllocationModeExactCount,
+					Count:           1,
+					DeviceClassName: classB,
+				}),
+				claimWithRequests(claim0, nil, resourceapi.DeviceRequest{
+					Name:            req0,
+					AllocationMode:  resourceapi.DeviceAllocationModeAll,
+					DeviceClassName: classA,
+				}),
+			),
+			classes: objects(
+				class(classA, driverA),
+				class(classB, driverB),
+			),
+			slices: objects(
+				sliceWithOneDevice(slice1, node1, pool1, driverA),
+				sliceWithOneDevice(slice1, node1, pool1, driverB),
+			),
+			node: node(node1, region1),
+
+			expectResults: []any{
+				allocationResult(
+					localNodeSelector(node1),
+					deviceAllocationResult(req0, driverB, pool1, device1),
+				),
+				allocationResult(
+					localNodeSelector(node1),
+					deviceAllocationResult(req0, driverA, pool1, device1),
+				),
+			},
+		},
+		"all-devices-many-plus-another": {
+			claimsToAllocate: objects(
+				claimWithRequests(claim0, nil, resourceapi.DeviceRequest{
+					Name:            req0,
+					AllocationMode:  resourceapi.DeviceAllocationModeAll,
+					DeviceClassName: classA,
+				}),
+				claimWithRequests(claim1, nil, resourceapi.DeviceRequest{
+					Name:            req0,
+					AllocationMode:  resourceapi.DeviceAllocationModeExactCount,
+					Count:           1,
+					DeviceClassName: classB,
+				}),
+			),
+			classes: objects(
+				class(classA, driverA),
+				class(classB, driverB),
+			),
+			slices: objects(
+				slice(slice1, node1, pool1, driverA,
+					device(device1, nil, nil),
+					device(device2, nil, nil),
+				),
+				sliceWithOneDevice(slice1, node1, pool1, driverB),
+			),
+			node: node(node1, region1),
+
+			expectResults: []any{
+				allocationResult(
+					localNodeSelector(node1),
+					deviceAllocationResult(req0, driverA, pool1, device1),
+					deviceAllocationResult(req0, driverA, pool1, device2),
+				),
+				allocationResult(
+					localNodeSelector(node1),
+					deviceAllocationResult(req0, driverB, pool1, device1),
+				),
+			},
+		},
+		"all-devices-many-plus-another-reversed": {
+			claimsToAllocate: objects(
+				claimWithRequests(claim1, nil, resourceapi.DeviceRequest{
+					Name:            req0,
+					AllocationMode:  resourceapi.DeviceAllocationModeExactCount,
+					Count:           1,
+					DeviceClassName: classB,
+				}),
+				claimWithRequests(claim0, nil, resourceapi.DeviceRequest{
+					Name:            req0,
+					AllocationMode:  resourceapi.DeviceAllocationModeAll,
+					DeviceClassName: classA,
+				}),
+			),
+			classes: objects(
+				class(classA, driverA),
+				class(classB, driverB),
+			),
+			slices: objects(
+				slice(slice1, node1, pool1, driverA,
+					device(device1, nil, nil),
+					device(device2, nil, nil),
+				),
+				sliceWithOneDevice(slice1, node1, pool1, driverB),
+			),
+			node: node(node1, region1),
+
+			expectResults: []any{
+				allocationResult(
+					localNodeSelector(node1),
+					deviceAllocationResult(req0, driverB, pool1, device1),
+				),
+				allocationResult(
+					localNodeSelector(node1),
+					deviceAllocationResult(req0, driverA, pool1, device1),
+					deviceAllocationResult(req0, driverA, pool1, device2),
+				),
+			},
+		},
+		"all-devices-no-solution": {
+			// One device, two claims both trying to allocate it.
+			claimsToAllocate: objects(
+				claimWithRequests(claim1, nil, resourceapi.DeviceRequest{
+					Name:            req0,
+					AllocationMode:  resourceapi.DeviceAllocationModeExactCount,
+					Count:           1,
+					DeviceClassName: classA,
+				}),
+				claimWithRequests(claim0, nil, resourceapi.DeviceRequest{
+					Name:            req0,
+					AllocationMode:  resourceapi.DeviceAllocationModeAll,
+					DeviceClassName: classA,
+				}),
+			),
+			classes: objects(
+				class(classA, driverA),
+			),
+			slices: objects(
+				sliceWithOneDevice(slice1, node1, pool1, driverA),
+			),
+			node: node(node1, region1),
+		},
+		"all-devices-no-solution-reversed": {
+			// One device, two claims both trying to allocate it.
+			claimsToAllocate: objects(
+				claimWithRequests(claim0, nil, resourceapi.DeviceRequest{
+					Name:            req0,
+					AllocationMode:  resourceapi.DeviceAllocationModeAll,
+					DeviceClassName: classA,
+				}),
+				claimWithRequests(claim1, nil, resourceapi.DeviceRequest{
+					Name:            req0,
+					AllocationMode:  resourceapi.DeviceAllocationModeExactCount,
+					Count:           1,
+					DeviceClassName: classA,
+				}),
+			),
+			classes: objects(
+				class(classA, driverA),
+			),
+			slices: objects(
+				sliceWithOneDevice(slice1, node1, pool1, driverA),
+			),
+			node: node(node1, region1),
+		},
 		"network-attached-device": {
 			claimsToAllocate: objects(claim(claim0, req0, classA)),
 			classes: objects(class(classA, driverA)),