DRA scheduler: populate set of allocated devices only once

Devices which are already allocated before the allocation starts always remain
allocated. They can therefore be stored once in a set; each Filter call for the
different nodes then reuses that set instead of building it anew for every node
(a minimal sketch of the pattern follows the benchmark numbers below).

    goos: linux
    goarch: amd64
    pkg: k8s.io/kubernetes/test/integration/scheduler_perf
    cpu: Intel(R) Core(TM) i9-7980XE CPU @ 2.60GHz
                                                                                       │            before            │                        after                         │
                                                                                       │ SchedulingThroughput/Average │ SchedulingThroughput/Average  vs base                │
    PerfScheduling/SchedulingWithResourceClaimTemplateStructured/5000pods_500nodes-36                      36.89 ± 2%                     54.70 ± 6%   +48.26% (p=0.002 n=6)
    PerfScheduling/SteadyStateClusterResourceClaimTemplateStructured/empty_100nodes-36                     105.7 ± 5%                     106.4 ± 4%         ~ (p=0.970 n=6)
    PerfScheduling/SteadyStateClusterResourceClaimTemplateStructured/empty_500nodes-36                     117.8 ± 3%                     120.0 ± 4%         ~ (p=0.134 n=6)
    PerfScheduling/SteadyStateClusterResourceClaimTemplateStructured/half_100nodes-36                      119.5 ± 4%                     112.5 ± 4%    -5.86% (p=0.009 n=6)
    PerfScheduling/SteadyStateClusterResourceClaimTemplateStructured/half_500nodes-36                      63.22 ± 2%                     87.13 ± 4%   +37.82% (p=0.002 n=6)
    PerfScheduling/SteadyStateClusterResourceClaimTemplateStructured/full_100nodes-36                      109.5 ± 2%                     113.4 ± 2%    +3.65% (p=0.006 n=6)
    PerfScheduling/SteadyStateClusterResourceClaimTemplateStructured/full_500nodes-36                      27.56 ± 5%                     65.55 ± 3%  +137.84% (p=0.002 n=6)
    geomean                                                                                                72.44                          90.81        +25.37%
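
To make the idea concrete, here is a minimal Go sketch of the pattern, not the actual allocator code: the names are simplified (DeviceID is reduced to a string, and newAllocator/deviceInUse are hypothetical helpers), but it shows the immutable set of pre-allocated devices being built once with sets.New and queried cheaply with Has, while a small per-attempt map tracks only the devices that the current allocation is tentatively claiming.

    package main

    import (
        "fmt"

        "k8s.io/apimachinery/pkg/util/sets"
    )

    // DeviceID is a simplified stand-in for the allocator's device identifier.
    type DeviceID string

    // allocator mirrors the shape of the change: an immutable set that is built
    // once, plus a small per-attempt map for devices being allocated right now.
    type allocator struct {
        allocatedDevices  sets.Set[DeviceID] // built once, shared by all Filter calls
        allocatingDevices map[DeviceID]bool  // recreated for every node / Allocate call
    }

    func newAllocator(allocated []DeviceID) *allocator {
        return &allocator{
            // This won't change, so build the set only once.
            allocatedDevices: sets.New(allocated...),
        }
    }

    // deviceInUse is the cheap per-device check: either allocated before the
    // scheduling attempt started, or tentatively allocated in this attempt.
    func (a *allocator) deviceInUse(id DeviceID) bool {
        return a.allocatedDevices.Has(id) || a.allocatingDevices[id]
    }

    func main() {
        a := newAllocator([]DeviceID{"driver/pool/dev-0"})
        a.allocatingDevices = map[DeviceID]bool{"driver/pool/dev-1": true}
        fmt.Println(a.deviceInUse("driver/pool/dev-0")) // true: allocated beforehand
        fmt.Println(a.deviceInUse("driver/pool/dev-1")) // true: allocated in this attempt
        fmt.Println(a.deviceInUse("driver/pool/dev-2")) // false
    }

The gain measured above comes from no longer rebuilding the map of pre-existing allocations once per node; only the small allocatingDevices map remains per node.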
Author: Patrick Ohly
Date:   2024-09-04 18:55:39 +02:00
Parent: f070dd760c
Commit: f0efb8a5fd


@@ -43,7 +43,7 @@ import (
 type Allocator struct {
     adminAccessEnabled bool
     claimsToAllocate   []*resourceapi.ResourceClaim
-    allocatedDevices   []DeviceID
+    allocatedDevices   sets.Set[DeviceID]
     classLister        resourcelisters.DeviceClassLister
     slices             []*resourceapi.ResourceSlice
     celCache           *CELCache
@@ -66,11 +66,12 @@ func NewAllocator(ctx context.Context,
     return &Allocator{
         adminAccessEnabled: adminAccessEnabled,
         claimsToAllocate:   claimsToAllocate,
-        allocatedDevices:   allocatedDevices,
-        classLister:        classLister,
-        slices:             slices,
-        celCache:           celCache,
-        celMutex:           keymutex.NewHashed(0),
+        // This won't change, so build this set only once.
+        allocatedDevices: sets.New(allocatedDevices...),
+        classLister:      classLister,
+        slices:           slices,
+        celCache:         celCache,
+        celMutex:         keymutex.NewHashed(0),
     }, nil
 }
@@ -295,18 +296,10 @@ func (a *Allocator) Allocate(ctx context.Context, node *v1.Node) (finalResult []
     // the Allocate call and can be compared in Go.
     alloc.deviceMatchesRequest = make(map[matchKey]bool)
-    // Some of the existing devices are probably already allocated by
-    // claims...
-    numAllocated := len(alloc.allocatedDevices)
+    // We can estimate the size based on what we need to allocate.
+    alloc.allocatingDevices = make(map[DeviceID]bool, numDevicesTotal)
-    // Pre-allocating the map avoids growing it later. We can estimate
-    // the size based on what is already allocated and what we need to
-    // allocate.
-    alloc.allocated = make(map[DeviceID]bool, numAllocated+numDevicesTotal)
-    for _, deviceID := range alloc.allocatedDevices {
-        alloc.allocated[deviceID] = true
-    }
-    alloc.logger.V(6).Info("Gathered information about allocated devices", "numAllocated", numAllocated)
+    alloc.logger.V(6).Info("Gathered information about devices", "numAllocated", len(alloc.allocatedDevices), "toBeAllocated", numDevicesTotal)
     // In practice, there aren't going to be many different CEL
     // expressions. Most likely, there is going to be handful of different
@@ -393,7 +386,7 @@ type allocator struct {
     deviceMatchesRequest map[matchKey]bool
     constraints          [][]constraint                 // one list of constraints per claim
     requestData          map[requestIndices]requestData // one entry per request
-    allocated            map[DeviceID]bool
+    allocatingDevices    map[DeviceID]bool
     result               []internalAllocationResult
 }
@@ -620,7 +613,7 @@ func (alloc *allocator) allocateOne(r deviceIndices) (bool, error) {
             deviceID := DeviceID{Driver: pool.Driver, Pool: pool.Pool, Device: slice.Spec.Devices[deviceIndex].Name}
             // Checking for "in use" is cheap and thus gets done first.
-            if !ptr.Deref(request.AdminAccess, false) && alloc.allocated[deviceID] {
+            if !ptr.Deref(request.AdminAccess, false) && (alloc.allocatedDevices.Has(deviceID) || alloc.allocatingDevices[deviceID]) {
                 alloc.logger.V(7).Info("Device in use", "device", deviceID)
                 continue
             }
@@ -775,7 +768,7 @@ func (alloc *allocator) allocateDevice(r deviceIndices, device deviceWithID, mus
     claim := alloc.claimsToAllocate[r.claimIndex]
     request := &claim.Spec.Devices.Requests[r.requestIndex]
     adminAccess := ptr.Deref(request.AdminAccess, false)
-    if !adminAccess && alloc.allocated[device.id] {
+    if !adminAccess && (alloc.allocatedDevices.Has(device.id) || alloc.allocatingDevices[device.id]) {
         alloc.logger.V(7).Info("Device in use", "device", device.id)
         return false, nil, nil
     }
@@ -802,7 +795,7 @@ func (alloc *allocator) allocateDevice(r deviceIndices, device deviceWithID, mus
     // and record the result.
     alloc.logger.V(7).Info("Device allocated", "device", device.id)
     if !adminAccess {
-        alloc.allocated[device.id] = true
+        alloc.allocatingDevices[device.id] = true
     }
     result := internalDeviceResult{
         request: request.Name,
@@ -820,7 +813,7 @@ func (alloc *allocator) allocateDevice(r deviceIndices, device deviceWithID, mus
             constraint.remove(request.Name, device.basic, device.id)
         }
         if !adminAccess {
-            alloc.allocated[device.id] = false
+            alloc.allocatingDevices[device.id] = false
         }
         // Truncate, but keep the underlying slice.
         alloc.result[r.claimIndex].devices = alloc.result[r.claimIndex].devices[:previousNumResults]
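
As a closing note on the last two hunks: the per-attempt allocatingDevices map is what gets mutated during the backtracking search, while the pre-existing allocations stay in the shared, read-only set. A hedged sketch of that flow, reusing the simplified allocator type from the sketch above (tryDevice and its rest callback are hypothetical names, not taken from the real code):

    // tryDevice tentatively allocates one device and backtracks if the rest of
    // the requests cannot be satisfied. Sketch only, under the assumptions above.
    func (a *allocator) tryDevice(id DeviceID, rest func() bool) bool {
        if a.deviceInUse(id) {
            return false // allocated before scheduling, or earlier in this attempt
        }
        a.allocatingDevices[id] = true // tentatively allocate
        if rest() {                    // try to place the remaining requests
            return true
        }
        a.allocatingDevices[id] = false // backtrack; the shared set is never touched
        return false
    }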