From f0efb8a5fd09bb538bc2018d2a7da65e4d40a2ac Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Wed, 4 Sep 2024 18:55:39 +0200
Subject: [PATCH] DRA scheduler: populate set of allocated devices only once
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The devices which are allocated before starting the allocation always
remain allocated. They can be stored once in a set, then each Filter
call for the different nodes can reuse that set instead of allocating
it anew for each node.

goos: linux
goarch: amd64
pkg: k8s.io/kubernetes/test/integration/scheduler_perf
cpu: Intel(R) Core(TM) i9-7980XE CPU @ 2.60GHz
                                                                                     │            before            │                 after                  │
                                                                                     │ SchedulingThroughput/Average │ SchedulingThroughput/Average  vs base  │
PerfScheduling/SchedulingWithResourceClaimTemplateStructured/5000pods_500nodes-36       36.89 ± 2%    54.70 ± 6%    +48.26% (p=0.002 n=6)
PerfScheduling/SteadyStateClusterResourceClaimTemplateStructured/empty_100nodes-36      105.7 ± 5%    106.4 ± 4%          ~ (p=0.970 n=6)
PerfScheduling/SteadyStateClusterResourceClaimTemplateStructured/empty_500nodes-36      117.8 ± 3%    120.0 ± 4%          ~ (p=0.134 n=6)
PerfScheduling/SteadyStateClusterResourceClaimTemplateStructured/half_100nodes-36       119.5 ± 4%    112.5 ± 4%     -5.86% (p=0.009 n=6)
PerfScheduling/SteadyStateClusterResourceClaimTemplateStructured/half_500nodes-36       63.22 ± 2%    87.13 ± 4%    +37.82% (p=0.002 n=6)
PerfScheduling/SteadyStateClusterResourceClaimTemplateStructured/full_100nodes-36       109.5 ± 2%    113.4 ± 2%     +3.65% (p=0.006 n=6)
PerfScheduling/SteadyStateClusterResourceClaimTemplateStructured/full_500nodes-36       27.56 ± 5%    65.55 ± 3%   +137.84% (p=0.002 n=6)
geomean                                                                                  72.44         90.81        +25.37%
---
 .../structured/allocator.go | 37 ++++++++-----------
 1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/staging/src/k8s.io/dynamic-resource-allocation/structured/allocator.go b/staging/src/k8s.io/dynamic-resource-allocation/structured/allocator.go
index e12f29aeb3a..51f950fb132 100644
--- a/staging/src/k8s.io/dynamic-resource-allocation/structured/allocator.go
+++ b/staging/src/k8s.io/dynamic-resource-allocation/structured/allocator.go
@@ -43,7 +43,7 @@ import (
 type Allocator struct {
     adminAccessEnabled bool
     claimsToAllocate   []*resourceapi.ResourceClaim
-    allocatedDevices   []DeviceID
+    allocatedDevices   sets.Set[DeviceID]
     classLister        resourcelisters.DeviceClassLister
     slices             []*resourceapi.ResourceSlice
     celCache           *CELCache
@@ -66,11 +66,12 @@ func NewAllocator(ctx context.Context,
     return &Allocator{
         adminAccessEnabled: adminAccessEnabled,
         claimsToAllocate:   claimsToAllocate,
-        allocatedDevices:   allocatedDevices,
-        classLister:        classLister,
-        slices:             slices,
-        celCache:           celCache,
-        celMutex:           keymutex.NewHashed(0),
+        // This won't change, so build this set only once.
+        allocatedDevices: sets.New(allocatedDevices...),
+        classLister:      classLister,
+        slices:           slices,
+        celCache:         celCache,
+        celMutex:         keymutex.NewHashed(0),
     }, nil
 }
@@ -295,18 +296,10 @@ func (a *Allocator) Allocate(ctx context.Context, node *v1.Node) (finalResult []
     // the Allocate call and can be compared in Go.
     alloc.deviceMatchesRequest = make(map[matchKey]bool)
 
-    // Some of the existing devices are probably already allocated by
-    // claims...
-    numAllocated := len(alloc.allocatedDevices)
+    // We can estimate the size based on what we need to allocate.
+    alloc.allocatingDevices = make(map[DeviceID]bool, numDevicesTotal)
 
-    // Pre-allocating the map avoids growing it later. We can estimate
-    // the size based on what is already allocated and what we need to
-    // allocate.
-    alloc.allocated = make(map[DeviceID]bool, numAllocated+numDevicesTotal)
-    for _, deviceID := range alloc.allocatedDevices {
-        alloc.allocated[deviceID] = true
-    }
-    alloc.logger.V(6).Info("Gathered information about allocated devices", "numAllocated", numAllocated)
+    alloc.logger.V(6).Info("Gathered information about devices", "numAllocated", len(alloc.allocatedDevices), "toBeAllocated", numDevicesTotal)
 
     // In practice, there aren't going to be many different CEL
     // expressions. Most likely, there is going to be handful of different
@@ -393,7 +386,7 @@ type allocator struct {
     deviceMatchesRequest map[matchKey]bool
     constraints          [][]constraint                 // one list of constraints per claim
     requestData          map[requestIndices]requestData // one entry per request
-    allocated            map[DeviceID]bool
+    allocatingDevices    map[DeviceID]bool
     result               []internalAllocationResult
 }
@@ -620,7 +613,7 @@ func (alloc *allocator) allocateOne(r deviceIndices) (bool, error) {
             deviceID := DeviceID{Driver: pool.Driver, Pool: pool.Pool, Device: slice.Spec.Devices[deviceIndex].Name}
 
             // Checking for "in use" is cheap and thus gets done first.
-            if !ptr.Deref(request.AdminAccess, false) && alloc.allocated[deviceID] {
+            if !ptr.Deref(request.AdminAccess, false) && (alloc.allocatedDevices.Has(deviceID) || alloc.allocatingDevices[deviceID]) {
                 alloc.logger.V(7).Info("Device in use", "device", deviceID)
                 continue
             }
@@ -775,7 +768,7 @@ func (alloc *allocator) allocateDevice(r deviceIndices, device deviceWithID, mus
     claim := alloc.claimsToAllocate[r.claimIndex]
     request := &claim.Spec.Devices.Requests[r.requestIndex]
     adminAccess := ptr.Deref(request.AdminAccess, false)
-    if !adminAccess && alloc.allocated[device.id] {
+    if !adminAccess && (alloc.allocatedDevices.Has(device.id) || alloc.allocatingDevices[device.id]) {
         alloc.logger.V(7).Info("Device in use", "device", device.id)
         return false, nil, nil
     }
@@ -802,7 +795,7 @@ func (alloc *allocator) allocateDevice(r deviceIndices, device deviceWithID, mus
     // and record the result.
     alloc.logger.V(7).Info("Device allocated", "device", device.id)
     if !adminAccess {
-        alloc.allocated[device.id] = true
+        alloc.allocatingDevices[device.id] = true
     }
     result := internalDeviceResult{
         request: request.Name,
@@ -820,7 +813,7 @@ func (alloc *allocator) allocateDevice(r deviceIndices, device deviceWithID, mus
             constraint.remove(request.Name, device.basic, device.id)
         }
         if !adminAccess {
-            alloc.allocated[device.id] = false
+            alloc.allocatingDevices[device.id] = false
         }
         // Truncate, but keep the underlying slice.
         alloc.result[r.claimIndex].devices = alloc.result[r.claimIndex].devices[:previousNumResults]
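
Illustrative note (not part of the patch): the change above splits "devices that were already allocated before this scheduling attempt" (built once as a set in NewAllocator and shared by every per-node Filter call) from "devices tentatively picked during the current attempt" (a small per-node map). The following standalone Go program is a minimal sketch of that split. The type and function names (deviceID, allocator, newAllocator, filterNode) are simplified stand-ins, not the real identifiers from allocator.go; only sets.New and Set.Has from k8s.io/apimachinery/pkg/util/sets are real API.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/sets"
)

// deviceID is a simplified stand-in for the structured.DeviceID type.
type deviceID struct {
	Driver, Pool, Device string
}

// allocator mirrors the split introduced by the patch:
//   - allocatedDevices: devices already allocated before this attempt,
//     converted into a set exactly once and reused for every node.
//   - allocatingDevices: devices tentatively picked while evaluating the
//     current node, rebuilt cheaply per Filter call.
type allocator struct {
	allocatedDevices  sets.Set[deviceID]
	allocatingDevices map[deviceID]bool
}

// newAllocator builds the set of pre-allocated devices once, instead of
// copying the slice into a fresh map for every node.
func newAllocator(allocated []deviceID) *allocator {
	return &allocator{
		allocatedDevices: sets.New(allocated...),
	}
}

// filterNode is roughly what one per-node Filter pass does: start with an
// empty "allocating" map and consult both sources for the "in use" check.
func (a *allocator) filterNode(candidates []deviceID) []deviceID {
	a.allocatingDevices = make(map[deviceID]bool, len(candidates))
	var picked []deviceID
	for _, id := range candidates {
		if a.allocatedDevices.Has(id) || a.allocatingDevices[id] {
			continue // device already in use, skip it
		}
		a.allocatingDevices[id] = true
		picked = append(picked, id)
	}
	return picked
}

func main() {
	used := []deviceID{{Driver: "driver.example.com", Pool: "pool-a", Device: "gpu-0"}}
	a := newAllocator(used)
	free := a.filterNode([]deviceID{
		{Driver: "driver.example.com", Pool: "pool-a", Device: "gpu-0"},
		{Driver: "driver.example.com", Pool: "pool-a", Device: "gpu-1"},
	})
	fmt.Println(free) // only gpu-1 is still free
}

Because the pre-allocated set never changes during a scheduling attempt, building it once is safe to share across concurrent per-node Filter calls as long as it is only read, which is what motivates the benchmark gains quoted in the commit message.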