DRA scheduler: avoid listing claims during Filter

The Allocate call used to call back into the claim lister for each node. This
was significant work which showed up at the top of the CPU profile. It's
okay to list only once during PreFilter because the Filter call does not change
the claim status between Allocate calls.

    goos: linux
    goarch: amd64
    pkg: k8s.io/kubernetes/test/integration/scheduler_perf
    cpu: Intel(R) Core(TM) i9-7980XE CPU @ 2.60GHz
                                                                                       │            before            │                        after                        │
                                                                                       │ SchedulingThroughput/Average │ SchedulingThroughput/Average  vs base               │
    PerfScheduling/SchedulingWithResourceClaimTemplateStructured/5000pods_500nodes-36                      15.04 ± 0%                    18.06 ±  2%  +20.07% (p=0.002 n=6)
    PerfScheduling/SteadyStateClusterResourceClaimTemplateStructured/empty_100nodes-36                     105.5 ± 1%                    104.7 ±  2%        ~ (p=0.485 n=6)
    PerfScheduling/SteadyStateClusterResourceClaimTemplateStructured/empty_500nodes-36                     95.83 ± 1%                    96.62 ±  1%        ~ (p=0.063 n=6)
    PerfScheduling/SteadyStateClusterResourceClaimTemplateStructured/half_100nodes-36                      79.67 ± 3%                    83.00 ±  2%   +4.18% (p=0.002 n=6)
    PerfScheduling/SteadyStateClusterResourceClaimTemplateStructured/half_500nodes-36                      27.11 ± 5%                    32.45 ±  7%  +19.68% (p=0.002 n=6)
    PerfScheduling/SteadyStateClusterResourceClaimTemplateStructured/full_100nodes-36                      84.00 ± 3%                    95.22 ±  7%  +13.36% (p=0.002 n=6)
    PerfScheduling/SteadyStateClusterResourceClaimTemplateStructured/full_500nodes-36                      7.110 ± 6%                    9.111 ± 10%  +28.15% (p=0.002 n=6)
    geomean                                                                                                41.05                         45.86        +11.73%
This commit is contained in:
Patrick Ohly 2024-09-03 16:41:29 +02:00
parent ad22b74c60
commit 7de6d070f2

View File

@ -528,8 +528,14 @@ func (pl *DynamicResources) PreFilter(ctx context.Context, state *framework.Cycl
// persistently.
//
// Claims are treated as "allocated" if they are in the assume cache
// or currently their allocation is in-flight.
allocator, err := structured.NewAllocator(ctx, pl.enableAdminAccess, allocateClaims, &claimListerForAssumeCache{assumeCache: pl.claimAssumeCache, inFlightAllocations: &pl.inFlightAllocations}, pl.classLister, pl.sliceLister)
// or currently their allocation is in-flight. This does not change
// during filtering, so we can determine that once.
claimLister := &claimListerForAssumeCache{assumeCache: pl.claimAssumeCache, inFlightAllocations: &pl.inFlightAllocations}
allocatedClaims, err := claimLister.ListAllAllocated()
if err != nil {
return nil, statusError(logger, err)
}
allocator, err := structured.NewAllocator(ctx, pl.enableAdminAccess, allocateClaims, staticClaimLister(allocatedClaims), pl.classLister, pl.sliceLister)
if err != nil {
return nil, statusError(logger, err)
}
@ -562,6 +568,12 @@ func (cl *claimListerForAssumeCache) ListAllAllocated() ([]*resourceapi.Resource
return allocated, nil
}
type staticClaimLister []*resourceapi.ResourceClaim
func (cl staticClaimLister) ListAllAllocated() ([]*resourceapi.ResourceClaim, error) {
return cl, nil
}
// PreFilterExtensions returns prefilter extensions, pod add and remove.
func (pl *DynamicResources) PreFilterExtensions() framework.PreFilterExtensions {
return nil