From 838f911deac5110adee8ad72069a315872b59b4c Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Wed, 9 Oct 2024 15:10:53 +0200 Subject: [PATCH] cpumanager: smtalign: fix error message Fix error message if availablePhysicalCPUs = 0. Without this change, the logic was mistakenly emitting the old error message, which is confusing for troubleshooting. Plus, a tiny quality of life improvement: cpumanager static policy wants to use `cpuGroupSize` multiple times. The value represents how many VCPUs per PCPUs the machine has. So, let's cache (and log!) the value in the policy data. We don't support dynamic update of the HW topology anyway. Signed-off-by: Francesco Romani --- pkg/kubelet/cm/cpumanager/policy_static.go | 31 ++++++++------ .../cm/cpumanager/policy_static_test.go | 40 ++++++++++++++++++- 2 files changed, 58 insertions(+), 13 deletions(-) diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go index 19c98af703e..333c5c3caaf 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static.go +++ b/pkg/kubelet/cm/cpumanager/policy_static.go @@ -49,10 +49,11 @@ type SMTAlignmentError struct { RequestedCPUs int CpusPerCore int AvailablePhysicalCPUs int + CausedByPhysicalCPUs bool } func (e SMTAlignmentError) Error() string { - if e.AvailablePhysicalCPUs > 0 { + if e.CausedByPhysicalCPUs { return fmt.Sprintf("SMT Alignment Error: not enough free physical CPUs: available physical CPUs = %d, requested CPUs = %d, CPUs per core = %d", e.AvailablePhysicalCPUs, e.RequestedCPUs, e.CpusPerCore) } return fmt.Sprintf("SMT Alignment Error: requested %d cpus not multiple cpus per core = %d", e.RequestedCPUs, e.CpusPerCore) @@ -118,6 +119,9 @@ type staticPolicy struct { cpusToReuse map[string]cpuset.CPUSet // options allow to fine-tune the behaviour of the policy options StaticPolicyOptions + // we compute this value multiple time, and it's not supposed to change + // at runtime - the cpumanager can't deal with runtime topology changes anyway. + cpuGroupSize int } // Ensure staticPolicy implements Policy interface @@ -136,13 +140,15 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv return nil, err } - klog.InfoS("Static policy created with configuration", "options", opts) + cpuGroupSize := topology.CPUsPerCore() + klog.InfoS("Static policy created with configuration", "options", opts, "cpuGroupSize", cpuGroupSize) policy := &staticPolicy{ - topology: topology, - affinity: affinity, - cpusToReuse: make(map[string]cpuset.CPUSet), - options: opts, + topology: topology, + affinity: affinity, + cpusToReuse: make(map[string]cpuset.CPUSet), + options: opts, + cpuGroupSize: cpuGroupSize, } allCPUs := topology.CPUDetails.CPUs() @@ -310,8 +316,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai }() if p.options.FullPhysicalCPUsOnly { - CPUsPerCore := p.topology.CPUsPerCore() - if (numCPUs % CPUsPerCore) != 0 { + if (numCPUs % p.cpuGroupSize) != 0 { // Since CPU Manager has been enabled requesting strict SMT alignment, it means a guaranteed pod can only be admitted // if the CPU requested is a multiple of the number of virtual cpus per physical cores. // In case CPU request is not a multiple of the number of virtual cpus per physical cores the Pod will be put @@ -322,8 +327,9 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai // and only in case the request cannot be sattisfied on a single socket, CPU allocation is done for a workload to occupy all // CPUs on a physical core. Allocation of individual threads would never have to occur. return SMTAlignmentError{ - RequestedCPUs: numCPUs, - CpusPerCore: CPUsPerCore, + RequestedCPUs: numCPUs, + CpusPerCore: p.cpuGroupSize, + CausedByPhysicalCPUs: false, } } @@ -336,8 +342,9 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai if numCPUs > availablePhysicalCPUs { return SMTAlignmentError{ RequestedCPUs: numCPUs, - CpusPerCore: CPUsPerCore, + CpusPerCore: p.cpuGroupSize, AvailablePhysicalCPUs: availablePhysicalCPUs, + CausedByPhysicalCPUs: true, } } } @@ -491,7 +498,7 @@ func (p *staticPolicy) takeByTopology(availableCPUs cpuset.CPUSet, numCPUs int) if p.options.DistributeCPUsAcrossNUMA { cpuGroupSize := 1 if p.options.FullPhysicalCPUsOnly { - cpuGroupSize = p.topology.CPUsPerCore() + cpuGroupSize = p.cpuGroupSize } return takeByTopologyNUMADistributed(p.topology, availableCPUs, numCPUs, cpuGroupSize, cpuSortingStrategy) } diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go index bcd1e19ba29..922f1c49f69 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static_test.go +++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go @@ -509,7 +509,7 @@ func TestStaticPolicyAdd(t *testing.T) { stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 5, 7, 8, 9, 10, 11), pod: makePod("fakePod", "fakeContainerBug113537_1", "10000m", "10000m"), - expErr: SMTAlignmentError{RequestedCPUs: 10, CpusPerCore: 2, AvailablePhysicalCPUs: 8}, + expErr: SMTAlignmentError{RequestedCPUs: 10, CpusPerCore: 2, AvailablePhysicalCPUs: 8, CausedByPhysicalCPUs: true}, expCPUAlloc: false, expCSet: cpuset.New(), }, @@ -1188,6 +1188,44 @@ func TestStaticPolicyOptions(t *testing.T) { } } +func TestSMTAlignmentErrorText(t *testing.T) { + type smtErrTestCase struct { + name string + err SMTAlignmentError + expected string + } + + testCases := []smtErrTestCase{ + { + name: "base SMT alignment error", + err: SMTAlignmentError{RequestedCPUs: 15, CpusPerCore: 4}, + expected: `SMT Alignment Error: requested 15 cpus not multiple cpus per core = 4`, + }, + { + // Note the explicit 0. The intent is to signal the lack of physical CPUs, but + // in the corner case of no available physical CPUs at all, without the explicit + // flag we cannot distinguish the case, and before PR#127959 we printed the old message + name: "base SMT alignment error, no physical CPUs, missing flag", + err: SMTAlignmentError{RequestedCPUs: 4, CpusPerCore: 2, AvailablePhysicalCPUs: 0}, + expected: `SMT Alignment Error: requested 4 cpus not multiple cpus per core = 2`, + }, + { + name: "base SMT alignment error, no physical CPUs, explicit flag", + err: SMTAlignmentError{RequestedCPUs: 4, CpusPerCore: 2, AvailablePhysicalCPUs: 0, CausedByPhysicalCPUs: true}, + expected: `SMT Alignment Error: not enough free physical CPUs: available physical CPUs = 0, requested CPUs = 4, CPUs per core = 2`, + }, + } + + for _, testCase := range testCases { + t.Run(testCase.name, func(t *testing.T) { + got := testCase.err.Error() + if got != testCase.expected { + t.Errorf("got=%v expected=%v", got, testCase.expected) + } + }) + } +} + func newCPUSetPtr(cpus ...int) *cpuset.CPUSet { ret := cpuset.New(cpus...) return &ret