diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go index 19c98af703e..333c5c3caaf 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static.go +++ b/pkg/kubelet/cm/cpumanager/policy_static.go @@ -49,10 +49,11 @@ type SMTAlignmentError struct { RequestedCPUs int CpusPerCore int AvailablePhysicalCPUs int + CausedByPhysicalCPUs bool } func (e SMTAlignmentError) Error() string { - if e.AvailablePhysicalCPUs > 0 { + if e.CausedByPhysicalCPUs { return fmt.Sprintf("SMT Alignment Error: not enough free physical CPUs: available physical CPUs = %d, requested CPUs = %d, CPUs per core = %d", e.AvailablePhysicalCPUs, e.RequestedCPUs, e.CpusPerCore) } return fmt.Sprintf("SMT Alignment Error: requested %d cpus not multiple cpus per core = %d", e.RequestedCPUs, e.CpusPerCore) @@ -118,6 +119,9 @@ type staticPolicy struct { cpusToReuse map[string]cpuset.CPUSet // options allow to fine-tune the behaviour of the policy options StaticPolicyOptions + // we compute this value multiple times, and it's not supposed to change + // at runtime - the cpumanager can't deal with runtime topology changes anyway. 
+ cpuGroupSize int } // Ensure staticPolicy implements Policy interface @@ -136,13 +140,15 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv return nil, err } - klog.InfoS("Static policy created with configuration", "options", opts) + cpuGroupSize := topology.CPUsPerCore() + klog.InfoS("Static policy created with configuration", "options", opts, "cpuGroupSize", cpuGroupSize) policy := &staticPolicy{ - topology: topology, - affinity: affinity, - cpusToReuse: make(map[string]cpuset.CPUSet), - options: opts, + topology: topology, + affinity: affinity, + cpusToReuse: make(map[string]cpuset.CPUSet), + options: opts, + cpuGroupSize: cpuGroupSize, } allCPUs := topology.CPUDetails.CPUs() @@ -310,8 +316,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai }() if p.options.FullPhysicalCPUsOnly { - CPUsPerCore := p.topology.CPUsPerCore() - if (numCPUs % CPUsPerCore) != 0 { + if (numCPUs % p.cpuGroupSize) != 0 { // Since CPU Manager has been enabled requesting strict SMT alignment, it means a guaranteed pod can only be admitted // if the CPU requested is a multiple of the number of virtual cpus per physical cores. // In case CPU request is not a multiple of the number of virtual cpus per physical cores the Pod will be put @@ -322,8 +327,9 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai // and only in case the request cannot be sattisfied on a single socket, CPU allocation is done for a workload to occupy all // CPUs on a physical core. Allocation of individual threads would never have to occur. 
return SMTAlignmentError{ - RequestedCPUs: numCPUs, - CpusPerCore: CPUsPerCore, + RequestedCPUs: numCPUs, + CpusPerCore: p.cpuGroupSize, + CausedByPhysicalCPUs: false, } } @@ -336,8 +342,9 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai if numCPUs > availablePhysicalCPUs { return SMTAlignmentError{ RequestedCPUs: numCPUs, - CpusPerCore: CPUsPerCore, + CpusPerCore: p.cpuGroupSize, AvailablePhysicalCPUs: availablePhysicalCPUs, + CausedByPhysicalCPUs: true, } } } @@ -491,7 +498,7 @@ func (p *staticPolicy) takeByTopology(availableCPUs cpuset.CPUSet, numCPUs int) if p.options.DistributeCPUsAcrossNUMA { cpuGroupSize := 1 if p.options.FullPhysicalCPUsOnly { - cpuGroupSize = p.topology.CPUsPerCore() + cpuGroupSize = p.cpuGroupSize } return takeByTopologyNUMADistributed(p.topology, availableCPUs, numCPUs, cpuGroupSize, cpuSortingStrategy) } diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go index bcd1e19ba29..922f1c49f69 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static_test.go +++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go @@ -509,7 +509,7 @@ func TestStaticPolicyAdd(t *testing.T) { stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 5, 7, 8, 9, 10, 11), pod: makePod("fakePod", "fakeContainerBug113537_1", "10000m", "10000m"), - expErr: SMTAlignmentError{RequestedCPUs: 10, CpusPerCore: 2, AvailablePhysicalCPUs: 8}, + expErr: SMTAlignmentError{RequestedCPUs: 10, CpusPerCore: 2, AvailablePhysicalCPUs: 8, CausedByPhysicalCPUs: true}, expCPUAlloc: false, expCSet: cpuset.New(), }, @@ -1188,6 +1188,44 @@ func TestStaticPolicyOptions(t *testing.T) { } } +func TestSMTAlignmentErrorText(t *testing.T) { + type smtErrTestCase struct { + name string + err SMTAlignmentError + expected string + } + + testCases := []smtErrTestCase{ + { + name: "base SMT alignment error", + err: SMTAlignmentError{RequestedCPUs: 15, CpusPerCore: 4}, + expected: `SMT 
Alignment Error: requested 15 cpus not multiple cpus per core = 4`, + }, + { + // Note the explicit 0. The intent is to signal the lack of physical CPUs, but + // in the corner case of no available physical CPUs at all, without the explicit + // flag we cannot distinguish the case, and before PR#127959 we printed the old message + name: "base SMT alignment error, no physical CPUs, missing flag", + err: SMTAlignmentError{RequestedCPUs: 4, CpusPerCore: 2, AvailablePhysicalCPUs: 0}, + expected: `SMT Alignment Error: requested 4 cpus not multiple cpus per core = 2`, + }, + { + name: "base SMT alignment error, no physical CPUs, explicit flag", + err: SMTAlignmentError{RequestedCPUs: 4, CpusPerCore: 2, AvailablePhysicalCPUs: 0, CausedByPhysicalCPUs: true}, + expected: `SMT Alignment Error: not enough free physical CPUs: available physical CPUs = 0, requested CPUs = 4, CPUs per core = 2`, + }, + } + + for _, testCase := range testCases { + t.Run(testCase.name, func(t *testing.T) { + got := testCase.err.Error() + if got != testCase.expected { + t.Errorf("got=%v expected=%v", got, testCase.expected) + } + }) + } +} + func newCPUSetPtr(cpus ...int) *cpuset.CPUSet { ret := cpuset.New(cpus...) return &ret