Merge pull request #127959 from ffromani/fix-smtalign-error-message

node: cpumanager: fix smtalign error message and minor cleanup
This commit is contained in:
Kubernetes Prow Robot 2024-10-11 00:32:20 +01:00 committed by GitHub
commit 3bf17e2340
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 58 additions and 13 deletions

View File

@ -49,10 +49,11 @@ type SMTAlignmentError struct {
RequestedCPUs int
CpusPerCore int
AvailablePhysicalCPUs int
CausedByPhysicalCPUs bool
}
func (e SMTAlignmentError) Error() string {
if e.AvailablePhysicalCPUs > 0 {
if e.CausedByPhysicalCPUs {
return fmt.Sprintf("SMT Alignment Error: not enough free physical CPUs: available physical CPUs = %d, requested CPUs = %d, CPUs per core = %d", e.AvailablePhysicalCPUs, e.RequestedCPUs, e.CpusPerCore)
}
return fmt.Sprintf("SMT Alignment Error: requested %d cpus not multiple cpus per core = %d", e.RequestedCPUs, e.CpusPerCore)
@ -118,6 +119,9 @@ type staticPolicy struct {
cpusToReuse map[string]cpuset.CPUSet
// options allow to fine-tune the behaviour of the policy
options StaticPolicyOptions
// we compute this value multiple times, and it's not supposed to change
// at runtime - the cpumanager can't deal with runtime topology changes anyway.
cpuGroupSize int
}
// Ensure staticPolicy implements Policy interface
@ -136,13 +140,15 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
return nil, err
}
klog.InfoS("Static policy created with configuration", "options", opts)
cpuGroupSize := topology.CPUsPerCore()
klog.InfoS("Static policy created with configuration", "options", opts, "cpuGroupSize", cpuGroupSize)
policy := &staticPolicy{
topology: topology,
affinity: affinity,
cpusToReuse: make(map[string]cpuset.CPUSet),
options: opts,
topology: topology,
affinity: affinity,
cpusToReuse: make(map[string]cpuset.CPUSet),
options: opts,
cpuGroupSize: cpuGroupSize,
}
allCPUs := topology.CPUDetails.CPUs()
@ -310,8 +316,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
}()
if p.options.FullPhysicalCPUsOnly {
CPUsPerCore := p.topology.CPUsPerCore()
if (numCPUs % CPUsPerCore) != 0 {
if (numCPUs % p.cpuGroupSize) != 0 {
// Since CPU Manager has been enabled requesting strict SMT alignment, it means a guaranteed pod can only be admitted
// if the CPU requested is a multiple of the number of virtual cpus per physical cores.
// In case CPU request is not a multiple of the number of virtual cpus per physical cores the Pod will be put
@ -322,8 +327,9 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
// and only in case the request cannot be satisfied on a single socket, CPU allocation is done for a workload to occupy all
// CPUs on a physical core. Allocation of individual threads would never have to occur.
return SMTAlignmentError{
RequestedCPUs: numCPUs,
CpusPerCore: CPUsPerCore,
RequestedCPUs: numCPUs,
CpusPerCore: p.cpuGroupSize,
CausedByPhysicalCPUs: false,
}
}
@ -336,8 +342,9 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
if numCPUs > availablePhysicalCPUs {
return SMTAlignmentError{
RequestedCPUs: numCPUs,
CpusPerCore: CPUsPerCore,
CpusPerCore: p.cpuGroupSize,
AvailablePhysicalCPUs: availablePhysicalCPUs,
CausedByPhysicalCPUs: true,
}
}
}
@ -491,7 +498,7 @@ func (p *staticPolicy) takeByTopology(availableCPUs cpuset.CPUSet, numCPUs int)
if p.options.DistributeCPUsAcrossNUMA {
cpuGroupSize := 1
if p.options.FullPhysicalCPUsOnly {
cpuGroupSize = p.topology.CPUsPerCore()
cpuGroupSize = p.cpuGroupSize
}
return takeByTopologyNUMADistributed(p.topology, availableCPUs, numCPUs, cpuGroupSize, cpuSortingStrategy)
}

View File

@ -509,7 +509,7 @@ func TestStaticPolicyAdd(t *testing.T) {
stAssignments: state.ContainerCPUAssignments{},
stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 5, 7, 8, 9, 10, 11),
pod: makePod("fakePod", "fakeContainerBug113537_1", "10000m", "10000m"),
expErr: SMTAlignmentError{RequestedCPUs: 10, CpusPerCore: 2, AvailablePhysicalCPUs: 8},
expErr: SMTAlignmentError{RequestedCPUs: 10, CpusPerCore: 2, AvailablePhysicalCPUs: 8, CausedByPhysicalCPUs: true},
expCPUAlloc: false,
expCSet: cpuset.New(),
},
@ -1188,6 +1188,44 @@ func TestStaticPolicyOptions(t *testing.T) {
}
}
// TestSMTAlignmentErrorText verifies the user-facing message produced by
// SMTAlignmentError.Error() for each field combination, including the corner
// case of zero available physical CPUs with and without the explicit
// CausedByPhysicalCPUs flag.
func TestSMTAlignmentErrorText(t *testing.T) {
	for _, tc := range []struct {
		name     string
		err      SMTAlignmentError
		expected string
	}{
		{
			name:     "base SMT alignment error",
			err:      SMTAlignmentError{RequestedCPUs: 15, CpusPerCore: 4},
			expected: `SMT Alignment Error: requested 15 cpus not multiple cpus per core = 4`,
		},
		{
			// Note the explicit 0. The intent is to signal the lack of physical CPUs, but
			// in the corner case of no available physical CPUs at all, without the explicit
			// flag we cannot distinguish the case, and before PR#127959 we printed the old message
			name:     "base SMT alignment error, no physical CPUs, missing flag",
			err:      SMTAlignmentError{RequestedCPUs: 4, CpusPerCore: 2, AvailablePhysicalCPUs: 0},
			expected: `SMT Alignment Error: requested 4 cpus not multiple cpus per core = 2`,
		},
		{
			name:     "base SMT alignment error, no physical CPUs, explicit flag",
			err:      SMTAlignmentError{RequestedCPUs: 4, CpusPerCore: 2, AvailablePhysicalCPUs: 0, CausedByPhysicalCPUs: true},
			expected: `SMT Alignment Error: not enough free physical CPUs: available physical CPUs = 0, requested CPUs = 4, CPUs per core = 2`,
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			if msg := tc.err.Error(); msg != tc.expected {
				t.Errorf("got=%v expected=%v", msg, tc.expected)
			}
		})
	}
}
func newCPUSetPtr(cpus ...int) *cpuset.CPUSet {
ret := cpuset.New(cpus...)
return &ret