cpumanager: smtalign: fix error message

Fix the error message emitted when availablePhysicalCPUs = 0.
Without this change, the logic mistakenly emitted the old,
generic error message, which is confusing for troubleshooting.

Plus, a tiny quality-of-life improvement:
the cpumanager static policy wants to use `cpuGroupSize` multiple times.
The value represents how many vCPUs per pCPU the machine has.
So, let's cache (and log!) the value in the policy data.
We don't support dynamic updates of the HW topology anyway.

Signed-off-by: Francesco Romani <fromani@redhat.com>
commit 838f911dea (parent c5f2fc05ad)
Author: Francesco Romani
Date:   2024-10-09 15:10:53 +02:00
2 changed files with 58 additions and 13 deletions
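The corner case the commit message describes is easiest to see in isolation. Below is a minimal, self-contained Go sketch, not part of the diff: the oldError/newError helpers are hypothetical names introduced only to contrast the pre-fix and post-fix logic of Error() shown in the first hunk.

package main

import "fmt"

type SMTAlignmentError struct {
	RequestedCPUs         int
	CpusPerCore           int
	AvailablePhysicalCPUs int
	CausedByPhysicalCPUs  bool
}

// oldError mirrors the pre-fix logic: it infers the cause from the value,
// so AvailablePhysicalCPUs == 0 falls through to the generic message.
func oldError(e SMTAlignmentError) string {
	if e.AvailablePhysicalCPUs > 0 {
		return fmt.Sprintf("SMT Alignment Error: not enough free physical CPUs: available physical CPUs = %d, requested CPUs = %d, CPUs per core = %d", e.AvailablePhysicalCPUs, e.RequestedCPUs, e.CpusPerCore)
	}
	return fmt.Sprintf("SMT Alignment Error: requested %d cpus not multiple cpus per core = %d", e.RequestedCPUs, e.CpusPerCore)
}

// newError mirrors the post-fix logic: an explicit flag records the cause,
// independent of the AvailablePhysicalCPUs value.
func newError(e SMTAlignmentError) string {
	if e.CausedByPhysicalCPUs {
		return fmt.Sprintf("SMT Alignment Error: not enough free physical CPUs: available physical CPUs = %d, requested CPUs = %d, CPUs per core = %d", e.AvailablePhysicalCPUs, e.RequestedCPUs, e.CpusPerCore)
	}
	return fmt.Sprintf("SMT Alignment Error: requested %d cpus not multiple cpus per core = %d", e.RequestedCPUs, e.CpusPerCore)
}

func main() {
	// Physical-CPU exhaustion with zero free physical CPUs left.
	e := SMTAlignmentError{RequestedCPUs: 4, CpusPerCore: 2, AvailablePhysicalCPUs: 0, CausedByPhysicalCPUs: true}
	fmt.Println("old:", oldError(e)) // generic message: misreports the cause
	fmt.Println("new:", newError(e)) // physical-CPUs message: correct
}

Running this prints the generic alignment message from oldError even though physical-CPU exhaustion was the real cause, while newError reports it correctly.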

pkg/kubelet/cm/cpumanager/policy_static.go

@@ -49,10 +49,11 @@ type SMTAlignmentError struct {
 	RequestedCPUs         int
 	CpusPerCore           int
 	AvailablePhysicalCPUs int
+	CausedByPhysicalCPUs  bool
 }

 func (e SMTAlignmentError) Error() string {
-	if e.AvailablePhysicalCPUs > 0 {
+	if e.CausedByPhysicalCPUs {
 		return fmt.Sprintf("SMT Alignment Error: not enough free physical CPUs: available physical CPUs = %d, requested CPUs = %d, CPUs per core = %d", e.AvailablePhysicalCPUs, e.RequestedCPUs, e.CpusPerCore)
 	}
 	return fmt.Sprintf("SMT Alignment Error: requested %d cpus not multiple cpus per core = %d", e.RequestedCPUs, e.CpusPerCore)
@@ -118,6 +119,9 @@ type staticPolicy struct {
 	cpusToReuse map[string]cpuset.CPUSet
 	// options allow to fine-tune the behaviour of the policy
 	options StaticPolicyOptions
+	// we compute this value multiple times, and it's not supposed to change
+	// at runtime - the cpumanager can't deal with runtime topology changes anyway.
+	cpuGroupSize int
 }

 // Ensure staticPolicy implements Policy interface
@@ -136,13 +140,15 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
 		return nil, err
 	}

-	klog.InfoS("Static policy created with configuration", "options", opts)
+	cpuGroupSize := topology.CPUsPerCore()
+	klog.InfoS("Static policy created with configuration", "options", opts, "cpuGroupSize", cpuGroupSize)

 	policy := &staticPolicy{
-		topology:    topology,
-		affinity:    affinity,
-		cpusToReuse: make(map[string]cpuset.CPUSet),
-		options:     opts,
+		topology:     topology,
+		affinity:     affinity,
+		cpusToReuse:  make(map[string]cpuset.CPUSet),
+		options:      opts,
+		cpuGroupSize: cpuGroupSize,
 	}

 	allCPUs := topology.CPUDetails.CPUs()
@@ -310,8 +316,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
 	}()

 	if p.options.FullPhysicalCPUsOnly {
-		CPUsPerCore := p.topology.CPUsPerCore()
-		if (numCPUs % CPUsPerCore) != 0 {
+		if (numCPUs % p.cpuGroupSize) != 0 {
 			// Since CPU Manager has been enabled requesting strict SMT alignment, it means a guaranteed pod can only be admitted
 			// if the CPU requested is a multiple of the number of virtual cpus per physical cores.
 			// In case CPU request is not a multiple of the number of virtual cpus per physical cores the Pod will be put
@@ -322,8 +327,9 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
 			// and only in case the request cannot be sattisfied on a single socket, CPU allocation is done for a workload to occupy all
 			// CPUs on a physical core. Allocation of individual threads would never have to occur.
 			return SMTAlignmentError{
-				RequestedCPUs: numCPUs,
-				CpusPerCore:   CPUsPerCore,
+				RequestedCPUs:        numCPUs,
+				CpusPerCore:          p.cpuGroupSize,
+				CausedByPhysicalCPUs: false,
 			}
 		}
@@ -336,8 +342,9 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
 		if numCPUs > availablePhysicalCPUs {
 			return SMTAlignmentError{
 				RequestedCPUs:         numCPUs,
-				CpusPerCore:           CPUsPerCore,
+				CpusPerCore:           p.cpuGroupSize,
 				AvailablePhysicalCPUs: availablePhysicalCPUs,
+				CausedByPhysicalCPUs:  true,
 			}
 		}
 	}
@@ -491,7 +498,7 @@ func (p *staticPolicy) takeByTopology(availableCPUs cpuset.CPUSet, numCPUs int)
 	if p.options.DistributeCPUsAcrossNUMA {
 		cpuGroupSize := 1
 		if p.options.FullPhysicalCPUsOnly {
-			cpuGroupSize = p.topology.CPUsPerCore()
+			cpuGroupSize = p.cpuGroupSize
 		}
 		return takeByTopologyNUMADistributed(p.topology, availableCPUs, numCPUs, cpuGroupSize, cpuSortingStrategy)
 	}

pkg/kubelet/cm/cpumanager/policy_static_test.go

@@ -509,7 +509,7 @@ func TestStaticPolicyAdd(t *testing.T) {
 			stAssignments:   state.ContainerCPUAssignments{},
 			stDefaultCPUSet: cpuset.New(0, 2, 3, 4, 5, 7, 8, 9, 10, 11),
 			pod:             makePod("fakePod", "fakeContainerBug113537_1", "10000m", "10000m"),
-			expErr:          SMTAlignmentError{RequestedCPUs: 10, CpusPerCore: 2, AvailablePhysicalCPUs: 8},
+			expErr:          SMTAlignmentError{RequestedCPUs: 10, CpusPerCore: 2, AvailablePhysicalCPUs: 8, CausedByPhysicalCPUs: true},
 			expCPUAlloc:     false,
 			expCSet:         cpuset.New(),
 		},
@@ -1188,6 +1188,44 @@ func TestStaticPolicyOptions(t *testing.T) {
 	}
 }

+func TestSMTAlignmentErrorText(t *testing.T) {
+	type smtErrTestCase struct {
+		name     string
+		err      SMTAlignmentError
+		expected string
+	}
+
+	testCases := []smtErrTestCase{
+		{
+			name:     "base SMT alignment error",
+			err:      SMTAlignmentError{RequestedCPUs: 15, CpusPerCore: 4},
+			expected: `SMT Alignment Error: requested 15 cpus not multiple cpus per core = 4`,
+		},
+		{
+			// Note the explicit 0. The intent is to signal the lack of physical CPUs, but
+			// in the corner case of no available physical CPUs at all, without the explicit
+			// flag we cannot distinguish the case, and before PR#127959 we printed the old message.
+			name:     "base SMT alignment error, no physical CPUs, missing flag",
+			err:      SMTAlignmentError{RequestedCPUs: 4, CpusPerCore: 2, AvailablePhysicalCPUs: 0},
+			expected: `SMT Alignment Error: requested 4 cpus not multiple cpus per core = 2`,
+		},
+		{
+			name:     "base SMT alignment error, no physical CPUs, explicit flag",
+			err:      SMTAlignmentError{RequestedCPUs: 4, CpusPerCore: 2, AvailablePhysicalCPUs: 0, CausedByPhysicalCPUs: true},
+			expected: `SMT Alignment Error: not enough free physical CPUs: available physical CPUs = 0, requested CPUs = 4, CPUs per core = 2`,
+		},
+	}
+
+	for _, testCase := range testCases {
+		t.Run(testCase.name, func(t *testing.T) {
+			got := testCase.err.Error()
+			if got != testCase.expected {
+				t.Errorf("got=%v expected=%v", got, testCase.expected)
+			}
+		})
+	}
+}
+
 func newCPUSetPtr(cpus ...int) *cpuset.CPUSet {
 	ret := cpuset.New(cpus...)
 	return &ret
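To exercise just the new table-driven test locally, something along these lines should work (assuming the upstream Kubernetes repository layout, where the static policy lives under pkg/kubelet/cm/cpumanager):

go test -run TestSMTAlignmentErrorText ./pkg/kubelet/cm/cpumanager/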