Merge pull request #127155 from ffromani/alignment-metrics

node: metrics: add resource alignment metrics
Kubernetes Prow Robot 2024-10-23 09:54:58 +01:00 committed by GitHub
commit c6669ea7d6
8 changed files with 96 additions and 2 deletions
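
For reference, once an aligned allocation is recorded, the new counter surfaces on the kubelet /metrics endpoint roughly as below. This is an illustrative exposition-format sample, not output captured from the PR; the exact HELP text and values depend on the kubelet build and the workload:

# HELP kubelet_container_aligned_compute_resources_count [ALPHA] Cumulative number of aligned compute resources allocated to containers by alignment type.
# TYPE kubelet_container_aligned_compute_resources_count counter
kubelet_container_aligned_compute_resources_count{boundary="physical_cpu",scope="container"} 1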

View File

@@ -312,7 +312,13 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
	defer func() {
		if rerr != nil {
			metrics.CPUManagerPinningErrorsTotal.Inc()
			return
		}
		if !p.options.FullPhysicalCPUsOnly {
			// increment only if we know we allocate aligned resources
			return
		}
		metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Inc()
	}()
	if p.options.FullPhysicalCPUsOnly {
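
The hunk above leans on Go's named-result-plus-defer idiom: Allocate's error result is named (rerr), so the deferred closure observes the function's final outcome and bumps exactly one counter per call. Below is a minimal standalone sketch of that pattern; the function name, flags, and plain-int "counters" are hypothetical stand-ins, not the kubelet's actual code:

package main

import "fmt"

var pinningErrors, alignedAllocations int // stand-ins for the real counter metrics

func allocate(fullPhysicalCPUsOnly, fail bool) (rerr error) {
	// The deferred closure runs after rerr holds its final value.
	defer func() {
		if rerr != nil {
			pinningErrors++ // the allocation failed; count the error
			return
		}
		if !fullPhysicalCPUsOnly {
			return // increment only if we know we allocated aligned resources
		}
		alignedAllocations++
	}()

	if fail {
		return fmt.Errorf("not enough free physical CPUs")
	}
	return nil
}

func main() {
	_ = allocate(true, false)
	_ = allocate(true, true)
	fmt.Println(pinningErrors, alignedAllocations) // prints: 1 1
}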

View File

@@ -30,6 +30,14 @@ type Policy interface {
	Merge(providersHints []map[string][]TopologyHint) (TopologyHint, bool)
}

// IsAlignmentGuaranteed returns true if the given policy guarantees that
// the compute resources will be allocated within a NUMA boundary, or the allocation will fail entirely.
func IsAlignmentGuaranteed(p Policy) bool {
	// We are abusing the name, but at the moment this matches almost 1:1 the policy name,
	// so we are not adding new fields for now.
	return p.Name() == PolicySingleNumaNode
}

// Merge a TopologyHints permutation to a single hint by performing a bitwise-AND
// of their affinity masks. The hint shall be preferred if all hints in the permutation
// are preferred.

View File

@@ -61,6 +61,10 @@ func (s *containerScope) Admit(pod *v1.Pod) lifecycle.PodAdmitResult {
			metrics.TopologyManagerAdmissionErrorsTotal.Inc()
			return admission.GetPodAdmitResult(err)
		}

		if IsAlignmentGuaranteed(s.policy) {
			metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedNUMANode).Inc()
		}
	}
	return admission.GetPodAdmitResult(nil)
}

View File

@@ -62,6 +62,10 @@ func (s *podScope) Admit(pod *v1.Pod) lifecycle.PodAdmitResult {
			return admission.GetPodAdmitResult(err)
		}
	}

	if IsAlignmentGuaranteed(s.policy) {
		// increment only if we know we allocate aligned resources.
		metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopePod, metrics.AlignedNUMANode).Inc()
	}
	return admission.GetPodAdmitResult(nil)
}
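
Taken together, the three increment sites in this change emit the following label pairs: scope="container" with boundary="physical_cpu" from the CPU manager static policy, scope="container" with boundary="numa_node" from the topology manager container scope, and scope="pod" with boundary="numa_node" from the topology manager pod scope.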

View File

@@ -127,10 +127,21 @@ const (
	// Metric for tracking garbage collected images
	ImageGarbageCollectedTotalKey = "image_garbage_collected_total"

	// Metric for tracking alignment of compute resources
	ContainerAlignedComputeResourcesNameKey          = "container_aligned_compute_resources_count"
	ContainerAlignedComputeResourcesScopeLabelKey    = "scope"
	ContainerAlignedComputeResourcesBoundaryLabelKey = "boundary"

	// Values used in metric labels
	Container          = "container"
	InitContainer      = "init_container"
	EphemeralContainer = "ephemeral_container"

	AlignScopePod       = "pod"
	AlignScopeContainer = "container"

	AlignedPhysicalCPU = "physical_cpu"
	AlignedNUMANode    = "numa_node"
)
type imageSizeBucket struct {
@@ -762,6 +773,16 @@ var (
		},
	)

	ContainerAlignedComputeResources = metrics.NewCounterVec(
		&metrics.CounterOpts{
			Subsystem:      KubeletSubsystem,
			Name:           ContainerAlignedComputeResourcesNameKey,
			Help:           "Cumulative number of aligned compute resources allocated to containers by alignment type.",
			StabilityLevel: metrics.ALPHA,
		},
		[]string{ContainerAlignedComputeResourcesScopeLabelKey, ContainerAlignedComputeResourcesBoundaryLabelKey},
	)

	// MemoryManagerPinningRequestTotal tracks the number of times the pod spec required the memory manager to pin memory pages
	MemoryManagerPinningRequestTotal = metrics.NewCounter(
		&metrics.CounterOpts{
@@ -985,6 +1006,7 @@ func Register(collectors ...metrics.StableCollector) {
	legacyregistry.MustRegister(RunPodSandboxErrors)
	legacyregistry.MustRegister(CPUManagerPinningRequestsTotal)
	legacyregistry.MustRegister(CPUManagerPinningErrorsTotal)
	legacyregistry.MustRegister(ContainerAlignedComputeResources)
	if utilfeature.DefaultFeatureGate.Enabled(features.MemoryManager) {
		legacyregistry.MustRegister(MemoryManagerPinningRequestTotal)
		legacyregistry.MustRegister(MemoryManagerPinningErrorsTotal)
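
Once registered, the counter behaves like any other kubelet counter and can be aggregated at query time; for example, an illustrative PromQL query (assuming Prometheus scrapes the kubelet) that breaks alignment events down by kind:

sum(rate(kubelet_container_aligned_compute_resources_count[5m])) by (scope, boundary)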

View File

@@ -91,6 +91,7 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa
	ginkgo.AfterEach(func(ctx context.Context) {
		if testPod != nil {
			deletePodSyncByName(ctx, f, testPod.Name)
			waitForContainerRemoval(ctx, testPod.Spec.Containers[0].Name, testPod.Name, testPod.Namespace)
		}
		updateKubeletConfig(ctx, f, oldCfg, true)
	})
@@ -160,11 +161,32 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa
		ginkgo.By("Ensuring the metrics match the expectations a few more times")
		gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
	})

	ginkgo.It("should return updated alignment counters when a pod successfully runs", func(ctx context.Context) {
		ginkgo.By("Creating the test pod")
		testPod = e2epod.NewPodClient(f).Create(ctx, makeGuaranteedCPUExclusiveSleeperPod("count-align-smt-ok", smtLevel))

		// we updated the kubelet config in BeforeEach, so we can assume we start fresh.
		// being [Serial], we can also assume no one else but us is running pods.
		ginkgo.By("Checking the cpumanager metrics right after the kubelet restart; the pod should be admitted")

		idFn := makeCustomPairID("scope", "boundary")
		matchAlignmentMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
			"kubelet_container_aligned_compute_resources_count": gstruct.MatchElements(idFn, gstruct.IgnoreExtras, gstruct.Elements{
				"container::physical_cpu": timelessSample(1),
			}),
		})

		ginkgo.By("Giving the Kubelet time to update the alignment metrics")
		gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchAlignmentMetrics)
		ginkgo.By("Ensuring the metrics match the expectations about alignment metrics a few more times")
		gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchAlignmentMetrics)
	})
})
})
func getKubeletMetrics(ctx context.Context) (e2emetrics.KubeletMetrics, error) {
ginkgo.By("getting Kubelet metrics from the metrics API")
ginkgo.By("Getting Kubelet metrics from the metrics API")
return e2emetrics.GrabKubeletMetricsWithoutProxy(ctx, nodeNameOrIP()+":10255", "/metrics")
}
@@ -189,7 +211,7 @@ func makeGuaranteedCPUExclusiveSleeperPod(name string, cpus int) *v1.Pod {
							v1.ResourceMemory: resource.MustParse("64Mi"),
						},
					},
Command: []string{"sh", "-c", "sleep", "1d"},
Command: []string{"sh", "-c", "sleep 1d"},
},
},
},

View File

@@ -157,6 +157,13 @@ func containerID(element interface{}) string {
	return fmt.Sprintf("%s::%s::%s", el.Metric["namespace"], el.Metric["pod"], el.Metric["container"])
}

func makeCustomPairID(pri, sec string) func(interface{}) string {
	return func(element interface{}) string {
		el := element.(*model.Sample)
		return fmt.Sprintf("%s::%s", el.Metric[model.LabelName(pri)], el.Metric[model.LabelName(sec)])
	}
}

func boundedSample(lower, upper interface{}) types.GomegaMatcher {
	return gstruct.PointTo(gstruct.MatchAllFields(gstruct.Fields{
		// We already check Metric when matching the Id
View File

@@ -28,6 +28,7 @@ import (
	v1 "k8s.io/api/core/v1"
	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
	"k8s.io/kubernetes/pkg/kubelet/metrics"
	"k8s.io/kubernetes/test/e2e/feature"
	"k8s.io/kubernetes/test/e2e/framework"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
@@ -152,6 +153,26 @@ var _ = SIGDescribe("Topology Manager Metrics", framework.WithSerial(), feature.
		ginkgo.By("Ensuring the metrics match the expectations a few more times")
		gomega.Consistently(ctx, getKubeletMetrics, 2*time.Minute, 10*time.Second).Should(matchResourceMetrics)
	})

	ginkgo.It("[alignment] should return updated alignment counters when a pod successfully runs", func(ctx context.Context) {
		ginkgo.By("Creating the test pod")
		testPod = e2epod.NewPodClient(f).Create(ctx, makeGuaranteedCPUExclusiveSleeperPod("count-align-numa-ok", cpusNumPerNUMA))

		// we updated the kubelet config in BeforeEach, so we can assume we start fresh.
		// being [Serial], we can also assume no one else but us is running pods.
		ginkgo.By("Checking the topologymanager metrics right after the kubelet restart; the pod should be admitted")

		matchAlignmentMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
			"kubelet_container_aligned_compute_resources_count": gstruct.MatchAllElements(nodeID, gstruct.Elements{
				metrics.AlignedNUMANode: timelessSample(1),
			}),
		})

		ginkgo.By("Giving the Kubelet time to update the alignment metrics")
		gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchAlignmentMetrics)
		ginkgo.By("Ensuring the metrics match the expectations about alignment metrics a few more times")
		gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchAlignmentMetrics)
	})
})
})