Merge pull request #115137 from swatisehgal/topologymgr-metrics

node: topologymgr: add metrics about admission requests and errors
Kubernetes Prow Robot 2023-01-30 18:43:00 -08:00 committed by GitHub
commit 4df945853e
6 changed files with 202 additions and 9 deletions
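Once a kubelet carrying this change is serving metrics, the two new counters appear on its /metrics endpoint roughly as below. The HELP text is taken from the definitions added in this PR; the [ALPHA] prefix and the sample values are illustrative, not output captured from a real node:

# HELP kubelet_topology_manager_admission_requests_total [ALPHA] The number of admission requests where resources have to be aligned.
# TYPE kubelet_topology_manager_admission_requests_total counter
kubelet_topology_manager_admission_requests_total 1
# HELP kubelet_topology_manager_admission_errors_total [ALPHA] The number of admission request failures where resources could not be aligned.
# TYPE kubelet_topology_manager_admission_errors_total counter
kubelet_topology_manager_admission_errors_total 0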

Changed file: topology manager container scope (containerScope.Admit)

@@ -22,6 +22,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/cm/admission"
"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/metrics"
)
type containerScope struct {
@@ -54,6 +55,7 @@ func (s *containerScope) Admit(pod *v1.Pod) lifecycle.PodAdmitResult {
klog.InfoS("Best TopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name)
if !admit {
+ metrics.TopologyManagerAdmissionErrorsTotal.Inc()
return admission.GetPodAdmitResult(&TopologyAffinityError{})
}
klog.InfoS("Topology Affinity", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name)
@@ -61,6 +63,7 @@ func (s *containerScope) Admit(pod *v1.Pod) lifecycle.PodAdmitResult {
err := s.allocateAlignedResources(pod, &container)
if err != nil {
+ metrics.TopologyManagerAdmissionErrorsTotal.Inc()
return admission.GetPodAdmitResult(err)
}
}

Changed file: topology manager pod scope (podScope.Admit)

@@ -22,6 +22,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/cm/admission"
"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/metrics"
)
type podScope struct {
@@ -52,6 +53,7 @@ func (s *podScope) Admit(pod *v1.Pod) lifecycle.PodAdmitResult {
bestHint, admit := s.calculateAffinity(pod)
klog.InfoS("Best TopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod))
if !admit {
+ metrics.TopologyManagerAdmissionErrorsTotal.Inc()
return admission.GetPodAdmitResult(&TopologyAffinityError{})
}
@@ -61,6 +63,7 @@ func (s *podScope) Admit(pod *v1.Pod) lifecycle.PodAdmitResult {
err := s.allocateAlignedResources(pod, &container)
if err != nil {
+ metrics.TopologyManagerAdmissionErrorsTotal.Inc()
return admission.GetPodAdmitResult(err)
}
}

Changed file: topology manager (manager.Admit)

@@ -24,6 +24,7 @@ import (
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/metrics"
)
const (
@@ -208,6 +209,8 @@ func (m *manager) RemoveContainer(containerID string) error {
func (m *manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
klog.InfoS("Topology Admit Handler")
+ metrics.TopologyManagerAdmissionRequestsTotal.Inc()
pod := attrs.Pod
return m.scope.Admit(pod)

Changed file: kubelet metrics definitions and registration (pkg/kubelet/metrics)

@@ -91,6 +91,10 @@ const (
CPUManagerPinningRequestsTotalKey = "cpu_manager_pinning_requests_total"
CPUManagerPinningErrorsTotalKey = "cpu_manager_pinning_errors_total"
+ // Metrics to track the Topology manager behavior
+ TopologyManagerAdmissionRequestsTotalKey = "topology_manager_admission_requests_total"
+ TopologyManagerAdmissionErrorsTotalKey = "topology_manager_admission_errors_total"
// Values used in metric labels
Container = "container"
InitContainer = "init_container"
@@ -549,6 +553,26 @@
StabilityLevel: metrics.ALPHA,
},
)
+ // TopologyManagerAdmissionRequestsTotal tracks the number of times the pod spec will cause the topology manager to admit a pod
+ TopologyManagerAdmissionRequestsTotal = metrics.NewCounter(
+ &metrics.CounterOpts{
+ Subsystem: KubeletSubsystem,
+ Name: TopologyManagerAdmissionRequestsTotalKey,
+ Help: "The number of admission requests where resources have to be aligned.",
+ StabilityLevel: metrics.ALPHA,
+ },
+ )
+ // TopologyManagerAdmissionErrorsTotal tracks the number of times the pod spec required the topology manager to admit a pod, but the admission failed
+ TopologyManagerAdmissionErrorsTotal = metrics.NewCounter(
+ &metrics.CounterOpts{
+ Subsystem: KubeletSubsystem,
+ Name: TopologyManagerAdmissionErrorsTotalKey,
+ Help: "The number of admission request failures where resources could not be aligned.",
+ StabilityLevel: metrics.ALPHA,
+ },
+ )
)
var registerMetrics sync.Once
@@ -600,6 +624,8 @@ func Register(collectors ...metrics.StableCollector) {
legacyregistry.MustRegister(RunPodSandboxErrors)
legacyregistry.MustRegister(CPUManagerPinningRequestsTotal)
legacyregistry.MustRegister(CPUManagerPinningErrorsTotal)
+ legacyregistry.MustRegister(TopologyManagerAdmissionRequestsTotal)
+ legacyregistry.MustRegister(TopologyManagerAdmissionErrorsTotal)
for _, collector := range collectors {
legacyregistry.CustomMustRegister(collector)
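Outside the e2e framework, the two counters registered above can be checked with a small standalone scraper. A minimal sketch, not part of this PR: it assumes the kubelet read-only endpoint is reachable on localhost:10255 (the same endpoint the node e2e helper below uses) and relies on the Prometheus expfmt text parser for decoding:

package main

import (
	"fmt"
	"log"
	"net/http"

	"github.com/prometheus/common/expfmt"
)

func main() {
	// Assumes the kubelet read-only port is enabled; adjust host/port as needed.
	resp, err := http.Get("http://127.0.0.1:10255/metrics")
	if err != nil {
		log.Fatalf("scraping kubelet metrics: %v", err)
	}
	defer resp.Body.Close()

	// Parse the Prometheus text exposition format into metric families.
	var parser expfmt.TextParser
	families, err := parser.TextToMetricFamilies(resp.Body)
	if err != nil {
		log.Fatalf("parsing metrics: %v", err)
	}

	for _, name := range []string{
		"kubelet_topology_manager_admission_requests_total",
		"kubelet_topology_manager_admission_errors_total",
	} {
		mf, ok := families[name]
		if !ok {
			fmt.Printf("%s: not exported (older kubelet?)\n", name)
			continue
		}
		// Both metrics are plain counters with no labels, so a single sample is expected.
		for _, m := range mf.GetMetric() {
			fmt.Printf("%s = %v\n", name, m.GetCounter().GetValue())
		}
	}
}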

Changed file: node e2e test for CPU Manager metrics

@@ -109,9 +109,9 @@ var _ = SIGDescribe("CPU Manager Metrics [Serial][Feature:CPUManager]", func() {
})
ginkgo.By("Giving the Kubelet time to start up and produce metrics")
- gomega.Eventually(ctx, getCPUManagerMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
+ gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
ginkgo.By("Ensuring the metrics match the expectations a few more times")
- gomega.Consistently(ctx, getCPUManagerMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
+ gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
})
ginkgo.It("should report pinning failures when the cpumanager allocation is known to fail", func(ctx context.Context) {
@@ -132,9 +132,9 @@ var _ = SIGDescribe("CPU Manager Metrics [Serial][Feature:CPUManager]", func() {
})
ginkgo.By("Giving the Kubelet time to start up and produce metrics")
- gomega.Eventually(ctx, getCPUManagerMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
+ gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
ginkgo.By("Ensuring the metrics match the expectations a few more times")
- gomega.Consistently(ctx, getCPUManagerMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
+ gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
})
ginkgo.It("should not report any pinning failures when the cpumanager allocation is expected to succeed", func(ctx context.Context) {
@@ -155,16 +155,15 @@ var _ = SIGDescribe("CPU Manager Metrics [Serial][Feature:CPUManager]", func() {
})
ginkgo.By("Giving the Kubelet time to start up and produce metrics")
- gomega.Eventually(ctx, getCPUManagerMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
+ gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
ginkgo.By("Ensuring the metrics match the expectations a few more times")
- gomega.Consistently(ctx, getCPUManagerMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
+ gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
})
})
})
- func getCPUManagerMetrics(ctx context.Context) (e2emetrics.KubeletMetrics, error) {
- // we are running out of good names, so we need to be unnecessarily specific to avoid clashes
- ginkgo.By("getting CPU Manager metrics from the metrics API")
+ func getKubeletMetrics(ctx context.Context) (e2emetrics.KubeletMetrics, error) {
+ ginkgo.By("getting Kubelet metrics from the metrics API")
return e2emetrics.GrabKubeletMetricsWithoutProxy(ctx, framework.TestContext.NodeName+":10255", "/metrics")
}

New file: node e2e test for Topology Manager metrics

@@ -0,0 +1,159 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package e2enode
import (
"context"
"time"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
"github.com/onsi/gomega/gstruct"
v1 "k8s.io/api/core/v1"
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
"k8s.io/kubernetes/test/e2e/framework"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
admissionapi "k8s.io/pod-security-admission/api"
)
var _ = SIGDescribe("Topology Manager Metrics [Serial][Feature:TopologyManager]", func() {
f := framework.NewDefaultFramework("topologymanager-metrics")
f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged
ginkgo.Context("when querying /metrics", func() {
var oldCfg *kubeletconfig.KubeletConfiguration
var testPod *v1.Pod
var cpusNumPerNUMA, numaNodes int
ginkgo.BeforeEach(func(ctx context.Context) {
var err error
if oldCfg == nil {
oldCfg, err = getCurrentKubeletConfig(ctx)
framework.ExpectNoError(err)
}
numaNodes, cpusNumPerNUMA = hostCheck()
// It is safe to assume that the CPUs are distributed equally across
// NUMA nodes, and therefore the number of CPUs on each NUMA node is
// the same, so we just check the CPUs on the first NUMA node
framework.Logf("numaNodes on the system %d", numaNodes)
framework.Logf("CPUs per NUMA on the system %d", cpusNumPerNUMA)
policy := topologymanager.PolicySingleNumaNode
scope := podScopeTopology
newCfg, _ := configureTopologyManagerInKubelet(oldCfg, policy, scope, nil, 0)
updateKubeletConfig(ctx, f, newCfg, true)
})
ginkgo.AfterEach(func(ctx context.Context) {
if testPod != nil {
deletePodSyncByName(ctx, f, testPod.Name)
}
updateKubeletConfig(ctx, f, oldCfg, true)
})
ginkgo.It("should report zero admission counters after a fresh restart", func(ctx context.Context) {
// we updated the kubelet config in BeforeEach, so we can assume we start fresh.
// being [Serial], we can also assume no one else but us is running pods.
ginkgo.By("Checking the topologymanager metrics right after the kubelet restart, with no pods running")
matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
"kubelet_topology_manager_admission_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
"": timelessSample(0),
}),
"kubelet_topology_manager_admission_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
"": timelessSample(0),
}),
})
ginkgo.By("Giving the Kubelet time to start up and produce metrics")
gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
ginkgo.By("Ensuring the metrics match the expectations a few more times")
gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
})
ginkgo.It("should report admission failures when the topology manager alignment is known to fail", func(ctx context.Context) {
ginkgo.By("Creating the test pod which will be rejected for TopologyAffinity")
testPod = e2epod.NewPodClient(f).Create(ctx, makeGuaranteedCPUExclusiveSleeperPod("topology-affinity-err", cpusNumPerNUMA+1))
// we updated the kubelet config in BeforeEach, so we can assume we start fresh.
// being [Serial], we can also assume no one else but us is running pods.
ginkgo.By("Checking the topologymanager metrics right after the kubelet restart, with pod failed to admit")
matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
"kubelet_topology_manager_admission_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
"": timelessSample(1),
}),
"kubelet_topology_manager_admission_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
"": timelessSample(1),
}),
})
ginkgo.By("Giving the Kubelet time to start up and produce metrics")
gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
ginkgo.By("Ensuring the metrics match the expectations a few more times")
gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
})
ginkgo.It("should not report any admission failures when the topology manager alignment is expected to succeed", func(ctx context.Context) {
ginkgo.By("Creating the test pod")
testPod = e2epod.NewPodClient(f).Create(ctx, makeGuaranteedCPUExclusiveSleeperPod("topology-alignment-ok", cpusNumPerNUMA))
// we updated the kubelet config in BeforeEach, so we can assume we start fresh.
// being [Serial], we can also assume no one else but us is running pods.
ginkgo.By("Checking the cpumanager metrics right after the kubelet restart, with pod should be admitted")
matchResourceMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{
"kubelet_topology_manager_admission_requests_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
"": timelessSample(1),
}),
"kubelet_topology_manager_admission_errors_total": gstruct.MatchAllElements(nodeID, gstruct.Elements{
"": timelessSample(0),
}),
})
ginkgo.By("Giving the Kubelet time to start up and produce metrics")
gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
ginkgo.By("Ensuring the metrics match the expectations a few more times")
gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchResourceMetrics)
})
})
})
func hostCheck() (int, int) {
// this is a very rough check. We just want to rule out systems that do NOT have
// multiple NUMA nodes or at least 4 cores
numaNodes := detectNUMANodes()
if numaNodes < minNumaNodes {
e2eskipper.Skipf("this test is intended to be run on a multi-node NUMA system")
}
coreCount := detectCoresPerSocket()
if coreCount < minCoreCount {
e2eskipper.Skipf("this test is intended to be run on a system with at least %d cores per socket", minCoreCount)
}
return numaNodes, coreCount
}
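For context, detectNUMANodes, detectCoresPerSocket, minNumaNodes and minCoreCount are pre-existing helpers and constants from the e2e_node suite; they are not part of this diff. A rough, purely illustrative sketch of how such detection could be done on Linux by parsing lscpu output follows; the lscpu approach and the *Sketch names are assumptions, not the suite's actual implementation:

package e2enode

import (
	"os/exec"
	"strconv"
	"strings"
)

// lscpuValue is a hypothetical helper: it runs lscpu and returns the integer value
// of a single "Key: value" line, e.g. "NUMA node(s)" or "Core(s) per socket".
func lscpuValue(key string) int {
	out, err := exec.Command("lscpu").Output()
	if err != nil {
		return 0
	}
	for _, line := range strings.Split(string(out), "\n") {
		fields := strings.SplitN(line, ":", 2)
		if len(fields) != 2 || strings.TrimSpace(fields[0]) != key {
			continue
		}
		v, err := strconv.Atoi(strings.TrimSpace(fields[1]))
		if err != nil {
			return 0
		}
		return v
	}
	return 0
}

// Illustrative equivalents of the suite's detection helpers.
func detectNUMANodesSketch() int      { return lscpuValue("NUMA node(s)") }
func detectCoresPerSocketSketch() int { return lscpuValue("Core(s) per socket") }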