node: topologymgr: add metrics about admission requests and errors

Signed-off-by: Swati Sehgal <swsehgal@redhat.com>
This commit is contained in:
Swati Sehgal 2023-01-17 17:39:21 +00:00
parent 5550064bc2
commit 172c55d310
4 changed files with 35 additions and 0 deletions

View File

@ -22,6 +22,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/cm/admission" "k8s.io/kubernetes/pkg/kubelet/cm/admission"
"k8s.io/kubernetes/pkg/kubelet/cm/containermap" "k8s.io/kubernetes/pkg/kubelet/cm/containermap"
"k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/metrics"
) )
type containerScope struct { type containerScope struct {
@ -54,6 +55,7 @@ func (s *containerScope) Admit(pod *v1.Pod) lifecycle.PodAdmitResult {
klog.InfoS("Best TopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name) klog.InfoS("Best TopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name)
if !admit { if !admit {
metrics.TopologyManagerAdmissionErrorsTotal.Inc()
return admission.GetPodAdmitResult(&TopologyAffinityError{}) return admission.GetPodAdmitResult(&TopologyAffinityError{})
} }
klog.InfoS("Topology Affinity", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name) klog.InfoS("Topology Affinity", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name)
@ -61,6 +63,7 @@ func (s *containerScope) Admit(pod *v1.Pod) lifecycle.PodAdmitResult {
err := s.allocateAlignedResources(pod, &container) err := s.allocateAlignedResources(pod, &container)
if err != nil { if err != nil {
metrics.TopologyManagerAdmissionErrorsTotal.Inc()
return admission.GetPodAdmitResult(err) return admission.GetPodAdmitResult(err)
} }
} }

View File

@ -22,6 +22,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/cm/admission" "k8s.io/kubernetes/pkg/kubelet/cm/admission"
"k8s.io/kubernetes/pkg/kubelet/cm/containermap" "k8s.io/kubernetes/pkg/kubelet/cm/containermap"
"k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/metrics"
) )
type podScope struct { type podScope struct {
@ -52,6 +53,7 @@ func (s *podScope) Admit(pod *v1.Pod) lifecycle.PodAdmitResult {
bestHint, admit := s.calculateAffinity(pod) bestHint, admit := s.calculateAffinity(pod)
klog.InfoS("Best TopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod)) klog.InfoS("Best TopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod))
if !admit { if !admit {
metrics.TopologyManagerAdmissionErrorsTotal.Inc()
return admission.GetPodAdmitResult(&TopologyAffinityError{}) return admission.GetPodAdmitResult(&TopologyAffinityError{})
} }
@ -61,6 +63,7 @@ func (s *podScope) Admit(pod *v1.Pod) lifecycle.PodAdmitResult {
err := s.allocateAlignedResources(pod, &container) err := s.allocateAlignedResources(pod, &container)
if err != nil { if err != nil {
metrics.TopologyManagerAdmissionErrorsTotal.Inc()
return admission.GetPodAdmitResult(err) return admission.GetPodAdmitResult(err)
} }
} }

View File

@ -24,6 +24,7 @@ import (
"k8s.io/klog/v2" "k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
"k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/metrics"
) )
const ( const (
@ -208,6 +209,8 @@ func (m *manager) RemoveContainer(containerID string) error {
func (m *manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult { func (m *manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
klog.InfoS("Topology Admit Handler") klog.InfoS("Topology Admit Handler")
metrics.TopologyManagerAdmissionRequestsTotal.Inc()
pod := attrs.Pod pod := attrs.Pod
return m.scope.Admit(pod) return m.scope.Admit(pod)

View File

@ -91,6 +91,10 @@ const (
CPUManagerPinningRequestsTotalKey = "cpu_manager_pinning_requests_total" CPUManagerPinningRequestsTotalKey = "cpu_manager_pinning_requests_total"
CPUManagerPinningErrorsTotalKey = "cpu_manager_pinning_errors_total" CPUManagerPinningErrorsTotalKey = "cpu_manager_pinning_errors_total"
// Metrics to track the topology manager's admission behavior
TopologyManagerAdmissionRequestsTotalKey = "topology_manager_admission_requests_total"
TopologyManagerAdmissionErrorsTotalKey = "topology_manager_admission_errors_total"
// Values used in metric labels // Values used in metric labels
Container = "container" Container = "container"
InitContainer = "init_container" InitContainer = "init_container"
@ -549,6 +553,26 @@ var (
StabilityLevel: metrics.ALPHA, StabilityLevel: metrics.ALPHA,
}, },
) )
// TopologyManagerAdmissionRequestsTotal counts the pod admission requests
// handled by the topology manager. It is incremented once per call to the
// manager's Admit handler, before the configured scope decides whether the
// pod can be admitted.
TopologyManagerAdmissionRequestsTotal = metrics.NewCounter(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: TopologyManagerAdmissionRequestsTotalKey,
Help: "The number of admission requests where resources have to be aligned.",
StabilityLevel: metrics.ALPHA,
},
)
// TopologyManagerAdmissionErrorsTotal counts the pod admission requests that
// the topology manager rejected. The container and pod scopes increment it
// on either failure path of Admit: when the calculated affinity is not
// admitted (TopologyAffinityError), or when allocating the aligned resources
// returns an error.
TopologyManagerAdmissionErrorsTotal = metrics.NewCounter(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: TopologyManagerAdmissionErrorsTotalKey,
Help: "The number of admission request failures where resources could not be aligned.",
StabilityLevel: metrics.ALPHA,
},
)
) )
var registerMetrics sync.Once var registerMetrics sync.Once
@ -600,6 +624,8 @@ func Register(collectors ...metrics.StableCollector) {
legacyregistry.MustRegister(RunPodSandboxErrors) legacyregistry.MustRegister(RunPodSandboxErrors)
legacyregistry.MustRegister(CPUManagerPinningRequestsTotal) legacyregistry.MustRegister(CPUManagerPinningRequestsTotal)
legacyregistry.MustRegister(CPUManagerPinningErrorsTotal) legacyregistry.MustRegister(CPUManagerPinningErrorsTotal)
legacyregistry.MustRegister(TopologyManagerAdmissionRequestsTotal)
legacyregistry.MustRegister(TopologyManagerAdmissionErrorsTotal)
for _, collector := range collectors { for _, collector := range collectors {
legacyregistry.CustomMustRegister(collector) legacyregistry.CustomMustRegister(collector)