mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-19 18:02:01 +00:00
node: topologymgr: add metrics about admission requests and errors
Signed-off-by: Swati Sehgal <swsehgal@redhat.com>
This commit is contained in:
parent
5550064bc2
commit
172c55d310
@ -22,6 +22,7 @@ import (
|
|||||||
"k8s.io/kubernetes/pkg/kubelet/cm/admission"
|
"k8s.io/kubernetes/pkg/kubelet/cm/admission"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
|
"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||||
)
|
)
|
||||||
|
|
||||||
type containerScope struct {
|
type containerScope struct {
|
||||||
@ -54,6 +55,7 @@ func (s *containerScope) Admit(pod *v1.Pod) lifecycle.PodAdmitResult {
|
|||||||
klog.InfoS("Best TopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name)
|
klog.InfoS("Best TopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name)
|
||||||
|
|
||||||
if !admit {
|
if !admit {
|
||||||
|
metrics.TopologyManagerAdmissionErrorsTotal.Inc()
|
||||||
return admission.GetPodAdmitResult(&TopologyAffinityError{})
|
return admission.GetPodAdmitResult(&TopologyAffinityError{})
|
||||||
}
|
}
|
||||||
klog.InfoS("Topology Affinity", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name)
|
klog.InfoS("Topology Affinity", "bestHint", bestHint, "pod", klog.KObj(pod), "containerName", container.Name)
|
||||||
@ -61,6 +63,7 @@ func (s *containerScope) Admit(pod *v1.Pod) lifecycle.PodAdmitResult {
|
|||||||
|
|
||||||
err := s.allocateAlignedResources(pod, &container)
|
err := s.allocateAlignedResources(pod, &container)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
metrics.TopologyManagerAdmissionErrorsTotal.Inc()
|
||||||
return admission.GetPodAdmitResult(err)
|
return admission.GetPodAdmitResult(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -22,6 +22,7 @@ import (
|
|||||||
"k8s.io/kubernetes/pkg/kubelet/cm/admission"
|
"k8s.io/kubernetes/pkg/kubelet/cm/admission"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
|
"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||||
)
|
)
|
||||||
|
|
||||||
type podScope struct {
|
type podScope struct {
|
||||||
@ -52,6 +53,7 @@ func (s *podScope) Admit(pod *v1.Pod) lifecycle.PodAdmitResult {
|
|||||||
bestHint, admit := s.calculateAffinity(pod)
|
bestHint, admit := s.calculateAffinity(pod)
|
||||||
klog.InfoS("Best TopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod))
|
klog.InfoS("Best TopologyHint", "bestHint", bestHint, "pod", klog.KObj(pod))
|
||||||
if !admit {
|
if !admit {
|
||||||
|
metrics.TopologyManagerAdmissionErrorsTotal.Inc()
|
||||||
return admission.GetPodAdmitResult(&TopologyAffinityError{})
|
return admission.GetPodAdmitResult(&TopologyAffinityError{})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -61,6 +63,7 @@ func (s *podScope) Admit(pod *v1.Pod) lifecycle.PodAdmitResult {
|
|||||||
|
|
||||||
err := s.allocateAlignedResources(pod, &container)
|
err := s.allocateAlignedResources(pod, &container)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
metrics.TopologyManagerAdmissionErrorsTotal.Inc()
|
||||||
return admission.GetPodAdmitResult(err)
|
return admission.GetPodAdmitResult(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -24,6 +24,7 @@ import (
|
|||||||
"k8s.io/klog/v2"
|
"k8s.io/klog/v2"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@ -208,6 +209,8 @@ func (m *manager) RemoveContainer(containerID string) error {
|
|||||||
|
|
||||||
func (m *manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
|
func (m *manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
|
||||||
klog.InfoS("Topology Admit Handler")
|
klog.InfoS("Topology Admit Handler")
|
||||||
|
|
||||||
|
metrics.TopologyManagerAdmissionRequestsTotal.Inc()
|
||||||
pod := attrs.Pod
|
pod := attrs.Pod
|
||||||
|
|
||||||
return m.scope.Admit(pod)
|
return m.scope.Admit(pod)
|
||||||
|
@ -91,6 +91,10 @@ const (
|
|||||||
CPUManagerPinningRequestsTotalKey = "cpu_manager_pinning_requests_total"
|
CPUManagerPinningRequestsTotalKey = "cpu_manager_pinning_requests_total"
|
||||||
CPUManagerPinningErrorsTotalKey = "cpu_manager_pinning_errors_total"
|
CPUManagerPinningErrorsTotalKey = "cpu_manager_pinning_errors_total"
|
||||||
|
|
||||||
|
// Metrics to track the Topology manager behavior
|
||||||
|
TopologyManagerAdmissionRequestsTotalKey = "topology_manager_admission_requests_total"
|
||||||
|
TopologyManagerAdmissionErrorsTotalKey = "topology_manager_admission_errors_total"
|
||||||
|
|
||||||
// Values used in metric labels
|
// Values used in metric labels
|
||||||
Container = "container"
|
Container = "container"
|
||||||
InitContainer = "init_container"
|
InitContainer = "init_container"
|
||||||
@ -549,6 +553,26 @@ var (
|
|||||||
StabilityLevel: metrics.ALPHA,
|
StabilityLevel: metrics.ALPHA,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// TopologyManagerAdmissionRequestsTotal tracks the number of pod admission requests handled by the topology manager
|
||||||
|
TopologyManagerAdmissionRequestsTotal = metrics.NewCounter(
|
||||||
|
&metrics.CounterOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: TopologyManagerAdmissionRequestsTotalKey,
|
||||||
|
Help: "The number of admission requests where resources have to be aligned.",
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
// TopologyManagerAdmissionErrorsTotal tracks the number of topology manager admission requests that failed, either because the affinity calculation did not admit the pod or because allocating the aligned resources returned an error
|
||||||
|
TopologyManagerAdmissionErrorsTotal = metrics.NewCounter(
|
||||||
|
&metrics.CounterOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: TopologyManagerAdmissionErrorsTotalKey,
|
||||||
|
Help: "The number of admission request failures where resources could not be aligned.",
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
|
},
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
var registerMetrics sync.Once
|
var registerMetrics sync.Once
|
||||||
@ -600,6 +624,8 @@ func Register(collectors ...metrics.StableCollector) {
|
|||||||
legacyregistry.MustRegister(RunPodSandboxErrors)
|
legacyregistry.MustRegister(RunPodSandboxErrors)
|
||||||
legacyregistry.MustRegister(CPUManagerPinningRequestsTotal)
|
legacyregistry.MustRegister(CPUManagerPinningRequestsTotal)
|
||||||
legacyregistry.MustRegister(CPUManagerPinningErrorsTotal)
|
legacyregistry.MustRegister(CPUManagerPinningErrorsTotal)
|
||||||
|
legacyregistry.MustRegister(TopologyManagerAdmissionRequestsTotal)
|
||||||
|
legacyregistry.MustRegister(TopologyManagerAdmissionErrorsTotal)
|
||||||
|
|
||||||
for _, collector := range collectors {
|
for _, collector := range collectors {
|
||||||
legacyregistry.CustomMustRegister(collector)
|
legacyregistry.CustomMustRegister(collector)
|
||||||
|
Loading…
Reference in New Issue
Block a user