diff --git a/pkg/kubelet/cm/memorymanager/policy_static.go b/pkg/kubelet/cm/memorymanager/policy_static.go index f6591749156..130b62ab637 100644 --- a/pkg/kubelet/cm/memorymanager/policy_static.go +++ b/pkg/kubelet/cm/memorymanager/policy_static.go @@ -34,6 +34,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" + "k8s.io/kubernetes/pkg/kubelet/metrics" "k8s.io/kubernetes/pkg/kubelet/types" ) @@ -95,7 +96,7 @@ func (p *staticPolicy) Start(s state.State) error { } // Allocate call is idempotent -func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) error { +func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) (rerr error) { // allocate the memory only for guaranteed pods if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed { return nil @@ -103,6 +104,13 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai podUID := string(pod.UID) klog.InfoS("Allocate", "pod", klog.KObj(pod), "containerName", container.Name) + // count this pinning request (guaranteed pods only); the deferred func counts it as failed when rerr is non-nil + metrics.MemoryManagerPinningRequestTotal.Inc() + defer func() { + if rerr != nil { + metrics.MemoryManagerPinningErrorsTotal.Inc() + } + }() if blocks := s.GetMemoryBlocks(podUID, container.Name); blocks != nil { p.updatePodReusableMemory(pod, container, blocks) diff --git a/pkg/kubelet/metrics/metrics.go b/pkg/kubelet/metrics/metrics.go index c79e6c9bf9a..64e08e54022 100644 --- a/pkg/kubelet/metrics/metrics.go +++ b/pkg/kubelet/metrics/metrics.go @@ -108,6 +108,10 @@ const ( CPUManagerPinningRequestsTotalKey = "cpu_manager_pinning_requests_total" CPUManagerPinningErrorsTotalKey = "cpu_manager_pinning_errors_total" + // Metrics to track the Memory manager behavior + MemoryManagerPinningRequestsTotalKey = "memory_manager_pinning_requests_total" + MemoryManagerPinningErrorsTotalKey = 
"memory_manager_pinning_errors_total" + // Metrics to track the Topology manager behavior TopologyManagerAdmissionRequestsTotalKey = "topology_manager_admission_requests_total" TopologyManagerAdmissionErrorsTotalKey = "topology_manager_admission_errors_total" @@ -719,6 +723,25 @@ var ( }, ) + // MemoryManagerPinningRequestTotal tracks the number of times the pod spec required the memory manager to pin memory pages + MemoryManagerPinningRequestTotal = metrics.NewCounter( + &metrics.CounterOpts{ + Subsystem: KubeletSubsystem, + Name: MemoryManagerPinningRequestsTotalKey, + Help: "The number of memory pages allocations which required pinning.", + StabilityLevel: metrics.ALPHA, + }) + + // MemoryManagerPinningErrorsTotal tracks the number of times the pod spec required the memory manager to pin memory pages, but the allocation failed + MemoryManagerPinningErrorsTotal = metrics.NewCounter( + &metrics.CounterOpts{ + Subsystem: KubeletSubsystem, + Name: MemoryManagerPinningErrorsTotalKey, + Help: "The number of memory pages allocations which required pinning that failed.", + StabilityLevel: metrics.ALPHA, + }, + ) + // TopologyManagerAdmissionRequestsTotal tracks the number of times the pod spec will cause the topology manager to admit a pod TopologyManagerAdmissionRequestsTotal = metrics.NewCounter( &metrics.CounterOpts{ @@ -887,6 +910,10 @@ func Register(collectors ...metrics.StableCollector) { legacyregistry.MustRegister(RunPodSandboxErrors) legacyregistry.MustRegister(CPUManagerPinningRequestsTotal) legacyregistry.MustRegister(CPUManagerPinningErrorsTotal) + if utilfeature.DefaultFeatureGate.Enabled(features.MemoryManager) { + legacyregistry.MustRegister(MemoryManagerPinningRequestTotal) + legacyregistry.MustRegister(MemoryManagerPinningErrorsTotal) + } legacyregistry.MustRegister(TopologyManagerAdmissionRequestsTotal) legacyregistry.MustRegister(TopologyManagerAdmissionErrorsTotal) legacyregistry.MustRegister(TopologyManagerAdmissionDuration)