mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-09 03:57:41 +00:00
memorymanager:metrics: add metrics
As part of the memory manager GA graduation effort, we should add metrics in order to improve observability. The metrics are also mentioned in the PR https://github.com/kubernetes/enhancements/pull/4251 (which has not been merged yet). Signed-off-by: Talor Itzhak <titzhak@redhat.com>
This commit is contained in:
parent
246d363ea4
commit
ddd60de3f3
@ -34,6 +34,7 @@ import (
|
|||||||
"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state"
|
"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||||
"k8s.io/kubernetes/pkg/kubelet/types"
|
"k8s.io/kubernetes/pkg/kubelet/types"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -95,7 +96,7 @@ func (p *staticPolicy) Start(s state.State) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Allocate call is idempotent
|
// Allocate call is idempotent
|
||||||
func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) error {
|
func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) (rerr error) {
|
||||||
// allocate the memory only for guaranteed pods
|
// allocate the memory only for guaranteed pods
|
||||||
if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed {
|
if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed {
|
||||||
return nil
|
return nil
|
||||||
@ -103,6 +104,13 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
|
|||||||
|
|
||||||
podUID := string(pod.UID)
|
podUID := string(pod.UID)
|
||||||
klog.InfoS("Allocate", "pod", klog.KObj(pod), "containerName", container.Name)
|
klog.InfoS("Allocate", "pod", klog.KObj(pod), "containerName", container.Name)
|
||||||
|
// container belongs in an exclusively allocated pool
|
||||||
|
metrics.MemoryManagerPinningRequestTotal.Inc()
|
||||||
|
defer func() {
|
||||||
|
if rerr != nil {
|
||||||
|
metrics.MemoryManagerPinningErrorsTotal.Inc()
|
||||||
|
}
|
||||||
|
}()
|
||||||
if blocks := s.GetMemoryBlocks(podUID, container.Name); blocks != nil {
|
if blocks := s.GetMemoryBlocks(podUID, container.Name); blocks != nil {
|
||||||
p.updatePodReusableMemory(pod, container, blocks)
|
p.updatePodReusableMemory(pod, container, blocks)
|
||||||
|
|
||||||
|
@ -108,6 +108,10 @@ const (
|
|||||||
CPUManagerPinningRequestsTotalKey = "cpu_manager_pinning_requests_total"
|
CPUManagerPinningRequestsTotalKey = "cpu_manager_pinning_requests_total"
|
||||||
CPUManagerPinningErrorsTotalKey = "cpu_manager_pinning_errors_total"
|
CPUManagerPinningErrorsTotalKey = "cpu_manager_pinning_errors_total"
|
||||||
|
|
||||||
|
// Metrics to track the Memory manager behavior
|
||||||
|
MemoryManagerPinningRequestsTotalKey = "memory_manager_pinning_requests_total"
|
||||||
|
MemoryManagerPinningErrorsTotalKey = "memory_manager_pinning_errors_total"
|
||||||
|
|
||||||
// Metrics to track the Topology manager behavior
|
// Metrics to track the Topology manager behavior
|
||||||
TopologyManagerAdmissionRequestsTotalKey = "topology_manager_admission_requests_total"
|
TopologyManagerAdmissionRequestsTotalKey = "topology_manager_admission_requests_total"
|
||||||
TopologyManagerAdmissionErrorsTotalKey = "topology_manager_admission_errors_total"
|
TopologyManagerAdmissionErrorsTotalKey = "topology_manager_admission_errors_total"
|
||||||
@ -719,6 +723,25 @@ var (
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// MemoryManagerPinningRequestTotal tracks the number of times the pod spec required the memory manager to pin memory pages
|
||||||
|
MemoryManagerPinningRequestTotal = metrics.NewCounter(
|
||||||
|
&metrics.CounterOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: MemoryManagerPinningRequestsTotalKey,
|
||||||
|
Help: "The number of memory pages allocations which required pinning.",
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
|
})
|
||||||
|
|
||||||
|
// MemoryManagerPinningErrorsTotal tracks the number of times the pod spec required the memory manager to pin memory pages, but the allocation failed
|
||||||
|
MemoryManagerPinningErrorsTotal = metrics.NewCounter(
|
||||||
|
&metrics.CounterOpts{
|
||||||
|
Subsystem: KubeletSubsystem,
|
||||||
|
Name: MemoryManagerPinningErrorsTotalKey,
|
||||||
|
Help: "The number of memory pages allocations which required pinning that failed.",
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
// TopologyManagerAdmissionRequestsTotal tracks the number of times the pod spec will cause the topology manager to admit a pod
|
// TopologyManagerAdmissionRequestsTotal tracks the number of times the pod spec will cause the topology manager to admit a pod
|
||||||
TopologyManagerAdmissionRequestsTotal = metrics.NewCounter(
|
TopologyManagerAdmissionRequestsTotal = metrics.NewCounter(
|
||||||
&metrics.CounterOpts{
|
&metrics.CounterOpts{
|
||||||
@ -887,6 +910,10 @@ func Register(collectors ...metrics.StableCollector) {
|
|||||||
legacyregistry.MustRegister(RunPodSandboxErrors)
|
legacyregistry.MustRegister(RunPodSandboxErrors)
|
||||||
legacyregistry.MustRegister(CPUManagerPinningRequestsTotal)
|
legacyregistry.MustRegister(CPUManagerPinningRequestsTotal)
|
||||||
legacyregistry.MustRegister(CPUManagerPinningErrorsTotal)
|
legacyregistry.MustRegister(CPUManagerPinningErrorsTotal)
|
||||||
|
if utilfeature.DefaultFeatureGate.Enabled(features.MemoryManager) {
|
||||||
|
legacyregistry.MustRegister(MemoryManagerPinningRequestTotal)
|
||||||
|
legacyregistry.MustRegister(MemoryManagerPinningErrorsTotal)
|
||||||
|
}
|
||||||
legacyregistry.MustRegister(TopologyManagerAdmissionRequestsTotal)
|
legacyregistry.MustRegister(TopologyManagerAdmissionRequestsTotal)
|
||||||
legacyregistry.MustRegister(TopologyManagerAdmissionErrorsTotal)
|
legacyregistry.MustRegister(TopologyManagerAdmissionErrorsTotal)
|
||||||
legacyregistry.MustRegister(TopologyManagerAdmissionDuration)
|
legacyregistry.MustRegister(TopologyManagerAdmissionDuration)
|
||||||
|
Loading…
Reference in New Issue
Block a user