diff --git a/pkg/kubelet/eviction/eviction_manager.go b/pkg/kubelet/eviction/eviction_manager.go index de92d6eab24..ef9e85fe568 100644 --- a/pkg/kubelet/eviction/eviction_manager.go +++ b/pkg/kubelet/eviction/eviction_manager.go @@ -24,7 +24,7 @@ import ( "k8s.io/klog" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/util/clock" utilfeature "k8s.io/apiserver/pkg/util/feature" @@ -376,6 +376,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act } message, annotations := evictionMessage(resourceToReclaim, pod, statsFunc) if m.evictPod(pod, gracePeriodOverride, message, annotations) { + metrics.Evictions.WithLabelValues(string(thresholdToReclaim.Signal)).Inc() return []*v1.Pod{pod} } } diff --git a/pkg/kubelet/metrics/metrics.go b/pkg/kubelet/metrics/metrics.go index 83f175b2461..71e1b6d24b3 100644 --- a/pkg/kubelet/metrics/metrics.go +++ b/pkg/kubelet/metrics/metrics.go @@ -42,6 +42,7 @@ const ( PLEGRelistDurationKey = "pleg_relist_duration_seconds" PLEGDiscardEventsKey = "pleg_discard_events" PLEGRelistIntervalKey = "pleg_relist_interval_seconds" + EvictionsKey = "evictions" EvictionStatsAgeKey = "eviction_stats_age_seconds" DeprecatedPodWorkerLatencyKey = "pod_worker_latency_microseconds" DeprecatedPodStartLatencyKey = "pod_start_latency_microseconds" @@ -205,6 +206,16 @@ var ( }, []string{"operation_type"}, ) + // Evictions is a Counter that tracks the cumulative number of pod evictions initiated by the kubelet. + // Broken down by eviction signal. + Evictions = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Subsystem: KubeletSubsystem, + Name: EvictionsKey, + Help: "Cumulative number of pod evictions by eviction signal", + }, + []string{"eviction_signal"}, + ) // EvictionStatsAge is a Histogram that tracks the time (in seconds) between when stats are collected and when a pod is evicted // based on those stats. Broken down by eviction signal. EvictionStatsAge = prometheus.NewHistogramVec( @@ -435,6 +446,7 @@ func Register(containerCache kubecontainer.RuntimeCache, collectors ...prometheu prometheus.MustRegister(RuntimeOperations) prometheus.MustRegister(RuntimeOperationsDuration) prometheus.MustRegister(RuntimeOperationsErrors) + prometheus.MustRegister(Evictions) prometheus.MustRegister(EvictionStatsAge) prometheus.MustRegister(DevicePluginRegistrationCount) prometheus.MustRegister(DevicePluginAllocationDuration)