diff --git a/pkg/kubelet/eviction/BUILD b/pkg/kubelet/eviction/BUILD index f37d12b6f2c..0b364263de7 100644 --- a/pkg/kubelet/eviction/BUILD +++ b/pkg/kubelet/eviction/BUILD @@ -66,6 +66,7 @@ go_library( "//pkg/kubelet/cm:go_default_library", "//pkg/kubelet/eviction/api:go_default_library", "//pkg/kubelet/lifecycle:go_default_library", + "//pkg/kubelet/metrics:go_default_library", "//pkg/kubelet/pod:go_default_library", "//pkg/kubelet/qos:go_default_library", "//pkg/kubelet/server/stats:go_default_library", diff --git a/pkg/kubelet/eviction/eviction_manager.go b/pkg/kubelet/eviction/eviction_manager.go index a03601eef80..33e7aadf111 100644 --- a/pkg/kubelet/eviction/eviction_manager.go +++ b/pkg/kubelet/eviction/eviction_manager.go @@ -35,6 +35,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm" evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api" "k8s.io/kubernetes/pkg/kubelet/lifecycle" + "k8s.io/kubernetes/pkg/kubelet/metrics" kubepod "k8s.io/kubernetes/pkg/kubelet/pod" "k8s.io/kubernetes/pkg/kubelet/qos" "k8s.io/kubernetes/pkg/kubelet/server/stats" @@ -331,6 +332,14 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act glog.Infof("eviction manager: pods ranked for eviction: %s", format.Pods(activePods)) + // record the age of the stats observed for each met threshold that we are using for evictions. 
+ for _, t := range thresholds { + timeObserved := observations[t.Signal].time + if !timeObserved.IsZero() { + metrics.EvictionStatsAge.WithLabelValues(string(t.Signal)).Observe(metrics.SinceInMicroseconds(timeObserved.Time)) + } + } + // we kill at most a single pod during each eviction interval for i := range activePods { pod := activePods[i] diff --git a/pkg/kubelet/metrics/metrics.go b/pkg/kubelet/metrics/metrics.go index 682e6a94610..a006dc497a1 100644 --- a/pkg/kubelet/metrics/metrics.go +++ b/pkg/kubelet/metrics/metrics.go @@ -40,6 +40,7 @@ const ( PodWorkerStartLatencyKey = "pod_worker_start_latency_microseconds" PLEGRelistLatencyKey = "pleg_relist_latency_microseconds" PLEGRelistIntervalKey = "pleg_relist_interval_microseconds" + EvictionStatsAgeKey = "eviction_stats_age_microseconds" // Metrics keys of remote runtime operations RuntimeOperationsKey = "runtime_operations" RuntimeOperationsLatencyKey = "runtime_operations_latency_microseconds" @@ -178,6 +179,14 @@ var ( }, []string{"operation_type"}, ) + EvictionStatsAge = prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Subsystem: KubeletSubsystem, + Name: EvictionStatsAgeKey, + Help: "Time between when stats are collected, and when pod is evicted based on those stats by eviction signal", + }, + []string{"eviction_signal"}, + ) ) var registerMetrics sync.Once @@ -204,6 +213,7 @@ func Register(containerCache kubecontainer.RuntimeCache) { prometheus.MustRegister(RuntimeOperations) prometheus.MustRegister(RuntimeOperationsLatency) prometheus.MustRegister(RuntimeOperationsErrors) + prometheus.MustRegister(EvictionStatsAge) }) }