From 4735011287e2665e956acde7b50288c980378f89 Mon Sep 17 00:00:00 2001 From: Mike Dame Date: Fri, 4 Oct 2019 13:24:41 -0400 Subject: [PATCH 1/2] Add scheduler cache size metrics --- pkg/scheduler/internal/cache/cache.go | 10 ++++++++++ pkg/scheduler/metrics/metrics.go | 9 +++++++++ 2 files changed, 19 insertions(+) diff --git a/pkg/scheduler/internal/cache/cache.go b/pkg/scheduler/internal/cache/cache.go index 66f95d34c73..09877d50e85 100644 --- a/pkg/scheduler/internal/cache/cache.go +++ b/pkg/scheduler/internal/cache/cache.go @@ -29,6 +29,7 @@ import ( "k8s.io/klog" "k8s.io/kubernetes/pkg/features" schedulerlisters "k8s.io/kubernetes/pkg/scheduler/listers" + "k8s.io/kubernetes/pkg/scheduler/metrics" schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo" nodeinfosnapshot "k8s.io/kubernetes/pkg/scheduler/nodeinfo/snapshot" ) @@ -635,9 +636,11 @@ func (cache *schedulerCache) cleanupExpiredAssumedPods() { } // cleanupAssumedPods exists for making test deterministic by taking time as input argument. +// It also reports metrics on the cache size for nodes, pods, and assumed pods. 
func (cache *schedulerCache) cleanupAssumedPods(now time.Time) { cache.mu.Lock() defer cache.mu.Unlock() + defer cache.updateMetrics() // The size of assumedPods should be small for key := range cache.assumedPods { @@ -680,3 +683,10 @@ func (cache *schedulerCache) GetNodeInfo(nodeName string) (*v1.Node, error) { return n.info.Node(), nil } + +// updateMetrics sets the CacheSize gauge for assumed pods, pods, and nodes to the current lengths of cache.assumedPods, cache.podStates, and cache.nodes. It does not acquire cache.mu itself. +func (cache *schedulerCache) updateMetrics() { + metrics.CacheSize.WithLabelValues("assumed_pods").Set(float64(len(cache.assumedPods))) + metrics.CacheSize.WithLabelValues("pods").Set(float64(len(cache.podStates))) + metrics.CacheSize.WithLabelValues("nodes").Set(float64(len(cache.nodes))) +} diff --git a/pkg/scheduler/metrics/metrics.go b/pkg/scheduler/metrics/metrics.go index aa4df6877db..d37de61909f 100644 --- a/pkg/scheduler/metrics/metrics.go +++ b/pkg/scheduler/metrics/metrics.go @@ -272,6 +272,14 @@ var ( }, []string{"result"}) + CacheSize = metrics.NewGaugeVec( + &metrics.GaugeOpts{ + Subsystem: SchedulerSubsystem, + Name: "scheduler_cache_size", + Help: "Number of nodes, pods, and assumed (bound) pods in the scheduler cache.", + StabilityLevel: metrics.ALPHA, + }, []string{"type"}) + metricsList = []metrics.Registerable{ scheduleAttempts, SchedulingLatency, @@ -297,6 +305,7 @@ var ( SchedulerQueueIncomingPods, SchedulerGoroutines, PermitWaitDuration, + CacheSize, } ) From 828d6622a96548a0d69da4b722f5e88960e22bb6 Mon Sep 17 00:00:00 2001 From: Mike Dame Date: Fri, 4 Oct 2019 13:28:44 -0400 Subject: [PATCH 2/2] Update bazel --- pkg/scheduler/internal/cache/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/scheduler/internal/cache/BUILD b/pkg/scheduler/internal/cache/BUILD index b5624d4b0db..67e0dad1bfe 100644 --- a/pkg/scheduler/internal/cache/BUILD +++ b/pkg/scheduler/internal/cache/BUILD @@ -12,6 +12,7 @@ go_library( deps = [ "//pkg/features:go_default_library", "//pkg/scheduler/listers:go_default_library", + 
"//pkg/scheduler/metrics:go_default_library", "//pkg/scheduler/nodeinfo:go_default_library", "//pkg/scheduler/nodeinfo/snapshot:go_default_library", "//pkg/util/node:go_default_library",