Merge pull request #83508 from damemi/scheduler-cache-metric

Add scheduler cache size metrics
This commit is contained in:
Kubernetes Prow Robot 2019-10-30 16:55:36 -07:00 committed by GitHub
commit a381f7cb3e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 20 additions and 0 deletions

View File

@ -12,6 +12,7 @@ go_library(
deps = [
"//pkg/features:go_default_library",
"//pkg/scheduler/listers:go_default_library",
"//pkg/scheduler/metrics:go_default_library",
"//pkg/scheduler/nodeinfo:go_default_library",
"//pkg/scheduler/nodeinfo/snapshot:go_default_library",
"//pkg/util/node:go_default_library",

View File

@ -29,6 +29,7 @@ import (
"k8s.io/klog"
"k8s.io/kubernetes/pkg/features"
schedulerlisters "k8s.io/kubernetes/pkg/scheduler/listers"
"k8s.io/kubernetes/pkg/scheduler/metrics"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
nodeinfosnapshot "k8s.io/kubernetes/pkg/scheduler/nodeinfo/snapshot"
)
@ -635,9 +636,11 @@ func (cache *schedulerCache) cleanupExpiredAssumedPods() {
}
// cleanupAssumedPods exists to make tests deterministic by taking time as an input argument.
// It also reports metrics on the cache size for nodes, pods, and assumed pods.
func (cache *schedulerCache) cleanupAssumedPods(now time.Time) {
cache.mu.Lock()
defer cache.mu.Unlock()
defer cache.updateMetrics()
// The size of assumedPods should be small
for key := range cache.assumedPods {
@ -680,3 +683,10 @@ func (cache *schedulerCache) GetNodeInfo(nodeName string) (*v1.Node, error) {
return n.info.Node(), nil
}
// updateMetrics publishes the current number of assumed pods, pods, and nodes
// held in the cache to the CacheSize gauge, one sample per "type" label value.
// It reads the cache maps without acquiring cache.mu itself; the caller visible
// here (cleanupAssumedPods) defers it after locking, so it runs before the
// deferred Unlock, i.e. while the lock is still held.
func (cache *schedulerCache) updateMetrics() {
	sizes := []struct {
		label string
		count int
	}{
		{"assumed_pods", len(cache.assumedPods)},
		{"pods", len(cache.podStates)},
		{"nodes", len(cache.nodes)},
	}
	for _, s := range sizes {
		metrics.CacheSize.WithLabelValues(s.label).Set(float64(s.count))
	}
}

View File

@ -272,6 +272,14 @@ var (
},
[]string{"result"})
// CacheSize is a gauge vector partitioned by a single "type" label. Per its
// help text it reports the number of nodes, pods, and assumed pods in the
// scheduler cache.
// NOTE(review): Name already starts with "scheduler_" and Subsystem is set as
// well; if SchedulerSubsystem is "scheduler", the exported metric name would
// presumably be "scheduler_scheduler_cache_size" — confirm the repeated prefix
// is intended before the name is relied upon by dashboards or alerts.
CacheSize = metrics.NewGaugeVec(
&metrics.GaugeOpts{
Subsystem: SchedulerSubsystem,
Name: "scheduler_cache_size",
Help: "Number of nodes, pods, and assumed (bound) pods in the scheduler cache.",
StabilityLevel: metrics.ALPHA,
}, []string{"type"})
metricsList = []metrics.Registerable{
scheduleAttempts,
SchedulingLatency,
@ -297,6 +305,7 @@ var (
SchedulerQueueIncomingPods,
SchedulerGoroutines,
PermitWaitDuration,
CacheSize,
}
)