From 08f7b220250fa753bfb081c1b6470c9411c574c9 Mon Sep 17 00:00:00 2001 From: Guoliang Wang Date: Sun, 6 Oct 2019 09:44:39 +0800 Subject: [PATCH 1/2] Add a metric to track number of scheduler binding goroutines --- pkg/scheduler/metrics/metrics.go | 9 ++++++++- pkg/scheduler/scheduler.go | 3 +++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pkg/scheduler/metrics/metrics.go b/pkg/scheduler/metrics/metrics.go index 43c2738ee69..71512e0c446 100644 --- a/pkg/scheduler/metrics/metrics.go +++ b/pkg/scheduler/metrics/metrics.go @@ -211,7 +211,6 @@ var ( Help: "Total preemption attempts in the cluster till now", StabilityLevel: metrics.ALPHA, }) - pendingPods = metrics.NewGaugeVec( &metrics.GaugeOpts{ Subsystem: SchedulerSubsystem, @@ -219,6 +218,13 @@ var ( Help: "Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.", StabilityLevel: metrics.ALPHA, }, []string{"queue"}) + SchedulerGoroutines = metrics.NewGaugeVec( + &metrics.GaugeOpts{ + Subsystem: SchedulerSubsystem, + Name: "scheduler_goroutines", + Help: "Number of running goroutines split by the work they do such as binding.", + StabilityLevel: metrics.ALPHA, + }, []string{"work"}) PodSchedulingDuration = metrics.NewHistogram( &metrics.HistogramOpts{ @@ -279,6 +285,7 @@ var ( PodSchedulingAttempts, FrameworkExtensionPointDuration, SchedulerQueueIncomingPods, + SchedulerGoroutines, } ) diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index 0c0a19b9d10..4d175135924 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -675,6 +675,9 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) { } // bind the pod to its host asynchronously (we can do this b/c of the assumption step above). go func() { + metrics.SchedulerGoroutines.WithLabelValues("binding").Inc() + defer metrics.SchedulerGoroutines.WithLabelValues("binding").Dec() + // Bind volumes first before Pod if !allBound { err := sched.bindVolumes(assumedPod) From 9d173852c14f0e8efb0d67db2c38b8dcfa45b31b Mon Sep 17 00:00:00 2001 From: Guoliang Wang Date: Mon, 7 Oct 2019 09:39:49 +0800 Subject: [PATCH 2/2] Add a metric to track number of scheduler prioritizing goroutines --- pkg/scheduler/core/generic_scheduler.go | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/pkg/scheduler/core/generic_scheduler.go b/pkg/scheduler/core/generic_scheduler.go index f68a6937fb1..a36f866f5c0 100644 --- a/pkg/scheduler/core/generic_scheduler.go +++ b/pkg/scheduler/core/generic_scheduler.go @@ -732,7 +732,11 @@ func PrioritizeNodes( if priorityConfigs[i].Function != nil { wg.Add(1) go func(index int) { - defer wg.Done() + metrics.SchedulerGoroutines.WithLabelValues("prioritizing_legacy").Inc() + defer func() { + metrics.SchedulerGoroutines.WithLabelValues("prioritizing_legacy").Dec() + wg.Done() + }() var err error results[index], err = priorityConfigs[index].Function(pod, nodeNameToInfo, nodes) if err != nil { @@ -766,7 +770,11 @@ func PrioritizeNodes( } wg.Add(1) go func(index int) { - defer wg.Done() + metrics.SchedulerGoroutines.WithLabelValues("prioritizing_mapreduce").Inc() + defer func() { + metrics.SchedulerGoroutines.WithLabelValues("prioritizing_mapreduce").Dec() + wg.Done() + }() if err := priorityConfigs[index].Reduce(pod, meta, nodeNameToInfo, results[index]); err != nil { appendError(err) } @@ -812,7 +820,11 @@ func PrioritizeNodes( } wg.Add(1) go func(extIndex int) { - defer wg.Done() + metrics.SchedulerGoroutines.WithLabelValues("prioritizing_extender").Inc() + defer func() { + metrics.SchedulerGoroutines.WithLabelValues("prioritizing_extender").Dec() + wg.Done() + }() prioritizedList, weight, err := extenders[extIndex].Prioritize(pod, nodes) if err != nil { // Prioritization errors from extender can be ignored, let k8s/other extenders determine the priorities