diff --git a/pkg/scheduler/internal/queue/scheduling_queue.go b/pkg/scheduler/internal/queue/scheduling_queue.go
index d04df2122c0..edecfbb4949 100644
--- a/pkg/scheduler/internal/queue/scheduling_queue.go
+++ b/pkg/scheduler/internal/queue/scheduling_queue.go
@@ -412,6 +412,12 @@ func (p *PriorityQueue) AddUnschedulableIfNotPresent(pInfo *framework.QueuedPodI
 	} else {
 		p.unschedulableQ.addOrUpdate(pInfo)
 		metrics.SchedulerQueueIncomingPods.WithLabelValues("unschedulable", ScheduleAttemptFailure).Inc()
 	}
+	// Count the pod against every plugin that caused it to not schedule. This
+	// runs only after the pod has been queued, so the early error return from
+	// the backoff queue cannot leave the gauge incremented for a dropped pod.
+	for plugin := range pInfo.UnschedulablePlugins {
+		metrics.UnschedulableReason(plugin, pInfo.Pod.Spec.SchedulerName).Inc()
+	}
 
 	p.PodNominator.AddNominatedPod(pInfo.PodInfo, nil)
@@ -940,6 +946,11 @@ func MakeNextPodFunc(queue SchedulingQueue) func() *framework.QueuedPodInfo {
 		podInfo, err := queue.Pop()
 		if err == nil {
 			klog.V(4).InfoS("About to try and schedule pod", "pod", klog.KObj(podInfo.Pod))
+			// The pod is leaving the queue for a new scheduling attempt; undo the
+			// per-plugin increments recorded when it was queued as unschedulable.
+			for plugin := range podInfo.UnschedulablePlugins {
+				metrics.UnschedulableReason(plugin, podInfo.Pod.Spec.SchedulerName).Dec()
+			}
 			return podInfo
 		}
 		klog.ErrorS(err, "Error while retrieving next pod from scheduling queue")
diff --git a/pkg/scheduler/metrics/metrics.go b/pkg/scheduler/metrics/metrics.go
index e926269af67..f950ed82fed 100644
--- a/pkg/scheduler/metrics/metrics.go
+++ b/pkg/scheduler/metrics/metrics.go
@@ -173,6 +173,16 @@ var (
 			StabilityLevel: metrics.ALPHA,
 		}, []string{"type"})
 
+	// unschedulableReasons is a gauge of unschedulable pods, labeled by the
+	// plugin name and the scheduler profile.
+	unschedulableReasons = metrics.NewGaugeVec(
+		&metrics.GaugeOpts{
+			Subsystem:      SchedulerSubsystem,
+			Name:           "unschedulable_pods",
+			Help:           "The number of unschedulable pods broken down by plugin name. A pod will increment the gauge for all plugins that caused it to not schedule and so this metric have meaning only when broken down by plugin.",
+			StabilityLevel: metrics.ALPHA,
+		}, []string{"plugin", "profile"})
+
 	metricsList = []metrics.Registerable{
 		scheduleAttempts,
 		e2eSchedulingLatency,
@@ -189,6 +199,7 @@ var (
 		SchedulerGoroutines,
 		PermitWaitDuration,
 		CacheSize,
+		unschedulableReasons,
 	}
 )
 
@@ -235,3 +246,9 @@ func UnschedulablePods() metrics.GaugeMetric {
 func SinceInSeconds(start time.Time) float64 {
 	return time.Since(start).Seconds()
 }
+
+// UnschedulableReason returns the unschedulable-pods gauge for the given
+// plugin and profile label values.
+func UnschedulableReason(plugin string, profile string) metrics.GaugeMetric {
+	return unschedulableReasons.With(metrics.Labels{"plugin": plugin, "profile": profile})
+}