diff --git a/pkg/scheduler/metrics/metrics.go b/pkg/scheduler/metrics/metrics.go
index c0a87f319ae..7d329ba5ac0 100644
--- a/pkg/scheduler/metrics/metrics.go
+++ b/pkg/scheduler/metrics/metrics.go
@@ -59,6 +59,14 @@ var (
 			Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
 		},
 	)
+	SchedulingAlgorithmPreemptionEvaluationDuration = prometheus.NewHistogram(
+		prometheus.HistogramOpts{
+			Subsystem: schedulerSubsystem,
+			Name:      "scheduling_algorithm_preemption_evaluation",
+			Help:      "Scheduling algorithm preemption evaluation duration",
+			Buckets:   prometheus.ExponentialBuckets(1000, 2, 15),
+		},
+	)
 	BindingLatency = prometheus.NewHistogram(
 		prometheus.HistogramOpts{
 			Subsystem: schedulerSubsystem,
@@ -67,6 +75,18 @@ var (
 			Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
 		},
 	)
+	PreemptionVictims = prometheus.NewGauge(
+		prometheus.GaugeOpts{
+			Subsystem: schedulerSubsystem,
+			Name:      "pod_preemption_victims",
+			Help:      "Number of selected preemption victims",
+		})
+	PreemptionAttempts = prometheus.NewCounter(
+		prometheus.CounterOpts{
+			Subsystem: schedulerSubsystem,
+			Name:      "total_preemption_attempts",
+			Help:      "Total preemption attempts in the cluster till now",
+		})
 )
 
 var registerMetrics sync.Once
@@ -78,8 +98,12 @@ func Register() {
 		prometheus.MustRegister(E2eSchedulingLatency)
 		prometheus.MustRegister(SchedulingAlgorithmLatency)
 		prometheus.MustRegister(BindingLatency)
+
 		prometheus.MustRegister(SchedulingAlgorithmPredicateEvaluationDuration)
 		prometheus.MustRegister(SchedulingAlgorithmPriorityEvaluationDuration)
+		prometheus.MustRegister(SchedulingAlgorithmPreemptionEvaluationDuration)
+		prometheus.MustRegister(PreemptionVictims)
+		prometheus.MustRegister(PreemptionAttempts)
 	})
 }
 
diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go
index 9fae7d117f7..fee845f34e8 100644
--- a/pkg/scheduler/scheduler.go
+++ b/pkg/scheduler/scheduler.go
@@ -216,7 +216,9 @@ func (sched *Scheduler) preempt(preemptor *v1.Pod, scheduleErr error) (string, e
 		glog.Errorf("Error getting the updated preemptor pod object: %v", err)
 		return "", err
 	}
+
 	node, victims, nominatedPodsToClear, err := sched.config.Algorithm.Preempt(preemptor, sched.config.NodeLister, scheduleErr)
+	metrics.PreemptionVictims.Set(float64(len(victims)))
 	if err != nil {
 		glog.Errorf("Error preempting victims to make room for %v/%v.", preemptor.Namespace, preemptor.Name)
 		return "", err
@@ -440,18 +442,20 @@ func (sched *Scheduler) scheduleOne() {
 	// Synchronously attempt to find a fit for the pod.
 	start := time.Now()
 	suggestedHost, err := sched.schedule(pod)
-	metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start))
 	if err != nil {
 		// schedule() may have failed because the pod would not fit on any host, so we try to
 		// preempt, with the expectation that the next time the pod is tried for scheduling it
 		// will fit due to the preemption. It is also possible that a different pod will schedule
 		// into the resources that were preempted, but this is harmless.
 		if fitError, ok := err.(*core.FitError); ok {
+			preemptionStartTime := time.Now()
 			sched.preempt(pod, fitError)
+			metrics.PreemptionAttempts.Inc()
+			metrics.SchedulingAlgorithmPreemptionEvaluationDuration.Observe(metrics.SinceInMicroseconds(preemptionStartTime))
 		}
 		return
 	}
-
+	metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start))
 	// Tell the cache to assume that a pod now is running on a given node, even though it hasn't been bound yet.
 	// This allows us to keep scheduling without waiting on binding to occur.
 	assumedPod := pod.DeepCopy()
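
For context, the pattern these changes follow is the standard prometheus/client_golang flow: declare a collector, register it once, and observe values at the call site. The sketch below is illustrative only and not part of the diff; the "demo" subsystem, the metric names, and the doWork helper are made up, but the histogram mirrors the patch's choice of ExponentialBuckets(1000, 2, 15) over microsecond observations, and the counter and gauge play the roles of the attempt and victim metrics.

// Illustrative sketch only (not part of the patch): it mirrors the pattern the
// diff uses -- an exponential-bucket histogram observed in microseconds, plus a
// counter and a gauge -- against a hypothetical "demo" subsystem.
package main

import (
	"fmt"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

var (
	// Buckets start at 1000us and double 15 times, matching the patch's
	// ExponentialBuckets(1000, 2, 15) choice for scheduler latencies.
	evaluationDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
		Subsystem: "demo",
		Name:      "evaluation_duration_microseconds",
		Help:      "Duration of one evaluation in microseconds",
		Buckets:   prometheus.ExponentialBuckets(1000, 2, 15),
	})
	attempts = prometheus.NewCounter(prometheus.CounterOpts{
		Subsystem: "demo",
		Name:      "total_attempts",
		Help:      "Total attempts so far",
	})
	victims = prometheus.NewGauge(prometheus.GaugeOpts{
		Subsystem: "demo",
		Name:      "victims",
		Help:      "Number of victims selected by the last attempt",
	})
)

func main() {
	// Register once; in the scheduler this happens inside Register()
	// guarded by sync.Once.
	prometheus.MustRegister(evaluationDuration, attempts, victims)

	start := time.Now()
	selected := doWork() // stand-in for the preemption evaluation
	attempts.Inc()
	victims.Set(float64(selected))
	// Rough equivalent of metrics.SinceInMicroseconds(start) in the scheduler.
	evaluationDuration.Observe(float64(time.Since(start).Microseconds()))

	fmt.Println("metrics recorded")
}

// doWork is a hypothetical placeholder for the timed operation.
func doWork() int {
	time.Sleep(2 * time.Millisecond)
	return 3
}

With those buckets, the histogram spans roughly 1,000 us up to 1000 * 2^14, about 16.4 s, which comfortably brackets a single preemption evaluation.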