mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-24 04:06:03 +00:00
Added metrics for preemption victims, pods preempted and duration of preemption
This commit is contained in:
parent
7dbb709413
commit
8aebf3554c
@ -59,6 +59,14 @@ var (
|
||||
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
|
||||
},
|
||||
)
|
||||
SchedulingAlgorithmPremptionEvaluationDuration = prometheus.NewHistogram(
|
||||
prometheus.HistogramOpts{
|
||||
Subsystem: schedulerSubsystem,
|
||||
Name: "scheduling_algorithm_preemption_evaluation",
|
||||
Help: "Scheduling algorithm preemption evaluation duration",
|
||||
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
|
||||
},
|
||||
)
|
||||
BindingLatency = prometheus.NewHistogram(
|
||||
prometheus.HistogramOpts{
|
||||
Subsystem: schedulerSubsystem,
|
||||
@ -67,6 +75,18 @@ var (
|
||||
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
|
||||
},
|
||||
)
|
||||
PreemptionVictims = prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Subsystem: schedulerSubsystem,
|
||||
Name: "pod_preemption_victims",
|
||||
Help: "Number of selected preemption victims",
|
||||
})
|
||||
PreemptionAttempts = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Subsystem: schedulerSubsystem,
|
||||
Name: "total_preemption_attempts",
|
||||
Help: "Total preemption attempts in the cluster till now",
|
||||
})
|
||||
)
|
||||
|
||||
var registerMetrics sync.Once
|
||||
@ -78,8 +98,12 @@ func Register() {
|
||||
prometheus.MustRegister(E2eSchedulingLatency)
|
||||
prometheus.MustRegister(SchedulingAlgorithmLatency)
|
||||
prometheus.MustRegister(BindingLatency)
|
||||
|
||||
prometheus.MustRegister(SchedulingAlgorithmPredicateEvaluationDuration)
|
||||
prometheus.MustRegister(SchedulingAlgorithmPriorityEvaluationDuration)
|
||||
prometheus.MustRegister(SchedulingAlgorithmPremptionEvaluationDuration)
|
||||
prometheus.MustRegister(PreemptionVictims)
|
||||
prometheus.MustRegister(PreemptionAttempts)
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -216,7 +216,9 @@ func (sched *Scheduler) preempt(preemptor *v1.Pod, scheduleErr error) (string, e
|
||||
glog.Errorf("Error getting the updated preemptor pod object: %v", err)
|
||||
return "", err
|
||||
}
|
||||
|
||||
node, victims, nominatedPodsToClear, err := sched.config.Algorithm.Preempt(preemptor, sched.config.NodeLister, scheduleErr)
|
||||
metrics.PreemptionVictims.Set(float64(len(victims)))
|
||||
if err != nil {
|
||||
glog.Errorf("Error preempting victims to make room for %v/%v.", preemptor.Namespace, preemptor.Name)
|
||||
return "", err
|
||||
@ -440,18 +442,20 @@ func (sched *Scheduler) scheduleOne() {
|
||||
// Synchronously attempt to find a fit for the pod.
|
||||
start := time.Now()
|
||||
suggestedHost, err := sched.schedule(pod)
|
||||
metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start))
|
||||
if err != nil {
|
||||
// schedule() may have failed because the pod would not fit on any host, so we try to
|
||||
// preempt, with the expectation that the next time the pod is tried for scheduling it
|
||||
// will fit due to the preemption. It is also possible that a different pod will schedule
|
||||
// into the resources that were preempted, but this is harmless.
|
||||
if fitError, ok := err.(*core.FitError); ok {
|
||||
preemptionStartTime := time.Now()
|
||||
sched.preempt(pod, fitError)
|
||||
metrics.PreemptionAttempts.Inc()
|
||||
metrics.SchedulingAlgorithmPremptionEvaluationDuration.Observe(metrics.SinceInMicroseconds(preemptionStartTime))
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start))
|
||||
// Tell the cache to assume that a pod now is running on a given node, even though it hasn't been bound yet.
|
||||
// This allows us to keep scheduling without waiting on binding to occur.
|
||||
assumedPod := pod.DeepCopy()
|
||||
|
Loading…
Reference in New Issue
Block a user