mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 19:56:01 +00:00
Add gauge metric to track unschedulable pod (#108475)
* Add gauge metric to track unschedulable pod
* Add review comments
This commit is contained in:
parent
faa027ca2b
commit
840fc3ea7b
@ -404,6 +404,9 @@ func (p *PriorityQueue) AddUnschedulableIfNotPresent(pInfo *framework.QueuedPodI
|
|||||||
|
|
||||||
// If a move request has been received, move it to the BackoffQ, otherwise move
|
// If a move request has been received, move it to the BackoffQ, otherwise move
|
||||||
// it to unschedulableQ.
|
// it to unschedulableQ.
|
||||||
|
for plugin := range pInfo.UnschedulablePlugins {
|
||||||
|
metrics.UnschedulableReason(plugin, pInfo.Pod.Spec.SchedulerName).Inc()
|
||||||
|
}
|
||||||
if p.moveRequestCycle >= podSchedulingCycle {
|
if p.moveRequestCycle >= podSchedulingCycle {
|
||||||
if err := p.podBackoffQ.Add(pInfo); err != nil {
|
if err := p.podBackoffQ.Add(pInfo); err != nil {
|
||||||
return fmt.Errorf("error adding pod %v to the backoff queue: %v", pod.Name, err)
|
return fmt.Errorf("error adding pod %v to the backoff queue: %v", pod.Name, err)
|
||||||
@ -412,6 +415,7 @@ func (p *PriorityQueue) AddUnschedulableIfNotPresent(pInfo *framework.QueuedPodI
|
|||||||
} else {
|
} else {
|
||||||
p.unschedulableQ.addOrUpdate(pInfo)
|
p.unschedulableQ.addOrUpdate(pInfo)
|
||||||
metrics.SchedulerQueueIncomingPods.WithLabelValues("unschedulable", ScheduleAttemptFailure).Inc()
|
metrics.SchedulerQueueIncomingPods.WithLabelValues("unschedulable", ScheduleAttemptFailure).Inc()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
p.PodNominator.AddNominatedPod(pInfo.PodInfo, nil)
|
p.PodNominator.AddNominatedPod(pInfo.PodInfo, nil)
|
||||||
@ -940,6 +944,9 @@ func MakeNextPodFunc(queue SchedulingQueue) func() *framework.QueuedPodInfo {
|
|||||||
podInfo, err := queue.Pop()
|
podInfo, err := queue.Pop()
|
||||||
if err == nil {
|
if err == nil {
|
||||||
klog.V(4).InfoS("About to try and schedule pod", "pod", klog.KObj(podInfo.Pod))
|
klog.V(4).InfoS("About to try and schedule pod", "pod", klog.KObj(podInfo.Pod))
|
||||||
|
for plugin := range podInfo.UnschedulablePlugins {
|
||||||
|
metrics.UnschedulableReason(plugin, podInfo.Pod.Spec.SchedulerName).Dec()
|
||||||
|
}
|
||||||
return podInfo
|
return podInfo
|
||||||
}
|
}
|
||||||
klog.ErrorS(err, "Error while retrieving next pod from scheduling queue")
|
klog.ErrorS(err, "Error while retrieving next pod from scheduling queue")
|
||||||
|
@ -173,6 +173,14 @@ var (
|
|||||||
StabilityLevel: metrics.ALPHA,
|
StabilityLevel: metrics.ALPHA,
|
||||||
}, []string{"type"})
|
}, []string{"type"})
|
||||||
|
|
||||||
|
unschedulableReasons = metrics.NewGaugeVec(
|
||||||
|
&metrics.GaugeOpts{
|
||||||
|
Subsystem: SchedulerSubsystem,
|
||||||
|
Name: "unschedulable_pods",
|
||||||
|
Help: "The number of unschedulable pods broken down by plugin name. A pod will increment the gauge for all plugins that caused it to not schedule and so this metric have meaning only when broken down by plugin.",
|
||||||
|
StabilityLevel: metrics.ALPHA,
|
||||||
|
}, []string{"plugin", "profile"})
|
||||||
|
|
||||||
metricsList = []metrics.Registerable{
|
metricsList = []metrics.Registerable{
|
||||||
scheduleAttempts,
|
scheduleAttempts,
|
||||||
e2eSchedulingLatency,
|
e2eSchedulingLatency,
|
||||||
@ -189,6 +197,7 @@ var (
|
|||||||
SchedulerGoroutines,
|
SchedulerGoroutines,
|
||||||
PermitWaitDuration,
|
PermitWaitDuration,
|
||||||
CacheSize,
|
CacheSize,
|
||||||
|
unschedulableReasons,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -235,3 +244,7 @@ func UnschedulablePods() metrics.GaugeMetric {
|
|||||||
func SinceInSeconds(start time.Time) float64 {
|
func SinceInSeconds(start time.Time) float64 {
|
||||||
return time.Since(start).Seconds()
|
return time.Since(start).Seconds()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UnschedulableReason returns the gauge from the unschedulableReasons vector
// (metric name "unschedulable_pods") selected by the given "plugin" and
// "profile" label values.
//
// The scheduling queue calls it once per entry in a pod's
// UnschedulablePlugins, passing the pod's Spec.SchedulerName as profile:
// the gauge is incremented in AddUnschedulableIfNotPresent and decremented
// when the pod is popped by the function built in MakeNextPodFunc.
func UnschedulableReason(plugin string, profile string) metrics.GaugeMetric {
|
||||||
|
return unschedulableReasons.With(metrics.Labels{"plugin": plugin, "profile": profile})
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user