mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-09-28 13:45:50 +00:00
issue#105861: making scheduler metrics stable
This commit is contained in:
@@ -1450,7 +1450,7 @@ func TestPendingPodsMetric(t *testing.T) {
|
||||
},
|
||||
metricsName: "scheduler_pending_pods",
|
||||
wants: `
|
||||
# HELP scheduler_pending_pods [ALPHA] Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.
|
||||
# HELP scheduler_pending_pods [STABLE] Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.
|
||||
# TYPE scheduler_pending_pods gauge
|
||||
scheduler_pending_pods{queue="active"} 30
|
||||
scheduler_pending_pods{queue="backoff"} 0
|
||||
@@ -1471,7 +1471,7 @@ scheduler_pending_pods{queue="unschedulable"} 20
|
||||
},
|
||||
metricsName: "scheduler_pending_pods",
|
||||
wants: `
|
||||
# HELP scheduler_pending_pods [ALPHA] Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.
|
||||
# HELP scheduler_pending_pods [STABLE] Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.
|
||||
# TYPE scheduler_pending_pods gauge
|
||||
scheduler_pending_pods{queue="active"} 15
|
||||
scheduler_pending_pods{queue="backoff"} 25
|
||||
@@ -1492,7 +1492,7 @@ scheduler_pending_pods{queue="unschedulable"} 10
|
||||
},
|
||||
metricsName: "scheduler_pending_pods",
|
||||
wants: `
|
||||
# HELP scheduler_pending_pods [ALPHA] Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.
|
||||
# HELP scheduler_pending_pods [STABLE] Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.
|
||||
# TYPE scheduler_pending_pods gauge
|
||||
scheduler_pending_pods{queue="active"} 50
|
||||
scheduler_pending_pods{queue="backoff"} 0
|
||||
@@ -1515,7 +1515,7 @@ scheduler_pending_pods{queue="unschedulable"} 0
|
||||
},
|
||||
metricsName: "scheduler_pending_pods",
|
||||
wants: `
|
||||
# HELP scheduler_pending_pods [ALPHA] Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.
|
||||
# HELP scheduler_pending_pods [STABLE] Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.
|
||||
# TYPE scheduler_pending_pods gauge
|
||||
scheduler_pending_pods{queue="active"} 30
|
||||
scheduler_pending_pods{queue="backoff"} 20
|
||||
@@ -1538,7 +1538,7 @@ scheduler_pending_pods{queue="unschedulable"} 0
|
||||
},
|
||||
metricsName: "scheduler_pending_pods",
|
||||
wants: `
|
||||
# HELP scheduler_pending_pods [ALPHA] Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.
|
||||
# HELP scheduler_pending_pods [STABLE] Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.
|
||||
# TYPE scheduler_pending_pods gauge
|
||||
scheduler_pending_pods{queue="active"} 50
|
||||
scheduler_pending_pods{queue="backoff"} 0
|
||||
|
@@ -44,16 +44,25 @@ var (
|
||||
Subsystem: SchedulerSubsystem,
|
||||
Name: "schedule_attempts_total",
|
||||
Help: "Number of attempts to schedule pods, by the result. 'unschedulable' means a pod could not be scheduled, while 'error' means an internal scheduler problem.",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
StabilityLevel: metrics.STABLE,
|
||||
}, []string{"result", "profile"})
|
||||
|
||||
e2eSchedulingLatency = metrics.NewHistogramVec(
|
||||
&metrics.HistogramOpts{
|
||||
Subsystem: SchedulerSubsystem,
|
||||
Name: "e2e_scheduling_duration_seconds",
|
||||
DeprecatedVersion: "1.23.0",
|
||||
Help: "E2e scheduling latency in seconds (scheduling algorithm + binding). This metric is replaced by scheduling_attempt_duration_seconds.",
|
||||
Buckets: metrics.ExponentialBuckets(0.001, 2, 15),
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
}, []string{"result", "profile"})
|
||||
schedulingLatency = metrics.NewHistogramVec(
|
||||
&metrics.HistogramOpts{
|
||||
Subsystem: SchedulerSubsystem,
|
||||
Name: "e2e_scheduling_duration_seconds",
|
||||
Help: "E2e scheduling latency in seconds (scheduling algorithm + binding)",
|
||||
Name: "scheduling_attempt_duration_seconds",
|
||||
Help: "Scheduling attempt latency in seconds (scheduling algorithm + binding)",
|
||||
Buckets: metrics.ExponentialBuckets(0.001, 2, 15),
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
StabilityLevel: metrics.STABLE,
|
||||
}, []string{"result", "profile"})
|
||||
SchedulingAlgorithmLatency = metrics.NewHistogram(
|
||||
&metrics.HistogramOpts{
|
||||
@@ -71,21 +80,21 @@ var (
|
||||
Help: "Number of selected preemption victims",
|
||||
// we think #victims>50 is pretty rare, therefore [50, +Inf) is considered a single bucket.
|
||||
Buckets: metrics.LinearBuckets(5, 5, 10),
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
StabilityLevel: metrics.STABLE,
|
||||
})
|
||||
PreemptionAttempts = metrics.NewCounter(
|
||||
&metrics.CounterOpts{
|
||||
Subsystem: SchedulerSubsystem,
|
||||
Name: "preemption_attempts_total",
|
||||
Help: "Total preemption attempts in the cluster till now",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
StabilityLevel: metrics.STABLE,
|
||||
})
|
||||
pendingPods = metrics.NewGaugeVec(
|
||||
&metrics.GaugeOpts{
|
||||
Subsystem: SchedulerSubsystem,
|
||||
Name: "pending_pods",
|
||||
Help: "Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
StabilityLevel: metrics.STABLE,
|
||||
}, []string{"queue"})
|
||||
SchedulerGoroutines = metrics.NewGaugeVec(
|
||||
&metrics.GaugeOpts{
|
||||
@@ -167,6 +176,7 @@ var (
|
||||
metricsList = []metrics.Registerable{
|
||||
scheduleAttempts,
|
||||
e2eSchedulingLatency,
|
||||
schedulingLatency,
|
||||
SchedulingAlgorithmLatency,
|
||||
PreemptionVictims,
|
||||
PreemptionAttempts,
|
||||
|
@@ -44,5 +44,6 @@ func PodScheduleError(profile string, duration float64) {
|
||||
|
||||
func observeScheduleAttemptAndLatency(result, profile string, duration float64) {
|
||||
e2eSchedulingLatency.WithLabelValues(result, profile).Observe(duration)
|
||||
schedulingLatency.WithLabelValues(result, profile).Observe(duration)
|
||||
scheduleAttempts.WithLabelValues(result, profile).Inc()
|
||||
}
|
||||
|
Reference in New Issue
Block a user