mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-30 15:05:27 +00:00
Merge pull request #105941 from rezakrimi/issue/105861
Make some scheduler metrics stable
This commit is contained in:
commit
c2706035f2
@ -1450,7 +1450,7 @@ func TestPendingPodsMetric(t *testing.T) {
|
||||
},
|
||||
metricsName: "scheduler_pending_pods",
|
||||
wants: `
|
||||
# HELP scheduler_pending_pods [ALPHA] Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.
|
||||
# HELP scheduler_pending_pods [STABLE] Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.
|
||||
# TYPE scheduler_pending_pods gauge
|
||||
scheduler_pending_pods{queue="active"} 30
|
||||
scheduler_pending_pods{queue="backoff"} 0
|
||||
@ -1471,7 +1471,7 @@ scheduler_pending_pods{queue="unschedulable"} 20
|
||||
},
|
||||
metricsName: "scheduler_pending_pods",
|
||||
wants: `
|
||||
# HELP scheduler_pending_pods [ALPHA] Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.
|
||||
# HELP scheduler_pending_pods [STABLE] Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.
|
||||
# TYPE scheduler_pending_pods gauge
|
||||
scheduler_pending_pods{queue="active"} 15
|
||||
scheduler_pending_pods{queue="backoff"} 25
|
||||
@ -1492,7 +1492,7 @@ scheduler_pending_pods{queue="unschedulable"} 10
|
||||
},
|
||||
metricsName: "scheduler_pending_pods",
|
||||
wants: `
|
||||
# HELP scheduler_pending_pods [ALPHA] Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.
|
||||
# HELP scheduler_pending_pods [STABLE] Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.
|
||||
# TYPE scheduler_pending_pods gauge
|
||||
scheduler_pending_pods{queue="active"} 50
|
||||
scheduler_pending_pods{queue="backoff"} 0
|
||||
@ -1515,7 +1515,7 @@ scheduler_pending_pods{queue="unschedulable"} 0
|
||||
},
|
||||
metricsName: "scheduler_pending_pods",
|
||||
wants: `
|
||||
# HELP scheduler_pending_pods [ALPHA] Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.
|
||||
# HELP scheduler_pending_pods [STABLE] Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.
|
||||
# TYPE scheduler_pending_pods gauge
|
||||
scheduler_pending_pods{queue="active"} 30
|
||||
scheduler_pending_pods{queue="backoff"} 20
|
||||
@ -1538,7 +1538,7 @@ scheduler_pending_pods{queue="unschedulable"} 0
|
||||
},
|
||||
metricsName: "scheduler_pending_pods",
|
||||
wants: `
|
||||
# HELP scheduler_pending_pods [ALPHA] Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.
|
||||
# HELP scheduler_pending_pods [STABLE] Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.
|
||||
# TYPE scheduler_pending_pods gauge
|
||||
scheduler_pending_pods{queue="active"} 50
|
||||
scheduler_pending_pods{queue="backoff"} 0
|
||||
|
@ -44,16 +44,25 @@ var (
|
||||
Subsystem: SchedulerSubsystem,
|
||||
Name: "schedule_attempts_total",
|
||||
Help: "Number of attempts to schedule pods, by the result. 'unschedulable' means a pod could not be scheduled, while 'error' means an internal scheduler problem.",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
StabilityLevel: metrics.STABLE,
|
||||
}, []string{"result", "profile"})
|
||||
|
||||
e2eSchedulingLatency = metrics.NewHistogramVec(
|
||||
&metrics.HistogramOpts{
|
||||
Subsystem: SchedulerSubsystem,
|
||||
Name: "e2e_scheduling_duration_seconds",
|
||||
DeprecatedVersion: "1.23.0",
|
||||
Help: "E2e scheduling latency in seconds (scheduling algorithm + binding). This metric is replaced by scheduling_attempt_duration_seconds.",
|
||||
Buckets: metrics.ExponentialBuckets(0.001, 2, 15),
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
}, []string{"result", "profile"})
|
||||
schedulingLatency = metrics.NewHistogramVec(
|
||||
&metrics.HistogramOpts{
|
||||
Subsystem: SchedulerSubsystem,
|
||||
Name: "e2e_scheduling_duration_seconds",
|
||||
Help: "E2e scheduling latency in seconds (scheduling algorithm + binding)",
|
||||
Name: "scheduling_attempt_duration_seconds",
|
||||
Help: "Scheduling attempt latency in seconds (scheduling algorithm + binding)",
|
||||
Buckets: metrics.ExponentialBuckets(0.001, 2, 15),
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
StabilityLevel: metrics.STABLE,
|
||||
}, []string{"result", "profile"})
|
||||
SchedulingAlgorithmLatency = metrics.NewHistogram(
|
||||
&metrics.HistogramOpts{
|
||||
@ -71,21 +80,21 @@ var (
|
||||
Help: "Number of selected preemption victims",
|
||||
// we think #victims>50 is pretty rare, therefore [50, +Inf) is considered a single bucket.
|
||||
Buckets: metrics.LinearBuckets(5, 5, 10),
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
StabilityLevel: metrics.STABLE,
|
||||
})
|
||||
PreemptionAttempts = metrics.NewCounter(
|
||||
&metrics.CounterOpts{
|
||||
Subsystem: SchedulerSubsystem,
|
||||
Name: "preemption_attempts_total",
|
||||
Help: "Total preemption attempts in the cluster till now",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
StabilityLevel: metrics.STABLE,
|
||||
})
|
||||
pendingPods = metrics.NewGaugeVec(
|
||||
&metrics.GaugeOpts{
|
||||
Subsystem: SchedulerSubsystem,
|
||||
Name: "pending_pods",
|
||||
Help: "Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulableQ.",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
StabilityLevel: metrics.STABLE,
|
||||
}, []string{"queue"})
|
||||
SchedulerGoroutines = metrics.NewGaugeVec(
|
||||
&metrics.GaugeOpts{
|
||||
@ -167,6 +176,7 @@ var (
|
||||
metricsList = []metrics.Registerable{
|
||||
scheduleAttempts,
|
||||
e2eSchedulingLatency,
|
||||
schedulingLatency,
|
||||
SchedulingAlgorithmLatency,
|
||||
PreemptionVictims,
|
||||
PreemptionAttempts,
|
||||
|
@ -44,5 +44,6 @@ func PodScheduleError(profile string, duration float64) {
|
||||
|
||||
func observeScheduleAttemptAndLatency(result, profile string, duration float64) {
|
||||
e2eSchedulingLatency.WithLabelValues(result, profile).Observe(duration)
|
||||
schedulingLatency.WithLabelValues(result, profile).Observe(duration)
|
||||
scheduleAttempts.WithLabelValues(result, profile).Inc()
|
||||
}
|
||||
|
@ -1,3 +1,66 @@
|
||||
- name: pending_pods
|
||||
subsystem: scheduler
|
||||
help: Number of pending pods, by the queue type. 'active' means number of pods in
|
||||
activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number
|
||||
of pods in unschedulableQ.
|
||||
type: Gauge
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- queue
|
||||
- name: preemption_attempts_total
|
||||
subsystem: scheduler
|
||||
help: Total preemption attempts in the cluster till now
|
||||
type: Counter
|
||||
stabilityLevel: STABLE
|
||||
- name: preemption_victims
|
||||
subsystem: scheduler
|
||||
help: Number of selected preemption victims
|
||||
type: Histogram
|
||||
stabilityLevel: STABLE
|
||||
buckets:
|
||||
- 5
|
||||
- 10
|
||||
- 15
|
||||
- 20
|
||||
- 25
|
||||
- 30
|
||||
- 35
|
||||
- 40
|
||||
- 45
|
||||
- 50
|
||||
- name: schedule_attempts_total
|
||||
subsystem: scheduler
|
||||
help: Number of attempts to schedule pods, by the result. 'unschedulable' means
|
||||
a pod could not be scheduled, while 'error' means an internal scheduler problem.
|
||||
type: Counter
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- profile
|
||||
- result
|
||||
- name: scheduling_attempt_duration_seconds
|
||||
subsystem: scheduler
|
||||
help: Scheduling attempt latency in seconds (scheduling algorithm + binding)
|
||||
type: Histogram
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- profile
|
||||
- result
|
||||
buckets:
|
||||
- 0.001
|
||||
- 0.002
|
||||
- 0.004
|
||||
- 0.008
|
||||
- 0.016
|
||||
- 0.032
|
||||
- 0.064
|
||||
- 0.128
|
||||
- 0.256
|
||||
- 0.512
|
||||
- 1.024
|
||||
- 2.048
|
||||
- 4.096
|
||||
- 8.192
|
||||
- 16.384
|
||||
- name: apiserver_request_duration_seconds
|
||||
help: Response latency distribution in seconds for each verb, dry run value, group,
|
||||
version, resource, subresource, scope and component.
|
||||
|
Loading…
Reference in New Issue
Block a user