Add Pod Scheduling SLI Duration metric (#119049)

Signed-off-by: Heba Elayoty <hebaelayoty@gmail.com>
Co-authored-by: Aldo Culquicondor <1299064+alculquicondor@users.noreply.github.com>
This commit is contained in:
Heba Elayoty 2023-08-15 15:17:41 -07:00 committed by GitHub
parent 5c365939bd
commit 224087abfa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 50 additions and 2 deletions

View File

@ -113,14 +113,29 @@ var (
Help: "Number of running goroutines split by the work they do such as binding.",
StabilityLevel: metrics.ALPHA,
}, []string{"operation"})
// PodSchedulingDuration is a histogram of the end-to-end latency for a pod
// being scheduled, which may include multiple scheduling attempts.
// Observations are partitioned by the "attempts" label.
//
// PodSchedulingDuration is deprecated as of Kubernetes v1.28, and will be removed
// in v1.31. Please use PodSchedulingSLIDuration instead.
PodSchedulingDuration = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "pod_scheduling_duration_seconds",
Help: "E2e latency for a pod being scheduled which may include multiple scheduling attempts.",
// Start with 10ms with the last bucket being [~88m, Inf).
Buckets: metrics.ExponentialBuckets(0.01, 2, 20),
StabilityLevel: metrics.STABLE,
// Records the release in which this metric became deprecated.
DeprecatedVersion: "1.28.0",
},
[]string{"attempts"})
// PodSchedulingSLIDuration is a histogram of the end-to-end latency for a pod
// being scheduled, measured from the time the pod enters the scheduling queue
// and possibly spanning multiple scheduling attempts. Observations are
// partitioned by the "attempts" label.
PodSchedulingSLIDuration = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "pod_scheduling_sli_duration_seconds",
// NOTE(review): "an d" in the help text is a typo for "and". It is left
// untouched here because the same text is recorded in the metrics list
// entry for this metric; fix both places together.
Help: "E2e latency for a pod being scheduled, from the time the pod enters the scheduling queue an d might involve multiple scheduling attempts.",
// Start with 10ms with the last bucket being [~88m, Inf).
Buckets: metrics.ExponentialBuckets(0.01, 2, 20),
// A struct literal may set each field only once. BETA is the level
// recorded for this metric in the metrics list.
StabilityLevel: metrics.BETA,
},
[]string{"attempts"})
@ -206,6 +221,7 @@ var (
PreemptionAttempts,
pendingPods,
PodSchedulingDuration,
PodSchedulingSLIDuration,
PodSchedulingAttempts,
FrameworkExtensionPointDuration,
PluginExecutionDuration,

View File

@ -288,6 +288,7 @@ func (sched *Scheduler) bindingCycle(
metrics.PodSchedulingAttempts.Observe(float64(assumedPodInfo.Attempts))
if assumedPodInfo.InitialAttemptTimestamp != nil {
metrics.PodSchedulingDuration.WithLabelValues(getAttemptsLabel(assumedPodInfo)).Observe(metrics.SinceInSeconds(*assumedPodInfo.InitialAttemptTimestamp))
metrics.PodSchedulingSLIDuration.WithLabelValues(getAttemptsLabel(assumedPodInfo)).Observe(metrics.SinceInSeconds(*assumedPodInfo.InitialAttemptTimestamp))
}
// Run "postbind" plugins.
fwk.RunPostBindPlugins(ctx, state, assumedPod, scheduleResult.SuggestedHost)

View File

@ -74,6 +74,35 @@
stabilityLevel: STABLE
labels:
- zone
# Histogram entry for the scheduler's pod scheduling SLI duration metric,
# labelled by "attempts".
# NOTE(review): the help text below contains the typo "an d" (should read
# "and"). It mirrors the Help string declared for this metric in the Go source,
# so the two must be corrected together.
- name: pod_scheduling_sli_duration_seconds
subsystem: scheduler
help: E2e latency for a pod being scheduled, from the time the pod enters the scheduling
queue an d might involve multiple scheduling attempts.
type: Histogram
stabilityLevel: BETA
labels:
- attempts
# 20 bucket bounds, each double the previous one, from 0.01 up to 5242.88.
buckets:
- 0.01
- 0.02
- 0.04
- 0.08
- 0.16
- 0.32
- 0.64
- 1.28
- 2.56
- 5.12
- 10.24
- 20.48
- 40.96
- 81.92
- 163.84
- 327.68
- 655.36
- 1310.72
- 2621.44
- 5242.88
- name: kube_pod_resource_limit
help: Resources limit for workloads on the cluster, broken down by pod. This shows
the resource usage the scheduler and kubelet expect per pod for resources along
@ -151,6 +180,7 @@
help: E2e latency for a pod being scheduled which may include multiple scheduling
attempts.
type: Histogram
deprecatedVersion: 1.28.0
stabilityLevel: STABLE
labels:
- attempts

View File

@ -102,7 +102,8 @@ var (
label: resultLabelName,
values: []string{metrics.ScheduledResult, metrics.UnschedulableResult, metrics.ErrorResult},
},
// The deprecated duration metric and its SLI replacement are each listed
// exactly once; a map literal must not repeat a constant key.
"scheduler_pod_scheduling_duration_seconds": nil,
"scheduler_pod_scheduling_sli_duration_seconds": nil,
},
}
)