Add Pod Scheduling SLI Duration metric (#119049)

Signed-off-by: Heba Elayoty <hebaelayoty@gmail.com>
Co-authored-by: Aldo Culquicondor <1299064+alculquicondor@users.noreply.github.com>
This commit is contained in:
Heba Elayoty 2023-08-15 15:17:41 -07:00 committed by GitHub
parent 5c365939bd
commit 224087abfa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 50 additions and 2 deletions

View File

@ -113,14 +113,29 @@ var (
Help: "Number of running goroutines split by the work they do such as binding.", Help: "Number of running goroutines split by the work they do such as binding.",
StabilityLevel: metrics.ALPHA, StabilityLevel: metrics.ALPHA,
}, []string{"operation"}) }, []string{"operation"})
// PodSchedulingDuration is deprecated as of Kubernetes v1.28, and will be removed
// in v1.31. Please use PodSchedulingSLIDuration instead.
PodSchedulingDuration = metrics.NewHistogramVec( PodSchedulingDuration = metrics.NewHistogramVec(
&metrics.HistogramOpts{ &metrics.HistogramOpts{
Subsystem: SchedulerSubsystem, Subsystem: SchedulerSubsystem,
Name: "pod_scheduling_duration_seconds", Name: "pod_scheduling_duration_seconds",
Help: "E2e latency for a pod being scheduled which may include multiple scheduling attempts.", Help: "E2e latency for a pod being scheduled which may include multiple scheduling attempts.",
// Start with 10ms with the last bucket being [~88m, Inf). // Start with 10ms with the last bucket being [~88m, Inf).
Buckets: metrics.ExponentialBuckets(0.01, 2, 20),
StabilityLevel: metrics.STABLE,
DeprecatedVersion: "1.28.0",
},
[]string{"attempts"})
PodSchedulingSLIDuration = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: SchedulerSubsystem,
Name: "pod_scheduling_sli_duration_seconds",
Help: "E2e latency for a pod being scheduled, from the time the pod enters the scheduling queue and might involve multiple scheduling attempts.",
// Start with 10ms with the last bucket being [~88m, Inf).
Buckets: metrics.ExponentialBuckets(0.01, 2, 20), Buckets: metrics.ExponentialBuckets(0.01, 2, 20),
StabilityLevel: metrics.STABLE, StabilityLevel: metrics.BETA,
}, },
[]string{"attempts"}) []string{"attempts"})
@ -206,6 +221,7 @@ var (
PreemptionAttempts, PreemptionAttempts,
pendingPods, pendingPods,
PodSchedulingDuration, PodSchedulingDuration,
PodSchedulingSLIDuration,
PodSchedulingAttempts, PodSchedulingAttempts,
FrameworkExtensionPointDuration, FrameworkExtensionPointDuration,
PluginExecutionDuration, PluginExecutionDuration,

View File

@ -288,6 +288,7 @@ func (sched *Scheduler) bindingCycle(
metrics.PodSchedulingAttempts.Observe(float64(assumedPodInfo.Attempts)) metrics.PodSchedulingAttempts.Observe(float64(assumedPodInfo.Attempts))
if assumedPodInfo.InitialAttemptTimestamp != nil { if assumedPodInfo.InitialAttemptTimestamp != nil {
metrics.PodSchedulingDuration.WithLabelValues(getAttemptsLabel(assumedPodInfo)).Observe(metrics.SinceInSeconds(*assumedPodInfo.InitialAttemptTimestamp)) metrics.PodSchedulingDuration.WithLabelValues(getAttemptsLabel(assumedPodInfo)).Observe(metrics.SinceInSeconds(*assumedPodInfo.InitialAttemptTimestamp))
metrics.PodSchedulingSLIDuration.WithLabelValues(getAttemptsLabel(assumedPodInfo)).Observe(metrics.SinceInSeconds(*assumedPodInfo.InitialAttemptTimestamp))
} }
// Run "postbind" plugins. // Run "postbind" plugins.
fwk.RunPostBindPlugins(ctx, state, assumedPod, scheduleResult.SuggestedHost) fwk.RunPostBindPlugins(ctx, state, assumedPod, scheduleResult.SuggestedHost)

View File

@ -74,6 +74,35 @@
stabilityLevel: STABLE stabilityLevel: STABLE
labels: labels:
- zone - zone
- name: pod_scheduling_sli_duration_seconds
subsystem: scheduler
help: E2e latency for a pod being scheduled, from the time the pod enters the scheduling
queue and might involve multiple scheduling attempts.
type: Histogram
stabilityLevel: BETA
labels:
- attempts
buckets:
- 0.01
- 0.02
- 0.04
- 0.08
- 0.16
- 0.32
- 0.64
- 1.28
- 2.56
- 5.12
- 10.24
- 20.48
- 40.96
- 81.92
- 163.84
- 327.68
- 655.36
- 1310.72
- 2621.44
- 5242.88
- name: kube_pod_resource_limit - name: kube_pod_resource_limit
help: Resources limit for workloads on the cluster, broken down by pod. This shows help: Resources limit for workloads on the cluster, broken down by pod. This shows
the resource usage the scheduler and kubelet expect per pod for resources along the resource usage the scheduler and kubelet expect per pod for resources along
@ -151,6 +180,7 @@
help: E2e latency for a pod being scheduled which may include multiple scheduling help: E2e latency for a pod being scheduled which may include multiple scheduling
attempts. attempts.
type: Histogram type: Histogram
deprecatedVersion: 1.28.0
stabilityLevel: STABLE stabilityLevel: STABLE
labels: labels:
- attempts - attempts

View File

@ -102,7 +102,8 @@ var (
label: resultLabelName, label: resultLabelName,
values: []string{metrics.ScheduledResult, metrics.UnschedulableResult, metrics.ErrorResult}, values: []string{metrics.ScheduledResult, metrics.UnschedulableResult, metrics.ErrorResult},
}, },
"scheduler_pod_scheduling_duration_seconds": nil, "scheduler_pod_scheduling_duration_seconds": nil,
"scheduler_pod_scheduling_sli_duration_seconds": nil,
}, },
} }
) )