Add Pod Scheduling SLI Duration metric (#119049)

Signed-off-by: Heba Elayoty <hebaelayoty@gmail.com>
Co-authored-by: Aldo Culquicondor <1299064+alculquicondor@users.noreply.github.com>
2025-09-23 10:58:42 +00:00 · 2023-08-15 15:17:41 -07:00
parent 5c365939bd
commit 224087abfa
4 changed files with 50 additions and 2 deletions
--- a/pkg/scheduler/metrics/metrics.go
+++ b/pkg/scheduler/metrics/metrics.go
@@ -113,14 +113,29 @@ var (
 			Help:           "Number of running goroutines split by the work they do such as binding.",
 			StabilityLevel: metrics.ALPHA,
 		}, []string{"operation"})
 	// PodSchedulingDuration is deprecated as of Kubernetes v1.28, and will be removed
 	// in v1.31. Please use PodSchedulingSLIDuration instead.
 	PodSchedulingDuration = metrics.NewHistogramVec(
 		&metrics.HistogramOpts{
 			Subsystem: SchedulerSubsystem,
 			Name:      "pod_scheduling_duration_seconds",
 			Help:      "E2e latency for a pod being scheduled which may include multiple scheduling attempts.",
 			// Start with 10ms with the last bucket being [~88m, Inf).
 			Buckets:           metrics.ExponentialBuckets(0.01, 2, 20),
 			StabilityLevel:    metrics.STABLE,
 			DeprecatedVersion: "1.28.0",
 		},
 		[]string{"attempts"})
 	PodSchedulingSLIDuration = metrics.NewHistogramVec(
 		&metrics.HistogramOpts{
 			Subsystem: SchedulerSubsystem,
 			Name:      "pod_scheduling_sli_duration_seconds",
 			Help:      "E2e latency for a pod being scheduled, from the time the pod enters the scheduling queue an d might involve multiple scheduling attempts.",
 			// Start with 10ms with the last bucket being [~88m, Inf).
 			Buckets:        metrics.ExponentialBuckets(0.01, 2, 20),
-			StabilityLevel: metrics.STABLE,
+			StabilityLevel: metrics.BETA,
 		},
 		[]string{"attempts"})
@@ -206,6 +221,7 @@ var (
 		PreemptionAttempts,
 		pendingPods,
 		PodSchedulingDuration,
 		PodSchedulingSLIDuration,
 		PodSchedulingAttempts,
 		FrameworkExtensionPointDuration,
 		PluginExecutionDuration,
--- a/pkg/scheduler/schedule_one.go
+++ b/pkg/scheduler/schedule_one.go
@@ -288,6 +288,7 @@ func (sched *Scheduler) bindingCycle(
 	metrics.PodSchedulingAttempts.Observe(float64(assumedPodInfo.Attempts))
 	if assumedPodInfo.InitialAttemptTimestamp != nil {
 		metrics.PodSchedulingDuration.WithLabelValues(getAttemptsLabel(assumedPodInfo)).Observe(metrics.SinceInSeconds(*assumedPodInfo.InitialAttemptTimestamp))
 		metrics.PodSchedulingSLIDuration.WithLabelValues(getAttemptsLabel(assumedPodInfo)).Observe(metrics.SinceInSeconds(*assumedPodInfo.InitialAttemptTimestamp))
 	}
 	// Run "postbind" plugins.
 	fwk.RunPostBindPlugins(ctx, state, assumedPod, scheduleResult.SuggestedHost)
--- a/test/instrumentation/testdata/stable-metrics-list.yaml
+++ b/test/instrumentation/testdata/stable-metrics-list.yaml
@@ -74,6 +74,35 @@
  stabilityLevel: STABLE
  labels:
  - zone
 - name: pod_scheduling_sli_duration_seconds
  subsystem: scheduler
  help: E2e latency for a pod being scheduled, from the time the pod enters the scheduling
    queue an d might involve multiple scheduling attempts.
  type: Histogram
  stabilityLevel: BETA
  labels:
  - attempts
  buckets:
  - 0.01
  - 0.02
  - 0.04
  - 0.08
  - 0.16
  - 0.32
  - 0.64
  - 1.28
  - 2.56
  - 5.12
  - 10.24
  - 20.48
  - 40.96
  - 81.92
  - 163.84
  - 327.68
  - 655.36
  - 1310.72
  - 2621.44
  - 5242.88
 - name: kube_pod_resource_limit
  help: Resources limit for workloads on the cluster, broken down by pod. This shows
    the resource usage the scheduler and kubelet expect per pod for resources along
@@ -151,6 +180,7 @@
  help: E2e latency for a pod being scheduled which may include multiple scheduling
    attempts.
  type: Histogram
  deprecatedVersion: 1.28.0
  stabilityLevel: STABLE
  labels:
  - attempts
--- a/test/integration/scheduler_perf/scheduler_perf_test.go
+++ b/test/integration/scheduler_perf/scheduler_perf_test.go
@@ -102,7 +102,8 @@ var (
 				label:  resultLabelName,
 				values: []string{metrics.ScheduledResult, metrics.UnschedulableResult, metrics.ErrorResult},
 			},
-			"scheduler_pod_scheduling_duration_seconds": nil,
+			"scheduler_pod_scheduling_duration_seconds":     nil,
 			"scheduler_pod_scheduling_sli_duration_seconds": nil,
 		},
 	}
 )