sort stability list of metrics in the shell command (#120197)

* sort stability list of metrics in the shell command * remove used vars * fix verify errors
2025-08-01 07:47:56 +00:00 · 2023-08-28 04:43:54 -07:00 · 2023-08-28 04:43:54 -07:00 · dbbce2aaba
commit dbbce2aaba
parent faf1b5d655
3 changed files with 488 additions and 391 deletions
--- a/test/instrumentation/sort/main.go
+++ b/test/instrumentation/sort/main.go
@ -0,0 +1,86 @@
+/*
+Copyright 2023 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package main
+
+import (
+	"fmt"
+	"os"
+	"sort"
+
+	flag "github.com/spf13/pflag"
+	"gopkg.in/yaml.v2"
+	"k8s.io/component-base/metrics"
+)
+
+// main reads the YAML list of metrics named by --sort-file, sorts it with
+// byFQName, and prints the re-marshalled YAML on stdout.
+//
+// Every failure (unreadable input, invalid YAML, marshal error) is written to
+// stderr and terminates the process with exit status 1.
+func main() {
+	var sortFile string
+	flag.StringVar(&sortFile, "sort-file", "", "file of metrics to sort")
+	flag.Parse()
+
+	dat, err := os.ReadFile(sortFile)
+	if err != nil {
+		// Do not ignore a read failure: exiting 0 with no output would look
+		// like a successful run to whoever invoked this command.
+		fmt.Fprintf(os.Stderr, "%s\n", err)
+		os.Exit(1)
+	}
+
+	var parsedMetrics []metric
+	if err := yaml.Unmarshal(dat, &parsedMetrics); err != nil {
+		fmt.Fprintf(os.Stderr, "%s\n", err)
+		os.Exit(1)
+	}
+
+	sort.Sort(byFQName(parsedMetrics))
+
+	data, err := yaml.Marshal(parsedMetrics)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "%s\n", err)
+		os.Exit(1)
+	}
+
+	fmt.Print(string(data))
+}
+
+// metric is the shape of one entry in the metrics list that this command
+// unmarshals from YAML, sorts, and marshals back out. Every field except Name
+// is tagged omitempty, so zero-valued fields are dropped from the output.
+type metric struct {
+	Name              string              `yaml:"name" json:"name"`
+	Subsystem         string              `yaml:"subsystem,omitempty" json:"subsystem,omitempty"`
+	Namespace         string              `yaml:"namespace,omitempty" json:"namespace,omitempty"`
+	Help              string              `yaml:"help,omitempty" json:"help,omitempty"`
+	Type              string              `yaml:"type,omitempty" json:"type,omitempty"`
+	DeprecatedVersion string              `yaml:"deprecatedVersion,omitempty" json:"deprecatedVersion,omitempty"`
+	StabilityLevel    string              `yaml:"stabilityLevel,omitempty" json:"stabilityLevel,omitempty"`
+	Labels            []string            `yaml:"labels,omitempty" json:"labels,omitempty"`
+	Buckets           []float64           `yaml:"buckets,omitempty" json:"buckets,omitempty"`
+	Objectives        map[float64]float64 `yaml:"objectives,omitempty" json:"objectives,omitempty"`
+	AgeBuckets        uint32              `yaml:"ageBuckets,omitempty" json:"ageBuckets,omitempty"`
+	BufCap            uint32              `yaml:"bufCap,omitempty" json:"bufCap,omitempty"`
+	MaxAge            int64               `yaml:"maxAge,omitempty" json:"maxAge,omitempty"`
+	ConstLabels       map[string]string   `yaml:"constLabels,omitempty" json:"constLabels,omitempty"`
+}
+
+// BuildFQName returns the result of metrics.BuildFQName applied to this
+// metric's Namespace, Subsystem and Name, in that order.
+func (m metric) BuildFQName() string {
+	namespace, subsystem, name := m.Namespace, m.Subsystem, m.Name
+	return metrics.BuildFQName(namespace, subsystem, name)
+}
+
+// byFQName implements sort.Interface for a slice of metrics. Elements are
+// ordered by StabilityLevel first (plain string comparison) and, within the
+// same stability level, by the value of BuildFQName().
+type byFQName []metric
+
+// Len reports how many metrics the slice holds.
+func (ms byFQName) Len() int {
+	return len(ms)
+}
+
+// Less reports whether the metric at index i must sort before the one at j.
+func (ms byFQName) Less(i, j int) bool {
+	left, right := &ms[i], &ms[j]
+	if left.StabilityLevel != right.StabilityLevel {
+		return left.StabilityLevel < right.StabilityLevel
+	}
+	// Same stability level: fall back to the fully-qualified name.
+	return left.BuildFQName() < right.BuildFQName()
+}
+
+// Swap exchanges the metrics at indices i and j.
+func (ms byFQName) Swap(i, j int) {
+	tmp := ms[i]
+	ms[i] = ms[j]
+	ms[j] = tmp
+}
--- a/test/instrumentation/stability-utils.sh
+++ b/test/instrumentation/stability-utils.sh
@ -59,6 +59,7 @@ reset=$(tput sgr0)
 function kube::validate::stablemetrics() {
  stability_check_setup
  temp_file=$(mktemp)
+  temp_file2=$(mktemp)
  doValidate=$(find_files_to_check -z \
      | sort -z \
      | KUBE_ROOT=${KUBE_ROOT} xargs -0 -L 200 \
@ -73,12 +74,16 @@ function kube::validate::stablemetrics() {

  if $doValidate; then
    echo -e "${green}Diffing test/instrumentation/testdata/stable-metrics-list.yaml\n${reset}"
-    if diff -u "$KUBE_ROOT/test/instrumentation/testdata/stable-metrics-list.yaml" "$temp_file"; then
-      echo -e "${green}\nPASS metrics stability verification ${reset}"
-      return 0
-    fi
  fi
-
+  doSort=$(KUBE_ROOT=${KUBE_ROOT} go run "test/instrumentation/sort/main.go" --sort-file="${temp_file}" 1>"${temp_file2}")
+  if ! $doSort; then
+    echo "${red}!!! sorting metrics has failed! ${reset}" >&2
+    exit 1
+  fi
+  if diff -u "$KUBE_ROOT/test/instrumentation/testdata/stable-metrics-list.yaml" "$temp_file2"; then
+    echo -e "${green}\nPASS metrics stability verification ${reset}"
+    return 0
+  fi
  echo "${red}!!! Metrics Stability static analysis has failed!${reset}" >&2
  echo "${red}!!! Please run ./hack/update-generated-stable-metrics.sh to update the golden list.${reset}" >&2
  exit 1
@ -115,7 +120,7 @@ function kube::validate::test::stablemetrics() {
 function kube::update::stablemetrics() {
  stability_check_setup
  temp_file=$(mktemp)
-
+  temp_file2=$(mktemp)
  doCheckStability=$(find_files_to_check -z \
      | sort -z \
      | KUBE_ROOT=${KUBE_ROOT} xargs -0 -L 200 \
@ -133,6 +138,12 @@ function kube::update::stablemetrics() {
    exit 1
  fi
  mv -f "$temp_file" "${KUBE_ROOT}/test/instrumentation/testdata/stable-metrics-list.yaml"
+  doSort=$(go run "test/instrumentation/sort/main.go" --sort-file="${KUBE_ROOT}/test/instrumentation/testdata/stable-metrics-list.yaml" 1>"${temp_file2}")
+  if ! $doSort; then
+    echo "${red}!!! sorting metrics has failed! ${reset}" >&2
+    exit 1
+  fi
+  mv -f "$temp_file2" "${KUBE_ROOT}/test/instrumentation/testdata/stable-metrics-list.yaml"
  echo "${green}Updated golden list of stable metrics.${reset}"
 }

--- a/test/instrumentation/testdata/stable-metrics-list.yaml
+++ b/test/instrumentation/testdata/stable-metrics-list.yaml
@ -1,129 +1,126 @@
- name: job_creation_skew_duration_seconds
-  subsystem: cronjob_controller
-  help: Time between when a cronjob is scheduled to be run, and when the corresponding
-    job is created
+- name: current_executing_requests
+  subsystem: flowcontrol
+  namespace: apiserver
+  help: Number of requests in initial (for a WATCH) or any (for a non-WATCH) execution
+    stage in the API Priority and Fairness subsystem
+  type: Gauge
+  stabilityLevel: BETA
+  labels:
+  - flow_schema
+  - priority_level
+- name: current_executing_seats
+  subsystem: flowcontrol
+  namespace: apiserver
+  help: Concurrency (number of seats) occupied by the currently executing (initial
+    stage for a WATCH, any stage otherwise) requests in the API Priority and Fairness
+    subsystem
+  type: Gauge
+  stabilityLevel: BETA
+  labels:
+  - flow_schema
+  - priority_level
+- name: current_inqueue_requests
+  subsystem: flowcontrol
+  namespace: apiserver
+  help: Number of requests currently pending in queues of the API Priority and Fairness
+    subsystem
+  type: Gauge
+  stabilityLevel: BETA
+  labels:
+  - flow_schema
+  - priority_level
+- name: dispatched_requests_total
+  subsystem: flowcontrol
+  namespace: apiserver
+  help: Number of requests executed by API Priority and Fairness subsystem
+  type: Counter
+  stabilityLevel: BETA
+  labels:
+  - flow_schema
+  - priority_level
+- name: nominal_limit_seats
+  subsystem: flowcontrol
+  namespace: apiserver
+  help: Nominal number of execution seats configured for each priority level
+  type: Gauge
+  stabilityLevel: BETA
+  labels:
+  - priority_level
+- name: rejected_requests_total
+  subsystem: flowcontrol
+  namespace: apiserver
+  help: Number of requests rejected by API Priority and Fairness subsystem
+  type: Counter
+  stabilityLevel: BETA
+  labels:
+  - flow_schema
+  - priority_level
+  - reason
+- name: request_wait_duration_seconds
+  subsystem: flowcontrol
+  namespace: apiserver
+  help: Length of time a request spent waiting in its queue
  type: Histogram
-  stabilityLevel: STABLE
+  stabilityLevel: BETA
+  labels:
+  - execute
+  - flow_schema
+  - priority_level
  buckets:
+  - 0
+  - 0.005
+  - 0.02
+  - 0.05
+  - 0.1
+  - 0.2
+  - 0.5
  - 1
  - 2
-  - 4
-  - 8
-  - 16
-  - 32
-  - 64
-  - 128
-  - 256
-  - 512
- name: job_pods_finished_total
-  subsystem: job_controller
-  help: The number of finished Pods that are fully tracked
+  - 5
+  - 10
+  - 15
+  - 30
+- name: disabled_metrics_total
+  help: The count of disabled metrics.
  type: Counter
-  stabilityLevel: STABLE
-  labels:
-  - completion_mode
-  - result
- name: job_sync_duration_seconds
-  subsystem: job_controller
-  help: The time it took to sync a job
-  type: Histogram
-  stabilityLevel: STABLE
-  labels:
-  - action
-  - completion_mode
-  - result
-  buckets:
-  - 0.001
-  - 0.002
-  - 0.004
-  - 0.008
-  - 0.016
-  - 0.032
-  - 0.064
-  - 0.128
-  - 0.256
-  - 0.512
-  - 1.024
-  - 2.048
-  - 4.096
-  - 8.192
-  - 16.384
- name: job_syncs_total
-  subsystem: job_controller
-  help: The number of job syncs
+  stabilityLevel: BETA
+- name: hidden_metrics_total
+  help: The count of hidden metrics.
  type: Counter
-  stabilityLevel: STABLE
+  stabilityLevel: BETA
+- name: feature_enabled
+  namespace: kubernetes
+  help: This metric records the data about the stage and enablement of a k8s feature.
+  type: Gauge
+  stabilityLevel: BETA
  labels:
-  - action
-  - completion_mode
-  - result
- name: jobs_finished_total
-  subsystem: job_controller
-  help: The number of finished jobs
+  - name
+  - stage
+- name: healthcheck
+  namespace: kubernetes
+  help: This metric records the result of a single healthcheck.
+  type: Gauge
+  stabilityLevel: BETA
+  labels:
+  - name
+  - type
+- name: healthchecks_total
+  namespace: kubernetes
+  help: This metric records the results of all healthcheck.
  type: Counter
-  stabilityLevel: STABLE
+  stabilityLevel: BETA
  labels:
-  - completion_mode
-  - reason
-  - result
- name: evictions_total
-  subsystem: node_collector
-  help: Number of Node evictions that happened since current instance of NodeController
-    started.
+  - name
+  - status
+  - type
+- name: registered_metrics_total
+  help: The count of registered metrics broken by stability level and deprecation
+    version.
  type: Counter
-  stabilityLevel: STABLE
+  stabilityLevel: BETA
  labels:
-  - zone
- name: container_cpu_usage_seconds_total
-  help: Cumulative cpu time consumed by the container in core-seconds
-  type: Custom
-  stabilityLevel: STABLE
-  labels:
-  - container
-  - pod
-  - namespace
- name: container_memory_working_set_bytes
-  help: Current working set of the container in bytes
-  type: Custom
-  stabilityLevel: STABLE
-  labels:
-  - container
-  - pod
-  - namespace
- name: container_start_time_seconds
-  help: Start time of the container since unix epoch in seconds
-  type: Custom
-  stabilityLevel: STABLE
-  labels:
-  - container
-  - pod
-  - namespace
- name: node_cpu_usage_seconds_total
-  help: Cumulative cpu time consumed by the node in core-seconds
-  type: Custom
-  stabilityLevel: STABLE
- name: node_memory_working_set_bytes
-  help: Current working set of the node in bytes
-  type: Custom
-  stabilityLevel: STABLE
- name: pod_cpu_usage_seconds_total
-  help: Cumulative cpu time consumed by the pod in core-seconds
-  type: Custom
-  stabilityLevel: STABLE
-  labels:
-  - pod
-  - namespace
- name: pod_memory_working_set_bytes
-  help: Current working set of the pod in bytes
-  type: Custom
-  stabilityLevel: STABLE
-  labels:
-  - pod
-  - namespace
- name: resource_scrape_error
-  help: 1 if there was an error while getting container metrics, 0 otherwise
-  type: Custom
-  stabilityLevel: STABLE
+  - deprecated_version
+  - stability_level
 - name: pod_scheduling_sli_duration_seconds
  subsystem: scheduler
  help: E2e latency for a pod being scheduled, from the time the pod enters the scheduling
@ -153,167 +150,6 @@
  - 1310.72
  - 2621.44
  - 5242.88
- name: kube_pod_resource_limit
-  help: Resources limit for workloads on the cluster, broken down by pod. This shows
-    the resource usage the scheduler and kubelet expect per pod for resources along
-    with the unit for the resource if any.
-  type: Custom
-  stabilityLevel: STABLE
-  labels:
-  - namespace
-  - pod
-  - node
-  - scheduler
-  - priority
-  - resource
-  - unit
- name: kube_pod_resource_request
-  help: Resources requested by workloads on the cluster, broken down by pod. This
-    shows the resource usage the scheduler and kubelet expect per pod for resources
-    along with the unit for the resource if any.
-  type: Custom
-  stabilityLevel: STABLE
-  labels:
-  - namespace
-  - pod
-  - node
-  - scheduler
-  - priority
-  - resource
-  - unit
- name: framework_extension_point_duration_seconds
-  subsystem: scheduler
-  help: Latency for running all plugins of a specific extension point.
-  type: Histogram
-  stabilityLevel: STABLE
-  labels:
-  - extension_point
-  - profile
-  - status
-  buckets:
-  - 0.0001
-  - 0.0002
-  - 0.0004
-  - 0.0008
-  - 0.0016
-  - 0.0032
-  - 0.0064
-  - 0.0128
-  - 0.0256
-  - 0.0512
-  - 0.1024
-  - 0.2048
- name: pending_pods
-  subsystem: scheduler
-  help: Number of pending pods, by the queue type. 'active' means number of pods in
-    activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number
-    of pods in unschedulablePods that the scheduler attempted to schedule and failed;
-    'gated' is the number of unschedulable pods that the scheduler never attempted
-    to schedule because they are gated.
-  type: Gauge
-  stabilityLevel: STABLE
-  labels:
-  - queue
- name: pod_scheduling_attempts
-  subsystem: scheduler
-  help: Number of attempts to successfully schedule a pod.
-  type: Histogram
-  stabilityLevel: STABLE
-  buckets:
-  - 1
-  - 2
-  - 4
-  - 8
-  - 16
- name: pod_scheduling_duration_seconds
-  subsystem: scheduler
-  help: E2e latency for a pod being scheduled which may include multiple scheduling
-    attempts.
-  type: Histogram
-  deprecatedVersion: 1.28.0
-  stabilityLevel: STABLE
-  labels:
-  - attempts
-  buckets:
-  - 0.01
-  - 0.02
-  - 0.04
-  - 0.08
-  - 0.16
-  - 0.32
-  - 0.64
-  - 1.28
-  - 2.56
-  - 5.12
-  - 10.24
-  - 20.48
-  - 40.96
-  - 81.92
-  - 163.84
-  - 327.68
-  - 655.36
-  - 1310.72
-  - 2621.44
-  - 5242.88
- name: preemption_attempts_total
-  subsystem: scheduler
-  help: Total preemption attempts in the cluster till now
-  type: Counter
-  stabilityLevel: STABLE
- name: preemption_victims
-  subsystem: scheduler
-  help: Number of selected preemption victims
-  type: Histogram
-  stabilityLevel: STABLE
-  buckets:
-  - 1
-  - 2
-  - 4
-  - 8
-  - 16
-  - 32
-  - 64
- name: queue_incoming_pods_total
-  subsystem: scheduler
-  help: Number of pods added to scheduling queues by event and queue type.
-  type: Counter
-  stabilityLevel: STABLE
-  labels:
-  - event
-  - queue
- name: schedule_attempts_total
-  subsystem: scheduler
-  help: Number of attempts to schedule pods, by the result. 'unschedulable' means
-    a pod could not be scheduled, while 'error' means an internal scheduler problem.
-  type: Counter
-  stabilityLevel: STABLE
-  labels:
-  - profile
-  - result
- name: scheduling_attempt_duration_seconds
-  subsystem: scheduler
-  help: Scheduling attempt latency in seconds (scheduling algorithm + binding)
-  type: Histogram
-  stabilityLevel: STABLE
-  labels:
-  - profile
-  - result
-  buckets:
-  - 0.001
-  - 0.002
-  - 0.004
-  - 0.008
-  - 0.016
-  - 0.032
-  - 0.064
-  - 0.128
-  - 0.256
-  - 0.512
-  - 1.024
-  - 2.048
-  - 4.096
-  - 8.192
-  - 16.384
 - name: controller_admission_duration_seconds
  subsystem: admission
  namespace: apiserver
@ -489,126 +325,290 @@
  stabilityLevel: STABLE
  labels:
  - resource
- name: current_executing_requests
-  subsystem: flowcontrol
-  namespace: apiserver
-  help: Number of requests in initial (for a WATCH) or any (for a non-WATCH) execution
-    stage in the API Priority and Fairness subsystem
-  type: Gauge
-  stabilityLevel: BETA
+- name: container_cpu_usage_seconds_total
+  help: Cumulative cpu time consumed by the container in core-seconds
+  type: Custom
+  stabilityLevel: STABLE
  labels:
-  - flow_schema
-  - priority_level
- name: current_executing_seats
-  subsystem: flowcontrol
-  namespace: apiserver
-  help: Concurrency (number of seats) occupied by the currently executing (initial
-    stage for a WATCH, any stage otherwise) requests in the API Priority and Fairness
-    subsystem
-  type: Gauge
-  stabilityLevel: BETA
+  - container
+  - pod
+  - namespace
+- name: container_memory_working_set_bytes
+  help: Current working set of the container in bytes
+  type: Custom
+  stabilityLevel: STABLE
  labels:
-  - flow_schema
-  - priority_level
- name: current_inqueue_requests
-  subsystem: flowcontrol
-  namespace: apiserver
-  help: Number of requests currently pending in queues of the API Priority and Fairness
-    subsystem
-  type: Gauge
-  stabilityLevel: BETA
+  - container
+  - pod
+  - namespace
+- name: container_start_time_seconds
+  help: Start time of the container since unix epoch in seconds
+  type: Custom
+  stabilityLevel: STABLE
  labels:
-  - flow_schema
-  - priority_level
- name: dispatched_requests_total
-  subsystem: flowcontrol
-  namespace: apiserver
-  help: Number of requests executed by API Priority and Fairness subsystem
-  type: Counter
-  stabilityLevel: BETA
-  labels:
-  - flow_schema
-  - priority_level
- name: nominal_limit_seats
-  subsystem: flowcontrol
-  namespace: apiserver
-  help: Nominal number of execution seats configured for each priority level
-  type: Gauge
-  stabilityLevel: BETA
-  labels:
-  - priority_level
- name: rejected_requests_total
-  subsystem: flowcontrol
-  namespace: apiserver
-  help: Number of requests rejected by API Priority and Fairness subsystem
-  type: Counter
-  stabilityLevel: BETA
-  labels:
-  - flow_schema
-  - priority_level
-  - reason
- name: request_wait_duration_seconds
-  subsystem: flowcontrol
-  namespace: apiserver
-  help: Length of time a request spent waiting in its queue
+  - container
+  - pod
+  - namespace
+- name: job_creation_skew_duration_seconds
+  subsystem: cronjob_controller
+  help: Time between when a cronjob is scheduled to be run, and when the corresponding
+    job is created
  type: Histogram
-  stabilityLevel: BETA
-  labels:
-  - execute
-  - flow_schema
-  - priority_level
+  stabilityLevel: STABLE
  buckets:
-  - 0
-  - 0.005
-  - 0.02
-  - 0.05
-  - 0.1
-  - 0.2
-  - 0.5
  - 1
  - 2
-  - 5
-  - 10
-  - 15
-  - 30
- name: disabled_metrics_total
-  help: The count of disabled metrics.
+  - 4
+  - 8
+  - 16
+  - 32
+  - 64
+  - 128
+  - 256
+  - 512
+- name: job_pods_finished_total
+  subsystem: job_controller
+  help: The number of finished Pods that are fully tracked
  type: Counter
-  stabilityLevel: BETA
- name: hidden_metrics_total
-  help: The count of hidden metrics.
+  stabilityLevel: STABLE
+  labels:
+  - completion_mode
+  - result
+- name: job_sync_duration_seconds
+  subsystem: job_controller
+  help: The time it took to sync a job
+  type: Histogram
+  stabilityLevel: STABLE
+  labels:
+  - action
+  - completion_mode
+  - result
+  buckets:
+  - 0.001
+  - 0.002
+  - 0.004
+  - 0.008
+  - 0.016
+  - 0.032
+  - 0.064
+  - 0.128
+  - 0.256
+  - 0.512
+  - 1.024
+  - 2.048
+  - 4.096
+  - 8.192
+  - 16.384
+- name: job_syncs_total
+  subsystem: job_controller
+  help: The number of job syncs
  type: Counter
-  stabilityLevel: BETA
- name: feature_enabled
-  namespace: kubernetes
-  help: This metric records the data about the stage and enablement of a k8s feature.
-  type: Gauge
-  stabilityLevel: BETA
+  stabilityLevel: STABLE
  labels:
-  - name
-  - stage
- name: healthcheck
-  namespace: kubernetes
-  help: This metric records the result of a single healthcheck.
-  type: Gauge
-  stabilityLevel: BETA
-  labels:
-  - name
-  - type
- name: healthchecks_total
-  namespace: kubernetes
-  help: This metric records the results of all healthcheck.
+  - action
+  - completion_mode
+  - result
+- name: jobs_finished_total
+  subsystem: job_controller
+  help: The number of finished jobs
  type: Counter
-  stabilityLevel: BETA
+  stabilityLevel: STABLE
  labels:
-  - name
+  - completion_mode
+  - reason
+  - result
+- name: kube_pod_resource_limit
+  help: Resources limit for workloads on the cluster, broken down by pod. This shows
+    the resource usage the scheduler and kubelet expect per pod for resources along
+    with the unit for the resource if any.
+  type: Custom
+  stabilityLevel: STABLE
+  labels:
+  - namespace
+  - pod
+  - node
+  - scheduler
+  - priority
+  - resource
+  - unit
+- name: kube_pod_resource_request
+  help: Resources requested by workloads on the cluster, broken down by pod. This
+    shows the resource usage the scheduler and kubelet expect per pod for resources
+    along with the unit for the resource if any.
+  type: Custom
+  stabilityLevel: STABLE
+  labels:
+  - namespace
+  - pod
+  - node
+  - scheduler
+  - priority
+  - resource
+  - unit
+- name: evictions_total
+  subsystem: node_collector
+  help: Number of Node evictions that happened since current instance of NodeController
+    started.
+  type: Counter
+  stabilityLevel: STABLE
+  labels:
+  - zone
+- name: node_cpu_usage_seconds_total
+  help: Cumulative cpu time consumed by the node in core-seconds
+  type: Custom
+  stabilityLevel: STABLE
+- name: node_memory_working_set_bytes
+  help: Current working set of the node in bytes
+  type: Custom
+  stabilityLevel: STABLE
+- name: pod_cpu_usage_seconds_total
+  help: Cumulative cpu time consumed by the pod in core-seconds
+  type: Custom
+  stabilityLevel: STABLE
+  labels:
+  - pod
+  - namespace
+- name: pod_memory_working_set_bytes
+  help: Current working set of the pod in bytes
+  type: Custom
+  stabilityLevel: STABLE
+  labels:
+  - pod
+  - namespace
+- name: resource_scrape_error
+  help: 1 if there was an error while getting container metrics, 0 otherwise
+  type: Custom
+  stabilityLevel: STABLE
+- name: framework_extension_point_duration_seconds
+  subsystem: scheduler
+  help: Latency for running all plugins of a specific extension point.
+  type: Histogram
+  stabilityLevel: STABLE
+  labels:
+  - extension_point
+  - profile
  - status
-  - type
- name: registered_metrics_total
-  help: The count of registered metrics broken by stability level and deprecation
-    version.
-  type: Counter
-  stabilityLevel: BETA
+  buckets:
+  - 0.0001
+  - 0.0002
+  - 0.0004
+  - 0.0008
+  - 0.0016
+  - 0.0032
+  - 0.0064
+  - 0.0128
+  - 0.0256
+  - 0.0512
+  - 0.1024
+  - 0.2048
+- name: pending_pods
+  subsystem: scheduler
+  help: Number of pending pods, by the queue type. 'active' means number of pods in
+    activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number
+    of pods in unschedulablePods that the scheduler attempted to schedule and failed;
+    'gated' is the number of unschedulable pods that the scheduler never attempted
+    to schedule because they are gated.
+  type: Gauge
+  stabilityLevel: STABLE
  labels:
-  - deprecated_version
-  - stability_level
+  - queue
+- name: pod_scheduling_attempts
+  subsystem: scheduler
+  help: Number of attempts to successfully schedule a pod.
+  type: Histogram
+  stabilityLevel: STABLE
+  buckets:
+  - 1
+  - 2
+  - 4
+  - 8
+  - 16
+- name: pod_scheduling_duration_seconds
+  subsystem: scheduler
+  help: E2e latency for a pod being scheduled which may include multiple scheduling
+    attempts.
+  type: Histogram
+  deprecatedVersion: 1.28.0
+  stabilityLevel: STABLE
+  labels:
+  - attempts
+  buckets:
+  - 0.01
+  - 0.02
+  - 0.04
+  - 0.08
+  - 0.16
+  - 0.32
+  - 0.64
+  - 1.28
+  - 2.56
+  - 5.12
+  - 10.24
+  - 20.48
+  - 40.96
+  - 81.92
+  - 163.84
+  - 327.68
+  - 655.36
+  - 1310.72
+  - 2621.44
+  - 5242.88
+- name: preemption_attempts_total
+  subsystem: scheduler
+  help: Total preemption attempts in the cluster till now
+  type: Counter
+  stabilityLevel: STABLE
+- name: preemption_victims
+  subsystem: scheduler
+  help: Number of selected preemption victims
+  type: Histogram
+  stabilityLevel: STABLE
+  buckets:
+  - 1
+  - 2
+  - 4
+  - 8
+  - 16
+  - 32
+  - 64
+- name: queue_incoming_pods_total
+  subsystem: scheduler
+  help: Number of pods added to scheduling queues by event and queue type.
+  type: Counter
+  stabilityLevel: STABLE
+  labels:
+  - event
+  - queue
+- name: schedule_attempts_total
+  subsystem: scheduler
+  help: Number of attempts to schedule pods, by the result. 'unschedulable' means
+    a pod could not be scheduled, while 'error' means an internal scheduler problem.
+  type: Counter
+  stabilityLevel: STABLE
+  labels:
+  - profile
+  - result
+- name: scheduling_attempt_duration_seconds
+  subsystem: scheduler
+  help: Scheduling attempt latency in seconds (scheduling algorithm + binding)
+  type: Histogram
+  stabilityLevel: STABLE
+  labels:
+  - profile
+  - result
+  buckets:
+  - 0.001
+  - 0.002
+  - 0.004
+  - 0.008
+  - 0.016
+  - 0.032
+  - 0.064
+  - 0.128
+  - 0.256
+  - 0.512
+  - 1.024
+  - 2.048
+  - 4.096
+  - 8.192
+  - 16.384