mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-01 07:47:56 +00:00
sort stability list of metrics in the shell command (#120197)
* sort stability list of metrics in the shell command * remove used vars * fix verify errors
This commit is contained in:
parent
faf1b5d655
commit
dbbce2aaba
86
test/instrumentation/sort/main.go
Normal file
86
test/instrumentation/sort/main.go
Normal file
@ -0,0 +1,86 @@
|
||||
/*
|
||||
Copyright 2023 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"sort"
|
||||
|
||||
flag "github.com/spf13/pflag"
|
||||
"gopkg.in/yaml.v2"
|
||||
"k8s.io/component-base/metrics"
|
||||
)
|
||||
|
||||
func main() {
|
||||
var sortFile string
|
||||
flag.StringVar(&sortFile, "sort-file", "", "file of metrics to sort")
|
||||
flag.Parse()
|
||||
dat, err := os.ReadFile(sortFile)
|
||||
if err == nil {
|
||||
var parsedMetrics []metric
|
||||
err = yaml.Unmarshal(dat, &parsedMetrics)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "%s\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
sort.Sort(byFQName(parsedMetrics))
|
||||
data, err := yaml.Marshal(parsedMetrics)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "%s\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
fmt.Print(string(data))
|
||||
}
|
||||
}
|
||||
|
||||
// metric is one entry of the metrics list that this tool unmarshals, sorts and
// marshals back. The field declaration order is significant: it is kept as-is
// so that the marshaled key order of each entry does not change.
type metric struct {
	// Name, Subsystem and Namespace are the three components that
	// BuildFQName combines into the fully-qualified metric name.
	Name      string `yaml:"name" json:"name"`
	Subsystem string `yaml:"subsystem,omitempty" json:"subsystem,omitempty"`
	Namespace string `yaml:"namespace,omitempty" json:"namespace,omitempty"`

	// Help and Type are carried through the sort unchanged.
	Help string `yaml:"help,omitempty" json:"help,omitempty"`
	Type string `yaml:"type,omitempty" json:"type,omitempty"`

	// DeprecatedVersion is carried through the sort unchanged.
	DeprecatedVersion string `yaml:"deprecatedVersion,omitempty" json:"deprecatedVersion,omitempty"`

	// StabilityLevel is the primary sort key used by byFQName.Less; it is
	// compared as a plain string.
	StabilityLevel string `yaml:"stabilityLevel,omitempty" json:"stabilityLevel,omitempty"`

	// Labels lists the entry's label names.
	Labels []string `yaml:"labels,omitempty" json:"labels,omitempty"`

	// Buckets holds the entry's bucket values (presumably histogram upper
	// bounds - confirm against the generator).
	Buckets []float64 `yaml:"buckets,omitempty" json:"buckets,omitempty"`

	// Objectives, AgeBuckets, BufCap and MaxAge are optional settings that are
	// omitted from the output when empty (presumably summary options - confirm
	// against the generator).
	Objectives map[float64]float64 `yaml:"objectives,omitempty" json:"objectives,omitempty"`

	AgeBuckets uint32 `yaml:"ageBuckets,omitempty" json:"ageBuckets,omitempty"`

	BufCap uint32 `yaml:"bufCap,omitempty" json:"bufCap,omitempty"`

	MaxAge int64 `yaml:"maxAge,omitempty" json:"maxAge,omitempty"`

	// ConstLabels maps label names to fixed values.
	ConstLabels map[string]string `yaml:"constLabels,omitempty" json:"constLabels,omitempty"`
}
|
||||
|
||||
func (m metric) BuildFQName() string {
|
||||
return metrics.BuildFQName(m.Namespace, m.Subsystem, m.Name)
|
||||
}
|
||||
|
||||
type byFQName []metric
|
||||
|
||||
func (ms byFQName) Len() int { return len(ms) }
|
||||
func (ms byFQName) Less(i, j int) bool {
|
||||
if ms[i].StabilityLevel < ms[j].StabilityLevel {
|
||||
return true
|
||||
} else if ms[i].StabilityLevel > ms[j].StabilityLevel {
|
||||
return false
|
||||
}
|
||||
return ms[i].BuildFQName() < ms[j].BuildFQName()
|
||||
}
|
||||
func (ms byFQName) Swap(i, j int) {
|
||||
ms[i], ms[j] = ms[j], ms[i]
|
||||
}
|
@ -59,6 +59,7 @@ reset=$(tput sgr0)
|
||||
function kube::validate::stablemetrics() {
|
||||
stability_check_setup
|
||||
temp_file=$(mktemp)
|
||||
temp_file2=$(mktemp)
|
||||
doValidate=$(find_files_to_check -z \
|
||||
| sort -z \
|
||||
| KUBE_ROOT=${KUBE_ROOT} xargs -0 -L 200 \
|
||||
@ -73,12 +74,16 @@ function kube::validate::stablemetrics() {
|
||||
|
||||
if $doValidate; then
|
||||
echo -e "${green}Diffing test/instrumentation/testdata/stable-metrics-list.yaml\n${reset}"
|
||||
if diff -u "$KUBE_ROOT/test/instrumentation/testdata/stable-metrics-list.yaml" "$temp_file"; then
|
||||
echo -e "${green}\nPASS metrics stability verification ${reset}"
|
||||
return 0
|
||||
fi
|
||||
fi
|
||||
|
||||
doSort=$(KUBE_ROOT=${KUBE_ROOT} go run "test/instrumentation/sort/main.go" --sort-file="${temp_file}" 1>"${temp_file2}")
|
||||
if ! $doSort; then
|
||||
echo "${red}!!! sorting metrics has failed! ${reset}" >&2
|
||||
exit 1
|
||||
fi
|
||||
if diff -u "$KUBE_ROOT/test/instrumentation/testdata/stable-metrics-list.yaml" "$temp_file2"; then
|
||||
echo -e "${green}\nPASS metrics stability verification ${reset}"
|
||||
return 0
|
||||
fi
|
||||
echo "${red}!!! Metrics Stability static analysis has failed!${reset}" >&2
|
||||
echo "${red}!!! Please run ./hack/update-generated-stable-metrics.sh to update the golden list.${reset}" >&2
|
||||
exit 1
|
||||
@ -115,7 +120,7 @@ function kube::validate::test::stablemetrics() {
|
||||
function kube::update::stablemetrics() {
|
||||
stability_check_setup
|
||||
temp_file=$(mktemp)
|
||||
|
||||
temp_file2=$(mktemp)
|
||||
doCheckStability=$(find_files_to_check -z \
|
||||
| sort -z \
|
||||
| KUBE_ROOT=${KUBE_ROOT} xargs -0 -L 200 \
|
||||
@ -133,6 +138,12 @@ function kube::update::stablemetrics() {
|
||||
exit 1
|
||||
fi
|
||||
mv -f "$temp_file" "${KUBE_ROOT}/test/instrumentation/testdata/stable-metrics-list.yaml"
|
||||
doSort=$(go run "test/instrumentation/sort/main.go" --sort-file="${KUBE_ROOT}/test/instrumentation/testdata/stable-metrics-list.yaml" 1>"${temp_file2}")
|
||||
if ! $doSort; then
|
||||
echo "${red}!!! sorting metrics has failed! ${reset}" >&2
|
||||
exit 1
|
||||
fi
|
||||
mv -f "$temp_file2" "${KUBE_ROOT}/test/instrumentation/testdata/stable-metrics-list.yaml"
|
||||
echo "${green}Updated golden list of stable metrics.${reset}"
|
||||
}
|
||||
|
||||
|
@ -1,129 +1,126 @@
|
||||
- name: job_creation_skew_duration_seconds
|
||||
subsystem: cronjob_controller
|
||||
help: Time between when a cronjob is scheduled to be run, and when the corresponding
|
||||
job is created
|
||||
- name: current_executing_requests
|
||||
subsystem: flowcontrol
|
||||
namespace: apiserver
|
||||
help: Number of requests in initial (for a WATCH) or any (for a non-WATCH) execution
|
||||
stage in the API Priority and Fairness subsystem
|
||||
type: Gauge
|
||||
stabilityLevel: BETA
|
||||
labels:
|
||||
- flow_schema
|
||||
- priority_level
|
||||
- name: current_executing_seats
|
||||
subsystem: flowcontrol
|
||||
namespace: apiserver
|
||||
help: Concurrency (number of seats) occupied by the currently executing (initial
|
||||
stage for a WATCH, any stage otherwise) requests in the API Priority and Fairness
|
||||
subsystem
|
||||
type: Gauge
|
||||
stabilityLevel: BETA
|
||||
labels:
|
||||
- flow_schema
|
||||
- priority_level
|
||||
- name: current_inqueue_requests
|
||||
subsystem: flowcontrol
|
||||
namespace: apiserver
|
||||
help: Number of requests currently pending in queues of the API Priority and Fairness
|
||||
subsystem
|
||||
type: Gauge
|
||||
stabilityLevel: BETA
|
||||
labels:
|
||||
- flow_schema
|
||||
- priority_level
|
||||
- name: dispatched_requests_total
|
||||
subsystem: flowcontrol
|
||||
namespace: apiserver
|
||||
help: Number of requests executed by API Priority and Fairness subsystem
|
||||
type: Counter
|
||||
stabilityLevel: BETA
|
||||
labels:
|
||||
- flow_schema
|
||||
- priority_level
|
||||
- name: nominal_limit_seats
|
||||
subsystem: flowcontrol
|
||||
namespace: apiserver
|
||||
help: Nominal number of execution seats configured for each priority level
|
||||
type: Gauge
|
||||
stabilityLevel: BETA
|
||||
labels:
|
||||
- priority_level
|
||||
- name: rejected_requests_total
|
||||
subsystem: flowcontrol
|
||||
namespace: apiserver
|
||||
help: Number of requests rejected by API Priority and Fairness subsystem
|
||||
type: Counter
|
||||
stabilityLevel: BETA
|
||||
labels:
|
||||
- flow_schema
|
||||
- priority_level
|
||||
- reason
|
||||
- name: request_wait_duration_seconds
|
||||
subsystem: flowcontrol
|
||||
namespace: apiserver
|
||||
help: Length of time a request spent waiting in its queue
|
||||
type: Histogram
|
||||
stabilityLevel: STABLE
|
||||
stabilityLevel: BETA
|
||||
labels:
|
||||
- execute
|
||||
- flow_schema
|
||||
- priority_level
|
||||
buckets:
|
||||
- 0
|
||||
- 0.005
|
||||
- 0.02
|
||||
- 0.05
|
||||
- 0.1
|
||||
- 0.2
|
||||
- 0.5
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
- 8
|
||||
- 16
|
||||
- 32
|
||||
- 64
|
||||
- 128
|
||||
- 256
|
||||
- 512
|
||||
- name: job_pods_finished_total
|
||||
subsystem: job_controller
|
||||
help: The number of finished Pods that are fully tracked
|
||||
- 5
|
||||
- 10
|
||||
- 15
|
||||
- 30
|
||||
- name: disabled_metrics_total
|
||||
help: The count of disabled metrics.
|
||||
type: Counter
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- completion_mode
|
||||
- result
|
||||
- name: job_sync_duration_seconds
|
||||
subsystem: job_controller
|
||||
help: The time it took to sync a job
|
||||
type: Histogram
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- action
|
||||
- completion_mode
|
||||
- result
|
||||
buckets:
|
||||
- 0.001
|
||||
- 0.002
|
||||
- 0.004
|
||||
- 0.008
|
||||
- 0.016
|
||||
- 0.032
|
||||
- 0.064
|
||||
- 0.128
|
||||
- 0.256
|
||||
- 0.512
|
||||
- 1.024
|
||||
- 2.048
|
||||
- 4.096
|
||||
- 8.192
|
||||
- 16.384
|
||||
- name: job_syncs_total
|
||||
subsystem: job_controller
|
||||
help: The number of job syncs
|
||||
stabilityLevel: BETA
|
||||
- name: hidden_metrics_total
|
||||
help: The count of hidden metrics.
|
||||
type: Counter
|
||||
stabilityLevel: STABLE
|
||||
stabilityLevel: BETA
|
||||
- name: feature_enabled
|
||||
namespace: kubernetes
|
||||
help: This metric records the data about the stage and enablement of a k8s feature.
|
||||
type: Gauge
|
||||
stabilityLevel: BETA
|
||||
labels:
|
||||
- action
|
||||
- completion_mode
|
||||
- result
|
||||
- name: jobs_finished_total
|
||||
subsystem: job_controller
|
||||
help: The number of finished jobs
|
||||
- name
|
||||
- stage
|
||||
- name: healthcheck
|
||||
namespace: kubernetes
|
||||
help: This metric records the result of a single healthcheck.
|
||||
type: Gauge
|
||||
stabilityLevel: BETA
|
||||
labels:
|
||||
- name
|
||||
- type
|
||||
- name: healthchecks_total
|
||||
namespace: kubernetes
|
||||
help: This metric records the results of all healthcheck.
|
||||
type: Counter
|
||||
stabilityLevel: STABLE
|
||||
stabilityLevel: BETA
|
||||
labels:
|
||||
- completion_mode
|
||||
- reason
|
||||
- result
|
||||
- name: evictions_total
|
||||
subsystem: node_collector
|
||||
help: Number of Node evictions that happened since current instance of NodeController
|
||||
started.
|
||||
- name
|
||||
- status
|
||||
- type
|
||||
- name: registered_metrics_total
|
||||
help: The count of registered metrics broken by stability level and deprecation
|
||||
version.
|
||||
type: Counter
|
||||
stabilityLevel: STABLE
|
||||
stabilityLevel: BETA
|
||||
labels:
|
||||
- zone
|
||||
- name: container_cpu_usage_seconds_total
|
||||
help: Cumulative cpu time consumed by the container in core-seconds
|
||||
type: Custom
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- container
|
||||
- pod
|
||||
- namespace
|
||||
- name: container_memory_working_set_bytes
|
||||
help: Current working set of the container in bytes
|
||||
type: Custom
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- container
|
||||
- pod
|
||||
- namespace
|
||||
- name: container_start_time_seconds
|
||||
help: Start time of the container since unix epoch in seconds
|
||||
type: Custom
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- container
|
||||
- pod
|
||||
- namespace
|
||||
- name: node_cpu_usage_seconds_total
|
||||
help: Cumulative cpu time consumed by the node in core-seconds
|
||||
type: Custom
|
||||
stabilityLevel: STABLE
|
||||
- name: node_memory_working_set_bytes
|
||||
help: Current working set of the node in bytes
|
||||
type: Custom
|
||||
stabilityLevel: STABLE
|
||||
- name: pod_cpu_usage_seconds_total
|
||||
help: Cumulative cpu time consumed by the pod in core-seconds
|
||||
type: Custom
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- pod
|
||||
- namespace
|
||||
- name: pod_memory_working_set_bytes
|
||||
help: Current working set of the pod in bytes
|
||||
type: Custom
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- pod
|
||||
- namespace
|
||||
- name: resource_scrape_error
|
||||
help: 1 if there was an error while getting container metrics, 0 otherwise
|
||||
type: Custom
|
||||
stabilityLevel: STABLE
|
||||
- deprecated_version
|
||||
- stability_level
|
||||
- name: pod_scheduling_sli_duration_seconds
|
||||
subsystem: scheduler
|
||||
help: E2e latency for a pod being scheduled, from the time the pod enters the scheduling
|
||||
@ -153,167 +150,6 @@
|
||||
- 1310.72
|
||||
- 2621.44
|
||||
- 5242.88
|
||||
- name: kube_pod_resource_limit
|
||||
help: Resources limit for workloads on the cluster, broken down by pod. This shows
|
||||
the resource usage the scheduler and kubelet expect per pod for resources along
|
||||
with the unit for the resource if any.
|
||||
type: Custom
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- namespace
|
||||
- pod
|
||||
- node
|
||||
- scheduler
|
||||
- priority
|
||||
- resource
|
||||
- unit
|
||||
- name: kube_pod_resource_request
|
||||
help: Resources requested by workloads on the cluster, broken down by pod. This
|
||||
shows the resource usage the scheduler and kubelet expect per pod for resources
|
||||
along with the unit for the resource if any.
|
||||
type: Custom
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- namespace
|
||||
- pod
|
||||
- node
|
||||
- scheduler
|
||||
- priority
|
||||
- resource
|
||||
- unit
|
||||
- name: framework_extension_point_duration_seconds
|
||||
subsystem: scheduler
|
||||
help: Latency for running all plugins of a specific extension point.
|
||||
type: Histogram
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- extension_point
|
||||
- profile
|
||||
- status
|
||||
buckets:
|
||||
- 0.0001
|
||||
- 0.0002
|
||||
- 0.0004
|
||||
- 0.0008
|
||||
- 0.0016
|
||||
- 0.0032
|
||||
- 0.0064
|
||||
- 0.0128
|
||||
- 0.0256
|
||||
- 0.0512
|
||||
- 0.1024
|
||||
- 0.2048
|
||||
- name: pending_pods
|
||||
subsystem: scheduler
|
||||
help: Number of pending pods, by the queue type. 'active' means number of pods in
|
||||
activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number
|
||||
of pods in unschedulablePods that the scheduler attempted to schedule and failed;
|
||||
'gated' is the number of unschedulable pods that the scheduler never attempted
|
||||
to schedule because they are gated.
|
||||
type: Gauge
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- queue
|
||||
- name: pod_scheduling_attempts
|
||||
subsystem: scheduler
|
||||
help: Number of attempts to successfully schedule a pod.
|
||||
type: Histogram
|
||||
stabilityLevel: STABLE
|
||||
buckets:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
- 8
|
||||
- 16
|
||||
- name: pod_scheduling_duration_seconds
|
||||
subsystem: scheduler
|
||||
help: E2e latency for a pod being scheduled which may include multiple scheduling
|
||||
attempts.
|
||||
type: Histogram
|
||||
deprecatedVersion: 1.28.0
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- attempts
|
||||
buckets:
|
||||
- 0.01
|
||||
- 0.02
|
||||
- 0.04
|
||||
- 0.08
|
||||
- 0.16
|
||||
- 0.32
|
||||
- 0.64
|
||||
- 1.28
|
||||
- 2.56
|
||||
- 5.12
|
||||
- 10.24
|
||||
- 20.48
|
||||
- 40.96
|
||||
- 81.92
|
||||
- 163.84
|
||||
- 327.68
|
||||
- 655.36
|
||||
- 1310.72
|
||||
- 2621.44
|
||||
- 5242.88
|
||||
- name: preemption_attempts_total
|
||||
subsystem: scheduler
|
||||
help: Total preemption attempts in the cluster till now
|
||||
type: Counter
|
||||
stabilityLevel: STABLE
|
||||
- name: preemption_victims
|
||||
subsystem: scheduler
|
||||
help: Number of selected preemption victims
|
||||
type: Histogram
|
||||
stabilityLevel: STABLE
|
||||
buckets:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
- 8
|
||||
- 16
|
||||
- 32
|
||||
- 64
|
||||
- name: queue_incoming_pods_total
|
||||
subsystem: scheduler
|
||||
help: Number of pods added to scheduling queues by event and queue type.
|
||||
type: Counter
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- event
|
||||
- queue
|
||||
- name: schedule_attempts_total
|
||||
subsystem: scheduler
|
||||
help: Number of attempts to schedule pods, by the result. 'unschedulable' means
|
||||
a pod could not be scheduled, while 'error' means an internal scheduler problem.
|
||||
type: Counter
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- profile
|
||||
- result
|
||||
- name: scheduling_attempt_duration_seconds
|
||||
subsystem: scheduler
|
||||
help: Scheduling attempt latency in seconds (scheduling algorithm + binding)
|
||||
type: Histogram
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- profile
|
||||
- result
|
||||
buckets:
|
||||
- 0.001
|
||||
- 0.002
|
||||
- 0.004
|
||||
- 0.008
|
||||
- 0.016
|
||||
- 0.032
|
||||
- 0.064
|
||||
- 0.128
|
||||
- 0.256
|
||||
- 0.512
|
||||
- 1.024
|
||||
- 2.048
|
||||
- 4.096
|
||||
- 8.192
|
||||
- 16.384
|
||||
- name: controller_admission_duration_seconds
|
||||
subsystem: admission
|
||||
namespace: apiserver
|
||||
@ -489,126 +325,290 @@
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- resource
|
||||
- name: current_executing_requests
|
||||
subsystem: flowcontrol
|
||||
namespace: apiserver
|
||||
help: Number of requests in initial (for a WATCH) or any (for a non-WATCH) execution
|
||||
stage in the API Priority and Fairness subsystem
|
||||
type: Gauge
|
||||
stabilityLevel: BETA
|
||||
- name: container_cpu_usage_seconds_total
|
||||
help: Cumulative cpu time consumed by the container in core-seconds
|
||||
type: Custom
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- flow_schema
|
||||
- priority_level
|
||||
- name: current_executing_seats
|
||||
subsystem: flowcontrol
|
||||
namespace: apiserver
|
||||
help: Concurrency (number of seats) occupied by the currently executing (initial
|
||||
stage for a WATCH, any stage otherwise) requests in the API Priority and Fairness
|
||||
subsystem
|
||||
type: Gauge
|
||||
stabilityLevel: BETA
|
||||
- container
|
||||
- pod
|
||||
- namespace
|
||||
- name: container_memory_working_set_bytes
|
||||
help: Current working set of the container in bytes
|
||||
type: Custom
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- flow_schema
|
||||
- priority_level
|
||||
- name: current_inqueue_requests
|
||||
subsystem: flowcontrol
|
||||
namespace: apiserver
|
||||
help: Number of requests currently pending in queues of the API Priority and Fairness
|
||||
subsystem
|
||||
type: Gauge
|
||||
stabilityLevel: BETA
|
||||
- container
|
||||
- pod
|
||||
- namespace
|
||||
- name: container_start_time_seconds
|
||||
help: Start time of the container since unix epoch in seconds
|
||||
type: Custom
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- flow_schema
|
||||
- priority_level
|
||||
- name: dispatched_requests_total
|
||||
subsystem: flowcontrol
|
||||
namespace: apiserver
|
||||
help: Number of requests executed by API Priority and Fairness subsystem
|
||||
type: Counter
|
||||
stabilityLevel: BETA
|
||||
labels:
|
||||
- flow_schema
|
||||
- priority_level
|
||||
- name: nominal_limit_seats
|
||||
subsystem: flowcontrol
|
||||
namespace: apiserver
|
||||
help: Nominal number of execution seats configured for each priority level
|
||||
type: Gauge
|
||||
stabilityLevel: BETA
|
||||
labels:
|
||||
- priority_level
|
||||
- name: rejected_requests_total
|
||||
subsystem: flowcontrol
|
||||
namespace: apiserver
|
||||
help: Number of requests rejected by API Priority and Fairness subsystem
|
||||
type: Counter
|
||||
stabilityLevel: BETA
|
||||
labels:
|
||||
- flow_schema
|
||||
- priority_level
|
||||
- reason
|
||||
- name: request_wait_duration_seconds
|
||||
subsystem: flowcontrol
|
||||
namespace: apiserver
|
||||
help: Length of time a request spent waiting in its queue
|
||||
- container
|
||||
- pod
|
||||
- namespace
|
||||
- name: job_creation_skew_duration_seconds
|
||||
subsystem: cronjob_controller
|
||||
help: Time between when a cronjob is scheduled to be run, and when the corresponding
|
||||
job is created
|
||||
type: Histogram
|
||||
stabilityLevel: BETA
|
||||
labels:
|
||||
- execute
|
||||
- flow_schema
|
||||
- priority_level
|
||||
stabilityLevel: STABLE
|
||||
buckets:
|
||||
- 0
|
||||
- 0.005
|
||||
- 0.02
|
||||
- 0.05
|
||||
- 0.1
|
||||
- 0.2
|
||||
- 0.5
|
||||
- 1
|
||||
- 2
|
||||
- 5
|
||||
- 10
|
||||
- 15
|
||||
- 30
|
||||
- name: disabled_metrics_total
|
||||
help: The count of disabled metrics.
|
||||
- 4
|
||||
- 8
|
||||
- 16
|
||||
- 32
|
||||
- 64
|
||||
- 128
|
||||
- 256
|
||||
- 512
|
||||
- name: job_pods_finished_total
|
||||
subsystem: job_controller
|
||||
help: The number of finished Pods that are fully tracked
|
||||
type: Counter
|
||||
stabilityLevel: BETA
|
||||
- name: hidden_metrics_total
|
||||
help: The count of hidden metrics.
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- completion_mode
|
||||
- result
|
||||
- name: job_sync_duration_seconds
|
||||
subsystem: job_controller
|
||||
help: The time it took to sync a job
|
||||
type: Histogram
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- action
|
||||
- completion_mode
|
||||
- result
|
||||
buckets:
|
||||
- 0.001
|
||||
- 0.002
|
||||
- 0.004
|
||||
- 0.008
|
||||
- 0.016
|
||||
- 0.032
|
||||
- 0.064
|
||||
- 0.128
|
||||
- 0.256
|
||||
- 0.512
|
||||
- 1.024
|
||||
- 2.048
|
||||
- 4.096
|
||||
- 8.192
|
||||
- 16.384
|
||||
- name: job_syncs_total
|
||||
subsystem: job_controller
|
||||
help: The number of job syncs
|
||||
type: Counter
|
||||
stabilityLevel: BETA
|
||||
- name: feature_enabled
|
||||
namespace: kubernetes
|
||||
help: This metric records the data about the stage and enablement of a k8s feature.
|
||||
type: Gauge
|
||||
stabilityLevel: BETA
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- name
|
||||
- stage
|
||||
- name: healthcheck
|
||||
namespace: kubernetes
|
||||
help: This metric records the result of a single healthcheck.
|
||||
type: Gauge
|
||||
stabilityLevel: BETA
|
||||
labels:
|
||||
- name
|
||||
- type
|
||||
- name: healthchecks_total
|
||||
namespace: kubernetes
|
||||
help: This metric records the results of all healthcheck.
|
||||
- action
|
||||
- completion_mode
|
||||
- result
|
||||
- name: jobs_finished_total
|
||||
subsystem: job_controller
|
||||
help: The number of finished jobs
|
||||
type: Counter
|
||||
stabilityLevel: BETA
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- name
|
||||
- completion_mode
|
||||
- reason
|
||||
- result
|
||||
- name: kube_pod_resource_limit
|
||||
help: Resources limit for workloads on the cluster, broken down by pod. This shows
|
||||
the resource usage the scheduler and kubelet expect per pod for resources along
|
||||
with the unit for the resource if any.
|
||||
type: Custom
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- namespace
|
||||
- pod
|
||||
- node
|
||||
- scheduler
|
||||
- priority
|
||||
- resource
|
||||
- unit
|
||||
- name: kube_pod_resource_request
|
||||
help: Resources requested by workloads on the cluster, broken down by pod. This
|
||||
shows the resource usage the scheduler and kubelet expect per pod for resources
|
||||
along with the unit for the resource if any.
|
||||
type: Custom
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- namespace
|
||||
- pod
|
||||
- node
|
||||
- scheduler
|
||||
- priority
|
||||
- resource
|
||||
- unit
|
||||
- name: evictions_total
|
||||
subsystem: node_collector
|
||||
help: Number of Node evictions that happened since current instance of NodeController
|
||||
started.
|
||||
type: Counter
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- zone
|
||||
- name: node_cpu_usage_seconds_total
|
||||
help: Cumulative cpu time consumed by the node in core-seconds
|
||||
type: Custom
|
||||
stabilityLevel: STABLE
|
||||
- name: node_memory_working_set_bytes
|
||||
help: Current working set of the node in bytes
|
||||
type: Custom
|
||||
stabilityLevel: STABLE
|
||||
- name: pod_cpu_usage_seconds_total
|
||||
help: Cumulative cpu time consumed by the pod in core-seconds
|
||||
type: Custom
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- pod
|
||||
- namespace
|
||||
- name: pod_memory_working_set_bytes
|
||||
help: Current working set of the pod in bytes
|
||||
type: Custom
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- pod
|
||||
- namespace
|
||||
- name: resource_scrape_error
|
||||
help: 1 if there was an error while getting container metrics, 0 otherwise
|
||||
type: Custom
|
||||
stabilityLevel: STABLE
|
||||
- name: framework_extension_point_duration_seconds
|
||||
subsystem: scheduler
|
||||
help: Latency for running all plugins of a specific extension point.
|
||||
type: Histogram
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- extension_point
|
||||
- profile
|
||||
- status
|
||||
- type
|
||||
- name: registered_metrics_total
|
||||
help: The count of registered metrics broken by stability level and deprecation
|
||||
version.
|
||||
type: Counter
|
||||
stabilityLevel: BETA
|
||||
buckets:
|
||||
- 0.0001
|
||||
- 0.0002
|
||||
- 0.0004
|
||||
- 0.0008
|
||||
- 0.0016
|
||||
- 0.0032
|
||||
- 0.0064
|
||||
- 0.0128
|
||||
- 0.0256
|
||||
- 0.0512
|
||||
- 0.1024
|
||||
- 0.2048
|
||||
- name: pending_pods
|
||||
subsystem: scheduler
|
||||
help: Number of pending pods, by the queue type. 'active' means number of pods in
|
||||
activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number
|
||||
of pods in unschedulablePods that the scheduler attempted to schedule and failed;
|
||||
'gated' is the number of unschedulable pods that the scheduler never attempted
|
||||
to schedule because they are gated.
|
||||
type: Gauge
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- deprecated_version
|
||||
- stability_level
|
||||
- queue
|
||||
- name: pod_scheduling_attempts
|
||||
subsystem: scheduler
|
||||
help: Number of attempts to successfully schedule a pod.
|
||||
type: Histogram
|
||||
stabilityLevel: STABLE
|
||||
buckets:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
- 8
|
||||
- 16
|
||||
- name: pod_scheduling_duration_seconds
|
||||
subsystem: scheduler
|
||||
help: E2e latency for a pod being scheduled which may include multiple scheduling
|
||||
attempts.
|
||||
type: Histogram
|
||||
deprecatedVersion: 1.28.0
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- attempts
|
||||
buckets:
|
||||
- 0.01
|
||||
- 0.02
|
||||
- 0.04
|
||||
- 0.08
|
||||
- 0.16
|
||||
- 0.32
|
||||
- 0.64
|
||||
- 1.28
|
||||
- 2.56
|
||||
- 5.12
|
||||
- 10.24
|
||||
- 20.48
|
||||
- 40.96
|
||||
- 81.92
|
||||
- 163.84
|
||||
- 327.68
|
||||
- 655.36
|
||||
- 1310.72
|
||||
- 2621.44
|
||||
- 5242.88
|
||||
- name: preemption_attempts_total
|
||||
subsystem: scheduler
|
||||
help: Total preemption attempts in the cluster till now
|
||||
type: Counter
|
||||
stabilityLevel: STABLE
|
||||
- name: preemption_victims
|
||||
subsystem: scheduler
|
||||
help: Number of selected preemption victims
|
||||
type: Histogram
|
||||
stabilityLevel: STABLE
|
||||
buckets:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
- 8
|
||||
- 16
|
||||
- 32
|
||||
- 64
|
||||
- name: queue_incoming_pods_total
|
||||
subsystem: scheduler
|
||||
help: Number of pods added to scheduling queues by event and queue type.
|
||||
type: Counter
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- event
|
||||
- queue
|
||||
- name: schedule_attempts_total
|
||||
subsystem: scheduler
|
||||
help: Number of attempts to schedule pods, by the result. 'unschedulable' means
|
||||
a pod could not be scheduled, while 'error' means an internal scheduler problem.
|
||||
type: Counter
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- profile
|
||||
- result
|
||||
- name: scheduling_attempt_duration_seconds
|
||||
subsystem: scheduler
|
||||
help: Scheduling attempt latency in seconds (scheduling algorithm + binding)
|
||||
type: Histogram
|
||||
stabilityLevel: STABLE
|
||||
labels:
|
||||
- profile
|
||||
- result
|
||||
buckets:
|
||||
- 0.001
|
||||
- 0.002
|
||||
- 0.004
|
||||
- 0.008
|
||||
- 0.016
|
||||
- 0.032
|
||||
- 0.064
|
||||
- 0.128
|
||||
- 0.256
|
||||
- 0.512
|
||||
- 1.024
|
||||
- 2.048
|
||||
- 4.096
|
||||
- 8.192
|
||||
- 16.384
|
||||
|
Loading…
Reference in New Issue
Block a user