mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-08-08 03:33:56 +00:00
sort stability list of metrics in the shell command (#120197)
* sort stability list of metrics in the shell command * remove used vars * fix verify errors
This commit is contained in:
parent
faf1b5d655
commit
dbbce2aaba
86
test/instrumentation/sort/main.go
Normal file
86
test/instrumentation/sort/main.go
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2023 The Kubernetes Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"sort"
|
||||||
|
|
||||||
|
flag "github.com/spf13/pflag"
|
||||||
|
"gopkg.in/yaml.v2"
|
||||||
|
"k8s.io/component-base/metrics"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
var sortFile string
|
||||||
|
flag.StringVar(&sortFile, "sort-file", "", "file of metrics to sort")
|
||||||
|
flag.Parse()
|
||||||
|
dat, err := os.ReadFile(sortFile)
|
||||||
|
if err == nil {
|
||||||
|
var parsedMetrics []metric
|
||||||
|
err = yaml.Unmarshal(dat, &parsedMetrics)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "%s\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
sort.Sort(byFQName(parsedMetrics))
|
||||||
|
data, err := yaml.Marshal(parsedMetrics)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "%s\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Print(string(data))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// metric is the YAML/JSON shape of a single entry in the metrics list that
// main unmarshals, sorts and marshals back out.
//
// NOTE(review): yaml.v2 appears to emit struct fields in declaration order, so
// reordering the fields below would change the generated file — confirm before
// rearranging.
type metric struct {
	// Name, Subsystem and Namespace are combined by BuildFQName into the
	// fully-qualified metric name.
	Name      string `yaml:"name" json:"name"`
	Subsystem string `yaml:"subsystem,omitempty" json:"subsystem,omitempty"`
	Namespace string `yaml:"namespace,omitempty" json:"namespace,omitempty"`
	Help      string `yaml:"help,omitempty" json:"help,omitempty"`
	Type      string `yaml:"type,omitempty" json:"type,omitempty"`
	DeprecatedVersion string `yaml:"deprecatedVersion,omitempty" json:"deprecatedVersion,omitempty"`
	// StabilityLevel is the primary sort key used by byFQName.Less; it is
	// compared as a plain string.
	StabilityLevel string `yaml:"stabilityLevel,omitempty" json:"stabilityLevel,omitempty"`
	Labels         []string `yaml:"labels,omitempty" json:"labels,omitempty"`
	// The remaining fields presumably mirror histogram/summary options of the
	// metric definition — TODO confirm against the tool that generates the list.
	Buckets     []float64 `yaml:"buckets,omitempty" json:"buckets,omitempty"`
	Objectives  map[float64]float64 `yaml:"objectives,omitempty" json:"objectives,omitempty"`
	AgeBuckets  uint32 `yaml:"ageBuckets,omitempty" json:"ageBuckets,omitempty"`
	BufCap      uint32 `yaml:"bufCap,omitempty" json:"bufCap,omitempty"`
	MaxAge      int64 `yaml:"maxAge,omitempty" json:"maxAge,omitempty"`
	ConstLabels map[string]string `yaml:"constLabels,omitempty" json:"constLabels,omitempty"`
}
|
||||||
|
|
||||||
|
func (m metric) BuildFQName() string {
|
||||||
|
return metrics.BuildFQName(m.Namespace, m.Subsystem, m.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
type byFQName []metric
|
||||||
|
|
||||||
|
func (ms byFQName) Len() int { return len(ms) }
|
||||||
|
func (ms byFQName) Less(i, j int) bool {
|
||||||
|
if ms[i].StabilityLevel < ms[j].StabilityLevel {
|
||||||
|
return true
|
||||||
|
} else if ms[i].StabilityLevel > ms[j].StabilityLevel {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return ms[i].BuildFQName() < ms[j].BuildFQName()
|
||||||
|
}
|
||||||
|
func (ms byFQName) Swap(i, j int) {
|
||||||
|
ms[i], ms[j] = ms[j], ms[i]
|
||||||
|
}
|
@ -59,6 +59,7 @@ reset=$(tput sgr0)
|
|||||||
function kube::validate::stablemetrics() {
|
function kube::validate::stablemetrics() {
|
||||||
stability_check_setup
|
stability_check_setup
|
||||||
temp_file=$(mktemp)
|
temp_file=$(mktemp)
|
||||||
|
temp_file2=$(mktemp)
|
||||||
doValidate=$(find_files_to_check -z \
|
doValidate=$(find_files_to_check -z \
|
||||||
| sort -z \
|
| sort -z \
|
||||||
| KUBE_ROOT=${KUBE_ROOT} xargs -0 -L 200 \
|
| KUBE_ROOT=${KUBE_ROOT} xargs -0 -L 200 \
|
||||||
@ -73,12 +74,16 @@ function kube::validate::stablemetrics() {
|
|||||||
|
|
||||||
if $doValidate; then
|
if $doValidate; then
|
||||||
echo -e "${green}Diffing test/instrumentation/testdata/stable-metrics-list.yaml\n${reset}"
|
echo -e "${green}Diffing test/instrumentation/testdata/stable-metrics-list.yaml\n${reset}"
|
||||||
if diff -u "$KUBE_ROOT/test/instrumentation/testdata/stable-metrics-list.yaml" "$temp_file"; then
|
|
||||||
echo -e "${green}\nPASS metrics stability verification ${reset}"
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
|
doSort=$(KUBE_ROOT=${KUBE_ROOT} go run "test/instrumentation/sort/main.go" --sort-file="${temp_file}" 1>"${temp_file2}")
|
||||||
|
if ! $doSort; then
|
||||||
|
echo "${red}!!! sorting metrics has failed! ${reset}" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
if diff -u "$KUBE_ROOT/test/instrumentation/testdata/stable-metrics-list.yaml" "$temp_file2"; then
|
||||||
|
echo -e "${green}\nPASS metrics stability verification ${reset}"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
echo "${red}!!! Metrics Stability static analysis has failed!${reset}" >&2
|
echo "${red}!!! Metrics Stability static analysis has failed!${reset}" >&2
|
||||||
echo "${red}!!! Please run ./hack/update-generated-stable-metrics.sh to update the golden list.${reset}" >&2
|
echo "${red}!!! Please run ./hack/update-generated-stable-metrics.sh to update the golden list.${reset}" >&2
|
||||||
exit 1
|
exit 1
|
||||||
@ -115,7 +120,7 @@ function kube::validate::test::stablemetrics() {
|
|||||||
function kube::update::stablemetrics() {
|
function kube::update::stablemetrics() {
|
||||||
stability_check_setup
|
stability_check_setup
|
||||||
temp_file=$(mktemp)
|
temp_file=$(mktemp)
|
||||||
|
temp_file2=$(mktemp)
|
||||||
doCheckStability=$(find_files_to_check -z \
|
doCheckStability=$(find_files_to_check -z \
|
||||||
| sort -z \
|
| sort -z \
|
||||||
| KUBE_ROOT=${KUBE_ROOT} xargs -0 -L 200 \
|
| KUBE_ROOT=${KUBE_ROOT} xargs -0 -L 200 \
|
||||||
@ -133,6 +138,12 @@ function kube::update::stablemetrics() {
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
mv -f "$temp_file" "${KUBE_ROOT}/test/instrumentation/testdata/stable-metrics-list.yaml"
|
mv -f "$temp_file" "${KUBE_ROOT}/test/instrumentation/testdata/stable-metrics-list.yaml"
|
||||||
|
doSort=$(go run "test/instrumentation/sort/main.go" --sort-file="${KUBE_ROOT}/test/instrumentation/testdata/stable-metrics-list.yaml" 1>"${temp_file2}")
|
||||||
|
if ! $doSort; then
|
||||||
|
echo "${red}!!! sorting metrics has failed! ${reset}" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
mv -f "$temp_file2" "${KUBE_ROOT}/test/instrumentation/testdata/stable-metrics-list.yaml"
|
||||||
echo "${green}Updated golden list of stable metrics.${reset}"
|
echo "${green}Updated golden list of stable metrics.${reset}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,129 +1,126 @@
|
|||||||
- name: job_creation_skew_duration_seconds
|
- name: current_executing_requests
|
||||||
subsystem: cronjob_controller
|
subsystem: flowcontrol
|
||||||
help: Time between when a cronjob is scheduled to be run, and when the corresponding
|
namespace: apiserver
|
||||||
job is created
|
help: Number of requests in initial (for a WATCH) or any (for a non-WATCH) execution
|
||||||
|
stage in the API Priority and Fairness subsystem
|
||||||
|
type: Gauge
|
||||||
|
stabilityLevel: BETA
|
||||||
|
labels:
|
||||||
|
- flow_schema
|
||||||
|
- priority_level
|
||||||
|
- name: current_executing_seats
|
||||||
|
subsystem: flowcontrol
|
||||||
|
namespace: apiserver
|
||||||
|
help: Concurrency (number of seats) occupied by the currently executing (initial
|
||||||
|
stage for a WATCH, any stage otherwise) requests in the API Priority and Fairness
|
||||||
|
subsystem
|
||||||
|
type: Gauge
|
||||||
|
stabilityLevel: BETA
|
||||||
|
labels:
|
||||||
|
- flow_schema
|
||||||
|
- priority_level
|
||||||
|
- name: current_inqueue_requests
|
||||||
|
subsystem: flowcontrol
|
||||||
|
namespace: apiserver
|
||||||
|
help: Number of requests currently pending in queues of the API Priority and Fairness
|
||||||
|
subsystem
|
||||||
|
type: Gauge
|
||||||
|
stabilityLevel: BETA
|
||||||
|
labels:
|
||||||
|
- flow_schema
|
||||||
|
- priority_level
|
||||||
|
- name: dispatched_requests_total
|
||||||
|
subsystem: flowcontrol
|
||||||
|
namespace: apiserver
|
||||||
|
help: Number of requests executed by API Priority and Fairness subsystem
|
||||||
|
type: Counter
|
||||||
|
stabilityLevel: BETA
|
||||||
|
labels:
|
||||||
|
- flow_schema
|
||||||
|
- priority_level
|
||||||
|
- name: nominal_limit_seats
|
||||||
|
subsystem: flowcontrol
|
||||||
|
namespace: apiserver
|
||||||
|
help: Nominal number of execution seats configured for each priority level
|
||||||
|
type: Gauge
|
||||||
|
stabilityLevel: BETA
|
||||||
|
labels:
|
||||||
|
- priority_level
|
||||||
|
- name: rejected_requests_total
|
||||||
|
subsystem: flowcontrol
|
||||||
|
namespace: apiserver
|
||||||
|
help: Number of requests rejected by API Priority and Fairness subsystem
|
||||||
|
type: Counter
|
||||||
|
stabilityLevel: BETA
|
||||||
|
labels:
|
||||||
|
- flow_schema
|
||||||
|
- priority_level
|
||||||
|
- reason
|
||||||
|
- name: request_wait_duration_seconds
|
||||||
|
subsystem: flowcontrol
|
||||||
|
namespace: apiserver
|
||||||
|
help: Length of time a request spent waiting in its queue
|
||||||
type: Histogram
|
type: Histogram
|
||||||
stabilityLevel: STABLE
|
stabilityLevel: BETA
|
||||||
|
labels:
|
||||||
|
- execute
|
||||||
|
- flow_schema
|
||||||
|
- priority_level
|
||||||
buckets:
|
buckets:
|
||||||
|
- 0
|
||||||
|
- 0.005
|
||||||
|
- 0.02
|
||||||
|
- 0.05
|
||||||
|
- 0.1
|
||||||
|
- 0.2
|
||||||
|
- 0.5
|
||||||
- 1
|
- 1
|
||||||
- 2
|
- 2
|
||||||
- 4
|
- 5
|
||||||
- 8
|
- 10
|
||||||
- 16
|
- 15
|
||||||
- 32
|
- 30
|
||||||
- 64
|
- name: disabled_metrics_total
|
||||||
- 128
|
help: The count of disabled metrics.
|
||||||
- 256
|
|
||||||
- 512
|
|
||||||
- name: job_pods_finished_total
|
|
||||||
subsystem: job_controller
|
|
||||||
help: The number of finished Pods that are fully tracked
|
|
||||||
type: Counter
|
type: Counter
|
||||||
stabilityLevel: STABLE
|
stabilityLevel: BETA
|
||||||
labels:
|
- name: hidden_metrics_total
|
||||||
- completion_mode
|
help: The count of hidden metrics.
|
||||||
- result
|
|
||||||
- name: job_sync_duration_seconds
|
|
||||||
subsystem: job_controller
|
|
||||||
help: The time it took to sync a job
|
|
||||||
type: Histogram
|
|
||||||
stabilityLevel: STABLE
|
|
||||||
labels:
|
|
||||||
- action
|
|
||||||
- completion_mode
|
|
||||||
- result
|
|
||||||
buckets:
|
|
||||||
- 0.001
|
|
||||||
- 0.002
|
|
||||||
- 0.004
|
|
||||||
- 0.008
|
|
||||||
- 0.016
|
|
||||||
- 0.032
|
|
||||||
- 0.064
|
|
||||||
- 0.128
|
|
||||||
- 0.256
|
|
||||||
- 0.512
|
|
||||||
- 1.024
|
|
||||||
- 2.048
|
|
||||||
- 4.096
|
|
||||||
- 8.192
|
|
||||||
- 16.384
|
|
||||||
- name: job_syncs_total
|
|
||||||
subsystem: job_controller
|
|
||||||
help: The number of job syncs
|
|
||||||
type: Counter
|
type: Counter
|
||||||
stabilityLevel: STABLE
|
stabilityLevel: BETA
|
||||||
|
- name: feature_enabled
|
||||||
|
namespace: kubernetes
|
||||||
|
help: This metric records the data about the stage and enablement of a k8s feature.
|
||||||
|
type: Gauge
|
||||||
|
stabilityLevel: BETA
|
||||||
labels:
|
labels:
|
||||||
- action
|
- name
|
||||||
- completion_mode
|
- stage
|
||||||
- result
|
- name: healthcheck
|
||||||
- name: jobs_finished_total
|
namespace: kubernetes
|
||||||
subsystem: job_controller
|
help: This metric records the result of a single healthcheck.
|
||||||
help: The number of finished jobs
|
type: Gauge
|
||||||
|
stabilityLevel: BETA
|
||||||
|
labels:
|
||||||
|
- name
|
||||||
|
- type
|
||||||
|
- name: healthchecks_total
|
||||||
|
namespace: kubernetes
|
||||||
|
help: This metric records the results of all healthcheck.
|
||||||
type: Counter
|
type: Counter
|
||||||
stabilityLevel: STABLE
|
stabilityLevel: BETA
|
||||||
labels:
|
labels:
|
||||||
- completion_mode
|
- name
|
||||||
- reason
|
- status
|
||||||
- result
|
- type
|
||||||
- name: evictions_total
|
- name: registered_metrics_total
|
||||||
subsystem: node_collector
|
help: The count of registered metrics broken by stability level and deprecation
|
||||||
help: Number of Node evictions that happened since current instance of NodeController
|
version.
|
||||||
started.
|
|
||||||
type: Counter
|
type: Counter
|
||||||
stabilityLevel: STABLE
|
stabilityLevel: BETA
|
||||||
labels:
|
labels:
|
||||||
- zone
|
- deprecated_version
|
||||||
- name: container_cpu_usage_seconds_total
|
- stability_level
|
||||||
help: Cumulative cpu time consumed by the container in core-seconds
|
|
||||||
type: Custom
|
|
||||||
stabilityLevel: STABLE
|
|
||||||
labels:
|
|
||||||
- container
|
|
||||||
- pod
|
|
||||||
- namespace
|
|
||||||
- name: container_memory_working_set_bytes
|
|
||||||
help: Current working set of the container in bytes
|
|
||||||
type: Custom
|
|
||||||
stabilityLevel: STABLE
|
|
||||||
labels:
|
|
||||||
- container
|
|
||||||
- pod
|
|
||||||
- namespace
|
|
||||||
- name: container_start_time_seconds
|
|
||||||
help: Start time of the container since unix epoch in seconds
|
|
||||||
type: Custom
|
|
||||||
stabilityLevel: STABLE
|
|
||||||
labels:
|
|
||||||
- container
|
|
||||||
- pod
|
|
||||||
- namespace
|
|
||||||
- name: node_cpu_usage_seconds_total
|
|
||||||
help: Cumulative cpu time consumed by the node in core-seconds
|
|
||||||
type: Custom
|
|
||||||
stabilityLevel: STABLE
|
|
||||||
- name: node_memory_working_set_bytes
|
|
||||||
help: Current working set of the node in bytes
|
|
||||||
type: Custom
|
|
||||||
stabilityLevel: STABLE
|
|
||||||
- name: pod_cpu_usage_seconds_total
|
|
||||||
help: Cumulative cpu time consumed by the pod in core-seconds
|
|
||||||
type: Custom
|
|
||||||
stabilityLevel: STABLE
|
|
||||||
labels:
|
|
||||||
- pod
|
|
||||||
- namespace
|
|
||||||
- name: pod_memory_working_set_bytes
|
|
||||||
help: Current working set of the pod in bytes
|
|
||||||
type: Custom
|
|
||||||
stabilityLevel: STABLE
|
|
||||||
labels:
|
|
||||||
- pod
|
|
||||||
- namespace
|
|
||||||
- name: resource_scrape_error
|
|
||||||
help: 1 if there was an error while getting container metrics, 0 otherwise
|
|
||||||
type: Custom
|
|
||||||
stabilityLevel: STABLE
|
|
||||||
- name: pod_scheduling_sli_duration_seconds
|
- name: pod_scheduling_sli_duration_seconds
|
||||||
subsystem: scheduler
|
subsystem: scheduler
|
||||||
help: E2e latency for a pod being scheduled, from the time the pod enters the scheduling
|
help: E2e latency for a pod being scheduled, from the time the pod enters the scheduling
|
||||||
@ -153,167 +150,6 @@
|
|||||||
- 1310.72
|
- 1310.72
|
||||||
- 2621.44
|
- 2621.44
|
||||||
- 5242.88
|
- 5242.88
|
||||||
- name: kube_pod_resource_limit
|
|
||||||
help: Resources limit for workloads on the cluster, broken down by pod. This shows
|
|
||||||
the resource usage the scheduler and kubelet expect per pod for resources along
|
|
||||||
with the unit for the resource if any.
|
|
||||||
type: Custom
|
|
||||||
stabilityLevel: STABLE
|
|
||||||
labels:
|
|
||||||
- namespace
|
|
||||||
- pod
|
|
||||||
- node
|
|
||||||
- scheduler
|
|
||||||
- priority
|
|
||||||
- resource
|
|
||||||
- unit
|
|
||||||
- name: kube_pod_resource_request
|
|
||||||
help: Resources requested by workloads on the cluster, broken down by pod. This
|
|
||||||
shows the resource usage the scheduler and kubelet expect per pod for resources
|
|
||||||
along with the unit for the resource if any.
|
|
||||||
type: Custom
|
|
||||||
stabilityLevel: STABLE
|
|
||||||
labels:
|
|
||||||
- namespace
|
|
||||||
- pod
|
|
||||||
- node
|
|
||||||
- scheduler
|
|
||||||
- priority
|
|
||||||
- resource
|
|
||||||
- unit
|
|
||||||
- name: framework_extension_point_duration_seconds
|
|
||||||
subsystem: scheduler
|
|
||||||
help: Latency for running all plugins of a specific extension point.
|
|
||||||
type: Histogram
|
|
||||||
stabilityLevel: STABLE
|
|
||||||
labels:
|
|
||||||
- extension_point
|
|
||||||
- profile
|
|
||||||
- status
|
|
||||||
buckets:
|
|
||||||
- 0.0001
|
|
||||||
- 0.0002
|
|
||||||
- 0.0004
|
|
||||||
- 0.0008
|
|
||||||
- 0.0016
|
|
||||||
- 0.0032
|
|
||||||
- 0.0064
|
|
||||||
- 0.0128
|
|
||||||
- 0.0256
|
|
||||||
- 0.0512
|
|
||||||
- 0.1024
|
|
||||||
- 0.2048
|
|
||||||
- name: pending_pods
|
|
||||||
subsystem: scheduler
|
|
||||||
help: Number of pending pods, by the queue type. 'active' means number of pods in
|
|
||||||
activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number
|
|
||||||
of pods in unschedulablePods that the scheduler attempted to schedule and failed;
|
|
||||||
'gated' is the number of unschedulable pods that the scheduler never attempted
|
|
||||||
to schedule because they are gated.
|
|
||||||
type: Gauge
|
|
||||||
stabilityLevel: STABLE
|
|
||||||
labels:
|
|
||||||
- queue
|
|
||||||
- name: pod_scheduling_attempts
|
|
||||||
subsystem: scheduler
|
|
||||||
help: Number of attempts to successfully schedule a pod.
|
|
||||||
type: Histogram
|
|
||||||
stabilityLevel: STABLE
|
|
||||||
buckets:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 4
|
|
||||||
- 8
|
|
||||||
- 16
|
|
||||||
- name: pod_scheduling_duration_seconds
|
|
||||||
subsystem: scheduler
|
|
||||||
help: E2e latency for a pod being scheduled which may include multiple scheduling
|
|
||||||
attempts.
|
|
||||||
type: Histogram
|
|
||||||
deprecatedVersion: 1.28.0
|
|
||||||
stabilityLevel: STABLE
|
|
||||||
labels:
|
|
||||||
- attempts
|
|
||||||
buckets:
|
|
||||||
- 0.01
|
|
||||||
- 0.02
|
|
||||||
- 0.04
|
|
||||||
- 0.08
|
|
||||||
- 0.16
|
|
||||||
- 0.32
|
|
||||||
- 0.64
|
|
||||||
- 1.28
|
|
||||||
- 2.56
|
|
||||||
- 5.12
|
|
||||||
- 10.24
|
|
||||||
- 20.48
|
|
||||||
- 40.96
|
|
||||||
- 81.92
|
|
||||||
- 163.84
|
|
||||||
- 327.68
|
|
||||||
- 655.36
|
|
||||||
- 1310.72
|
|
||||||
- 2621.44
|
|
||||||
- 5242.88
|
|
||||||
- name: preemption_attempts_total
|
|
||||||
subsystem: scheduler
|
|
||||||
help: Total preemption attempts in the cluster till now
|
|
||||||
type: Counter
|
|
||||||
stabilityLevel: STABLE
|
|
||||||
- name: preemption_victims
|
|
||||||
subsystem: scheduler
|
|
||||||
help: Number of selected preemption victims
|
|
||||||
type: Histogram
|
|
||||||
stabilityLevel: STABLE
|
|
||||||
buckets:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 4
|
|
||||||
- 8
|
|
||||||
- 16
|
|
||||||
- 32
|
|
||||||
- 64
|
|
||||||
- name: queue_incoming_pods_total
|
|
||||||
subsystem: scheduler
|
|
||||||
help: Number of pods added to scheduling queues by event and queue type.
|
|
||||||
type: Counter
|
|
||||||
stabilityLevel: STABLE
|
|
||||||
labels:
|
|
||||||
- event
|
|
||||||
- queue
|
|
||||||
- name: schedule_attempts_total
|
|
||||||
subsystem: scheduler
|
|
||||||
help: Number of attempts to schedule pods, by the result. 'unschedulable' means
|
|
||||||
a pod could not be scheduled, while 'error' means an internal scheduler problem.
|
|
||||||
type: Counter
|
|
||||||
stabilityLevel: STABLE
|
|
||||||
labels:
|
|
||||||
- profile
|
|
||||||
- result
|
|
||||||
- name: scheduling_attempt_duration_seconds
|
|
||||||
subsystem: scheduler
|
|
||||||
help: Scheduling attempt latency in seconds (scheduling algorithm + binding)
|
|
||||||
type: Histogram
|
|
||||||
stabilityLevel: STABLE
|
|
||||||
labels:
|
|
||||||
- profile
|
|
||||||
- result
|
|
||||||
buckets:
|
|
||||||
- 0.001
|
|
||||||
- 0.002
|
|
||||||
- 0.004
|
|
||||||
- 0.008
|
|
||||||
- 0.016
|
|
||||||
- 0.032
|
|
||||||
- 0.064
|
|
||||||
- 0.128
|
|
||||||
- 0.256
|
|
||||||
- 0.512
|
|
||||||
- 1.024
|
|
||||||
- 2.048
|
|
||||||
- 4.096
|
|
||||||
- 8.192
|
|
||||||
- 16.384
|
|
||||||
- name: controller_admission_duration_seconds
|
- name: controller_admission_duration_seconds
|
||||||
subsystem: admission
|
subsystem: admission
|
||||||
namespace: apiserver
|
namespace: apiserver
|
||||||
@ -489,126 +325,290 @@
|
|||||||
stabilityLevel: STABLE
|
stabilityLevel: STABLE
|
||||||
labels:
|
labels:
|
||||||
- resource
|
- resource
|
||||||
- name: current_executing_requests
|
- name: container_cpu_usage_seconds_total
|
||||||
subsystem: flowcontrol
|
help: Cumulative cpu time consumed by the container in core-seconds
|
||||||
namespace: apiserver
|
type: Custom
|
||||||
help: Number of requests in initial (for a WATCH) or any (for a non-WATCH) execution
|
stabilityLevel: STABLE
|
||||||
stage in the API Priority and Fairness subsystem
|
|
||||||
type: Gauge
|
|
||||||
stabilityLevel: BETA
|
|
||||||
labels:
|
labels:
|
||||||
- flow_schema
|
- container
|
||||||
- priority_level
|
- pod
|
||||||
- name: current_executing_seats
|
- namespace
|
||||||
subsystem: flowcontrol
|
- name: container_memory_working_set_bytes
|
||||||
namespace: apiserver
|
help: Current working set of the container in bytes
|
||||||
help: Concurrency (number of seats) occupied by the currently executing (initial
|
type: Custom
|
||||||
stage for a WATCH, any stage otherwise) requests in the API Priority and Fairness
|
stabilityLevel: STABLE
|
||||||
subsystem
|
|
||||||
type: Gauge
|
|
||||||
stabilityLevel: BETA
|
|
||||||
labels:
|
labels:
|
||||||
- flow_schema
|
- container
|
||||||
- priority_level
|
- pod
|
||||||
- name: current_inqueue_requests
|
- namespace
|
||||||
subsystem: flowcontrol
|
- name: container_start_time_seconds
|
||||||
namespace: apiserver
|
help: Start time of the container since unix epoch in seconds
|
||||||
help: Number of requests currently pending in queues of the API Priority and Fairness
|
type: Custom
|
||||||
subsystem
|
stabilityLevel: STABLE
|
||||||
type: Gauge
|
|
||||||
stabilityLevel: BETA
|
|
||||||
labels:
|
labels:
|
||||||
- flow_schema
|
- container
|
||||||
- priority_level
|
- pod
|
||||||
- name: dispatched_requests_total
|
- namespace
|
||||||
subsystem: flowcontrol
|
- name: job_creation_skew_duration_seconds
|
||||||
namespace: apiserver
|
subsystem: cronjob_controller
|
||||||
help: Number of requests executed by API Priority and Fairness subsystem
|
help: Time between when a cronjob is scheduled to be run, and when the corresponding
|
||||||
type: Counter
|
job is created
|
||||||
stabilityLevel: BETA
|
|
||||||
labels:
|
|
||||||
- flow_schema
|
|
||||||
- priority_level
|
|
||||||
- name: nominal_limit_seats
|
|
||||||
subsystem: flowcontrol
|
|
||||||
namespace: apiserver
|
|
||||||
help: Nominal number of execution seats configured for each priority level
|
|
||||||
type: Gauge
|
|
||||||
stabilityLevel: BETA
|
|
||||||
labels:
|
|
||||||
- priority_level
|
|
||||||
- name: rejected_requests_total
|
|
||||||
subsystem: flowcontrol
|
|
||||||
namespace: apiserver
|
|
||||||
help: Number of requests rejected by API Priority and Fairness subsystem
|
|
||||||
type: Counter
|
|
||||||
stabilityLevel: BETA
|
|
||||||
labels:
|
|
||||||
- flow_schema
|
|
||||||
- priority_level
|
|
||||||
- reason
|
|
||||||
- name: request_wait_duration_seconds
|
|
||||||
subsystem: flowcontrol
|
|
||||||
namespace: apiserver
|
|
||||||
help: Length of time a request spent waiting in its queue
|
|
||||||
type: Histogram
|
type: Histogram
|
||||||
stabilityLevel: BETA
|
stabilityLevel: STABLE
|
||||||
labels:
|
|
||||||
- execute
|
|
||||||
- flow_schema
|
|
||||||
- priority_level
|
|
||||||
buckets:
|
buckets:
|
||||||
- 0
|
|
||||||
- 0.005
|
|
||||||
- 0.02
|
|
||||||
- 0.05
|
|
||||||
- 0.1
|
|
||||||
- 0.2
|
|
||||||
- 0.5
|
|
||||||
- 1
|
- 1
|
||||||
- 2
|
- 2
|
||||||
- 5
|
- 4
|
||||||
- 10
|
- 8
|
||||||
- 15
|
- 16
|
||||||
- 30
|
- 32
|
||||||
- name: disabled_metrics_total
|
- 64
|
||||||
help: The count of disabled metrics.
|
- 128
|
||||||
|
- 256
|
||||||
|
- 512
|
||||||
|
- name: job_pods_finished_total
|
||||||
|
subsystem: job_controller
|
||||||
|
help: The number of finished Pods that are fully tracked
|
||||||
type: Counter
|
type: Counter
|
||||||
stabilityLevel: BETA
|
stabilityLevel: STABLE
|
||||||
- name: hidden_metrics_total
|
labels:
|
||||||
help: The count of hidden metrics.
|
- completion_mode
|
||||||
|
- result
|
||||||
|
- name: job_sync_duration_seconds
|
||||||
|
subsystem: job_controller
|
||||||
|
help: The time it took to sync a job
|
||||||
|
type: Histogram
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
labels:
|
||||||
|
- action
|
||||||
|
- completion_mode
|
||||||
|
- result
|
||||||
|
buckets:
|
||||||
|
- 0.001
|
||||||
|
- 0.002
|
||||||
|
- 0.004
|
||||||
|
- 0.008
|
||||||
|
- 0.016
|
||||||
|
- 0.032
|
||||||
|
- 0.064
|
||||||
|
- 0.128
|
||||||
|
- 0.256
|
||||||
|
- 0.512
|
||||||
|
- 1.024
|
||||||
|
- 2.048
|
||||||
|
- 4.096
|
||||||
|
- 8.192
|
||||||
|
- 16.384
|
||||||
|
- name: job_syncs_total
|
||||||
|
subsystem: job_controller
|
||||||
|
help: The number of job syncs
|
||||||
type: Counter
|
type: Counter
|
||||||
stabilityLevel: BETA
|
stabilityLevel: STABLE
|
||||||
- name: feature_enabled
|
|
||||||
namespace: kubernetes
|
|
||||||
help: This metric records the data about the stage and enablement of a k8s feature.
|
|
||||||
type: Gauge
|
|
||||||
stabilityLevel: BETA
|
|
||||||
labels:
|
labels:
|
||||||
- name
|
- action
|
||||||
- stage
|
- completion_mode
|
||||||
- name: healthcheck
|
- result
|
||||||
namespace: kubernetes
|
- name: jobs_finished_total
|
||||||
help: This metric records the result of a single healthcheck.
|
subsystem: job_controller
|
||||||
type: Gauge
|
help: The number of finished jobs
|
||||||
stabilityLevel: BETA
|
|
||||||
labels:
|
|
||||||
- name
|
|
||||||
- type
|
|
||||||
- name: healthchecks_total
|
|
||||||
namespace: kubernetes
|
|
||||||
help: This metric records the results of all healthcheck.
|
|
||||||
type: Counter
|
type: Counter
|
||||||
stabilityLevel: BETA
|
stabilityLevel: STABLE
|
||||||
labels:
|
labels:
|
||||||
- name
|
- completion_mode
|
||||||
|
- reason
|
||||||
|
- result
|
||||||
|
- name: kube_pod_resource_limit
|
||||||
|
help: Resources limit for workloads on the cluster, broken down by pod. This shows
|
||||||
|
the resource usage the scheduler and kubelet expect per pod for resources along
|
||||||
|
with the unit for the resource if any.
|
||||||
|
type: Custom
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
labels:
|
||||||
|
- namespace
|
||||||
|
- pod
|
||||||
|
- node
|
||||||
|
- scheduler
|
||||||
|
- priority
|
||||||
|
- resource
|
||||||
|
- unit
|
||||||
|
- name: kube_pod_resource_request
|
||||||
|
help: Resources requested by workloads on the cluster, broken down by pod. This
|
||||||
|
shows the resource usage the scheduler and kubelet expect per pod for resources
|
||||||
|
along with the unit for the resource if any.
|
||||||
|
type: Custom
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
labels:
|
||||||
|
- namespace
|
||||||
|
- pod
|
||||||
|
- node
|
||||||
|
- scheduler
|
||||||
|
- priority
|
||||||
|
- resource
|
||||||
|
- unit
|
||||||
|
- name: evictions_total
|
||||||
|
subsystem: node_collector
|
||||||
|
help: Number of Node evictions that happened since current instance of NodeController
|
||||||
|
started.
|
||||||
|
type: Counter
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
labels:
|
||||||
|
- zone
|
||||||
|
- name: node_cpu_usage_seconds_total
|
||||||
|
help: Cumulative cpu time consumed by the node in core-seconds
|
||||||
|
type: Custom
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
- name: node_memory_working_set_bytes
|
||||||
|
help: Current working set of the node in bytes
|
||||||
|
type: Custom
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
- name: pod_cpu_usage_seconds_total
|
||||||
|
help: Cumulative cpu time consumed by the pod in core-seconds
|
||||||
|
type: Custom
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
labels:
|
||||||
|
- pod
|
||||||
|
- namespace
|
||||||
|
- name: pod_memory_working_set_bytes
|
||||||
|
help: Current working set of the pod in bytes
|
||||||
|
type: Custom
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
labels:
|
||||||
|
- pod
|
||||||
|
- namespace
|
||||||
|
- name: resource_scrape_error
|
||||||
|
help: 1 if there was an error while getting container metrics, 0 otherwise
|
||||||
|
type: Custom
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
- name: framework_extension_point_duration_seconds
|
||||||
|
subsystem: scheduler
|
||||||
|
help: Latency for running all plugins of a specific extension point.
|
||||||
|
type: Histogram
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
labels:
|
||||||
|
- extension_point
|
||||||
|
- profile
|
||||||
- status
|
- status
|
||||||
- type
|
buckets:
|
||||||
- name: registered_metrics_total
|
- 0.0001
|
||||||
help: The count of registered metrics broken by stability level and deprecation
|
- 0.0002
|
||||||
version.
|
- 0.0004
|
||||||
type: Counter
|
- 0.0008
|
||||||
stabilityLevel: BETA
|
- 0.0016
|
||||||
|
- 0.0032
|
||||||
|
- 0.0064
|
||||||
|
- 0.0128
|
||||||
|
- 0.0256
|
||||||
|
- 0.0512
|
||||||
|
- 0.1024
|
||||||
|
- 0.2048
|
||||||
|
- name: pending_pods
|
||||||
|
subsystem: scheduler
|
||||||
|
help: Number of pending pods, by the queue type. 'active' means number of pods in
|
||||||
|
activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number
|
||||||
|
of pods in unschedulablePods that the scheduler attempted to schedule and failed;
|
||||||
|
'gated' is the number of unschedulable pods that the scheduler never attempted
|
||||||
|
to schedule because they are gated.
|
||||||
|
type: Gauge
|
||||||
|
stabilityLevel: STABLE
|
||||||
labels:
|
labels:
|
||||||
- deprecated_version
|
- queue
|
||||||
- stability_level
|
- name: pod_scheduling_attempts
|
||||||
|
subsystem: scheduler
|
||||||
|
help: Number of attempts to successfully schedule a pod.
|
||||||
|
type: Histogram
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
buckets:
|
||||||
|
- 1
|
||||||
|
- 2
|
||||||
|
- 4
|
||||||
|
- 8
|
||||||
|
- 16
|
||||||
|
- name: pod_scheduling_duration_seconds
|
||||||
|
subsystem: scheduler
|
||||||
|
help: E2e latency for a pod being scheduled which may include multiple scheduling
|
||||||
|
attempts.
|
||||||
|
type: Histogram
|
||||||
|
deprecatedVersion: 1.28.0
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
labels:
|
||||||
|
- attempts
|
||||||
|
buckets:
|
||||||
|
- 0.01
|
||||||
|
- 0.02
|
||||||
|
- 0.04
|
||||||
|
- 0.08
|
||||||
|
- 0.16
|
||||||
|
- 0.32
|
||||||
|
- 0.64
|
||||||
|
- 1.28
|
||||||
|
- 2.56
|
||||||
|
- 5.12
|
||||||
|
- 10.24
|
||||||
|
- 20.48
|
||||||
|
- 40.96
|
||||||
|
- 81.92
|
||||||
|
- 163.84
|
||||||
|
- 327.68
|
||||||
|
- 655.36
|
||||||
|
- 1310.72
|
||||||
|
- 2621.44
|
||||||
|
- 5242.88
|
||||||
|
- name: preemption_attempts_total
|
||||||
|
subsystem: scheduler
|
||||||
|
help: Total preemption attempts in the cluster till now
|
||||||
|
type: Counter
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
- name: preemption_victims
|
||||||
|
subsystem: scheduler
|
||||||
|
help: Number of selected preemption victims
|
||||||
|
type: Histogram
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
buckets:
|
||||||
|
- 1
|
||||||
|
- 2
|
||||||
|
- 4
|
||||||
|
- 8
|
||||||
|
- 16
|
||||||
|
- 32
|
||||||
|
- 64
|
||||||
|
- name: queue_incoming_pods_total
|
||||||
|
subsystem: scheduler
|
||||||
|
help: Number of pods added to scheduling queues by event and queue type.
|
||||||
|
type: Counter
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
labels:
|
||||||
|
- event
|
||||||
|
- queue
|
||||||
|
- name: schedule_attempts_total
|
||||||
|
subsystem: scheduler
|
||||||
|
help: Number of attempts to schedule pods, by the result. 'unschedulable' means
|
||||||
|
a pod could not be scheduled, while 'error' means an internal scheduler problem.
|
||||||
|
type: Counter
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
labels:
|
||||||
|
- profile
|
||||||
|
- result
|
||||||
|
- name: scheduling_attempt_duration_seconds
|
||||||
|
subsystem: scheduler
|
||||||
|
help: Scheduling attempt latency in seconds (scheduling algorithm + binding)
|
||||||
|
type: Histogram
|
||||||
|
stabilityLevel: STABLE
|
||||||
|
labels:
|
||||||
|
- profile
|
||||||
|
- result
|
||||||
|
buckets:
|
||||||
|
- 0.001
|
||||||
|
- 0.002
|
||||||
|
- 0.004
|
||||||
|
- 0.008
|
||||||
|
- 0.016
|
||||||
|
- 0.032
|
||||||
|
- 0.064
|
||||||
|
- 0.128
|
||||||
|
- 0.256
|
||||||
|
- 0.512
|
||||||
|
- 1.024
|
||||||
|
- 2.048
|
||||||
|
- 4.096
|
||||||
|
- 8.192
|
||||||
|
- 16.384
|
||||||
|
Loading…
Reference in New Issue
Block a user