diff --git a/test/instrumentation/documentation/main.go b/test/instrumentation/documentation/main.go index d52a3d4e0b7..e423ded75b6 100755 --- a/test/instrumentation/documentation/main.go +++ b/test/instrumentation/documentation/main.go @@ -53,9 +53,9 @@ description: >- This page details the metrics that different Kubernetes components export. You can query the metrics endpoint for these components using an HTTP scrape, and fetch the current metrics data in Prometheus format. -### List of Kubernetes Metrics +### List of Stable Kubernetes Metrics -
Name | @@ -67,7 +67,31 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu|||||||
---|---|---|---|---|---|---|---|
{{with $metric}}{{.BuildFQName}}{{end}} | +{{$metric.StabilityLevel}} | +{{$metric.Type}} | +{{$metric.Help}} | +{{if not $metric.Labels }}None | {{else }}{{range $label := $metric.Labels}} {{$label}} {{end}} | {{end}}
+{{if not $metric.ConstLabels }}None | {{else }}{{$metric.ConstLabels}} | {{end}}
Name | +Stability Level | +Type | +Help | +Labels | +Const Labels | +|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
{{with $metric}}{{.BuildFQName}}{{end}} | {{$metric.StabilityLevel}} | {{$metric.Type}} | @@ -80,32 +104,35 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu ) type templateData struct { - Metrics []metric + AlphaMetrics []metric + StableMetrics []metric GeneratedDate time.Time } func main() { dat, err := os.ReadFile("test/instrumentation/testdata/documentation-list.yaml") if err == nil { - metrics := []metric{} - err = yaml.Unmarshal(dat, &metrics) + var parsedMetrics []metric + err = yaml.Unmarshal(dat, &parsedMetrics) if err != nil { println("err", err) } - sort.Sort(byFQName(metrics)) + sort.Sort(byFQName(parsedMetrics)) t := template.New("t").Funcs(funcMap) t, err := t.Parse(templ) if err != nil { println("err", err) } var tpl bytes.Buffer - for i, m := range metrics { + for i, m := range parsedMetrics { m.Help = strings.Join(strings.Split(m.Help, "\n"), ", ") _ = m.BuildFQName() // ignore golint error - metrics[i] = m + parsedMetrics[i] = m } + sortedMetrics := byStabilityLevel(parsedMetrics) data := templateData{ - Metrics: metrics, + AlphaMetrics: sortedMetrics["ALPHA"], + StableMetrics: sortedMetrics["STABLE"], GeneratedDate: time.Now(), } err = t.Execute(&tpl, data) @@ -154,3 +181,14 @@ func (ms byFQName) Less(i, j int) bool { func (ms byFQName) Swap(i, j int) { ms[i], ms[j] = ms[j], ms[i] } + +func byStabilityLevel(ms []metric) map[string][]metric { + res := map[string][]metric{} + for _, m := range ms { + if _, ok := res[m.StabilityLevel]; !ok { + res[m.StabilityLevel] = []metric{} + } + res[m.StabilityLevel] = append(res[m.StabilityLevel], m) + } + return res +} diff --git a/test/instrumentation/testdata/documentation.md b/test/instrumentation/testdata/documentation.md index c7085b7c24b..6a75ad66c48 100644 --- a/test/instrumentation/testdata/documentation.md +++ b/test/instrumentation/testdata/documentation.md @@ -6,14 +6,152 @@ description: >- --- -## Metrics (auto-generated 2022 Oct 25) +## Metrics (auto-generated 2022 Oct 27) This page details the metrics that different Kubernetes components export. You can query the metrics endpoint for these -components using an HTTP scrape, and fetch the current metrics data in Prometheus format. +components using an HTTP scrape, and fetch the metrics in Prometheus format. -### List of Kubernetes Metrics +### List of Stable Kubernetes Metrics -
Name | +Stability Level | +Type | +Help | +Labels | +Const Labels | +
---|---|---|---|---|---|
apiserver_admission_controller_admission_duration_seconds | +STABLE | +Histogram | +Admission controller latency histogram in seconds, identified by name and broken out for each operation and API resource and type (validate or admit). | +name operation rejected type |
+None |
apiserver_admission_step_admission_duration_seconds | +STABLE | +Histogram | +Admission sub-step latency histogram in seconds, broken out for each operation and API resource and step type (validate or admit). | +operation rejected type |
+None |
apiserver_admission_webhook_admission_duration_seconds | +STABLE | +Histogram | +Admission webhook latency histogram in seconds, identified by name and broken out for each operation and API resource and type (validate or admit). | +name operation rejected type |
+None |
apiserver_current_inflight_requests | +STABLE | +Gauge | +Maximal number of currently used inflight request limit of this apiserver per request kind in last second. | +request_kind |
+None |
apiserver_longrunning_requests | +STABLE | +Gauge | +Gauge of all active long-running apiserver requests broken out by verb, group, version, resource, scope and component. Not all requests are tracked this way. | +component group resource scope subresource verb version |
+None |
apiserver_request_duration_seconds | +STABLE | +Histogram | +Response latency distribution in seconds for each verb, dry run value, group, version, resource, subresource, scope and component. | +component dry_run group resource scope subresource verb version |
+None |
apiserver_request_total | +STABLE | +Counter | +Counter of apiserver requests broken out for each verb, dry run value, group, version, resource, scope, component, and HTTP response code. | +code component dry_run group resource scope subresource verb version |
+None |
apiserver_requested_deprecated_apis | +STABLE | +Gauge | +Gauge of deprecated APIs that have been requested, broken out by API group, version, resource, subresource, and removed_release. | +group removed_release resource subresource version |
+None |
apiserver_response_sizes | +STABLE | +Histogram | +Response size distribution in bytes for each group, version, verb, resource, subresource, scope and component. | +component group resource scope subresource verb version |
+None |
apiserver_storage_objects | +STABLE | +Gauge | +Number of stored objects at the time of last check split by kind. | +resource |
+None |
node_collector_evictions_total | +STABLE | +Counter | +Number of Node evictions that happened since current instance of NodeController started. | +zone |
+None |
scheduler_framework_extension_point_duration_seconds | +STABLE | +Histogram | +Latency for running all plugins of a specific extension point. | +extension_point profile status |
+None |
scheduler_pending_pods | +STABLE | +Gauge | +Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulablePods. | +queue |
+None |
scheduler_pod_scheduling_attempts | +STABLE | +Histogram | +Number of attempts to successfully schedule a pod. | +None | +None |
scheduler_pod_scheduling_duration_seconds | +STABLE | +Histogram | +E2e latency for a pod being scheduled which may include multiple scheduling attempts. | +attempts |
+None |
scheduler_preemption_attempts_total | +STABLE | +Counter | +Total preemption attempts in the cluster till now | +None | +None |
scheduler_preemption_victims | +STABLE | +Histogram | +Number of selected preemption victims | +None | +None |
scheduler_queue_incoming_pods_total | +STABLE | +Counter | +Number of pods added to scheduling queues by event and queue type. | +event queue |
+None |
scheduler_schedule_attempts_total | +STABLE | +Counter | +Number of attempts to schedule pods, by the result. 'unschedulable' means a pod could not be scheduled, while 'error' means an internal scheduler problem. | +profile result |
+None |
scheduler_scheduling_attempt_duration_seconds | +STABLE | +Histogram | +Scheduling attempt latency in seconds (scheduling algorithm + binding) | +profile result |
+None |
Name | @@ -1682,125 +1820,5 @@ components using an HTTP scrape, and fetch the current metrics data in PrometheuHow long in seconds processing an item from workqueue takes. | name |
None | ||
---|---|---|---|---|---|
apiserver_admission_controller_admission_duration_seconds | -STABLE | -Histogram | -Admission controller latency histogram in seconds, identified by name and broken out for each operation and API resource and type (validate or admit). | -name operation rejected type |
-None |
apiserver_admission_step_admission_duration_seconds | -STABLE | -Histogram | -Admission sub-step latency histogram in seconds, broken out for each operation and API resource and step type (validate or admit). | -operation rejected type |
-None |
apiserver_admission_webhook_admission_duration_seconds | -STABLE | -Histogram | -Admission webhook latency histogram in seconds, identified by name and broken out for each operation and API resource and type (validate or admit). | -name operation rejected type |
-None |
apiserver_current_inflight_requests | -STABLE | -Gauge | -Maximal number of currently used inflight request limit of this apiserver per request kind in last second. | -request_kind |
-None |
apiserver_longrunning_requests | -STABLE | -Gauge | -Gauge of all active long-running apiserver requests broken out by verb, group, version, resource, scope and component. Not all requests are tracked this way. | -component group resource scope subresource verb version |
-None |
apiserver_request_duration_seconds | -STABLE | -Histogram | -Response latency distribution in seconds for each verb, dry run value, group, version, resource, subresource, scope and component. | -component dry_run group resource scope subresource verb version |
-None |
apiserver_request_total | -STABLE | -Counter | -Counter of apiserver requests broken out for each verb, dry run value, group, version, resource, scope, component, and HTTP response code. | -code component dry_run group resource scope subresource verb version |
-None |
apiserver_requested_deprecated_apis | -STABLE | -Gauge | -Gauge of deprecated APIs that have been requested, broken out by API group, version, resource, subresource, and removed_release. | -group removed_release resource subresource version |
-None |
apiserver_response_sizes | -STABLE | -Histogram | -Response size distribution in bytes for each group, version, verb, resource, subresource, scope and component. | -component group resource scope subresource verb version |
-None |
apiserver_storage_objects | -STABLE | -Gauge | -Number of stored objects at the time of last check split by kind. | -resource |
-None |
node_collector_evictions_total | -STABLE | -Counter | -Number of Node evictions that happened since current instance of NodeController started. | -zone |
-None |
scheduler_framework_extension_point_duration_seconds | -STABLE | -Histogram | -Latency for running all plugins of a specific extension point. | -extension_point profile status |
-None |
scheduler_pending_pods | -STABLE | -Gauge | -Number of pending pods, by the queue type. 'active' means number of pods in activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number of pods in unschedulablePods. | -queue |
-None |
scheduler_pod_scheduling_attempts | -STABLE | -Histogram | -Number of attempts to successfully schedule a pod. | -None | -None |
scheduler_pod_scheduling_duration_seconds | -STABLE | -Histogram | -E2e latency for a pod being scheduled which may include multiple scheduling attempts. | -attempts |
-None |
scheduler_preemption_attempts_total | -STABLE | -Counter | -Total preemption attempts in the cluster till now | -None | -None |
scheduler_preemption_victims | -STABLE | -Histogram | -Number of selected preemption victims | -None | -None |
scheduler_queue_incoming_pods_total | -STABLE | -Counter | -Number of pods added to scheduling queues by event and queue type. | -event queue |
-None |
scheduler_schedule_attempts_total | -STABLE | -Counter | -Number of attempts to schedule pods, by the result. 'unschedulable' means a pod could not be scheduled, while 'error' means an internal scheduler problem. | -profile result |
-None |
scheduler_scheduling_attempt_duration_seconds | -STABLE | -Histogram | -Scheduling attempt latency in seconds (scheduling algorithm + binding) | -profile result |
-None |