From 9624086933577abe76cccfb8faf5e23cf6a50447 Mon Sep 17 00:00:00 2001 From: Mengjiao Liu Date: Mon, 28 Oct 2024 15:33:54 +0800 Subject: [PATCH] Update metrics documentation for v1.31 --- .../documentation/documentation-list.yaml | 1516 +++++++++-------- .../documentation/documentation.md | 103 +- 2 files changed, 870 insertions(+), 749 deletions(-) diff --git a/test/instrumentation/documentation/documentation-list.yaml b/test/instrumentation/documentation/documentation-list.yaml index 557baca0f9c..68cc85358e3 100644 --- a/test/instrumentation/documentation/documentation-list.yaml +++ b/test/instrumentation/documentation/documentation-list.yaml @@ -539,36 +539,6 @@ help: Number of PersistenVolumeClaims creation requests type: Counter stabilityLevel: ALPHA -- name: client_expiration_renew_errors - subsystem: certificate_manager - namespace: kubelet - help: Counter of certificate renewal errors. - type: Counter - stabilityLevel: ALPHA -- name: certificate_manager_server_rotation_seconds - subsystem: kubelet - help: Histogram of the number of seconds the previous certificate lived before being - rotated. - type: Histogram - stabilityLevel: ALPHA - buckets: - - 60 - - 3600 - - 14400 - - 86400 - - 604800 - - 2.592e+06 - - 7.776e+06 - - 1.5552e+07 - - 3.1104e+07 - - 1.24416e+08 -- name: certificate_manager_server_ttl_seconds - subsystem: kubelet - help: Gauge of the shortest TTL (time-to-live) of the Kubelet's serving certificate. - The value is in seconds until certificate expiry (negative if already expired). - If serving certificate is invalid or unused, the value will be +INF. - type: Gauge - stabilityLevel: ALPHA - name: credential_provider_plugin_duration subsystem: kubelet help: Duration of execution in seconds for credential provider plugin @@ -595,11 +565,6 @@ stabilityLevel: ALPHA labels: - plugin_name -- name: server_expiration_renew_errors - subsystem: kubelet - help: Counter of certificate renewal errors. - type: Counter - stabilityLevel: ALPHA - name: pv_collector_bound_pv_count help: Gauge measuring number of persistent volume currently bound type: Custom @@ -612,6 +577,8 @@ stabilityLevel: ALPHA labels: - namespace + - storage_class + - volume_attributes_class - name: pv_collector_total_pv_count help: Gauge measuring total number of persistent volumes type: Custom @@ -631,6 +598,8 @@ stabilityLevel: ALPHA labels: - namespace + - storage_class + - volume_attributes_class - name: retroactive_storageclass_errors_total help: Total number of failed retroactive StorageClass assignments to persistent volume claim @@ -699,6 +668,41 @@ labels: - operation_name - plugin_name +- name: client_expiration_renew_errors + subsystem: certificate_manager + namespace: kubelet + help: Counter of certificate renewal errors. + type: Counter + stabilityLevel: ALPHA +- name: certificate_manager_server_rotation_seconds + subsystem: kubelet + help: Histogram of the number of seconds the previous certificate lived before being + rotated. + type: Histogram + stabilityLevel: ALPHA + buckets: + - 60 + - 3600 + - 14400 + - 86400 + - 604800 + - 2.592e+06 + - 7.776e+06 + - 1.5552e+07 + - 3.1104e+07 + - 1.24416e+08 +- name: certificate_manager_server_ttl_seconds + subsystem: kubelet + help: Gauge of the shortest TTL (time-to-live) of the Kubelet's serving certificate. + The value is in seconds until certificate expiry (negative if already expired). + If serving certificate is invalid or unused, the value will be +INF. + type: Gauge + stabilityLevel: ALPHA +- name: server_expiration_renew_errors + subsystem: kubelet + help: Counter of certificate renewal errors. + type: Counter + stabilityLevel: ALPHA - name: container_swap_usage_bytes help: Current amount of the container swap usage in bytes. Reported only on non-windows systems @@ -745,6 +749,11 @@ - 2.5 - 5 - 10 +- name: cgroup_version + subsystem: kubelet + help: cgroup version on the hosts. + type: Gauge + stabilityLevel: ALPHA - name: kubelet_container_log_filesystem_used_bytes help: Bytes used by the container's logs on the filesystem. type: Custom @@ -1726,98 +1735,6 @@ stabilityLevel: ALPHA labels: - signerName -- name: ip_errors_total - subsystem: clusterip_repair - namespace: apiserver - help: 'Number of errors detected on clusterips by the repair loop broken down by - type of error: leak, repair, full, outOfRange, duplicate, unknown, invalid' - type: Counter - stabilityLevel: ALPHA - labels: - - type -- name: reconcile_errors_total - subsystem: clusterip_repair - namespace: apiserver - help: Number of reconciliation failures on the clusterip repair reconcile loop - type: Counter - stabilityLevel: ALPHA -- name: port_errors_total - subsystem: nodeport_repair - namespace: apiserver - help: 'Number of errors detected on ports by the repair loop broken down by type - of error: leak, repair, full, outOfRange, duplicate, unknown' - type: Counter - stabilityLevel: ALPHA - labels: - - type -- name: reconcile_errors_total - subsystem: nodeport_repair - namespace: apiserver - help: Number of reconciliation failures on the nodeport repair reconcile loop - type: Counter - stabilityLevel: ALPHA -- name: allocated_ips - subsystem: clusterip_allocator - namespace: kube_apiserver - help: Gauge measuring the number of allocated IPs for Services - type: Gauge - stabilityLevel: ALPHA - labels: - - cidr -- name: allocation_errors_total - subsystem: clusterip_allocator - namespace: kube_apiserver - help: Number of errors trying to allocate Cluster IPs - type: Counter - stabilityLevel: ALPHA - labels: - - cidr - - scope -- name: allocation_total - subsystem: clusterip_allocator - namespace: kube_apiserver - help: Number of Cluster IPs allocations - type: Counter - stabilityLevel: ALPHA - labels: - - cidr - - scope -- name: available_ips - subsystem: clusterip_allocator - namespace: kube_apiserver - help: Gauge measuring the number of available IPs for Services - type: Gauge - stabilityLevel: ALPHA - labels: - - cidr -- name: allocated_ports - subsystem: nodeport_allocator - namespace: kube_apiserver - help: Gauge measuring the number of allocated NodePorts for Services - type: Gauge - stabilityLevel: ALPHA -- name: allocation_errors_total - subsystem: nodeport_allocator - namespace: kube_apiserver - help: Number of errors trying to allocate NodePort - type: Counter - stabilityLevel: ALPHA - labels: - - scope -- name: allocation_total - subsystem: nodeport_allocator - namespace: kube_apiserver - help: Number of NodePort allocations - type: Counter - stabilityLevel: ALPHA - labels: - - scope -- name: available_ports - subsystem: nodeport_allocator - namespace: kube_apiserver - help: Gauge measuring the number of available NodePorts for Services - type: Gauge - stabilityLevel: ALPHA - name: backend_tls_failure_total subsystem: pod_logs namespace: kube_apiserver @@ -2094,32 +2011,57 @@ help: Cumulative proxy rules Service changes type: Counter stabilityLevel: ALPHA -- name: binder_cache_requests_total - subsystem: scheduler_volume - help: Total number for request volume binding cache +- name: ip_errors_total + subsystem: clusterip_repair + namespace: apiserver + help: 'Number of errors detected on clusterips by the repair loop broken down by + type of error: leak, repair, full, outOfRange, duplicate, unknown, invalid' type: Counter stabilityLevel: ALPHA labels: - - operation -- name: scheduling_stage_error_total - subsystem: scheduler_volume - help: Volume scheduling stage error count + - type +- name: reconcile_errors_total + subsystem: clusterip_repair + namespace: apiserver + help: Number of reconciliation failures on the clusterip repair reconcile loop + type: Counter + stabilityLevel: ALPHA +- name: port_errors_total + subsystem: nodeport_repair + namespace: apiserver + help: 'Number of errors detected on ports by the repair loop broken down by type + of error: leak, repair, full, outOfRange, duplicate, unknown' type: Counter stabilityLevel: ALPHA labels: - - operation -- name: operations_seconds - subsystem: csi - help: Container Storage Interface operation duration with gRPC error code status - total + - type +- name: reconcile_errors_total + subsystem: nodeport_repair + namespace: apiserver + help: Number of reconciliation failures on the nodeport repair reconcile loop + type: Counter + stabilityLevel: ALPHA +- name: allocated_ips + subsystem: clusterip_allocator + namespace: kube_apiserver + help: Gauge measuring the number of allocated IPs for Services + type: Gauge + stabilityLevel: ALPHA + labels: + - cidr +- name: allocation_duration_seconds + subsystem: clusterip_allocator + namespace: kube_apiserver + help: Duration in seconds to allocate a Cluster IP by ServiceCIDR type: Histogram stabilityLevel: ALPHA labels: - - driver_name - - grpc_status_code - - method_name - - migrated + - cidr buckets: + - 0.005 + - 0.01 + - 0.025 + - 0.05 - 0.1 - 0.25 - 0.5 @@ -2127,12 +2069,80 @@ - 2.5 - 5 - 10 - - 15 - - 25 - - 50 - - 120 - - 300 - - 600 +- name: allocation_errors_total + subsystem: clusterip_allocator + namespace: kube_apiserver + help: Number of errors trying to allocate Cluster IPs + type: Counter + stabilityLevel: ALPHA + labels: + - cidr + - scope +- name: allocation_total + subsystem: clusterip_allocator + namespace: kube_apiserver + help: Number of Cluster IPs allocations + type: Counter + stabilityLevel: ALPHA + labels: + - cidr + - scope +- name: available_ips + subsystem: clusterip_allocator + namespace: kube_apiserver + help: Gauge measuring the number of available IPs for Services + type: Gauge + stabilityLevel: ALPHA + labels: + - cidr +- name: allocated_ports + subsystem: nodeport_allocator + namespace: kube_apiserver + help: Gauge measuring the number of allocated NodePorts for Services + type: Gauge + stabilityLevel: ALPHA +- name: allocation_errors_total + subsystem: nodeport_allocator + namespace: kube_apiserver + help: Number of errors trying to allocate NodePort + type: Counter + stabilityLevel: ALPHA + labels: + - scope +- name: allocation_total + subsystem: nodeport_allocator + namespace: kube_apiserver + help: Number of NodePort allocations + type: Counter + stabilityLevel: ALPHA + labels: + - scope +- name: available_ports + subsystem: nodeport_allocator + namespace: kube_apiserver + help: Gauge measuring the number of available NodePorts for Services + type: Gauge + stabilityLevel: ALPHA +- name: event_handling_duration_seconds + subsystem: scheduler + help: Event handling latency in seconds. + type: Histogram + stabilityLevel: ALPHA + labels: + - event + buckets: + - 0.0001 + - 0.0002 + - 0.0004 + - 0.0008 + - 0.0016 + - 0.0032 + - 0.0064 + - 0.0128 + - 0.0256 + - 0.0512 + - 0.1024 + - 0.2048 - name: goroutines subsystem: scheduler help: Number of running goroutines split by the work they do such as binding. @@ -2203,6 +2213,36 @@ - 0.009852612533569338 - 0.014778918800354007 - 0.02216837820053101 +- name: queueing_hint_execution_duration_seconds + subsystem: scheduler + help: Duration for running a queueing hint function of a plugin. + type: Histogram + stabilityLevel: ALPHA + labels: + - event + - hint + - plugin + buckets: + - 1e-05 + - 1.5000000000000002e-05 + - 2.2500000000000005e-05 + - 3.375000000000001e-05 + - 5.062500000000001e-05 + - 7.593750000000002e-05 + - 0.00011390625000000003 + - 0.00017085937500000006 + - 0.0002562890625000001 + - 0.00038443359375000017 + - 0.0005766503906250003 + - 0.0008649755859375004 + - 0.0012974633789062506 + - 0.0019461950683593758 + - 0.0029192926025390638 + - 0.004378938903808595 + - 0.006568408355712893 + - 0.009852612533569338 + - 0.014778918800354007 + - 0.02216837820053101 - name: scheduler_cache_size subsystem: scheduler help: Number of nodes, pods, and assumed (bound) pods in the scheduler cache. @@ -2241,6 +2281,20 @@ labels: - plugin - profile +- name: binder_cache_requests_total + subsystem: scheduler_volume + help: Total number for request volume binding cache + type: Counter + stabilityLevel: ALPHA + labels: + - operation +- name: scheduling_stage_error_total + subsystem: scheduler_volume + help: Volume scheduling stage error count + type: Counter + stabilityLevel: ALPHA + labels: + - operation - name: invalid_legacy_auto_token_uses_total subsystem: serviceaccount help: Cumulative invalid auto-generated legacy tokens used @@ -2271,50 +2325,6 @@ help: Cumulative valid projected service account tokens used type: Counter stabilityLevel: ALPHA -- name: storage_operation_duration_seconds - help: Storage operation duration - type: Histogram - stabilityLevel: ALPHA - labels: - - migrated - - operation_name - - status - - volume_plugin - buckets: - - 0.1 - - 0.25 - - 0.5 - - 1 - - 2.5 - - 5 - - 10 - - 15 - - 25 - - 50 - - 120 - - 300 - - 600 -- name: volume_operation_total_seconds - help: Storage operation end to end duration in seconds - type: Histogram - stabilityLevel: ALPHA - labels: - - operation_name - - plugin_name - buckets: - - 0.1 - - 0.25 - - 0.5 - - 1 - - 2.5 - - 5 - - 10 - - 15 - - 25 - - 50 - - 120 - - 300 - - 600 - name: pod_scheduling_sli_duration_seconds subsystem: scheduler help: E2e latency for a pod being scheduled, from the time the pod enters the scheduling @@ -2505,6 +2515,31 @@ - 4.096 - 8.192 - 16.384 +- name: operations_seconds + subsystem: csi + help: Container Storage Interface operation duration with gRPC error code status + total + type: Histogram + stabilityLevel: ALPHA + labels: + - driver_name + - grpc_status_code + - method_name + - migrated + buckets: + - 0.1 + - 0.25 + - 0.5 + - 1 + - 2.5 + - 5 + - 10 + - 15 + - 25 + - 50 + - 120 + - 300 + - 600 - name: graph_actions_duration_seconds subsystem: node_authorizer help: Histogram of duration of graph actions in node authorizer. @@ -2525,6 +2560,86 @@ - 0.0512 - 0.1024 - 0.2048 +- name: storage_operation_duration_seconds + help: Storage operation duration + type: Histogram + stabilityLevel: ALPHA + labels: + - migrated + - operation_name + - status + - volume_plugin + buckets: + - 0.1 + - 0.25 + - 0.5 + - 1 + - 2.5 + - 5 + - 10 + - 15 + - 25 + - 50 + - 120 + - 300 + - 600 +- name: volume_operation_total_seconds + help: Storage operation end to end duration in seconds + type: Histogram + stabilityLevel: ALPHA + labels: + - operation_name + - plugin_name + buckets: + - 0.1 + - 0.25 + - 0.5 + - 1 + - 2.5 + - 5 + - 10 + - 15 + - 25 + - 50 + - 120 + - 300 + - 600 +- name: ratcheting_seconds + subsystem: validation + namespace: apiextensions_apiserver + help: Time for comparison of old to new for the purposes of CRDValidationRatcheting + during an UPDATE in seconds. + type: Histogram + stabilityLevel: ALPHA + buckets: + - 1e-05 + - 4e-05 + - 0.00016 + - 0.00064 + - 0.00256 + - 0.01024 + - 0.04096 + - 0.16384 + - 0.65536 + - 2.62144 +- name: apiextensions_openapi_v2_regeneration_count + help: Counter of OpenAPI v2 spec regeneration count broken down by causing CRD name + and reason. + type: Counter + stabilityLevel: ALPHA + labels: + - crd + - reason +- name: apiextensions_openapi_v3_regeneration_count + help: Counter of OpenAPI v3 spec regeneration count broken down by group, version, + causing CRD and reason. + type: Counter + stabilityLevel: ALPHA + labels: + - crd + - group + - reason + - version - name: conversion_webhook_duration_seconds namespace: apiserver help: Conversion webhook request latency @@ -2583,42 +2698,6 @@ - 4.096 - 8.192 - 16.384 -- name: ratcheting_seconds - subsystem: validation - namespace: apiextensions_apiserver - help: Time for comparison of old to new for the purposes of CRDValidationRatcheting - during an UPDATE in seconds. - type: Histogram - stabilityLevel: ALPHA - buckets: - - 1e-05 - - 4e-05 - - 0.00016 - - 0.00064 - - 0.00256 - - 0.01024 - - 0.04096 - - 0.16384 - - 0.65536 - - 2.62144 -- name: apiextensions_openapi_v2_regeneration_count - help: Counter of OpenAPI v2 spec regeneration count broken down by causing CRD name - and reason. - type: Counter - stabilityLevel: ALPHA - labels: - - crd - - reason -- name: apiextensions_openapi_v3_regeneration_count - help: Counter of OpenAPI v3 spec regeneration count broken down by group, version, - causing CRD and reason. - type: Counter - stabilityLevel: ALPHA - labels: - - crd - - group - - reason - - version - name: match_condition_evaluation_errors_total subsystem: admission namespace: apiserver @@ -2726,15 +2805,14 @@ subsystem: validating_admission_policy namespace: apiserver help: Validation admission latency for individual validation expressions in seconds, - labeled by policy and further including binding, state and enforcement action - taken. + labeled by policy and further including binding and enforcement action taken. type: Histogram - stabilityLevel: ALPHA + stabilityLevel: BETA labels: - enforcement_action + - error_type - policy - policy_binding - - state buckets: - 5e-07 - 0.001 @@ -2745,24 +2823,14 @@ subsystem: validating_admission_policy namespace: apiserver help: Validation admission policy check total, labeled by policy and further identified - by binding, enforcement action taken, and state. + by binding and enforcement action taken. type: Counter - stabilityLevel: ALPHA + stabilityLevel: BETA labels: - enforcement_action + - error_type - policy - policy_binding - - state -- name: definition_total - subsystem: validating_admission_policy - namespace: apiserver - help: Validation admission policy count total, labeled by state and enforcement - action. - type: Counter - stabilityLevel: ALPHA - labels: - - enforcement_action - - state - name: controller_admission_duration_seconds subsystem: admission namespace: apiserver @@ -2899,18 +2967,6 @@ labels: - name - type -- name: compilation_duration_seconds - subsystem: cel - namespace: apiserver - help: CEL compilation time in seconds. - type: Histogram - stabilityLevel: ALPHA -- name: evaluation_duration_seconds - subsystem: cel - namespace: apiserver - help: CEL evaluation time in seconds. - type: Histogram - stabilityLevel: ALPHA - name: certificate_expiration_seconds subsystem: client namespace: apiserver @@ -3363,6 +3419,18 @@ - 30 - 45 - 60 +- name: compilation_duration_seconds + subsystem: cel + namespace: apiserver + help: CEL compilation time in seconds. + type: Histogram + stabilityLevel: BETA +- name: evaluation_duration_seconds + subsystem: cel + namespace: apiserver + help: CEL evaluation time in seconds. + type: Histogram + stabilityLevel: BETA - name: current_inflight_requests subsystem: apiserver help: Maximal number of currently used inflight request limit of this apiserver @@ -3725,6 +3793,371 @@ - 13.1072 - 26.2144 - 52.4288 +- name: init_events_total + namespace: apiserver + help: Counter of init events processed in watch cache broken by resource type. + type: Counter + stabilityLevel: ALPHA + labels: + - resource +- name: data_key_generation_duration_seconds + subsystem: storage + namespace: apiserver + help: Latencies in seconds of data encryption key(DEK) generation operations. + type: Histogram + stabilityLevel: ALPHA + buckets: + - 5e-06 + - 1e-05 + - 2e-05 + - 4e-05 + - 8e-05 + - 0.00016 + - 0.00032 + - 0.00064 + - 0.00128 + - 0.00256 + - 0.00512 + - 0.01024 + - 0.02048 + - 0.04096 +- name: data_key_generation_failures_total + subsystem: storage + namespace: apiserver + help: Total number of failed data encryption key(DEK) generation operations. + type: Counter + stabilityLevel: ALPHA +- name: storage_db_total_size_in_bytes + subsystem: apiserver + help: Total size of the storage database file physically allocated in bytes. + type: Gauge + deprecatedVersion: 1.28.0 + stabilityLevel: ALPHA + labels: + - endpoint +- name: storage_decode_errors_total + namespace: apiserver + help: Number of stored object decode errors split by object type + type: Counter + stabilityLevel: ALPHA + labels: + - resource +- name: envelope_transformation_cache_misses_total + subsystem: storage + namespace: apiserver + help: Total number of cache misses while accessing key decryption key(KEK). + type: Counter + stabilityLevel: ALPHA +- name: storage_events_received_total + subsystem: apiserver + help: Number of etcd events received split by kind. + type: Counter + stabilityLevel: ALPHA + labels: + - resource +- name: apiserver_storage_list_evaluated_objects_total + help: Number of objects tested in the course of serving a LIST request from storage + type: Counter + stabilityLevel: ALPHA + labels: + - resource +- name: apiserver_storage_list_fetched_objects_total + help: Number of objects read from storage in the course of serving a LIST request + type: Counter + stabilityLevel: ALPHA + labels: + - resource +- name: apiserver_storage_list_returned_objects_total + help: Number of objects returned for a LIST request from storage + type: Counter + stabilityLevel: ALPHA + labels: + - resource +- name: apiserver_storage_list_total + help: Number of LIST requests served from storage + type: Counter + stabilityLevel: ALPHA + labels: + - resource +- name: transformation_duration_seconds + subsystem: storage + namespace: apiserver + help: Latencies in seconds of value transformation operations. + type: Histogram + stabilityLevel: ALPHA + labels: + - transformation_type + - transformer_prefix + buckets: + - 5e-06 + - 1e-05 + - 2e-05 + - 4e-05 + - 8e-05 + - 0.00016 + - 0.00032 + - 0.00064 + - 0.00128 + - 0.00256 + - 0.00512 + - 0.01024 + - 0.02048 + - 0.04096 + - 0.08192 + - 0.16384 + - 0.32768 + - 0.65536 + - 1.31072 + - 2.62144 + - 5.24288 + - 10.48576 + - 20.97152 + - 41.94304 + - 83.88608 +- name: transformation_operations_total + subsystem: storage + namespace: apiserver + help: Total number of transformations. Successful transformation will have a status + 'OK' and a varied status string when the transformation fails. This status and + transformation_type fields may be used for alerting on encryption/decryption failure + using transformation_type from_storage for decryption and to_storage for encryption + type: Counter + stabilityLevel: ALPHA + labels: + - status + - transformation_type + - transformer_prefix +- name: terminated_watchers_total + namespace: apiserver + help: Counter of watchers closed due to unresponsiveness broken by resource type. + type: Counter + stabilityLevel: ALPHA + labels: + - resource +- name: consistent_read_total + subsystem: watch_cache + namespace: apiserver + help: Counter for consistent reads from cache. + type: Counter + stabilityLevel: ALPHA + labels: + - fallback + - resource + - success +- name: events_dispatched_total + subsystem: watch_cache + namespace: apiserver + help: Counter of events dispatched in watch cache broken by resource type. + type: Counter + stabilityLevel: ALPHA + labels: + - resource +- name: events_received_total + subsystem: watch_cache + namespace: apiserver + help: Counter of events received in watch cache broken by resource type. + type: Counter + stabilityLevel: ALPHA + labels: + - resource +- name: initializations_total + subsystem: watch_cache + namespace: apiserver + help: Counter of watch cache initializations broken by resource type. + type: Counter + stabilityLevel: ALPHA + labels: + - resource +- name: read_wait_seconds + subsystem: watch_cache + namespace: apiserver + help: Histogram of time spent waiting for a watch cache to become fresh. + type: Histogram + stabilityLevel: ALPHA + labels: + - resource + buckets: + - 0.005 + - 0.025 + - 0.05 + - 0.1 + - 0.2 + - 0.4 + - 0.6 + - 0.8 + - 1 + - 1.25 + - 1.5 + - 2 + - 3 +- name: resource_version + subsystem: watch_cache + namespace: apiserver + help: Current resource version of watch cache broken by resource type. + type: Gauge + stabilityLevel: ALPHA + labels: + - resource +- name: etcd_bookmark_counts + help: Number of etcd bookmarks (progress notify events) split by kind. + type: Gauge + stabilityLevel: ALPHA + labels: + - resource +- name: etcd_lease_object_counts + help: Number of objects attached to a single etcd lease. + type: Histogram + stabilityLevel: ALPHA + buckets: + - 10 + - 50 + - 100 + - 500 + - 1000 + - 2500 + - 5000 +- name: etcd_request_duration_seconds + help: Etcd request latency in seconds for each operation and object type. + type: Histogram + stabilityLevel: ALPHA + labels: + - operation + - type + buckets: + - 0.005 + - 0.025 + - 0.05 + - 0.1 + - 0.2 + - 0.4 + - 0.6 + - 0.8 + - 1 + - 1.25 + - 1.5 + - 2 + - 3 + - 4 + - 5 + - 6 + - 8 + - 10 + - 15 + - 20 + - 30 + - 45 + - 60 +- name: etcd_request_errors_total + help: Etcd failed request counts for each operation and object type. + type: Counter + stabilityLevel: ALPHA + labels: + - operation + - type +- name: etcd_requests_total + help: Etcd request counts for each operation and object type. + type: Counter + stabilityLevel: ALPHA + labels: + - operation + - type +- name: capacity + subsystem: watch_cache + help: Total capacity of watch cache broken by resource type. + type: Gauge + stabilityLevel: ALPHA + labels: + - resource +- name: capacity_decrease_total + subsystem: watch_cache + help: Total number of watch cache capacity decrease events broken by resource type. + type: Counter + stabilityLevel: ALPHA + labels: + - resource +- name: capacity_increase_total + subsystem: watch_cache + help: Total number of watch cache capacity increase events broken by resource type. + type: Counter + stabilityLevel: ALPHA + labels: + - resource +- name: apiserver_storage_objects + help: Number of stored objects at the time of last check split by kind. In case + of a fetching error, the value will be -1. + type: Gauge + stabilityLevel: STABLE + labels: + - resource +- name: apiserver_storage_size_bytes + help: Size of the storage database file physically allocated in bytes. + type: Custom + stabilityLevel: STABLE + labels: + - storage_cluster_id +- name: jwt_authenticator_latency_seconds + subsystem: authentication + namespace: apiserver + help: Latency of jwt authentication operations in seconds. This is the time spent + authenticating a token for cache miss only (i.e. when the token is not found in + the cache). + type: Histogram + stabilityLevel: ALPHA + labels: + - jwt_issuer_hash + - result + buckets: + - 0.001 + - 0.005 + - 0.01 + - 0.025 + - 0.05 + - 0.1 + - 0.25 + - 0.5 + - 1 + - 2.5 + - 5 + - 10 +- name: webhook_duration_seconds + subsystem: authorization + namespace: apiserver + help: Request latency in seconds. + type: Histogram + stabilityLevel: ALPHA + labels: + - name + - result + buckets: + - 0.005 + - 0.01 + - 0.025 + - 0.05 + - 0.1 + - 0.25 + - 0.5 + - 1 + - 2.5 + - 5 + - 10 +- name: webhook_evaluations_fail_open_total + subsystem: authorization + namespace: apiserver + help: NoOpinion results due to webhook timeout or error. + type: Counter + stabilityLevel: ALPHA + labels: + - name + - result +- name: webhook_evaluations_total + subsystem: authorization + namespace: apiserver + help: Round-trips to authorization webhooks. + type: Counter + stabilityLevel: ALPHA + labels: + - name + - result - name: current_inqueue_seats subsystem: flowcontrol namespace: apiserver @@ -4061,285 +4494,46 @@ - 2 - 4 - 10 -- name: init_events_total - namespace: apiserver - help: Counter of init events processed in watch cache broken by resource type. - type: Counter - stabilityLevel: ALPHA - labels: - - resource -- name: data_key_generation_duration_seconds - subsystem: storage - namespace: apiserver - help: Latencies in seconds of data encryption key(DEK) generation operations. - type: Histogram - stabilityLevel: ALPHA - buckets: - - 5e-06 - - 1e-05 - - 2e-05 - - 4e-05 - - 8e-05 - - 0.00016 - - 0.00032 - - 0.00064 - - 0.00128 - - 0.00256 - - 0.00512 - - 0.01024 - - 0.02048 - - 0.04096 -- name: data_key_generation_failures_total - subsystem: storage - namespace: apiserver - help: Total number of failed data encryption key(DEK) generation operations. - type: Counter - stabilityLevel: ALPHA -- name: storage_db_total_size_in_bytes +- name: rerouted_request_total subsystem: apiserver - help: Total size of the storage database file physically allocated in bytes. - type: Gauge - deprecatedVersion: 1.28.0 - stabilityLevel: ALPHA - labels: - - endpoint -- name: storage_decode_errors_total - namespace: apiserver - help: Number of stored object decode errors split by object type + help: Total number of requests that were proxied to a peer kube apiserver because + the local apiserver was not capable of serving it type: Counter stabilityLevel: ALPHA labels: - - resource -- name: envelope_transformation_cache_misses_total - subsystem: storage - namespace: apiserver - help: Total number of cache misses while accessing key decryption key(KEK). - type: Counter - stabilityLevel: ALPHA -- name: storage_events_received_total + - code +- name: stream_translator_requests_total subsystem: apiserver - help: Number of etcd events received split by kind. + help: Total number of requests that were handled by the StreamTranslatorProxy, which + processes streaming RemoteCommand/V5 type: Counter stabilityLevel: ALPHA labels: - - resource -- name: apiserver_storage_list_evaluated_objects_total - help: Number of objects tested in the course of serving a LIST request from storage + - code +- name: stream_tunnel_requests_total + subsystem: apiserver + help: Total number of requests that were handled by the StreamTunnelProxy, which + processes streaming PortForward/V2 type: Counter stabilityLevel: ALPHA labels: - - resource -- name: apiserver_storage_list_fetched_objects_total - help: Number of objects read from storage in the course of serving a LIST request - type: Counter - stabilityLevel: ALPHA - labels: - - resource -- name: apiserver_storage_list_returned_objects_total - help: Number of objects returned for a LIST request from storage - type: Counter - stabilityLevel: ALPHA - labels: - - resource -- name: apiserver_storage_list_total - help: Number of LIST requests served from storage - type: Counter - stabilityLevel: ALPHA - labels: - - resource -- name: transformation_duration_seconds - subsystem: storage + - code +- name: x509_insecure_sha1_total + subsystem: webhooks namespace: apiserver - help: Latencies in seconds of value transformation operations. - type: Histogram + help: Counts the number of requests to servers with insecure SHA1 signatures in + their serving certificate OR the number of connection failures due to the insecure + SHA1 signatures (either/or, based on the runtime environment) + type: Counter stabilityLevel: ALPHA - labels: - - transformation_type - - transformer_prefix - buckets: - - 5e-06 - - 1e-05 - - 2e-05 - - 4e-05 - - 8e-05 - - 0.00016 - - 0.00032 - - 0.00064 - - 0.00128 - - 0.00256 - - 0.00512 - - 0.01024 - - 0.02048 - - 0.04096 - - 0.08192 - - 0.16384 - - 0.32768 - - 0.65536 - - 1.31072 - - 2.62144 - - 5.24288 - - 10.48576 - - 20.97152 - - 41.94304 - - 83.88608 -- name: transformation_operations_total - subsystem: storage +- name: x509_missing_san_total + subsystem: webhooks namespace: apiserver - help: Total number of transformations. Successful transformation will have a status - 'OK' and a varied status string when the transformation fails. This status and - transformation_type fields may be used for alerting on encryption/decryption failure - using transformation_type from_storage for decryption and to_storage for encryption + help: Counts the number of requests to servers missing SAN extension in their serving + certificate OR the number of connection failures due to the lack of x509 certificate + SAN extension missing (either/or, based on the runtime environment) type: Counter stabilityLevel: ALPHA - labels: - - status - - transformation_type - - transformer_prefix -- name: terminated_watchers_total - namespace: apiserver - help: Counter of watchers closed due to unresponsiveness broken by resource type. - type: Counter - stabilityLevel: ALPHA - labels: - - resource -- name: events_dispatched_total - subsystem: watch_cache - namespace: apiserver - help: Counter of events dispatched in watch cache broken by resource type. - type: Counter - stabilityLevel: ALPHA - labels: - - resource -- name: events_received_total - subsystem: watch_cache - namespace: apiserver - help: Counter of events received in watch cache broken by resource type. - type: Counter - stabilityLevel: ALPHA - labels: - - resource -- name: initializations_total - subsystem: watch_cache - namespace: apiserver - help: Counter of watch cache initializations broken by resource type. - type: Counter - stabilityLevel: ALPHA - labels: - - resource -- name: read_wait_seconds - subsystem: watch_cache - namespace: apiserver - help: Histogram of time spent waiting for a watch cache to become fresh. - type: Histogram - stabilityLevel: ALPHA - labels: - - resource - buckets: - - 0.005 - - 0.025 - - 0.05 - - 0.1 - - 0.2 - - 0.4 - - 0.6 - - 0.8 - - 1 - - 1.25 - - 1.5 - - 2 - - 3 -- name: resource_version - subsystem: watch_cache - namespace: apiserver - help: Current resource version of watch cache broken by resource type. - type: Gauge - stabilityLevel: ALPHA - labels: - - resource -- name: etcd_bookmark_counts - help: Number of etcd bookmarks (progress notify events) split by kind. - type: Gauge - stabilityLevel: ALPHA - labels: - - resource -- name: etcd_lease_object_counts - help: Number of objects attached to a single etcd lease. - type: Histogram - stabilityLevel: ALPHA - buckets: - - 10 - - 50 - - 100 - - 500 - - 1000 - - 2500 - - 5000 -- name: etcd_request_duration_seconds - help: Etcd request latency in seconds for each operation and object type. - type: Histogram - stabilityLevel: ALPHA - labels: - - operation - - type - buckets: - - 0.005 - - 0.025 - - 0.05 - - 0.1 - - 0.2 - - 0.4 - - 0.6 - - 0.8 - - 1 - - 1.25 - - 1.5 - - 2 - - 3 - - 4 - - 5 - - 6 - - 8 - - 10 - - 15 - - 20 - - 30 - - 45 - - 60 -- name: etcd_request_errors_total - help: Etcd failed request counts for each operation and object type. - type: Counter - stabilityLevel: ALPHA - labels: - - operation - - type -- name: etcd_requests_total - help: Etcd request counts for each operation and object type. - type: Counter - stabilityLevel: ALPHA - labels: - - operation - - type -- name: capacity - subsystem: watch_cache - help: Total capacity of watch cache broken by resource type. - type: Gauge - stabilityLevel: ALPHA - labels: - - resource -- name: capacity_decrease_total - subsystem: watch_cache - help: Total number of watch cache capacity decrease events broken by resource type. - type: Counter - stabilityLevel: ALPHA - labels: - - resource -- name: capacity_increase_total - subsystem: watch_cache - help: Total number of watch cache capacity increase events broken by resource type. - type: Counter - stabilityLevel: ALPHA - labels: - - resource - name: current_executing_requests subsystem: flowcontrol namespace: apiserver @@ -4422,114 +4616,6 @@ - 10 - 15 - 30 -- name: apiserver_storage_objects - help: Number of stored objects at the time of last check split by kind. In case - of a fetching error, the value will be -1. - type: Gauge - stabilityLevel: STABLE - labels: - - resource -- name: apiserver_storage_size_bytes - help: Size of the storage database file physically allocated in bytes. - type: Custom - stabilityLevel: STABLE - labels: - - storage_cluster_id -- name: jwt_authenticator_latency_seconds - subsystem: authentication - namespace: apiserver - help: Latency of jwt authentication operations in seconds. This is the time spent - authenticating a token for cache miss only (i.e. when the token is not found in - the cache). - type: Histogram - stabilityLevel: ALPHA - labels: - - jwt_issuer_hash - - result - buckets: - - 0.001 - - 0.005 - - 0.01 - - 0.025 - - 0.05 - - 0.1 - - 0.25 - - 0.5 - - 1 - - 2.5 - - 5 - - 10 -- name: webhook_duration_seconds - subsystem: authorization - namespace: apiserver - help: Request latency in seconds. - type: Histogram - stabilityLevel: ALPHA - labels: - - name - - result - buckets: - - 0.005 - - 0.01 - - 0.025 - - 0.05 - - 0.1 - - 0.25 - - 0.5 - - 1 - - 2.5 - - 5 - - 10 -- name: webhook_evaluations_fail_open_total - subsystem: authorization - namespace: apiserver - help: NoOpinion results due to webhook timeout or error. - type: Counter - stabilityLevel: ALPHA - labels: - - name - - result -- name: webhook_evaluations_total - subsystem: authorization - namespace: apiserver - help: Round-trips to authorization webhooks. - type: Counter - stabilityLevel: ALPHA - labels: - - name - - result -- name: rerouted_request_total - subsystem: apiserver - help: Total number of requests that were proxied to a peer kube apiserver because - the local apiserver was not capable of serving it - type: Counter - stabilityLevel: ALPHA - labels: - - code -- name: stream_translator_requests_total - subsystem: apiserver - help: Total number of requests that were handled by the StreamTranslatorProxy, which - processes streaming RemoteCommand/V5 - type: Counter - stabilityLevel: ALPHA - labels: - - code -- name: x509_insecure_sha1_total - subsystem: webhooks - namespace: apiserver - help: Counts the number of requests to servers with insecure SHA1 signatures in - their serving certificate OR the number of connection failures due to the insecure - SHA1 signatures (either/or, based on the runtime environment) - type: Counter - stabilityLevel: ALPHA -- name: x509_missing_san_total - subsystem: webhooks - namespace: apiserver - help: Counts the number of requests to servers missing SAN extension in their serving - certificate OR the number of connection failures due to the lack of x509 certificate - SAN extension missing (either/or, based on the runtime environment) - type: Counter - stabilityLevel: ALPHA - name: request_duration_seconds subsystem: cloud_provider_webhook help: Request latency in seconds. Broken down by status code. @@ -4636,6 +4722,92 @@ - 4096 - 8192 - 16384 +- name: changes + subsystem: endpoint_slice_controller + help: Number of EndpointSlice changes + type: Counter + stabilityLevel: ALPHA + labels: + - operation +- name: desired_endpoint_slices + subsystem: endpoint_slice_controller + help: Number of EndpointSlices that would exist with perfect endpoint allocation + type: Gauge + stabilityLevel: ALPHA +- name: endpoints_added_per_sync + subsystem: endpoint_slice_controller + help: Number of endpoints added on each Service sync + type: Histogram + stabilityLevel: ALPHA + buckets: + - 2 + - 4 + - 8 + - 16 + - 32 + - 64 + - 128 + - 256 + - 512 + - 1024 + - 2048 + - 4096 + - 8192 + - 16384 + - 32768 +- name: endpoints_desired + subsystem: endpoint_slice_controller + help: Number of endpoints desired + type: Gauge + stabilityLevel: ALPHA +- name: endpoints_removed_per_sync + subsystem: endpoint_slice_controller + help: Number of endpoints removed on each Service sync + type: Histogram + stabilityLevel: ALPHA + buckets: + - 2 + - 4 + - 8 + - 16 + - 32 + - 64 + - 128 + - 256 + - 512 + - 1024 + - 2048 + - 4096 + - 8192 + - 16384 + - 32768 +- name: endpointslices_changed_per_sync + subsystem: endpoint_slice_controller + help: Number of EndpointSlices changed on each Service sync + type: Histogram + stabilityLevel: ALPHA + labels: + - topology + - traffic_distribution +- name: num_endpoint_slices + subsystem: endpoint_slice_controller + help: Number of EndpointSlices + type: Gauge + stabilityLevel: ALPHA +- name: services_count_by_traffic_distribution + subsystem: endpoint_slice_controller + help: Number of Services using some specific trafficDistribution + type: Gauge + stabilityLevel: ALPHA + labels: + - traffic_distribution +- name: syncs + subsystem: endpoint_slice_controller + help: Number of EndpointSlice syncs + type: Counter + stabilityLevel: ALPHA + labels: + - result - name: kubernetes_build_info help: A metric with a constant '1' value labeled by major, minor, git version, git commit, git tree state, build date, Go version, and compiler from which Kubernetes @@ -4992,92 +5164,6 @@ SAN extension missing (either/or, based on the runtime environment) type: Counter stabilityLevel: ALPHA -- name: changes - subsystem: endpoint_slice_controller - help: Number of EndpointSlice changes - type: Counter - stabilityLevel: ALPHA - labels: - - operation -- name: desired_endpoint_slices - subsystem: endpoint_slice_controller - help: Number of EndpointSlices that would exist with perfect endpoint allocation - type: Gauge - stabilityLevel: ALPHA -- name: endpoints_added_per_sync - subsystem: endpoint_slice_controller - help: Number of endpoints added on each Service sync - type: Histogram - stabilityLevel: ALPHA - buckets: - - 2 - - 4 - - 8 - - 16 - - 32 - - 64 - - 128 - - 256 - - 512 - - 1024 - - 2048 - - 4096 - - 8192 - - 16384 - - 32768 -- name: endpoints_desired - subsystem: endpoint_slice_controller - help: Number of endpoints desired - type: Gauge - stabilityLevel: ALPHA -- name: endpoints_removed_per_sync - subsystem: endpoint_slice_controller - help: Number of endpoints removed on each Service sync - type: Histogram - stabilityLevel: ALPHA - buckets: - - 2 - - 4 - - 8 - - 16 - - 32 - - 64 - - 128 - - 256 - - 512 - - 1024 - - 2048 - - 4096 - - 8192 - - 16384 - - 32768 -- name: endpointslices_changed_per_sync - subsystem: endpoint_slice_controller - help: Number of EndpointSlices changed on each Service sync - type: Histogram - stabilityLevel: ALPHA - labels: - - topology - - traffic_distribution -- name: num_endpoint_slices - subsystem: endpoint_slice_controller - help: Number of EndpointSlices - type: Gauge - stabilityLevel: ALPHA -- name: services_count_by_traffic_distribution - subsystem: endpoint_slice_controller - help: Number of Services using some specific trafficDistribution - type: Gauge - stabilityLevel: ALPHA - labels: - - traffic_distribution -- name: syncs - subsystem: endpoint_slice_controller - help: Number of EndpointSlice syncs - type: Counter - stabilityLevel: ALPHA - labels: - - result - name: pod_security_errors_total help: Number of errors preventing normal evaluation. Non-fatal errors may result in the latest restricted profile being used for evaluation. diff --git a/test/instrumentation/documentation/documentation.md b/test/instrumentation/documentation/documentation.md index 1abb15733cf..6acaf5eecc6 100644 --- a/test/instrumentation/documentation/documentation.md +++ b/test/instrumentation/documentation/documentation.md @@ -8,7 +8,7 @@ description: >- ## Metrics (v1.31) - + This page details the metrics that different Kubernetes components export. You can query the metrics endpoint for these components using an HTTP scrape, and fetch the current metrics data in Prometheus format. @@ -291,6 +291,20 @@ Stable metrics observe strict API contracts and no labels can be added or remove Beta metrics observe a looser API contract than its stable counterparts. No labels can be removed from beta metrics during their lifetime, however, labels can be added while the metric is in the beta stage. This offers the assurance that beta metrics will honor existing dashboards and alerts, while allowing for amendments in the future.
+
apiserver_cel_compilation_duration_seconds
+
CEL compilation time in seconds.
+
    +
  • BETA
  • +
  • Histogram
  • +
+
+
apiserver_cel_evaluation_duration_seconds
+
CEL evaluation time in seconds.
+
    +
  • BETA
  • +
  • Histogram
  • +
+
apiserver_flowcontrol_current_executing_requests
Number of requests in initial (for a WATCH) or any (for a non-WATCH) execution stage in the API Priority and Fairness subsystem
    @@ -340,6 +354,20 @@ Beta metrics observe a looser API contract than its stable counterparts. No labe
  • Histogram
  • executeflow_schemapriority_level
+
apiserver_validating_admission_policy_check_duration_seconds
+
Validation admission latency for individual validation expressions in seconds, labeled by policy and further including binding and enforcement action taken.
+
    +
  • BETA
  • +
  • Histogram
  • +
  • enforcement_actionerror_typepolicypolicy_binding
+
+
apiserver_validating_admission_policy_check_total
+
Validation admission policy check total, labeled by policy and further identified by binding and enforcement action taken.
+
    +
  • BETA
  • +
  • Counter
  • +
  • enforcement_actionerror_typepolicypolicy_binding
+
disabled_metrics_total
The count of disabled metrics.
    @@ -620,20 +648,6 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
  • Counter
  • indexresource_prefix
-
apiserver_cel_compilation_duration_seconds
-
CEL compilation time in seconds.
-
    -
  • ALPHA
  • -
  • Histogram
  • -
-
-
apiserver_cel_evaluation_duration_seconds
-
CEL evaluation time in seconds.
-
    -
  • ALPHA
  • -
  • Histogram
  • -
-
apiserver_certificates_registry_csr_honored_duration_total
Total number of issued CSRs with a requested duration that was honored, sliced by signer (only kubernetes.io signer names are specifically identified)
    @@ -1215,6 +1229,13 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
  • Counter
  • code
+
apiserver_stream_tunnel_requests_total
+
Total number of requests that were handled by the StreamTunnelProxy, which processes streaming PortForward/V2
+
    +
  • ALPHA
  • +
  • Counter
  • +
  • code
+
apiserver_terminated_watchers_total
Counter of watchers closed due to unresponsiveness broken by resource type.
    @@ -1229,26 +1250,12 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
  • Counter
-
apiserver_validating_admission_policy_check_duration_seconds
-
Validation admission latency for individual validation expressions in seconds, labeled by policy and further including binding, state and enforcement action taken.
-
    -
  • ALPHA
  • -
  • Histogram
  • -
  • enforcement_actionpolicypolicy_bindingstate
-
-
apiserver_validating_admission_policy_check_total
-
Validation admission policy check total, labeled by policy and further identified by binding, enforcement action taken, and state.
+
apiserver_watch_cache_consistent_read_total
+
Counter for consistent reads from cache.
  • ALPHA
  • Counter
  • -
  • enforcement_actionpolicypolicy_bindingstate
-
-
apiserver_validating_admission_policy_definition_total
-
Validation admission policy count total, labeled by state and enforcement action.
-
    -
  • ALPHA
  • -
  • Counter
  • -
  • enforcement_actionstate
+
  • fallbackresourcesuccess
  • apiserver_watch_cache_events_dispatched_total
    Counter of events dispatched in watch cache broken by resource type.
    @@ -1705,6 +1712,13 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
  • Gauge
  • cidr
  • +
    kube_apiserver_clusterip_allocator_allocation_duration_seconds
    +
    Duration in seconds to allocate a Cluster IP by ServiceCIDR
    +
      +
    • ALPHA
    • +
    • Histogram
    • +
    • cidr
    +
    kube_apiserver_clusterip_allocator_allocation_errors_total
    Number of errors trying to allocate Cluster IPs
      @@ -1824,6 +1838,13 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Histogram
    • operation_type
    +
    kubelet_cgroup_version
    +
    cgroup version on the hosts.
    +
      +
    • ALPHA
    • +
    • Gauge
    • +
    +
    kubelet_container_log_filesystem_used_bytes
    Bytes used by the container's logs on the filesystem.
      @@ -2732,7 +2753,7 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
      • ALPHA
      • Custom
      • -
      • namespace
      +
    • namespacestorage_classvolume_attributes_class
    pv_collector_total_pv_count
    Gauge measuring total number of persistent volumes
    @@ -2753,7 +2774,7 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • ALPHA
    • Custom
    • -
    • namespace
    +
  • namespacestorage_classvolume_attributes_class
  • reconstruct_volume_operations_errors_total
    The number of volumes that failed reconstruction from the operating system during kubelet startup.
    @@ -2909,6 +2930,13 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
  • Gauge
  • managername
  • +
    scheduler_event_handling_duration_seconds
    +
    Event handling latency in seconds.
    +
      +
    • ALPHA
    • +
    • Histogram
    • +
    • event
    +
    scheduler_goroutines
    Number of running goroutines split by the work they do such as binding.
      @@ -2937,6 +2965,13 @@ Alpha metrics do not have any API guarantees. These metrics must be used at your
    • Histogram
    • extension_pointpluginstatus
    +
    scheduler_queueing_hint_execution_duration_seconds
    +
    Duration for running a queueing hint function of a plugin.
    +
      +
    • ALPHA
    • +
    • Histogram
    • +
    • eventhintplugin
    +
    scheduler_scheduler_cache_size
    Number of nodes, pods, and assumed (bound) pods in the scheduler cache.