From c2f67ac14119148fbbfb489ecf9ca7bd3b7aa092 Mon Sep 17 00:00:00 2001 From: Han Kang Date: Tue, 1 Nov 2022 13:50:12 -0700 Subject: [PATCH] add support for parsing custom collectors from the stability framework Change-Id: I1053b9f6956de571700c95b96e05c4377806a3cc --- test/instrumentation/decode_metric.go | 136 +++++++++-- .../documentation/documentation-list.yaml | 211 ++++++++++++++++++ .../documentation/documentation.md | 176 ++++++++++++++- test/instrumentation/find_stable_metric.go | 25 ++- test/instrumentation/metric.go | 1 + .../testdata/pkg/kubelet/metrics/metrics.go | 57 ++++- .../testdata/test-stable-metrics-list.yaml | 20 ++ 7 files changed, 608 insertions(+), 18 deletions(-) diff --git a/test/instrumentation/decode_metric.go b/test/instrumentation/decode_metric.go index 0e2d913483d..b7a53106bcb 100644 --- a/test/instrumentation/decode_metric.go +++ b/test/instrumentation/decode_metric.go @@ -45,7 +45,6 @@ func decodeMetricCalls(fs []*ast.CallExpr, metricsImportName string, variables m if m != nil { ms = append(ms, *m) } - } return ms, errors } @@ -65,6 +64,7 @@ func (c *metricDecoder) decodeNewMetricCall(fc *ast.CallExpr) (*metric, error) { case *ast.Ident: if v.Name == "NewTimingRatioHistogramVec" { m, err = c.decodeMetricVecForTimingRatioHistogram(fc) + m.Type = timingRatioHistogram return &m, err } } @@ -85,6 +85,8 @@ func (c *metricDecoder) decodeNewMetricCall(fc *ast.CallExpr) (*metric, error) { m, err = c.decodeMetricVec(fc) case "Labels", "HandlerOpts", "HandlerFor", "HandlerWithReset": return nil, nil + case "NewDesc": + m, err = c.decodeDesc(fc) default: return &m, newDecodeErrorf(fc, errNotDirectCall) } @@ -97,6 +99,8 @@ func (c *metricDecoder) decodeNewMetricCall(fc *ast.CallExpr) (*metric, error) { func getMetricType(functionName string) string { switch functionName { + case "NewDesc": + return customType case "NewCounter", "NewCounterVec": return counterMetricType case "NewGauge", "NewGaugeVec", "NewGaugeFunc": @@ -119,6 +123,77 @@ func (c *metricDecoder) decodeMetric(call *ast.CallExpr) (metric, error) { return c.decodeOpts(call.Args[0]) } +func (c *metricDecoder) decodeDesc(ce *ast.CallExpr) (metric, error) { + m := &metric{} + name, err := c.decodeString(ce.Args[0]) + if err != nil { + return *m, newDecodeErrorf(ce, "can't decode string") + } + m.Name = *name + help, err := c.decodeString(ce.Args[1]) + if err != nil { + return *m, newDecodeErrorf(ce, "can't decode string") + } + m.Help = *help + labels, err := c.decodeLabels(ce.Args[2]) + if err != nil { + return *m, newDecodeErrorf(ce, "can't decode labels") + } + m.Labels = labels + cLabels, err := c.decodeConstLabels(ce.Args[3]) + if err != nil { + return *m, newDecodeErrorf(ce, "can't decode const labels") + } + m.ConstLabels = cLabels + sl, err := decodeStabilityLevel(ce.Args[4], "metrics") + if sl != nil { + m.StabilityLevel = string(*sl) + } + deprecatedVersion, err := c.decodeString(ce.Args[5]) + if err != nil { + return *m, newDecodeErrorf(ce, "can't decode string") + } + if deprecatedVersion != nil { + m.DeprecatedVersion = *deprecatedVersion + } + return *m, nil +} + +func (c *metricDecoder) decodeString(expr ast.Expr) (*string, error) { + switch e := expr.(type) { + case *ast.BasicLit: + s, err := stringValue(e) + return &s, err + case *ast.Ident: + variableExpr, found := c.variables[e.Name] + if !found { + return nil, fmt.Errorf("can't decode string") + } + bl, ok := variableExpr.(*ast.BasicLit) + if !ok { + return nil, fmt.Errorf("can't decode string") + } + v, err := stringValue(bl) + return &v, err + case *ast.CallExpr: + firstArg, secondArg, thirdArg, err := c.decodeBuildFQNameArguments(e) + if err != nil { + return nil, err + } + se, ok := e.Fun.(*ast.SelectorExpr) + if ok { + functionName := se.Sel.Name + switch functionName { + case "BuildFQName": + n := metrics.BuildFQName(firstArg, secondArg, thirdArg) + return &n, nil + } + } + + } + return nil, fmt.Errorf("can't decode string") +} + func (c *metricDecoder) decodeMetricVec(call *ast.CallExpr) (metric, error) { if len(call.Args) != 2 { return metric{}, newDecodeErrorf(call, errInvalidNewMetricCall) @@ -186,19 +261,21 @@ func (c *metricDecoder) decodeLabelsFromArray(exprs []ast.Expr) ([]string, error func (c *metricDecoder) decodeLabels(expr ast.Expr) ([]string, error) { cl, ok := expr.(*ast.CompositeLit) if !ok { - id, ok := expr.(*ast.Ident) - if !ok { - return nil, newDecodeErrorf(expr, errInvalidNewMetricCall) + switch e := expr.(type) { + case *ast.Ident: + if e.Name == "nil" { + return []string{}, nil + } + variableExpr, found := c.variables[e.Name] + if !found { + return nil, newDecodeErrorf(expr, "couldn't find variable for labels") + } + cl2, ok := variableExpr.(*ast.CompositeLit) + if !ok { + return nil, newDecodeErrorf(expr, "couldn't interpret variable for labels") + } + cl = cl2 } - variableExpr, found := c.variables[id.Name] - if !found { - return nil, newDecodeErrorf(expr, "couldn't find variable for labels") - } - cl2, ok := variableExpr.(*ast.CompositeLit) - if !ok { - return nil, newDecodeErrorf(expr, "couldn't interpret variable for labels") - } - cl = cl2 } labels := make([]string, len(cl.Elts)) for i, el := range cl.Elts { @@ -280,7 +357,6 @@ func (c *metricDecoder) decodeOpts(expr ast.Expr) (metric, error) { if !ok { return m, newDecodeErrorf(expr, errExprNotIdent, v.X) } - variableExpr, found := c.variables[strings.Join([]string{s.Name, v.Sel.Name}, ".")] if !found { return m, newDecodeErrorf(expr, errBadImportedVariableAttribute) @@ -737,6 +813,38 @@ func decodeBucketArguments(fc *ast.CallExpr) (float64, float64, int, error) { return firstArg, secondArg, int(thirdArg), nil } +func (c *metricDecoder) decodeBuildFQNameArguments(fc *ast.CallExpr) (string, string, string, error) { + if len(fc.Args) != 3 { + return "", "", "", newDecodeErrorf(fc, "can't decode fq name args") + } + strArgs := make([]string, len(fc.Args)) + for i, elt := range fc.Args { + switch arg := elt.(type) { + case *ast.BasicLit: + if arg.Kind != token.STRING { + return "", "", "", newDecodeErrorf(fc, "can't decode fq name args") + } + strArgs[i] = strings.Trim(arg.Value, `"`) + case *ast.Ident: + s, err := c.decodeString(arg) + if err != nil { + return "", "", "", newDecodeErrorf(fc, "can't decode fq name args") + } + strArgs[i] = *s + case *ast.SelectorExpr: + id, ok := arg.X.(*ast.Ident) + expr, ok := c.variables[id.Name+"."+arg.Sel.Name] + if ok { + s, err := c.decodeString(expr) + if err != nil { + return "", "", "", newDecodeErrorf(fc, "can't decode fq name args") + } + strArgs[i] = *s + } + } + } + return strArgs[0], strArgs[1], strArgs[2], nil +} func decodeStabilityLevel(expr ast.Expr, metricsFrameworkImportName string) (*metrics.StabilityLevel, error) { se, ok := expr.(*ast.SelectorExpr) diff --git a/test/instrumentation/documentation/documentation-list.yaml b/test/instrumentation/documentation/documentation-list.yaml index 5585e648855..1081b9b0de8 100644 --- a/test/instrumentation/documentation/documentation-list.yaml +++ b/test/instrumentation/documentation/documentation-list.yaml @@ -274,6 +274,13 @@ help: Number of times the A/D Controller performed a forced detach type: Counter stabilityLevel: ALPHA +- name: attachdetach_controller_total_volumes + help: Number of volumes in A/D Controller + type: Custom + stabilityLevel: ALPHA + labels: + - plugin_name + - state - name: job_finished_total subsystem: job_controller help: The number of finished job @@ -459,6 +466,13 @@ - 2 - 4 - 8 +- name: storage_count_attachable_volumes_in_use + help: Measure number of volumes in use + type: Custom + stabilityLevel: ALPHA + labels: + - node + - volume_plugin - name: job_deletion_duration_seconds subsystem: ttl_after_finished_controller help: The time it took to delete the job since it became eligible for deletion @@ -558,6 +572,37 @@ help: Counter of certificate renewal errors. type: Counter stabilityLevel: ALPHA +- name: pv_collector_bound_pv_count + help: Gauge measuring number of persistent volume currently bound + type: Custom + stabilityLevel: ALPHA + labels: + - storage_class +- name: pv_collector_bound_pvc_count + help: Gauge measuring number of persistent volume claim currently bound + type: Custom + stabilityLevel: ALPHA + labels: + - namespace +- name: pv_collector_total_pv_count + help: Gauge measuring total number of persistent volumes + type: Custom + stabilityLevel: ALPHA + labels: + - plugin_name + - volume_mode +- name: pv_collector_unbound_pv_count + help: Gauge measuring number of persistent volume currently unbound + type: Custom + stabilityLevel: ALPHA + labels: + - storage_class +- name: pv_collector_unbound_pvc_count + help: Gauge measuring number of persistent volume claim currently unbound + type: Custom + stabilityLevel: ALPHA + labels: + - namespace - name: retroactive_storageclass_errors_total help: Total number of failed retroactive StorageClass assignments to persistent volume claim @@ -575,6 +620,30 @@ labels: - operation_name - plugin_name +- name: container_cpu_usage_seconds_total + help: Cumulative cpu time consumed by the container in core-seconds + type: Custom + stabilityLevel: ALPHA + labels: + - container + - pod + - namespace +- name: container_memory_working_set_bytes + help: Current working set of the container in bytes + type: Custom + stabilityLevel: ALPHA + labels: + - container + - pod + - namespace +- name: container_start_time_seconds + help: Start time of the container since unix epoch in seconds + type: Custom + stabilityLevel: ALPHA + labels: + - container + - pod + - namespace - name: cgroup_manager_duration_seconds subsystem: kubelet help: Duration in seconds for cgroup manager operations. Broken down by method. @@ -594,6 +663,15 @@ - 2.5 - 5 - 10 +- name: kubelet_container_log_filesystem_used_bytes + help: Bytes used by the container's logs on the filesystem. + type: Custom + stabilityLevel: ALPHA + labels: + - uid + - namespace + - pod + - container - name: containers_per_pod_count subsystem: kubelet help: The number of containers per pod. @@ -1044,6 +1122,85 @@ - 2.5 - 5 - 10 +- name: kubelet_volume_stats_available_bytes + help: Number of available bytes in the volume + type: Custom + stabilityLevel: ALPHA + labels: + - namespace + - persistentvolumeclaim +- name: kubelet_volume_stats_capacity_bytes + help: Capacity in bytes of the volume + type: Custom + stabilityLevel: ALPHA + labels: + - namespace + - persistentvolumeclaim +- name: kubelet_volume_stats_health_status_abnormal + help: Abnormal volume health status. The count is either 1 or 0. 1 indicates the + volume is unhealthy, 0 indicates volume is healthy + type: Custom + stabilityLevel: ALPHA + labels: + - namespace + - persistentvolumeclaim +- name: kubelet_volume_stats_inodes + help: Maximum number of inodes in the volume + type: Custom + stabilityLevel: ALPHA + labels: + - namespace + - persistentvolumeclaim +- name: kubelet_volume_stats_inodes_free + help: Number of free inodes in the volume + type: Custom + stabilityLevel: ALPHA + labels: + - namespace + - persistentvolumeclaim +- name: kubelet_volume_stats_inodes_used + help: Number of used inodes in the volume + type: Custom + stabilityLevel: ALPHA + labels: + - namespace + - persistentvolumeclaim +- name: kubelet_volume_stats_used_bytes + help: Number of used bytes in the volume + type: Custom + stabilityLevel: ALPHA + labels: + - namespace + - persistentvolumeclaim +- name: node_cpu_usage_seconds_total + help: Cumulative cpu time consumed by the node in core-seconds + type: Custom + stabilityLevel: ALPHA +- name: node_memory_working_set_bytes + help: Current working set of the node in bytes + type: Custom + stabilityLevel: ALPHA +- name: plugin_manager_total_plugins + help: Number of plugins in Plugin Manager + type: Custom + stabilityLevel: ALPHA + labels: + - socket_path + - state +- name: pod_cpu_usage_seconds_total + help: Cumulative cpu time consumed by the pod in core-seconds + type: Custom + stabilityLevel: ALPHA + labels: + - pod + - namespace +- name: pod_memory_working_set_bytes + help: Current working set of the pod in bytes + type: Custom + stabilityLevel: ALPHA + labels: + - pod + - namespace - name: probe_duration_seconds subsystem: prober help: Duration in seconds for a probe response. @@ -1067,6 +1224,10 @@ - pod_uid - probe_type - result +- name: scrape_error + help: 1 if there was an error while getting container metrics, 0 otherwise + type: Custom + stabilityLevel: ALPHA - name: csr_honored_duration_total subsystem: certificates_registry namespace: apiserver @@ -1284,6 +1445,13 @@ mount -o context option. type: Gauge stabilityLevel: ALPHA +- name: volume_manager_total_volumes + help: Number of volumes in Volume Manager + type: Custom + stabilityLevel: ALPHA + labels: + - plugin_name + - state - name: allocated_ips subsystem: clusterip_allocator namespace: kube_apiserver @@ -1334,6 +1502,34 @@ stabilityLevel: ALPHA labels: - usage +- name: kube_pod_resource_limit + help: Resources limit for workloads on the cluster, broken down by pod. This shows + the resource usage the scheduler and kubelet expect per pod for resources along + with the unit for the resource if any. + type: Custom + stabilityLevel: ALPHA + labels: + - namespace + - pod + - node + - scheduler + - priority + - resource + - unit +- name: kube_pod_resource_request + help: Resources requested by workloads on the cluster, broken down by pod. This + shows the resource usage the scheduler and kubelet expect per pod for resources + along with the unit for the resource if any. + type: Custom + stabilityLevel: ALPHA + labels: + - namespace + - pod + - node + - scheduler + - priority + - resource + - unit - name: e2e_scheduling_duration_seconds subsystem: scheduler help: E2e scheduling latency in seconds (scheduling algorithm + binding). This metric @@ -3332,6 +3528,13 @@ stabilityLevel: ALPHA labels: - reason +- name: aggregator_unavailable_apiservice + help: Gauge of APIServices which are marked as unavailable broken down by APIService + name. + type: Custom + stabilityLevel: ALPHA + labels: + - name - name: aggregator_unavailable_apiservice_total help: Counter of APIServices which are marked as unavailable broken down by APIService name and reason. @@ -3489,6 +3692,14 @@ stabilityLevel: ALPHA labels: - operation +- name: cloudprovider_vsphere_vcenter_versions + help: Versions for connected vSphere vCenters + type: Custom + stabilityLevel: ALPHA + labels: + - hostname + - version + - build - name: get_token_count help: Counter of total Token() requests to the alternate token source type: Counter diff --git a/test/instrumentation/documentation/documentation.md b/test/instrumentation/documentation/documentation.md index b197b5b9e3f..2531eaabcc7 100644 --- a/test/instrumentation/documentation/documentation.md +++ b/test/instrumentation/documentation/documentation.md @@ -6,7 +6,7 @@ description: >- --- -## Metrics (auto-generated 2022 Oct 31) +## Metrics (auto-generated 2022 Nov 01) This page details the metrics that different Kubernetes components export. You can query the metrics endpoint for these components using an HTTP scrape, and fetch the current metrics data in Prometheus format. @@ -176,6 +176,12 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu Gauge of OpenAPI v2 spec regeneration duration in seconds.
reason
None +aggregator_unavailable_apiservice +ALPHA +Custom +Gauge of APIServices which are marked as unavailable broken down by APIService name. +
name
+None aggregator_unavailable_apiservice_total ALPHA Counter @@ -674,6 +680,12 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu Number of times the A/D Controller performed a forced detach None None +attachdetach_controller_total_volumes +ALPHA +Custom +Number of volumes in A/D Controller +
plugin_name
state
+None authenticated_user_requests ALPHA Counter @@ -806,6 +818,30 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu vsphere operation errors
operation
None +cloudprovider_vsphere_vcenter_versions +ALPHA +Custom +Versions for connected vSphere vCenters +
hostname
version
build
+None +container_cpu_usage_seconds_total +ALPHA +Custom +Cumulative cpu time consumed by the container in core-seconds +
container
pod
namespace
+None +container_memory_working_set_bytes +ALPHA +Custom +Current working set of the container in bytes +
container
pod
namespace
+None +container_start_time_seconds +ALPHA +Custom +Start time of the container since unix epoch in seconds +
container
pod
namespace
+None cronjob_controller_cronjob_job_creation_skew_duration_seconds ALPHA Histogram @@ -1052,6 +1088,18 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu Total number of requests for pods/logs sliced by usage type: enforce_tls, skip_tls_allowed, skip_tls_denied
usage
None +kube_pod_resource_limit +ALPHA +Custom +Resources limit for workloads on the cluster, broken down by pod. This shows the resource usage the scheduler and kubelet expect per pod for resources along with the unit for the resource if any. +
namespace
pod
node
scheduler
priority
resource
unit
+None +kube_pod_resource_request +ALPHA +Custom +Resources requested by workloads on the cluster, broken down by pod. This shows the resource usage the scheduler and kubelet expect per pod for resources along with the unit for the resource if any. +
namespace
pod
node
scheduler
priority
resource
unit
+None kubelet_certificate_manager_client_expiration_renew_errors ALPHA Counter @@ -1082,6 +1130,12 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu Duration in seconds for cgroup manager operations. Broken down by method.
operation_type
None +kubelet_container_log_filesystem_used_bytes +ALPHA +Custom +Bytes used by the container's logs on the filesystem. +
uid
namespace
pod
container
+None kubelet_containers_per_pod_count ALPHA Histogram @@ -1358,6 +1412,48 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu Duration in seconds to calculate volume stats
metric_source
None +kubelet_volume_stats_available_bytes +ALPHA +Custom +Number of available bytes in the volume +
namespace
persistentvolumeclaim
+None +kubelet_volume_stats_capacity_bytes +ALPHA +Custom +Capacity in bytes of the volume +
namespace
persistentvolumeclaim
+None +kubelet_volume_stats_health_status_abnormal +ALPHA +Custom +Abnormal volume health status. The count is either 1 or 0. 1 indicates the volume is unhealthy, 0 indicates volume is healthy +
namespace
persistentvolumeclaim
+None +kubelet_volume_stats_inodes +ALPHA +Custom +Maximum number of inodes in the volume +
namespace
persistentvolumeclaim
+None +kubelet_volume_stats_inodes_free +ALPHA +Custom +Number of free inodes in the volume +
namespace
persistentvolumeclaim
+None +kubelet_volume_stats_inodes_used +ALPHA +Custom +Number of used inodes in the volume +
namespace
persistentvolumeclaim
+None +kubelet_volume_stats_used_bytes +ALPHA +Custom +Number of used bytes in the volume +
namespace
persistentvolumeclaim
+None kubeproxy_network_programming_duration_seconds ALPHA Histogram @@ -1484,6 +1580,12 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu Gauge measuring number of registered Nodes per zones.
zone
None +node_cpu_usage_seconds_total +ALPHA +Custom +Cumulative cpu time consumed by the node in core-seconds +None +None node_ipam_controller_cidrset_allocation_tries_per_request ALPHA Histogram @@ -1532,12 +1634,36 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu Gauge measuring percentage of allocated CIDRs.
clusterCIDR
None +node_memory_working_set_bytes +ALPHA +Custom +Current working set of the node in bytes +None +None number_of_l4_ilbs ALPHA Gauge Number of L4 ILBs
feature
None +plugin_manager_total_plugins +ALPHA +Custom +Number of plugins in Plugin Manager +
socket_path
state
+None +pod_cpu_usage_seconds_total +ALPHA +Custom +Cumulative cpu time consumed by the pod in core-seconds +
pod
namespace
+None +pod_memory_working_set_bytes +ALPHA +Custom +Current working set of the pod in bytes +
pod
namespace
+None pod_security_errors_total ALPHA Counter @@ -1568,6 +1694,36 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu Cumulative number of a liveness, readiness or startup probe for a container by result.
container
namespace
pod
pod_uid
probe_type
result
None +pv_collector_bound_pv_count +ALPHA +Custom +Gauge measuring number of persistent volume currently bound +
storage_class
+None +pv_collector_bound_pvc_count +ALPHA +Custom +Gauge measuring number of persistent volume claim currently bound +
namespace
+None +pv_collector_total_pv_count +ALPHA +Custom +Gauge measuring total number of persistent volumes +
plugin_name
volume_mode
+None +pv_collector_unbound_pv_count +ALPHA +Custom +Gauge measuring number of persistent volume currently unbound +
storage_class
+None +pv_collector_unbound_pvc_count +ALPHA +Custom +Gauge measuring number of persistent volume claim currently unbound +
namespace
+None replicaset_controller_sorting_deletion_age_ratio ALPHA Histogram @@ -1712,6 +1868,12 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu Volume scheduling stage error count
operation
None +scrape_error +ALPHA +Custom +1 if there was an error while getting container metrics, 0 otherwise +None +None service_controller_nodesync_latency_seconds ALPHA Histogram @@ -1742,6 +1904,12 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu Cumulative valid projected service account tokens used None None +storage_count_attachable_volumes_in_use +ALPHA +Custom +Measure number of volumes in use +
node
volume_plugin
+None storage_operation_duration_seconds ALPHA Histogram @@ -1796,6 +1964,12 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu Number of volumes whose SELinux context was fine and will be mounted with mount -o context option. None None +volume_manager_total_volumes +ALPHA +Custom +Number of volumes in Volume Manager +
plugin_name
state
+None volume_operation_total_errors ALPHA Counter diff --git a/test/instrumentation/find_stable_metric.go b/test/instrumentation/find_stable_metric.go index 3542c1e9d50..a2b9f0af093 100644 --- a/test/instrumentation/find_stable_metric.go +++ b/test/instrumentation/find_stable_metric.go @@ -64,7 +64,30 @@ func contains(v metrics.StabilityLevel, a []metrics.StabilityLevel) bool { func (f *stableMetricFinder) Visit(node ast.Node) (w ast.Visitor) { switch opts := node.(type) { case *ast.CallExpr: - f.currentFunctionCall = opts + if se, ok := opts.Fun.(*ast.SelectorExpr); ok { + if se.Sel.Name == "NewDesc" { + sl, _ := decodeStabilityLevel(opts.Args[4], "metrics") + if sl != nil { + classes := []metrics.StabilityLevel{metrics.STABLE, metrics.BETA} + if ALL_STABILITY_CLASSES { + classes = append(classes, metrics.ALPHA) + } + switch { + case contains(*sl, classes): + f.stableMetricsFunctionCalls = append(f.stableMetricsFunctionCalls, opts) + f.currentFunctionCall = nil + default: + return nil + } + } + + } else { + f.currentFunctionCall = opts + } + + } else { + f.currentFunctionCall = opts + } case *ast.CompositeLit: se, ok := opts.Type.(*ast.SelectorExpr) if !ok { diff --git a/test/instrumentation/metric.go b/test/instrumentation/metric.go index 35abb759a2e..061596bcae9 100644 --- a/test/instrumentation/metric.go +++ b/test/instrumentation/metric.go @@ -26,6 +26,7 @@ const ( histogramMetricType = "Histogram" summaryMetricType = "Summary" timingRatioHistogram = "TimingRatioHistogram" + customType = "Custom" ) type metric struct { diff --git a/test/instrumentation/testdata/pkg/kubelet/metrics/metrics.go b/test/instrumentation/testdata/pkg/kubelet/metrics/metrics.go index 47da0b3721f..8e87f70030a 100644 --- a/test/instrumentation/testdata/pkg/kubelet/metrics/metrics.go +++ b/test/instrumentation/testdata/pkg/kubelet/metrics/metrics.go @@ -20,10 +20,10 @@ import ( "sync" "time" + "k8s.io/apimachinery/pkg/types" "k8s.io/component-base/metrics" "k8s.io/component-base/metrics/legacyregistry" - - "k8s.io/apimachinery/pkg/types" + kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics" ) // This const block defines the metric names for the kubelet metrics. @@ -63,6 +63,27 @@ const ( RunPodSandboxErrorsKey = "run_podsandbox_errors_total" ) +const ( + // Subsystem names. + pvControllerSubsystem = "pv_collector" + + // Metric names. + totalPVKey = "total_pv_count" + boundPVKey = "bound_pv_count" + unboundPVKey = "unbound_pv_count" + boundPVCKey = "bound_pvc_count" + unboundPVCKey = "unbound_pvc_count" + + // Label names. + namespaceLabel = "namespace" + storageClassLabel = "storage_class" + pluginNameLabel = "plugin_name" + volumeModeLabel = "volume_mode" + + // String to use when plugin name cannot be determined + pluginNameNotAvailable = "N/A" +) + const ( requestKind = "request_kind" priorityLevel = "priority_level" @@ -503,6 +524,38 @@ var ( StabilityLevel: metrics.BETA, }, ) + + volumeManagerTotalVolumes = "volume_manager_total_volumes" + + _ = metrics.NewDesc( + volumeManagerTotalVolumes, + "Number of volumes in Volume Manager", + []string{"plugin_name", "state"}, + nil, + metrics.STABLE, "", + ) + + _ = metrics.NewDesc( + metrics.BuildFQName("test", "beta", "desc"), + "Number of volumes in Volume Manager", + nil, + map[string]string{"alalala": "lalalal"}, + metrics.BETA, "", + ) + _ = metrics.NewDesc( + "test_desc_alpha", + "Number of volumes in Volume Manager", + []string{"plugin_name", "state"}, + map[string]string{"alalala": "lalalal"}, + metrics.ALPHA, "", + ) + + _ = metrics.NewDesc( + metrics.BuildFQName("", kubeletmetrics.KubeletSubsystem, kubeletmetrics.VolumeStatsCapacityBytesKey), + "Capacity in bytes of the volume", + []string{"namespace", "persistentvolumeclaim"}, nil, + metrics.BETA, "", + ) ) var registerMetrics sync.Once diff --git a/test/instrumentation/testdata/test-stable-metrics-list.yaml b/test/instrumentation/testdata/test-stable-metrics-list.yaml index ea0955f57cf..4b6453d943f 100644 --- a/test/instrumentation/testdata/test-stable-metrics-list.yaml +++ b/test/instrumentation/testdata/test-stable-metrics-list.yaml @@ -185,6 +185,13 @@ - 119.20928955078125 - 298.0232238769531 - 745.0580596923828 +- name: kubelet_volume_stats_capacity_bytes + help: Capacity in bytes of the volume + type: Custom + stabilityLevel: BETA + labels: + - namespace + - persistentvolumeclaim - name: priority_level_seat_utilization subsystem: subsystem namespace: namespace @@ -210,6 +217,12 @@ - 1 constLabels: phase: executing +- name: test_beta_desc + help: Number of volumes in Volume Manager + type: Custom + stabilityLevel: BETA + constLabels: + alalala: lalalal - name: label subsystem: const namespace: test @@ -281,3 +294,10 @@ - 2.5 - 5 - 10 +- name: volume_manager_total_volumes + help: Number of volumes in Volume Manager + type: Custom + stabilityLevel: STABLE + labels: + - plugin_name + - state