From c2f67ac14119148fbbfb489ecf9ca7bd3b7aa092 Mon Sep 17 00:00:00 2001
From: Han Kang <hankang@google.com>
Date: Tue, 1 Nov 2022 13:50:12 -0700
Subject: [PATCH] add support for parsing custom collectors from the stability
 framework

Change-Id: I1053b9f6956de571700c95b96e05c4377806a3cc
---
 test/instrumentation/decode_metric.go         | 136 +++++++++--
 .../documentation/documentation-list.yaml     | 211 ++++++++++++++++++
 .../documentation/documentation.md            | 176 ++++++++++++++-
 test/instrumentation/find_stable_metric.go    |  25 ++-
 test/instrumentation/metric.go                |   1 +
 .../testdata/pkg/kubelet/metrics/metrics.go   |  57 ++++-
 .../testdata/test-stable-metrics-list.yaml    |  20 ++
 7 files changed, 608 insertions(+), 18 deletions(-)

diff --git a/test/instrumentation/decode_metric.go b/test/instrumentation/decode_metric.go
index 0e2d913483d..b7a53106bcb 100644
--- a/test/instrumentation/decode_metric.go
+++ b/test/instrumentation/decode_metric.go
@@ -45,7 +45,6 @@ func decodeMetricCalls(fs []*ast.CallExpr, metricsImportName string, variables m
 		if m != nil {
 			ms = append(ms, *m)
 		}
-
 	}
 	return ms, errors
 }
@@ -65,6 +64,7 @@ func (c *metricDecoder) decodeNewMetricCall(fc *ast.CallExpr) (*metric, error) {
 		case *ast.Ident:
 			if v.Name == "NewTimingRatioHistogramVec" {
 				m, err = c.decodeMetricVecForTimingRatioHistogram(fc)
+				m.Type = timingRatioHistogram
 				return &m, err
 			}
 		}
@@ -85,6 +85,8 @@ func (c *metricDecoder) decodeNewMetricCall(fc *ast.CallExpr) (*metric, error) {
 		m, err = c.decodeMetricVec(fc)
 	case "Labels", "HandlerOpts", "HandlerFor", "HandlerWithReset":
 		return nil, nil
+	case "NewDesc":
+		m, err = c.decodeDesc(fc)
 	default:
 		return &m, newDecodeErrorf(fc, errNotDirectCall)
 	}
@@ -97,6 +99,8 @@ func (c *metricDecoder) decodeNewMetricCall(fc *ast.CallExpr) (*metric, error) {
 
 func getMetricType(functionName string) string {
 	switch functionName {
+	case "NewDesc":
+		return customType
 	case "NewCounter", "NewCounterVec":
 		return counterMetricType
 	case "NewGauge", "NewGaugeVec", "NewGaugeFunc":
@@ -119,6 +123,100 @@ func (c *metricDecoder) decodeMetric(call *ast.CallExpr) (metric, error) {
 	return c.decodeOpts(call.Args[0])
 }
 
+// decodeDesc decodes a NewDesc call into a metric.
+//
+// The six positional arguments are read in order as: name, help, labels,
+// constant labels, stability level and deprecated version. An error is
+// returned if the call does not have exactly six arguments or if any
+// argument cannot be decoded.
+func (c *metricDecoder) decodeDesc(ce *ast.CallExpr) (metric, error) {
+	m := metric{}
+	// Guard the positional indexing below against malformed calls.
+	if len(ce.Args) != 6 {
+		return m, newDecodeErrorf(ce, errInvalidNewMetricCall)
+	}
+	name, err := c.decodeString(ce.Args[0])
+	if err != nil {
+		return m, newDecodeErrorf(ce, "can't decode string")
+	}
+	m.Name = *name
+	help, err := c.decodeString(ce.Args[1])
+	if err != nil {
+		return m, newDecodeErrorf(ce, "can't decode string")
+	}
+	m.Help = *help
+	labels, err := c.decodeLabels(ce.Args[2])
+	if err != nil {
+		return m, newDecodeErrorf(ce, "can't decode labels")
+	}
+	m.Labels = labels
+	cLabels, err := c.decodeConstLabels(ce.Args[3])
+	if err != nil {
+		return m, newDecodeErrorf(ce, "can't decode const labels")
+	}
+	m.ConstLabels = cLabels
+	// NOTE(review): the framework import name is hard-coded to "metrics";
+	// confirm callers never import the package under another alias.
+	sl, err := decodeStabilityLevel(ce.Args[4], "metrics")
+	if err != nil {
+		return m, newDecodeErrorf(ce, "can't decode stability level")
+	}
+	if sl != nil {
+		m.StabilityLevel = string(*sl)
+	}
+	deprecatedVersion, err := c.decodeString(ce.Args[5])
+	if err != nil {
+		return m, newDecodeErrorf(ce, "can't decode string")
+	}
+	m.DeprecatedVersion = *deprecatedVersion
+	return m, nil
+}
+
+// decodeString resolves expr to a string value.
+//
+// Supported forms are a basic literal, an identifier bound in c.variables to
+// a basic literal, and a call whose selector is BuildFQName, which is
+// evaluated with metrics.BuildFQName. The returned pointer is non-nil
+// whenever the error is nil.
+func (c *metricDecoder) decodeString(expr ast.Expr) (*string, error) {
+	switch e := expr.(type) {
+	case *ast.BasicLit:
+		s, err := stringValue(e)
+		if err != nil {
+			return nil, err
+		}
+		return &s, nil
+	case *ast.Ident:
+		variableExpr, found := c.variables[e.Name]
+		if !found {
+			return nil, fmt.Errorf("can't decode string")
+		}
+		bl, ok := variableExpr.(*ast.BasicLit)
+		if !ok {
+			return nil, fmt.Errorf("can't decode string")
+		}
+		s, err := stringValue(bl)
+		if err != nil {
+			return nil, err
+		}
+		return &s, nil
+	case *ast.CallExpr:
+		// Confirm the callee is BuildFQName before interpreting the
+		// arguments as name components.
+		se, ok := e.Fun.(*ast.SelectorExpr)
+		if !ok || se.Sel.Name != "BuildFQName" {
+			break
+		}
+		namespace, subsystem, name, err := c.decodeBuildFQNameArguments(e)
+		if err != nil {
+			return nil, err
+		}
+		fqName := metrics.BuildFQName(namespace, subsystem, name)
+		return &fqName, nil
+	}
+	return nil, fmt.Errorf("can't decode string")
+}
+
 func (c *metricDecoder) decodeMetricVec(call *ast.CallExpr) (metric, error) {
 	if len(call.Args) != 2 {
 		return metric{}, newDecodeErrorf(call, errInvalidNewMetricCall)
@@ -186,19 +261,27 @@ func (c *metricDecoder) decodeLabelsFromArray(exprs []ast.Expr) ([]string, error
 func (c *metricDecoder) decodeLabels(expr ast.Expr) ([]string, error) {
 	cl, ok := expr.(*ast.CompositeLit)
 	if !ok {
-		id, ok := expr.(*ast.Ident)
-		if !ok {
-			return nil, newDecodeErrorf(expr, errInvalidNewMetricCall)
+		switch e := expr.(type) {
+		case *ast.Ident:
+			// The identifier nil decodes to an empty label list.
+			if e.Name == "nil" {
+				return []string{}, nil
+			}
+			// Any other identifier must be bound to a composite literal.
+			variableExpr, found := c.variables[e.Name]
+			if !found {
+				return nil, newDecodeErrorf(expr, "couldn't find variable for labels")
+			}
+			cl2, ok := variableExpr.(*ast.CompositeLit)
+			if !ok {
+				return nil, newDecodeErrorf(expr, "couldn't interpret variable for labels")
+			}
+			cl = cl2
+		default:
+			// Without this case cl stays nil for other expression kinds and
+			// the cl.Elts access below would panic.
+			return nil, newDecodeErrorf(expr, errInvalidNewMetricCall)
 		}
-		variableExpr, found := c.variables[id.Name]
-		if !found {
-			return nil, newDecodeErrorf(expr, "couldn't find variable for labels")
-		}
-		cl2, ok := variableExpr.(*ast.CompositeLit)
-		if !ok {
-			return nil, newDecodeErrorf(expr, "couldn't interpret variable for labels")
-		}
-		cl = cl2
 	}
 	labels := make([]string, len(cl.Elts))
 	for i, el := range cl.Elts {
@@ -280,7 +357,6 @@ func (c *metricDecoder) decodeOpts(expr ast.Expr) (metric, error) {
 				if !ok {
 					return m, newDecodeErrorf(expr, errExprNotIdent, v.X)
 				}
-
 				variableExpr, found := c.variables[strings.Join([]string{s.Name, v.Sel.Name}, ".")]
 				if !found {
 					return m, newDecodeErrorf(expr, errBadImportedVariableAttribute)
@@ -737,6 +813,55 @@ func decodeBucketArguments(fc *ast.CallExpr) (float64, float64, int, error) {
 
 	return firstArg, secondArg, int(thirdArg), nil
 }
+
+// decodeBuildFQNameArguments resolves the three arguments of a BuildFQName
+// call (in call order) to their string values.
+//
+// Each argument may be a string literal, an identifier that decodeString can
+// resolve, or a selector such as pkg.Name that is looked up in c.variables
+// under the key "pkg.Name". An error is returned when the call does not have
+// exactly three arguments or when an argument of a supported form cannot be
+// decoded.
+func (c *metricDecoder) decodeBuildFQNameArguments(fc *ast.CallExpr) (string, string, string, error) {
+	if len(fc.Args) != 3 {
+		return "", "", "", newDecodeErrorf(fc, "can't decode fq name args")
+	}
+	strArgs := make([]string, len(fc.Args))
+	for i, elt := range fc.Args {
+		switch arg := elt.(type) {
+		case *ast.BasicLit:
+			if arg.Kind != token.STRING {
+				return "", "", "", newDecodeErrorf(fc, "can't decode fq name args")
+			}
+			strArgs[i] = strings.Trim(arg.Value, `"`)
+		case *ast.Ident:
+			s, err := c.decodeString(arg)
+			if err != nil {
+				return "", "", "", newDecodeErrorf(fc, "can't decode fq name args")
+			}
+			strArgs[i] = *s
+		case *ast.SelectorExpr:
+			// Only simple pkg.Name selectors can be looked up; for anything
+			// else (e.g. a.b.Name) the assertion yields a nil *ast.Ident.
+			id, ok := arg.X.(*ast.Ident)
+			if !ok {
+				return "", "", "", newDecodeErrorf(fc, "can't decode fq name args")
+			}
+			// A selector that is missing from c.variables leaves this
+			// component as the empty string rather than failing the call.
+			expr, found := c.variables[id.Name+"."+arg.Sel.Name]
+			if !found {
+				continue
+			}
+			s, err := c.decodeString(expr)
+			if err != nil {
+				return "", "", "", newDecodeErrorf(fc, "can't decode fq name args")
+			}
+			strArgs[i] = *s
+		}
+	}
+	return strArgs[0], strArgs[1], strArgs[2], nil
+}
 
 func decodeStabilityLevel(expr ast.Expr, metricsFrameworkImportName string) (*metrics.StabilityLevel, error) {
 	se, ok := expr.(*ast.SelectorExpr)
diff --git a/test/instrumentation/documentation/documentation-list.yaml b/test/instrumentation/documentation/documentation-list.yaml
index 5585e648855..1081b9b0de8 100644
--- a/test/instrumentation/documentation/documentation-list.yaml
+++ b/test/instrumentation/documentation/documentation-list.yaml
@@ -274,6 +274,13 @@
   help: Number of times the A/D Controller performed a forced detach
   type: Counter
   stabilityLevel: ALPHA
+- name: attachdetach_controller_total_volumes
+  help: Number of volumes in A/D Controller
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - plugin_name
+  - state
 - name: job_finished_total
   subsystem: job_controller
   help: The number of finished job
@@ -459,6 +466,13 @@
   - 2
   - 4
   - 8
+- name: storage_count_attachable_volumes_in_use
+  help: Measure number of volumes in use
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - node
+  - volume_plugin
 - name: job_deletion_duration_seconds
   subsystem: ttl_after_finished_controller
   help: The time it took to delete the job since it became eligible for deletion
@@ -558,6 +572,37 @@
   help: Counter of certificate renewal errors.
   type: Counter
   stabilityLevel: ALPHA
+- name: pv_collector_bound_pv_count
+  help: Gauge measuring number of persistent volume currently bound
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - storage_class
+- name: pv_collector_bound_pvc_count
+  help: Gauge measuring number of persistent volume claim currently bound
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - namespace
+- name: pv_collector_total_pv_count
+  help: Gauge measuring total number of persistent volumes
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - plugin_name
+  - volume_mode
+- name: pv_collector_unbound_pv_count
+  help: Gauge measuring number of persistent volume currently unbound
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - storage_class
+- name: pv_collector_unbound_pvc_count
+  help: Gauge measuring number of persistent volume claim currently unbound
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - namespace
 - name: retroactive_storageclass_errors_total
   help: Total number of failed retroactive StorageClass assignments to persistent
     volume claim
@@ -575,6 +620,30 @@
   labels:
   - operation_name
   - plugin_name
+- name: container_cpu_usage_seconds_total
+  help: Cumulative cpu time consumed by the container in core-seconds
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - container
+  - pod
+  - namespace
+- name: container_memory_working_set_bytes
+  help: Current working set of the container in bytes
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - container
+  - pod
+  - namespace
+- name: container_start_time_seconds
+  help: Start time of the container since unix epoch in seconds
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - container
+  - pod
+  - namespace
 - name: cgroup_manager_duration_seconds
   subsystem: kubelet
   help: Duration in seconds for cgroup manager operations. Broken down by method.
@@ -594,6 +663,15 @@
   - 2.5
   - 5
   - 10
+- name: kubelet_container_log_filesystem_used_bytes
+  help: Bytes used by the container's logs on the filesystem.
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - uid
+  - namespace
+  - pod
+  - container
 - name: containers_per_pod_count
   subsystem: kubelet
   help: The number of containers per pod.
@@ -1044,6 +1122,85 @@
   - 2.5
   - 5
   - 10
+- name: kubelet_volume_stats_available_bytes
+  help: Number of available bytes in the volume
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - namespace
+  - persistentvolumeclaim
+- name: kubelet_volume_stats_capacity_bytes
+  help: Capacity in bytes of the volume
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - namespace
+  - persistentvolumeclaim
+- name: kubelet_volume_stats_health_status_abnormal
+  help: Abnormal volume health status. The count is either 1 or 0. 1 indicates the
+    volume is unhealthy, 0 indicates volume is healthy
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - namespace
+  - persistentvolumeclaim
+- name: kubelet_volume_stats_inodes
+  help: Maximum number of inodes in the volume
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - namespace
+  - persistentvolumeclaim
+- name: kubelet_volume_stats_inodes_free
+  help: Number of free inodes in the volume
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - namespace
+  - persistentvolumeclaim
+- name: kubelet_volume_stats_inodes_used
+  help: Number of used inodes in the volume
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - namespace
+  - persistentvolumeclaim
+- name: kubelet_volume_stats_used_bytes
+  help: Number of used bytes in the volume
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - namespace
+  - persistentvolumeclaim
+- name: node_cpu_usage_seconds_total
+  help: Cumulative cpu time consumed by the node in core-seconds
+  type: Custom
+  stabilityLevel: ALPHA
+- name: node_memory_working_set_bytes
+  help: Current working set of the node in bytes
+  type: Custom
+  stabilityLevel: ALPHA
+- name: plugin_manager_total_plugins
+  help: Number of plugins in Plugin Manager
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - socket_path
+  - state
+- name: pod_cpu_usage_seconds_total
+  help: Cumulative cpu time consumed by the pod in core-seconds
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - pod
+  - namespace
+- name: pod_memory_working_set_bytes
+  help: Current working set of the pod in bytes
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - pod
+  - namespace
 - name: probe_duration_seconds
   subsystem: prober
   help: Duration in seconds for a probe response.
@@ -1067,6 +1224,10 @@
   - pod_uid
   - probe_type
   - result
+- name: scrape_error
+  help: 1 if there was an error while getting container metrics, 0 otherwise
+  type: Custom
+  stabilityLevel: ALPHA
 - name: csr_honored_duration_total
   subsystem: certificates_registry
   namespace: apiserver
@@ -1284,6 +1445,13 @@
     mount -o context option.
   type: Gauge
   stabilityLevel: ALPHA
+- name: volume_manager_total_volumes
+  help: Number of volumes in Volume Manager
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - plugin_name
+  - state
 - name: allocated_ips
   subsystem: clusterip_allocator
   namespace: kube_apiserver
@@ -1334,6 +1502,34 @@
   stabilityLevel: ALPHA
   labels:
   - usage
+- name: kube_pod_resource_limit
+  help: Resources limit for workloads on the cluster, broken down by pod. This shows
+    the resource usage the scheduler and kubelet expect per pod for resources along
+    with the unit for the resource if any.
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - namespace
+  - pod
+  - node
+  - scheduler
+  - priority
+  - resource
+  - unit
+- name: kube_pod_resource_request
+  help: Resources requested by workloads on the cluster, broken down by pod. This
+    shows the resource usage the scheduler and kubelet expect per pod for resources
+    along with the unit for the resource if any.
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - namespace
+  - pod
+  - node
+  - scheduler
+  - priority
+  - resource
+  - unit
 - name: e2e_scheduling_duration_seconds
   subsystem: scheduler
   help: E2e scheduling latency in seconds (scheduling algorithm + binding). This metric
@@ -3332,6 +3528,13 @@
   stabilityLevel: ALPHA
   labels:
   - reason
+- name: aggregator_unavailable_apiservice
+  help: Gauge of APIServices which are marked as unavailable broken down by APIService
+    name.
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - name
 - name: aggregator_unavailable_apiservice_total
   help: Counter of APIServices which are marked as unavailable broken down by APIService
     name and reason.
@@ -3489,6 +3692,14 @@
   stabilityLevel: ALPHA
   labels:
   - operation
+- name: cloudprovider_vsphere_vcenter_versions
+  help: Versions for connected vSphere vCenters
+  type: Custom
+  stabilityLevel: ALPHA
+  labels:
+  - hostname
+  - version
+  - build
 - name: get_token_count
   help: Counter of total Token() requests to the alternate token source
   type: Counter
diff --git a/test/instrumentation/documentation/documentation.md b/test/instrumentation/documentation/documentation.md
index b197b5b9e3f..2531eaabcc7 100644
--- a/test/instrumentation/documentation/documentation.md
+++ b/test/instrumentation/documentation/documentation.md
@@ -6,7 +6,7 @@ description: >-
 ---
 
 
-## Metrics (auto-generated 2022 Oct 31)
+## Metrics (auto-generated 2022 Nov 01)
 
 This page details the metrics that different Kubernetes components export. You can query the metrics endpoint for these 
 components using an HTTP scrape, and fetch the current metrics data in Prometheus format.
@@ -176,6 +176,12 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu
 <td class="metric_description">Gauge of OpenAPI v2 spec regeneration duration in seconds.</td>
 <td class="metric_labels_varying"><div class="metric_label">reason</div></td>
 <td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">aggregator_unavailable_apiservice</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Gauge of APIServices which are marked as unavailable broken down by APIService name.</td>
+<td class="metric_labels_varying"><div class="metric_label">name</div></td>
+<td class="metric_labels_constant">None</td></tr>
 <tr class="metric"><td class="metric_name">aggregator_unavailable_apiservice_total</td>
 <td class="metric_stability_level" data-stability="alpha">ALPHA</td>
 <td class="metric_type" data-type="counter">Counter</td>
@@ -674,6 +680,12 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu
 <td class="metric_description">Number of times the A/D Controller performed a forced detach</td>
 <td class="metric_labels_varying">None</td>
 <td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">attachdetach_controller_total_volumes</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Number of volumes in A/D Controller</td>
+<td class="metric_labels_varying"><div class="metric_label">plugin_name</div><div class="metric_label">state</div></td>
+<td class="metric_labels_constant">None</td></tr>
 <tr class="metric"><td class="metric_name">authenticated_user_requests</td>
 <td class="metric_stability_level" data-stability="alpha">ALPHA</td>
 <td class="metric_type" data-type="counter">Counter</td>
@@ -806,6 +818,30 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu
 <td class="metric_description">vsphere operation errors</td>
 <td class="metric_labels_varying"><div class="metric_label">operation</div></td>
 <td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">cloudprovider_vsphere_vcenter_versions</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Versions for connected vSphere vCenters</td>
+<td class="metric_labels_varying"><div class="metric_label">hostname</div><div class="metric_label">version</div><div class="metric_label">build</div></td>
+<td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">container_cpu_usage_seconds_total</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Cumulative cpu time consumed by the container in core-seconds</td>
+<td class="metric_labels_varying"><div class="metric_label">container</div><div class="metric_label">pod</div><div class="metric_label">namespace</div></td>
+<td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">container_memory_working_set_bytes</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Current working set of the container in bytes</td>
+<td class="metric_labels_varying"><div class="metric_label">container</div><div class="metric_label">pod</div><div class="metric_label">namespace</div></td>
+<td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">container_start_time_seconds</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Start time of the container since unix epoch in seconds</td>
+<td class="metric_labels_varying"><div class="metric_label">container</div><div class="metric_label">pod</div><div class="metric_label">namespace</div></td>
+<td class="metric_labels_constant">None</td></tr>
 <tr class="metric"><td class="metric_name">cronjob_controller_cronjob_job_creation_skew_duration_seconds</td>
 <td class="metric_stability_level" data-stability="alpha">ALPHA</td>
 <td class="metric_type" data-type="histogram">Histogram</td>
@@ -1052,6 +1088,18 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu
 <td class="metric_description">Total number of requests for pods/logs sliced by usage type: enforce_tls, skip_tls_allowed, skip_tls_denied</td>
 <td class="metric_labels_varying"><div class="metric_label">usage</div></td>
 <td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">kube_pod_resource_limit</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Resources limit for workloads on the cluster, broken down by pod. This shows the resource usage the scheduler and kubelet expect per pod for resources along with the unit for the resource if any.</td>
+<td class="metric_labels_varying"><div class="metric_label">namespace</div><div class="metric_label">pod</div><div class="metric_label">node</div><div class="metric_label">scheduler</div><div class="metric_label">priority</div><div class="metric_label">resource</div><div class="metric_label">unit</div></td>
+<td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">kube_pod_resource_request</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Resources requested by workloads on the cluster, broken down by pod. This shows the resource usage the scheduler and kubelet expect per pod for resources along with the unit for the resource if any.</td>
+<td class="metric_labels_varying"><div class="metric_label">namespace</div><div class="metric_label">pod</div><div class="metric_label">node</div><div class="metric_label">scheduler</div><div class="metric_label">priority</div><div class="metric_label">resource</div><div class="metric_label">unit</div></td>
+<td class="metric_labels_constant">None</td></tr>
 <tr class="metric"><td class="metric_name">kubelet_certificate_manager_client_expiration_renew_errors</td>
 <td class="metric_stability_level" data-stability="alpha">ALPHA</td>
 <td class="metric_type" data-type="counter">Counter</td>
@@ -1082,6 +1130,12 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu
 <td class="metric_description">Duration in seconds for cgroup manager operations. Broken down by method.</td>
 <td class="metric_labels_varying"><div class="metric_label">operation_type</div></td>
 <td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">kubelet_container_log_filesystem_used_bytes</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Bytes used by the container's logs on the filesystem.</td>
+<td class="metric_labels_varying"><div class="metric_label">uid</div><div class="metric_label">namespace</div><div class="metric_label">pod</div><div class="metric_label">container</div></td>
+<td class="metric_labels_constant">None</td></tr>
 <tr class="metric"><td class="metric_name">kubelet_containers_per_pod_count</td>
 <td class="metric_stability_level" data-stability="alpha">ALPHA</td>
 <td class="metric_type" data-type="histogram">Histogram</td>
@@ -1358,6 +1412,48 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu
 <td class="metric_description">Duration in seconds to calculate volume stats</td>
 <td class="metric_labels_varying"><div class="metric_label">metric_source</div></td>
 <td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">kubelet_volume_stats_available_bytes</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Number of available bytes in the volume</td>
+<td class="metric_labels_varying"><div class="metric_label">namespace</div><div class="metric_label">persistentvolumeclaim</div></td>
+<td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">kubelet_volume_stats_capacity_bytes</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Capacity in bytes of the volume</td>
+<td class="metric_labels_varying"><div class="metric_label">namespace</div><div class="metric_label">persistentvolumeclaim</div></td>
+<td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">kubelet_volume_stats_health_status_abnormal</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Abnormal volume health status. The count is either 1 or 0. 1 indicates the volume is unhealthy, 0 indicates volume is healthy</td>
+<td class="metric_labels_varying"><div class="metric_label">namespace</div><div class="metric_label">persistentvolumeclaim</div></td>
+<td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">kubelet_volume_stats_inodes</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Maximum number of inodes in the volume</td>
+<td class="metric_labels_varying"><div class="metric_label">namespace</div><div class="metric_label">persistentvolumeclaim</div></td>
+<td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">kubelet_volume_stats_inodes_free</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Number of free inodes in the volume</td>
+<td class="metric_labels_varying"><div class="metric_label">namespace</div><div class="metric_label">persistentvolumeclaim</div></td>
+<td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">kubelet_volume_stats_inodes_used</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Number of used inodes in the volume</td>
+<td class="metric_labels_varying"><div class="metric_label">namespace</div><div class="metric_label">persistentvolumeclaim</div></td>
+<td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">kubelet_volume_stats_used_bytes</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Number of used bytes in the volume</td>
+<td class="metric_labels_varying"><div class="metric_label">namespace</div><div class="metric_label">persistentvolumeclaim</div></td>
+<td class="metric_labels_constant">None</td></tr>
 <tr class="metric"><td class="metric_name">kubeproxy_network_programming_duration_seconds</td>
 <td class="metric_stability_level" data-stability="alpha">ALPHA</td>
 <td class="metric_type" data-type="histogram">Histogram</td>
@@ -1484,6 +1580,12 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu
 <td class="metric_description">Gauge measuring number of registered Nodes per zones.</td>
 <td class="metric_labels_varying"><div class="metric_label">zone</div></td>
 <td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">node_cpu_usage_seconds_total</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Cumulative cpu time consumed by the node in core-seconds</td>
+<td class="metric_labels_varying">None</td>
+<td class="metric_labels_constant">None</td></tr>
 <tr class="metric"><td class="metric_name">node_ipam_controller_cidrset_allocation_tries_per_request</td>
 <td class="metric_stability_level" data-stability="alpha">ALPHA</td>
 <td class="metric_type" data-type="histogram">Histogram</td>
@@ -1532,12 +1634,36 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu
 <td class="metric_description">Gauge measuring percentage of allocated CIDRs.</td>
 <td class="metric_labels_varying"><div class="metric_label">clusterCIDR</div></td>
 <td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">node_memory_working_set_bytes</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Current working set of the node in bytes</td>
+<td class="metric_labels_varying">None</td>
+<td class="metric_labels_constant">None</td></tr>
 <tr class="metric"><td class="metric_name">number_of_l4_ilbs</td>
 <td class="metric_stability_level" data-stability="alpha">ALPHA</td>
 <td class="metric_type" data-type="gauge">Gauge</td>
 <td class="metric_description">Number of L4 ILBs</td>
 <td class="metric_labels_varying"><div class="metric_label">feature</div></td>
 <td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">plugin_manager_total_plugins</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Number of plugins in Plugin Manager</td>
+<td class="metric_labels_varying"><div class="metric_label">socket_path</div><div class="metric_label">state</div></td>
+<td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">pod_cpu_usage_seconds_total</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Cumulative cpu time consumed by the pod in core-seconds</td>
+<td class="metric_labels_varying"><div class="metric_label">pod</div><div class="metric_label">namespace</div></td>
+<td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">pod_memory_working_set_bytes</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Current working set of the pod in bytes</td>
+<td class="metric_labels_varying"><div class="metric_label">pod</div><div class="metric_label">namespace</div></td>
+<td class="metric_labels_constant">None</td></tr>
 <tr class="metric"><td class="metric_name">pod_security_errors_total</td>
 <td class="metric_stability_level" data-stability="alpha">ALPHA</td>
 <td class="metric_type" data-type="counter">Counter</td>
@@ -1568,6 +1694,36 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu
 <td class="metric_description">Cumulative number of a liveness, readiness or startup probe for a container by result.</td>
 <td class="metric_labels_varying"><div class="metric_label">container</div><div class="metric_label">namespace</div><div class="metric_label">pod</div><div class="metric_label">pod_uid</div><div class="metric_label">probe_type</div><div class="metric_label">result</div></td>
 <td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">pv_collector_bound_pv_count</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Gauge measuring number of persistent volume currently bound</td>
+<td class="metric_labels_varying"><div class="metric_label">storage_class</div></td>
+<td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">pv_collector_bound_pvc_count</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Gauge measuring number of persistent volume claim currently bound</td>
+<td class="metric_labels_varying"><div class="metric_label">namespace</div></td>
+<td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">pv_collector_total_pv_count</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Gauge measuring total number of persistent volumes</td>
+<td class="metric_labels_varying"><div class="metric_label">plugin_name</div><div class="metric_label">volume_mode</div></td>
+<td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">pv_collector_unbound_pv_count</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Gauge measuring number of persistent volume currently unbound</td>
+<td class="metric_labels_varying"><div class="metric_label">storage_class</div></td>
+<td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">pv_collector_unbound_pvc_count</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Gauge measuring number of persistent volume claim currently unbound</td>
+<td class="metric_labels_varying"><div class="metric_label">namespace</div></td>
+<td class="metric_labels_constant">None</td></tr>
 <tr class="metric"><td class="metric_name">replicaset_controller_sorting_deletion_age_ratio</td>
 <td class="metric_stability_level" data-stability="alpha">ALPHA</td>
 <td class="metric_type" data-type="histogram">Histogram</td>
@@ -1712,6 +1868,12 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu
 <td class="metric_description">Volume scheduling stage error count</td>
 <td class="metric_labels_varying"><div class="metric_label">operation</div></td>
 <td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">scrape_error</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">1 if there was an error while getting container metrics, 0 otherwise</td>
+<td class="metric_labels_varying">None</td>
+<td class="metric_labels_constant">None</td></tr>
 <tr class="metric"><td class="metric_name">service_controller_nodesync_latency_seconds</td>
 <td class="metric_stability_level" data-stability="alpha">ALPHA</td>
 <td class="metric_type" data-type="histogram">Histogram</td>
@@ -1742,6 +1904,12 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu
 <td class="metric_description">Cumulative valid projected service account tokens used</td>
 <td class="metric_labels_varying">None</td>
 <td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">storage_count_attachable_volumes_in_use</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Measure number of volumes in use</td>
+<td class="metric_labels_varying"><div class="metric_label">node</div><div class="metric_label">volume_plugin</div></td>
+<td class="metric_labels_constant">None</td></tr>
 <tr class="metric"><td class="metric_name">storage_operation_duration_seconds</td>
 <td class="metric_stability_level" data-stability="alpha">ALPHA</td>
 <td class="metric_type" data-type="histogram">Histogram</td>
@@ -1796,6 +1964,12 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu
 <td class="metric_description">Number of volumes whose SELinux context was fine and will be mounted with mount -o context option.</td>
 <td class="metric_labels_varying">None</td>
 <td class="metric_labels_constant">None</td></tr>
+<tr class="metric"><td class="metric_name">volume_manager_total_volumes</td>
+<td class="metric_stability_level" data-stability="alpha">ALPHA</td>
+<td class="metric_type" data-type="custom">Custom</td>
+<td class="metric_description">Number of volumes in Volume Manager</td>
+<td class="metric_labels_varying"><div class="metric_label">plugin_name</div><div class="metric_label">state</div></td>
+<td class="metric_labels_constant">None</td></tr>
 <tr class="metric"><td class="metric_name">volume_operation_total_errors</td>
 <td class="metric_stability_level" data-stability="alpha">ALPHA</td>
 <td class="metric_type" data-type="counter">Counter</td>
diff --git a/test/instrumentation/find_stable_metric.go b/test/instrumentation/find_stable_metric.go
index 3542c1e9d50..a2b9f0af093 100644
--- a/test/instrumentation/find_stable_metric.go
+++ b/test/instrumentation/find_stable_metric.go
@@ -64,7 +64,30 @@ func contains(v metrics.StabilityLevel, a []metrics.StabilityLevel) bool {
 func (f *stableMetricFinder) Visit(node ast.Node) (w ast.Visitor) {
 	switch opts := node.(type) {
 	case *ast.CallExpr:
-		f.currentFunctionCall = opts
+		// metrics.NewDesc(fqName, help, variableLabels, constLabels, stabilityLevel,
+		// deprecatedVersion) carries its stability level as the fifth argument.
+		// Every other call, including a NewDesc with fewer than five arguments,
+		// is tracked as the current call rather than indexing Args[4] out of range.
+		se, isSelector := opts.Fun.(*ast.SelectorExpr)
+		if !isSelector || se.Sel.Name != "NewDesc" || len(opts.Args) < 5 {
+			f.currentFunctionCall = opts
+			break
+		}
+		// The decode error is deliberately ignored, as before: when no level is
+		// returned the call is neither recorded nor tracked.
+		sl, _ := decodeStabilityLevel(opts.Args[4], "metrics")
+		if sl == nil {
+			break
+		}
+		classes := []metrics.StabilityLevel{metrics.STABLE, metrics.BETA}
+		if ALL_STABILITY_CLASSES {
+			classes = append(classes, metrics.ALPHA)
+		}
+		if !contains(*sl, classes) {
+			return nil
+		}
+		f.stableMetricsFunctionCalls = append(f.stableMetricsFunctionCalls, opts)
+		f.currentFunctionCall = nil
 	case *ast.CompositeLit:
 		se, ok := opts.Type.(*ast.SelectorExpr)
 		if !ok {
diff --git a/test/instrumentation/metric.go b/test/instrumentation/metric.go
index 35abb759a2e..061596bcae9 100644
--- a/test/instrumentation/metric.go
+++ b/test/instrumentation/metric.go
@@ -26,6 +26,7 @@ const (
 	histogramMetricType  = "Histogram"
 	summaryMetricType    = "Summary"
 	timingRatioHistogram = "TimingRatioHistogram"
+	customType           = "Custom"
 )
 
 type metric struct {
diff --git a/test/instrumentation/testdata/pkg/kubelet/metrics/metrics.go b/test/instrumentation/testdata/pkg/kubelet/metrics/metrics.go
index 47da0b3721f..8e87f70030a 100644
--- a/test/instrumentation/testdata/pkg/kubelet/metrics/metrics.go
+++ b/test/instrumentation/testdata/pkg/kubelet/metrics/metrics.go
@@ -20,10 +20,10 @@ import (
 	"sync"
 	"time"
 
+	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/component-base/metrics"
 	"k8s.io/component-base/metrics/legacyregistry"
-
-	"k8s.io/apimachinery/pkg/types"
+	kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
 )
 
 // This const block defines the metric names for the kubelet metrics.
@@ -63,6 +63,27 @@ const (
 	RunPodSandboxErrorsKey   = "run_podsandbox_errors_total"
 )
 
+const (
+	// Subsystem names.
+	pvControllerSubsystem = "pv_collector"
+
+	// Metric names.
+	totalPVKey    = "total_pv_count"
+	boundPVKey    = "bound_pv_count"
+	unboundPVKey  = "unbound_pv_count"
+	boundPVCKey   = "bound_pvc_count"
+	unboundPVCKey = "unbound_pvc_count"
+
+	// Label names.
+	namespaceLabel    = "namespace"
+	storageClassLabel = "storage_class"
+	pluginNameLabel   = "plugin_name"
+	volumeModeLabel   = "volume_mode"
+
+	// String to use when plugin name cannot be determined
+	pluginNameNotAvailable = "N/A"
+)
+
 const (
 	requestKind         = "request_kind"
 	priorityLevel       = "priority_level"
@@ -503,6 +524,38 @@ var (
 			StabilityLevel: metrics.BETA,
 		},
 	)
+
+	volumeManagerTotalVolumes = "volume_manager_total_volumes"
+
+	_ = metrics.NewDesc(
+		volumeManagerTotalVolumes,
+		"Number of volumes in Volume Manager",
+		[]string{"plugin_name", "state"},
+		nil,
+		metrics.STABLE, "",
+	)
+
+	_ = metrics.NewDesc(
+		metrics.BuildFQName("test", "beta", "desc"),
+		"Number of volumes in Volume Manager",
+		nil,
+		map[string]string{"alalala": "lalalal"},
+		metrics.BETA, "",
+	)
+	_ = metrics.NewDesc(
+		"test_desc_alpha",
+		"Number of volumes in Volume Manager",
+		[]string{"plugin_name", "state"},
+		map[string]string{"alalala": "lalalal"},
+		metrics.ALPHA, "",
+	)
+
+	_ = metrics.NewDesc(
+		metrics.BuildFQName("", kubeletmetrics.KubeletSubsystem, kubeletmetrics.VolumeStatsCapacityBytesKey),
+		"Capacity in bytes of the volume",
+		[]string{"namespace", "persistentvolumeclaim"}, nil,
+		metrics.BETA, "",
+	)
 )
 
 var registerMetrics sync.Once
diff --git a/test/instrumentation/testdata/test-stable-metrics-list.yaml b/test/instrumentation/testdata/test-stable-metrics-list.yaml
index ea0955f57cf..4b6453d943f 100644
--- a/test/instrumentation/testdata/test-stable-metrics-list.yaml
+++ b/test/instrumentation/testdata/test-stable-metrics-list.yaml
@@ -185,6 +185,13 @@
   - 119.20928955078125
   - 298.0232238769531
   - 745.0580596923828
+- name: kubelet_volume_stats_capacity_bytes
+  help: Capacity in bytes of the volume
+  type: Custom
+  stabilityLevel: BETA
+  labels:
+  - namespace
+  - persistentvolumeclaim
 - name: priority_level_seat_utilization
   subsystem: subsystem
   namespace: namespace
@@ -210,6 +217,12 @@
   - 1
   constLabels:
     phase: executing
+- name: test_beta_desc
+  help: Number of volumes in Volume Manager
+  type: Custom
+  stabilityLevel: BETA
+  constLabels:
+    alalala: lalalal
 - name: label
   subsystem: const
   namespace: test
@@ -281,3 +294,10 @@
   - 2.5
   - 5
   - 10
+- name: volume_manager_total_volumes
+  help: Number of volumes in Volume Manager
+  type: Custom
+  stabilityLevel: STABLE
+  labels:
+  - plugin_name
+  - state