Merge pull request #84987 from RainbowMango/pr_migrate_custom_collector_kubelet_part2

migrate kubelet custom metrics to stability framework part 2

Commit ec86baf00b
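For context, the "stability framework" named in the commit message replaces raw prometheus.Collector implementations with metrics.StableCollector, so every exported metric carries an explicit stability level (ALPHA here). Below is a minimal sketch of the target pattern, using only the k8s.io/component-base/metrics APIs that appear in this diff; the collector type and metric name are hypothetical, not part of the PR:

    package main

    import (
    	"k8s.io/component-base/metrics"
    )

    // exampleDesc is a hypothetical descriptor; note the explicit ALPHA level.
    var exampleDesc = metrics.NewDesc("example_total",
    	"An example metric registered through the stability framework",
    	nil, nil, metrics.ALPHA, "")

    type exampleCollector struct {
    	metrics.BaseStableCollector // supplies the Describe/Collect plumbing
    }

    // DescribeWithStability replaces prometheus.Collector's Describe.
    func (c *exampleCollector) DescribeWithStability(ch chan<- *metrics.Desc) {
    	ch <- exampleDesc
    }

    // CollectWithStability replaces prometheus.Collector's Collect.
    func (c *exampleCollector) CollectWithStability(ch chan<- metrics.Metric) {
    	ch <- metrics.NewLazyConstMetric(exampleDesc, metrics.GaugeValue, 1)
    }

    func main() {
    	r := metrics.NewKubeRegistry()
    	r.CustomMustRegister(&exampleCollector{})
    }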
@@ -8,6 +8,7 @@ go_library(
     deps = [
         "//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
         "//pkg/kubelet/server/stats:go_default_library",
+        "//staging/src/k8s.io/component-base/metrics:go_default_library",
     ],
 )
@@ -19,6 +19,7 @@ package v1alpha1

 import (
 	"time"

+	"k8s.io/component-base/metrics"
 	summary "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
 	"k8s.io/kubernetes/pkg/kubelet/server/stats"
 )
@@ -26,55 +27,93 @@ import (

 // Version is the string representation of the version of this configuration
 const Version = "v1alpha1"

+var (
+	nodeCPUUsageDesc = metrics.NewDesc("node_cpu_usage_seconds_total",
+		"Cumulative cpu time consumed by the node in core-seconds",
+		nil,
+		nil,
+		metrics.ALPHA,
+		"")
+
+	nodeMemoryUsageDesc = metrics.NewDesc("node_memory_working_set_bytes",
+		"Current working set of the node in bytes",
+		nil,
+		nil,
+		metrics.ALPHA,
+		"")
+
+	containerCPUUsageDesc = metrics.NewDesc("container_cpu_usage_seconds_total",
+		"Cumulative cpu time consumed by the container in core-seconds",
+		[]string{"container", "pod", "namespace"},
+		nil,
+		metrics.ALPHA,
+		"")
+
+	containerMemoryUsageDesc = metrics.NewDesc("container_memory_working_set_bytes",
+		"Current working set of the container in bytes",
+		[]string{"container", "pod", "namespace"},
+		nil,
+		metrics.ALPHA,
+		"")
+)
+
+// getNodeCPUMetrics returns CPU utilization of a node.
+func getNodeCPUMetrics(s summary.NodeStats) (*float64, time.Time) {
+	if s.CPU == nil {
+		return nil, time.Time{}
+	}
+	v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
+	return &v, s.CPU.Time.Time
+}
+
+// getNodeMemoryMetrics returns memory utilization of a node.
+func getNodeMemoryMetrics(s summary.NodeStats) (*float64, time.Time) {
+	if s.Memory == nil {
+		return nil, time.Time{}
+	}
+	v := float64(*s.Memory.WorkingSetBytes)
+	return &v, s.Memory.Time.Time
+}
+
+// getContainerCPUMetrics returns CPU utilization of a container.
+func getContainerCPUMetrics(s summary.ContainerStats) (*float64, time.Time) {
+	if s.CPU == nil {
+		return nil, time.Time{}
+	}
+	v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
+	return &v, s.CPU.Time.Time
+}
+
+// getContainerMemoryMetrics returns memory utilization of a container.
+func getContainerMemoryMetrics(s summary.ContainerStats) (*float64, time.Time) {
+	if s.Memory == nil {
+		return nil, time.Time{}
+	}
+	v := float64(*s.Memory.WorkingSetBytes)
+	return &v, s.Memory.Time.Time
+}
+
 // Config is the v1alpha1 resource metrics definition
 func Config() stats.ResourceMetricsConfig {
 	return stats.ResourceMetricsConfig{
 		NodeMetrics: []stats.NodeResourceMetric{
 			{
-				Name:        "node_cpu_usage_seconds_total",
-				Description: "Cumulative cpu time consumed by the node in core-seconds",
-				ValueFn: func(s summary.NodeStats) (*float64, time.Time) {
-					if s.CPU == nil {
-						return nil, time.Time{}
-					}
-					v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
-					return &v, s.CPU.Time.Time
-				},
+				Desc:    nodeCPUUsageDesc,
+				ValueFn: getNodeCPUMetrics,
 			},
 			{
-				Name:        "node_memory_working_set_bytes",
-				Description: "Current working set of the node in bytes",
-				ValueFn: func(s summary.NodeStats) (*float64, time.Time) {
-					if s.Memory == nil {
-						return nil, time.Time{}
-					}
-					v := float64(*s.Memory.WorkingSetBytes)
-					return &v, s.Memory.Time.Time
-				},
+				Desc:    nodeMemoryUsageDesc,
+				ValueFn: getNodeMemoryMetrics,
 			},
 		},
 		ContainerMetrics: []stats.ContainerResourceMetric{
 			{
-				Name:        "container_cpu_usage_seconds_total",
-				Description: "Cumulative cpu time consumed by the container in core-seconds",
-				ValueFn: func(s summary.ContainerStats) (*float64, time.Time) {
-					if s.CPU == nil {
-						return nil, time.Time{}
-					}
-					v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
-					return &v, s.CPU.Time.Time
-				},
+				Desc:    containerCPUUsageDesc,
+				ValueFn: getContainerCPUMetrics,
 			},
 			{
-				Name:        "container_memory_working_set_bytes",
-				Description: "Current working set of the container in bytes",
-				ValueFn: func(s summary.ContainerStats) (*float64, time.Time) {
-					if s.Memory == nil {
-						return nil, time.Time{}
-					}
-					v := float64(*s.Memory.WorkingSetBytes)
-					return &v, s.Memory.Time.Time
-				},
+				Desc:    containerMemoryUsageDesc,
+				ValueFn: getContainerMemoryMetrics,
 			},
 		},
 	}
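The package-level getters above share one nil-guard convention: when the relevant stats section is absent, the ValueFn reports no sample (nil) rather than a zero. A hypothetical in-package test sketching that contract, assuming only the getters and imports shown in the hunk:

    // Hypothetical test, not part of this diff: an empty NodeStats (no CPU
    // section populated) must yield a nil value and zero time, so no sample
    // is emitted for it.
    func TestGetNodeCPUMetricsNilGuard(t *testing.T) {
    	v, ts := getNodeCPUMetrics(summary.NodeStats{})
    	if v != nil || !ts.IsZero() {
    		t.Fatalf("expected nil value and zero time, got %v %v", v, ts)
    	}
    }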
@@ -52,7 +52,6 @@ go_library(
         "//vendor/github.com/google/cadvisor/container:go_default_library",
         "//vendor/github.com/google/cadvisor/info/v1:go_default_library",
         "//vendor/github.com/google/cadvisor/metrics:go_default_library",
-        "//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
         "//vendor/google.golang.org/grpc:go_default_library",
         "//vendor/k8s.io/klog:go_default_library",
     ],
@@ -36,7 +36,6 @@ import (
 	cadvisormetrics "github.com/google/cadvisor/container"
 	cadvisorapi "github.com/google/cadvisor/info/v1"
 	"github.com/google/cadvisor/metrics"
-	"github.com/prometheus/client_golang/prometheus"
 	"google.golang.org/grpc"
 	"k8s.io/klog"
@@ -302,7 +301,7 @@ func (s *Server) InstallDefaultHandlers(enableCAdvisorJSONEndpoints bool) {
 	s.restfulCont.Handle(metricsPath, legacyregistry.Handler())

 	// cAdvisor metrics are exposed under the secured handler as well
-	r := prometheus.NewRegistry()
+	r := compbasemetrics.NewKubeRegistry()

 	includedMetrics := cadvisormetrics.MetricSet{
 		cadvisormetrics.CpuUsageMetrics: struct{}{},
@@ -315,13 +314,13 @@ func (s *Server) InstallDefaultHandlers(enableCAdvisorJSONEndpoints bool) {
 		cadvisormetrics.AppMetrics:     struct{}{},
 		cadvisormetrics.ProcessMetrics: struct{}{},
 	}
-	r.MustRegister(metrics.NewPrometheusCollector(prometheusHostAdapter{s.host}, containerPrometheusLabelsFunc(s.host), includedMetrics))
+	r.RawMustRegister(metrics.NewPrometheusCollector(prometheusHostAdapter{s.host}, containerPrometheusLabelsFunc(s.host), includedMetrics))
 	s.restfulCont.Handle(cadvisorMetricsPath,
 		compbasemetrics.HandlerFor(r, compbasemetrics.HandlerOpts{ErrorHandling: compbasemetrics.ContinueOnError}),
 	)

-	v1alpha1ResourceRegistry := prometheus.NewRegistry()
-	v1alpha1ResourceRegistry.MustRegister(stats.NewPrometheusResourceMetricCollector(s.resourceAnalyzer, v1alpha1.Config()))
+	v1alpha1ResourceRegistry := compbasemetrics.NewKubeRegistry()
+	v1alpha1ResourceRegistry.CustomMustRegister(stats.NewPrometheusResourceMetricCollector(s.resourceAnalyzer, v1alpha1.Config()))
 	s.restfulCont.Handle(path.Join(resourceMetricsPathPrefix, v1alpha1.Version),
 		compbasemetrics.HandlerFor(v1alpha1ResourceRegistry, compbasemetrics.HandlerOpts{ErrorHandling: compbasemetrics.ContinueOnError}),
 	)
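The registry swap above is the crux of the server change: the KubeRegistry offers distinct registration paths, and the hunk uses a different one for each collector. A short sketch of the distinction, with placeholder collector variables (the comments describe the split as used in this diff; they are an editor's reading, not PR text):

    import (
    	"github.com/prometheus/client_golang/prometheus"
    	compbasemetrics "k8s.io/component-base/metrics"
    )

    var (
    	cadvisorCollector prometheus.Collector            // placeholder: a pre-existing raw Prometheus collector
    	resourceCollector compbasemetrics.StableCollector // placeholder: a hand-written stable collector
    )

    func installSketch() {
    	r := compbasemetrics.NewKubeRegistry()
    	// RawMustRegister keeps third-party collectors such as cAdvisor's
    	// outside the stability framework; no stability annotation is applied.
    	r.RawMustRegister(cadvisorCollector)
    	// CustomMustRegister wires a metrics.StableCollector through the
    	// framework, so its descriptors keep their declared stability level.
    	r.CustomMustRegister(resourceCollector)
    }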
@@ -26,9 +26,9 @@ go_library(
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
+        "//staging/src/k8s.io/component-base/metrics:go_default_library",
         "//vendor/github.com/emicklei/go-restful:go_default_library",
         "//vendor/github.com/google/cadvisor/info/v1:go_default_library",
-        "//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
         "//vendor/k8s.io/klog:go_default_library",
     ],
 )
@@ -49,8 +49,8 @@ go_test(
         "//staging/src/k8s.io/api/core/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
-        "//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
-        "//vendor/github.com/prometheus/client_model/go:go_default_library",
+        "//staging/src/k8s.io/component-base/metrics:go_default_library",
+        "//staging/src/k8s.io/component-base/metrics/testutil:go_default_library",
         "//vendor/github.com/stretchr/testify/assert:go_default_library",
         "//vendor/github.com/stretchr/testify/mock:go_default_library",
     ] + select({
@@ -19,32 +19,29 @@ package stats

 import (
 	"time"

+	"k8s.io/component-base/metrics"
 	"k8s.io/klog"
 	stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
-
-	"github.com/prometheus/client_golang/prometheus"
 )

 // NodeResourceMetric describes a metric for the node
 type NodeResourceMetric struct {
-	Name        string
-	Description string
-	ValueFn     func(stats.NodeStats) (*float64, time.Time)
+	Desc    *metrics.Desc
+	ValueFn func(stats.NodeStats) (*float64, time.Time)
 }

-func (n *NodeResourceMetric) desc() *prometheus.Desc {
-	return prometheus.NewDesc(n.Name, n.Description, []string{}, nil)
+func (n *NodeResourceMetric) desc() *metrics.Desc {
+	return n.Desc
 }

 // ContainerResourceMetric describes a metric for containers
 type ContainerResourceMetric struct {
-	Name        string
-	Description string
-	ValueFn     func(stats.ContainerStats) (*float64, time.Time)
+	Desc    *metrics.Desc
+	ValueFn func(stats.ContainerStats) (*float64, time.Time)
 }

-func (n *ContainerResourceMetric) desc() *prometheus.Desc {
-	return prometheus.NewDesc(n.Name, n.Description, []string{"container", "pod", "namespace"}, nil)
+func (n *ContainerResourceMetric) desc() *metrics.Desc {
+	return n.Desc
 }

 // ResourceMetricsConfig specifies which metrics to collect and export
@@ -53,29 +50,34 @@ type ResourceMetricsConfig struct {
 	ContainerMetrics []ContainerResourceMetric
 }

-// NewPrometheusResourceMetricCollector returns a prometheus.Collector which exports resource metrics
-func NewPrometheusResourceMetricCollector(provider SummaryProvider, config ResourceMetricsConfig) prometheus.Collector {
+// NewPrometheusResourceMetricCollector returns a metrics.StableCollector which exports resource metrics
+func NewPrometheusResourceMetricCollector(provider SummaryProvider, config ResourceMetricsConfig) metrics.StableCollector {
 	return &resourceMetricCollector{
 		provider: provider,
 		config:   config,
-		errors: prometheus.NewGauge(prometheus.GaugeOpts{
-			Name: "scrape_error",
-			Help: "1 if there was an error while getting container metrics, 0 otherwise",
-		}),
+		errors: metrics.NewDesc("scrape_error",
+			"1 if there was an error while getting container metrics, 0 otherwise",
+			nil,
+			nil,
+			metrics.ALPHA,
+			""),
 	}
 }

 type resourceMetricCollector struct {
+	metrics.BaseStableCollector
+
 	provider SummaryProvider
 	config   ResourceMetricsConfig
-	errors   prometheus.Gauge
+	errors   *metrics.Desc
 }

-var _ prometheus.Collector = &resourceMetricCollector{}
+var _ metrics.StableCollector = &resourceMetricCollector{}

-// Describe implements prometheus.Collector
-func (rc *resourceMetricCollector) Describe(ch chan<- *prometheus.Desc) {
-	rc.errors.Describe(ch)
+// DescribeWithStability implements metrics.StableCollector
+func (rc *resourceMetricCollector) DescribeWithStability(ch chan<- *metrics.Desc) {
+	ch <- rc.errors
 	for _, metric := range rc.config.NodeMetrics {
 		ch <- metric.desc()
 	}
@@ -84,24 +86,26 @@ func (rc *resourceMetricCollector) Describe(ch chan<- *prometheus.Desc) {
 	}
 }

-// Collect implements prometheus.Collector
-// Since new containers are frequently created and removed, using the prometheus.Gauge Collector would
+// CollectWithStability implements metrics.StableCollector
+// Since new containers are frequently created and removed, using the Gauge would
 // leak metric collectors for containers or pods that no longer exist.  Instead, implement
-// prometheus.Collector in a way that only collects metrics for active containers.
-func (rc *resourceMetricCollector) Collect(ch chan<- prometheus.Metric) {
-	rc.errors.Set(0)
-	defer rc.errors.Collect(ch)
+// custom collector in a way that only collects metrics for active containers.
+func (rc *resourceMetricCollector) CollectWithStability(ch chan<- metrics.Metric) {
+	var errorCount float64
+	defer func() {
+		ch <- metrics.NewLazyConstMetric(rc.errors, metrics.GaugeValue, errorCount)
+	}()
 	summary, err := rc.provider.GetCPUAndMemoryStats()
 	if err != nil {
-		rc.errors.Set(1)
+		errorCount = 1
 		klog.Warningf("Error getting summary for resourceMetric prometheus endpoint: %v", err)
 		return
 	}

 	for _, metric := range rc.config.NodeMetrics {
 		if value, timestamp := metric.ValueFn(summary.Node); value != nil {
-			ch <- prometheus.NewMetricWithTimestamp(timestamp,
-				prometheus.MustNewConstMetric(metric.desc(), prometheus.GaugeValue, *value))
+			ch <- metrics.NewLazyMetricWithTimestamp(timestamp,
+				metrics.NewLazyConstMetric(metric.desc(), metrics.GaugeValue, *value))
 		}
 	}
@@ -109,8 +113,8 @@ func (rc *resourceMetricCollector) Collect(ch chan<- prometheus.Metric) {
 		for _, container := range pod.Containers {
 			for _, metric := range rc.config.ContainerMetrics {
 				if value, timestamp := metric.ValueFn(container); value != nil {
-					ch <- prometheus.NewMetricWithTimestamp(timestamp,
-						prometheus.MustNewConstMetric(metric.desc(), prometheus.GaugeValue, *value, container.Name, pod.PodRef.Name, pod.PodRef.Namespace))
+					ch <- metrics.NewLazyMetricWithTimestamp(timestamp,
+						metrics.NewLazyConstMetric(metric.desc(), metrics.GaugeValue, *value, container.Name, pod.PodRef.Name, pod.PodRef.Namespace))
 				}
 			}
 		}
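The comment in the hunk above carries the key design point: per-scrape const metrics instead of long-lived gauges, so samples for containers that have been deleted simply stop appearing at the next scrape. A hedged sketch of the emit step, factored into a hypothetical helper but using only the component-base calls shown in the hunk:

    import (
    	"time"

    	"k8s.io/component-base/metrics"
    )

    // emitSample is a hypothetical helper illustrating the per-scrape emission
    // used above: the metric is built fresh on every scrape from a descriptor,
    // a value, and the sample's own timestamp, so nothing is retained for
    // containers that have gone away.
    func emitSample(ch chan<- metrics.Metric, desc *metrics.Desc, value float64, ts time.Time, labels ...string) {
    	ch <- metrics.NewLazyMetricWithTimestamp(ts,
    		metrics.NewLazyConstMetric(desc, metrics.GaugeValue, value, labels...))
    }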
@@ -18,28 +18,114 @@ package stats

 import (
 	"fmt"
+	"strings"
 	"testing"
 	"time"

-	"github.com/prometheus/client_golang/prometheus"
-	dto "github.com/prometheus/client_model/go"
-	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/mock"

 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/component-base/metrics"
+	"k8s.io/component-base/metrics/testutil"
 	statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
+	summary "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
 )

 const (
 	errorName = "scrape_error"
 	errorHelp = "1 if there was an error while getting container metrics, 0 otherwise"
 )

+// TODO(RainbowMango): The Desc variables and value functions should be shared with source code.
+// It can not be shared now because there is a import cycle.
+// Consider deprecate endpoint `/resource/v1alpha1` as stability framework could offer guarantee now.
 var (
 	noError  = float64(0)
 	hasError = float64(1)
+
+	nodeCPUUsageDesc = metrics.NewDesc("node_cpu_usage_seconds_total",
+		"Cumulative cpu time consumed by the node in core-seconds",
+		nil,
+		nil,
+		metrics.ALPHA,
+		"")
+
+	nodeMemoryUsageDesc = metrics.NewDesc("node_memory_working_set_bytes",
+		"Current working set of the node in bytes",
+		nil,
+		nil,
+		metrics.ALPHA,
+		"")
+
+	containerCPUUsageDesc = metrics.NewDesc("container_cpu_usage_seconds_total",
+		"Cumulative cpu time consumed by the container in core-seconds",
+		[]string{"container", "pod", "namespace"},
+		nil,
+		metrics.ALPHA,
+		"")
+
+	containerMemoryUsageDesc = metrics.NewDesc("container_memory_working_set_bytes",
+		"Current working set of the container in bytes",
+		[]string{"container", "pod", "namespace"},
+		nil,
+		metrics.ALPHA,
+		"")
 )

+// getNodeCPUMetrics returns CPU utilization of a node.
+func getNodeCPUMetrics(s summary.NodeStats) (*float64, time.Time) {
+	if s.CPU == nil {
+		return nil, time.Time{}
+	}
+	v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
+	return &v, s.CPU.Time.Time
+}
+
+// getNodeMemoryMetrics returns memory utilization of a node.
+func getNodeMemoryMetrics(s summary.NodeStats) (*float64, time.Time) {
+	if s.Memory == nil {
+		return nil, time.Time{}
+	}
+	v := float64(*s.Memory.WorkingSetBytes)
+	return &v, s.Memory.Time.Time
+}
+
+// getContainerCPUMetrics returns CPU utilization of a container.
+func getContainerCPUMetrics(s summary.ContainerStats) (*float64, time.Time) {
+	if s.CPU == nil {
+		return nil, time.Time{}
+	}
+	v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
+	return &v, s.CPU.Time.Time
+}
+
+// getContainerMemoryMetrics returns memory utilization of a container.
+func getContainerMemoryMetrics(s summary.ContainerStats) (*float64, time.Time) {
+	if s.Memory == nil {
+		return nil, time.Time{}
+	}
+	v := float64(*s.Memory.WorkingSetBytes)
+	return &v, s.Memory.Time.Time
+}
+
+// Config is the v1alpha1 resource metrics definition
+func Config() ResourceMetricsConfig {
+	return ResourceMetricsConfig{
+		NodeMetrics: []NodeResourceMetric{
+			{
+				Desc:    nodeCPUUsageDesc,
+				ValueFn: getNodeCPUMetrics,
+			},
+			{
+				Desc:    nodeMemoryUsageDesc,
+				ValueFn: getNodeMemoryMetrics,
+			},
+		},
+		ContainerMetrics: []ContainerResourceMetric{
+			{
+				Desc:    containerCPUUsageDesc,
+				ValueFn: getContainerCPUMetrics,
+			},
+			{
+				Desc:    containerMemoryUsageDesc,
+				ValueFn: getContainerMemoryMetrics,
+			},
+		},
+	}
+}
+
 type mockSummaryProvider struct {
 	mock.Mock
 }
@@ -54,60 +140,32 @@ func (m *mockSummaryProvider) GetCPUAndMemoryStats() (*statsapi.Summary, error)
 	return args.Get(0).(*statsapi.Summary), args.Error(1)
 }

-type collectResult struct {
-	desc   *prometheus.Desc
-	metric *dto.Metric
-}
-
 func TestCollectResourceMetrics(t *testing.T) {
-	testTime := metav1.Now()
-	for _, tc := range []struct {
-		description     string
-		config          ResourceMetricsConfig
-		summary         *statsapi.Summary
-		summaryErr      error
-		expectedMetrics []collectResult
+	testTime := metav1.NewTime(time.Unix(2, 0)) // a static timestamp: 2000
+
+	tests := []struct {
+		name                 string
+		config               ResourceMetricsConfig
+		summary              *statsapi.Summary
+		summaryErr           error
+		expectedMetricsNames []string
+		expectedMetrics      string
 	}{
 		{
-			description: "error getting summary",
-			config:      ResourceMetricsConfig{},
-			summary:     nil,
-			summaryErr:  fmt.Errorf("failed to get summary"),
-			expectedMetrics: []collectResult{
-				{
-					desc:   prometheus.NewDesc(errorName, errorHelp, []string{}, nil),
-					metric: &dto.Metric{Gauge: &dto.Gauge{Value: &hasError}},
-				},
-			},
+			name:                 "error getting summary",
+			config:               Config(),
+			summary:              nil,
+			summaryErr:           fmt.Errorf("failed to get summary"),
+			expectedMetricsNames: []string{"scrape_error"},
+			expectedMetrics: `
+				# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
+				# TYPE scrape_error gauge
+				scrape_error 1
+			`,
 		},
 		{
-			description: "arbitrary node metrics",
-			config: ResourceMetricsConfig{
-				NodeMetrics: []NodeResourceMetric{
-					{
-						Name:        "node_foo",
-						Description: "a metric from nodestats",
-						ValueFn: func(s statsapi.NodeStats) (*float64, time.Time) {
-							if s.CPU == nil {
-								return nil, time.Time{}
-							}
-							v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
-							return &v, s.CPU.Time.Time
-						},
-					},
-					{
-						Name:        "node_bar",
-						Description: "another metric from nodestats",
-						ValueFn: func(s statsapi.NodeStats) (*float64, time.Time) {
-							if s.Memory == nil {
-								return nil, time.Time{}
-							}
-							v := float64(*s.Memory.WorkingSetBytes)
-							return &v, s.Memory.Time.Time
-						},
-					},
-				},
-			},
+			name:   "arbitrary node metrics",
+			config: Config(),
 			summary: &statsapi.Summary{
 				Node: statsapi.NodeStats{
 					CPU: &statsapi.CPUStats{
|
||||
},
|
||||
},
|
||||
summaryErr: nil,
|
||||
expectedMetrics: []collectResult{
|
||||
{
|
||||
desc: prometheus.NewDesc("node_foo", "a metric from nodestats", []string{}, nil),
|
||||
metric: &dto.Metric{Gauge: &dto.Gauge{Value: float64Ptr(10)}},
|
||||
},
|
||||
{
|
||||
desc: prometheus.NewDesc("node_bar", "another metric from nodestats", []string{}, nil),
|
||||
metric: &dto.Metric{Gauge: &dto.Gauge{Value: float64Ptr(1000)}},
|
||||
},
|
||||
{
|
||||
desc: prometheus.NewDesc(errorName, errorHelp, []string{}, nil),
|
||||
metric: &dto.Metric{Gauge: &dto.Gauge{Value: &noError}},
|
||||
},
|
||||
expectedMetricsNames: []string{
|
||||
"node_cpu_usage_seconds_total",
|
||||
"node_memory_working_set_bytes",
|
||||
"scrape_error",
|
||||
},
|
||||
expectedMetrics: `
|
||||
# HELP node_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the node in core-seconds
|
||||
# TYPE node_cpu_usage_seconds_total gauge
|
||||
node_cpu_usage_seconds_total 10 2000
|
||||
# HELP node_memory_working_set_bytes [ALPHA] Current working set of the node in bytes
|
||||
# TYPE node_memory_working_set_bytes gauge
|
||||
node_memory_working_set_bytes 1000 2000
|
||||
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
|
||||
# TYPE scrape_error gauge
|
||||
scrape_error 0
|
||||
`,
|
||||
},
|
||||
{
|
||||
description: "arbitrary container metrics for different container, pods and namespaces",
|
||||
config: ResourceMetricsConfig{
|
||||
ContainerMetrics: []ContainerResourceMetric{
|
||||
{
|
||||
Name: "container_foo",
|
||||
Description: "a metric from container stats",
|
||||
ValueFn: func(s statsapi.ContainerStats) (*float64, time.Time) {
|
||||
if s.CPU == nil {
|
||||
return nil, time.Time{}
|
||||
}
|
||||
v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
|
||||
return &v, s.CPU.Time.Time
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "container_bar",
|
||||
Description: "another metric from container stats",
|
||||
ValueFn: func(s statsapi.ContainerStats) (*float64, time.Time) {
|
||||
if s.Memory == nil {
|
||||
return nil, time.Time{}
|
||||
}
|
||||
v := float64(*s.Memory.WorkingSetBytes)
|
||||
return &v, s.Memory.Time.Time
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
name: "arbitrary container metrics for different container, pods and namespaces",
|
||||
config: Config(),
|
||||
summary: &statsapi.Summary{
|
||||
Pods: []statsapi.PodStats{
|
||||
{
|
||||
@ -218,131 +253,43 @@ func TestCollectResourceMetrics(t *testing.T) {
|
||||
},
|
||||
},
|
||||
summaryErr: nil,
|
||||
expectedMetrics: []collectResult{
|
||||
{
|
||||
desc: prometheus.NewDesc("container_foo", "a metric from container stats", []string{"container", "pod", "namespace"}, nil),
|
||||
metric: &dto.Metric{
|
||||
Gauge: &dto.Gauge{Value: float64Ptr(10)},
|
||||
Label: []*dto.LabelPair{
|
||||
{Name: stringPtr("container"), Value: stringPtr("container_a")},
|
||||
{Name: stringPtr("namespace"), Value: stringPtr("namespace_a")},
|
||||
{Name: stringPtr("pod"), Value: stringPtr("pod_a")},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: prometheus.NewDesc("container_bar", "another metric from container stats", []string{"container", "pod", "namespace"}, nil),
|
||||
metric: &dto.Metric{
|
||||
Gauge: &dto.Gauge{Value: float64Ptr(1000)},
|
||||
Label: []*dto.LabelPair{
|
||||
{Name: stringPtr("container"), Value: stringPtr("container_a")},
|
||||
{Name: stringPtr("namespace"), Value: stringPtr("namespace_a")},
|
||||
{Name: stringPtr("pod"), Value: stringPtr("pod_a")},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: prometheus.NewDesc("container_foo", "a metric from container stats", []string{"container", "pod", "namespace"}, nil),
|
||||
metric: &dto.Metric{
|
||||
Gauge: &dto.Gauge{Value: float64Ptr(10)},
|
||||
Label: []*dto.LabelPair{
|
||||
{Name: stringPtr("container"), Value: stringPtr("container_b")},
|
||||
{Name: stringPtr("namespace"), Value: stringPtr("namespace_a")},
|
||||
{Name: stringPtr("pod"), Value: stringPtr("pod_a")},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: prometheus.NewDesc("container_bar", "another metric from container stats", []string{"container", "pod", "namespace"}, nil),
|
||||
metric: &dto.Metric{
|
||||
Gauge: &dto.Gauge{Value: float64Ptr(1000)},
|
||||
Label: []*dto.LabelPair{
|
||||
{Name: stringPtr("container"), Value: stringPtr("container_b")},
|
||||
{Name: stringPtr("namespace"), Value: stringPtr("namespace_a")},
|
||||
{Name: stringPtr("pod"), Value: stringPtr("pod_a")},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: prometheus.NewDesc("container_foo", "a metric from container stats", []string{"container", "pod", "namespace"}, nil),
|
||||
metric: &dto.Metric{
|
||||
Gauge: &dto.Gauge{Value: float64Ptr(10)},
|
||||
Label: []*dto.LabelPair{
|
||||
{Name: stringPtr("container"), Value: stringPtr("container_a")},
|
||||
{Name: stringPtr("namespace"), Value: stringPtr("namespace_b")},
|
||||
{Name: stringPtr("pod"), Value: stringPtr("pod_b")},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: prometheus.NewDesc("container_bar", "another metric from container stats", []string{"container", "pod", "namespace"}, nil),
|
||||
metric: &dto.Metric{
|
||||
Gauge: &dto.Gauge{Value: float64Ptr(1000)},
|
||||
Label: []*dto.LabelPair{
|
||||
{Name: stringPtr("container"), Value: stringPtr("container_a")},
|
||||
{Name: stringPtr("namespace"), Value: stringPtr("namespace_b")},
|
||||
{Name: stringPtr("pod"), Value: stringPtr("pod_b")},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: prometheus.NewDesc(errorName, errorHelp, []string{}, nil),
|
||||
metric: &dto.Metric{Gauge: &dto.Gauge{Value: &noError}},
|
||||
},
|
||||
expectedMetricsNames: []string{
|
||||
"container_cpu_usage_seconds_total",
|
||||
"container_memory_working_set_bytes",
|
||||
"scrape_error",
|
||||
},
|
||||
expectedMetrics: `
|
||||
# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
|
||||
# TYPE scrape_error gauge
|
||||
scrape_error 0
|
||||
# HELP container_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the container in core-seconds
|
||||
# TYPE container_cpu_usage_seconds_total gauge
|
||||
container_cpu_usage_seconds_total{container="container_a",namespace="namespace_a",pod="pod_a"} 10 2000
|
||||
container_cpu_usage_seconds_total{container="container_a",namespace="namespace_b",pod="pod_b"} 10 2000
|
||||
container_cpu_usage_seconds_total{container="container_b",namespace="namespace_a",pod="pod_a"} 10 2000
|
||||
# HELP container_memory_working_set_bytes [ALPHA] Current working set of the container in bytes
|
||||
# TYPE container_memory_working_set_bytes gauge
|
||||
container_memory_working_set_bytes{container="container_a",namespace="namespace_a",pod="pod_a"} 1000 2000
|
||||
container_memory_working_set_bytes{container="container_a",namespace="namespace_b",pod="pod_b"} 1000 2000
|
||||
container_memory_working_set_bytes{container="container_b",namespace="namespace_a",pod="pod_a"} 1000 2000
|
||||
`,
|
||||
},
|
||||
} {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
tc := test
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
provider := &mockSummaryProvider{}
|
||||
provider.On("GetCPUAndMemoryStats").Return(tc.summary, tc.summaryErr)
|
||||
collector := NewPrometheusResourceMetricCollector(provider, tc.config)
|
||||
metrics := collectMetrics(t, collector, len(tc.expectedMetrics))
|
||||
for i := range metrics {
|
||||
assertEqual(t, metrics[i], tc.expectedMetrics[i])
|
||||
|
||||
if err := testutil.CustomCollectAndCompare(collector, strings.NewReader(tc.expectedMetrics), tc.expectedMetricsNames...); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// collectMetrics is a wrapper around a prometheus.Collector which returns the metrics added to the metric channel as a slice.metric
|
||||
// It will block indefinitely if the collector does not collect exactly numMetrics.
|
||||
func collectMetrics(t *testing.T, collector prometheus.Collector, numMetrics int) (results []collectResult) {
|
||||
metricsCh := make(chan prometheus.Metric)
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
collector.Collect(metricsCh)
|
||||
done <- struct{}{}
|
||||
}()
|
||||
for i := 0; i < numMetrics; i++ {
|
||||
metric := <-metricsCh
|
||||
metricProto := &dto.Metric{}
|
||||
assert.NoError(t, metric.Write(metricProto))
|
||||
results = append(results, collectResult{desc: metric.Desc(), metric: metricProto})
|
||||
}
|
||||
<-done
|
||||
return
|
||||
}
|
||||
|
||||
// assertEqual asserts for semanitic equality for fields we care about
|
||||
func assertEqual(t *testing.T, expected, actual collectResult) {
|
||||
assert.Equal(t, expected.desc.String(), actual.desc.String())
|
||||
assert.Equal(t, *expected.metric.Gauge.Value, *actual.metric.Gauge.Value, "for desc: %v", expected.desc.String())
|
||||
assert.Equal(t, len(expected.metric.Label), len(actual.metric.Label))
|
||||
if len(expected.metric.Label) == len(actual.metric.Label) {
|
||||
for i := range expected.metric.Label {
|
||||
assert.Equal(t, *expected.metric.Label[i], *actual.metric.Label[i], "for desc: %v", expected.desc.String())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func stringPtr(s string) *string {
|
||||
return &s
|
||||
}
|
||||
|
||||
func uint64Ptr(u uint64) *uint64 {
|
||||
return &u
|
||||
}
|
||||
|
||||
func float64Ptr(f float64) *float64 {
|
||||
return &f
|
||||
}
|
||||
|
@@ -17,6 +17,8 @@ limitations under the License.
 package metrics

 import (
+	"time"
+
 	"github.com/prometheus/client_golang/prometheus"
 )
@@ -44,3 +46,15 @@ func NewLazyConstMetric(desc *Desc, valueType ValueType, value float64, labelValues ...string) Metric {
 	}
 	return prometheus.MustNewConstMetric(desc.toPrometheusDesc(), valueType.toPromValueType(), value, labelValues...)
 }
+
+// NewLazyMetricWithTimestamp is a helper of NewMetricWithTimestamp.
+//
+// Warning: the Metric 'm' must be the one created by NewLazyConstMetric(),
+// otherwise, no stability guarantees would be offered.
+func NewLazyMetricWithTimestamp(t time.Time, m Metric) Metric {
+	if m == nil {
+		return nil
+	}
+
+	return prometheus.NewMetricWithTimestamp(t, m)
+}
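A short usage sketch of the new helper, respecting the warning in its doc comment that the wrapped Metric must come from NewLazyConstMetric; the function and its parameters are hypothetical, written as if inside the same metrics package:

    // Hypothetical in-package usage of NewLazyMetricWithTimestamp.
    func emitWithTimestamp(ch chan<- Metric, d *Desc, v float64, ts time.Time) {
    	// Wrap only metrics produced by NewLazyConstMetric, per the warning above.
    	ch <- NewLazyMetricWithTimestamp(ts, NewLazyConstMetric(d, GaugeValue, v))
    }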