Add support for summary metrics for stablity checks

Also add entrypoints for verifying and updating a test file for easier debugging. This is considerably faster than running the stablity checks against the entire Kubernetes codebase. Change-Id: I5d5e5b3abf396ebf1317a44130f20771a09afb7f
2025-07-21 10:51:29 +00:00 · 2022-07-25 17:00:28 -07:00 · 2022-07-25 17:00:28 -07:00 · e452850ec3
commit e452850ec3
parent 3ffdfbe286
10 changed files with 340 additions and 52 deletions
--- a/staging/src/k8s.io/component-base/metrics/summary.go
+++ b/staging/src/k8s.io/component-base/metrics/summary.go
@ -23,6 +23,12 @@ import (
 	"github.com/prometheus/client_golang/prometheus"
 )

+const (
+	DefAgeBuckets = prometheus.DefAgeBuckets
+	DefBufCap     = prometheus.DefBufCap
+	DefMaxAge     = prometheus.DefMaxAge
+)
+
 // Summary is our internal representation for our wrapping struct around prometheus
 // summaries. Summary implements both kubeCollector and ObserverMetric
 //
--- a/test/instrumentation/decode_metric.go
+++ b/test/instrumentation/decode_metric.go
@ -66,12 +66,10 @@ func (c *metricDecoder) decodeNewMetricCall(fc *ast.CallExpr) (metric, error) {
 		return m, newDecodeErrorf(fc, errNotDirectCall)
 	}
 	switch functionName {
-	case "NewCounter", "NewGauge", "NewHistogram":
+	case "NewCounter", "NewGauge", "NewHistogram", "NewSummary":
 		m, err = c.decodeMetric(fc)
-	case "NewCounterVec", "NewGaugeVec", "NewHistogramVec":
+	case "NewCounterVec", "NewGaugeVec", "NewHistogramVec", "NewSummaryVec":
 		m, err = c.decodeMetricVec(fc)
-	case "NewSummary", "NewSummaryVec":
-		return m, newDecodeErrorf(fc, errStableSummary)
 	default:
 		return m, newDecodeErrorf(fc, errNotDirectCall)
 	}
@ -90,6 +88,8 @@ func getMetricType(functionName string) string {
 		return gaugeMetricType
 	case "NewHistogram", "NewHistogramVec":
 		return histogramMetricType
+	case "NewSummary", "NewSummaryVec":
+		return summaryMetricType
 	default:
 		panic("getMetricType expects correct function name")
 	}
@ -229,6 +229,33 @@ func (c *metricDecoder) decodeOpts(expr ast.Expr) (metric, error) {
 				return m, err
 			}
 			m.StabilityLevel = string(*level)
+		case "AgeBuckets", "BufCap":
+			uintVal, err := c.decodeUint32(kv.Value)
+			if err != nil {
+				print(key)
+				return m, err
+			}
+			if key == "AgeBuckets" {
+				m.AgeBuckets = uintVal
+			}
+			if key == "BufCap" {
+				m.BufCap = uintVal
+			}
+
+		case "Objectives":
+			obj, err := c.decodeObjectives(kv.Value)
+			if err != nil {
+				print(key)
+				return m, err
+			}
+			m.Objectives = obj
+		case "MaxAge":
+			int64Val, err := c.decodeInt64(kv.Value)
+			if err != nil {
+				print(key)
+				return m, err
+			}
+			m.MaxAge = int64Val
 		default:
 			return m, newDecodeErrorf(expr, errFieldNotSupported, key)
 		}
@ -280,6 +307,114 @@ func (c *metricDecoder) decodeBuckets(expr ast.Expr) ([]float64, error) {
 	return nil, newDecodeErrorf(expr, errBuckets)
 }

+func (c *metricDecoder) decodeObjectives(expr ast.Expr) (map[float64]float64, error) {
+	switch v := expr.(type) {
+	case *ast.CompositeLit:
+		return decodeFloatMap(v.Elts)
+	case *ast.Ident:
+		variableExpr, found := c.variables[v.Name]
+		if !found {
+			return nil, newDecodeErrorf(expr, errBadVariableAttribute)
+		}
+		return decodeFloatMap(variableExpr.(*ast.CompositeLit).Elts)
+	}
+	return nil, newDecodeErrorf(expr, errObjectives)
+}
+
+func (c *metricDecoder) decodeUint32(expr ast.Expr) (uint32, error) {
+	switch v := expr.(type) {
+	case *ast.BasicLit:
+		if v.Kind != token.FLOAT && v.Kind != token.INT {
+			print(v.Kind)
+		}
+		value, err := strconv.ParseUint(v.Value, 10, 32)
+		if err != nil {
+			return 0, err
+		}
+		return uint32(value), nil
+	case *ast.SelectorExpr:
+		variableName := v.Sel.String()
+		importName, ok := v.X.(*ast.Ident)
+		if ok && importName.String() == c.kubeMetricsImportName {
+			if variableName == "DefAgeBuckets" {
+				// hardcode this for now
+				return 5, nil
+			}
+			if variableName == "DefBufCap" {
+				// hardcode this for now
+				return 500, nil
+			}
+		}
+	case *ast.CallExpr:
+		_, ok := v.Fun.(*ast.SelectorExpr)
+		if !ok {
+			return 0, newDecodeErrorf(v, errDecodeUint32)
+		}
+		return 0, nil
+	}
+	return 0, newDecodeErrorf(expr, errDecodeUint32)
+}
+
+func (c *metricDecoder) decodeInt64(expr ast.Expr) (int64, error) {
+	switch v := expr.(type) {
+	case *ast.BasicLit:
+		if v.Kind != token.FLOAT && v.Kind != token.INT {
+			print(v.Kind)
+		}
+
+		value, err := strconv.ParseInt(v.Value, 10, 64)
+		if err != nil {
+			return 0, err
+		}
+		return value, nil
+	case *ast.SelectorExpr:
+		variableName := v.Sel.String()
+		importName, ok := v.X.(*ast.Ident)
+		if ok && importName.String() == c.kubeMetricsImportName {
+			if variableName == "DefMaxAge" {
+				// hardcode this for now. This is a duration but we'll output it as
+				// an int64 representing nanoseconds.
+				return 1000 * 1000 * 1000 * 60 * 10, nil
+			}
+		}
+	case *ast.CallExpr:
+		_, ok := v.Fun.(*ast.SelectorExpr)
+		if !ok {
+			return 0, newDecodeErrorf(v, errDecodeInt64)
+		}
+		return 0, nil
+	}
+	return 0, newDecodeErrorf(expr, errDecodeInt64)
+}
+
+func decodeFloatMap(exprs []ast.Expr) (map[float64]float64, error) {
+	buckets := map[float64]float64{}
+	for _, elt := range exprs {
+		bl, ok := elt.(*ast.KeyValueExpr)
+		if !ok {
+			return nil, newDecodeErrorf(bl, errObjectives)
+		}
+		keyExpr, ok := bl.Key.(*ast.BasicLit)
+		if !ok {
+			return nil, newDecodeErrorf(bl, errObjectives)
+		}
+		valueExpr, ok := bl.Value.(*ast.BasicLit)
+		if !ok {
+			return nil, newDecodeErrorf(bl, errObjectives)
+		}
+		valueForKey, err := strconv.ParseFloat(keyExpr.Value, 64)
+		if err != nil {
+			return nil, newDecodeErrorf(bl, errObjectives)
+		}
+		valueForValue, err := strconv.ParseFloat(valueExpr.Value, 64)
+		if err != nil {
+			return nil, newDecodeErrorf(bl, errObjectives)
+		}
+		buckets[valueForKey] = valueForValue
+	}
+	return buckets, nil
+}
+
 func decodeListOfFloats(exprs []ast.Expr) ([]float64, error) {
 	buckets := make([]float64, len(exprs))
 	for i, elt := range exprs {
--- a/test/instrumentation/error.go
+++ b/test/instrumentation/error.go
@ -33,9 +33,13 @@ const (
 	errBadImportedVariableAttribute = "Metric attribute was not correctly set. Please use only global consts in correctly impoprted same file"
 	errFieldNotSupported            = "Field %s is not supported"
 	errBuckets                      = "Buckets should be set to list of floats, result from function call of prometheus.LinearBuckets or prometheus.ExponentialBuckets"
-	errLabels                       = "Labels were not set to list of strings"
-	errImport                       = `Importing using "." is not supported`
-	errExprNotIdent                 = "expr selector does not refer to type ast.Ident, is type %s"
+	errObjectives                   = "Buckets should be set to map of floats to floats"
+	errDecodeUint32                 = "Should decode to uint32"
+	errDecodeInt64                  = "Should decode to int64"
+
+	errLabels       = "Labels were not set to list of strings"
+	errImport       = `Importing using "." is not supported`
+	errExprNotIdent = "expr selector does not refer to type ast.Ident, is type %s"
 )

 type decodeError struct {
--- a/test/instrumentation/main_test.go
+++ b/test/instrumentation/main_test.go
@ -539,18 +539,6 @@ func TestIncorrectStableMetricDeclarations(t *testing.T) {
 		src      string
 		err      error
 	}{
-		{
-			testName: "Fail on stable summary metric (Summary is DEPRECATED)",
-			err:      fmt.Errorf("testdata/metric.go:4:9: Stable summary metric is not supported"),
-			src: `
-package test
-import "k8s.io/component-base/metrics"
-var _ = metrics.NewSummary(
-		&metrics.SummaryOpts{
-			StabilityLevel: metrics.STABLE,
-		},
-	)
-`},
 		{
 			testName: "Fail on stable metric with attribute set to unknown variable",
 			err:      fmt.Errorf("testdata/metric.go:6:4: Metric attribute was not correctly set. Please use only global consts in same file"),
--- a/test/instrumentation/metric.go
+++ b/test/instrumentation/metric.go
@ -24,18 +24,23 @@ const (
 	counterMetricType   = "Counter"
 	gaugeMetricType     = "Gauge"
 	histogramMetricType = "Histogram"
+	summaryMetricType   = "Summary"
 )

 type metric struct {
-	Name              string    `yaml:"name"`
-	Subsystem         string    `yaml:"subsystem,omitempty"`
-	Namespace         string    `yaml:"namespace,omitempty"`
-	Help              string    `yaml:"help,omitempty"`
-	Type              string    `yaml:"type,omitempty"`
-	DeprecatedVersion string    `yaml:"deprecatedVersion,omitempty"`
-	StabilityLevel    string    `yaml:"stabilityLevel,omitempty"`
-	Labels            []string  `yaml:"labels,omitempty"`
-	Buckets           []float64 `yaml:"buckets,omitempty"`
+	Name              string              `yaml:"name"`
+	Subsystem         string              `yaml:"subsystem,omitempty"`
+	Namespace         string              `yaml:"namespace,omitempty"`
+	Help              string              `yaml:"help,omitempty"`
+	Type              string              `yaml:"type,omitempty"`
+	DeprecatedVersion string              `yaml:"deprecatedVersion,omitempty"`
+	StabilityLevel    string              `yaml:"stabilityLevel,omitempty"`
+	Labels            []string            `yaml:"labels,omitempty"`
+	Buckets           []float64           `yaml:"buckets,omitempty"`
+	Objectives        map[float64]float64 `yaml:"objectives,omitempty"`
+	AgeBuckets        uint32              `yaml:"ageBuckets,omitempty"`
+	BufCap            uint32              `yaml:"bufCap,omitempty"`
+	MaxAge            int64               `yaml:"maxAge,omitempty"`
 }

 func (m metric) buildFQName() string {
--- a/test/instrumentation/stability-utils.sh
+++ b/test/instrumentation/stability-utils.sh
@ -41,12 +41,17 @@ find_files_to_check() {
        -o -wholename '*/vendor/*' \
        -o -wholename '*/hack/*' \
        -o -wholename '*_test.go' \
+        -o -wholename './test/instrumentation/*.go' \
        \) -prune \
    \) \
    \( -wholename '**/*.go' \
  \)
 }

+find_test_files() {
+  find './test/instrumentation' -wholename '**/*.go'
+}
+
 red=$(tput setaf 1)
 green=$(tput setaf 2)
 reset=$(tput sgr0)
@ -69,6 +74,24 @@ kube::validate::stablemetrics() {
  exit 1
 }

+kube::validate::test::stablemetrics() {
+  stability_check_setup
+  temp_file=$(mktemp)
+  doValidate=$(find_test_files | grep -E ".*.go" | grep -v ".*_test.go" | grep -v ".git" | sort | KUBE_ROOT=${KUBE_ROOT} xargs -L 200 go run "test/instrumentation/main.go" "test/instrumentation/decode_metric.go" "test/instrumentation/find_stable_metric.go" "test/instrumentation/error.go" "test/instrumentation/metric.go" -- 1>"${temp_file}")
+
+  if $doValidate; then
+    echo -e "${green}Diffing test/instrumentation/testdata/test-stable-metrics-list.yaml\n${reset}"
+    if diff -u "$KUBE_ROOT/test/instrumentation/testdata/test-stable-metrics-list.yaml" "$temp_file"; then
+      echo -e "${green}\nPASS metrics stability verification ${reset}"
+      return 0
+    fi
+  fi
+
+  echo "${red}!!! Metrics stability static analysis test has failed!${reset}" >&2
+  echo "${red}!!! Please run './test/instrumentation/test-update.sh' to update the golden list.${reset}" >&2
+  exit 1
+}
+
 kube::update::stablemetrics() {
  stability_check_setup
  temp_file=$(mktemp)
@ -82,3 +105,16 @@ kube::update::stablemetrics() {
  echo "${green}Updated golden list of stable metrics.${reset}"
 }

+kube::update::test::stablemetrics() {
+  stability_check_setup
+  temp_file=$(mktemp)
+  doCheckStability=$(find_test_files | grep -E ".*.go" | grep -v ".*_test.go" | sort | KUBE_ROOT=${KUBE_ROOT} xargs -L 200 go run "test/instrumentation/main.go" "test/instrumentation/decode_metric.go" "test/instrumentation/find_stable_metric.go" "test/instrumentation/error.go" "test/instrumentation/metric.go" -- 1>"${temp_file}")
+
+  if ! $doCheckStability; then
+    echo "${red}!!! updating golden list of test metrics has failed! ${reset}" >&2
+    exit 1
+  fi
+  mv -f "$temp_file" "${KUBE_ROOT}/test/instrumentation/testdata/test-stable-metrics-list.yaml"
+  echo "${green}Updated test list of stable metrics.${reset}"
+}
+
--- a/test/instrumentation/test-update.sh
+++ b/test/instrumentation/test-update.sh
@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+
+# Copyright 2022 The Kubernetes Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script runs to ensure that we do not violate metric stability
+# policies.
+# Usage: `test/instrumentation/test-update.sh`.
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/../..
+source "${KUBE_ROOT}/test/instrumentation/stability-utils.sh"
+
+kube::update::test::stablemetrics
--- a/test/instrumentation/test-verify.sh
+++ b/test/instrumentation/test-verify.sh
@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+
+# Copyright 2022 The Kubernetes Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script runs to ensure that we do not violate metric stability
+# policies.
+# Usage: `test/instrumentation/test-verify.sh`.
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/../..
+source "${KUBE_ROOT}/test/instrumentation/stability-utils.sh"
+
+kube::validate::test::stablemetrics
--- a/test/instrumentation/testdata/pkg/kubelet/metrics/metrics.go
+++ b/test/instrumentation/testdata/pkg/kubelet/metrics/metrics.go
@ -28,28 +28,22 @@ import (

 // This const block defines the metric names for the kubelet metrics.
 const (
-	KubeletSubsystem             = "kubelet"
-	NodeNameKey                  = "node_name"
-	NodeLabelKey                 = "node"
-	PodWorkerDurationKey         = "pod_worker_duration_seconds"
-	PodStartDurationKey          = "pod_start_duration_seconds"
-	CgroupManagerOperationsKey   = "cgroup_manager_duration_seconds"
-	PodWorkerStartDurationKey    = "pod_worker_start_duration_seconds"
-	PLEGRelistDurationKey        = "pleg_relist_duration_seconds"
-	PLEGDiscardEventsKey         = "pleg_discard_events"
-	PLEGRelistIntervalKey        = "pleg_relist_interval_seconds"
-	PLEGLastSeenKey              = "pleg_last_seen_seconds"
-	EvictionsKey                 = "evictions"
-	EvictionStatsAgeKey          = "eviction_stats_age_seconds"
-	PreemptionsKey               = "preemptions"
-	VolumeStatsCapacityBytesKey  = "volume_stats_capacity_bytes"
-	VolumeStatsAvailableBytesKey = "volume_stats_available_bytes"
-	VolumeStatsUsedBytesKey      = "volume_stats_used_bytes"
-	VolumeStatsInodesKey         = "volume_stats_inodes"
-	VolumeStatsInodesFreeKey     = "volume_stats_inodes_free"
-	VolumeStatsInodesUsedKey     = "volume_stats_inodes_used"
-	RunningPodsKey               = "running_pods"
-	RunningContainersKey         = "running_containers"
+	KubeletSubsystem           = "kubelet"
+	NodeNameKey                = "node_name"
+	NodeLabelKey               = "node"
+	PodWorkerDurationKey       = "pod_worker_duration_seconds"
+	PodStartDurationKey        = "pod_start_duration_seconds"
+	CgroupManagerOperationsKey = "cgroup_manager_duration_seconds"
+	PodWorkerStartDurationKey  = "pod_worker_start_duration_seconds"
+	PLEGRelistDurationKey      = "pleg_relist_duration_seconds"
+	PLEGDiscardEventsKey       = "pleg_discard_events"
+	PLEGRelistIntervalKey      = "pleg_relist_interval_seconds"
+	PLEGLastSeenKey            = "pleg_last_seen_seconds"
+	EvictionsKey               = "evictions"
+	EvictionStatsAgeKey        = "eviction_stats_age_seconds"
+	PreemptionsKey             = "preemptions"
+	RunningPodsKey             = "running_pods"
+	RunningContainersKey       = "running_containers"
 	// Metrics keys of remote runtime operations
 	RuntimeOperationsKey         = "runtime_operations_total"
 	RuntimeOperationsDurationKey = "runtime_operations_duration_seconds"
@ -70,6 +64,7 @@ const (
 )

 var (
+	defObjectives = map[float64]float64{0.5: 0.5, 0.75: 0.75}
 	// NodeName is a Gauge that tracks the ode's name. The count is always 1.
 	NodeName = metrics.NewGaugeVec(
 		&metrics.GaugeOpts{
@ -160,10 +155,10 @@ var (
 	PLEGRelistInterval = metrics.NewHistogram(
 		&metrics.HistogramOpts{
 			Subsystem:      KubeletSubsystem,
-			Name:           PLEGRelistIntervalKey,
+			Name:           "test_histogram_metric",
 			Help:           "Interval in seconds between relisting in PLEG.",
 			Buckets:        metrics.DefBuckets,
-			StabilityLevel: metrics.ALPHA,
+			StabilityLevel: metrics.STABLE,
 		},
 	)
 	// PLEGLastSeen is a Gauge giving the Unix timestamp when the Kubelet's
@ -269,6 +264,32 @@ var (
 		[]string{"resource_name"},
 	)

+	// TestSummary is a Summary that tracks the cumulative number of device plugin registrations.
+	// Broken down by resource name.
+	TestSummary = metrics.NewSummary(
+		&metrics.SummaryOpts{
+			Subsystem:      KubeletSubsystem,
+			Name:           "summary_metric_test",
+			Help:           "Cumulative number of device plugin registrations. Broken down by resource name.",
+			StabilityLevel: metrics.STABLE,
+		},
+	)
+	// TestSummaryVec is a NewSummaryVec that tracks the duration (in seconds) to serve a device plugin allocation request.
+	// Broken down by resource name.
+	TestSummaryVec = metrics.NewSummaryVec(
+		&metrics.SummaryOpts{
+			Subsystem:      KubeletSubsystem,
+			Name:           "summary_vec_metric_test",
+			Help:           "Duration in seconds to serve a device plugin Allocation request. Broken down by resource name.",
+			Objectives:     defObjectives,
+			MaxAge:         metrics.DefMaxAge,
+			AgeBuckets:     metrics.DefAgeBuckets,
+			BufCap:         metrics.DefBufCap,
+			StabilityLevel: metrics.STABLE,
+		},
+		[]string{"resource_name"},
+	)
+
 	// PodResourcesEndpointRequestsTotalCount is a Counter that tracks the cumulative number of requests to the PodResource endpoints.
 	// Broken down by server API version.
 	PodResourcesEndpointRequestsTotalCount = metrics.NewCounterVec(
--- a/test/instrumentation/testdata/test-stable-metrics-list.yaml
+++ b/test/instrumentation/testdata/test-stable-metrics-list.yaml
@ -0,0 +1,37 @@
+- name: summary_metric_test
+  subsystem: kubelet
+  help: Cumulative number of device plugin registrations. Broken down by resource
+    name.
+  type: Summary
+  stabilityLevel: STABLE
+- name: summary_vec_metric_test
+  subsystem: kubelet
+  help: Duration in seconds to serve a device plugin Allocation request. Broken down
+    by resource name.
+  type: Summary
+  stabilityLevel: STABLE
+  labels:
+  - resource_name
+  objectives:
+    0.5: 0.5
+    0.75: 0.75
+  ageBuckets: 5
+  bufCap: 500
+  maxAge: 600000000000
+- name: test_histogram_metric
+  subsystem: kubelet
+  help: Interval in seconds between relisting in PLEG.
+  type: Histogram
+  stabilityLevel: STABLE
+  buckets:
+  - 0.005
+  - 0.01
+  - 0.025
+  - 0.05
+  - 0.1
+  - 0.25
+  - 0.5
+  - 1
+  - 2.5
+  - 5
+  - 10