Add support for summary metrics for stablity checks

Also add entrypoints for verifying and updating a test file for easier
debugging. This is considerably faster than running the stablity checks
against the entire Kubernetes codebase.

Change-Id: I5d5e5b3abf396ebf1317a44130f20771a09afb7f
This commit is contained in:
Han Kang 2022-07-25 17:00:28 -07:00
parent 3ffdfbe286
commit e452850ec3
10 changed files with 340 additions and 52 deletions

View File

@ -23,6 +23,12 @@ import (
"github.com/prometheus/client_golang/prometheus"
)
const (
DefAgeBuckets = prometheus.DefAgeBuckets
DefBufCap = prometheus.DefBufCap
DefMaxAge = prometheus.DefMaxAge
)
// Summary is our internal representation for our wrapping struct around prometheus
// summaries. Summary implements both kubeCollector and ObserverMetric
//

View File

@ -66,12 +66,10 @@ func (c *metricDecoder) decodeNewMetricCall(fc *ast.CallExpr) (metric, error) {
return m, newDecodeErrorf(fc, errNotDirectCall)
}
switch functionName {
case "NewCounter", "NewGauge", "NewHistogram":
case "NewCounter", "NewGauge", "NewHistogram", "NewSummary":
m, err = c.decodeMetric(fc)
case "NewCounterVec", "NewGaugeVec", "NewHistogramVec":
case "NewCounterVec", "NewGaugeVec", "NewHistogramVec", "NewSummaryVec":
m, err = c.decodeMetricVec(fc)
case "NewSummary", "NewSummaryVec":
return m, newDecodeErrorf(fc, errStableSummary)
default:
return m, newDecodeErrorf(fc, errNotDirectCall)
}
@ -90,6 +88,8 @@ func getMetricType(functionName string) string {
return gaugeMetricType
case "NewHistogram", "NewHistogramVec":
return histogramMetricType
case "NewSummary", "NewSummaryVec":
return summaryMetricType
default:
panic("getMetricType expects correct function name")
}
@ -229,6 +229,33 @@ func (c *metricDecoder) decodeOpts(expr ast.Expr) (metric, error) {
return m, err
}
m.StabilityLevel = string(*level)
case "AgeBuckets", "BufCap":
uintVal, err := c.decodeUint32(kv.Value)
if err != nil {
print(key)
return m, err
}
if key == "AgeBuckets" {
m.AgeBuckets = uintVal
}
if key == "BufCap" {
m.BufCap = uintVal
}
case "Objectives":
obj, err := c.decodeObjectives(kv.Value)
if err != nil {
print(key)
return m, err
}
m.Objectives = obj
case "MaxAge":
int64Val, err := c.decodeInt64(kv.Value)
if err != nil {
print(key)
return m, err
}
m.MaxAge = int64Val
default:
return m, newDecodeErrorf(expr, errFieldNotSupported, key)
}
@ -280,6 +307,114 @@ func (c *metricDecoder) decodeBuckets(expr ast.Expr) ([]float64, error) {
return nil, newDecodeErrorf(expr, errBuckets)
}
func (c *metricDecoder) decodeObjectives(expr ast.Expr) (map[float64]float64, error) {
switch v := expr.(type) {
case *ast.CompositeLit:
return decodeFloatMap(v.Elts)
case *ast.Ident:
variableExpr, found := c.variables[v.Name]
if !found {
return nil, newDecodeErrorf(expr, errBadVariableAttribute)
}
return decodeFloatMap(variableExpr.(*ast.CompositeLit).Elts)
}
return nil, newDecodeErrorf(expr, errObjectives)
}
func (c *metricDecoder) decodeUint32(expr ast.Expr) (uint32, error) {
switch v := expr.(type) {
case *ast.BasicLit:
if v.Kind != token.FLOAT && v.Kind != token.INT {
print(v.Kind)
}
value, err := strconv.ParseUint(v.Value, 10, 32)
if err != nil {
return 0, err
}
return uint32(value), nil
case *ast.SelectorExpr:
variableName := v.Sel.String()
importName, ok := v.X.(*ast.Ident)
if ok && importName.String() == c.kubeMetricsImportName {
if variableName == "DefAgeBuckets" {
// hardcode this for now
return 5, nil
}
if variableName == "DefBufCap" {
// hardcode this for now
return 500, nil
}
}
case *ast.CallExpr:
_, ok := v.Fun.(*ast.SelectorExpr)
if !ok {
return 0, newDecodeErrorf(v, errDecodeUint32)
}
return 0, nil
}
return 0, newDecodeErrorf(expr, errDecodeUint32)
}
func (c *metricDecoder) decodeInt64(expr ast.Expr) (int64, error) {
switch v := expr.(type) {
case *ast.BasicLit:
if v.Kind != token.FLOAT && v.Kind != token.INT {
print(v.Kind)
}
value, err := strconv.ParseInt(v.Value, 10, 64)
if err != nil {
return 0, err
}
return value, nil
case *ast.SelectorExpr:
variableName := v.Sel.String()
importName, ok := v.X.(*ast.Ident)
if ok && importName.String() == c.kubeMetricsImportName {
if variableName == "DefMaxAge" {
// hardcode this for now. This is a duration but we'll output it as
// an int64 representing nanoseconds.
return 1000 * 1000 * 1000 * 60 * 10, nil
}
}
case *ast.CallExpr:
_, ok := v.Fun.(*ast.SelectorExpr)
if !ok {
return 0, newDecodeErrorf(v, errDecodeInt64)
}
return 0, nil
}
return 0, newDecodeErrorf(expr, errDecodeInt64)
}
func decodeFloatMap(exprs []ast.Expr) (map[float64]float64, error) {
buckets := map[float64]float64{}
for _, elt := range exprs {
bl, ok := elt.(*ast.KeyValueExpr)
if !ok {
return nil, newDecodeErrorf(bl, errObjectives)
}
keyExpr, ok := bl.Key.(*ast.BasicLit)
if !ok {
return nil, newDecodeErrorf(bl, errObjectives)
}
valueExpr, ok := bl.Value.(*ast.BasicLit)
if !ok {
return nil, newDecodeErrorf(bl, errObjectives)
}
valueForKey, err := strconv.ParseFloat(keyExpr.Value, 64)
if err != nil {
return nil, newDecodeErrorf(bl, errObjectives)
}
valueForValue, err := strconv.ParseFloat(valueExpr.Value, 64)
if err != nil {
return nil, newDecodeErrorf(bl, errObjectives)
}
buckets[valueForKey] = valueForValue
}
return buckets, nil
}
func decodeListOfFloats(exprs []ast.Expr) ([]float64, error) {
buckets := make([]float64, len(exprs))
for i, elt := range exprs {

View File

@ -33,9 +33,13 @@ const (
errBadImportedVariableAttribute = "Metric attribute was not correctly set. Please use only global consts in correctly impoprted same file"
errFieldNotSupported = "Field %s is not supported"
errBuckets = "Buckets should be set to list of floats, result from function call of prometheus.LinearBuckets or prometheus.ExponentialBuckets"
errLabels = "Labels were not set to list of strings"
errImport = `Importing using "." is not supported`
errExprNotIdent = "expr selector does not refer to type ast.Ident, is type %s"
errObjectives = "Buckets should be set to map of floats to floats"
errDecodeUint32 = "Should decode to uint32"
errDecodeInt64 = "Should decode to int64"
errLabels = "Labels were not set to list of strings"
errImport = `Importing using "." is not supported`
errExprNotIdent = "expr selector does not refer to type ast.Ident, is type %s"
)
type decodeError struct {

View File

@ -539,18 +539,6 @@ func TestIncorrectStableMetricDeclarations(t *testing.T) {
src string
err error
}{
{
testName: "Fail on stable summary metric (Summary is DEPRECATED)",
err: fmt.Errorf("testdata/metric.go:4:9: Stable summary metric is not supported"),
src: `
package test
import "k8s.io/component-base/metrics"
var _ = metrics.NewSummary(
&metrics.SummaryOpts{
StabilityLevel: metrics.STABLE,
},
)
`},
{
testName: "Fail on stable metric with attribute set to unknown variable",
err: fmt.Errorf("testdata/metric.go:6:4: Metric attribute was not correctly set. Please use only global consts in same file"),

View File

@ -24,18 +24,23 @@ const (
counterMetricType = "Counter"
gaugeMetricType = "Gauge"
histogramMetricType = "Histogram"
summaryMetricType = "Summary"
)
type metric struct {
Name string `yaml:"name"`
Subsystem string `yaml:"subsystem,omitempty"`
Namespace string `yaml:"namespace,omitempty"`
Help string `yaml:"help,omitempty"`
Type string `yaml:"type,omitempty"`
DeprecatedVersion string `yaml:"deprecatedVersion,omitempty"`
StabilityLevel string `yaml:"stabilityLevel,omitempty"`
Labels []string `yaml:"labels,omitempty"`
Buckets []float64 `yaml:"buckets,omitempty"`
Name string `yaml:"name"`
Subsystem string `yaml:"subsystem,omitempty"`
Namespace string `yaml:"namespace,omitempty"`
Help string `yaml:"help,omitempty"`
Type string `yaml:"type,omitempty"`
DeprecatedVersion string `yaml:"deprecatedVersion,omitempty"`
StabilityLevel string `yaml:"stabilityLevel,omitempty"`
Labels []string `yaml:"labels,omitempty"`
Buckets []float64 `yaml:"buckets,omitempty"`
Objectives map[float64]float64 `yaml:"objectives,omitempty"`
AgeBuckets uint32 `yaml:"ageBuckets,omitempty"`
BufCap uint32 `yaml:"bufCap,omitempty"`
MaxAge int64 `yaml:"maxAge,omitempty"`
}
func (m metric) buildFQName() string {

36
test/instrumentation/stability-utils.sh Normal file → Executable file
View File

@ -41,12 +41,17 @@ find_files_to_check() {
-o -wholename '*/vendor/*' \
-o -wholename '*/hack/*' \
-o -wholename '*_test.go' \
-o -wholename './test/instrumentation/*.go' \
\) -prune \
\) \
\( -wholename '**/*.go' \
\)
}
find_test_files() {
find './test/instrumentation' -wholename '**/*.go'
}
red=$(tput setaf 1)
green=$(tput setaf 2)
reset=$(tput sgr0)
@ -69,6 +74,24 @@ kube::validate::stablemetrics() {
exit 1
}
kube::validate::test::stablemetrics() {
stability_check_setup
temp_file=$(mktemp)
doValidate=$(find_test_files | grep -E ".*.go" | grep -v ".*_test.go" | grep -v ".git" | sort | KUBE_ROOT=${KUBE_ROOT} xargs -L 200 go run "test/instrumentation/main.go" "test/instrumentation/decode_metric.go" "test/instrumentation/find_stable_metric.go" "test/instrumentation/error.go" "test/instrumentation/metric.go" -- 1>"${temp_file}")
if $doValidate; then
echo -e "${green}Diffing test/instrumentation/testdata/test-stable-metrics-list.yaml\n${reset}"
if diff -u "$KUBE_ROOT/test/instrumentation/testdata/test-stable-metrics-list.yaml" "$temp_file"; then
echo -e "${green}\nPASS metrics stability verification ${reset}"
return 0
fi
fi
echo "${red}!!! Metrics stability static analysis test has failed!${reset}" >&2
echo "${red}!!! Please run './test/instrumentation/test-update.sh' to update the golden list.${reset}" >&2
exit 1
}
kube::update::stablemetrics() {
stability_check_setup
temp_file=$(mktemp)
@ -82,3 +105,16 @@ kube::update::stablemetrics() {
echo "${green}Updated golden list of stable metrics.${reset}"
}
kube::update::test::stablemetrics() {
stability_check_setup
temp_file=$(mktemp)
doCheckStability=$(find_test_files | grep -E ".*.go" | grep -v ".*_test.go" | sort | KUBE_ROOT=${KUBE_ROOT} xargs -L 200 go run "test/instrumentation/main.go" "test/instrumentation/decode_metric.go" "test/instrumentation/find_stable_metric.go" "test/instrumentation/error.go" "test/instrumentation/metric.go" -- 1>"${temp_file}")
if ! $doCheckStability; then
echo "${red}!!! updating golden list of test metrics has failed! ${reset}" >&2
exit 1
fi
mv -f "$temp_file" "${KUBE_ROOT}/test/instrumentation/testdata/test-stable-metrics-list.yaml"
echo "${green}Updated test list of stable metrics.${reset}"
}

View File

@ -0,0 +1,28 @@
#!/usr/bin/env bash
# Copyright 2022 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script runs to ensure that we do not violate metric stability
# policies.
# Usage: `test/instrumentation/test-update.sh`.
set -o errexit
set -o nounset
set -o pipefail
KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/../..
source "${KUBE_ROOT}/test/instrumentation/stability-utils.sh"
kube::update::test::stablemetrics

View File

@ -0,0 +1,28 @@
#!/usr/bin/env bash
# Copyright 2022 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script runs to ensure that we do not violate metric stability
# policies.
# Usage: `test/instrumentation/test-verify.sh`.
set -o errexit
set -o nounset
set -o pipefail
KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/../..
source "${KUBE_ROOT}/test/instrumentation/stability-utils.sh"
kube::validate::test::stablemetrics

View File

@ -28,28 +28,22 @@ import (
// This const block defines the metric names for the kubelet metrics.
const (
KubeletSubsystem = "kubelet"
NodeNameKey = "node_name"
NodeLabelKey = "node"
PodWorkerDurationKey = "pod_worker_duration_seconds"
PodStartDurationKey = "pod_start_duration_seconds"
CgroupManagerOperationsKey = "cgroup_manager_duration_seconds"
PodWorkerStartDurationKey = "pod_worker_start_duration_seconds"
PLEGRelistDurationKey = "pleg_relist_duration_seconds"
PLEGDiscardEventsKey = "pleg_discard_events"
PLEGRelistIntervalKey = "pleg_relist_interval_seconds"
PLEGLastSeenKey = "pleg_last_seen_seconds"
EvictionsKey = "evictions"
EvictionStatsAgeKey = "eviction_stats_age_seconds"
PreemptionsKey = "preemptions"
VolumeStatsCapacityBytesKey = "volume_stats_capacity_bytes"
VolumeStatsAvailableBytesKey = "volume_stats_available_bytes"
VolumeStatsUsedBytesKey = "volume_stats_used_bytes"
VolumeStatsInodesKey = "volume_stats_inodes"
VolumeStatsInodesFreeKey = "volume_stats_inodes_free"
VolumeStatsInodesUsedKey = "volume_stats_inodes_used"
RunningPodsKey = "running_pods"
RunningContainersKey = "running_containers"
KubeletSubsystem = "kubelet"
NodeNameKey = "node_name"
NodeLabelKey = "node"
PodWorkerDurationKey = "pod_worker_duration_seconds"
PodStartDurationKey = "pod_start_duration_seconds"
CgroupManagerOperationsKey = "cgroup_manager_duration_seconds"
PodWorkerStartDurationKey = "pod_worker_start_duration_seconds"
PLEGRelistDurationKey = "pleg_relist_duration_seconds"
PLEGDiscardEventsKey = "pleg_discard_events"
PLEGRelistIntervalKey = "pleg_relist_interval_seconds"
PLEGLastSeenKey = "pleg_last_seen_seconds"
EvictionsKey = "evictions"
EvictionStatsAgeKey = "eviction_stats_age_seconds"
PreemptionsKey = "preemptions"
RunningPodsKey = "running_pods"
RunningContainersKey = "running_containers"
// Metrics keys of remote runtime operations
RuntimeOperationsKey = "runtime_operations_total"
RuntimeOperationsDurationKey = "runtime_operations_duration_seconds"
@ -70,6 +64,7 @@ const (
)
var (
defObjectives = map[float64]float64{0.5: 0.5, 0.75: 0.75}
// NodeName is a Gauge that tracks the ode's name. The count is always 1.
NodeName = metrics.NewGaugeVec(
&metrics.GaugeOpts{
@ -160,10 +155,10 @@ var (
PLEGRelistInterval = metrics.NewHistogram(
&metrics.HistogramOpts{
Subsystem: KubeletSubsystem,
Name: PLEGRelistIntervalKey,
Name: "test_histogram_metric",
Help: "Interval in seconds between relisting in PLEG.",
Buckets: metrics.DefBuckets,
StabilityLevel: metrics.ALPHA,
StabilityLevel: metrics.STABLE,
},
)
// PLEGLastSeen is a Gauge giving the Unix timestamp when the Kubelet's
@ -269,6 +264,32 @@ var (
[]string{"resource_name"},
)
// TestSummary is a Summary that tracks the cumulative number of device plugin registrations.
// Broken down by resource name.
TestSummary = metrics.NewSummary(
&metrics.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: "summary_metric_test",
Help: "Cumulative number of device plugin registrations. Broken down by resource name.",
StabilityLevel: metrics.STABLE,
},
)
// TestSummaryVec is a NewSummaryVec that tracks the duration (in seconds) to serve a device plugin allocation request.
// Broken down by resource name.
TestSummaryVec = metrics.NewSummaryVec(
&metrics.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: "summary_vec_metric_test",
Help: "Duration in seconds to serve a device plugin Allocation request. Broken down by resource name.",
Objectives: defObjectives,
MaxAge: metrics.DefMaxAge,
AgeBuckets: metrics.DefAgeBuckets,
BufCap: metrics.DefBufCap,
StabilityLevel: metrics.STABLE,
},
[]string{"resource_name"},
)
// PodResourcesEndpointRequestsTotalCount is a Counter that tracks the cumulative number of requests to the PodResource endpoints.
// Broken down by server API version.
PodResourcesEndpointRequestsTotalCount = metrics.NewCounterVec(

View File

@ -0,0 +1,37 @@
- name: summary_metric_test
subsystem: kubelet
help: Cumulative number of device plugin registrations. Broken down by resource
name.
type: Summary
stabilityLevel: STABLE
- name: summary_vec_metric_test
subsystem: kubelet
help: Duration in seconds to serve a device plugin Allocation request. Broken down
by resource name.
type: Summary
stabilityLevel: STABLE
labels:
- resource_name
objectives:
0.5: 0.5
0.75: 0.75
ageBuckets: 5
bufCap: 500
maxAge: 600000000000
- name: test_histogram_metric
subsystem: kubelet
help: Interval in seconds between relisting in PLEG.
type: Histogram
stabilityLevel: STABLE
buckets:
- 0.005
- 0.01
- 0.025
- 0.05
- 0.1
- 0.25
- 0.5
- 1
- 2.5
- 5
- 10