diff --git a/test/instrumentation/decode_metric.go b/test/instrumentation/decode_metric.go index 721ab9738b1..32cefd7ea30 100644 --- a/test/instrumentation/decode_metric.go +++ b/test/instrumentation/decode_metric.go @@ -182,6 +182,25 @@ func (c *metricDecoder) decodeOpts(expr ast.Expr) (metric, error) { if err != nil { return m, err } + case *ast.SelectorExpr: + s, ok := v.X.(*ast.Ident) + if !ok { + return m, newDecodeErrorf(expr, errExprNotIdent, v.X) + } + + variableExpr, found := c.variables[strings.Join([]string{s.Name, v.Sel.Name}, ".")] + if !found { + return m, newDecodeErrorf(expr, errBadImportedVariableAttribute) + } + bl, ok := variableExpr.(*ast.BasicLit) + if !ok { + return m, newDecodeErrorf(expr, errNonStringAttribute) + } + value, err = stringValue(bl) + if err != nil { + return m, err + } + default: return m, newDecodeErrorf(expr, errNonStringAttribute) } diff --git a/test/instrumentation/error.go b/test/instrumentation/error.go index bc3942522d8..cc7eab3d38c 100644 --- a/test/instrumentation/error.go +++ b/test/instrumentation/error.go @@ -23,17 +23,19 @@ import ( ) const ( - errNotDirectCall = "Opts for STABLE metric was not directly passed to new metric function" - errPositionalArguments = "Positional arguments are not supported" - errStabilityLevel = "StabilityLevel should be passed STABLE, ALPHA or removed" - errStableSummary = "Stable summary metric is not supported" - errInvalidNewMetricCall = "Invalid new metric call, please ensure code compiles" - errNonStringAttribute = "Non string attribute it not supported" - errBadVariableAttribute = "Metric attribute was not correctly set. Please use only global consts in same file" - errFieldNotSupported = "Field %s is not supported" - errBuckets = "Buckets should be set to list of floats, result from function call of prometheus.LinearBuckets or prometheus.ExponentialBuckets" - errLabels = "Labels were not set to list of strings" - errImport = `Importing using "." is not supported` + errNotDirectCall = "Opts for STABLE metric was not directly passed to new metric function" + errPositionalArguments = "Positional arguments are not supported" + errStabilityLevel = "StabilityLevel should be passed STABLE, ALPHA or removed" + errStableSummary = "Stable summary metric is not supported" + errInvalidNewMetricCall = "Invalid new metric call, please ensure code compiles" + errNonStringAttribute = "Non string attribute is not supported" + errBadVariableAttribute = "Metric attribute was not correctly set. Please use only global consts in same file" + errBadImportedVariableAttribute = "Metric attribute was not correctly set. Please use only global consts in correctly impoprted same file" + errFieldNotSupported = "Field %s is not supported" + errBuckets = "Buckets should be set to list of floats, result from function call of prometheus.LinearBuckets or prometheus.ExponentialBuckets" + errLabels = "Labels were not set to list of strings" + errImport = `Importing using "." is not supported` + errExprNotIdent = "expr selector does not refer to type ast.Ident, is type %s" ) type decodeError struct { diff --git a/test/instrumentation/main.go b/test/instrumentation/main.go index 961cbac95df..2d051bccc61 100644 --- a/test/instrumentation/main.go +++ b/test/instrumentation/main.go @@ -23,6 +23,7 @@ import ( "go/ast" "go/parser" "go/token" + "io/ioutil" "os" "path/filepath" "sort" @@ -35,6 +36,15 @@ const ( kubeMetricImportPath = `"k8s.io/component-base/metrics"` // Should equal to final directory name of kubeMetricImportPath kubeMetricsDefaultImportName = "metrics" + + kubeURLRoot = "k8s.io/kubernetes/" +) + +var ( + // env configs + GOROOT string = os.Getenv("GOROOT") + GOOS string = os.Getenv("GOOS") + KUBE_ROOT string = os.Getenv("KUBE_ROOT") ) func main() { @@ -124,6 +134,11 @@ func searchFileForStableMetrics(filename string, src interface{}) ([]metric, []e } variables := globalVariableDeclarations(tree) + variables, err = importedGlobalVariableDeclaration(variables, tree.Imports) + if err != nil { + return []metric{}, addFileInformationToErrors([]error{err}, fileset) + } + stableMetricsFunctionCalls, errors := findStableMetricDeclaration(tree, metricsImportName) metrics, es := decodeMetricCalls(stableMetricsFunctionCalls, metricsImportName, variables) errors = append(errors, es...) @@ -173,3 +188,107 @@ func globalVariableDeclarations(tree *ast.File) map[string]ast.Expr { } return consts } + +func localImportPath(importExpr string) (string, error) { + // parse directory path + var pathPrefix string + if strings.Contains(importExpr, kubeURLRoot) { + // search k/k local checkout + pathPrefix = KUBE_ROOT + importExpr = strings.Replace(importExpr, kubeURLRoot, "", 1) + } else if strings.Contains(importExpr, "k8s.io/klog/v2") || strings.Contains(importExpr, "k8s.io/util") { + pathPrefix = strings.Join([]string{KUBE_ROOT, "vendor"}, string(os.PathSeparator)) + } else if strings.Contains(importExpr, "k8s.io/") { + // search k/k/staging local checkout + pathPrefix = strings.Join([]string{KUBE_ROOT, "staging", "src"}, string(os.PathSeparator)) + } else if strings.Contains(importExpr, ".") { + // not stdlib -> prefix with GOMODCACHE + // pathPrefix = strings.Join([]string{KUBE_ROOT, "vendor"}, string(os.PathSeparator)) + + // this requires implementing SIV, skip for now + return "", fmt.Errorf("unable to handle general, non STL imports for metric analysis. import path: %s", importExpr) + } else { + // stdlib -> prefix with GOROOT + pathPrefix = strings.Join([]string{GOROOT, "src"}, string(os.PathSeparator)) + } // ToDo: support non go mod + + crossPlatformImportExpr := strings.Replace(importExpr, "/", string(os.PathSeparator), -1) + importDirectory := strings.Join([]string{pathPrefix, strings.Trim(crossPlatformImportExpr, "\"")}, string(os.PathSeparator)) + + return importDirectory, nil +} + +func importedGlobalVariableDeclaration(localVariables map[string]ast.Expr, imports []*ast.ImportSpec) (map[string]ast.Expr, error) { + for _, im := range imports { + // get imported label + var importAlias string + if im.Name == nil { + pathSegments := strings.Split(im.Path.Value, "/") + importAlias = strings.Trim(pathSegments[len(pathSegments)-1], "\"") + } else { + importAlias = im.Name.String() + } + + // find local path on disk for listed import + importDirectory, err := localImportPath(im.Path.Value) + if err != nil { + // uncomment the below log line if you want to start using non k8s/non stl libs for resolving const/var in metric definitions + // fmt.Fprint(os.Stderr, err.Error() + "\n") + continue + } + + files, err := ioutil.ReadDir(importDirectory) + if err != nil { + fmt.Fprintf(os.Stderr, "failed to read import path directory %s with error %s, skipping\n", importDirectory, err) + continue + } + + for _, file := range files { + if file.IsDir() { + // do not grab constants from subpackages + continue + } + + if strings.Contains(file.Name(), "_test") { + // do not parse test files + continue + } + + if !strings.HasSuffix(file.Name(), ".go") { + // not a go code file, do not attempt to parse + continue + } + + fileset := token.NewFileSet() + tree, err := parser.ParseFile(fileset, strings.Join([]string{importDirectory, file.Name()}, string(os.PathSeparator)), nil, parser.AllErrors) + if err != nil { + return nil, fmt.Errorf("failed to parse path %s with error %w", im.Path.Value, err) + } + + // pass parsed filepath into globalVariableDeclarations + variables := globalVariableDeclarations(tree) + + // add returned map into supplied map and prepend import label to all keys + for k, v := range variables { + importK := strings.Join([]string{importAlias, k}, ".") + if _, ok := localVariables[importK]; !ok { + localVariables[importK] = v + } else { + // cross-platform file that gets included in the correct OS build via OS build tags + // use whatever matches GOOS + + if strings.Contains(file.Name(), GOOS) { + // assume at some point we will find the correct OS version of this file + // if we are running on an OS that does not have an OS specific file for something then we will include a constant we shouldn't + // TODO: should we include/exclude based on the build tags? + localVariables[importK] = v + } + + } + } + } + + } + + return localVariables, nil +} diff --git a/test/instrumentation/main_test.go b/test/instrumentation/main_test.go index 33e833e9699..944d69243fb 100644 --- a/test/instrumentation/main_test.go +++ b/test/instrumentation/main_test.go @@ -18,7 +18,9 @@ package main import ( "fmt" + "os" "reflect" + "strings" "testing" "k8s.io/component-base/metrics" @@ -117,10 +119,16 @@ var _ = NewCounter( } func TestStableMetric(t *testing.T) { + wd, err := os.Getwd() + if err != nil { + t.Fatalf("unable to fetch path to testing package - needed for simulating import path tests") + } + for _, test := range []struct { testName string src string metric metric + kubeRoot string }{ { testName: "Counter", @@ -434,9 +442,80 @@ var _ = metrics.NewHistogram( Buckets: metrics.DefBuckets, }, ) +`}, + { + testName: "Imported stdlib constant", + metric: metric{ + Name: "importedCounter", + StabilityLevel: "STABLE", + Subsystem: "GET", + Type: counterMetricType, + }, + src: ` +package test +import "k8s.io/component-base/metrics" +import "net/http" +var _ = metrics.NewCounter( + &metrics.CounterOpts{ + Name: "importedCounter", + StabilityLevel: metrics.STABLE, + Subsystem: http.MethodGet, + }, + ) +`}, + { + testName: "Imported k8s.io constant", + metric: metric{ + Name: "importedCounter", + StabilityLevel: "STABLE", + Subsystem: "kubelet", + Type: counterMetricType, + }, + kubeRoot: strings.Join([]string{wd, "testdata"}, string(os.PathSeparator)), + src: ` +package test +import compbasemetrics "k8s.io/component-base/metrics" +import "k8s.io/kubernetes/pkg/kubelet/metrics" +var _ = compbasemetrics.NewCounter( + &compbasemetrics.CounterOpts{ + Name: "importedCounter", + StabilityLevel: compbasemetrics.STABLE, + Subsystem: metrics.KubeletSubsystem, + }, + ) +`}, + { + testName: "Imported k8s.io/staging constant", + metric: metric{ + Name: "importedCounter", + StabilityLevel: "STABLE", + Subsystem: "ThisIsNotTheSoundOfTheTrain", + Type: counterMetricType, + }, + kubeRoot: strings.Join([]string{wd, "testdata"}, string(os.PathSeparator)), + src: ` +package test +import compbasemetrics "k8s.io/component-base/metrics" +import "k8s.io/metrics" +var _ = compbasemetrics.NewCounter( + &compbasemetrics.CounterOpts{ + Name: "importedCounter", + StabilityLevel: compbasemetrics.STABLE, + Subsystem: metrics.OKGO, + }, + ) `}, } { t.Run(test.testName, func(t *testing.T) { + // these sub-tests cannot be run in parallel with the below + if test.kubeRoot != "" { + priorKRoot := KUBE_ROOT + KUBE_ROOT = test.kubeRoot + defer func() { + KUBE_ROOT = priorKRoot + }() + } + metrics, errors := searchFileForStableMetrics(fakeFilename, test.src) if len(errors) != 0 { t.Errorf("Unexpected errors: %s", errors) @@ -487,7 +566,7 @@ var _ = metrics.NewCounter( `}, { testName: "Fail on stable metric with attribute set to local function return", - err: fmt.Errorf("testdata/metric.go:9:4: Non string attribute it not supported"), + err: fmt.Errorf("testdata/metric.go:9:4: Non string attribute is not supported"), src: ` package test import "k8s.io/component-base/metrics" @@ -503,7 +582,7 @@ var _ = metrics.NewCounter( `}, { testName: "Fail on stable metric with attribute set to imported function return", - err: fmt.Errorf("testdata/metric.go:7:4: Non string attribute it not supported"), + err: fmt.Errorf("testdata/metric.go:7:4: Non string attribute is not supported"), src: ` package test import "k8s.io/component-base/metrics" @@ -685,3 +764,56 @@ var _ = metrics.NewHistogram( }) } } + +func Test_localImportPath(t *testing.T) { + KUBE_ROOT = "/home/pchristopher/go/src/k8s.io/kubernetes" + GOROOT := os.Getenv("GOROOT") + + for _, test := range []struct { + name string + importExpr string + expectedPath string + errorExp bool + }{ + { + name: "k8s local package", + importExpr: "k8s.io/kubernetes/pkg/kubelet/metrics", + expectedPath: strings.Join([]string{KUBE_ROOT, "pkg", "kubelet", "metrics"}, string(os.PathSeparator)), + errorExp: false, + }, + { + name: "k8s staging package", + importExpr: "k8s.io/kubelet/metrics", + expectedPath: strings.Join([]string{KUBE_ROOT, "staging", "src", "k8s.io", "kubelet", "metrics"}, string(os.PathSeparator)), + errorExp: false, + }, + { + name: "public package", + importExpr: "github.com/thisisnot/thesoundofthetrain", + errorExp: true, + }, + { + name: "stl package", + importExpr: "os", + expectedPath: strings.Join([]string{GOROOT, "src", "os"}, string(os.PathSeparator)), + errorExp: false, + }, + } { + t.Run(test.name, func(t *testing.T) { + path, err := localImportPath(test.importExpr) + if test.errorExp { + if err == nil { + t.Error("did not receive error as expected") + } + } else { + if err != nil { + t.Errorf("received unexpected error %s", err) + } + } + + if path != test.expectedPath { + t.Errorf("did not received expected path. \nwant: %s \ngot: %s", test.expectedPath, path) + } + }) + } +} diff --git a/test/instrumentation/stability-utils.sh b/test/instrumentation/stability-utils.sh index 4a80f629b11..1b32ce97438 100644 --- a/test/instrumentation/stability-utils.sh +++ b/test/instrumentation/stability-utils.sh @@ -58,7 +58,7 @@ reset=$(tput sgr0) kube::validate::stablemetrics() { stability_check_setup temp_file=$(mktemp) - doValidate=$(find_files_to_check | grep -E ".*.go" | grep -v ".*_test.go" | sort | xargs -L 200 go run "test/instrumentation/main.go" "test/instrumentation/decode_metric.go" "test/instrumentation/find_stable_metric.go" "test/instrumentation/error.go" "test/instrumentation/metric.go" -- 1>"${temp_file}") + doValidate=$(find_files_to_check | grep -E ".*.go" | grep -v ".*_test.go" | sort | KUBE_ROOT=${KUBE_ROOT} xargs -L 200 go run "test/instrumentation/main.go" "test/instrumentation/decode_metric.go" "test/instrumentation/find_stable_metric.go" "test/instrumentation/error.go" "test/instrumentation/metric.go" -- 1>"${temp_file}") if $doValidate; then echo -e "${green}Diffing test/instrumentation/testdata/stable-metrics-list.yaml\n${reset}" @@ -76,7 +76,7 @@ kube::validate::stablemetrics() { kube::update::stablemetrics() { stability_check_setup temp_file=$(mktemp) - doCheckStability=$(find_files_to_check | grep -E ".*.go" | grep -v ".*_test.go" | sort | xargs -L 200 go run "test/instrumentation/main.go" "test/instrumentation/decode_metric.go" "test/instrumentation/find_stable_metric.go" "test/instrumentation/error.go" "test/instrumentation/metric.go" -- 1>"${temp_file}") + doCheckStability=$(find_files_to_check | grep -E ".*.go" | grep -v ".*_test.go" | sort | KUBE_ROOT=${KUBE_ROOT} xargs -L 200 go run "test/instrumentation/main.go" "test/instrumentation/decode_metric.go" "test/instrumentation/find_stable_metric.go" "test/instrumentation/error.go" "test/instrumentation/metric.go" -- 1>"${temp_file}") if ! $doCheckStability; then echo "${red}!!! updating golden list of metrics has failed! ${reset}" >&2 diff --git a/test/instrumentation/testdata/pkg/kubelet/metrics/metrics.go b/test/instrumentation/testdata/pkg/kubelet/metrics/metrics.go new file mode 100644 index 00000000000..aa256093be0 --- /dev/null +++ b/test/instrumentation/testdata/pkg/kubelet/metrics/metrics.go @@ -0,0 +1,588 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "fmt" + "sync" + "time" + + "k8s.io/component-base/metrics" + "k8s.io/component-base/metrics/legacyregistry" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + utilfeature "k8s.io/apiserver/pkg/util/feature" + "k8s.io/klog/v2" + "k8s.io/kubernetes/pkg/features" +) + +// This const block defines the metric names for the kubelet metrics. +const ( + KubeletSubsystem = "kubelet" + NodeNameKey = "node_name" + NodeLabelKey = "node" + PodWorkerDurationKey = "pod_worker_duration_seconds" + PodStartDurationKey = "pod_start_duration_seconds" + CgroupManagerOperationsKey = "cgroup_manager_duration_seconds" + PodWorkerStartDurationKey = "pod_worker_start_duration_seconds" + PLEGRelistDurationKey = "pleg_relist_duration_seconds" + PLEGDiscardEventsKey = "pleg_discard_events" + PLEGRelistIntervalKey = "pleg_relist_interval_seconds" + PLEGLastSeenKey = "pleg_last_seen_seconds" + EvictionsKey = "evictions" + EvictionStatsAgeKey = "eviction_stats_age_seconds" + PreemptionsKey = "preemptions" + VolumeStatsCapacityBytesKey = "volume_stats_capacity_bytes" + VolumeStatsAvailableBytesKey = "volume_stats_available_bytes" + VolumeStatsUsedBytesKey = "volume_stats_used_bytes" + VolumeStatsInodesKey = "volume_stats_inodes" + VolumeStatsInodesFreeKey = "volume_stats_inodes_free" + VolumeStatsInodesUsedKey = "volume_stats_inodes_used" + RunningPodsKey = "running_pods" + RunningContainersKey = "running_containers" + // Metrics keys of remote runtime operations + RuntimeOperationsKey = "runtime_operations_total" + RuntimeOperationsDurationKey = "runtime_operations_duration_seconds" + RuntimeOperationsErrorsKey = "runtime_operations_errors_total" + // Metrics keys of device plugin operations + DevicePluginRegistrationCountKey = "device_plugin_registration_total" + DevicePluginAllocationDurationKey = "device_plugin_alloc_duration_seconds" + // Metrics keys of pod resources operations + PodResourcesEndpointRequestsTotalKey = "pod_resources_endpoint_requests_total" + PodResourcesEndpointRequestsListKey = "pod_resources_endpoint_requests_list" + PodResourcesEndpointRequestsGetAllocatableKey = "pod_resources_endpoint_requests_get_allocatable" + PodResourcesEndpointErrorsListKey = "pod_resources_endpoint_errors_list" + PodResourcesEndpointErrorsGetAllocatableKey = "pod_resources_endpoint_errors_get_allocatable" + + // Metric keys for node config + AssignedConfigKey = "node_config_assigned" + ActiveConfigKey = "node_config_active" + LastKnownGoodConfigKey = "node_config_last_known_good" + ConfigErrorKey = "node_config_error" + ConfigSourceLabelKey = "node_config_source" + ConfigSourceLabelValueLocal = "local" + ConfigUIDLabelKey = "node_config_uid" + ConfigResourceVersionLabelKey = "node_config_resource_version" + KubeletConfigKeyLabelKey = "node_config_kubelet_key" + + // Metrics keys for RuntimeClass + RunPodSandboxDurationKey = "run_podsandbox_duration_seconds" + RunPodSandboxErrorsKey = "run_podsandbox_errors_total" +) + +var ( + // NodeName is a Gauge that tracks the ode's name. The count is always 1. + NodeName = metrics.NewGaugeVec( + &metrics.GaugeOpts{ + Subsystem: KubeletSubsystem, + Name: NodeNameKey, + Help: "The node's name. The count is always 1.", + StabilityLevel: metrics.ALPHA, + }, + []string{NodeLabelKey}, + ) + // ContainersPerPodCount is a Histogram that tracks the number of containers per pod. + ContainersPerPodCount = metrics.NewHistogram( + &metrics.HistogramOpts{ + Subsystem: KubeletSubsystem, + Name: "containers_per_pod_count", + Help: "The number of containers per pod.", + Buckets: metrics.ExponentialBuckets(1, 2, 5), + StabilityLevel: metrics.ALPHA, + }, + ) + // PodWorkerDuration is a Histogram that tracks the duration (in seconds) in takes to sync a single pod. + // Broken down by the operation type. + PodWorkerDuration = metrics.NewHistogramVec( + &metrics.HistogramOpts{ + Subsystem: KubeletSubsystem, + Name: PodWorkerDurationKey, + Help: "Duration in seconds to sync a single pod. Broken down by operation type: create, update, or sync", + Buckets: metrics.DefBuckets, + StabilityLevel: metrics.ALPHA, + }, + []string{"operation_type"}, + ) + // PodStartDuration is a Histogram that tracks the duration (in seconds) it takes for a single pod to go from pending to running. + PodStartDuration = metrics.NewHistogram( + &metrics.HistogramOpts{ + Subsystem: KubeletSubsystem, + Name: PodStartDurationKey, + Help: "Duration in seconds for a single pod to go from pending to running.", + Buckets: metrics.DefBuckets, + StabilityLevel: metrics.ALPHA, + }, + ) + // CgroupManagerDuration is a Histogram that tracks the duration (in seconds) it takes for cgroup manager operations to complete. + // Broken down by method. + CgroupManagerDuration = metrics.NewHistogramVec( + &metrics.HistogramOpts{ + Subsystem: KubeletSubsystem, + Name: CgroupManagerOperationsKey, + Help: "Duration in seconds for cgroup manager operations. Broken down by method.", + Buckets: metrics.DefBuckets, + StabilityLevel: metrics.ALPHA, + }, + []string{"operation_type"}, + ) + // PodWorkerStartDuration is a Histogram that tracks the duration (in seconds) it takes from seeing a pod to starting a worker. + PodWorkerStartDuration = metrics.NewHistogram( + &metrics.HistogramOpts{ + Subsystem: KubeletSubsystem, + Name: PodWorkerStartDurationKey, + Help: "Duration in seconds from seeing a pod to starting a worker.", + Buckets: metrics.DefBuckets, + StabilityLevel: metrics.ALPHA, + }, + ) + // PLEGRelistDuration is a Histogram that tracks the duration (in seconds) it takes for relisting pods in the Kubelet's + // Pod Lifecycle Event Generator (PLEG). + PLEGRelistDuration = metrics.NewHistogram( + &metrics.HistogramOpts{ + Subsystem: KubeletSubsystem, + Name: PLEGRelistDurationKey, + Help: "Duration in seconds for relisting pods in PLEG.", + Buckets: metrics.DefBuckets, + StabilityLevel: metrics.ALPHA, + }, + ) + // PLEGDiscardEvents is a Counter that tracks the number of discarding events in the Kubelet's Pod Lifecycle Event Generator (PLEG). + PLEGDiscardEvents = metrics.NewCounter( + &metrics.CounterOpts{ + Subsystem: KubeletSubsystem, + Name: PLEGDiscardEventsKey, + Help: "The number of discard events in PLEG.", + StabilityLevel: metrics.ALPHA, + }, + ) + + // PLEGRelistInterval is a Histogram that tracks the intervals (in seconds) between relisting in the Kubelet's + // Pod Lifecycle Event Generator (PLEG). + PLEGRelistInterval = metrics.NewHistogram( + &metrics.HistogramOpts{ + Subsystem: KubeletSubsystem, + Name: PLEGRelistIntervalKey, + Help: "Interval in seconds between relisting in PLEG.", + Buckets: metrics.DefBuckets, + StabilityLevel: metrics.ALPHA, + }, + ) + // PLEGLastSeen is a Gauge giving the Unix timestamp when the Kubelet's + // Pod Lifecycle Event Generator (PLEG) was last seen active. + PLEGLastSeen = metrics.NewGauge( + &metrics.GaugeOpts{ + Subsystem: KubeletSubsystem, + Name: PLEGLastSeenKey, + Help: "Timestamp in seconds when PLEG was last seen active.", + StabilityLevel: metrics.ALPHA, + }, + ) + // RuntimeOperations is a Counter that tracks the cumulative number of remote runtime operations. + // Broken down by operation type. + RuntimeOperations = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: KubeletSubsystem, + Name: RuntimeOperationsKey, + Help: "Cumulative number of runtime operations by operation type.", + StabilityLevel: metrics.ALPHA, + }, + []string{"operation_type"}, + ) + // RuntimeOperationsDuration is a Histogram that tracks the duration (in seconds) for remote runtime operations to complete. + // Broken down by operation type. + RuntimeOperationsDuration = metrics.NewHistogramVec( + &metrics.HistogramOpts{ + Subsystem: KubeletSubsystem, + Name: RuntimeOperationsDurationKey, + Help: "Duration in seconds of runtime operations. Broken down by operation type.", + Buckets: metrics.ExponentialBuckets(.005, 2.5, 14), + StabilityLevel: metrics.ALPHA, + }, + []string{"operation_type"}, + ) + // RuntimeOperationsErrors is a Counter that tracks the cumulative number of remote runtime operations errors. + // Broken down by operation type. + RuntimeOperationsErrors = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: KubeletSubsystem, + Name: RuntimeOperationsErrorsKey, + Help: "Cumulative number of runtime operation errors by operation type.", + StabilityLevel: metrics.ALPHA, + }, + []string{"operation_type"}, + ) + // Evictions is a Counter that tracks the cumulative number of pod evictions initiated by the kubelet. + // Broken down by eviction signal. + Evictions = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: KubeletSubsystem, + Name: EvictionsKey, + Help: "Cumulative number of pod evictions by eviction signal", + StabilityLevel: metrics.ALPHA, + }, + []string{"eviction_signal"}, + ) + // EvictionStatsAge is a Histogram that tracks the time (in seconds) between when stats are collected and when a pod is evicted + // based on those stats. Broken down by eviction signal. + EvictionStatsAge = metrics.NewHistogramVec( + &metrics.HistogramOpts{ + Subsystem: KubeletSubsystem, + Name: EvictionStatsAgeKey, + Help: "Time between when stats are collected, and when pod is evicted based on those stats by eviction signal", + Buckets: metrics.DefBuckets, + StabilityLevel: metrics.ALPHA, + }, + []string{"eviction_signal"}, + ) + // Preemptions is a Counter that tracks the cumulative number of pod preemptions initiated by the kubelet. + // Broken down by preemption signal. A preemption is only recorded for one resource, the sum of all signals + // is the number of preemptions on the given node. + Preemptions = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: KubeletSubsystem, + Name: PreemptionsKey, + Help: "Cumulative number of pod preemptions by preemption resource", + StabilityLevel: metrics.ALPHA, + }, + []string{"preemption_signal"}, + ) + // DevicePluginRegistrationCount is a Counter that tracks the cumulative number of device plugin registrations. + // Broken down by resource name. + DevicePluginRegistrationCount = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: KubeletSubsystem, + Name: DevicePluginRegistrationCountKey, + Help: "Cumulative number of device plugin registrations. Broken down by resource name.", + StabilityLevel: metrics.ALPHA, + }, + []string{"resource_name"}, + ) + // DevicePluginAllocationDuration is a Histogram that tracks the duration (in seconds) to serve a device plugin allocation request. + // Broken down by resource name. + DevicePluginAllocationDuration = metrics.NewHistogramVec( + &metrics.HistogramOpts{ + Subsystem: KubeletSubsystem, + Name: DevicePluginAllocationDurationKey, + Help: "Duration in seconds to serve a device plugin Allocation request. Broken down by resource name.", + Buckets: metrics.DefBuckets, + StabilityLevel: metrics.ALPHA, + }, + []string{"resource_name"}, + ) + + // PodResourcesEndpointRequestsTotalCount is a Counter that tracks the cumulative number of requests to the PodResource endpoints. + // Broken down by server API version. + PodResourcesEndpointRequestsTotalCount = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: KubeletSubsystem, + Name: PodResourcesEndpointRequestsTotalKey, + Help: "Cumulative number of requests to the PodResource endpoint. Broken down by server api version.", + StabilityLevel: metrics.ALPHA, + }, + []string{"server_api_version"}, + ) + + // PodResourcesEndpointRequestsListCount is a Counter that tracks the number of requests to the PodResource List() endpoint. + // Broken down by server API version. + PodResourcesEndpointRequestsListCount = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: KubeletSubsystem, + Name: PodResourcesEndpointRequestsListKey, + Help: "Number of requests to the PodResource List endpoint. Broken down by server api version.", + StabilityLevel: metrics.ALPHA, + }, + []string{"server_api_version"}, + ) + + // PodResourcesEndpointRequestsGetAllocatableCount is a Counter that tracks the number of requests to the PodResource GetAllocatableResources() endpoint. + // Broken down by server API version. + PodResourcesEndpointRequestsGetAllocatableCount = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: KubeletSubsystem, + Name: PodResourcesEndpointRequestsGetAllocatableKey, + Help: "Number of requests to the PodResource GetAllocatableResources endpoint. Broken down by server api version.", + StabilityLevel: metrics.ALPHA, + }, + []string{"server_api_version"}, + ) + + // PodResourcesEndpointErrorsListCount is a Counter that tracks the number of errors returned by he PodResource List() endpoint. + // Broken down by server API version. + PodResourcesEndpointErrorsListCount = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: KubeletSubsystem, + Name: PodResourcesEndpointErrorsListKey, + Help: "Number of requests to the PodResource List endpoint which returned error. Broken down by server api version.", + StabilityLevel: metrics.ALPHA, + }, + []string{"server_api_version"}, + ) + + // PodResourcesEndpointErrorsGetAllocatableCount is a Counter that tracks the number of errors returned by the PodResource GetAllocatableResources() endpoint. + // Broken down by server API version. + PodResourcesEndpointErrorsGetAllocatableCount = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: KubeletSubsystem, + Name: PodResourcesEndpointErrorsGetAllocatableKey, + Help: "Number of requests to the PodResource GetAllocatableResources endpoint which returned error. Broken down by server api version.", + StabilityLevel: metrics.ALPHA, + }, + []string{"server_api_version"}, + ) + + // Metrics for node config + + // AssignedConfig is a Gauge that is set 1 if the Kubelet has a NodeConfig assigned. + AssignedConfig = metrics.NewGaugeVec( + &metrics.GaugeOpts{ + Subsystem: KubeletSubsystem, + Name: AssignedConfigKey, + Help: "The node's understanding of intended config. The count is always 1.", + StabilityLevel: metrics.ALPHA, + }, + []string{ConfigSourceLabelKey, ConfigUIDLabelKey, ConfigResourceVersionLabelKey, KubeletConfigKeyLabelKey}, + ) + // ActiveConfig is a Gauge that is set to 1 if the Kubelet has an active NodeConfig. + ActiveConfig = metrics.NewGaugeVec( + &metrics.GaugeOpts{ + Subsystem: KubeletSubsystem, + Name: ActiveConfigKey, + Help: "The config source the node is actively using. The count is always 1.", + StabilityLevel: metrics.ALPHA, + }, + []string{ConfigSourceLabelKey, ConfigUIDLabelKey, ConfigResourceVersionLabelKey, KubeletConfigKeyLabelKey}, + ) + // LastKnownGoodConfig is a Gauge that is set to 1 if the Kubelet has a NodeConfig it can fall back to if there + // are certain errors. + LastKnownGoodConfig = metrics.NewGaugeVec( + &metrics.GaugeOpts{ + Subsystem: KubeletSubsystem, + Name: LastKnownGoodConfigKey, + Help: "The config source the node will fall back to when it encounters certain errors. The count is always 1.", + StabilityLevel: metrics.ALPHA, + }, + []string{ConfigSourceLabelKey, ConfigUIDLabelKey, ConfigResourceVersionLabelKey, KubeletConfigKeyLabelKey}, + ) + // ConfigError is a Gauge that is set to 1 if the node is experiencing a configuration-related error. + ConfigError = metrics.NewGauge( + &metrics.GaugeOpts{ + Subsystem: KubeletSubsystem, + Name: ConfigErrorKey, + Help: "This metric is true (1) if the node is experiencing a configuration-related error, false (0) otherwise.", + StabilityLevel: metrics.ALPHA, + }, + ) + // RunPodSandboxDuration is a Histogram that tracks the duration (in seconds) it takes to run Pod Sandbox operations. + // Broken down by RuntimeClass.Handler. + RunPodSandboxDuration = metrics.NewHistogramVec( + &metrics.HistogramOpts{ + Subsystem: KubeletSubsystem, + Name: RunPodSandboxDurationKey, + Help: "Duration in seconds of the run_podsandbox operations. Broken down by RuntimeClass.Handler.", + // Use DefBuckets for now, will customize the buckets if necessary. + Buckets: metrics.DefBuckets, + StabilityLevel: metrics.ALPHA, + }, + []string{"runtime_handler"}, + ) + // RunPodSandboxErrors is a Counter that tracks the cumulative number of Pod Sandbox operations errors. + // Broken down by RuntimeClass.Handler. + RunPodSandboxErrors = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: KubeletSubsystem, + Name: RunPodSandboxErrorsKey, + Help: "Cumulative number of the run_podsandbox operation errors by RuntimeClass.Handler.", + StabilityLevel: metrics.ALPHA, + }, + []string{"runtime_handler"}, + ) + + // RunningPodCount is a gauge that tracks the number of Pods currently with a running sandbox + // It is used to expose the kubelet internal state: how many pods have running containers in the container runtime, and mainly for debugging purpose. + RunningPodCount = metrics.NewGauge( + &metrics.GaugeOpts{ + Subsystem: KubeletSubsystem, + Name: RunningPodsKey, + Help: "Number of pods that have a running pod sandbox", + StabilityLevel: metrics.ALPHA, + }, + ) + // RunningContainerCount is a gauge that tracks the number of containers currently running + RunningContainerCount = metrics.NewGaugeVec( + &metrics.GaugeOpts{ + Subsystem: KubeletSubsystem, + Name: RunningContainersKey, + Help: "Number of containers currently running", + StabilityLevel: metrics.ALPHA, + }, + []string{"container_state"}, + ) +) + +var registerMetrics sync.Once + +// Register registers all metrics. +func Register(collectors ...metrics.StableCollector) { + // Register the metrics. + registerMetrics.Do(func() { + legacyregistry.MustRegister(NodeName) + legacyregistry.MustRegister(PodWorkerDuration) + legacyregistry.MustRegister(PodStartDuration) + legacyregistry.MustRegister(CgroupManagerDuration) + legacyregistry.MustRegister(PodWorkerStartDuration) + legacyregistry.MustRegister(ContainersPerPodCount) + legacyregistry.MustRegister(PLEGRelistDuration) + legacyregistry.MustRegister(PLEGDiscardEvents) + legacyregistry.MustRegister(PLEGRelistInterval) + legacyregistry.MustRegister(PLEGLastSeen) + legacyregistry.MustRegister(RuntimeOperations) + legacyregistry.MustRegister(RuntimeOperationsDuration) + legacyregistry.MustRegister(RuntimeOperationsErrors) + legacyregistry.MustRegister(Evictions) + legacyregistry.MustRegister(EvictionStatsAge) + legacyregistry.MustRegister(Preemptions) + legacyregistry.MustRegister(DevicePluginRegistrationCount) + legacyregistry.MustRegister(DevicePluginAllocationDuration) + legacyregistry.MustRegister(RunningContainerCount) + legacyregistry.MustRegister(RunningPodCount) + legacyregistry.MustRegister(RunPodSandboxDuration) + legacyregistry.MustRegister(RunPodSandboxErrors) + if utilfeature.DefaultFeatureGate.Enabled(features.DynamicKubeletConfig) { + legacyregistry.MustRegister(AssignedConfig) + legacyregistry.MustRegister(ActiveConfig) + legacyregistry.MustRegister(LastKnownGoodConfig) + legacyregistry.MustRegister(ConfigError) + } + for _, collector := range collectors { + legacyregistry.CustomMustRegister(collector) + } + }) +} + +// GetGather returns the gatherer. It used by test case outside current package. +func GetGather() metrics.Gatherer { + return legacyregistry.DefaultGatherer +} + +// SinceInSeconds gets the time since the specified start in seconds. +func SinceInSeconds(start time.Time) float64 { + return time.Since(start).Seconds() +} + +const configMapAPIPathFmt = "/api/v1/namespaces/%s/configmaps/%s" + +func configLabels(source *corev1.NodeConfigSource) (map[string]string, error) { + if source == nil { + return map[string]string{ + // prometheus requires all of the labels that can be set on the metric + ConfigSourceLabelKey: "local", + ConfigUIDLabelKey: "", + ConfigResourceVersionLabelKey: "", + KubeletConfigKeyLabelKey: "", + }, nil + } + if source.ConfigMap != nil { + return map[string]string{ + ConfigSourceLabelKey: fmt.Sprintf(configMapAPIPathFmt, source.ConfigMap.Namespace, source.ConfigMap.Name), + ConfigUIDLabelKey: string(source.ConfigMap.UID), + ConfigResourceVersionLabelKey: source.ConfigMap.ResourceVersion, + KubeletConfigKeyLabelKey: source.ConfigMap.KubeletConfigKey, + }, nil + } + return nil, fmt.Errorf("unrecognized config source type, all source subfields were nil") +} + +// track labels across metric updates, so we can delete old label sets and prevent leaks +var assignedConfigLabels map[string]string + +// SetAssignedConfig tracks labels according to the assigned NodeConfig. It also tracks labels +// across metric updates so old labels can be safely deleted. +func SetAssignedConfig(source *corev1.NodeConfigSource) error { + // compute the timeseries labels from the source + labels, err := configLabels(source) + if err != nil { + return err + } + // clean up the old timeseries (WithLabelValues creates a new one for each distinct label set) + if !AssignedConfig.Delete(assignedConfigLabels) { + klog.InfoS("Failed to delete metric for labels. This may result in ambiguity from multiple metrics concurrently indicating different assigned configs.", "labels", assignedConfigLabels) + } + // record the new timeseries + assignedConfigLabels = labels + // expose the new timeseries with a constant count of 1 + AssignedConfig.With(assignedConfigLabels).Set(1) + return nil +} + +// track labels across metric updates, so we can delete old label sets and prevent leaks +var activeConfigLabels map[string]string + +// SetActiveConfig tracks labels according to the NodeConfig that is currently used by the Kubelet. +// It also tracks labels across metric updates so old labels can be safely deleted. +func SetActiveConfig(source *corev1.NodeConfigSource) error { + // compute the timeseries labels from the source + labels, err := configLabels(source) + if err != nil { + return err + } + // clean up the old timeseries (WithLabelValues creates a new one for each distinct label set) + if !ActiveConfig.Delete(activeConfigLabels) { + klog.InfoS("Failed to delete metric for labels. This may result in ambiguity from multiple metrics concurrently indicating different active configs.", "labels", activeConfigLabels) + } + // record the new timeseries + activeConfigLabels = labels + // expose the new timeseries with a constant count of 1 + ActiveConfig.With(activeConfigLabels).Set(1) + return nil +} + +// track labels across metric updates, so we can delete old label sets and prevent leaks +var lastKnownGoodConfigLabels map[string]string + +// SetLastKnownGoodConfig tracks labels according to the NodeConfig that was successfully applied last. +// It also tracks labels across metric updates so old labels can be safely deleted. +func SetLastKnownGoodConfig(source *corev1.NodeConfigSource) error { + // compute the timeseries labels from the source + labels, err := configLabels(source) + if err != nil { + return err + } + // clean up the old timeseries (WithLabelValues creates a new one for each distinct label set) + if !LastKnownGoodConfig.Delete(lastKnownGoodConfigLabels) { + klog.InfoS("Failed to delete metric for labels. This may result in ambiguity from multiple metrics concurrently indicating different last known good configs.", "labels", lastKnownGoodConfigLabels) + } + // record the new timeseries + lastKnownGoodConfigLabels = labels + // expose the new timeseries with a constant count of 1 + LastKnownGoodConfig.With(lastKnownGoodConfigLabels).Set(1) + return nil +} + +// SetConfigError sets a the ConfigError metric to 1 in case any errors were encountered. +func SetConfigError(err bool) { + if err { + ConfigError.Set(1) + } else { + ConfigError.Set(0) + } +} + +// SetNodeName sets the NodeName Gauge to 1. +func SetNodeName(name types.NodeName) { + NodeName.WithLabelValues(string(name)).Set(1) +} diff --git a/test/instrumentation/testdata/staging/src/k8s.io/metrics/metrics.go b/test/instrumentation/testdata/staging/src/k8s.io/metrics/metrics.go new file mode 100644 index 00000000000..e6188090539 --- /dev/null +++ b/test/instrumentation/testdata/staging/src/k8s.io/metrics/metrics.go @@ -0,0 +1,19 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +const OKGO = "ThisIsNotTheSoundOfTheTrain"