Merge pull request #103654 from coffeepac/sig-inst/metric-static-analysis/remote-package

seems to work, needs tests and a lot of cleanup
This commit is contained in:
Kubernetes Prow Robot 2021-08-11 08:06:46 -07:00 committed by GitHub
commit a87838481b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 894 additions and 15 deletions

View File

@ -182,6 +182,25 @@ func (c *metricDecoder) decodeOpts(expr ast.Expr) (metric, error) {
if err != nil {
return m, err
}
case *ast.SelectorExpr:
s, ok := v.X.(*ast.Ident)
if !ok {
return m, newDecodeErrorf(expr, errExprNotIdent, v.X)
}
variableExpr, found := c.variables[strings.Join([]string{s.Name, v.Sel.Name}, ".")]
if !found {
return m, newDecodeErrorf(expr, errBadImportedVariableAttribute)
}
bl, ok := variableExpr.(*ast.BasicLit)
if !ok {
return m, newDecodeErrorf(expr, errNonStringAttribute)
}
value, err = stringValue(bl)
if err != nil {
return m, err
}
default:
return m, newDecodeErrorf(expr, errNonStringAttribute)
}

View File

@ -23,17 +23,19 @@ import (
)
// Error message templates reported by the stable-metric static analysis.
// Entries that contain a format verb (errFieldNotSupported, errExprNotIdent)
// expect the caller to supply the matching argument when building the error.
const (
	errNotDirectCall                = "Opts for STABLE metric was not directly passed to new metric function"
	errPositionalArguments          = "Positional arguments are not supported"
	errStabilityLevel               = "StabilityLevel should be passed STABLE, ALPHA or removed"
	errStableSummary                = "Stable summary metric is not supported"
	errInvalidNewMetricCall         = "Invalid new metric call, please ensure code compiles"
	errNonStringAttribute           = "Non string attribute is not supported"
	errBadVariableAttribute         = "Metric attribute was not correctly set. Please use only global consts in same file"
	errBadImportedVariableAttribute = "Metric attribute was not correctly set. Please use only global consts in correctly imported same file"
	errFieldNotSupported            = "Field %s is not supported"
	errBuckets                      = "Buckets should be set to list of floats, result from function call of prometheus.LinearBuckets or prometheus.ExponentialBuckets"
	errLabels                       = "Labels were not set to list of strings"
	errImport                       = `Importing using "." is not supported`
	errExprNotIdent                 = "expr selector does not refer to type ast.Ident, is type %s"
)
type decodeError struct {

View File

@ -23,6 +23,7 @@ import (
"go/ast"
"go/parser"
"go/token"
"io/ioutil"
"os"
"path/filepath"
"sort"
@ -35,6 +36,15 @@ const (
kubeMetricImportPath = `"k8s.io/component-base/metrics"`
// Should equal to final directory name of kubeMetricImportPath
kubeMetricsDefaultImportName = "metrics"
kubeURLRoot = "k8s.io/kubernetes/"
)
// Environment-derived configuration, read once when the package is initialised.
var (
	// GOROOT holds the GOROOT environment variable; it is the base directory
	// used when resolving standard-library imports.
	GOROOT = os.Getenv("GOROOT")
	// GOOS holds the GOOS environment variable; it is matched against file
	// names when the same imported constant is declared in several files.
	GOOS = os.Getenv("GOOS")
	// KUBE_ROOT holds the KUBE_ROOT environment variable; it is the base
	// directory used when resolving k8s.io imports.
	KUBE_ROOT = os.Getenv("KUBE_ROOT")
)
func main() {
@ -124,6 +134,11 @@ func searchFileForStableMetrics(filename string, src interface{}) ([]metric, []e
}
variables := globalVariableDeclarations(tree)
variables, err = importedGlobalVariableDeclaration(variables, tree.Imports)
if err != nil {
return []metric{}, addFileInformationToErrors([]error{err}, fileset)
}
stableMetricsFunctionCalls, errors := findStableMetricDeclaration(tree, metricsImportName)
metrics, es := decodeMetricCalls(stableMetricsFunctionCalls, metricsImportName, variables)
errors = append(errors, es...)
@ -173,3 +188,107 @@ func globalVariableDeclarations(tree *ast.File) map[string]ast.Expr {
}
return consts
}
// localImportPath maps an import path (optionally still wrapped in the double
// quotes of an ast.ImportSpec value) to the directory on disk that is expected
// to hold that package.
//
// The base directory is chosen from the import path:
//   - paths containing kubeURLRoot resolve inside KUBE_ROOT, with the
//     kubeURLRoot part removed;
//   - k8s.io/klog/v2 and k8s.io/util* resolve inside KUBE_ROOT/vendor;
//   - any other k8s.io/ path resolves inside KUBE_ROOT/staging/src;
//   - any other path containing a "." is treated as a third-party module and
//     rejected with an error, because resolving it would require semantic
//     import versioning support;
//   - everything else is treated as standard library and resolves inside
//     GOROOT/src.
//
// ToDo: support non go mod
func localImportPath(importExpr string) (string, error) {
	sep := string(os.PathSeparator)

	var root string
	switch {
	case strings.Contains(importExpr, kubeURLRoot):
		// k/k local checkout
		root = KUBE_ROOT
		importExpr = strings.Replace(importExpr, kubeURLRoot, "", 1)
	case strings.Contains(importExpr, "k8s.io/klog/v2"), strings.Contains(importExpr, "k8s.io/util"):
		root = KUBE_ROOT + sep + "vendor"
	case strings.Contains(importExpr, "k8s.io/"):
		// k/k/staging local checkout
		root = KUBE_ROOT + sep + "staging" + sep + "src"
	case strings.Contains(importExpr, "."):
		// not stdlib; resolving against the vendor directory or module cache
		// is intentionally not implemented yet
		return "", fmt.Errorf("unable to handle general, non STL imports for metric analysis. import path: %s", importExpr)
	default:
		// stdlib
		root = GOROOT + sep + "src"
	}

	// Convert to OS separators first, then drop the surrounding quotes.
	relative := strings.Trim(strings.Replace(importExpr, "/", sep, -1), "\"")
	return root + sep + relative, nil
}
// importedGlobalVariableDeclaration adds the global declarations of every
// resolvable imported package to localVariables and returns that map.
//
// For each import, the package directory is located with localImportPath and
// every non-test .go file directly inside it is parsed. Each declaration
// returned by globalVariableDeclarations is stored under the key
// "<alias>.<name>", where alias is the explicit import name or, when there is
// none, the last segment of the import path.
//
// The supplied map is modified in place; a nil map is replaced by a fresh one.
// Imports whose directory cannot be resolved or read are skipped. A file that
// fails to parse aborts the whole operation and returns a nil map and an error.
func importedGlobalVariableDeclaration(localVariables map[string]ast.Expr, imports []*ast.ImportSpec) (map[string]ast.Expr, error) {
	if localVariables == nil {
		// Writing to a nil map panics; callers still get the populated map back.
		localVariables = make(map[string]ast.Expr)
	}

	for _, im := range imports {
		// get imported label
		var importAlias string
		if im.Name != nil {
			importAlias = im.Name.String()
		} else {
			pathSegments := strings.Split(im.Path.Value, "/")
			importAlias = strings.Trim(pathSegments[len(pathSegments)-1], "\"")
		}

		// find local path on disk for listed import
		importDirectory, err := localImportPath(im.Path.Value)
		if err != nil {
			// uncomment the below log line if you want to start using non k8s/non stl libs for resolving const/var in metric definitions
			// fmt.Fprint(os.Stderr, err.Error() + "\n")
			continue
		}

		files, err := ioutil.ReadDir(importDirectory)
		if err != nil {
			fmt.Fprintf(os.Stderr, "failed to read import path directory %s with error %s, skipping\n", importDirectory, err)
			continue
		}

		for _, file := range files {
			name := file.Name()
			// Skip subpackages, non-Go files and test files. Test files are
			// identified by the "_test.go" suffix so that regular files whose
			// name merely contains "_test" are still analysed.
			if file.IsDir() || !strings.HasSuffix(name, ".go") || strings.HasSuffix(name, "_test.go") {
				continue
			}

			tree, err := parser.ParseFile(token.NewFileSet(), filepath.Join(importDirectory, name), nil, parser.AllErrors)
			if err != nil {
				return nil, fmt.Errorf("failed to parse path %s with error %w", im.Path.Value, err)
			}

			// Prepend the import label to every key and merge into the supplied map.
			for k, v := range globalVariableDeclarations(tree) {
				importK := importAlias + "." + k
				// The first declaration seen wins, unless a later one comes
				// from a file whose name contains GOOS (cross-platform files
				// selected via OS build tags).
				// NOTE(review): GOOS is read from the environment; when it is
				// empty every file name matches, so the last file read wins.
				// TODO: should we include/exclude based on the build tags?
				if _, seen := localVariables[importK]; !seen || strings.Contains(name, GOOS) {
					localVariables[importK] = v
				}
			}
		}
	}
	return localVariables, nil
}

View File

@ -18,7 +18,9 @@ package main
import (
"fmt"
"os"
"reflect"
"strings"
"testing"
"k8s.io/component-base/metrics"
@ -117,10 +119,16 @@ var _ = NewCounter(
}
func TestStableMetric(t *testing.T) {
wd, err := os.Getwd()
if err != nil {
t.Fatalf("unable to fetch path to testing package - needed for simulating import path tests")
}
for _, test := range []struct {
testName string
src string
metric metric
kubeRoot string
}{
{
testName: "Counter",
@ -434,9 +442,80 @@ var _ = metrics.NewHistogram(
Buckets: metrics.DefBuckets,
},
)
`},
{
testName: "Imported stdlib constant",
metric: metric{
Name: "importedCounter",
StabilityLevel: "STABLE",
Subsystem: "GET",
Type: counterMetricType,
},
src: `
package test
import "k8s.io/component-base/metrics"
import "net/http"
var _ = metrics.NewCounter(
&metrics.CounterOpts{
Name: "importedCounter",
StabilityLevel: metrics.STABLE,
Subsystem: http.MethodGet,
},
)
`},
{
testName: "Imported k8s.io constant",
metric: metric{
Name: "importedCounter",
StabilityLevel: "STABLE",
Subsystem: "kubelet",
Type: counterMetricType,
},
kubeRoot: strings.Join([]string{wd, "testdata"}, string(os.PathSeparator)),
src: `
package test
import compbasemetrics "k8s.io/component-base/metrics"
import "k8s.io/kubernetes/pkg/kubelet/metrics"
var _ = compbasemetrics.NewCounter(
&compbasemetrics.CounterOpts{
Name: "importedCounter",
StabilityLevel: compbasemetrics.STABLE,
Subsystem: metrics.KubeletSubsystem,
},
)
`},
{
testName: "Imported k8s.io/staging constant",
metric: metric{
Name: "importedCounter",
StabilityLevel: "STABLE",
Subsystem: "ThisIsNotTheSoundOfTheTrain",
Type: counterMetricType,
},
kubeRoot: strings.Join([]string{wd, "testdata"}, string(os.PathSeparator)),
src: `
package test
import compbasemetrics "k8s.io/component-base/metrics"
import "k8s.io/metrics"
var _ = compbasemetrics.NewCounter(
&compbasemetrics.CounterOpts{
Name: "importedCounter",
StabilityLevel: compbasemetrics.STABLE,
Subsystem: metrics.OKGO,
},
)
`},
} {
t.Run(test.testName, func(t *testing.T) {
// these sub-tests cannot be run in parallel with the below
if test.kubeRoot != "" {
priorKRoot := KUBE_ROOT
KUBE_ROOT = test.kubeRoot
defer func() {
KUBE_ROOT = priorKRoot
}()
}
metrics, errors := searchFileForStableMetrics(fakeFilename, test.src)
if len(errors) != 0 {
t.Errorf("Unexpected errors: %s", errors)
@ -487,7 +566,7 @@ var _ = metrics.NewCounter(
`},
{
testName: "Fail on stable metric with attribute set to local function return",
err: fmt.Errorf("testdata/metric.go:9:4: Non string attribute it not supported"),
err: fmt.Errorf("testdata/metric.go:9:4: Non string attribute is not supported"),
src: `
package test
import "k8s.io/component-base/metrics"
@ -503,7 +582,7 @@ var _ = metrics.NewCounter(
`},
{
testName: "Fail on stable metric with attribute set to imported function return",
err: fmt.Errorf("testdata/metric.go:7:4: Non string attribute it not supported"),
err: fmt.Errorf("testdata/metric.go:7:4: Non string attribute is not supported"),
src: `
package test
import "k8s.io/component-base/metrics"
@ -685,3 +764,56 @@ var _ = metrics.NewHistogram(
})
}
}
// Test_localImportPath checks that localImportPath maps k8s.io/kubernetes,
// staging and standard-library import paths to the expected directories and
// rejects third-party import paths.
func Test_localImportPath(t *testing.T) {
	// localImportPath reads the package-level KUBE_ROOT. Override it with a
	// fixed value for the duration of this test and restore it afterwards so
	// other tests in the package are not affected.
	priorKubeRoot := KUBE_ROOT
	defer func() {
		KUBE_ROOT = priorKubeRoot
	}()
	KUBE_ROOT = "/fake/go/src/k8s.io/kubernetes"

	for _, test := range []struct {
		name         string
		importExpr   string
		expectedPath string
		errorExp     bool
	}{
		{
			name:         "k8s local package",
			importExpr:   "k8s.io/kubernetes/pkg/kubelet/metrics",
			expectedPath: strings.Join([]string{KUBE_ROOT, "pkg", "kubelet", "metrics"}, string(os.PathSeparator)),
			errorExp:     false,
		},
		{
			name:         "k8s staging package",
			importExpr:   "k8s.io/kubelet/metrics",
			expectedPath: strings.Join([]string{KUBE_ROOT, "staging", "src", "k8s.io", "kubelet", "metrics"}, string(os.PathSeparator)),
			errorExp:     false,
		},
		{
			name:       "public package",
			importExpr: "github.com/thisisnot/thesoundofthetrain",
			errorExp:   true,
		},
		{
			name:       "stl package",
			importExpr: "os",
			// Use the package-level GOROOT, which is the value localImportPath itself reads.
			expectedPath: strings.Join([]string{GOROOT, "src", "os"}, string(os.PathSeparator)),
			errorExp:     false,
		},
	} {
		t.Run(test.name, func(t *testing.T) {
			path, err := localImportPath(test.importExpr)
			if test.errorExp {
				if err == nil {
					t.Error("did not receive error as expected")
				}
			} else if err != nil {
				t.Errorf("received unexpected error %s", err)
			}
			if path != test.expectedPath {
				t.Errorf("did not receive expected path. \nwant: %s \ngot: %s", test.expectedPath, path)
			}
		})
	}
}

View File

@ -58,7 +58,7 @@ reset=$(tput sgr0)
kube::validate::stablemetrics() {
stability_check_setup
temp_file=$(mktemp)
doValidate=$(find_files_to_check | grep -E ".*.go" | grep -v ".*_test.go" | sort | xargs -L 200 go run "test/instrumentation/main.go" "test/instrumentation/decode_metric.go" "test/instrumentation/find_stable_metric.go" "test/instrumentation/error.go" "test/instrumentation/metric.go" -- 1>"${temp_file}")
doValidate=$(find_files_to_check | grep -E ".*.go" | grep -v ".*_test.go" | sort | KUBE_ROOT=${KUBE_ROOT} xargs -L 200 go run "test/instrumentation/main.go" "test/instrumentation/decode_metric.go" "test/instrumentation/find_stable_metric.go" "test/instrumentation/error.go" "test/instrumentation/metric.go" -- 1>"${temp_file}")
if $doValidate; then
echo -e "${green}Diffing test/instrumentation/testdata/stable-metrics-list.yaml\n${reset}"
@ -76,7 +76,7 @@ kube::validate::stablemetrics() {
kube::update::stablemetrics() {
stability_check_setup
temp_file=$(mktemp)
doCheckStability=$(find_files_to_check | grep -E ".*.go" | grep -v ".*_test.go" | sort | xargs -L 200 go run "test/instrumentation/main.go" "test/instrumentation/decode_metric.go" "test/instrumentation/find_stable_metric.go" "test/instrumentation/error.go" "test/instrumentation/metric.go" -- 1>"${temp_file}")
doCheckStability=$(find_files_to_check | grep -E ".*.go" | grep -v ".*_test.go" | sort | KUBE_ROOT=${KUBE_ROOT} xargs -L 200 go run "test/instrumentation/main.go" "test/instrumentation/decode_metric.go" "test/instrumentation/find_stable_metric.go" "test/instrumentation/error.go" "test/instrumentation/metric.go" -- 1>"${temp_file}")
if ! $doCheckStability; then
echo "${red}!!! updating golden list of metrics has failed! ${reset}" >&2

View File

@ -0,0 +1,588 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"fmt"
"sync"
"time"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/features"
)
// This const block defines the subsystem name, the metric names and the label
// keys/values used by the kubelet metrics declared below.
const (
// KubeletSubsystem is the Subsystem set on the kubelet metrics declared below.
KubeletSubsystem = "kubelet"
// NodeNameKey is the name of the NodeName gauge; NodeLabelKey is its only label.
NodeNameKey = "node_name"
NodeLabelKey = "node"
// Metric keys for pod workers, cgroup manager, PLEG, evictions, volume stats
// and running pod/container counts.
PodWorkerDurationKey = "pod_worker_duration_seconds"
PodStartDurationKey = "pod_start_duration_seconds"
CgroupManagerOperationsKey = "cgroup_manager_duration_seconds"
PodWorkerStartDurationKey = "pod_worker_start_duration_seconds"
PLEGRelistDurationKey = "pleg_relist_duration_seconds"
PLEGDiscardEventsKey = "pleg_discard_events"
PLEGRelistIntervalKey = "pleg_relist_interval_seconds"
PLEGLastSeenKey = "pleg_last_seen_seconds"
EvictionsKey = "evictions"
EvictionStatsAgeKey = "eviction_stats_age_seconds"
PreemptionsKey = "preemptions"
VolumeStatsCapacityBytesKey = "volume_stats_capacity_bytes"
VolumeStatsAvailableBytesKey = "volume_stats_available_bytes"
VolumeStatsUsedBytesKey = "volume_stats_used_bytes"
VolumeStatsInodesKey = "volume_stats_inodes"
VolumeStatsInodesFreeKey = "volume_stats_inodes_free"
VolumeStatsInodesUsedKey = "volume_stats_inodes_used"
RunningPodsKey = "running_pods"
RunningContainersKey = "running_containers"
// Metrics keys of remote runtime operations
RuntimeOperationsKey = "runtime_operations_total"
RuntimeOperationsDurationKey = "runtime_operations_duration_seconds"
RuntimeOperationsErrorsKey = "runtime_operations_errors_total"
// Metrics keys of device plugin operations
DevicePluginRegistrationCountKey = "device_plugin_registration_total"
DevicePluginAllocationDurationKey = "device_plugin_alloc_duration_seconds"
// Metrics keys of pod resources operations
PodResourcesEndpointRequestsTotalKey = "pod_resources_endpoint_requests_total"
PodResourcesEndpointRequestsListKey = "pod_resources_endpoint_requests_list"
PodResourcesEndpointRequestsGetAllocatableKey = "pod_resources_endpoint_requests_get_allocatable"
PodResourcesEndpointErrorsListKey = "pod_resources_endpoint_errors_list"
PodResourcesEndpointErrorsGetAllocatableKey = "pod_resources_endpoint_errors_get_allocatable"
// Metric keys for node config
AssignedConfigKey = "node_config_assigned"
ActiveConfigKey = "node_config_active"
LastKnownGoodConfigKey = "node_config_last_known_good"
ConfigErrorKey = "node_config_error"
// Label keys (and the "local" source value) attached to the node config gauges.
ConfigSourceLabelKey = "node_config_source"
ConfigSourceLabelValueLocal = "local"
ConfigUIDLabelKey = "node_config_uid"
ConfigResourceVersionLabelKey = "node_config_resource_version"
KubeletConfigKeyLabelKey = "node_config_kubelet_key"
// Metrics keys for RuntimeClass
RunPodSandboxDurationKey = "run_podsandbox_duration_seconds"
RunPodSandboxErrorsKey = "run_podsandbox_errors_total"
)
var (
// NodeName is a Gauge that tracks the node's name. The count is always 1.
NodeName = metrics.NewGaugeVec(
&metrics.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: NodeNameKey,
Help: "The node's name. The count is always 1.",
StabilityLevel: metrics.ALPHA,
},
[]string{NodeLabelKey},
)
// ContainersPerPodCount is a Histogram that tracks the number of containers per pod.
ContainersPerPodCount = metrics.NewHistogram(
&metrics.HistogramOpts{
Subsystem: KubeletSubsystem,
Name: "containers_per_pod_count",
Help: "The number of containers per pod.",
Buckets: metrics.ExponentialBuckets(1, 2, 5),
StabilityLevel: metrics.ALPHA,
},
)
// PodWorkerDuration is a Histogram that tracks the duration (in seconds) it takes to sync a single pod.
// Broken down by the operation type.
PodWorkerDuration = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: KubeletSubsystem,
Name: PodWorkerDurationKey,
Help: "Duration in seconds to sync a single pod. Broken down by operation type: create, update, or sync",
Buckets: metrics.DefBuckets,
StabilityLevel: metrics.ALPHA,
},
[]string{"operation_type"},
)
// PodStartDuration is a Histogram that tracks the duration (in seconds) it takes for a single pod to go from pending to running.
PodStartDuration = metrics.NewHistogram(
&metrics.HistogramOpts{
Subsystem: KubeletSubsystem,
Name: PodStartDurationKey,
Help: "Duration in seconds for a single pod to go from pending to running.",
Buckets: metrics.DefBuckets,
StabilityLevel: metrics.ALPHA,
},
)
// CgroupManagerDuration is a Histogram that tracks the duration (in seconds) it takes for cgroup manager operations to complete.
// Broken down by method.
CgroupManagerDuration = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: KubeletSubsystem,
Name: CgroupManagerOperationsKey,
Help: "Duration in seconds for cgroup manager operations. Broken down by method.",
Buckets: metrics.DefBuckets,
StabilityLevel: metrics.ALPHA,
},
[]string{"operation_type"},
)
// PodWorkerStartDuration is a Histogram that tracks the duration (in seconds) it takes from seeing a pod to starting a worker.
PodWorkerStartDuration = metrics.NewHistogram(
&metrics.HistogramOpts{
Subsystem: KubeletSubsystem,
Name: PodWorkerStartDurationKey,
Help: "Duration in seconds from seeing a pod to starting a worker.",
Buckets: metrics.DefBuckets,
StabilityLevel: metrics.ALPHA,
},
)
// PLEGRelistDuration is a Histogram that tracks the duration (in seconds) it takes for relisting pods in the Kubelet's
// Pod Lifecycle Event Generator (PLEG).
PLEGRelistDuration = metrics.NewHistogram(
&metrics.HistogramOpts{
Subsystem: KubeletSubsystem,
Name: PLEGRelistDurationKey,
Help: "Duration in seconds for relisting pods in PLEG.",
Buckets: metrics.DefBuckets,
StabilityLevel: metrics.ALPHA,
},
)
// PLEGDiscardEvents is a Counter that tracks the number of discarding events in the Kubelet's Pod Lifecycle Event Generator (PLEG).
PLEGDiscardEvents = metrics.NewCounter(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: PLEGDiscardEventsKey,
Help: "The number of discard events in PLEG.",
StabilityLevel: metrics.ALPHA,
},
)
// PLEGRelistInterval is a Histogram that tracks the intervals (in seconds) between relisting in the Kubelet's
// Pod Lifecycle Event Generator (PLEG).
PLEGRelistInterval = metrics.NewHistogram(
&metrics.HistogramOpts{
Subsystem: KubeletSubsystem,
Name: PLEGRelistIntervalKey,
Help: "Interval in seconds between relisting in PLEG.",
Buckets: metrics.DefBuckets,
StabilityLevel: metrics.ALPHA,
},
)
// PLEGLastSeen is a Gauge giving the Unix timestamp when the Kubelet's
// Pod Lifecycle Event Generator (PLEG) was last seen active.
PLEGLastSeen = metrics.NewGauge(
&metrics.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: PLEGLastSeenKey,
Help: "Timestamp in seconds when PLEG was last seen active.",
StabilityLevel: metrics.ALPHA,
},
)
// RuntimeOperations is a Counter that tracks the cumulative number of remote runtime operations.
// Broken down by operation type.
RuntimeOperations = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: RuntimeOperationsKey,
Help: "Cumulative number of runtime operations by operation type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"operation_type"},
)
// RuntimeOperationsDuration is a Histogram that tracks the duration (in seconds) for remote runtime operations to complete.
// Broken down by operation type.
RuntimeOperationsDuration = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: KubeletSubsystem,
Name: RuntimeOperationsDurationKey,
Help: "Duration in seconds of runtime operations. Broken down by operation type.",
Buckets: metrics.ExponentialBuckets(.005, 2.5, 14),
StabilityLevel: metrics.ALPHA,
},
[]string{"operation_type"},
)
// RuntimeOperationsErrors is a Counter that tracks the cumulative number of remote runtime operations errors.
// Broken down by operation type.
RuntimeOperationsErrors = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: RuntimeOperationsErrorsKey,
Help: "Cumulative number of runtime operation errors by operation type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"operation_type"},
)
// Evictions is a Counter that tracks the cumulative number of pod evictions initiated by the kubelet.
// Broken down by eviction signal.
Evictions = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: EvictionsKey,
Help: "Cumulative number of pod evictions by eviction signal",
StabilityLevel: metrics.ALPHA,
},
[]string{"eviction_signal"},
)
// EvictionStatsAge is a Histogram that tracks the time (in seconds) between when stats are collected and when a pod is evicted
// based on those stats. Broken down by eviction signal.
EvictionStatsAge = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: KubeletSubsystem,
Name: EvictionStatsAgeKey,
Help: "Time between when stats are collected, and when pod is evicted based on those stats by eviction signal",
Buckets: metrics.DefBuckets,
StabilityLevel: metrics.ALPHA,
},
[]string{"eviction_signal"},
)
// Preemptions is a Counter that tracks the cumulative number of pod preemptions initiated by the kubelet.
// Broken down by preemption signal. A preemption is only recorded for one resource, the sum of all signals
// is the number of preemptions on the given node.
Preemptions = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: PreemptionsKey,
Help: "Cumulative number of pod preemptions by preemption resource",
StabilityLevel: metrics.ALPHA,
},
[]string{"preemption_signal"},
)
// DevicePluginRegistrationCount is a Counter that tracks the cumulative number of device plugin registrations.
// Broken down by resource name.
DevicePluginRegistrationCount = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: DevicePluginRegistrationCountKey,
Help: "Cumulative number of device plugin registrations. Broken down by resource name.",
StabilityLevel: metrics.ALPHA,
},
[]string{"resource_name"},
)
// DevicePluginAllocationDuration is a Histogram that tracks the duration (in seconds) to serve a device plugin allocation request.
// Broken down by resource name.
DevicePluginAllocationDuration = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: KubeletSubsystem,
Name: DevicePluginAllocationDurationKey,
Help: "Duration in seconds to serve a device plugin Allocation request. Broken down by resource name.",
Buckets: metrics.DefBuckets,
StabilityLevel: metrics.ALPHA,
},
[]string{"resource_name"},
)
// PodResourcesEndpointRequestsTotalCount is a Counter that tracks the cumulative number of requests to the PodResource endpoints.
// Broken down by server API version.
PodResourcesEndpointRequestsTotalCount = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: PodResourcesEndpointRequestsTotalKey,
Help: "Cumulative number of requests to the PodResource endpoint. Broken down by server api version.",
StabilityLevel: metrics.ALPHA,
},
[]string{"server_api_version"},
)
// PodResourcesEndpointRequestsListCount is a Counter that tracks the number of requests to the PodResource List() endpoint.
// Broken down by server API version.
PodResourcesEndpointRequestsListCount = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: PodResourcesEndpointRequestsListKey,
Help: "Number of requests to the PodResource List endpoint. Broken down by server api version.",
StabilityLevel: metrics.ALPHA,
},
[]string{"server_api_version"},
)
// PodResourcesEndpointRequestsGetAllocatableCount is a Counter that tracks the number of requests to the PodResource GetAllocatableResources() endpoint.
// Broken down by server API version.
PodResourcesEndpointRequestsGetAllocatableCount = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: PodResourcesEndpointRequestsGetAllocatableKey,
Help: "Number of requests to the PodResource GetAllocatableResources endpoint. Broken down by server api version.",
StabilityLevel: metrics.ALPHA,
},
[]string{"server_api_version"},
)
// PodResourcesEndpointErrorsListCount is a Counter that tracks the number of errors returned by the PodResource List() endpoint.
// Broken down by server API version.
PodResourcesEndpointErrorsListCount = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: PodResourcesEndpointErrorsListKey,
Help: "Number of requests to the PodResource List endpoint which returned error. Broken down by server api version.",
StabilityLevel: metrics.ALPHA,
},
[]string{"server_api_version"},
)
// PodResourcesEndpointErrorsGetAllocatableCount is a Counter that tracks the number of errors returned by the PodResource GetAllocatableResources() endpoint.
// Broken down by server API version.
PodResourcesEndpointErrorsGetAllocatableCount = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: PodResourcesEndpointErrorsGetAllocatableKey,
Help: "Number of requests to the PodResource GetAllocatableResources endpoint which returned error. Broken down by server api version.",
StabilityLevel: metrics.ALPHA,
},
[]string{"server_api_version"},
)
// Metrics for node config
// AssignedConfig is a Gauge that is set 1 if the Kubelet has a NodeConfig assigned.
AssignedConfig = metrics.NewGaugeVec(
&metrics.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: AssignedConfigKey,
Help: "The node's understanding of intended config. The count is always 1.",
StabilityLevel: metrics.ALPHA,
},
[]string{ConfigSourceLabelKey, ConfigUIDLabelKey, ConfigResourceVersionLabelKey, KubeletConfigKeyLabelKey},
)
// ActiveConfig is a Gauge that is set to 1 if the Kubelet has an active NodeConfig.
ActiveConfig = metrics.NewGaugeVec(
&metrics.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: ActiveConfigKey,
Help: "The config source the node is actively using. The count is always 1.",
StabilityLevel: metrics.ALPHA,
},
[]string{ConfigSourceLabelKey, ConfigUIDLabelKey, ConfigResourceVersionLabelKey, KubeletConfigKeyLabelKey},
)
// LastKnownGoodConfig is a Gauge that is set to 1 if the Kubelet has a NodeConfig it can fall back to if there
// are certain errors.
LastKnownGoodConfig = metrics.NewGaugeVec(
&metrics.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: LastKnownGoodConfigKey,
Help: "The config source the node will fall back to when it encounters certain errors. The count is always 1.",
StabilityLevel: metrics.ALPHA,
},
[]string{ConfigSourceLabelKey, ConfigUIDLabelKey, ConfigResourceVersionLabelKey, KubeletConfigKeyLabelKey},
)
// ConfigError is a Gauge that is set to 1 if the node is experiencing a configuration-related error.
ConfigError = metrics.NewGauge(
&metrics.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: ConfigErrorKey,
Help: "This metric is true (1) if the node is experiencing a configuration-related error, false (0) otherwise.",
StabilityLevel: metrics.ALPHA,
},
)
// RunPodSandboxDuration is a Histogram that tracks the duration (in seconds) it takes to run Pod Sandbox operations.
// Broken down by RuntimeClass.Handler.
RunPodSandboxDuration = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: KubeletSubsystem,
Name: RunPodSandboxDurationKey,
Help: "Duration in seconds of the run_podsandbox operations. Broken down by RuntimeClass.Handler.",
// Use DefBuckets for now, will customize the buckets if necessary.
Buckets: metrics.DefBuckets,
StabilityLevel: metrics.ALPHA,
},
[]string{"runtime_handler"},
)
// RunPodSandboxErrors is a Counter that tracks the cumulative number of Pod Sandbox operations errors.
// Broken down by RuntimeClass.Handler.
RunPodSandboxErrors = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: RunPodSandboxErrorsKey,
Help: "Cumulative number of the run_podsandbox operation errors by RuntimeClass.Handler.",
StabilityLevel: metrics.ALPHA,
},
[]string{"runtime_handler"},
)
// RunningPodCount is a gauge that tracks the number of Pods currently with a running sandbox
// It is used to expose the kubelet internal state: how many pods have running containers in the container runtime, and mainly for debugging purpose.
RunningPodCount = metrics.NewGauge(
&metrics.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: RunningPodsKey,
Help: "Number of pods that have a running pod sandbox",
StabilityLevel: metrics.ALPHA,
},
)
// RunningContainerCount is a gauge that tracks the number of containers currently running
RunningContainerCount = metrics.NewGaugeVec(
&metrics.GaugeOpts{
Subsystem: KubeletSubsystem,
Name: RunningContainersKey,
Help: "Number of containers currently running",
StabilityLevel: metrics.ALPHA,
},
[]string{"container_state"},
)
)
var registerMetrics sync.Once
// Register registers the kubelet metrics declared in this file with the legacy
// registry, followed by any additional collectors supplied by the caller.
//
// The registration body is guarded by the registerMetrics sync.Once, so it runs
// at most once per process; collectors passed to later calls are therefore
// not registered.
func Register(collectors ...metrics.StableCollector) {
// Register the metrics.
registerMetrics.Do(func() {
legacyregistry.MustRegister(NodeName)
legacyregistry.MustRegister(PodWorkerDuration)
legacyregistry.MustRegister(PodStartDuration)
legacyregistry.MustRegister(CgroupManagerDuration)
legacyregistry.MustRegister(PodWorkerStartDuration)
legacyregistry.MustRegister(ContainersPerPodCount)
legacyregistry.MustRegister(PLEGRelistDuration)
legacyregistry.MustRegister(PLEGDiscardEvents)
legacyregistry.MustRegister(PLEGRelistInterval)
legacyregistry.MustRegister(PLEGLastSeen)
legacyregistry.MustRegister(RuntimeOperations)
legacyregistry.MustRegister(RuntimeOperationsDuration)
legacyregistry.MustRegister(RuntimeOperationsErrors)
legacyregistry.MustRegister(Evictions)
legacyregistry.MustRegister(EvictionStatsAge)
legacyregistry.MustRegister(Preemptions)
legacyregistry.MustRegister(DevicePluginRegistrationCount)
legacyregistry.MustRegister(DevicePluginAllocationDuration)
legacyregistry.MustRegister(RunningContainerCount)
legacyregistry.MustRegister(RunningPodCount)
legacyregistry.MustRegister(RunPodSandboxDuration)
legacyregistry.MustRegister(RunPodSandboxErrors)
// The node config gauges are only registered when the DynamicKubeletConfig
// feature gate is enabled.
if utilfeature.DefaultFeatureGate.Enabled(features.DynamicKubeletConfig) {
legacyregistry.MustRegister(AssignedConfig)
legacyregistry.MustRegister(ActiveConfig)
legacyregistry.MustRegister(LastKnownGoodConfig)
legacyregistry.MustRegister(ConfigError)
}
// Caller-supplied collectors are registered last, via CustomMustRegister.
for _, collector := range collectors {
legacyregistry.CustomMustRegister(collector)
}
})
}
// GetGather returns the legacy registry's default gatherer. It is used by test
// cases outside the current package.
func GetGather() metrics.Gatherer {
return legacyregistry.DefaultGatherer
}
// SinceInSeconds reports how much time has elapsed since start, expressed as a
// floating-point number of seconds. The result is negative when start lies in
// the future.
func SinceInSeconds(start time.Time) float64 {
	elapsed := time.Since(start)
	return elapsed.Seconds()
}
// configMapAPIPathFmt is the format string for the API path of a ConfigMap; its
// two %s verbs take the namespace and the name, in that order.
const configMapAPIPathFmt = "/api/v1/namespaces/%s/configmaps/%s"
// configLabels derives the label set that describes a node config source.
//
// A nil source yields the "local" source label with every other label empty.
// A source with a ConfigMap yields labels built from that ConfigMap's API
// path, UID, resource version and kubelet config key. Any other source
// results in an error.
func configLabels(source *corev1.NodeConfigSource) (map[string]string, error) {
	switch {
	case source == nil:
		// prometheus requires all of the labels that can be set on the metric
		return map[string]string{
			ConfigSourceLabelKey:          "local",
			ConfigUIDLabelKey:             "",
			ConfigResourceVersionLabelKey: "",
			KubeletConfigKeyLabelKey:      "",
		}, nil
	case source.ConfigMap != nil:
		cm := source.ConfigMap
		apiPath := fmt.Sprintf(configMapAPIPathFmt, cm.Namespace, cm.Name)
		return map[string]string{
			ConfigSourceLabelKey:          apiPath,
			ConfigUIDLabelKey:             string(cm.UID),
			ConfigResourceVersionLabelKey: cm.ResourceVersion,
			KubeletConfigKeyLabelKey:      cm.KubeletConfigKey,
		}, nil
	default:
		return nil, fmt.Errorf("unrecognized config source type, all source subfields were nil")
	}
}
// assignedConfigLabels holds the label set most recently exposed on
// AssignedConfig, so the stale timeseries can be deleted on the next update and
// label sets do not leak. It is nil until SetAssignedConfig first succeeds.
var assignedConfigLabels map[string]string

// SetAssignedConfig tracks labels according to the assigned NodeConfig. It also tracks labels
// across metric updates so old labels can be safely deleted.
//
// It returns an error, and leaves the metric untouched, when no labels can be
// derived from source.
// NOTE(review): the package-level label state is read and written without any
// locking in this function; presumably callers serialize updates — confirm.
func SetAssignedConfig(source *corev1.NodeConfigSource) error {
	// compute the timeseries labels from the source
	labels, err := configLabels(source)
	if err != nil {
		return err
	}
	// clean up the old timeseries (With creates a new one for each distinct label set).
	// Before the first successful update no label set has been recorded, so there is
	// nothing to delete and reporting a failed deletion would be misleading.
	if assignedConfigLabels != nil && !AssignedConfig.Delete(assignedConfigLabels) {
		klog.InfoS("Failed to delete metric for labels. This may result in ambiguity from multiple metrics concurrently indicating different assigned configs.", "labels", assignedConfigLabels)
	}
	// record the new timeseries
	assignedConfigLabels = labels
	// expose the new timeseries with a constant count of 1
	AssignedConfig.With(assignedConfigLabels).Set(1)
	return nil
}
// activeConfigLabels holds the label set most recently exposed on ActiveConfig,
// so the stale timeseries can be deleted on the next update and label sets do
// not leak. It is nil until SetActiveConfig first succeeds.
var activeConfigLabels map[string]string

// SetActiveConfig tracks labels according to the NodeConfig that is currently used by the Kubelet.
// It also tracks labels across metric updates so old labels can be safely deleted.
//
// It returns an error, and leaves the metric untouched, when no labels can be
// derived from source.
// NOTE(review): the package-level label state is read and written without any
// locking in this function; presumably callers serialize updates — confirm.
func SetActiveConfig(source *corev1.NodeConfigSource) error {
	// compute the timeseries labels from the source
	labels, err := configLabels(source)
	if err != nil {
		return err
	}
	// clean up the old timeseries (With creates a new one for each distinct label set).
	// Before the first successful update no label set has been recorded, so there is
	// nothing to delete and reporting a failed deletion would be misleading.
	if activeConfigLabels != nil && !ActiveConfig.Delete(activeConfigLabels) {
		klog.InfoS("Failed to delete metric for labels. This may result in ambiguity from multiple metrics concurrently indicating different active configs.", "labels", activeConfigLabels)
	}
	// record the new timeseries
	activeConfigLabels = labels
	// expose the new timeseries with a constant count of 1
	ActiveConfig.With(activeConfigLabels).Set(1)
	return nil
}
// lastKnownGoodConfigLabels holds the label set most recently exposed on
// LastKnownGoodConfig, so the stale timeseries can be deleted on the next
// update and label sets do not leak. It is nil until SetLastKnownGoodConfig
// first succeeds.
var lastKnownGoodConfigLabels map[string]string

// SetLastKnownGoodConfig tracks labels according to the NodeConfig that was successfully applied last.
// It also tracks labels across metric updates so old labels can be safely deleted.
//
// It returns an error, and leaves the metric untouched, when no labels can be
// derived from source.
// NOTE(review): the package-level label state is read and written without any
// locking in this function; presumably callers serialize updates — confirm.
func SetLastKnownGoodConfig(source *corev1.NodeConfigSource) error {
	// compute the timeseries labels from the source
	labels, err := configLabels(source)
	if err != nil {
		return err
	}
	// clean up the old timeseries (With creates a new one for each distinct label set).
	// Before the first successful update no label set has been recorded, so there is
	// nothing to delete and reporting a failed deletion would be misleading.
	if lastKnownGoodConfigLabels != nil && !LastKnownGoodConfig.Delete(lastKnownGoodConfigLabels) {
		klog.InfoS("Failed to delete metric for labels. This may result in ambiguity from multiple metrics concurrently indicating different last known good configs.", "labels", lastKnownGoodConfigLabels)
	}
	// record the new timeseries
	lastKnownGoodConfigLabels = labels
	// expose the new timeseries with a constant count of 1
	LastKnownGoodConfig.With(lastKnownGoodConfigLabels).Set(1)
	return nil
}
// SetConfigError records on the ConfigError metric whether any errors were
// encountered: the metric is set to 1 when err is true and to 0 otherwise.
func SetConfigError(err bool) {
	var value float64
	if err {
		value = 1
	}
	ConfigError.Set(value)
}
// SetNodeName sets the NodeName gauge labelled with the given node name to 1.
func SetNodeName(name types.NodeName) {
	nodeLabel := string(name)
	NodeName.WithLabelValues(nodeLabel).Set(1)
}

View File

@ -0,0 +1,19 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
// OKGO is an exported string constant.
// NOTE(review): presumably a fixture referenced from another package as an
// imported metric attribute value — confirm against its users.
const OKGO = "ThisIsNotTheSoundOfTheTrain"