diff --git a/test/instrumentation/decode_metric.go b/test/instrumentation/decode_metric.go index 0e2d913483d..6394cb74896 100644 --- a/test/instrumentation/decode_metric.go +++ b/test/instrumentation/decode_metric.go @@ -45,7 +45,6 @@ func decodeMetricCalls(fs []*ast.CallExpr, metricsImportName string, variables m if m != nil { ms = append(ms, *m) } - } return ms, errors } @@ -65,6 +64,7 @@ func (c *metricDecoder) decodeNewMetricCall(fc *ast.CallExpr) (*metric, error) { case *ast.Ident: if v.Name == "NewTimingRatioHistogramVec" { m, err = c.decodeMetricVecForTimingRatioHistogram(fc) + m.Type = timingRatioHistogram return &m, err } } @@ -85,6 +85,8 @@ func (c *metricDecoder) decodeNewMetricCall(fc *ast.CallExpr) (*metric, error) { m, err = c.decodeMetricVec(fc) case "Labels", "HandlerOpts", "HandlerFor", "HandlerWithReset": return nil, nil + case "NewDesc": + m, err = c.decodeDesc(fc) default: return &m, newDecodeErrorf(fc, errNotDirectCall) } @@ -97,6 +99,8 @@ func (c *metricDecoder) decodeNewMetricCall(fc *ast.CallExpr) (*metric, error) { func getMetricType(functionName string) string { switch functionName { + case "NewDesc": + return customType case "NewCounter", "NewCounterVec": return counterMetricType case "NewGauge", "NewGaugeVec", "NewGaugeFunc": @@ -119,6 +123,97 @@ func (c *metricDecoder) decodeMetric(call *ast.CallExpr) (metric, error) { return c.decodeOpts(call.Args[0]) } +// decodeDesc builds a metric from the first six arguments of a NewDesc call: name, help, labels, const labels, stability level and deprecated version. Calls with fewer arguments are rejected rather than indexed out of range. +func (c *metricDecoder) decodeDesc(ce *ast.CallExpr) (metric, error) { + if len(ce.Args) < 6 { + return metric{}, newDecodeErrorf(ce, errInvalidNewMetricCall) + } + m := &metric{} + name, err := c.decodeString(ce.Args[0]) + if err != nil { + return *m, newDecodeErrorf(ce, "can't decode string") + } + m.Name = *name + help, err := c.decodeString(ce.Args[1]) + if err != nil { + return *m, newDecodeErrorf(ce, "can't decode string") + } + m.Help = *help + labels, err := c.decodeLabels(ce.Args[2]) + if err != nil { + return *m, newDecodeErrorf(ce, "can't decode labels") + } + m.Labels = labels + cLabels, err := c.decodeConstLabels(ce.Args[3]) + if err != nil { + return *m, 
newDecodeErrorf(ce, "can't decode const labels") + } + m.ConstLabels = cLabels + sl, err := decodeStabilityLevel(ce.Args[4], "metrics") + if err != nil { + return *m, newDecodeErrorf(ce, "can't decode stability level") + } + if sl != nil { + m.StabilityLevel = string(*sl) + } + deprecatedVersion, err := c.decodeString(ce.Args[5]) + if err != nil { + return *m, newDecodeErrorf(ce, "can't decode string") + } + if deprecatedVersion != nil { + m.DeprecatedVersion = *deprecatedVersion + } + return *m, nil +} + +// decodeString resolves expr to a string. It handles basic literals, identifiers and X.Sel selectors found in c.variables, and BuildFQName calls; anything else is an error. +func (c *metricDecoder) decodeString(expr ast.Expr) (*string, error) { + switch e := expr.(type) { + case *ast.BasicLit: + s, err := stringValue(e) + return &s, err + case *ast.Ident: + // A variable missing from c.variables yields a nil expression, which fails the assertion as well. + bl, ok := c.variables[e.Name].(*ast.BasicLit) + if !ok { + return nil, fmt.Errorf("can't decode string") + } + v, err := stringValue(bl) + return &v, err + case *ast.CallExpr: + // Only BuildFQName calls are understood, so check the callee before decoding any argument. + se, ok := e.Fun.(*ast.SelectorExpr) + if !ok || se.Sel.Name != "BuildFQName" { + break + } + firstArg, secondArg, thirdArg, err := c.decodeBuildFQNameArguments(e) + if err != nil { + return nil, err + } + n := metrics.BuildFQName(firstArg, secondArg, thirdArg) + return &n, nil + case *ast.SelectorExpr: + s, ok := e.X.(*ast.Ident) + if !ok { + return nil, newDecodeErrorf(e, errExprNotIdent, e.X) + } + variableExpr, found := c.variables[strings.Join([]string{s.Name, e.Sel.Name}, ".")] + if !found { + return nil, newDecodeErrorf(e, errBadImportedVariableAttribute) + } + bl, ok := variableExpr.(*ast.BasicLit) + if !ok { + return nil, newDecodeErrorf(e, errNonStringAttribute) + } + value, err := stringValue(bl) + if err != nil { + return nil, newDecodeErrorf(e, "%v", err) + } + return &value, nil + } + return nil, fmt.Errorf("can't decode string") +} + func (c *metricDecoder) decodeMetricVec(call *ast.CallExpr) (metric, error) { if len(call.Args) != 2 { return 
metric{}, newDecodeErrorf(call, errInvalidNewMetricCall) @@ -186,19 +281,24 @@ func (c *metricDecoder) decodeLabelsFromArray(exprs []ast.Expr) ([]string, error func (c *metricDecoder) decodeLabels(expr ast.Expr) ([]string, error) { cl, ok := expr.(*ast.CompositeLit) if !ok { - id, ok := expr.(*ast.Ident) - if !ok { - return nil, newDecodeErrorf(expr, errInvalidNewMetricCall) + switch e := expr.(type) { + case *ast.Ident: + if e.Name == "nil" { + return []string{}, nil + } + variableExpr, found := c.variables[e.Name] + if !found { + return nil, newDecodeErrorf(expr, "couldn't find variable for labels") + } + cl2, ok := variableExpr.(*ast.CompositeLit) + if !ok { + return nil, newDecodeErrorf(expr, "couldn't interpret variable for labels") + } + cl = cl2 + default: + // Without this branch cl stays nil for any other expression kind and reading cl.Elts below panics. + return nil, newDecodeErrorf(expr, errInvalidNewMetricCall) } - variableExpr, found := c.variables[id.Name] - if !found { - return nil, newDecodeErrorf(expr, "couldn't find variable for labels") - } - cl2, ok := variableExpr.(*ast.CompositeLit) - if !ok { - return nil, newDecodeErrorf(expr, "couldn't interpret variable for labels") - } - cl = cl2 } labels := make([]string, len(cl.Elts)) for i, el := range cl.Elts { @@ -280,7 +380,6 @@ func (c *metricDecoder) decodeOpts(expr ast.Expr) (metric, error) { if !ok { return m, newDecodeErrorf(expr, errExprNotIdent, v.X) } - variableExpr, found := c.variables[strings.Join([]string{s.Name, v.Sel.Name}, ".")] if !found { return m, newDecodeErrorf(expr, errBadImportedVariableAttribute) @@ -737,6 +836,26 @@ func decodeBucketArguments(fc *ast.CallExpr) (float64, float64, int, error) { return firstArg, secondArg, int(thirdArg), nil } +// decodeBuildFQNameArguments decodes the three arguments of a BuildFQName call into strings. +func (c *metricDecoder) decodeBuildFQNameArguments(fc *ast.CallExpr) (string, string, string, error) { + if len(fc.Args) != 3 { + return "", "", "", newDecodeErrorf(fc, "can't decode fq name args") + } + strArgs := make([]string, len(fc.Args)) + for i, elt := range fc.Args { + s, err := c.decodeString(elt) + if err != nil { + // Pass the error as an argument so its text is never interpreted as a format string. + return "", "", "", newDecodeErrorf(fc, "%v", err) + } + if s == nil { + // err is nil on this path, so it must not be dereferenced to build the message. + return "", "", "", newDecodeErrorf(fc, "can't decode fq name args") + } + strArgs[i] = 
*s + } + return strArgs[0], strArgs[1], strArgs[2], nil +} func decodeStabilityLevel(expr ast.Expr, metricsFrameworkImportName string) (*metrics.StabilityLevel, error) { se, ok := expr.(*ast.SelectorExpr) diff --git a/test/instrumentation/documentation/documentation-list.yaml b/test/instrumentation/documentation/documentation-list.yaml index 5585e648855..674a1893d61 100644 --- a/test/instrumentation/documentation/documentation-list.yaml +++ b/test/instrumentation/documentation/documentation-list.yaml @@ -274,6 +274,13 @@ help: Number of times the A/D Controller performed a forced detach type: Counter stabilityLevel: ALPHA +- name: attachdetach_controller_total_volumes + help: Number of volumes in A/D Controller + type: Custom + stabilityLevel: ALPHA + labels: + - plugin_name + - state - name: job_finished_total subsystem: job_controller help: The number of finished job @@ -459,6 +466,13 @@ - 2 - 4 - 8 +- name: storage_count_attachable_volumes_in_use + help: Measure number of volumes in use + type: Custom + stabilityLevel: ALPHA + labels: + - node + - volume_plugin - name: job_deletion_duration_seconds subsystem: ttl_after_finished_controller help: The time it took to delete the job since it became eligible for deletion @@ -558,6 +572,37 @@ help: Counter of certificate renewal errors. 
type: Counter stabilityLevel: ALPHA +- name: pv_collector_bound_pv_count + help: Gauge measuring number of persistent volume currently bound + type: Custom + stabilityLevel: ALPHA + labels: + - storage_class +- name: pv_collector_bound_pvc_count + help: Gauge measuring number of persistent volume claim currently bound + type: Custom + stabilityLevel: ALPHA + labels: + - namespace +- name: pv_collector_total_pv_count + help: Gauge measuring total number of persistent volumes + type: Custom + stabilityLevel: ALPHA + labels: + - plugin_name + - volume_mode +- name: pv_collector_unbound_pv_count + help: Gauge measuring number of persistent volume currently unbound + type: Custom + stabilityLevel: ALPHA + labels: + - storage_class +- name: pv_collector_unbound_pvc_count + help: Gauge measuring number of persistent volume claim currently unbound + type: Custom + stabilityLevel: ALPHA + labels: + - namespace - name: retroactive_storageclass_errors_total help: Total number of failed retroactive StorageClass assignments to persistent volume claim @@ -575,6 +620,30 @@ labels: - operation_name - plugin_name +- name: container_cpu_usage_seconds_total + help: Cumulative cpu time consumed by the container in core-seconds + type: Custom + stabilityLevel: ALPHA + labels: + - container + - pod + - namespace +- name: container_memory_working_set_bytes + help: Current working set of the container in bytes + type: Custom + stabilityLevel: ALPHA + labels: + - container + - pod + - namespace +- name: container_start_time_seconds + help: Start time of the container since unix epoch in seconds + type: Custom + stabilityLevel: ALPHA + labels: + - container + - pod + - namespace - name: cgroup_manager_duration_seconds subsystem: kubelet help: Duration in seconds for cgroup manager operations. Broken down by method. @@ -594,6 +663,15 @@ - 2.5 - 5 - 10 +- name: kubelet_container_log_filesystem_used_bytes + help: Bytes used by the container's logs on the filesystem. 
+ type: Custom + stabilityLevel: ALPHA + labels: + - uid + - namespace + - pod + - container - name: containers_per_pod_count subsystem: kubelet help: The number of containers per pod. @@ -1044,6 +1122,85 @@ - 2.5 - 5 - 10 +- name: kubelet_volume_stats_available_bytes + help: Number of available bytes in the volume + type: Custom + stabilityLevel: ALPHA + labels: + - namespace + - persistentvolumeclaim +- name: kubelet_volume_stats_capacity_bytes + help: Capacity in bytes of the volume + type: Custom + stabilityLevel: ALPHA + labels: + - namespace + - persistentvolumeclaim +- name: kubelet_volume_stats_health_status_abnormal + help: Abnormal volume health status. The count is either 1 or 0. 1 indicates the + volume is unhealthy, 0 indicates volume is healthy + type: Custom + stabilityLevel: ALPHA + labels: + - namespace + - persistentvolumeclaim +- name: kubelet_volume_stats_inodes + help: Maximum number of inodes in the volume + type: Custom + stabilityLevel: ALPHA + labels: + - namespace + - persistentvolumeclaim +- name: kubelet_volume_stats_inodes_free + help: Number of free inodes in the volume + type: Custom + stabilityLevel: ALPHA + labels: + - namespace + - persistentvolumeclaim +- name: kubelet_volume_stats_inodes_used + help: Number of used inodes in the volume + type: Custom + stabilityLevel: ALPHA + labels: + - namespace + - persistentvolumeclaim +- name: kubelet_volume_stats_used_bytes + help: Number of used bytes in the volume + type: Custom + stabilityLevel: ALPHA + labels: + - namespace + - persistentvolumeclaim +- name: node_cpu_usage_seconds_total + help: Cumulative cpu time consumed by the node in core-seconds + type: Custom + stabilityLevel: ALPHA +- name: node_memory_working_set_bytes + help: Current working set of the node in bytes + type: Custom + stabilityLevel: ALPHA +- name: plugin_manager_total_plugins + help: Number of plugins in Plugin Manager + type: Custom + stabilityLevel: ALPHA + labels: + - socket_path + - state +- name: 
pod_cpu_usage_seconds_total + help: Cumulative cpu time consumed by the pod in core-seconds + type: Custom + stabilityLevel: ALPHA + labels: + - pod + - namespace +- name: pod_memory_working_set_bytes + help: Current working set of the pod in bytes + type: Custom + stabilityLevel: ALPHA + labels: + - pod + - namespace - name: probe_duration_seconds subsystem: prober help: Duration in seconds for a probe response. @@ -1067,6 +1224,10 @@ - pod_uid - probe_type - result +- name: scrape_error + help: 1 if there was an error while getting container metrics, 0 otherwise + type: Custom + stabilityLevel: ALPHA - name: csr_honored_duration_total subsystem: certificates_registry namespace: apiserver @@ -1284,6 +1445,13 @@ mount -o context option. type: Gauge stabilityLevel: ALPHA +- name: volume_manager_total_volumes + help: Number of volumes in Volume Manager + type: Custom + stabilityLevel: ALPHA + labels: + - plugin_name + - state - name: allocated_ips subsystem: clusterip_allocator namespace: kube_apiserver @@ -1334,6 +1502,34 @@ stabilityLevel: ALPHA labels: - usage +- name: kube_pod_resource_limit + help: Resources limit for workloads on the cluster, broken down by pod. This shows + the resource usage the scheduler and kubelet expect per pod for resources along + with the unit for the resource if any. + type: Custom + stabilityLevel: ALPHA + labels: + - namespace + - pod + - node + - scheduler + - priority + - resource + - unit +- name: kube_pod_resource_request + help: Resources requested by workloads on the cluster, broken down by pod. This + shows the resource usage the scheduler and kubelet expect per pod for resources + along with the unit for the resource if any. + type: Custom + stabilityLevel: ALPHA + labels: + - namespace + - pod + - node + - scheduler + - priority + - resource + - unit - name: e2e_scheduling_duration_seconds subsystem: scheduler help: E2e scheduling latency in seconds (scheduling algorithm + binding). 
This metric @@ -2579,6 +2775,7 @@ help: Observations, at the end of every nanosecond, of number of requests (as a fraction of the relevant limit) waiting or in any stage of execution (but only initial stage for WATCHes) + type: TimingRatioHistogram stabilityLevel: ALPHA labels: - phase @@ -2599,6 +2796,7 @@ namespace: apiserver help: Observations, at the end of every nanosecond, of utilization of seats for any stage of execution (but only initial stage for WATCHes) + type: TimingRatioHistogram stabilityLevel: ALPHA labels: - priority_level @@ -2623,6 +2821,7 @@ namespace: apiserver help: Observations, at the end of every nanosecond, of the number of requests (as a fraction of the relevant limit) waiting or in regular stage of execution + type: TimingRatioHistogram stabilityLevel: ALPHA labels: - phase @@ -3332,6 +3531,13 @@ stabilityLevel: ALPHA labels: - reason +- name: aggregator_unavailable_apiservice + help: Gauge of APIServices which are marked as unavailable broken down by APIService + name. + type: Custom + stabilityLevel: ALPHA + labels: + - name - name: aggregator_unavailable_apiservice_total help: Counter of APIServices which are marked as unavailable broken down by APIService name and reason. 
@@ -3489,6 +3695,14 @@ stabilityLevel: ALPHA labels: - operation +- name: cloudprovider_vsphere_vcenter_versions + help: Versions for connected vSphere vCenters + type: Custom + stabilityLevel: ALPHA + labels: + - hostname + - version + - build - name: get_token_count help: Counter of total Token() requests to the alternate token source type: Counter diff --git a/test/instrumentation/documentation/documentation.md b/test/instrumentation/documentation/documentation.md index b197b5b9e3f..196c9b97d3e 100644 --- a/test/instrumentation/documentation/documentation.md +++ b/test/instrumentation/documentation/documentation.md @@ -6,7 +6,7 @@ description: >- --- -## Metrics (auto-generated 2022 Oct 31) +## Metrics (auto-generated 2022 Nov 01) This page details the metrics that different Kubernetes components export. You can query the metrics endpoint for these components using an HTTP scrape, and fetch the current metrics data in Prometheus format. @@ -176,6 +176,12 @@ components using an HTTP scrape, and fetch the current metrics data in Prometheu