kubelet: podresources: per-endpoint metrics

Before the addition of GetAllocatableResources, the podresources API had just one endpoint, `List()`, so we could simply count the total number of calls to get a good pulse of the API usage. Now that we extend the API with more endpoints (`GetAllocatableResources`), we add per-endpoint counters to improve observability, in addition to the existing counter of the total API calls. Signed-off-by: Francesco Romani <fromani@redhat.com>
2025-11-14 07:52:01 +00:00 · 2021-02-08 10:28:42 +01:00
parent d7a30e1b08
commit 1e7bb20c52
2 changed files with 58 additions and 1 deletions
--- a/pkg/kubelet/apis/podresources/server_v1.go
+++ b/pkg/kubelet/apis/podresources/server_v1.go
@@ -47,6 +47,7 @@ func NewV1PodResourcesServer(podsProvider PodsProvider, devicesProvider DevicesP
 // List returns information about the resources assigned to pods on the node
 func (p *v1PodResourcesServer) List(ctx context.Context, req *v1.ListPodResourcesRequest) (*v1.ListPodResourcesResponse, error) {
 	metrics.PodResourcesEndpointRequestsTotalCount.WithLabelValues("v1").Inc()
 	metrics.PodResourcesEndpointRequestsListCount.WithLabelValues("v1").Inc()
 	pods := p.podsProvider.GetPods()
 	podResources := make([]*v1.PodResources, len(pods))
@@ -76,7 +77,11 @@ func (p *v1PodResourcesServer) List(ctx context.Context, req *v1.ListPodResource
 // GetAllocatableResources returns information about all the resources known by the server - this is more like the capacity, not like the current amount of free resources.
 func (p *v1PodResourcesServer) GetAllocatableResources(ctx context.Context, req *v1.AllocatableResourcesRequest) (*v1.AllocatableResourcesResponse, error) {
 	metrics.PodResourcesEndpointRequestsTotalCount.WithLabelValues("v1").Inc()
 	metrics.PodResourcesEndpointRequestsGetAllocatableCount.WithLabelValues("v1").Inc()
 	if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.KubeletPodResourcesGetAllocatable) {
 		metrics.PodResourcesEndpointErrorsGetAllocatableCount.WithLabelValues("v1").Inc()
 		return nil, fmt.Errorf("Pod Resources API GetAllocatableResources disabled")
 	}
--- a/pkg/kubelet/metrics/metrics.go
+++ b/pkg/kubelet/metrics/metrics.go
@@ -63,7 +63,11 @@ const (
 	DevicePluginRegistrationCountKey  = "device_plugin_registration_total"
 	DevicePluginAllocationDurationKey = "device_plugin_alloc_duration_seconds"
 	// Metrics keys of pod resources operations
-	PodResourcesEndpointRequestsTotalKey = "pod_resources_endpoint_requests_total"
+	PodResourcesEndpointRequestsTotalKey          = "pod_resources_endpoint_requests_total"
 	PodResourcesEndpointRequestsListKey           = "pod_resources_endpoint_requests_list"
 	PodResourcesEndpointRequestsGetAllocatableKey = "pod_resources_endpoint_requests_get_allocatable"
 	PodResourcesEndpointErrorsListKey             = "pod_resources_endpoint_errors_list"
 	PodResourcesEndpointErrorsGetAllocatableKey   = "pod_resources_endpoint_errors_get_allocatable"
 	// Metric keys for node config
 	AssignedConfigKey             = "node_config_assigned"
@@ -293,6 +297,54 @@ var (
 		[]string{"server_api_version"},
 	)
 	// PodResourcesEndpointRequestsListCount is a Counter that tracks the number of requests to the PodResources List() endpoint.
 	// Broken down by server API version, carried in the "server_api_version" label.
 	// The v1 server increments it (label value "v1") on every List() call, right after
 	// incrementing the counter of total requests.
 	PodResourcesEndpointRequestsListCount = metrics.NewCounterVec(
 		&metrics.CounterOpts{
 			Subsystem:      KubeletSubsystem,
 			Name:           PodResourcesEndpointRequestsListKey,
 			Help:           "Number of requests to the PodResource List endpoint. Broken down by server api version.",
 			StabilityLevel: metrics.ALPHA,
 		},
 		[]string{"server_api_version"},
 	)
 	// PodResourcesEndpointRequestsGetAllocatableCount is a Counter that tracks the number of requests to the PodResources GetAllocatableResources() endpoint.
 	// Broken down by server API version, carried in the "server_api_version" label.
 	// The v1 server increments it (label value "v1") on every GetAllocatableResources() call,
 	// before the feature gate is checked, so requests rejected because the endpoint is
 	// disabled are counted here as well.
 	PodResourcesEndpointRequestsGetAllocatableCount = metrics.NewCounterVec(
 		&metrics.CounterOpts{
 			Subsystem:      KubeletSubsystem,
 			Name:           PodResourcesEndpointRequestsGetAllocatableKey,
 			Help:           "Number of requests to the PodResource GetAllocatableResources endpoint. Broken down by server api version.",
 			StabilityLevel: metrics.ALPHA,
 		},
 		[]string{"server_api_version"},
 	)
 	// PodResourcesEndpointErrorsListCount is a Counter that tracks the number of errors returned by the PodResources List() endpoint.
 	// Broken down by server API version, carried in the "server_api_version" label.
 	// NOTE(review): no call site incrementing this counter is visible next to this
 	// definition - confirm where List() errors are accounted for.
 	PodResourcesEndpointErrorsListCount = metrics.NewCounterVec(
 		&metrics.CounterOpts{
 			Subsystem:      KubeletSubsystem,
 			Name:           PodResourcesEndpointErrorsListKey,
 			Help:           "Number of requests to the PodResource List endpoint which returned error. Broken down by server api version.",
 			StabilityLevel: metrics.ALPHA,
 		},
 		[]string{"server_api_version"},
 	)
 	// PodResourcesEndpointErrorsGetAllocatableCount is a Counter that tracks the number of errors returned by the PodResources GetAllocatableResources() endpoint.
 	// Broken down by server API version, carried in the "server_api_version" label.
 	// The v1 server increments it (label value "v1") when a request is rejected because the
 	// KubeletPodResourcesGetAllocatable feature gate is disabled.
 	// NOTE(review): other error paths, if any, should be checked against the server code.
 	PodResourcesEndpointErrorsGetAllocatableCount = metrics.NewCounterVec(
 		&metrics.CounterOpts{
 			Subsystem:      KubeletSubsystem,
 			Name:           PodResourcesEndpointErrorsGetAllocatableKey,
 			Help:           "Number of requests to the PodResource GetAllocatableResources endpoint which returned error. Broken down by server api version.",
 			StabilityLevel: metrics.ALPHA,
 		},
 		[]string{"server_api_version"},
 	)
 	// Metrics for node config
 	// AssignedConfig is a Gauge that is set 1 if the Kubelet has a NodeConfig assigned.