kubelet: podresources: per-endpoint metrics

Before the addition of GetAllocatableResources, the
podresources API had just one endpoint `List()`, thus we could just
account for the total of the calls to have a good pulse of the API usage.
Now that we extend the API with more endpoints
(`GetAlloctableResources`), in order to improve the observability we add
per-endpoint counters, in addition to the existing counter of the total
API calls.

Signed-off-by: Francesco Romani <fromani@redhat.com>
This commit is contained in:
Francesco Romani 2021-02-08 10:28:42 +01:00
parent d7a30e1b08
commit 1e7bb20c52
2 changed files with 58 additions and 1 deletions

View File

@ -47,6 +47,7 @@ func NewV1PodResourcesServer(podsProvider PodsProvider, devicesProvider DevicesP
// List returns information about the resources assigned to pods on the node
func (p *v1PodResourcesServer) List(ctx context.Context, req *v1.ListPodResourcesRequest) (*v1.ListPodResourcesResponse, error) {
metrics.PodResourcesEndpointRequestsTotalCount.WithLabelValues("v1").Inc()
metrics.PodResourcesEndpointRequestsListCount.WithLabelValues("v1").Inc()
pods := p.podsProvider.GetPods()
podResources := make([]*v1.PodResources, len(pods))
@ -76,7 +77,11 @@ func (p *v1PodResourcesServer) List(ctx context.Context, req *v1.ListPodResource
// GetAllocatableResources returns information about all the resources known by the server - this more like the capacity, not like the current amount of free resources.
func (p *v1PodResourcesServer) GetAllocatableResources(ctx context.Context, req *v1.AllocatableResourcesRequest) (*v1.AllocatableResourcesResponse, error) {
metrics.PodResourcesEndpointRequestsTotalCount.WithLabelValues("v1").Inc()
metrics.PodResourcesEndpointRequestsGetAllocatableCount.WithLabelValues("v1").Inc()
if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.KubeletPodResourcesGetAllocatable) {
metrics.PodResourcesEndpointErrorsGetAllocatableCount.WithLabelValues("v1").Inc()
return nil, fmt.Errorf("Pod Resources API GetAllocatableResources disabled")
}

View File

@ -63,7 +63,11 @@ const (
DevicePluginRegistrationCountKey = "device_plugin_registration_total"
DevicePluginAllocationDurationKey = "device_plugin_alloc_duration_seconds"
// Metrics keys of pod resources operations
PodResourcesEndpointRequestsTotalKey = "pod_resources_endpoint_requests_total"
PodResourcesEndpointRequestsTotalKey = "pod_resources_endpoint_requests_total"
PodResourcesEndpointRequestsListKey = "pod_resources_endpoint_requests_list"
PodResourcesEndpointRequestsGetAllocatableKey = "pod_resources_endpoint_requests_get_allocatable"
PodResourcesEndpointErrorsListKey = "pod_resources_endpoint_errors_list"
PodResourcesEndpointErrorsGetAllocatableKey = "pod_resources_endpoint_errors_get_allocatable"
// Metric keys for node config
AssignedConfigKey = "node_config_assigned"
@ -293,6 +297,54 @@ var (
[]string{"server_api_version"},
)
// PodResourcesEndpointRequestsListCount is a Counter that tracks the number of requests to the PodResource List() endpoint.
// Broken down by server API version.
PodResourcesEndpointRequestsListCount = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: PodResourcesEndpointRequestsListKey,
Help: "Number of requests to the PodResource List endpoint. Broken down by server api version.",
StabilityLevel: metrics.ALPHA,
},
[]string{"server_api_version"},
)
// PodResourcesEndpointRequestsGetAllocatableCount is a Counter that tracks the number of requests to the PodResource GetAllocatableResources() endpoint.
// Broken down by server API version.
PodResourcesEndpointRequestsGetAllocatableCount = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: PodResourcesEndpointRequestsGetAllocatableKey,
Help: "Number of requests to the PodResource GetAllocatableResources endpoint. Broken down by server api version.",
StabilityLevel: metrics.ALPHA,
},
[]string{"server_api_version"},
)
// PodResourcesEndpointErrorsListCount is a Counter that tracks the number of errors returned by he PodResource List() endpoint.
// Broken down by server API version.
PodResourcesEndpointErrorsListCount = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: PodResourcesEndpointErrorsListKey,
Help: "Number of requests to the PodResource List endpoint which returned error. Broken down by server api version.",
StabilityLevel: metrics.ALPHA,
},
[]string{"server_api_version"},
)
// PodResourcesEndpointErrorsGetAllocatableCount is a Counter that tracks the number of errors returned by the PodResource GetAllocatableResources() endpoint.
// Broken down by server API version.
PodResourcesEndpointErrorsGetAllocatableCount = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: PodResourcesEndpointErrorsGetAllocatableKey,
Help: "Number of requests to the PodResource GetAllocatableResources endpoint which returned error. Broken down by server api version.",
StabilityLevel: metrics.ALPHA,
},
[]string{"server_api_version"},
)
// Metrics for node config
// AssignedConfig is a Gauge that is set 1 if the Kubelet has a NodeConfig assigned.