From c1cd8495a5b4521d226c531299d0a345d6cc8af0 Mon Sep 17 00:00:00 2001 From: Ed Bartosh Date: Mon, 28 Oct 2024 13:07:14 +0200 Subject: [PATCH] kubelet: define custom buckets for DRA metrics --- pkg/kubelet/metrics/metrics.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pkg/kubelet/metrics/metrics.go b/pkg/kubelet/metrics/metrics.go index be0a4cbfa53..319aedd286d 100644 --- a/pkg/kubelet/metrics/metrics.go +++ b/pkg/kubelet/metrics/metrics.go @@ -173,6 +173,11 @@ var ( {60 * 1024 * 1024 * 1024, "60GB-100GB"}, {100 * 1024 * 1024 * 1024, "GT100GB"}, } + // DRADurationBuckets defines the bucket boundaries for the DRA operation duration metrics + // DRAOperationsDuration and DRAGRPCOperationsDuration defined below in this file. + // The largest bucket boundary, 40, is based on the 45-second maximum gRPC timeout defined + // for the DRA gRPC calls in pkg/kubelet/cm/dra/plugin/registration.go. + DRADurationBuckets = metrics.ExponentialBucketsRange(.1, 40, 15) ) var ( @@ -950,7 +955,7 @@ var ( Subsystem: DRASubsystem, Name: DRAOperationsDurationKey, Help: "Latency histogram in seconds for the duration of handling all ResourceClaims referenced by a pod when the pod starts or stops. Identified by the name of the operation (PrepareResources or UnprepareResources) and separated by the success of the operation. The number of failed operations is provided through the histogram's overall count.", - Buckets: metrics.DefBuckets, + Buckets: DRADurationBuckets, StabilityLevel: metrics.ALPHA, }, []string{"operation_name", "is_error"}, @@ -962,7 +967,7 @@ var ( Subsystem: DRASubsystem, Name: DRAGRPCOperationsDurationKey, Help: "Duration in seconds of the DRA gRPC operations", - Buckets: metrics.DefBuckets, + Buckets: DRADurationBuckets, StabilityLevel: metrics.ALPHA, }, []string{"driver_name", "method_name", "grpc_status_code"},