From 1493da92d9513e383f8382c7e80316a3fa6c94fa Mon Sep 17 00:00:00 2001 From: Damien Grisonnet Date: Thu, 22 Sep 2022 19:08:34 +0200 Subject: [PATCH] metrics: improve apiserver SLI metric name Add a new kube-apiserver SLI metric whose name better reflects that the metric is an SLI and not an SLO, and deprecate the existing apiserver_request_slo_duration_seconds in 1.27. Although the metric is still in alpha, we prefer deprecating it for one release since it is a critical metric used for SLOs and to make sure that users that are using it have time to make the transition. Going forward we prefer SLI-specific metrics, so for consistency we will use _sli_ instead of _slo_ in metric names. Signed-off-by: Damien Grisonnet --- .../pkg/endpoints/metrics/metrics.go | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go b/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go index 9aac28d567d..268727ffc9b 100644 --- a/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go +++ b/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go @@ -112,6 +112,21 @@ var ( // This metric is supplementary to the requestLatencies metric. // It measures request duration excluding webhooks as they are mostly // dependant on user configuration. 
+ Buckets: []float64{0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3, + 4, 5, 6, 8, 10, 15, 20, 30, 45, 60}, + StabilityLevel: compbasemetrics.ALPHA, + DeprecatedVersion: "1.27.0", + }, + []string{"verb", "group", "version", "resource", "subresource", "scope", "component"}, + ) + requestSliLatencies = compbasemetrics.NewHistogramVec( + &compbasemetrics.HistogramOpts{ + Subsystem: APIServerComponent, + Name: "request_sli_duration_seconds", + Help: "Response latency distribution (not counting webhook duration) in seconds for each verb, group, version, resource, subresource, scope and component.", + // This metric is supplementary to the requestLatencies metric. + // It measures request duration excluding webhooks as they are mostly + // dependant on user configuration. Buckets: []float64{0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3, 4, 5, 6, 8, 10, 15, 20, 30, 45, 60}, StabilityLevel: compbasemetrics.ALPHA, @@ -273,6 +288,7 @@ var ( longRunningRequestsGauge, requestLatencies, requestSloLatencies, + requestSliLatencies, fieldValidationRequestLatencies, responseSizes, TLSHandshakeErrors, @@ -519,8 +535,9 @@ func MonitorRequest(req *http.Request, verb, group, version, resource, subresour fieldValidationRequestLatencies.WithContext(req.Context()).WithLabelValues(fieldValidation, fieldValidationEnabled) if wd, ok := request.LatencyTrackersFrom(req.Context()); ok { - sloLatency := elapsedSeconds - (wd.MutatingWebhookTracker.GetLatency() + wd.ValidatingWebhookTracker.GetLatency()).Seconds() - requestSloLatencies.WithContext(req.Context()).WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(sloLatency) + sliLatency := elapsedSeconds - (wd.MutatingWebhookTracker.GetLatency() + wd.ValidatingWebhookTracker.GetLatency()).Seconds() + requestSloLatencies.WithContext(req.Context()).WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(sliLatency) + 
requestSliLatencies.WithContext(req.Context()).WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(sliLatency) } // We are only interested in response sizes of read requests. if verb == "GET" || verb == "LIST" {