From 88c4b644005c5f674e9d03abae7e1cccda4fd23e Mon Sep 17 00:00:00 2001
From: danielqsj
Date: Wed, 26 Dec 2018 22:29:13 +0800
Subject: [PATCH] Change endpoints metrics to conform to metrics guidelines

---
 .../pkg/endpoints/metrics/metrics.go  | 47 +++++++++++++++++--
 .../pkg/server/filters/maxinflight.go |  2 +
 2 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go b/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go
index 27f416e6b5c..5b35a432a32 100644
--- a/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go
+++ b/staging/src/k8s.io/apiserver/pkg/endpoints/metrics/metrics.go
@@ -48,6 +48,13 @@
 	// TODO(a-robinson): Add unit tests for the handling of these metrics once
 	// the upstream library supports it.
 	requestCounter = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Name: "apiserver_request_total",
+			Help: "Counter of apiserver requests broken out for each verb, group, version, resource, scope, component, client, and HTTP response contentType and code.",
+		},
+		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component", "client", "contentType", "code"},
+	)
+	deprecatedRequestCounter = prometheus.NewCounterVec(
 		prometheus.CounterOpts{
 			Name: "apiserver_request_count",
 			Help: "Counter of apiserver requests broken out for each verb, group, version, resource, scope, component, client, and HTTP response contentType and code.",
@@ -62,6 +69,25 @@
 		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
 	)
 	requestLatencies = prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Name: "apiserver_request_latency_seconds",
+			Help: "Response latency distribution in seconds for each verb, group, version, resource, subresource, scope and component.",
+			// Use buckets ranging from 125 ms to 8 seconds.
+			Buckets: prometheus.ExponentialBuckets(0.125, 2.0, 7),
+		},
+		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
+	)
+	requestLatenciesSummary = prometheus.NewSummaryVec(
+		prometheus.SummaryOpts{
+			Name: "apiserver_request_latency_seconds_summary",
+			Help: "Response latency summary in seconds for each verb, group, version, resource, subresource, scope and component.",
+			// Make the sliding window of 5h.
+			// TODO: The value for this should be based on our SLI definition (medium term).
+			MaxAge: 5 * time.Hour,
+		},
+		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
+	)
+	deprecatedRequestLatencies = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
 			Name: "apiserver_request_latencies",
 			Help: "Response latency distribution in microseconds for each verb, group, version, resource, subresource, scope and component.",
@@ -70,7 +96,7 @@
 		},
 		[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
 	)
-	requestLatenciesSummary = prometheus.NewSummaryVec(
+	deprecatedRequestLatenciesSummary = prometheus.NewSummaryVec(
 		prometheus.SummaryOpts{
 			Name: "apiserver_request_latencies_summary",
 			Help: "Response latency summary in microseconds for each verb, group, version, resource, subresource, scope and component.",
@@ -91,6 +117,13 @@
 	)
 	// DroppedRequests is a number of requests dropped with 'Try again later' response"
 	DroppedRequests = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Name: "apiserver_dropped_requests_total",
+			Help: "Number of requests dropped with 'Try again later' response",
+		},
+		[]string{"requestKind"},
+	)
+	DeprecatedDroppedRequests = prometheus.NewCounterVec(
 		prometheus.CounterOpts{
 			Name: "apiserver_dropped_requests",
 			Help: "Number of requests dropped with 'Try again later' response",
@@ -118,11 +151,15 @@
 
 	metrics = []resettableCollector{
 		requestCounter,
+		deprecatedRequestCounter,
 		longRunningRequestGauge,
 		requestLatencies,
 		requestLatenciesSummary,
+		deprecatedRequestLatencies,
+		deprecatedRequestLatenciesSummary,
 		responseSizes,
 		DroppedRequests,
+		DeprecatedDroppedRequests,
 		RegisteredWatchers,
 		currentInflightRequests,
 	}
@@ -198,9 +235,13 @@ func MonitorRequest(req *http.Request, verb, group, version, resource, subresour
 	reportedVerb := cleanVerb(verb, req)
 	client := cleanUserAgent(utilnet.GetHTTPClient(req))
 	elapsedMicroseconds := float64(elapsed / time.Microsecond)
+	elapsedSeconds := elapsed.Seconds()
 	requestCounter.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component, client, contentType, codeToString(httpCode)).Inc()
-	requestLatencies.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds)
-	requestLatenciesSummary.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds)
+	deprecatedRequestCounter.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component, client, contentType, codeToString(httpCode)).Inc()
+	requestLatencies.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedSeconds)
+	deprecatedRequestLatencies.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds)
+	requestLatenciesSummary.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedSeconds)
+	deprecatedRequestLatenciesSummary.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds)
 	// We are only interested in response sizes of read requests.
if verb == "GET" || verb == "LIST" { responseSizes.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(float64(respSize)) diff --git a/staging/src/k8s.io/apiserver/pkg/server/filters/maxinflight.go b/staging/src/k8s.io/apiserver/pkg/server/filters/maxinflight.go index cc10d0abd8f..f7bc691c762 100644 --- a/staging/src/k8s.io/apiserver/pkg/server/filters/maxinflight.go +++ b/staging/src/k8s.io/apiserver/pkg/server/filters/maxinflight.go @@ -163,8 +163,10 @@ func WithMaxInFlightLimit( // We need to split this data between buckets used for throttling. if isMutatingRequest { metrics.DroppedRequests.WithLabelValues(metrics.MutatingKind).Inc() + metrics.DeprecatedDroppedRequests.WithLabelValues(metrics.MutatingKind).Inc() } else { metrics.DroppedRequests.WithLabelValues(metrics.ReadOnlyKind).Inc() + metrics.DeprecatedDroppedRequests.WithLabelValues(metrics.ReadOnlyKind).Inc() } // at this point we're about to return a 429, BUT not all actors should be rate limited. A system:master is so powerful // that they should always get an answer. It's a super-admin or a loopback connection.