Change endpoints metrics to conform to the guidelines

This commit is contained in:
danielqsj 2018-12-26 22:29:13 +08:00
parent 47938c3733
commit 88c4b64400
2 changed files with 46 additions and 3 deletions

View File

@ -48,6 +48,13 @@ var (
// TODO(a-robinson): Add unit tests for the handling of these metrics once
// the upstream library supports it.
// requestCounter counts apiserver requests under the metric name
// "apiserver_request_total". Each sample is partitioned by verb, group,
// version, resource, subresource, scope, component, client, contentType and
// code. MonitorRequest increments it once per call.
// NOTE(review): the Help text below does not list the subresource label even
// though it is part of the label set — confirm whether that is intentional.
requestCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "apiserver_request_total",
Help: "Counter of apiserver requests broken out for each verb, group, version, resource, scope, component, client, and HTTP response contentType and code.",
},
[]string{"verb", "group", "version", "resource", "subresource", "scope", "component", "client", "contentType", "code"},
)
deprecatedRequestCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "apiserver_request_count",
Help: "Counter of apiserver requests broken out for each verb, group, version, resource, scope, component, client, and HTTP response contentType and code.",
@ -62,6 +69,25 @@ var (
[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
)
// requestLatencies is the histogram of response latencies exposed as
// "apiserver_request_latency_seconds". Observations are in seconds:
// MonitorRequest feeds it elapsed.Seconds(). Samples are partitioned by verb,
// group, version, resource, subresource, scope and component.
requestLatencies = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "apiserver_request_latency_seconds",
Help: "Response latency distribution in seconds for each verb, group, version, resource, subresource, scope and component.",
// Use buckets ranging from 125 ms to 8 seconds.
// Seven buckets, each double the previous one, starting at 0.125 s.
Buckets: prometheus.ExponentialBuckets(0.125, 2.0, 7),
},
[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
)
// requestLatenciesSummary is the summary of response latencies exposed as
// "apiserver_request_latency_seconds_summary". Observations are in seconds:
// MonitorRequest feeds it elapsed.Seconds(). It uses the same label set as
// the latency histogram (verb, group, version, resource, subresource, scope,
// component).
requestLatenciesSummary = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Name: "apiserver_request_latency_seconds_summary",
Help: "Response latency summary in seconds for each verb, group, version, resource, subresource, scope and component.",
// Make the sliding window of 5h.
// TODO: The value for this should be based on our SLI definition (medium term).
MaxAge: 5 * time.Hour,
},
[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
)
deprecatedRequestLatencies = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "apiserver_request_latencies",
Help: "Response latency distribution in microseconds for each verb, group, version, resource, subresource, scope and component.",
@ -70,7 +96,7 @@ var (
},
[]string{"verb", "group", "version", "resource", "subresource", "scope", "component"},
)
requestLatenciesSummary = prometheus.NewSummaryVec(
deprecatedRequestLatenciesSummary = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Name: "apiserver_request_latencies_summary",
Help: "Response latency summary in microseconds for each verb, group, version, resource, subresource, scope and component.",
@ -91,6 +117,13 @@ var (
)
// DroppedRequests is the number of requests dropped with a 'Try again later'
// response, exposed as "apiserver_dropped_requests_total". The single
// "requestKind" label separates the kinds of dropped request; the max-in-flight
// filter increments it with MutatingKind or ReadOnlyKind.
DroppedRequests = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "apiserver_dropped_requests_total",
Help: "Number of requests dropped with 'Try again later' response",
},
[]string{"requestKind"},
)
DeprecatedDroppedRequests = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "apiserver_dropped_requests",
Help: "Number of requests dropped with 'Try again later' response",
@ -118,11 +151,15 @@ var (
// metrics lists the collectors declared in this var block. Each renamed
// metric appears next to its deprecated counterpart, so both names are
// handled together.
// NOTE(review): presumably this slice drives registration and reset of the
// collectors — its consumers are not visible here; confirm.
metrics = []resettableCollector{
requestCounter,
deprecatedRequestCounter,
longRunningRequestGauge,
requestLatencies,
requestLatenciesSummary,
deprecatedRequestLatencies,
deprecatedRequestLatenciesSummary,
responseSizes,
DroppedRequests,
DeprecatedDroppedRequests,
RegisteredWatchers,
currentInflightRequests,
}
@ -198,9 +235,13 @@ func MonitorRequest(req *http.Request, verb, group, version, resource, subresour
reportedVerb := cleanVerb(verb, req)
client := cleanUserAgent(utilnet.GetHTTPClient(req))
elapsedMicroseconds := float64(elapsed / time.Microsecond)
elapsedSeconds := elapsed.Seconds()
requestCounter.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component, client, contentType, codeToString(httpCode)).Inc()
requestLatencies.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds)
requestLatenciesSummary.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds)
deprecatedRequestCounter.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component, client, contentType, codeToString(httpCode)).Inc()
requestLatencies.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedSeconds)
deprecatedRequestLatencies.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds)
requestLatenciesSummary.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedSeconds)
deprecatedRequestLatenciesSummary.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(elapsedMicroseconds)
// We are only interested in response sizes of read requests.
if verb == "GET" || verb == "LIST" {
responseSizes.WithLabelValues(reportedVerb, group, version, resource, subresource, scope, component).Observe(float64(respSize))

View File

@ -163,8 +163,10 @@ func WithMaxInFlightLimit(
// We need to split this data between buckets used for throttling.
if isMutatingRequest {
metrics.DroppedRequests.WithLabelValues(metrics.MutatingKind).Inc()
metrics.DeprecatedDroppedRequests.WithLabelValues(metrics.MutatingKind).Inc()
} else {
metrics.DroppedRequests.WithLabelValues(metrics.ReadOnlyKind).Inc()
metrics.DeprecatedDroppedRequests.WithLabelValues(metrics.ReadOnlyKind).Inc()
}
// at this point we're about to return a 429, BUT not all actors should be rate limited. A system:master is so powerful
// that they should always get an answer. It's a super-admin or a loopback connection.