diff --git a/staging/src/k8s.io/client-go/rest/request.go b/staging/src/k8s.io/client-go/rest/request.go index 96e725692d3..e1ba760d64b 100644 --- a/staging/src/k8s.io/client-go/rest/request.go +++ b/staging/src/k8s.io/client-go/rest/request.go @@ -726,7 +726,6 @@ func (r *Request) Watch(ctx context.Context) (watch.Interface, error) { } resp, err := client.Do(req) - updateURLMetrics(ctx, r, resp, err) retry.After(ctx, r, resp, err) if err == nil && resp.StatusCode == http.StatusOK { return r.newStreamWatcher(resp) @@ -786,22 +785,36 @@ func (r *Request) newStreamWatcher(resp *http.Response) (watch.Interface, error) ), nil } -// updateURLMetrics is a convenience function for pushing metrics. -// It also handles corner cases for incomplete/invalid request data. -func updateURLMetrics(ctx context.Context, req *Request, resp *http.Response, err error) { - url := "none" +// updateRequestResultMetric increments the RequestResult metric counter, +// it should be called with the (response, err) tuple from the final +// reply from the server. +func updateRequestResultMetric(ctx context.Context, req *Request, resp *http.Response, err error) { + code, host := sanitize(req, resp, err) + metrics.RequestResult.Increment(ctx, code, req.verb, host) +} + +// updateRequestRetryMetric increments the RequestRetry metric counter, +// it should be called with the (response, err) tuple for each retry +// except for the final attempt. +func updateRequestRetryMetric(ctx context.Context, req *Request, resp *http.Response, err error) { + code, host := sanitize(req, resp, err) + metrics.RequestRetry.IncrementRetry(ctx, code, req.verb, host) +} + +func sanitize(req *Request, resp *http.Response, err error) (string, string) { + host := "none" if req.c.base != nil { - url = req.c.base.Host + host = req.c.base.Host } // Errors can be arbitrary strings. Unbound label cardinality is not suitable for a metric // system so we just report them as ``. - if err != nil { - metrics.RequestResult.Increment(ctx, "", req.verb, url) - } else { - // Metrics for failure codes - metrics.RequestResult.Increment(ctx, strconv.Itoa(resp.StatusCode), req.verb, url) + code := "" + if resp != nil { + code = strconv.Itoa(resp.StatusCode) } + + return code, host } // Stream formats and executes the request, and offers streaming of the response. @@ -834,7 +847,6 @@ func (r *Request) Stream(ctx context.Context) (io.ReadCloser, error) { return nil, err } resp, err := client.Do(req) - updateURLMetrics(ctx, r, resp, err) retry.After(ctx, r, resp, err) if err != nil { // we only retry on an HTTP response with 'Retry-After' header @@ -979,7 +991,6 @@ func (r *Request) request(ctx context.Context, fn func(*http.Request, *http.Resp return err } resp, err := client.Do(req) - updateURLMetrics(ctx, r, resp, err) // The value -1 or a value of 0 with a non-nil Body indicates that the length is unknown. // https://pkg.go.dev/net/http#Request if req.ContentLength >= 0 && !(req.Body != nil && req.ContentLength == 0) { diff --git a/staging/src/k8s.io/client-go/rest/request_test.go b/staging/src/k8s.io/client-go/rest/request_test.go index 884409ed9ef..dbc54fa758b 100644 --- a/staging/src/k8s.io/client-go/rest/request_test.go +++ b/staging/src/k8s.io/client-go/rest/request_test.go @@ -2991,6 +2991,7 @@ type withRateLimiterBackoffManagerAndMetrics struct { metrics.ResultMetric calculateBackoffSeq int64 calculateBackoffFn func(i int64) time.Duration + metrics.RetryMetric invokeOrderGot []string sleepsGot []string @@ -3027,6 +3028,14 @@ func (lb *withRateLimiterBackoffManagerAndMetrics) Increment(ctx context.Context } } +func (lb *withRateLimiterBackoffManagerAndMetrics) IncrementRetry(ctx context.Context, code, _, _ string) { + // we are interested in the request context that is marked by this test + if marked, ok := ctx.Value(retryTestKey).(bool); ok && marked { + lb.invokeOrderGot = append(lb.invokeOrderGot, "RequestRetry.IncrementRetry") + lb.statusCodesGot = append(lb.statusCodesGot, code) + } +} + func (lb *withRateLimiterBackoffManagerAndMetrics) Do() { lb.invokeOrderGot = append(lb.invokeOrderGot, "Client.Do") } @@ -3072,13 +3081,17 @@ func testRetryWithRateLimiterBackoffAndMetrics(t *testing.T, key string, doFunc "Client.Do", // it's a success, so do the following: - // - call metrics and update backoff parameters + // count the result metric, and since it's a retry, + // count the retry metric, and then update backoff parameters. "RequestResult.Increment", + "RequestRetry.IncrementRetry", "BackoffManager.UpdateBackoff", } statusCodesWant := []string{ + // first attempt (A): we count the result metric only "500", - "200", + // final attempt (B): we count the result metric, and the retry metric + "200", "200", } tests := []struct { @@ -3192,10 +3205,13 @@ func testRetryWithRateLimiterBackoffAndMetrics(t *testing.T, key string, doFunc // to override as well, and we want tests to be able to run in // parallel then we will need to provide a way for tests to // register/deregister their own metric inerfaces. - old := metrics.RequestResult + oldRequestResult := metrics.RequestResult + oldRequestRetry := metrics.RequestRetry metrics.RequestResult = interceptor + metrics.RequestRetry = interceptor defer func() { - metrics.RequestResult = old + metrics.RequestResult = oldRequestResult + metrics.RequestRetry = oldRequestRetry }() ctx, cancel := context.WithCancel(context.Background()) diff --git a/staging/src/k8s.io/client-go/rest/with_retry.go b/staging/src/k8s.io/client-go/rest/with_retry.go index cc3c08f05a1..eaaadc6a4c3 100644 --- a/staging/src/k8s.io/client-go/rest/with_retry.go +++ b/staging/src/k8s.io/client-go/rest/with_retry.go @@ -242,8 +242,20 @@ func (r *withRetry) After(ctx context.Context, request *Request, resp *http.Resp // parameters calculated from the (response, err) tuple from // attempt N-1, so r.retryAfter is outdated and should not be // referred to here. + isRetry := r.retryAfter != nil r.retryAfter = nil + // the client finishes a single request after N attempts (1..N) + // - all attempts (1..N) are counted to the rest_client_requests_total + // metric (current behavior). + // - every attempt after the first (2..N) are counted to the + // rest_client_request_retries_total metric. + updateRequestResultMetric(ctx, request, resp, err) + if isRetry { + // this is attempt 2 or later + updateRequestRetryMetric(ctx, request, resp, err) + } + if request.c.base != nil { if err != nil { request.backoff.UpdateBackoff(request.URL(), err, 0) diff --git a/staging/src/k8s.io/client-go/tools/metrics/metrics.go b/staging/src/k8s.io/client-go/tools/metrics/metrics.go index 6c684c7fa12..f36430dc3ed 100644 --- a/staging/src/k8s.io/client-go/tools/metrics/metrics.go +++ b/staging/src/k8s.io/client-go/tools/metrics/metrics.go @@ -58,6 +58,12 @@ type CallsMetric interface { Increment(exitCode int, callStatus string) } +// RetryMetric counts the number of retries sent to the server +// partitioned by code, method, and host. +type RetryMetric interface { + IncrementRetry(ctx context.Context, code string, method string, host string) +} + var ( // ClientCertExpiry is the expiry time of a client certificate ClientCertExpiry ExpiryMetric = noopExpiry{} @@ -76,6 +82,9 @@ var ( // ExecPluginCalls is the number of calls made to an exec plugin, partitioned by // exit code and call status. ExecPluginCalls CallsMetric = noopCalls{} + // RequestRetry is the retry metric that tracks the number of + // retries sent to the server. + RequestRetry RetryMetric = noopRetry{} ) // RegisterOpts contains all the metrics to register. Metrics may be nil. @@ -88,6 +97,7 @@ type RegisterOpts struct { RateLimiterLatency LatencyMetric RequestResult ResultMetric ExecPluginCalls CallsMetric + RequestRetry RetryMetric } // Register registers metrics for the rest client to use. This can @@ -118,6 +128,9 @@ func Register(opts RegisterOpts) { if opts.ExecPluginCalls != nil { ExecPluginCalls = opts.ExecPluginCalls } + if opts.RequestRetry != nil { + RequestRetry = opts.RequestRetry + } }) } @@ -144,3 +157,7 @@ func (noopResult) Increment(context.Context, string, string, string) {} type noopCalls struct{} func (noopCalls) Increment(int, string) {} + +type noopRetry struct{} + +func (noopRetry) IncrementRetry(context.Context, string, string, string) {} diff --git a/staging/src/k8s.io/component-base/metrics/prometheus/restclient/metrics.go b/staging/src/k8s.io/component-base/metrics/prometheus/restclient/metrics.go index 92abee81fd7..ccc3cb8f039 100644 --- a/staging/src/k8s.io/component-base/metrics/prometheus/restclient/metrics.go +++ b/staging/src/k8s.io/component-base/metrics/prometheus/restclient/metrics.go @@ -82,6 +82,15 @@ var ( []string{"code", "verb", "host"}, ) + requestRetry = k8smetrics.NewCounterVec( + &k8smetrics.CounterOpts{ + Name: "rest_client_request_retries_total", + StabilityLevel: k8smetrics.ALPHA, + Help: "Number of request retries, partitioned by status code, verb, and host.", + }, + []string{"code", "verb", "host"}, + ) + execPluginCertTTLAdapter = &expiryToTTLAdapter{} execPluginCertTTL = k8smetrics.NewGaugeFunc( @@ -152,6 +161,7 @@ func init() { legacyregistry.MustRegister(responseSize) legacyregistry.MustRegister(rateLimiterLatency) legacyregistry.MustRegister(requestResult) + legacyregistry.MustRegister(requestRetry) legacyregistry.RawMustRegister(execPluginCertTTL) legacyregistry.MustRegister(execPluginCertRotation) metrics.Register(metrics.RegisterOpts{ @@ -162,6 +172,7 @@ func init() { ResponseSize: &sizeAdapter{m: responseSize}, RateLimiterLatency: &latencyAdapter{m: rateLimiterLatency}, RequestResult: &resultAdapter{requestResult}, + RequestRetry: &retryAdapter{requestRetry}, ExecPluginCalls: &callsAdapter{m: execPluginCalls}, }) } @@ -213,3 +224,11 @@ type callsAdapter struct { func (r *callsAdapter) Increment(code int, callStatus string) { r.m.WithLabelValues(fmt.Sprintf("%d", code), callStatus).Inc() } + +type retryAdapter struct { + m *k8smetrics.CounterVec +} + +func (r *retryAdapter) IncrementRetry(ctx context.Context, code, method, host string) { + r.m.WithContext(ctx).WithLabelValues(code, method, host).Inc() +} diff --git a/staging/src/k8s.io/component-base/metrics/prometheus/restclient/metrics_test.go b/staging/src/k8s.io/component-base/metrics/prometheus/restclient/metrics_test.go new file mode 100644 index 00000000000..6a2bb923d24 --- /dev/null +++ b/staging/src/k8s.io/component-base/metrics/prometheus/restclient/metrics_test.go @@ -0,0 +1,87 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package restclient + +import ( + "context" + "strings" + "testing" + + "k8s.io/client-go/tools/metrics" + "k8s.io/component-base/metrics/legacyregistry" + "k8s.io/component-base/metrics/testutil" +) + +func TestClientGOMetrics(t *testing.T) { + tests := []struct { + description string + name string + metric interface{} + update func() + want string + }{ + { + description: "Number of HTTP requests, partitioned by status code, verb, and host.", + name: "rest_client_requests_total", + metric: requestResult, + update: func() { + metrics.RequestResult.Increment(context.TODO(), "200", "POST", "www.foo.com") + }, + want: ` + # HELP rest_client_requests_total [ALPHA] Number of HTTP requests, partitioned by status code, verb, and host. + # TYPE rest_client_requests_total counter + rest_client_requests_total{code="200",host="www.foo.com",verb="POST"} 1 + `, + }, + { + description: "Number of request retries, partitioned by status code, verb, and host.", + name: "rest_client_request_retries_total", + metric: requestRetry, + update: func() { + metrics.RequestRetry.IncrementRetry(context.TODO(), "500", "GET", "www.bar.com") + }, + want: ` + # HELP rest_client_request_retries_total [ALPHA] Number of request retries, partitioned by status code, verb, and host. + # TYPE rest_client_request_retries_total counter + rest_client_request_retries_total{code="500",host="www.bar.com",verb="GET"} 1 + `, + }, + } + + // no need to register the metrics here, since the init function of + // the package registers all the client-go metrics. + for _, test := range tests { + t.Run(test.description, func(t *testing.T) { + resetter, resettable := test.metric.(interface { + Reset() + }) + if !resettable { + t.Fatalf("the metric must be resettaable: %s", test.name) + } + + // Since prometheus' gatherer is global, other tests may have updated + // metrics already, so we need to reset them prior to running this test. + // This also implies that we can't run this test in parallel with other tests. + resetter.Reset() + test.update() + + if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(test.want), test.name); err != nil { + t.Fatal(err) + } + }) + } +}