diff --git a/rest/request.go b/rest/request.go index 96e72569..e1ba760d 100644 --- a/rest/request.go +++ b/rest/request.go @@ -726,7 +726,6 @@ func (r *Request) Watch(ctx context.Context) (watch.Interface, error) { } resp, err := client.Do(req) - updateURLMetrics(ctx, r, resp, err) retry.After(ctx, r, resp, err) if err == nil && resp.StatusCode == http.StatusOK { return r.newStreamWatcher(resp) @@ -786,22 +785,36 @@ func (r *Request) newStreamWatcher(resp *http.Response) (watch.Interface, error) ), nil } -// updateURLMetrics is a convenience function for pushing metrics. -// It also handles corner cases for incomplete/invalid request data. -func updateURLMetrics(ctx context.Context, req *Request, resp *http.Response, err error) { - url := "none" +// updateRequestResultMetric increments the RequestResult metric counter, +// it should be called with the (response, err) tuple from the final +// reply from the server. +func updateRequestResultMetric(ctx context.Context, req *Request, resp *http.Response, err error) { + code, host := sanitize(req, resp, err) + metrics.RequestResult.Increment(ctx, code, req.verb, host) +} + +// updateRequestRetryMetric increments the RequestRetry metric counter, +// it should be called with the (response, err) tuple for each retry +// except for the final attempt. +func updateRequestRetryMetric(ctx context.Context, req *Request, resp *http.Response, err error) { + code, host := sanitize(req, resp, err) + metrics.RequestRetry.IncrementRetry(ctx, code, req.verb, host) +} + +func sanitize(req *Request, resp *http.Response, err error) (string, string) { + host := "none" if req.c.base != nil { - url = req.c.base.Host + host = req.c.base.Host } // Errors can be arbitrary strings. Unbound label cardinality is not suitable for a metric // system so we just report them as ``. - if err != nil { - metrics.RequestResult.Increment(ctx, "", req.verb, url) - } else { - // Metrics for failure codes - metrics.RequestResult.Increment(ctx, strconv.Itoa(resp.StatusCode), req.verb, url) + code := "" + if resp != nil { + code = strconv.Itoa(resp.StatusCode) } + + return code, host } // Stream formats and executes the request, and offers streaming of the response. @@ -834,7 +847,6 @@ func (r *Request) Stream(ctx context.Context) (io.ReadCloser, error) { return nil, err } resp, err := client.Do(req) - updateURLMetrics(ctx, r, resp, err) retry.After(ctx, r, resp, err) if err != nil { // we only retry on an HTTP response with 'Retry-After' header @@ -979,7 +991,6 @@ func (r *Request) request(ctx context.Context, fn func(*http.Request, *http.Resp return err } resp, err := client.Do(req) - updateURLMetrics(ctx, r, resp, err) // The value -1 or a value of 0 with a non-nil Body indicates that the length is unknown. // https://pkg.go.dev/net/http#Request if req.ContentLength >= 0 && !(req.Body != nil && req.ContentLength == 0) { diff --git a/rest/request_test.go b/rest/request_test.go index 884409ed..dbc54fa7 100644 --- a/rest/request_test.go +++ b/rest/request_test.go @@ -2991,6 +2991,7 @@ type withRateLimiterBackoffManagerAndMetrics struct { metrics.ResultMetric calculateBackoffSeq int64 calculateBackoffFn func(i int64) time.Duration + metrics.RetryMetric invokeOrderGot []string sleepsGot []string @@ -3027,6 +3028,14 @@ func (lb *withRateLimiterBackoffManagerAndMetrics) Increment(ctx context.Context } } +func (lb *withRateLimiterBackoffManagerAndMetrics) IncrementRetry(ctx context.Context, code, _, _ string) { + // we are interested in the request context that is marked by this test + if marked, ok := ctx.Value(retryTestKey).(bool); ok && marked { + lb.invokeOrderGot = append(lb.invokeOrderGot, "RequestRetry.IncrementRetry") + lb.statusCodesGot = append(lb.statusCodesGot, code) + } +} + func (lb *withRateLimiterBackoffManagerAndMetrics) Do() { lb.invokeOrderGot = append(lb.invokeOrderGot, "Client.Do") } @@ -3072,13 +3081,17 @@ func testRetryWithRateLimiterBackoffAndMetrics(t *testing.T, key string, doFunc "Client.Do", // it's a success, so do the following: - // - call metrics and update backoff parameters + // count the result metric, and since it's a retry, + // count the retry metric, and then update backoff parameters. "RequestResult.Increment", + "RequestRetry.IncrementRetry", "BackoffManager.UpdateBackoff", } statusCodesWant := []string{ + // first attempt (A): we count the result metric only "500", - "200", + // final attempt (B): we count the result metric, and the retry metric + "200", "200", } tests := []struct { @@ -3192,10 +3205,13 @@ func testRetryWithRateLimiterBackoffAndMetrics(t *testing.T, key string, doFunc // to override as well, and we want tests to be able to run in // parallel then we will need to provide a way for tests to // register/deregister their own metric inerfaces. - old := metrics.RequestResult + oldRequestResult := metrics.RequestResult + oldRequestRetry := metrics.RequestRetry metrics.RequestResult = interceptor + metrics.RequestRetry = interceptor defer func() { - metrics.RequestResult = old + metrics.RequestResult = oldRequestResult + metrics.RequestRetry = oldRequestRetry }() ctx, cancel := context.WithCancel(context.Background()) diff --git a/rest/with_retry.go b/rest/with_retry.go index cc3c08f0..eaaadc6a 100644 --- a/rest/with_retry.go +++ b/rest/with_retry.go @@ -242,8 +242,20 @@ func (r *withRetry) After(ctx context.Context, request *Request, resp *http.Resp // parameters calculated from the (response, err) tuple from // attempt N-1, so r.retryAfter is outdated and should not be // referred to here. + isRetry := r.retryAfter != nil r.retryAfter = nil + // the client finishes a single request after N attempts (1..N) + // - all attempts (1..N) are counted to the rest_client_requests_total + // metric (current behavior). + // - every attempt after the first (2..N) are counted to the + // rest_client_request_retries_total metric. + updateRequestResultMetric(ctx, request, resp, err) + if isRetry { + // this is attempt 2 or later + updateRequestRetryMetric(ctx, request, resp, err) + } + if request.c.base != nil { if err != nil { request.backoff.UpdateBackoff(request.URL(), err, 0) diff --git a/tools/metrics/metrics.go b/tools/metrics/metrics.go index 6c684c7f..f36430dc 100644 --- a/tools/metrics/metrics.go +++ b/tools/metrics/metrics.go @@ -58,6 +58,12 @@ type CallsMetric interface { Increment(exitCode int, callStatus string) } +// RetryMetric counts the number of retries sent to the server +// partitioned by code, method, and host. +type RetryMetric interface { + IncrementRetry(ctx context.Context, code string, method string, host string) +} + var ( // ClientCertExpiry is the expiry time of a client certificate ClientCertExpiry ExpiryMetric = noopExpiry{} @@ -76,6 +82,9 @@ var ( // ExecPluginCalls is the number of calls made to an exec plugin, partitioned by // exit code and call status. ExecPluginCalls CallsMetric = noopCalls{} + // RequestRetry is the retry metric that tracks the number of + // retries sent to the server. + RequestRetry RetryMetric = noopRetry{} ) // RegisterOpts contains all the metrics to register. Metrics may be nil. @@ -88,6 +97,7 @@ type RegisterOpts struct { RateLimiterLatency LatencyMetric RequestResult ResultMetric ExecPluginCalls CallsMetric + RequestRetry RetryMetric } // Register registers metrics for the rest client to use. This can @@ -118,6 +128,9 @@ func Register(opts RegisterOpts) { if opts.ExecPluginCalls != nil { ExecPluginCalls = opts.ExecPluginCalls } + if opts.RequestRetry != nil { + RequestRetry = opts.RequestRetry + } }) } @@ -144,3 +157,7 @@ func (noopResult) Increment(context.Context, string, string, string) {} type noopCalls struct{} func (noopCalls) Increment(int, string) {} + +type noopRetry struct{} + +func (noopRetry) IncrementRetry(context.Context, string, string, string) {}