client-go: add metric to count retries

Kubernetes-commit: b6c369f5c90bbef8058b3d44949f4e815dd6607f
This commit is contained in:
Abu Kashem 2022-02-28 12:22:09 -05:00 committed by Kubernetes Publisher
parent f457a57d6d
commit d2388d199c
4 changed files with 73 additions and 17 deletions

View File

@ -726,7 +726,6 @@ func (r *Request) Watch(ctx context.Context) (watch.Interface, error) {
}
resp, err := client.Do(req)
updateURLMetrics(ctx, r, resp, err)
retry.After(ctx, r, resp, err)
if err == nil && resp.StatusCode == http.StatusOK {
return r.newStreamWatcher(resp)
@ -786,22 +785,36 @@ func (r *Request) newStreamWatcher(resp *http.Response) (watch.Interface, error)
), nil
}
// updateURLMetrics is a convenience function for pushing metrics.
// It also handles corner cases for incomplete/invalid request data.
func updateURLMetrics(ctx context.Context, req *Request, resp *http.Response, err error) {
url := "none"
// updateRequestResultMetric increments the RequestResult metric counter,
// it should be called with the (response, err) tuple from the final
// reply from the server.
func updateRequestResultMetric(ctx context.Context, req *Request, resp *http.Response, err error) {
code, host := sanitize(req, resp, err)
metrics.RequestResult.Increment(ctx, code, req.verb, host)
}
// updateRequestRetryMetric increments the RequestRetry metric counter,
// it should be called with the (response, err) tuple for each retry
// except for the final attempt.
func updateRequestRetryMetric(ctx context.Context, req *Request, resp *http.Response, err error) {
code, host := sanitize(req, resp, err)
metrics.RequestRetry.IncrementRetry(ctx, code, req.verb, host)
}
func sanitize(req *Request, resp *http.Response, err error) (string, string) {
host := "none"
if req.c.base != nil {
url = req.c.base.Host
host = req.c.base.Host
}
// Errors can be arbitrary strings. Unbound label cardinality is not suitable for a metric
// system so we just report them as `<error>`.
if err != nil {
metrics.RequestResult.Increment(ctx, "<error>", req.verb, url)
} else {
// Metrics for failure codes
metrics.RequestResult.Increment(ctx, strconv.Itoa(resp.StatusCode), req.verb, url)
code := "<error>"
if resp != nil {
code = strconv.Itoa(resp.StatusCode)
}
return code, host
}
// Stream formats and executes the request, and offers streaming of the response.
@ -834,7 +847,6 @@ func (r *Request) Stream(ctx context.Context) (io.ReadCloser, error) {
return nil, err
}
resp, err := client.Do(req)
updateURLMetrics(ctx, r, resp, err)
retry.After(ctx, r, resp, err)
if err != nil {
// we only retry on an HTTP response with 'Retry-After' header
@ -979,7 +991,6 @@ func (r *Request) request(ctx context.Context, fn func(*http.Request, *http.Resp
return err
}
resp, err := client.Do(req)
updateURLMetrics(ctx, r, resp, err)
// The value -1 or a value of 0 with a non-nil Body indicates that the length is unknown.
// https://pkg.go.dev/net/http#Request
if req.ContentLength >= 0 && !(req.Body != nil && req.ContentLength == 0) {

View File

@ -2991,6 +2991,7 @@ type withRateLimiterBackoffManagerAndMetrics struct {
metrics.ResultMetric
calculateBackoffSeq int64
calculateBackoffFn func(i int64) time.Duration
metrics.RetryMetric
invokeOrderGot []string
sleepsGot []string
@ -3027,6 +3028,14 @@ func (lb *withRateLimiterBackoffManagerAndMetrics) Increment(ctx context.Context
}
}
func (lb *withRateLimiterBackoffManagerAndMetrics) IncrementRetry(ctx context.Context, code, _, _ string) {
// we are interested in the request context that is marked by this test
if marked, ok := ctx.Value(retryTestKey).(bool); ok && marked {
lb.invokeOrderGot = append(lb.invokeOrderGot, "RequestRetry.IncrementRetry")
lb.statusCodesGot = append(lb.statusCodesGot, code)
}
}
func (lb *withRateLimiterBackoffManagerAndMetrics) Do() {
lb.invokeOrderGot = append(lb.invokeOrderGot, "Client.Do")
}
@ -3072,13 +3081,17 @@ func testRetryWithRateLimiterBackoffAndMetrics(t *testing.T, key string, doFunc
"Client.Do",
// it's a success, so do the following:
// - call metrics and update backoff parameters
// count the result metric, and since it's a retry,
// count the retry metric, and then update backoff parameters.
"RequestResult.Increment",
"RequestRetry.IncrementRetry",
"BackoffManager.UpdateBackoff",
}
statusCodesWant := []string{
// first attempt (A): we count the result metric only
"500",
"200",
// final attempt (B): we count the result metric, and the retry metric
"200", "200",
}
tests := []struct {
@ -3192,10 +3205,13 @@ func testRetryWithRateLimiterBackoffAndMetrics(t *testing.T, key string, doFunc
// to override as well, and we want tests to be able to run in
// parallel then we will need to provide a way for tests to
// register/deregister their own metric inerfaces.
old := metrics.RequestResult
oldRequestResult := metrics.RequestResult
oldRequestRetry := metrics.RequestRetry
metrics.RequestResult = interceptor
metrics.RequestRetry = interceptor
defer func() {
metrics.RequestResult = old
metrics.RequestResult = oldRequestResult
metrics.RequestRetry = oldRequestRetry
}()
ctx, cancel := context.WithCancel(context.Background())

View File

@ -242,8 +242,20 @@ func (r *withRetry) After(ctx context.Context, request *Request, resp *http.Resp
// parameters calculated from the (response, err) tuple from
// attempt N-1, so r.retryAfter is outdated and should not be
// referred to here.
isRetry := r.retryAfter != nil
r.retryAfter = nil
// the client finishes a single request after N attempts (1..N)
// - all attempts (1..N) are counted to the rest_client_requests_total
// metric (current behavior).
// - every attempt after the first (2..N) are counted to the
// rest_client_request_retries_total metric.
updateRequestResultMetric(ctx, request, resp, err)
if isRetry {
// this is attempt 2 or later
updateRequestRetryMetric(ctx, request, resp, err)
}
if request.c.base != nil {
if err != nil {
request.backoff.UpdateBackoff(request.URL(), err, 0)

View File

@ -58,6 +58,12 @@ type CallsMetric interface {
Increment(exitCode int, callStatus string)
}
// RetryMetric counts the number of retries sent to the server
// partitioned by code, method, and host.
type RetryMetric interface {
IncrementRetry(ctx context.Context, code string, method string, host string)
}
var (
// ClientCertExpiry is the expiry time of a client certificate
ClientCertExpiry ExpiryMetric = noopExpiry{}
@ -76,6 +82,9 @@ var (
// ExecPluginCalls is the number of calls made to an exec plugin, partitioned by
// exit code and call status.
ExecPluginCalls CallsMetric = noopCalls{}
// RequestRetry is the retry metric that tracks the number of
// retries sent to the server.
RequestRetry RetryMetric = noopRetry{}
)
// RegisterOpts contains all the metrics to register. Metrics may be nil.
@ -88,6 +97,7 @@ type RegisterOpts struct {
RateLimiterLatency LatencyMetric
RequestResult ResultMetric
ExecPluginCalls CallsMetric
RequestRetry RetryMetric
}
// Register registers metrics for the rest client to use. This can
@ -118,6 +128,9 @@ func Register(opts RegisterOpts) {
if opts.ExecPluginCalls != nil {
ExecPluginCalls = opts.ExecPluginCalls
}
if opts.RequestRetry != nil {
RequestRetry = opts.RequestRetry
}
})
}
@ -144,3 +157,7 @@ func (noopResult) Increment(context.Context, string, string, string) {}
type noopCalls struct{}
func (noopCalls) Increment(int, string) {}
type noopRetry struct{}
func (noopRetry) IncrementRetry(context.Context, string, string, string) {}