mirror of
https://github.com/kubernetes/client-go.git
synced 2025-08-08 10:47:26 +00:00
client-go: add metric to count retries
Kubernetes-commit: b6c369f5c90bbef8058b3d44949f4e815dd6607f
This commit is contained in:
parent
f457a57d6d
commit
d2388d199c
@ -726,7 +726,6 @@ func (r *Request) Watch(ctx context.Context) (watch.Interface, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
resp, err := client.Do(req)
|
resp, err := client.Do(req)
|
||||||
updateURLMetrics(ctx, r, resp, err)
|
|
||||||
retry.After(ctx, r, resp, err)
|
retry.After(ctx, r, resp, err)
|
||||||
if err == nil && resp.StatusCode == http.StatusOK {
|
if err == nil && resp.StatusCode == http.StatusOK {
|
||||||
return r.newStreamWatcher(resp)
|
return r.newStreamWatcher(resp)
|
||||||
@ -786,22 +785,36 @@ func (r *Request) newStreamWatcher(resp *http.Response) (watch.Interface, error)
|
|||||||
), nil
|
), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// updateURLMetrics is a convenience function for pushing metrics.
|
// updateRequestResultMetric increments the RequestResult metric counter,
|
||||||
// It also handles corner cases for incomplete/invalid request data.
|
// it should be called with the (response, err) tuple from the final
|
||||||
func updateURLMetrics(ctx context.Context, req *Request, resp *http.Response, err error) {
|
// reply from the server.
|
||||||
url := "none"
|
func updateRequestResultMetric(ctx context.Context, req *Request, resp *http.Response, err error) {
|
||||||
|
code, host := sanitize(req, resp, err)
|
||||||
|
metrics.RequestResult.Increment(ctx, code, req.verb, host)
|
||||||
|
}
|
||||||
|
|
||||||
|
// updateRequestRetryMetric increments the RequestRetry metric counter,
|
||||||
|
// it should be called with the (response, err) tuple for each retry
|
||||||
|
// except for the final attempt.
|
||||||
|
func updateRequestRetryMetric(ctx context.Context, req *Request, resp *http.Response, err error) {
|
||||||
|
code, host := sanitize(req, resp, err)
|
||||||
|
metrics.RequestRetry.IncrementRetry(ctx, code, req.verb, host)
|
||||||
|
}
|
||||||
|
|
||||||
|
func sanitize(req *Request, resp *http.Response, err error) (string, string) {
|
||||||
|
host := "none"
|
||||||
if req.c.base != nil {
|
if req.c.base != nil {
|
||||||
url = req.c.base.Host
|
host = req.c.base.Host
|
||||||
}
|
}
|
||||||
|
|
||||||
// Errors can be arbitrary strings. Unbound label cardinality is not suitable for a metric
|
// Errors can be arbitrary strings. Unbound label cardinality is not suitable for a metric
|
||||||
// system so we just report them as `<error>`.
|
// system so we just report them as `<error>`.
|
||||||
if err != nil {
|
code := "<error>"
|
||||||
metrics.RequestResult.Increment(ctx, "<error>", req.verb, url)
|
if resp != nil {
|
||||||
} else {
|
code = strconv.Itoa(resp.StatusCode)
|
||||||
// Metrics for failure codes
|
|
||||||
metrics.RequestResult.Increment(ctx, strconv.Itoa(resp.StatusCode), req.verb, url)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return code, host
|
||||||
}
|
}
|
||||||
|
|
||||||
// Stream formats and executes the request, and offers streaming of the response.
|
// Stream formats and executes the request, and offers streaming of the response.
|
||||||
@ -834,7 +847,6 @@ func (r *Request) Stream(ctx context.Context) (io.ReadCloser, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
resp, err := client.Do(req)
|
resp, err := client.Do(req)
|
||||||
updateURLMetrics(ctx, r, resp, err)
|
|
||||||
retry.After(ctx, r, resp, err)
|
retry.After(ctx, r, resp, err)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// we only retry on an HTTP response with 'Retry-After' header
|
// we only retry on an HTTP response with 'Retry-After' header
|
||||||
@ -979,7 +991,6 @@ func (r *Request) request(ctx context.Context, fn func(*http.Request, *http.Resp
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
resp, err := client.Do(req)
|
resp, err := client.Do(req)
|
||||||
updateURLMetrics(ctx, r, resp, err)
|
|
||||||
// The value -1 or a value of 0 with a non-nil Body indicates that the length is unknown.
|
// The value -1 or a value of 0 with a non-nil Body indicates that the length is unknown.
|
||||||
// https://pkg.go.dev/net/http#Request
|
// https://pkg.go.dev/net/http#Request
|
||||||
if req.ContentLength >= 0 && !(req.Body != nil && req.ContentLength == 0) {
|
if req.ContentLength >= 0 && !(req.Body != nil && req.ContentLength == 0) {
|
||||||
|
@ -2991,6 +2991,7 @@ type withRateLimiterBackoffManagerAndMetrics struct {
|
|||||||
metrics.ResultMetric
|
metrics.ResultMetric
|
||||||
calculateBackoffSeq int64
|
calculateBackoffSeq int64
|
||||||
calculateBackoffFn func(i int64) time.Duration
|
calculateBackoffFn func(i int64) time.Duration
|
||||||
|
metrics.RetryMetric
|
||||||
|
|
||||||
invokeOrderGot []string
|
invokeOrderGot []string
|
||||||
sleepsGot []string
|
sleepsGot []string
|
||||||
@ -3027,6 +3028,14 @@ func (lb *withRateLimiterBackoffManagerAndMetrics) Increment(ctx context.Context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (lb *withRateLimiterBackoffManagerAndMetrics) IncrementRetry(ctx context.Context, code, _, _ string) {
|
||||||
|
// we are interested in the request context that is marked by this test
|
||||||
|
if marked, ok := ctx.Value(retryTestKey).(bool); ok && marked {
|
||||||
|
lb.invokeOrderGot = append(lb.invokeOrderGot, "RequestRetry.IncrementRetry")
|
||||||
|
lb.statusCodesGot = append(lb.statusCodesGot, code)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (lb *withRateLimiterBackoffManagerAndMetrics) Do() {
|
func (lb *withRateLimiterBackoffManagerAndMetrics) Do() {
|
||||||
lb.invokeOrderGot = append(lb.invokeOrderGot, "Client.Do")
|
lb.invokeOrderGot = append(lb.invokeOrderGot, "Client.Do")
|
||||||
}
|
}
|
||||||
@ -3072,13 +3081,17 @@ func testRetryWithRateLimiterBackoffAndMetrics(t *testing.T, key string, doFunc
|
|||||||
"Client.Do",
|
"Client.Do",
|
||||||
|
|
||||||
// it's a success, so do the following:
|
// it's a success, so do the following:
|
||||||
// - call metrics and update backoff parameters
|
// count the result metric, and since it's a retry,
|
||||||
|
// count the retry metric, and then update backoff parameters.
|
||||||
"RequestResult.Increment",
|
"RequestResult.Increment",
|
||||||
|
"RequestRetry.IncrementRetry",
|
||||||
"BackoffManager.UpdateBackoff",
|
"BackoffManager.UpdateBackoff",
|
||||||
}
|
}
|
||||||
statusCodesWant := []string{
|
statusCodesWant := []string{
|
||||||
|
// first attempt (A): we count the result metric only
|
||||||
"500",
|
"500",
|
||||||
"200",
|
// final attempt (B): we count the result metric, and the retry metric
|
||||||
|
"200", "200",
|
||||||
}
|
}
|
||||||
|
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
@ -3192,10 +3205,13 @@ func testRetryWithRateLimiterBackoffAndMetrics(t *testing.T, key string, doFunc
|
|||||||
// to override as well, and we want tests to be able to run in
|
// to override as well, and we want tests to be able to run in
|
||||||
// parallel then we will need to provide a way for tests to
|
// parallel then we will need to provide a way for tests to
|
||||||
// register/deregister their own metric inerfaces.
|
// register/deregister their own metric inerfaces.
|
||||||
old := metrics.RequestResult
|
oldRequestResult := metrics.RequestResult
|
||||||
|
oldRequestRetry := metrics.RequestRetry
|
||||||
metrics.RequestResult = interceptor
|
metrics.RequestResult = interceptor
|
||||||
|
metrics.RequestRetry = interceptor
|
||||||
defer func() {
|
defer func() {
|
||||||
metrics.RequestResult = old
|
metrics.RequestResult = oldRequestResult
|
||||||
|
metrics.RequestRetry = oldRequestRetry
|
||||||
}()
|
}()
|
||||||
|
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
@ -242,8 +242,20 @@ func (r *withRetry) After(ctx context.Context, request *Request, resp *http.Resp
|
|||||||
// parameters calculated from the (response, err) tuple from
|
// parameters calculated from the (response, err) tuple from
|
||||||
// attempt N-1, so r.retryAfter is outdated and should not be
|
// attempt N-1, so r.retryAfter is outdated and should not be
|
||||||
// referred to here.
|
// referred to here.
|
||||||
|
isRetry := r.retryAfter != nil
|
||||||
r.retryAfter = nil
|
r.retryAfter = nil
|
||||||
|
|
||||||
|
// the client finishes a single request after N attempts (1..N)
|
||||||
|
// - all attempts (1..N) are counted to the rest_client_requests_total
|
||||||
|
// metric (current behavior).
|
||||||
|
// - every attempt after the first (2..N) are counted to the
|
||||||
|
// rest_client_request_retries_total metric.
|
||||||
|
updateRequestResultMetric(ctx, request, resp, err)
|
||||||
|
if isRetry {
|
||||||
|
// this is attempt 2 or later
|
||||||
|
updateRequestRetryMetric(ctx, request, resp, err)
|
||||||
|
}
|
||||||
|
|
||||||
if request.c.base != nil {
|
if request.c.base != nil {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
request.backoff.UpdateBackoff(request.URL(), err, 0)
|
request.backoff.UpdateBackoff(request.URL(), err, 0)
|
||||||
|
@ -58,6 +58,12 @@ type CallsMetric interface {
|
|||||||
Increment(exitCode int, callStatus string)
|
Increment(exitCode int, callStatus string)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RetryMetric counts the number of retries sent to the server
|
||||||
|
// partitioned by code, method, and host.
|
||||||
|
type RetryMetric interface {
|
||||||
|
IncrementRetry(ctx context.Context, code string, method string, host string)
|
||||||
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
// ClientCertExpiry is the expiry time of a client certificate
|
// ClientCertExpiry is the expiry time of a client certificate
|
||||||
ClientCertExpiry ExpiryMetric = noopExpiry{}
|
ClientCertExpiry ExpiryMetric = noopExpiry{}
|
||||||
@ -76,6 +82,9 @@ var (
|
|||||||
// ExecPluginCalls is the number of calls made to an exec plugin, partitioned by
|
// ExecPluginCalls is the number of calls made to an exec plugin, partitioned by
|
||||||
// exit code and call status.
|
// exit code and call status.
|
||||||
ExecPluginCalls CallsMetric = noopCalls{}
|
ExecPluginCalls CallsMetric = noopCalls{}
|
||||||
|
// RequestRetry is the retry metric that tracks the number of
|
||||||
|
// retries sent to the server.
|
||||||
|
RequestRetry RetryMetric = noopRetry{}
|
||||||
)
|
)
|
||||||
|
|
||||||
// RegisterOpts contains all the metrics to register. Metrics may be nil.
|
// RegisterOpts contains all the metrics to register. Metrics may be nil.
|
||||||
@ -88,6 +97,7 @@ type RegisterOpts struct {
|
|||||||
RateLimiterLatency LatencyMetric
|
RateLimiterLatency LatencyMetric
|
||||||
RequestResult ResultMetric
|
RequestResult ResultMetric
|
||||||
ExecPluginCalls CallsMetric
|
ExecPluginCalls CallsMetric
|
||||||
|
RequestRetry RetryMetric
|
||||||
}
|
}
|
||||||
|
|
||||||
// Register registers metrics for the rest client to use. This can
|
// Register registers metrics for the rest client to use. This can
|
||||||
@ -118,6 +128,9 @@ func Register(opts RegisterOpts) {
|
|||||||
if opts.ExecPluginCalls != nil {
|
if opts.ExecPluginCalls != nil {
|
||||||
ExecPluginCalls = opts.ExecPluginCalls
|
ExecPluginCalls = opts.ExecPluginCalls
|
||||||
}
|
}
|
||||||
|
if opts.RequestRetry != nil {
|
||||||
|
RequestRetry = opts.RequestRetry
|
||||||
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -144,3 +157,7 @@ func (noopResult) Increment(context.Context, string, string, string) {}
|
|||||||
type noopCalls struct{}
|
type noopCalls struct{}
|
||||||
|
|
||||||
func (noopCalls) Increment(int, string) {}
|
func (noopCalls) Increment(int, string) {}
|
||||||
|
|
||||||
|
type noopRetry struct{}
|
||||||
|
|
||||||
|
func (noopRetry) IncrementRetry(context.Context, string, string, string) {}
|
||||||
|
Loading…
Reference in New Issue
Block a user