diff --git a/go.mod b/go.mod index 4c2ea018..83818dbb 100644 --- a/go.mod +++ b/go.mod @@ -25,7 +25,7 @@ require ( golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 google.golang.org/protobuf v1.27.1 k8s.io/api v0.0.0-20220512153301-be84346886a4 - k8s.io/apimachinery v0.0.0-20220517160406-e11374f12506 + k8s.io/apimachinery v0.0.0-20220518000438-e57249028810 k8s.io/klog/v2 v2.60.1 k8s.io/kube-openapi v0.0.0-20220328201542-3ee0da9b0b42 k8s.io/utils v0.0.0-20220210201930-3a6ce19ff2f9 @@ -64,5 +64,5 @@ require ( replace ( k8s.io/api => k8s.io/api v0.0.0-20220512153301-be84346886a4 - k8s.io/apimachinery => k8s.io/apimachinery v0.0.0-20220517160406-e11374f12506 + k8s.io/apimachinery => k8s.io/apimachinery v0.0.0-20220518000438-e57249028810 ) diff --git a/go.sum b/go.sum index 1a2fc741..671c43ab 100644 --- a/go.sum +++ b/go.sum @@ -513,8 +513,8 @@ honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9 honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= k8s.io/api v0.0.0-20220512153301-be84346886a4 h1:ATL01GmGTBjwQjXxfD0UGj3une2C8WyzTOXXzNFI790= k8s.io/api v0.0.0-20220512153301-be84346886a4/go.mod h1:swy7cXqCDjYcBv6ylU8ErrJ2ALa+9PSVEdM4zw65jFQ= -k8s.io/apimachinery v0.0.0-20220517160406-e11374f12506 h1:A+vVaBjqEcFqPLVX9RBO98INUqJCFLlI4XDnH+dDclY= -k8s.io/apimachinery v0.0.0-20220517160406-e11374f12506/go.mod h1:1oBVxgNUfLl978lJAlywA+H45m2ctSuqJU2stpbcjT4= +k8s.io/apimachinery v0.0.0-20220518000438-e57249028810 h1:AaArI0wyQ6/2bFlQ0mKgDTYNi8ibRDMBR29PHD6ZesI= +k8s.io/apimachinery v0.0.0-20220518000438-e57249028810/go.mod h1:1oBVxgNUfLl978lJAlywA+H45m2ctSuqJU2stpbcjT4= k8s.io/gengo v0.0.0-20210813121822-485abfe95c7c/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E= k8s.io/klog/v2 v2.0.0/go.mod h1:PBfzABfn139FHAV07az/IF9Wp1bkk3vpT2XSJ76fSDE= k8s.io/klog/v2 v2.2.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y= diff --git a/rest/request_test.go b/rest/request_test.go index c685fce7..01e18251 100644 --- a/rest/request_test.go +++ b/rest/request_test.go @@ -1392,7 +1392,9 @@ func TestCheckRetryClosesBody(t *testing.T) { defer testServer.Close() backoff := &testBackoffManager{} - expectedSleeps := []time.Duration{0, time.Second, 0, time.Second, 0, time.Second, 0, time.Second, 0} + + // testBackoffManager.CalculateBackoff always returns 0 + expectedSleeps := []time.Duration{0, time.Second, time.Second, time.Second, time.Second} c := testRESTClient(t, testServer) c.createBackoffMgr = func() BackoffManager { return backoff } @@ -1440,10 +1442,12 @@ func TestConnectionResetByPeerIsRetried(t *testing.T) { if err != nil { t.Errorf("Unexpected error: %v", err) } - // We have a sleep before each retry (including the initial one) and for - // every "retry-after" call - thus 5 together. - if len(backoff.sleeps) != 5 { - t.Errorf("Expected 5 retries, got: %d", len(backoff.sleeps)) + if count != 3 { + t.Errorf("Expected 3 attempts, got: %d", count) + } + // We have a sleep before each retry (including the initial one) thus 3 together. + if len(backoff.sleeps) != 3 { + t.Errorf("Expected 3 backoff.Sleep, got: %d", len(backoff.sleeps)) } } @@ -2824,7 +2828,8 @@ type withRateLimiterBackoffManagerAndMetrics struct { flowcontrol.RateLimiter *NoBackoff metrics.ResultMetric - backoffWaitSeconds int + calculateBackoffSeq int64 + calculateBackoffFn func(i int64) time.Duration invokeOrderGot []string sleepsGot []string @@ -2839,9 +2844,8 @@ func (lb *withRateLimiterBackoffManagerAndMetrics) Wait(ctx context.Context) err func (lb *withRateLimiterBackoffManagerAndMetrics) CalculateBackoff(actualUrl *url.URL) time.Duration { lb.invokeOrderGot = append(lb.invokeOrderGot, "BackoffManager.CalculateBackoff") - // we simulate a sleep sequence of 0m, 2m, 4m, 6m, ... - waitFor := time.Duration(lb.backoffWaitSeconds) * time.Minute - lb.backoffWaitSeconds += 2 + waitFor := lb.calculateBackoffFn(lb.calculateBackoffSeq) + lb.calculateBackoffSeq++ return waitFor } @@ -2868,14 +2872,16 @@ func (lb *withRateLimiterBackoffManagerAndMetrics) Do() { func testRetryWithRateLimiterBackoffAndMetrics(t *testing.T, key string, doFunc func(ctx context.Context, r *Request)) { type expected struct { - attempts int - order []string + attempts int + order []string + sleeps []string + statusCodes []string } // we define the expected order of how the client invokes the // rate limiter, backoff, and metrics methods. // scenario: - // - A: original request fails with a retryable response: (500, 'Retry-After: 1') + // - A: original request fails with a retryable response: (500, 'Retry-After: N') // - B: retry 1: successful with a status code 200 // so we have a total of 2 attempts invokeOrderWant := []string{ @@ -2887,17 +2893,16 @@ func testRetryWithRateLimiterBackoffAndMetrics(t *testing.T, key string, doFunc "BackoffManager.Sleep", // A: first attempt for which the server sends a retryable response + // status code: 500, Retry-Afer: N "Client.Do", - // we got a response object, status code: 500, Retry-Afer: 1 + // we got a response object, status code: 500, Retry-Afer: N // - call metrics method with appropriate status code // - update backoff parameters with the status code returned - // - sleep for N seconds from 'Retry-After: N' response header "RequestResult.Increment", "BackoffManager.UpdateBackoff", - "BackoffManager.Sleep", - // sleep for delay dictated by backoff parameters "BackoffManager.CalculateBackoff", + // sleep for delay=max(BackoffManager.CalculateBackoff, Retry-After: N) "BackoffManager.Sleep", // wait as dictated by the client rate lmiter "RateLimiter.Wait", @@ -2910,46 +2915,104 @@ func testRetryWithRateLimiterBackoffAndMetrics(t *testing.T, key string, doFunc "RequestResult.Increment", "BackoffManager.UpdateBackoff", } - sleepWant := []string{ - // initial backoff.Sleep before we send the request to the server for the first time - "0s", - // from 'Retry-After: 1' response header (A) - (1 * time.Second).String(), - // backoff.Sleep before retry 1 (B) - (2 * time.Minute).String(), - } statusCodesWant := []string{ "500", "200", } tests := []struct { - name string - maxRetries int - serverReturns []responseErr + name string + maxRetries int + serverReturns []responseErr + calculateBackoffFn func(i int64) time.Duration // expectations differ based on whether it is 'Watch', 'Stream' or 'Do' expectations map[string]expected }{ { - name: "success after one retry", + name: "success after one retry, Retry-After: N > BackoffManager.CalculateBackoff", maxRetries: 1, serverReturns: []responseErr{ - {response: retryAfterResponse(), err: nil}, + {response: retryAfterResponseWithDelay("5"), err: nil}, {response: &http.Response{StatusCode: http.StatusOK}, err: nil}, }, + // we simulate a sleep sequence of 0s, 1s, 2s, 3s, ... + calculateBackoffFn: func(i int64) time.Duration { return time.Duration(i * int64(time.Second)) }, expectations: map[string]expected{ "Do": { - attempts: 2, - order: invokeOrderWant, + attempts: 2, + order: invokeOrderWant, + statusCodes: statusCodesWant, + sleeps: []string{ + // initial backoff.Sleep before we send the request to the server for the first time + "0s", + // maximum of: + // - 'Retry-After: 5' response header from (A) + // - BackoffManager.CalculateBackoff (will return 1s) + (5 * time.Second).String(), + }, }, "Watch": { attempts: 2, // Watch does not do 'RateLimiter.Wait' before initially sending the request to the server - order: invokeOrderWant[1:], + order: invokeOrderWant[1:], + statusCodes: statusCodesWant, + sleeps: []string{ + "0s", + (5 * time.Second).String(), + }, }, "Stream": { + attempts: 2, + order: invokeOrderWant, + statusCodes: statusCodesWant, + sleeps: []string{ + "0s", + (5 * time.Second).String(), + }, + }, + }, + }, + { + name: "success after one retry, Retry-After: N < BackoffManager.CalculateBackoff", + maxRetries: 1, + serverReturns: []responseErr{ + {response: retryAfterResponseWithDelay("2"), err: nil}, + {response: &http.Response{StatusCode: http.StatusOK}, err: nil}, + }, + // we simulate a sleep sequence of 0s, 4s, 8s, 16s, ... + calculateBackoffFn: func(i int64) time.Duration { return time.Duration(i * int64(4*time.Second)) }, + expectations: map[string]expected{ + "Do": { + attempts: 2, + order: invokeOrderWant, + statusCodes: statusCodesWant, + sleeps: []string{ + // initial backoff.Sleep before we send the request to the server for the first time + "0s", + // maximum of: + // - 'Retry-After: 2' response header from (A) + // - BackoffManager.CalculateBackoff (will return 4s) + (4 * time.Second).String(), + }, + }, + "Watch": { attempts: 2, - order: invokeOrderWant, + // Watch does not do 'RateLimiter.Wait' before initially sending the request to the server + order: invokeOrderWant[1:], + statusCodes: statusCodesWant, + sleeps: []string{ + "0s", + (4 * time.Second).String(), + }, + }, + "Stream": { + attempts: 2, + order: invokeOrderWant, + statusCodes: statusCodesWant, + sleeps: []string{ + "0s", + (4 * time.Second).String(), + }, }, }, }, @@ -2958,8 +3021,9 @@ func testRetryWithRateLimiterBackoffAndMetrics(t *testing.T, key string, doFunc for _, test := range tests { t.Run(test.name, func(t *testing.T) { interceptor := &withRateLimiterBackoffManagerAndMetrics{ - RateLimiter: flowcontrol.NewFakeAlwaysRateLimiter(), - NoBackoff: &NoBackoff{}, + RateLimiter: flowcontrol.NewFakeAlwaysRateLimiter(), + NoBackoff: &NoBackoff{}, + calculateBackoffFn: test.calculateBackoffFn, } // TODO: today this is the only site where a test overrides the @@ -3027,11 +3091,11 @@ func testRetryWithRateLimiterBackoffAndMetrics(t *testing.T, key string, doFunc if !cmp.Equal(want.order, interceptor.invokeOrderGot) { t.Errorf("%s: Expected invoke order to match, diff: %s", key, cmp.Diff(want.order, interceptor.invokeOrderGot)) } - if !cmp.Equal(sleepWant, interceptor.sleepsGot) { - t.Errorf("%s: Expected sleep sequence to match, diff: %s", key, cmp.Diff(sleepWant, interceptor.sleepsGot)) + if !cmp.Equal(want.sleeps, interceptor.sleepsGot) { + t.Errorf("%s: Expected sleep sequence to match, diff: %s", key, cmp.Diff(want.sleeps, interceptor.sleepsGot)) } - if !cmp.Equal(statusCodesWant, interceptor.statusCodesGot) { - t.Errorf("%s: Expected status codes to match, diff: %s", key, cmp.Diff(statusCodesWant, interceptor.statusCodesGot)) + if !cmp.Equal(want.statusCodes, interceptor.statusCodesGot) { + t.Errorf("%s: Expected status codes to match, diff: %s", key, cmp.Diff(want.statusCodes, interceptor.statusCodesGot)) } }) } diff --git a/rest/with_retry.go b/rest/with_retry.go index 497d2608..383a7ca7 100644 --- a/rest/with_retry.go +++ b/rest/with_retry.go @@ -204,7 +204,9 @@ func (r *withRetry) Before(ctx context.Context, request *Request) error { if r.retryAfter == nil { // we do a backoff sleep before the first attempt is made, // (preserving current behavior). - request.backoff.Sleep(request.backoff.CalculateBackoff(url)) + if request.backoff != nil { + request.backoff.Sleep(request.backoff.CalculateBackoff(url)) + } return nil } @@ -222,12 +224,11 @@ func (r *withRetry) Before(ctx context.Context, request *Request) error { // if we are here, we have made attempt(s) al least once before. if request.backoff != nil { - // TODO(tkashem) with default set to use exponential backoff - // we can merge these two sleeps: - // BackOffManager.Sleep(max(backoffManager.CalculateBackoff(), retryAfter)) - // see https://github.com/kubernetes/kubernetes/issues/108302 - request.backoff.Sleep(r.retryAfter.Wait) - request.backoff.Sleep(request.backoff.CalculateBackoff(url)) + delay := request.backoff.CalculateBackoff(url) + if r.retryAfter.Wait > delay { + delay = r.retryAfter.Wait + } + request.backoff.Sleep(delay) } // We are retrying the request that we already send to @@ -349,8 +350,12 @@ func readAndCloseResponseBody(resp *http.Response) { } func retryAfterResponse() *http.Response { + return retryAfterResponseWithDelay("1") +} + +func retryAfterResponseWithDelay(delay string) *http.Response { return &http.Response{ StatusCode: http.StatusInternalServerError, - Header: http.Header{"Retry-After": []string{"1"}}, + Header: http.Header{"Retry-After": []string{delay}}, } }