mirror of
https://github.com/kubernetes/client-go.git
synced 2025-09-04 00:24:59 +00:00
client-go: refactor retry logic for backoff, rate limiter and metric
Kubernetes-commit: cecc563d3b9a9438cd3e6ae1576baa0a36f2d843
This commit is contained in:
committed by
Kubernetes Publisher
parent
8e46da3fd1
commit
34f3aff43e
@@ -57,36 +57,32 @@ type WithRetry interface {
|
||||
// A zero maxRetries should prevent from doing any retry and return immediately.
|
||||
SetMaxRetries(maxRetries int)
|
||||
|
||||
// NextRetry advances the retry counter appropriately and returns true if the
|
||||
// request should be retried, otherwise it returns false if:
|
||||
// IsNextRetry advances the retry counter appropriately
|
||||
// and returns true if the request should be retried,
|
||||
// otherwise it returns false, if:
|
||||
// - we have already reached the maximum retry threshold.
|
||||
// - the error does not fall into the retryable category.
|
||||
// - the server has not sent us a 429, or 5xx status code and the
|
||||
// 'Retry-After' response header is not set with a value.
|
||||
// - we need to seek to the beginning of the request body before we
|
||||
// initiate the next retry, the function should log an error and
|
||||
// return false if it fails to do so.
|
||||
//
|
||||
// if retry is set to true, retryAfter will contain the information
|
||||
// regarding the next retry.
|
||||
//
|
||||
// request: the original request sent to the server
|
||||
// restReq: the associated rest.Request
|
||||
// httpReq: the HTTP Request sent to the server
|
||||
// resp: the response sent from the server, it is set if err is nil
|
||||
// err: the server sent this error to us, if err is set then resp is nil.
|
||||
// f: a IsRetryableErrorFunc function provided by the client that determines
|
||||
// if the err sent by the server is retryable.
|
||||
NextRetry(req *http.Request, resp *http.Response, err error, f IsRetryableErrorFunc) (*RetryAfter, bool)
|
||||
IsNextRetry(ctx context.Context, restReq *Request, httpReq *http.Request, resp *http.Response, err error, f IsRetryableErrorFunc) bool
|
||||
|
||||
// BeforeNextRetry is responsible for carrying out operations that need
|
||||
// to be completed before the next retry is initiated:
|
||||
// - if the request context is already canceled there is no need to
|
||||
// retry, the function will return ctx.Err().
|
||||
// - we need to seek to the beginning of the request body before we
|
||||
// initiate the next retry, the function should return an error if
|
||||
// it fails to do so.
|
||||
// - we should wait the number of seconds the server has asked us to
|
||||
// in the 'Retry-After' response header.
|
||||
//
|
||||
// If BeforeNextRetry returns an error the client should abort the retry,
|
||||
// otherwise it is safe to initiate the next retry.
|
||||
BeforeNextRetry(ctx context.Context, backoff BackoffManager, retryAfter *RetryAfter, url string, body io.Reader) error
|
||||
// Before should be invoked prior to each attempt, including
|
||||
// the first one. if an error is returned, the request
|
||||
// should be aborted immediately.
|
||||
Before(ctx context.Context, r *Request) error
|
||||
|
||||
// After should be invoked immediately after an attempt is made.
|
||||
After(ctx context.Context, r *Request, resp *http.Response, err error)
|
||||
}
|
||||
|
||||
// RetryAfter holds information associated with the next retry.
|
||||
@@ -107,6 +103,14 @@ type RetryAfter struct {
|
||||
type withRetry struct {
|
||||
maxRetries int
|
||||
attempts int
|
||||
|
||||
// retry after parameters that pertain to the attempt that is to
|
||||
// be made soon, so as to enable 'Before' and 'After' to refer
|
||||
// to the retry parameters.
|
||||
// - for the first attempt, it will always be nil
|
||||
// - for consecutive attempts, it is non nil and holds the
|
||||
// retry after parameters for the next attempt to be made.
|
||||
retryAfter *RetryAfter
|
||||
}
|
||||
|
||||
func (r *withRetry) SetMaxRetries(maxRetries int) {
|
||||
@@ -116,28 +120,28 @@ func (r *withRetry) SetMaxRetries(maxRetries int) {
|
||||
r.maxRetries = maxRetries
|
||||
}
|
||||
|
||||
func (r *withRetry) NextRetry(req *http.Request, resp *http.Response, err error, f IsRetryableErrorFunc) (*RetryAfter, bool) {
|
||||
if req == nil || (resp == nil && err == nil) {
|
||||
func (r *withRetry) IsNextRetry(ctx context.Context, restReq *Request, httpReq *http.Request, resp *http.Response, err error, f IsRetryableErrorFunc) bool {
|
||||
if httpReq == nil || (resp == nil && err == nil) {
|
||||
// bad input, we do nothing.
|
||||
return nil, false
|
||||
return false
|
||||
}
|
||||
|
||||
r.attempts++
|
||||
retryAfter := &RetryAfter{Attempt: r.attempts}
|
||||
r.retryAfter = &RetryAfter{Attempt: r.attempts}
|
||||
if r.attempts > r.maxRetries {
|
||||
return retryAfter, false
|
||||
return false
|
||||
}
|
||||
|
||||
// if the server returned an error, it takes precedence over the http response.
|
||||
var errIsRetryable bool
|
||||
if f != nil && err != nil && f.IsErrorRetryable(req, err) {
|
||||
if f != nil && err != nil && f.IsErrorRetryable(httpReq, err) {
|
||||
errIsRetryable = true
|
||||
// we have a retryable error, for which we will create an
|
||||
// artificial "Retry-After" response.
|
||||
resp = retryAfterResponse()
|
||||
}
|
||||
if err != nil && !errIsRetryable {
|
||||
return retryAfter, false
|
||||
return false
|
||||
}
|
||||
|
||||
// if we are here, we have either a or b:
|
||||
@@ -147,34 +151,100 @@ func (r *withRetry) NextRetry(req *http.Request, resp *http.Response, err error,
|
||||
// need to check if it is retryable
|
||||
seconds, wait := checkWait(resp)
|
||||
if !wait {
|
||||
return retryAfter, false
|
||||
return false
|
||||
}
|
||||
|
||||
retryAfter.Wait = time.Duration(seconds) * time.Second
|
||||
retryAfter.Reason = getRetryReason(r.attempts, seconds, resp, err)
|
||||
return retryAfter, true
|
||||
r.retryAfter.Wait = time.Duration(seconds) * time.Second
|
||||
r.retryAfter.Reason = getRetryReason(r.attempts, seconds, resp, err)
|
||||
|
||||
if err := r.prepareForNextRetry(ctx, restReq); err != nil {
|
||||
klog.V(4).Infof("Could not retry request - %v", err)
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func (r *withRetry) BeforeNextRetry(ctx context.Context, backoff BackoffManager, retryAfter *RetryAfter, url string, body io.Reader) error {
|
||||
// Ensure the response body is fully read and closed before
|
||||
// we reconnect, so that we reuse the same TCP connection.
|
||||
// prepareForNextRetry is responsible for carrying out operations that need
|
||||
// to be completed before the next retry is initiated:
|
||||
// - if the request context is already canceled there is no need to
|
||||
// retry, the function will return ctx.Err().
|
||||
// - we need to seek to the beginning of the request body before we
|
||||
// initiate the next retry, the function should return an error if
|
||||
// it fails to do so.
|
||||
func (r *withRetry) prepareForNextRetry(ctx context.Context, request *Request) error {
|
||||
if ctx.Err() != nil {
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
if seeker, ok := body.(io.Seeker); ok && body != nil {
|
||||
// Ensure the response body is fully read and closed before
|
||||
// we reconnect, so that we reuse the same TCP connection.
|
||||
if seeker, ok := request.body.(io.Seeker); ok && request.body != nil {
|
||||
if _, err := seeker.Seek(0, 0); err != nil {
|
||||
return fmt.Errorf("can't Seek() back to beginning of body for %T", r)
|
||||
return fmt.Errorf("can't Seek() back to beginning of body for %T", request)
|
||||
}
|
||||
}
|
||||
|
||||
klog.V(4).Infof("Got a Retry-After %s response for attempt %d to %v", retryAfter.Wait, retryAfter.Attempt, url)
|
||||
if backoff != nil {
|
||||
backoff.Sleep(retryAfter.Wait)
|
||||
}
|
||||
klog.V(4).Infof("Got a Retry-After %s response for attempt %d to %v", r.retryAfter.Wait, r.retryAfter.Attempt, request.URL().String())
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *withRetry) Before(ctx context.Context, request *Request) error {
|
||||
if ctx.Err() != nil {
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
url := request.URL()
|
||||
|
||||
// r.retryAfter represents the retry after parameters calculated
|
||||
// from the (response, err) tuple from the last attempt, so 'Before'
|
||||
// can apply these retry after parameters prior to the next attempt.
|
||||
// 'r.retryAfter == nil' indicates that this is the very first attempt.
|
||||
if r.retryAfter == nil {
|
||||
// we do a backoff sleep before the first attempt is made,
|
||||
// (preserving current behavior).
|
||||
request.backoff.Sleep(request.backoff.CalculateBackoff(url))
|
||||
return nil
|
||||
}
|
||||
|
||||
// if we are here, we have made attempt(s) al least once before.
|
||||
if request.backoff != nil {
|
||||
// TODO(tkashem) with default set to use exponential backoff
|
||||
// we can merge these two sleeps:
|
||||
// BackOffManager.Sleep(max(backoffManager.CalculateBackoff(), retryAfter))
|
||||
// see https://github.com/kubernetes/kubernetes/issues/108302
|
||||
request.backoff.Sleep(r.retryAfter.Wait)
|
||||
request.backoff.Sleep(request.backoff.CalculateBackoff(url))
|
||||
}
|
||||
|
||||
// We are retrying the request that we already send to
|
||||
// apiserver at least once before. This request should
|
||||
// also be throttled with the client-internal rate limiter.
|
||||
if err := request.tryThrottleWithInfo(ctx, r.retryAfter.Reason); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *withRetry) After(ctx context.Context, request *Request, resp *http.Response, err error) {
|
||||
// 'After' is invoked immediately after an attempt is made, let's label
|
||||
// the attempt we have just made as attempt 'N'.
|
||||
// the current value of r.retryAfter represents the retry after
|
||||
// parameters calculated from the (response, err) tuple from
|
||||
// attempt N-1, so r.retryAfter is outdated and should not be
|
||||
// referred to here.
|
||||
r.retryAfter = nil
|
||||
|
||||
if request.c.base != nil {
|
||||
if err != nil {
|
||||
request.backoff.UpdateBackoff(request.URL(), err, 0)
|
||||
} else {
|
||||
request.backoff.UpdateBackoff(request.URL(), err, resp.StatusCode)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// checkWait returns true along with a number of seconds if
|
||||
// the server instructed us to wait before retrying.
|
||||
func checkWait(resp *http.Response) (int, bool) {
|
||||
|
Reference in New Issue
Block a user