mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-22 11:21:47 +00:00
Merge pull request #88017 from feiskyer/fix-409
Make Azure clients only retry on specified HTTP status codes
This commit is contained in:
commit
50c8f73a4b
@ -504,23 +504,17 @@ func (az *Cloud) InitializeCloudFromConfig(config *Config, fromSecret bool) erro
|
||||
az.VirtualMachineSizesClient = vmsizeclient.New(azClientConfig.WithRateLimiter(config.VirtualMachineSizeRateLimit))
|
||||
az.SnapshotsClient = snapshotclient.New(azClientConfig.WithRateLimiter(config.SnapshotRateLimit))
|
||||
az.StorageAccountClient = storageaccountclient.New(azClientConfig.WithRateLimiter(config.StorageAccountRateLimit))
|
||||
|
||||
az.VirtualMachinesClient = vmclient.New(azClientConfig.WithRateLimiter(config.VirtualMachineRateLimit))
|
||||
az.DisksClient = diskclient.New(azClientConfig.WithRateLimiter(config.DiskRateLimit))
|
||||
// fileClient is not based on armclient, but it's still backoff retried.
|
||||
az.FileClient = newAzureFileClient(env, azClientConfig.Backoff.WithNonRetriableErrors([]string{}, []int{http.StatusNotFound}))
|
||||
|
||||
vmClientConfig := azClientConfig.WithRateLimiter(config.VirtualMachineRateLimit)
|
||||
vmClientConfig.Backoff = vmClientConfig.Backoff.WithNonRetriableErrors([]string{}, []int{http.StatusConflict})
|
||||
az.VirtualMachinesClient = vmclient.New(vmClientConfig)
|
||||
az.FileClient = newAzureFileClient(env, azClientConfig.Backoff)
|
||||
|
||||
// Error "not an active Virtual Machine Scale Set VM" is not retriable for VMSS VM.
|
||||
// But http.StatusNotFound is retriable because of ARM replication latency.
|
||||
vmssVMClientConfig := azClientConfig.WithRateLimiter(config.VirtualMachineScaleSetRateLimit)
|
||||
vmssVMClientConfig.Backoff = vmssVMClientConfig.Backoff.WithNonRetriableErrors([]string{vmssVMNotActiveErrorMessage}, []int{http.StatusConflict})
|
||||
vmssVMClientConfig.Backoff = vmssVMClientConfig.Backoff.WithNonRetriableErrors([]string{vmssVMNotActiveErrorMessage}).WithRetriableHTTPStatusCodes([]int{http.StatusNotFound})
|
||||
az.VirtualMachineScaleSetVMsClient = vmssvmclient.New(vmssVMClientConfig)
|
||||
|
||||
disksClientConfig := azClientConfig.WithRateLimiter(config.DiskRateLimit)
|
||||
disksClientConfig.Backoff = disksClientConfig.Backoff.WithNonRetriableErrors([]string{}, []int{http.StatusNotFound, http.StatusConflict})
|
||||
az.DisksClient = diskclient.New(disksClientConfig)
|
||||
|
||||
if az.MaximumLoadBalancerRuleCount == 0 {
|
||||
az.MaximumLoadBalancerRuleCount = maximumLoadBalancerRuleCount
|
||||
}
|
||||
|
@ -801,7 +801,7 @@ func TestReconcileSecurityGroupEtagMismatch(t *testing.T) {
|
||||
HTTPStatusCode: http.StatusPreconditionFailed,
|
||||
RawError: errPreconditionFailedEtagMismatch,
|
||||
}
|
||||
assert.Equal(t, err, expectedError.Error())
|
||||
assert.Equal(t, expectedError.Error(), err)
|
||||
}
|
||||
|
||||
func TestReconcilePublicIPWithNewService(t *testing.T) {
|
||||
|
@ -154,7 +154,7 @@ func TestSendAsync(t *testing.T) {
|
||||
assert.Nil(t, response)
|
||||
assert.Equal(t, 1, count)
|
||||
assert.NotNil(t, rerr)
|
||||
assert.Equal(t, true, rerr.Retriable)
|
||||
assert.Equal(t, false, rerr.Retriable)
|
||||
}
|
||||
|
||||
func TestNormalizeAzureRegion(t *testing.T) {
|
||||
@ -236,7 +236,7 @@ func TestDeleteResourceAsync(t *testing.T) {
|
||||
count := 0
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
count++
|
||||
http.Error(w, "failed", http.StatusForbidden)
|
||||
http.Error(w, "failed", http.StatusInternalServerError)
|
||||
}))
|
||||
|
||||
backoff := &retry.Backoff{Steps: 3}
|
||||
|
@ -38,6 +38,15 @@ const (
|
||||
var (
|
||||
// The function to get current time.
|
||||
now = time.Now
|
||||
|
||||
// StatusCodesForRetry are a defined group of status code for which the client will retry.
|
||||
StatusCodesForRetry = []int{
|
||||
http.StatusRequestTimeout, // 408
|
||||
http.StatusInternalServerError, // 500
|
||||
http.StatusBadGateway, // 502
|
||||
http.StatusServiceUnavailable, // 503
|
||||
http.StatusGatewayTimeout, // 504
|
||||
}
|
||||
)
|
||||
|
||||
// Error indicates an error returned by Azure APIs.
|
||||
@ -184,18 +193,21 @@ func getHTTPStatusCode(resp *http.Response) int {
|
||||
// shouldRetryHTTPRequest determines if the request is retriable.
|
||||
func shouldRetryHTTPRequest(resp *http.Response, err error) bool {
|
||||
if resp != nil {
|
||||
// HTTP 412 (StatusPreconditionFailed) means etag mismatch
|
||||
// HTTP 400 (BadRequest) means the request cannot be accepted, hence we shouldn't retry.
|
||||
if resp.StatusCode == http.StatusPreconditionFailed || resp.StatusCode == http.StatusBadRequest {
|
||||
return false
|
||||
for _, code := range StatusCodesForRetry {
|
||||
if resp.StatusCode == code {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// HTTP 4xx (except 412) or 5xx suggests we should retry.
|
||||
if 399 < resp.StatusCode && resp.StatusCode < 600 {
|
||||
// should retry on <200, error>.
|
||||
if isSuccessHTTPResponse(resp) && err != nil {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// should retry when error is not nil and no http.Response.
|
||||
if err != nil {
|
||||
return true
|
||||
}
|
||||
@ -224,6 +236,24 @@ func getRetryAfter(resp *http.Response) time.Duration {
|
||||
return dur
|
||||
}
|
||||
|
||||
// GetErrorWithRetriableHTTPStatusCodes gets an error with RetriableHTTPStatusCodes.
|
||||
// It is used to retry on some HTTPStatusCodes.
|
||||
func GetErrorWithRetriableHTTPStatusCodes(resp *http.Response, err error, retriableHTTPStatusCodes []int) *Error {
|
||||
rerr := GetError(resp, err)
|
||||
if rerr == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, code := range retriableHTTPStatusCodes {
|
||||
if rerr.HTTPStatusCode == code {
|
||||
rerr.Retriable = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return rerr
|
||||
}
|
||||
|
||||
// GetStatusNotFoundAndForbiddenIgnoredError gets an error with StatusNotFound and StatusForbidden ignored.
|
||||
// It is only used in DELETE operations.
|
||||
func GetStatusNotFoundAndForbiddenIgnoredError(resp *http.Response, err error) *Error {
|
||||
|
@ -73,7 +73,7 @@ func TestGetError(t *testing.T) {
|
||||
code: http.StatusSeeOther,
|
||||
err: fmt.Errorf("some error"),
|
||||
expected: &Error{
|
||||
Retriable: true,
|
||||
Retriable: false,
|
||||
HTTPStatusCode: http.StatusSeeOther,
|
||||
RawError: fmt.Errorf("some error"),
|
||||
},
|
||||
@ -82,7 +82,7 @@ func TestGetError(t *testing.T) {
|
||||
code: http.StatusTooManyRequests,
|
||||
retryAfter: 100,
|
||||
expected: &Error{
|
||||
Retriable: true,
|
||||
Retriable: false,
|
||||
HTTPStatusCode: http.StatusTooManyRequests,
|
||||
RetryAfter: now().Add(100 * time.Second),
|
||||
RawError: fmt.Errorf("some error"),
|
||||
@ -156,7 +156,7 @@ func TestGetStatusNotFoundAndForbiddenIgnoredError(t *testing.T) {
|
||||
code: http.StatusSeeOther,
|
||||
err: fmt.Errorf("some error"),
|
||||
expected: &Error{
|
||||
Retriable: true,
|
||||
Retriable: false,
|
||||
HTTPStatusCode: http.StatusSeeOther,
|
||||
RawError: fmt.Errorf("some error"),
|
||||
},
|
||||
@ -165,7 +165,7 @@ func TestGetStatusNotFoundAndForbiddenIgnoredError(t *testing.T) {
|
||||
code: http.StatusTooManyRequests,
|
||||
retryAfter: 100,
|
||||
expected: &Error{
|
||||
Retriable: true,
|
||||
Retriable: false,
|
||||
HTTPStatusCode: http.StatusTooManyRequests,
|
||||
RetryAfter: now().Add(100 * time.Second),
|
||||
RawError: fmt.Errorf("some error"),
|
||||
|
@ -58,8 +58,8 @@ type Backoff struct {
|
||||
Cap time.Duration
|
||||
// The errors indicate that the request shouldn't do more retrying.
|
||||
NonRetriableErrors []string
|
||||
// The HTTPStatusCode indicates that the request shouldn't do more retrying.
|
||||
NonRetriableHTTPStatusCodes []int
|
||||
// The RetriableHTTPStatusCodes indicates that the HTTPStatusCode should do more retrying.
|
||||
RetriableHTTPStatusCodes []int
|
||||
}
|
||||
|
||||
// NewBackoff creates a new Backoff.
|
||||
@ -73,11 +73,17 @@ func NewBackoff(duration time.Duration, factor float64, jitter float64, steps in
|
||||
}
|
||||
}
|
||||
|
||||
// WithNonRetriableErrors returns a new *Backoff with NonRetriableErrors, NonRetriableHTTPStatusCodes assigned.
|
||||
func (b *Backoff) WithNonRetriableErrors(errs []string, httpStatusCodes []int) *Backoff {
|
||||
// WithNonRetriableErrors returns a new *Backoff with NonRetriableErrors assigned.
|
||||
func (b *Backoff) WithNonRetriableErrors(errs []string) *Backoff {
|
||||
newBackoff := *b
|
||||
newBackoff.NonRetriableErrors = errs
|
||||
newBackoff.NonRetriableHTTPStatusCodes = httpStatusCodes
|
||||
return &newBackoff
|
||||
}
|
||||
|
||||
// WithRetriableHTTPStatusCodes returns a new *Backoff with RetriableHTTPStatusCode assigned.
|
||||
func (b *Backoff) WithRetriableHTTPStatusCodes(httpStatusCodes []int) *Backoff {
|
||||
newBackoff := *b
|
||||
newBackoff.RetriableHTTPStatusCodes = httpStatusCodes
|
||||
return &newBackoff
|
||||
}
|
||||
|
||||
@ -93,11 +99,6 @@ func (b *Backoff) isNonRetriableError(rerr *Error) bool {
|
||||
}
|
||||
}
|
||||
|
||||
for _, code := range b.NonRetriableHTTPStatusCodes {
|
||||
if rerr.HTTPStatusCode == code {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
@ -162,7 +163,7 @@ func doBackoffRetry(s autorest.Sender, r *http.Request, backoff *Backoff) (resp
|
||||
return
|
||||
}
|
||||
resp, err = s.Do(rr.Request())
|
||||
rerr := GetError(resp, err)
|
||||
rerr := GetErrorWithRetriableHTTPStatusCodes(resp, err, backoff.RetriableHTTPStatusCodes)
|
||||
// Abort retries in the following scenarios:
|
||||
// 1) request succeed
|
||||
// 2) request is not retriable
|
||||
|
@ -75,11 +75,6 @@ func TestStep(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestDoBackoffRetry(t *testing.T) {
|
||||
backoff := &Backoff{
|
||||
Factor: 1.0,
|
||||
Steps: 3,
|
||||
NonRetriableHTTPStatusCodes: []int{http.StatusNotFound},
|
||||
}
|
||||
fakeRequest := &http.Request{
|
||||
URL: &url.URL{
|
||||
Host: "localhost",
|
||||
@ -96,7 +91,7 @@ func TestDoBackoffRetry(t *testing.T) {
|
||||
HTTPStatusCode: 500,
|
||||
RawError: fmt.Errorf("HTTP status code (500)"),
|
||||
}
|
||||
resp, err := doBackoffRetry(client, fakeRequest, backoff)
|
||||
resp, err := doBackoffRetry(client, fakeRequest, &Backoff{Factor: 1.0, Steps: 3})
|
||||
assert.NotNil(t, resp)
|
||||
assert.Equal(t, 500, resp.StatusCode)
|
||||
assert.Equal(t, expectedErr.Error(), err)
|
||||
@ -106,7 +101,7 @@ func TestDoBackoffRetry(t *testing.T) {
|
||||
r = mocks.NewResponseWithStatus("200 OK", http.StatusOK)
|
||||
client = mocks.NewSender()
|
||||
client.AppendAndRepeatResponse(r, 1)
|
||||
resp, err = doBackoffRetry(client, fakeRequest, backoff)
|
||||
resp, err = doBackoffRetry(client, fakeRequest, &Backoff{Factor: 1.0, Steps: 3})
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, 1, client.Attempts())
|
||||
assert.NotNil(t, resp)
|
||||
@ -117,28 +112,47 @@ func TestDoBackoffRetry(t *testing.T) {
|
||||
client = mocks.NewSender()
|
||||
client.AppendAndRepeatResponse(r, 1)
|
||||
expectedErr = &Error{
|
||||
Retriable: true,
|
||||
Retriable: false,
|
||||
HTTPStatusCode: 429,
|
||||
RawError: fmt.Errorf("HTTP status code (429)"),
|
||||
}
|
||||
resp, err = doBackoffRetry(client, fakeRequest, backoff)
|
||||
resp, err = doBackoffRetry(client, fakeRequest, &Backoff{Factor: 1.0, Steps: 3})
|
||||
assert.Equal(t, expectedErr.Error(), err)
|
||||
assert.Equal(t, 1, client.Attempts())
|
||||
assert.NotNil(t, resp)
|
||||
assert.Equal(t, 429, resp.StatusCode)
|
||||
|
||||
// retry on non retriable error
|
||||
// don't retry on non retriable error
|
||||
r = mocks.NewResponseWithStatus("404 StatusNotFound", http.StatusNotFound)
|
||||
client = mocks.NewSender()
|
||||
client.AppendAndRepeatResponse(r, 1)
|
||||
expectedErr = &Error{
|
||||
Retriable: true,
|
||||
Retriable: false,
|
||||
HTTPStatusCode: 404,
|
||||
RawError: fmt.Errorf("HTTP status code (404)"),
|
||||
}
|
||||
resp, err = doBackoffRetry(client, fakeRequest, backoff)
|
||||
resp, err = doBackoffRetry(client, fakeRequest, &Backoff{Factor: 1.0, Steps: 3})
|
||||
assert.NotNil(t, resp)
|
||||
assert.Equal(t, 404, resp.StatusCode)
|
||||
assert.Equal(t, expectedErr.Error(), err)
|
||||
assert.Equal(t, 1, client.Attempts())
|
||||
|
||||
// retry on RetriableHTTPStatusCodes
|
||||
r = mocks.NewResponseWithStatus("102 StatusProcessing", http.StatusProcessing)
|
||||
client = mocks.NewSender()
|
||||
client.AppendAndRepeatResponse(r, 3)
|
||||
expectedErr = &Error{
|
||||
Retriable: true,
|
||||
HTTPStatusCode: 102,
|
||||
RawError: fmt.Errorf("HTTP status code (102)"),
|
||||
}
|
||||
resp, err = doBackoffRetry(client, fakeRequest, &Backoff{
|
||||
Factor: 1.0,
|
||||
Steps: 3,
|
||||
RetriableHTTPStatusCodes: []int{http.StatusProcessing},
|
||||
})
|
||||
assert.NotNil(t, resp)
|
||||
assert.Equal(t, 102, resp.StatusCode)
|
||||
assert.Equal(t, expectedErr.Error(), err)
|
||||
assert.Equal(t, 3, client.Attempts())
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user