mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-24 20:24:09 +00:00
Make Azure clients only retries on specified HTTP status codes
This commit is contained in:
parent
919871e86a
commit
6a487723ed
@ -504,23 +504,17 @@ func (az *Cloud) InitializeCloudFromConfig(config *Config, fromSecret bool) erro
|
|||||||
az.VirtualMachineSizesClient = vmsizeclient.New(azClientConfig.WithRateLimiter(config.VirtualMachineSizeRateLimit))
|
az.VirtualMachineSizesClient = vmsizeclient.New(azClientConfig.WithRateLimiter(config.VirtualMachineSizeRateLimit))
|
||||||
az.SnapshotsClient = snapshotclient.New(azClientConfig.WithRateLimiter(config.SnapshotRateLimit))
|
az.SnapshotsClient = snapshotclient.New(azClientConfig.WithRateLimiter(config.SnapshotRateLimit))
|
||||||
az.StorageAccountClient = storageaccountclient.New(azClientConfig.WithRateLimiter(config.StorageAccountRateLimit))
|
az.StorageAccountClient = storageaccountclient.New(azClientConfig.WithRateLimiter(config.StorageAccountRateLimit))
|
||||||
|
az.VirtualMachinesClient = vmclient.New(azClientConfig.WithRateLimiter(config.VirtualMachineRateLimit))
|
||||||
|
az.DisksClient = diskclient.New(azClientConfig.WithRateLimiter(config.DiskRateLimit))
|
||||||
// fileClient is not based on armclient, but it's still backoff retried.
|
// fileClient is not based on armclient, but it's still backoff retried.
|
||||||
az.FileClient = newAzureFileClient(env, azClientConfig.Backoff.WithNonRetriableErrors([]string{}, []int{http.StatusNotFound}))
|
az.FileClient = newAzureFileClient(env, azClientConfig.Backoff)
|
||||||
|
|
||||||
vmClientConfig := azClientConfig.WithRateLimiter(config.VirtualMachineRateLimit)
|
|
||||||
vmClientConfig.Backoff = vmClientConfig.Backoff.WithNonRetriableErrors([]string{}, []int{http.StatusConflict})
|
|
||||||
az.VirtualMachinesClient = vmclient.New(vmClientConfig)
|
|
||||||
|
|
||||||
// Error "not an active Virtual Machine Scale Set VM" is not retriable for VMSS VM.
|
// Error "not an active Virtual Machine Scale Set VM" is not retriable for VMSS VM.
|
||||||
|
// But http.StatusNotFound is retriable because of ARM replication latency.
|
||||||
vmssVMClientConfig := azClientConfig.WithRateLimiter(config.VirtualMachineScaleSetRateLimit)
|
vmssVMClientConfig := azClientConfig.WithRateLimiter(config.VirtualMachineScaleSetRateLimit)
|
||||||
vmssVMClientConfig.Backoff = vmssVMClientConfig.Backoff.WithNonRetriableErrors([]string{vmssVMNotActiveErrorMessage}, []int{http.StatusConflict})
|
vmssVMClientConfig.Backoff = vmssVMClientConfig.Backoff.WithNonRetriableErrors([]string{vmssVMNotActiveErrorMessage}).WithRetriableHTTPStatusCodes([]int{http.StatusNotFound})
|
||||||
az.VirtualMachineScaleSetVMsClient = vmssvmclient.New(vmssVMClientConfig)
|
az.VirtualMachineScaleSetVMsClient = vmssvmclient.New(vmssVMClientConfig)
|
||||||
|
|
||||||
disksClientConfig := azClientConfig.WithRateLimiter(config.DiskRateLimit)
|
|
||||||
disksClientConfig.Backoff = disksClientConfig.Backoff.WithNonRetriableErrors([]string{}, []int{http.StatusNotFound, http.StatusConflict})
|
|
||||||
az.DisksClient = diskclient.New(disksClientConfig)
|
|
||||||
|
|
||||||
if az.MaximumLoadBalancerRuleCount == 0 {
|
if az.MaximumLoadBalancerRuleCount == 0 {
|
||||||
az.MaximumLoadBalancerRuleCount = maximumLoadBalancerRuleCount
|
az.MaximumLoadBalancerRuleCount = maximumLoadBalancerRuleCount
|
||||||
}
|
}
|
||||||
|
@ -801,7 +801,7 @@ func TestReconcileSecurityGroupEtagMismatch(t *testing.T) {
|
|||||||
HTTPStatusCode: http.StatusPreconditionFailed,
|
HTTPStatusCode: http.StatusPreconditionFailed,
|
||||||
RawError: errPreconditionFailedEtagMismatch,
|
RawError: errPreconditionFailedEtagMismatch,
|
||||||
}
|
}
|
||||||
assert.Equal(t, err, expectedError.Error())
|
assert.Equal(t, expectedError.Error(), err)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestReconcilePublicIPWithNewService(t *testing.T) {
|
func TestReconcilePublicIPWithNewService(t *testing.T) {
|
||||||
|
@ -154,7 +154,7 @@ func TestSendAsync(t *testing.T) {
|
|||||||
assert.Nil(t, response)
|
assert.Nil(t, response)
|
||||||
assert.Equal(t, 1, count)
|
assert.Equal(t, 1, count)
|
||||||
assert.NotNil(t, rerr)
|
assert.NotNil(t, rerr)
|
||||||
assert.Equal(t, true, rerr.Retriable)
|
assert.Equal(t, false, rerr.Retriable)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNormalizeAzureRegion(t *testing.T) {
|
func TestNormalizeAzureRegion(t *testing.T) {
|
||||||
@ -236,7 +236,7 @@ func TestDeleteResourceAsync(t *testing.T) {
|
|||||||
count := 0
|
count := 0
|
||||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
count++
|
count++
|
||||||
http.Error(w, "failed", http.StatusForbidden)
|
http.Error(w, "failed", http.StatusInternalServerError)
|
||||||
}))
|
}))
|
||||||
|
|
||||||
backoff := &retry.Backoff{Steps: 3}
|
backoff := &retry.Backoff{Steps: 3}
|
||||||
|
@ -38,6 +38,15 @@ const (
|
|||||||
var (
|
var (
|
||||||
// The function to get current time.
|
// The function to get current time.
|
||||||
now = time.Now
|
now = time.Now
|
||||||
|
|
||||||
|
// StatusCodesForRetry are a defined group of status code for which the client will retry.
|
||||||
|
StatusCodesForRetry = []int{
|
||||||
|
http.StatusRequestTimeout, // 408
|
||||||
|
http.StatusInternalServerError, // 500
|
||||||
|
http.StatusBadGateway, // 502
|
||||||
|
http.StatusServiceUnavailable, // 503
|
||||||
|
http.StatusGatewayTimeout, // 504
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
// Error indicates an error returned by Azure APIs.
|
// Error indicates an error returned by Azure APIs.
|
||||||
@ -184,18 +193,21 @@ func getHTTPStatusCode(resp *http.Response) int {
|
|||||||
// shouldRetryHTTPRequest determines if the request is retriable.
|
// shouldRetryHTTPRequest determines if the request is retriable.
|
||||||
func shouldRetryHTTPRequest(resp *http.Response, err error) bool {
|
func shouldRetryHTTPRequest(resp *http.Response, err error) bool {
|
||||||
if resp != nil {
|
if resp != nil {
|
||||||
// HTTP 412 (StatusPreconditionFailed) means etag mismatch
|
for _, code := range StatusCodesForRetry {
|
||||||
// HTTP 400 (BadRequest) means the request cannot be accepted, hence we shouldn't retry.
|
if resp.StatusCode == code {
|
||||||
if resp.StatusCode == http.StatusPreconditionFailed || resp.StatusCode == http.StatusBadRequest {
|
return true
|
||||||
return false
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// HTTP 4xx (except 412) or 5xx suggests we should retry.
|
// should retry on <200, error>.
|
||||||
if 399 < resp.StatusCode && resp.StatusCode < 600 {
|
if isSuccessHTTPResponse(resp) && err != nil {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// should retry when error is not nil and no http.Response.
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
@ -224,6 +236,24 @@ func getRetryAfter(resp *http.Response) time.Duration {
|
|||||||
return dur
|
return dur
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetErrorWithRetriableHTTPStatusCodes gets an error with RetriableHTTPStatusCodes.
|
||||||
|
// It is used to retry on some HTTPStatusCodes.
|
||||||
|
func GetErrorWithRetriableHTTPStatusCodes(resp *http.Response, err error, retriableHTTPStatusCodes []int) *Error {
|
||||||
|
rerr := GetError(resp, err)
|
||||||
|
if rerr == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, code := range retriableHTTPStatusCodes {
|
||||||
|
if rerr.HTTPStatusCode == code {
|
||||||
|
rerr.Retriable = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return rerr
|
||||||
|
}
|
||||||
|
|
||||||
// GetStatusNotFoundAndForbiddenIgnoredError gets an error with StatusNotFound and StatusForbidden ignored.
|
// GetStatusNotFoundAndForbiddenIgnoredError gets an error with StatusNotFound and StatusForbidden ignored.
|
||||||
// It is only used in DELETE operations.
|
// It is only used in DELETE operations.
|
||||||
func GetStatusNotFoundAndForbiddenIgnoredError(resp *http.Response, err error) *Error {
|
func GetStatusNotFoundAndForbiddenIgnoredError(resp *http.Response, err error) *Error {
|
||||||
|
@ -73,7 +73,7 @@ func TestGetError(t *testing.T) {
|
|||||||
code: http.StatusSeeOther,
|
code: http.StatusSeeOther,
|
||||||
err: fmt.Errorf("some error"),
|
err: fmt.Errorf("some error"),
|
||||||
expected: &Error{
|
expected: &Error{
|
||||||
Retriable: true,
|
Retriable: false,
|
||||||
HTTPStatusCode: http.StatusSeeOther,
|
HTTPStatusCode: http.StatusSeeOther,
|
||||||
RawError: fmt.Errorf("some error"),
|
RawError: fmt.Errorf("some error"),
|
||||||
},
|
},
|
||||||
@ -82,7 +82,7 @@ func TestGetError(t *testing.T) {
|
|||||||
code: http.StatusTooManyRequests,
|
code: http.StatusTooManyRequests,
|
||||||
retryAfter: 100,
|
retryAfter: 100,
|
||||||
expected: &Error{
|
expected: &Error{
|
||||||
Retriable: true,
|
Retriable: false,
|
||||||
HTTPStatusCode: http.StatusTooManyRequests,
|
HTTPStatusCode: http.StatusTooManyRequests,
|
||||||
RetryAfter: now().Add(100 * time.Second),
|
RetryAfter: now().Add(100 * time.Second),
|
||||||
RawError: fmt.Errorf("some error"),
|
RawError: fmt.Errorf("some error"),
|
||||||
@ -156,7 +156,7 @@ func TestGetStatusNotFoundAndForbiddenIgnoredError(t *testing.T) {
|
|||||||
code: http.StatusSeeOther,
|
code: http.StatusSeeOther,
|
||||||
err: fmt.Errorf("some error"),
|
err: fmt.Errorf("some error"),
|
||||||
expected: &Error{
|
expected: &Error{
|
||||||
Retriable: true,
|
Retriable: false,
|
||||||
HTTPStatusCode: http.StatusSeeOther,
|
HTTPStatusCode: http.StatusSeeOther,
|
||||||
RawError: fmt.Errorf("some error"),
|
RawError: fmt.Errorf("some error"),
|
||||||
},
|
},
|
||||||
@ -165,7 +165,7 @@ func TestGetStatusNotFoundAndForbiddenIgnoredError(t *testing.T) {
|
|||||||
code: http.StatusTooManyRequests,
|
code: http.StatusTooManyRequests,
|
||||||
retryAfter: 100,
|
retryAfter: 100,
|
||||||
expected: &Error{
|
expected: &Error{
|
||||||
Retriable: true,
|
Retriable: false,
|
||||||
HTTPStatusCode: http.StatusTooManyRequests,
|
HTTPStatusCode: http.StatusTooManyRequests,
|
||||||
RetryAfter: now().Add(100 * time.Second),
|
RetryAfter: now().Add(100 * time.Second),
|
||||||
RawError: fmt.Errorf("some error"),
|
RawError: fmt.Errorf("some error"),
|
||||||
|
@ -58,8 +58,8 @@ type Backoff struct {
|
|||||||
Cap time.Duration
|
Cap time.Duration
|
||||||
// The errors indicate that the request shouldn't do more retrying.
|
// The errors indicate that the request shouldn't do more retrying.
|
||||||
NonRetriableErrors []string
|
NonRetriableErrors []string
|
||||||
// The HTTPStatusCode indicates that the request shouldn't do more retrying.
|
// The RetriableHTTPStatusCodes indicates that the HTTPStatusCode should do more retrying.
|
||||||
NonRetriableHTTPStatusCodes []int
|
RetriableHTTPStatusCodes []int
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewBackoff creates a new Backoff.
|
// NewBackoff creates a new Backoff.
|
||||||
@ -73,11 +73,17 @@ func NewBackoff(duration time.Duration, factor float64, jitter float64, steps in
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// WithNonRetriableErrors returns a new *Backoff with NonRetriableErrors, NonRetriableHTTPStatusCodes assigned.
|
// WithNonRetriableErrors returns a new *Backoff with NonRetriableErrors assigned.
|
||||||
func (b *Backoff) WithNonRetriableErrors(errs []string, httpStatusCodes []int) *Backoff {
|
func (b *Backoff) WithNonRetriableErrors(errs []string) *Backoff {
|
||||||
newBackoff := *b
|
newBackoff := *b
|
||||||
newBackoff.NonRetriableErrors = errs
|
newBackoff.NonRetriableErrors = errs
|
||||||
newBackoff.NonRetriableHTTPStatusCodes = httpStatusCodes
|
return &newBackoff
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithRetriableHTTPStatusCodes returns a new *Backoff with RetriableHTTPStatusCode assigned.
|
||||||
|
func (b *Backoff) WithRetriableHTTPStatusCodes(httpStatusCodes []int) *Backoff {
|
||||||
|
newBackoff := *b
|
||||||
|
newBackoff.RetriableHTTPStatusCodes = httpStatusCodes
|
||||||
return &newBackoff
|
return &newBackoff
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -93,11 +99,6 @@ func (b *Backoff) isNonRetriableError(rerr *Error) bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, code := range b.NonRetriableHTTPStatusCodes {
|
|
||||||
if rerr.HTTPStatusCode == code {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -162,7 +163,7 @@ func doBackoffRetry(s autorest.Sender, r *http.Request, backoff *Backoff) (resp
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
resp, err = s.Do(rr.Request())
|
resp, err = s.Do(rr.Request())
|
||||||
rerr := GetError(resp, err)
|
rerr := GetErrorWithRetriableHTTPStatusCodes(resp, err, backoff.RetriableHTTPStatusCodes)
|
||||||
// Abort retries in the following scenarios:
|
// Abort retries in the following scenarios:
|
||||||
// 1) request succeed
|
// 1) request succeed
|
||||||
// 2) request is not retriable
|
// 2) request is not retriable
|
||||||
|
@ -75,11 +75,6 @@ func TestStep(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestDoBackoffRetry(t *testing.T) {
|
func TestDoBackoffRetry(t *testing.T) {
|
||||||
backoff := &Backoff{
|
|
||||||
Factor: 1.0,
|
|
||||||
Steps: 3,
|
|
||||||
NonRetriableHTTPStatusCodes: []int{http.StatusNotFound},
|
|
||||||
}
|
|
||||||
fakeRequest := &http.Request{
|
fakeRequest := &http.Request{
|
||||||
URL: &url.URL{
|
URL: &url.URL{
|
||||||
Host: "localhost",
|
Host: "localhost",
|
||||||
@ -96,7 +91,7 @@ func TestDoBackoffRetry(t *testing.T) {
|
|||||||
HTTPStatusCode: 500,
|
HTTPStatusCode: 500,
|
||||||
RawError: fmt.Errorf("HTTP status code (500)"),
|
RawError: fmt.Errorf("HTTP status code (500)"),
|
||||||
}
|
}
|
||||||
resp, err := doBackoffRetry(client, fakeRequest, backoff)
|
resp, err := doBackoffRetry(client, fakeRequest, &Backoff{Factor: 1.0, Steps: 3})
|
||||||
assert.NotNil(t, resp)
|
assert.NotNil(t, resp)
|
||||||
assert.Equal(t, 500, resp.StatusCode)
|
assert.Equal(t, 500, resp.StatusCode)
|
||||||
assert.Equal(t, expectedErr.Error(), err)
|
assert.Equal(t, expectedErr.Error(), err)
|
||||||
@ -106,7 +101,7 @@ func TestDoBackoffRetry(t *testing.T) {
|
|||||||
r = mocks.NewResponseWithStatus("200 OK", http.StatusOK)
|
r = mocks.NewResponseWithStatus("200 OK", http.StatusOK)
|
||||||
client = mocks.NewSender()
|
client = mocks.NewSender()
|
||||||
client.AppendAndRepeatResponse(r, 1)
|
client.AppendAndRepeatResponse(r, 1)
|
||||||
resp, err = doBackoffRetry(client, fakeRequest, backoff)
|
resp, err = doBackoffRetry(client, fakeRequest, &Backoff{Factor: 1.0, Steps: 3})
|
||||||
assert.Nil(t, err)
|
assert.Nil(t, err)
|
||||||
assert.Equal(t, 1, client.Attempts())
|
assert.Equal(t, 1, client.Attempts())
|
||||||
assert.NotNil(t, resp)
|
assert.NotNil(t, resp)
|
||||||
@ -117,28 +112,47 @@ func TestDoBackoffRetry(t *testing.T) {
|
|||||||
client = mocks.NewSender()
|
client = mocks.NewSender()
|
||||||
client.AppendAndRepeatResponse(r, 1)
|
client.AppendAndRepeatResponse(r, 1)
|
||||||
expectedErr = &Error{
|
expectedErr = &Error{
|
||||||
Retriable: true,
|
Retriable: false,
|
||||||
HTTPStatusCode: 429,
|
HTTPStatusCode: 429,
|
||||||
RawError: fmt.Errorf("HTTP status code (429)"),
|
RawError: fmt.Errorf("HTTP status code (429)"),
|
||||||
}
|
}
|
||||||
resp, err = doBackoffRetry(client, fakeRequest, backoff)
|
resp, err = doBackoffRetry(client, fakeRequest, &Backoff{Factor: 1.0, Steps: 3})
|
||||||
assert.Equal(t, expectedErr.Error(), err)
|
assert.Equal(t, expectedErr.Error(), err)
|
||||||
assert.Equal(t, 1, client.Attempts())
|
assert.Equal(t, 1, client.Attempts())
|
||||||
assert.NotNil(t, resp)
|
assert.NotNil(t, resp)
|
||||||
assert.Equal(t, 429, resp.StatusCode)
|
assert.Equal(t, 429, resp.StatusCode)
|
||||||
|
|
||||||
// retry on non retriable error
|
// don't retry on non retriable error
|
||||||
r = mocks.NewResponseWithStatus("404 StatusNotFound", http.StatusNotFound)
|
r = mocks.NewResponseWithStatus("404 StatusNotFound", http.StatusNotFound)
|
||||||
client = mocks.NewSender()
|
client = mocks.NewSender()
|
||||||
client.AppendAndRepeatResponse(r, 1)
|
client.AppendAndRepeatResponse(r, 1)
|
||||||
expectedErr = &Error{
|
expectedErr = &Error{
|
||||||
Retriable: true,
|
Retriable: false,
|
||||||
HTTPStatusCode: 404,
|
HTTPStatusCode: 404,
|
||||||
RawError: fmt.Errorf("HTTP status code (404)"),
|
RawError: fmt.Errorf("HTTP status code (404)"),
|
||||||
}
|
}
|
||||||
resp, err = doBackoffRetry(client, fakeRequest, backoff)
|
resp, err = doBackoffRetry(client, fakeRequest, &Backoff{Factor: 1.0, Steps: 3})
|
||||||
assert.NotNil(t, resp)
|
assert.NotNil(t, resp)
|
||||||
assert.Equal(t, 404, resp.StatusCode)
|
assert.Equal(t, 404, resp.StatusCode)
|
||||||
assert.Equal(t, expectedErr.Error(), err)
|
assert.Equal(t, expectedErr.Error(), err)
|
||||||
assert.Equal(t, 1, client.Attempts())
|
assert.Equal(t, 1, client.Attempts())
|
||||||
|
|
||||||
|
// retry on RetriableHTTPStatusCodes
|
||||||
|
r = mocks.NewResponseWithStatus("102 StatusProcessing", http.StatusProcessing)
|
||||||
|
client = mocks.NewSender()
|
||||||
|
client.AppendAndRepeatResponse(r, 3)
|
||||||
|
expectedErr = &Error{
|
||||||
|
Retriable: true,
|
||||||
|
HTTPStatusCode: 102,
|
||||||
|
RawError: fmt.Errorf("HTTP status code (102)"),
|
||||||
|
}
|
||||||
|
resp, err = doBackoffRetry(client, fakeRequest, &Backoff{
|
||||||
|
Factor: 1.0,
|
||||||
|
Steps: 3,
|
||||||
|
RetriableHTTPStatusCodes: []int{http.StatusProcessing},
|
||||||
|
})
|
||||||
|
assert.NotNil(t, resp)
|
||||||
|
assert.Equal(t, 102, resp.StatusCode)
|
||||||
|
assert.Equal(t, expectedErr.Error(), err)
|
||||||
|
assert.Equal(t, 3, client.Attempts())
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user