From 92e576e01cebed55086a7e2f05fecf5a53a9b5d5 Mon Sep 17 00:00:00 2001
From: Jan Safranek
Date: Wed, 14 Dec 2016 14:00:29 +0100
Subject: [PATCH 1/2] AWS: Add exponential backoff to waitForAttachmentStatus()

We should use exponential backoff while waiting for a volume to get attached/
detached to/from a node. This will lower AWS load and reduce its API call
throttling.
---
 pkg/cloudprovider/providers/aws/BUILD  |  1 +
 pkg/cloudprovider/providers/aws/aws.go | 50 +++++++++++++-------------
 2 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/pkg/cloudprovider/providers/aws/BUILD b/pkg/cloudprovider/providers/aws/BUILD
index df3f0f800f6..af6b19f200a 100644
--- a/pkg/cloudprovider/providers/aws/BUILD
+++ b/pkg/cloudprovider/providers/aws/BUILD
@@ -30,6 +30,7 @@ go_library(
         "//pkg/credentialprovider/aws:go_default_library",
         "//pkg/types:go_default_library",
         "//pkg/util/sets:go_default_library",
+        "//pkg/util/wait:go_default_library",
         "//pkg/volume:go_default_library",
         "//vendor:github.com/aws/aws-sdk-go/aws",
         "//vendor:github.com/aws/aws-sdk-go/aws/awserr",
diff --git a/pkg/cloudprovider/providers/aws/aws.go b/pkg/cloudprovider/providers/aws/aws.go
index c5d4ab626d9..49bc1cf9982 100644
--- a/pkg/cloudprovider/providers/aws/aws.go
+++ b/pkg/cloudprovider/providers/aws/aws.go
@@ -48,6 +48,7 @@ import (
 	awscredentials "k8s.io/kubernetes/pkg/credentialprovider/aws"
 	"k8s.io/kubernetes/pkg/types"
 	"k8s.io/kubernetes/pkg/util/sets"
+	"k8s.io/kubernetes/pkg/util/wait"
 	"k8s.io/kubernetes/pkg/volume"
 )
 
@@ -136,16 +137,16 @@ const ServiceAnnotationLoadBalancerSSLPorts = "service.beta.kubernetes.io/aws-lo
 const ServiceAnnotationLoadBalancerBEProtocol = "service.beta.kubernetes.io/aws-load-balancer-backend-protocol"
 
 const (
-	// volumeAttachmentStatusTimeout is the maximum time to wait for a volume attach/detach to complete
-	volumeAttachmentStatusTimeout = 30 * time.Minute
 	// volumeAttachmentConsecutiveErrorLimit is the number of consecutive errors we will ignore when waiting for a volume to attach/detach
 	volumeAttachmentStatusConsecutiveErrorLimit = 10
-	// volumeAttachmentErrorDelay is the amount of time we wait before retrying after encountering an error,
-	// while waiting for a volume attach/detach to complete
-	volumeAttachmentStatusErrorDelay = 20 * time.Second
-	// volumeAttachmentStatusPollInterval is the interval at which we poll the volume,
-	// while waiting for a volume attach/detach to complete
-	volumeAttachmentStatusPollInterval = 10 * time.Second
+	// volumeAttachmentStatus* is configuration of exponential backoff for
+	// waiting for attach/detach operation to complete. Starting with 10
+	// seconds, multiplying by 1.2 with each step and taking 21 steps at maximum
+	// it will time out after 31.11 minutes, which roughly corresponds to GCE
+	// timeout (30 minutes).
+	volumeAttachmentStatusInitialDelay = 10 * time.Second
+	volumeAttachmentStatusFactor = 1.2
+	volumeAttachmentStatusSteps = 21
 )
 
 // Maps from backend protocol to ELB protocol
@@ -1303,25 +1304,28 @@ func (d *awsDisk) describeVolume() (*ec2.Volume, error) {
 // waitForAttachmentStatus polls until the attachment status is the expected value
 // On success, it returns the last attachment state.
 func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment, error) {
-	// We wait up to 30 minutes for the attachment to complete.
-	// This mirrors the GCE timeout.
-	timeoutAt := time.Now().UTC().Add(volumeAttachmentStatusTimeout).Unix()
+	backoff := wait.Backoff{
+		Duration: volumeAttachmentStatusInitialDelay,
+		Factor: volumeAttachmentStatusFactor,
+		Steps: volumeAttachmentStatusSteps,
+	}
 
 	// Because of rate limiting, we often see errors from describeVolume
 	// So we tolerate a limited number of failures.
 	// But once we see more than 10 errors in a row, we return the error
 	describeErrorCount := 0
+	var attachment *ec2.VolumeAttachment
 
-	for {
+	err := wait.ExponentialBackoff(backoff, func() (bool, error) {
 		info, err := d.describeVolume()
 		if err != nil {
 			describeErrorCount++
 			if describeErrorCount > volumeAttachmentStatusConsecutiveErrorLimit {
-				return nil, err
+				// report the error
+				return false, err
 			} else {
 				glog.Warningf("Ignoring error from describe volume; will retry: %q", err)
-				time.Sleep(volumeAttachmentStatusErrorDelay)
-				continue
+				return false, nil
 			}
 		} else {
 			describeErrorCount = 0
@@ -1330,7 +1334,6 @@ func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment,
 			// Shouldn't happen; log so we know if it is
 			glog.Warningf("Found multiple attachments for volume %q: %v", d.awsID, info)
 		}
-		var attachment *ec2.VolumeAttachment
 		attachmentStatus := ""
 		for _, a := range info.Attachments {
 			if attachmentStatus != "" {
@@ -1349,18 +1352,15 @@ func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment,
 			attachmentStatus = "detached"
 		}
 		if attachmentStatus == status {
-			return attachment, nil
+			// Attachment is in requested state, finish waiting
+			return true, nil
 		}
-
-		if time.Now().Unix() > timeoutAt {
-			glog.Warningf("Timeout waiting for volume %q state: actual=%s, desired=%s", d.awsID, attachmentStatus, status)
-			return nil, fmt.Errorf("Timeout waiting for volume %q state: actual=%s, desired=%s", d.awsID, attachmentStatus, status)
-		}
-
+		// continue waiting
 		glog.V(2).Infof("Waiting for volume %q state: actual=%s, desired=%s", d.awsID, attachmentStatus, status)
+		return false, nil
+	})
 
-		time.Sleep(volumeAttachmentStatusPollInterval)
-	}
+	return attachment, err
 }
 
 // Deletes the EBS disk

From be3fcd43835b1a4aa6426a78b45d07fb80c46936 Mon Sep 17 00:00:00 2001
From: Jan Safranek
Date: Wed, 14 Dec 2016 16:05:28 +0100
Subject: [PATCH 2/2] AWS: Add exponential backoff to createTags()

We should have something more reliable than 1 second sleep
---
 pkg/cloudprovider/providers/aws/aws.go | 36 +++++++++++++++++---------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/pkg/cloudprovider/providers/aws/aws.go b/pkg/cloudprovider/providers/aws/aws.go
index 49bc1cf9982..62511002d31 100644
--- a/pkg/cloudprovider/providers/aws/aws.go
+++ b/pkg/cloudprovider/providers/aws/aws.go
@@ -147,6 +147,15 @@ const (
 	volumeAttachmentStatusInitialDelay = 10 * time.Second
 	volumeAttachmentStatusFactor = 1.2
 	volumeAttachmentStatusSteps = 21
+
+	// createTag* is configuration of exponential backoff for CreateTag call. We
+	// retry mainly because if we create an object, we cannot tag it until it is
+	// "fully created" (eventual consistency). Starting with 1 second, doubling
+	// it every step and taking 9 steps results in 255 second total waiting
+	// time.
+	createTagInitialDelay = 1 * time.Second
+	createTagFactor = 2.0
+	createTagSteps = 9
 )
 
 // Maps from backend protocol to ELB protocol
@@ -2241,30 +2250,33 @@ func (c *Cloud) createTags(resourceID string, tags map[string]string) error {
 		awsTags = append(awsTags, tag)
 	}
 
+	backoff := wait.Backoff{
+		Duration: createTagInitialDelay,
+		Factor: createTagFactor,
+		Steps: createTagSteps,
+	}
 	request := &ec2.CreateTagsInput{}
 	request.Resources = []*string{&resourceID}
 	request.Tags = awsTags
 
-	// TODO: We really should do exponential backoff here
-	attempt := 0
-	maxAttempts := 60
-
-	for {
+	var lastErr error
+	err := wait.ExponentialBackoff(backoff, func() (bool, error) {
 		_, err := c.ec2.CreateTags(request)
 		if err == nil {
-			return nil
+			return true, nil
 		}
 
 		// We could check that the error is retryable, but the error code changes based on what we are tagging
 		// SecurityGroup: InvalidGroup.NotFound
-		attempt++
-		if attempt > maxAttempts {
-			glog.Warningf("Failed to create tags (too many attempts): %v", err)
-			return err
-		}
 		glog.V(2).Infof("Failed to create tags; will retry. Error was %v", err)
-		time.Sleep(1 * time.Second)
+		lastErr = err
+		return false, nil
+	})
+	if err == wait.ErrWaitTimeout {
+		// return real CreateTags error instead of timeout
+		err = lastErr
 	}
+	return err
 }
 
 // Finds the value for a given tag.
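
Both patches build on the exponential backoff helper in k8s.io/kubernetes/pkg/util/wait: the caller fills in a wait.Backoff{Duration, Factor, Steps} and passes a condition function returning (done, error), and the helper re-runs the condition with exponentially growing delays until it reports done, returns an error, or the steps run out. The sketch below is a minimal, self-contained model of that pattern, not the pkg/util/wait implementation itself; the names backoff, exponentialBackoff and errWaitTimeout are illustrative. It also mimics the trick from the createTags() change of remembering the last real error and returning it instead of the generic timeout.

package main

import (
	"errors"
	"fmt"
	"time"
)

// backoff describes an exponential retry schedule (illustrative names).
type backoff struct {
	Duration time.Duration // initial delay between condition checks
	Factor   float64       // multiplier applied to the delay after each check
	Steps    int           // maximum number of condition checks
}

var errWaitTimeout = errors.New("timed out waiting for the condition")

// exponentialBackoff calls condition up to b.Steps times, sleeping an
// exponentially growing duration between checks. It stops early as soon as
// the condition reports done or returns an error.
func exponentialBackoff(b backoff, condition func() (done bool, err error)) error {
	delay := b.Duration
	for i := 0; i < b.Steps; i++ {
		if i != 0 {
			time.Sleep(delay)
			delay = time.Duration(float64(delay) * b.Factor)
		}
		if done, err := condition(); err != nil || done {
			return err
		}
	}
	return errWaitTimeout
}

func main() {
	// Mimic the createTags() usage: remember the last real error so it can be
	// returned instead of the generic timeout error.
	attempts := 0
	var lastErr error
	err := exponentialBackoff(
		backoff{Duration: 10 * time.Millisecond, Factor: 2.0, Steps: 5},
		func() (bool, error) {
			attempts++
			if attempts < 3 {
				lastErr = fmt.Errorf("not ready yet (attempt %d)", attempts)
				return false, nil // not done; retry after the next delay
			}
			return true, nil // done; stop retrying
		})
	if err == errWaitTimeout {
		err = lastErr
	}
	fmt.Printf("attempts=%d err=%v\n", attempts, err)
}

Running this prints attempts=3 err=<nil>; if the condition never succeeded, the caller would see the last real error rather than errWaitTimeout, which is exactly what the createTags() change wants for CreateTags failures.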
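The timeouts quoted in the new constants' comments can be sanity-checked under the same assumption as the sketch above (no sleep before the first condition check, delay multiplied by Factor after each sleep): the total wait is a geometric series, Duration * (Factor^(Steps-1) - 1) / (Factor - 1). A short check of both configurations:

package main

import (
	"fmt"
	"math"
	"time"
)

// totalWait sums the Steps-1 sleeps of an exponential backoff: a geometric
// series over the initial delay and factor (factor must not be 1).
func totalWait(initial time.Duration, factor float64, steps int) time.Duration {
	sum := (math.Pow(factor, float64(steps-1)) - 1) / (factor - 1)
	return time.Duration(float64(initial) * sum)
}

func main() {
	// volumeAttachmentStatus*: 10s initial delay, factor 1.2, 21 steps.
	fmt.Printf("%.0f seconds\n", totalWait(10*time.Second, 1.2, 21).Seconds())
	// createTag*: 1s initial delay, factor 2.0, 9 steps.
	fmt.Printf("%.0f seconds\n", totalWait(1*time.Second, 2.0, 9).Seconds())
}

This prints roughly 1867 seconds (about 31.1 minutes) for the attach/detach wait and exactly 255 seconds for createTags, matching the figures given in the comments.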