From 92e576e01cebed55086a7e2f05fecf5a53a9b5d5 Mon Sep 17 00:00:00 2001
From: Jan Safranek
Date: Wed, 14 Dec 2016 14:00:29 +0100
Subject: [PATCH 1/2] AWS: Add exponential backoff to waitForAttachmentStatus()

We should use exponential backoff while waiting for a volume to get attached/
detached to/from a node. This will lower AWS load and reduce its API call
throttling.
---
 pkg/cloudprovider/providers/aws/BUILD  |  1 +
 pkg/cloudprovider/providers/aws/aws.go | 50 +++++++++++++-------------
 2 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/pkg/cloudprovider/providers/aws/BUILD b/pkg/cloudprovider/providers/aws/BUILD
index df3f0f800f6..af6b19f200a 100644
--- a/pkg/cloudprovider/providers/aws/BUILD
+++ b/pkg/cloudprovider/providers/aws/BUILD
@@ -30,6 +30,7 @@ go_library(
         "//pkg/credentialprovider/aws:go_default_library",
         "//pkg/types:go_default_library",
         "//pkg/util/sets:go_default_library",
+        "//pkg/util/wait:go_default_library",
         "//pkg/volume:go_default_library",
         "//vendor:github.com/aws/aws-sdk-go/aws",
         "//vendor:github.com/aws/aws-sdk-go/aws/awserr",
diff --git a/pkg/cloudprovider/providers/aws/aws.go b/pkg/cloudprovider/providers/aws/aws.go
index c5d4ab626d9..49bc1cf9982 100644
--- a/pkg/cloudprovider/providers/aws/aws.go
+++ b/pkg/cloudprovider/providers/aws/aws.go
@@ -48,6 +48,7 @@ import (
 	awscredentials "k8s.io/kubernetes/pkg/credentialprovider/aws"
 	"k8s.io/kubernetes/pkg/types"
 	"k8s.io/kubernetes/pkg/util/sets"
+	"k8s.io/kubernetes/pkg/util/wait"
 	"k8s.io/kubernetes/pkg/volume"
 )
 
@@ -136,16 +137,16 @@ const ServiceAnnotationLoadBalancerSSLPorts = "service.beta.kubernetes.io/aws-lo
 const ServiceAnnotationLoadBalancerBEProtocol = "service.beta.kubernetes.io/aws-load-balancer-backend-protocol"
 
 const (
-	// volumeAttachmentStatusTimeout is the maximum time to wait for a volume attach/detach to complete
-	volumeAttachmentStatusTimeout = 30 * time.Minute
 	// volumeAttachmentConsecutiveErrorLimit is the number of consecutive errors we will ignore when waiting for a volume to attach/detach
 	volumeAttachmentStatusConsecutiveErrorLimit = 10
-	// volumeAttachmentErrorDelay is the amount of time we wait before retrying after encountering an error,
-	// while waiting for a volume attach/detach to complete
-	volumeAttachmentStatusErrorDelay = 20 * time.Second
-	// volumeAttachmentStatusPollInterval is the interval at which we poll the volume,
-	// while waiting for a volume attach/detach to complete
-	volumeAttachmentStatusPollInterval = 10 * time.Second
+	// volumeAttachmentStatus* is configuration of exponential backoff for
+	// waiting for attach/detach operation to complete. Starting with 10
+	// seconds, multiplying by 1.2 with each step and taking 21 steps at maximum
+	// it will time out after 31.11 minutes, which roughly corresponds to GCE
+	// timeout (30 minutes).
+	volumeAttachmentStatusInitialDelay = 10 * time.Second
+	volumeAttachmentStatusFactor = 1.2
+	volumeAttachmentStatusSteps = 21
 )
 
 // Maps from backend protocol to ELB protocol
@@ -1303,25 +1304,28 @@ func (d *awsDisk) describeVolume() (*ec2.Volume, error) {
 // waitForAttachmentStatus polls until the attachment status is the expected value
 // On success, it returns the last attachment state.
 func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment, error) {
-	// We wait up to 30 minutes for the attachment to complete.
-	// This mirrors the GCE timeout.
-	timeoutAt := time.Now().UTC().Add(volumeAttachmentStatusTimeout).Unix()
+	backoff := wait.Backoff{
+		Duration: volumeAttachmentStatusInitialDelay,
+		Factor: volumeAttachmentStatusFactor,
+		Steps: volumeAttachmentStatusSteps,
+	}
 
 	// Because of rate limiting, we often see errors from describeVolume
 	// So we tolerate a limited number of failures.
 	// But once we see more than 10 errors in a row, we return the error
 	describeErrorCount := 0
+	var attachment *ec2.VolumeAttachment
 
-	for {
+	err := wait.ExponentialBackoff(backoff, func() (bool, error) {
 		info, err := d.describeVolume()
 		if err != nil {
 			describeErrorCount++
 			if describeErrorCount > volumeAttachmentStatusConsecutiveErrorLimit {
-				return nil, err
+				// report the error
+				return false, err
 			} else {
 				glog.Warningf("Ignoring error from describe volume; will retry: %q", err)
-				time.Sleep(volumeAttachmentStatusErrorDelay)
-				continue
+				return false, nil
 			}
 		} else {
 			describeErrorCount = 0
@@ -1330,7 +1334,6 @@ func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment,
 			// Shouldn't happen; log so we know if it is
 			glog.Warningf("Found multiple attachments for volume %q: %v", d.awsID, info)
 		}
-		var attachment *ec2.VolumeAttachment
 		attachmentStatus := ""
 		for _, a := range info.Attachments {
 			if attachmentStatus != "" {
@@ -1349,18 +1352,15 @@ func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment,
 			attachmentStatus = "detached"
 		}
 		if attachmentStatus == status {
-			return attachment, nil
+			// Attachment is in requested state, finish waiting
+			return true, nil
 		}
-
-		if time.Now().Unix() > timeoutAt {
-			glog.Warningf("Timeout waiting for volume %q state: actual=%s, desired=%s", d.awsID, attachmentStatus, status)
-			return nil, fmt.Errorf("Timeout waiting for volume %q state: actual=%s, desired=%s", d.awsID, attachmentStatus, status)
-		}
-
+		// continue waiting
 		glog.V(2).Infof("Waiting for volume %q state: actual=%s, desired=%s", d.awsID, attachmentStatus, status)
+		return false, nil
+	})
 
-		time.Sleep(volumeAttachmentStatusPollInterval)
-	}
+	return attachment, err
 }
 
 // Deletes the EBS disk

From be3fcd43835b1a4aa6426a78b45d07fb80c46936 Mon Sep 17 00:00:00 2001
From: Jan Safranek
Date: Wed, 14 Dec 2016 16:05:28 +0100
Subject: [PATCH 2/2] AWS: Add exponential backoff to createTags()

We should have something more reliable than 1 second sleep
---
 pkg/cloudprovider/providers/aws/aws.go | 36 +++++++++++++++++---------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/pkg/cloudprovider/providers/aws/aws.go b/pkg/cloudprovider/providers/aws/aws.go
index 49bc1cf9982..62511002d31 100644
--- a/pkg/cloudprovider/providers/aws/aws.go
+++ b/pkg/cloudprovider/providers/aws/aws.go
@@ -147,6 +147,15 @@ const (
 	volumeAttachmentStatusInitialDelay = 10 * time.Second
 	volumeAttachmentStatusFactor = 1.2
 	volumeAttachmentStatusSteps = 21
+
+	// createTag* is configuration of exponential backoff for CreateTag call. We
+	// retry mainly because if we create an object, we cannot tag it until it is
+	// "fully created" (eventual consistency). Starting with 1 second, doubling
+	// it every step and taking 9 steps results in 255 second total waiting
+	// time.
+	createTagInitialDelay = 1 * time.Second
+	createTagFactor = 2.0
+	createTagSteps = 9
 )
 
 // Maps from backend protocol to ELB protocol
@@ -2241,30 +2250,33 @@ func (c *Cloud) createTags(resourceID string, tags map[string]string) error {
 		awsTags = append(awsTags, tag)
 	}
 
+	backoff := wait.Backoff{
+		Duration: createTagInitialDelay,
+		Factor: createTagFactor,
+		Steps: createTagSteps,
+	}
 	request := &ec2.CreateTagsInput{}
 	request.Resources = []*string{&resourceID}
 	request.Tags = awsTags
 
-	// TODO: We really should do exponential backoff here
-	attempt := 0
-	maxAttempts := 60
-
-	for {
+	var lastErr error
+	err := wait.ExponentialBackoff(backoff, func() (bool, error) {
 		_, err := c.ec2.CreateTags(request)
 		if err == nil {
-			return nil
+			return true, nil
 		}
 
 		// We could check that the error is retryable, but the error code changes based on what we are tagging
 		// SecurityGroup: InvalidGroup.NotFound
-		attempt++
-		if attempt > maxAttempts {
-			glog.Warningf("Failed to create tags (too many attempts): %v", err)
-			return err
-		}
 		glog.V(2).Infof("Failed to create tags; will retry. Error was %v", err)
-		time.Sleep(1 * time.Second)
+		lastErr = err
+		return false, nil
+	})
+	if err == wait.ErrWaitTimeout {
+		// return real CreateTags error instead of timeout
+		err = lastErr
 	}
+	return err
 }
 
 // Finds the value for a given tag.
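
Both patches build on the exponential backoff helper in k8s.io/kubernetes/pkg/util/wait: the caller fills in a wait.Backoff{Duration, Factor, Steps} and passes a condition function returning (done, error), and the helper re-runs the condition with exponentially growing delays until it reports done, returns an error, or the steps run out. The sketch below is a minimal, self-contained model of that pattern, not the pkg/util/wait implementation itself; the names backoff, exponentialBackoff and errWaitTimeout are illustrative. It also mimics the trick from the createTags() change of remembering the last real error and returning it instead of the generic timeout.

package main

import (
	"errors"
	"fmt"
	"time"
)

// backoff describes an exponential retry schedule (illustrative names).
type backoff struct {
	Duration time.Duration // initial delay between condition checks
	Factor   float64       // multiplier applied to the delay after each check
	Steps    int           // maximum number of condition checks
}

var errWaitTimeout = errors.New("timed out waiting for the condition")

// exponentialBackoff calls condition up to b.Steps times, sleeping an
// exponentially growing duration between checks. It stops early as soon as
// the condition reports done or returns an error.
func exponentialBackoff(b backoff, condition func() (done bool, err error)) error {
	delay := b.Duration
	for i := 0; i < b.Steps; i++ {
		if i != 0 {
			time.Sleep(delay)
			delay = time.Duration(float64(delay) * b.Factor)
		}
		if done, err := condition(); err != nil || done {
			return err
		}
	}
	return errWaitTimeout
}

func main() {
	// Mimic the createTags() usage: remember the last real error so it can be
	// returned instead of the generic timeout error.
	attempts := 0
	var lastErr error
	err := exponentialBackoff(
		backoff{Duration: 10 * time.Millisecond, Factor: 2.0, Steps: 5},
		func() (bool, error) {
			attempts++
			if attempts < 3 {
				lastErr = fmt.Errorf("not ready yet (attempt %d)", attempts)
				return false, nil // not done; retry after the next delay
			}
			return true, nil // done; stop retrying
		})
	if err == errWaitTimeout {
		err = lastErr
	}
	fmt.Printf("attempts=%d err=%v\n", attempts, err)
}

Running this prints attempts=3 err=<nil>; if the condition never succeeded, the caller would see the last real error rather than errWaitTimeout, which is exactly what the createTags() change wants for CreateTags failures.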
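The timeouts quoted in the new constants' comments can be sanity-checked under the same assumption as the sketch above (no sleep before the first condition check, delay multiplied by Factor after each sleep): the total wait is a geometric series, Duration * (Factor^(Steps-1) - 1) / (Factor - 1). A short check of both configurations:

package main

import (
	"fmt"
	"math"
	"time"
)

// totalWait sums the Steps-1 sleeps of an exponential backoff: a geometric
// series over the initial delay and factor (factor must not be 1).
func totalWait(initial time.Duration, factor float64, steps int) time.Duration {
	sum := (math.Pow(factor, float64(steps-1)) - 1) / (factor - 1)
	return time.Duration(float64(initial) * sum)
}

func main() {
	// volumeAttachmentStatus*: 10s initial delay, factor 1.2, 21 steps.
	fmt.Printf("%.0f seconds\n", totalWait(10*time.Second, 1.2, 21).Seconds())
	// createTag*: 1s initial delay, factor 2.0, 9 steps.
	fmt.Printf("%.0f seconds\n", totalWait(1*time.Second, 2.0, 9).Seconds())
}

This prints roughly 1867 seconds (about 31.1 minutes) for the attach/detach wait and exactly 255 seconds for createTags, matching the figures given in the comments.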