From b3bd620b2a47fa3ed7397a4afef3a7ac3d604adf Mon Sep 17 00:00:00 2001
From: Jan Safranek
Date: Tue, 22 Oct 2019 12:50:45 +0200
Subject: [PATCH] Lower AWS DescribeVolume frequency

Call DescribeVolumes less frequently so controller-manager is not
throttled by AWS. DescribeVolumes is basically the only Kubernetes call
that suffers API throttling by AWS.
---
 .../k8s.io/legacy-cloud-providers/aws/aws.go  | 32 +++++++++++++------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/staging/src/k8s.io/legacy-cloud-providers/aws/aws.go b/staging/src/k8s.io/legacy-cloud-providers/aws/aws.go
index 52a0d05b5f7..4320d8861cf 100644
--- a/staging/src/k8s.io/legacy-cloud-providers/aws/aws.go
+++ b/staging/src/k8s.io/legacy-cloud-providers/aws/aws.go
@@ -219,13 +219,15 @@ const nodeWithImpairedVolumes = "NodeWithImpairedVolumes"
 const (
 	// volumeAttachmentConsecutiveErrorLimit is the number of consecutive errors we will ignore when waiting for a volume to attach/detach
 	volumeAttachmentStatusConsecutiveErrorLimit = 10
-	// most attach/detach operations on AWS finish within 1-4 seconds
-	// By using 1 second starting interval with a backoff of 1.8
-	// we get - [1, 1.8, 3.24, 5.832000000000001, 10.4976]
-	// in total we wait for 2601 seconds
-	volumeAttachmentStatusInitialDelay = 1 * time.Second
-	volumeAttachmentStatusFactor       = 1.8
-	volumeAttachmentStatusSteps        = 13
+
+	// Attach typically takes 2-5 seconds (average is 2). Asking before 2 seconds is just waste of API quota.
+	volumeAttachmentStatusInitialDelay = 2 * time.Second
+	// Detach typically takes 5-10 seconds (average is 6). Asking before 5 seconds is just waste of API quota.
+	volumeDetachmentStatusInitialDelay = 5 * time.Second
+	// After the initial delay, poll attach/detach with exponential backoff (2046 seconds total)
+	volumeAttachmentStatusPollDelay = 2 * time.Second
+	volumeAttachmentStatusFactor    = 2
+	volumeAttachmentStatusSteps     = 11
 
 	// createTag* is configuration of exponential backoff for CreateTag call. We
 	// retry mainly because if we create an object, we cannot tag it until it is
@@ -2105,7 +2107,7 @@ func (c *Cloud) applyUnSchedulableTaint(nodeName types.NodeName, reason string)
 // On success, it returns the last attachment state.
 func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment, error) {
 	backoff := wait.Backoff{
-		Duration: volumeAttachmentStatusInitialDelay,
+		Duration: volumeAttachmentStatusPollDelay,
 		Factor:   volumeAttachmentStatusFactor,
 		Steps:    volumeAttachmentStatusSteps,
 	}
@@ -2114,6 +2116,12 @@ func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment,
 	// So we tolerate a limited number of failures.
 	// But once we see more than 10 errors in a row, we return the error
 	describeErrorCount := 0
+
+	// Attach/detach usually takes time. It does not make sense to start
+	// polling DescribeVolumes before some initial delay to let AWS
+	// process the request.
+	time.Sleep(getInitialAttachDetachDelay(status))
+
 	var attachment *ec2.VolumeAttachment
 
 	err := wait.ExponentialBackoff(backoff, func() (bool, error) {
@@ -2177,7 +2185,6 @@ func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment,
 		klog.V(2).Infof("Waiting for volume %q state: actual=%s, desired=%s", d.awsID, attachmentStatus, status)
 		return false, nil
 	})
-
 	return attachment, err
 }
 
@@ -4641,3 +4648,10 @@ func setNodeDisk(
 	}
 	volumeMap[volumeID] = check
 }
+
+func getInitialAttachDetachDelay(status string) time.Duration {
+	if status == "detached" {
+		return volumeDetachmentStatusInitialDelay
+	}
+	return volumeAttachmentStatusInitialDelay
+}