Merge pull request #84181 from jsafrane/aws-lower-describevolumes

Lower AWS DescribeVolume frequency
This commit is contained in:
Kubernetes Prow Robot 2019-10-23 01:54:13 -07:00 committed by GitHub
commit 5f03d33fc9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -219,13 +219,15 @@ const nodeWithImpairedVolumes = "NodeWithImpairedVolumes"
const (
// volumeAttachmentConsecutiveErrorLimit is the number of consecutive errors we will ignore when waiting for a volume to attach/detach
volumeAttachmentStatusConsecutiveErrorLimit = 10
// most attach/detach operations on AWS finish within 1-4 seconds
// By using 1 second starting interval with a backoff of 1.8
// we get - [1, 1.8, 3.24, 5.832000000000001, 10.4976]
// in total we wait for 2601 seconds
volumeAttachmentStatusInitialDelay = 1 * time.Second
volumeAttachmentStatusFactor = 1.8
volumeAttachmentStatusSteps = 13
// Attach typically takes 2-5 seconds (average is 2). Asking before 2 seconds is just waste of API quota.
volumeAttachmentStatusInitialDelay = 2 * time.Second
// Detach typically takes 5-10 seconds (average is 6). Asking before 5 seconds is just waste of API quota.
volumeDetachmentStatusInitialDelay = 5 * time.Second
// After the initial delay, poll attach/detach with exponential backoff (2046 seconds total)
volumeAttachmentStatusPollDelay = 2 * time.Second
volumeAttachmentStatusFactor = 2
volumeAttachmentStatusSteps = 11
// createTag* is configuration of exponential backoff for CreateTag call. We
// retry mainly because if we create an object, we cannot tag it until it is
@ -2105,7 +2107,7 @@ func (c *Cloud) applyUnSchedulableTaint(nodeName types.NodeName, reason string)
// On success, it returns the last attachment state.
func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment, error) {
backoff := wait.Backoff{
Duration: volumeAttachmentStatusInitialDelay,
Duration: volumeAttachmentStatusPollDelay,
Factor: volumeAttachmentStatusFactor,
Steps: volumeAttachmentStatusSteps,
}
@ -2114,6 +2116,12 @@ func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment,
// So we tolerate a limited number of failures.
// But once we see more than 10 errors in a row, we return the error
describeErrorCount := 0
// Attach/detach usually takes time. It does not make sense to start
// polling DescribeVolumes before some initial delay to let AWS
// process the request.
time.Sleep(getInitialAttachDetachDelay(status))
var attachment *ec2.VolumeAttachment
err := wait.ExponentialBackoff(backoff, func() (bool, error) {
@ -2177,7 +2185,6 @@ func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment,
klog.V(2).Infof("Waiting for volume %q state: actual=%s, desired=%s", d.awsID, attachmentStatus, status)
return false, nil
})
return attachment, err
}
@ -4641,3 +4648,10 @@ func setNodeDisk(
}
volumeMap[volumeID] = check
}
func getInitialAttachDetachDelay(status string) time.Duration {
if status == "detached" {
return volumeDetachmentStatusInitialDelay
}
return volumeAttachmentStatusInitialDelay
}