Merge pull request #84181 from jsafrane/aws-lower-describevolumes

Lower AWS DescribeVolume frequency
This commit is contained in:
Kubernetes Prow Robot 2019-10-23 01:54:13 -07:00 committed by GitHub
commit 5f03d33fc9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -219,13 +219,15 @@ const nodeWithImpairedVolumes = "NodeWithImpairedVolumes"
const ( const (
// volumeAttachmentStatusConsecutiveErrorLimit is the number of consecutive errors we will ignore when waiting for a volume to attach/detach // volumeAttachmentStatusConsecutiveErrorLimit is the number of consecutive errors we will ignore when waiting for a volume to attach/detach
volumeAttachmentStatusConsecutiveErrorLimit = 10 volumeAttachmentStatusConsecutiveErrorLimit = 10
// most attach/detach operations on AWS finish within 1-4 seconds
// By using 1 second starting interval with a backoff of 1.8 // Attach typically takes 2-5 seconds (average is 2). Asking before 2 seconds is just a waste of API quota.
// we get - [1, 1.8, 3.24, 5.832000000000001, 10.4976] volumeAttachmentStatusInitialDelay = 2 * time.Second
// in total we wait for 2601 seconds // Detach typically takes 5-10 seconds (average is 6). Asking before 5 seconds is just a waste of API quota.
volumeAttachmentStatusInitialDelay = 1 * time.Second volumeDetachmentStatusInitialDelay = 5 * time.Second
volumeAttachmentStatusFactor = 1.8 // After the initial delay, poll attach/detach with exponential backoff (2046 seconds total)
volumeAttachmentStatusSteps = 13 volumeAttachmentStatusPollDelay = 2 * time.Second
volumeAttachmentStatusFactor = 2
volumeAttachmentStatusSteps = 11
// createTag* is configuration of exponential backoff for CreateTag call. We // createTag* is configuration of exponential backoff for CreateTag call. We
// retry mainly because if we create an object, we cannot tag it until it is // retry mainly because if we create an object, we cannot tag it until it is
@ -2105,7 +2107,7 @@ func (c *Cloud) applyUnSchedulableTaint(nodeName types.NodeName, reason string)
// On success, it returns the last attachment state. // On success, it returns the last attachment state.
func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment, error) { func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment, error) {
backoff := wait.Backoff{ backoff := wait.Backoff{
Duration: volumeAttachmentStatusInitialDelay, Duration: volumeAttachmentStatusPollDelay,
Factor: volumeAttachmentStatusFactor, Factor: volumeAttachmentStatusFactor,
Steps: volumeAttachmentStatusSteps, Steps: volumeAttachmentStatusSteps,
} }
@ -2114,6 +2116,12 @@ func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment,
// So we tolerate a limited number of failures. // So we tolerate a limited number of failures.
// But once we see more than 10 errors in a row, we return the error // But once we see more than 10 errors in a row, we return the error
describeErrorCount := 0 describeErrorCount := 0
// Attach/detach usually takes time. It does not make sense to start
// polling DescribeVolumes before some initial delay to let AWS
// process the request.
time.Sleep(getInitialAttachDetachDelay(status))
var attachment *ec2.VolumeAttachment var attachment *ec2.VolumeAttachment
err := wait.ExponentialBackoff(backoff, func() (bool, error) { err := wait.ExponentialBackoff(backoff, func() (bool, error) {
@ -2177,7 +2185,6 @@ func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment,
klog.V(2).Infof("Waiting for volume %q state: actual=%s, desired=%s", d.awsID, attachmentStatus, status) klog.V(2).Infof("Waiting for volume %q state: actual=%s, desired=%s", d.awsID, attachmentStatus, status)
return false, nil return false, nil
}) })
return attachment, err return attachment, err
} }
@ -4641,3 +4648,10 @@ func setNodeDisk(
} }
volumeMap[volumeID] = check volumeMap[volumeID] = check
} }
// getInitialAttachDetachDelay picks the initial delay for the given desired
// attachment status: volumeDetachmentStatusInitialDelay when waiting for
// "detached", and volumeAttachmentStatusInitialDelay for any other status.
func getInitialAttachDetachDelay(status string) time.Duration {
	switch status {
	case "detached":
		return volumeDetachmentStatusInitialDelay
	default:
		return volumeAttachmentStatusInitialDelay
	}
}