Make AttachDisk idempotent again

if DescribeInstance tells us that Volume is attached, we enter
waitForAttach loop without calling AttachDisk. But if DescribeVolume
tell us - device is actually detached, then we don't wait for 20
minutes and return error early.
This commit is contained in:
Hemant Kumar 2020-07-30 07:32:12 -04:00
parent db28b0239a
commit 77e070fb59

View File

@ -257,6 +257,14 @@ const (
filterNodeLimit = 150 filterNodeLimit = 150
) )
const (
// represents expected attachment status of a volume after attach
volumeAttachedStatus = "attached"
// represents expected attachment status of a volume after detach
volumeDetachedStatus = "detached"
)
// awsTagNameMasterRoles is a set of well-known AWS tag names that indicate the instance is a master // awsTagNameMasterRoles is a set of well-known AWS tag names that indicate the instance is a master
// The major consequence is that it is then not considered for AWS zone discovery for dynamic volume creation. // The major consequence is that it is then not considered for AWS zone discovery for dynamic volume creation.
var awsTagNameMasterRoles = sets.NewString("kubernetes.io/role/master", "k8s.io/role/master") var awsTagNameMasterRoles = sets.NewString("kubernetes.io/role/master", "k8s.io/role/master")
@ -1967,7 +1975,6 @@ func (c *Cloud) getMountDevice(
// AWS API returns consistent result next time (i.e. the volume is detached). // AWS API returns consistent result next time (i.e. the volume is detached).
status := volumeStatus[mappingVolumeID] status := volumeStatus[mappingVolumeID]
klog.Warningf("Got assignment call for already-assigned volume: %s@%s, volume status: %s", mountDevice, mappingVolumeID, status) klog.Warningf("Got assignment call for already-assigned volume: %s@%s, volume status: %s", mountDevice, mappingVolumeID, status)
return mountDevice, false, fmt.Errorf("volume is still being detached from the node")
} }
return mountDevice, true, nil return mountDevice, true, nil
} }
@ -2168,7 +2175,7 @@ func (c *Cloud) applyUnSchedulableTaint(nodeName types.NodeName, reason string)
// waitForAttachmentStatus polls until the attachment status is the expected value // waitForAttachmentStatus polls until the attachment status is the expected value
// On success, it returns the last attachment state. // On success, it returns the last attachment state.
func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expectedDevice string) (*ec2.VolumeAttachment, error) { func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expectedDevice string, alreadyAttached bool) (*ec2.VolumeAttachment, error) {
backoff := wait.Backoff{ backoff := wait.Backoff{
Duration: volumeAttachmentStatusPollDelay, Duration: volumeAttachmentStatusPollDelay,
Factor: volumeAttachmentStatusFactor, Factor: volumeAttachmentStatusFactor,
@ -2193,7 +2200,7 @@ func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expec
if err != nil { if err != nil {
// The VolumeNotFound error is special -- we don't need to wait for it to repeat // The VolumeNotFound error is special -- we don't need to wait for it to repeat
if isAWSErrorVolumeNotFound(err) { if isAWSErrorVolumeNotFound(err) {
if status == "detached" { if status == volumeDetachedStatus {
// The disk doesn't exist, assume it's detached, log warning and stop waiting // The disk doesn't exist, assume it's detached, log warning and stop waiting
klog.Warningf("Waiting for volume %q to be detached but the volume does not exist", d.awsID) klog.Warningf("Waiting for volume %q to be detached but the volume does not exist", d.awsID)
stateStr := "detached" stateStr := "detached"
@ -2202,7 +2209,7 @@ func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expec
} }
return true, nil return true, nil
} }
if status == "attached" { if status == volumeAttachedStatus {
// The disk doesn't exist, complain, give up waiting and report error // The disk doesn't exist, complain, give up waiting and report error
klog.Warningf("Waiting for volume %q to be attached but the volume does not exist", d.awsID) klog.Warningf("Waiting for volume %q to be attached but the volume does not exist", d.awsID)
return false, err return false, err
@ -2237,7 +2244,7 @@ func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expec
} }
} }
if attachmentStatus == "" { if attachmentStatus == "" {
attachmentStatus = "detached" attachmentStatus = volumeDetachedStatus
} }
if attachment != nil { if attachment != nil {
// AWS eventual consistency can go back in time. // AWS eventual consistency can go back in time.
@ -2266,6 +2273,13 @@ func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expec
} }
} }
// if we expected volume to be attached and it was reported as already attached via DescribeInstance call
// but DescribeVolume told us volume is detached, we will short-circuit this long wait loop and return error
// so as AttachDisk can be retried without waiting for 20 minutes.
if (status == volumeAttachedStatus) && alreadyAttached && (attachmentStatus != status) {
return false, fmt.Errorf("attachment of disk %q failed, expected device to be attached but was %s", d.name, attachmentStatus)
}
if attachmentStatus == status { if attachmentStatus == status {
// Attachment is in requested state, finish waiting // Attachment is in requested state, finish waiting
return true, nil return true, nil
@ -2411,7 +2425,7 @@ func (c *Cloud) AttachDisk(diskName KubernetesVolumeID, nodeName types.NodeName)
klog.V(2).Infof("AttachVolume volume=%q instance=%q request returned %v", disk.awsID, awsInstance.awsID, attachResponse) klog.V(2).Infof("AttachVolume volume=%q instance=%q request returned %v", disk.awsID, awsInstance.awsID, attachResponse)
} }
attachment, err := disk.waitForAttachmentStatus("attached", awsInstance.awsID, ec2Device) attachment, err := disk.waitForAttachmentStatus("attached", awsInstance.awsID, ec2Device, alreadyAttached)
if err != nil { if err != nil {
if err == wait.ErrWaitTimeout { if err == wait.ErrWaitTimeout {
@ -2489,7 +2503,7 @@ func (c *Cloud) DetachDisk(diskName KubernetesVolumeID, nodeName types.NodeName)
return "", errors.New("no response from DetachVolume") return "", errors.New("no response from DetachVolume")
} }
attachment, err := diskInfo.disk.waitForAttachmentStatus("detached", awsInstance.awsID, "") attachment, err := diskInfo.disk.waitForAttachmentStatus("detached", awsInstance.awsID, "", false)
if err != nil { if err != nil {
return "", err return "", err
} }
@ -4797,7 +4811,7 @@ func setNodeDisk(
} }
func getInitialAttachDetachDelay(status string) time.Duration { func getInitialAttachDetachDelay(status string) time.Duration {
if status == "detached" { if status == volumeDetachedStatus {
return volumeDetachmentStatusInitialDelay return volumeDetachmentStatusInitialDelay
} }
return volumeAttachmentStatusInitialDelay return volumeAttachmentStatusInitialDelay