From 4dd6fd448060b5a1ae29ed3bf766459f03963b2c Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Tue, 26 Nov 2019 18:22:48 +0100 Subject: [PATCH 1/2] Fix AWS eventual consistency of AttachDisk AWS eventual consistency can go back in time. It can return that a volume is detached and then that it is attached. When this happens during attachment of the same volume to the same node, but with a different device name, retry DescribeVolumes a few times before reporting an error. 10 retries should be enough to get a consistent result. --- .../k8s.io/legacy-cloud-providers/aws/aws.go | 38 ++++++++++++++----- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/staging/src/k8s.io/legacy-cloud-providers/aws/aws.go b/staging/src/k8s.io/legacy-cloud-providers/aws/aws.go index 4320d8861cf..7a906b82c4f 100644 --- a/staging/src/k8s.io/legacy-cloud-providers/aws/aws.go +++ b/staging/src/k8s.io/legacy-cloud-providers/aws/aws.go @@ -2105,17 +2105,18 @@ func (c *Cloud) applyUnSchedulableTaint(nodeName types.NodeName, reason string) // waitForAttachmentStatus polls until the attachment status is the expected value // On success, it returns the last attachment state. -func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment, error) { +func (d *awsDisk) waitForAttachmentStatus(status string, expectedDevice string) (*ec2.VolumeAttachment, error) { backoff := wait.Backoff{ Duration: volumeAttachmentStatusPollDelay, Factor: volumeAttachmentStatusFactor, Steps: volumeAttachmentStatusSteps, } - // Because of rate limiting, we often see errors from describeVolume + // Because of rate limiting, we often see errors from describeVolume. + // Or AWS eventual consistency returns unexpected data. // So we tolerate a limited number of failures. - // But once we see more than 10 errors in a row, we return the error - describeErrorCount := 0 + // But once we see more than 10 errors in a row, we return the error. + errorCount := 0 // Attach/detach usually takes time. 
It does not make sense to start // polling DescribeVolumes before some initial delay to let AWS @@ -2144,8 +2145,8 @@ func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment, return false, err } } - describeErrorCount++ - if describeErrorCount > volumeAttachmentStatusConsecutiveErrorLimit { + errorCount++ + if errorCount > volumeAttachmentStatusConsecutiveErrorLimit { // report the error return false, err } @@ -2154,8 +2155,6 @@ func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment, return false, nil } - describeErrorCount = 0 - if len(info.Attachments) > 1 { // Shouldn't happen; log so we know if it is klog.Warningf("Found multiple attachments for volume %q: %v", d.awsID, info) @@ -2177,11 +2176,29 @@ func (d *awsDisk) waitForAttachmentStatus(status string) (*ec2.VolumeAttachment, if attachmentStatus == "" { attachmentStatus = "detached" } + if attachment != nil && expectedDevice != "" { + device := aws.StringValue(attachment.Device) + if device != "" && device != expectedDevice { + // AWS eventual consistency can go back in time. + // For example, we're waiting for a volume to be attached as /dev/xvdba, but AWS can tell us it's + // attached as /dev/xvdbb, where it was attached before and it was already detached. + // Retry couple of times, hoping AWS starts reporting the right status. 
+ klog.Warningf("Expected device %s %s, but found device %s %s", expectedDevice, status, device, attachmentStatus) + errorCount++ + if errorCount > volumeAttachmentStatusConsecutiveErrorLimit { + // report the error + return false, fmt.Errorf("attachment of disk %q failed: requested device %q but found %q", d.name, expectedDevice, device) + } + return false, nil + } + } + if attachmentStatus == status { // Attachment is in requested state, finish waiting return true, nil } // continue waiting + errorCount = 0 klog.V(2).Infof("Waiting for volume %q state: actual=%s, desired=%s", d.awsID, attachmentStatus, status) return false, nil }) @@ -2321,7 +2338,7 @@ func (c *Cloud) AttachDisk(diskName KubernetesVolumeID, nodeName types.NodeName) klog.V(2).Infof("AttachVolume volume=%q instance=%q request returned %v", disk.awsID, awsInstance.awsID, attachResponse) } - attachment, err := disk.waitForAttachmentStatus("attached") + attachment, err := disk.waitForAttachmentStatus("attached", ec2Device) if err != nil { if err == wait.ErrWaitTimeout { @@ -2341,6 +2358,7 @@ func (c *Cloud) AttachDisk(diskName KubernetesVolumeID, nodeName types.NodeName) return "", fmt.Errorf("unexpected state: attachment nil after attached %q to %q", diskName, nodeName) } if ec2Device != aws.StringValue(attachment.Device) { + // Already checked in waitForAttachmentStatus(), but just to be sure... 
return "", fmt.Errorf("disk attachment of %q to %q failed: requested device %q but found %q", diskName, nodeName, ec2Device, aws.StringValue(attachment.Device)) } if awsInstance.awsID != aws.StringValue(attachment.InstanceId) { @@ -2398,7 +2416,7 @@ func (c *Cloud) DetachDisk(diskName KubernetesVolumeID, nodeName types.NodeName) return "", errors.New("no response from DetachVolume") } - attachment, err := diskInfo.disk.waitForAttachmentStatus("detached") + attachment, err := diskInfo.disk.waitForAttachmentStatus("detached", "") if err != nil { return "", err } From 044b315d6113c068bf0a635d2ade9189ab80fb92 Mon Sep 17 00:00:00 2001 From: Jan Safranek Date: Thu, 28 Nov 2019 15:58:05 +0100 Subject: [PATCH 2/2] Also check for instanceID In case DescribeVolumes returns stale attachment and the volume was previously attached to a different node. --- .../k8s.io/legacy-cloud-providers/aws/aws.go | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/staging/src/k8s.io/legacy-cloud-providers/aws/aws.go b/staging/src/k8s.io/legacy-cloud-providers/aws/aws.go index 7a906b82c4f..fdb387f46e2 100644 --- a/staging/src/k8s.io/legacy-cloud-providers/aws/aws.go +++ b/staging/src/k8s.io/legacy-cloud-providers/aws/aws.go @@ -2105,7 +2105,7 @@ func (c *Cloud) applyUnSchedulableTaint(nodeName types.NodeName, reason string) // waitForAttachmentStatus polls until the attachment status is the expected value // On success, it returns the last attachment state. 
-func (d *awsDisk) waitForAttachmentStatus(status string, expectedDevice string) (*ec2.VolumeAttachment, error) { +func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expectedDevice string) (*ec2.VolumeAttachment, error) { backoff := wait.Backoff{ Duration: volumeAttachmentStatusPollDelay, Factor: volumeAttachmentStatusFactor, @@ -2176,14 +2176,24 @@ func (d *awsDisk) waitForAttachmentStatus(status string, expectedDevice string) if attachmentStatus == "" { attachmentStatus = "detached" } - if attachment != nil && expectedDevice != "" { + if attachment != nil { + // AWS eventual consistency can go back in time. + // For example, we're waiting for a volume to be attached as /dev/xvdba, but AWS can tell us it's + // attached as /dev/xvdbb, where it was attached before and it was already detached. + // Retry couple of times, hoping AWS starts reporting the right status. device := aws.StringValue(attachment.Device) - if device != "" && device != expectedDevice { - // AWS eventual consistency can go back in time. - // For example, we're waiting for a volume to be attached as /dev/xvdba, but AWS can tell us it's - // attached as /dev/xvdbb, where it was attached before and it was already detached. - // Retry couple of times, hoping AWS starts reporting the right status. 
- klog.Warningf("Expected device %s %s, but found device %s %s", expectedDevice, status, device, attachmentStatus) + if expectedDevice != "" && device != "" && device != expectedDevice { + klog.Warningf("Expected device %s %s for volume %s, but found device %s %s", expectedDevice, status, d.name, device, attachmentStatus) + errorCount++ + if errorCount > volumeAttachmentStatusConsecutiveErrorLimit { + // report the error + return false, fmt.Errorf("attachment of disk %q failed: requested device %q but found %q", d.name, expectedDevice, device) + } + return false, nil + } + instanceID := aws.StringValue(attachment.InstanceId) + if expectedInstance != "" && instanceID != "" && instanceID != expectedInstance { + klog.Warningf("Expected instance %s/%s for volume %s, but found instance %s/%s", expectedInstance, status, d.name, instanceID, attachmentStatus) errorCount++ if errorCount > volumeAttachmentStatusConsecutiveErrorLimit { // report the error @@ -2338,7 +2348,7 @@ func (c *Cloud) AttachDisk(diskName KubernetesVolumeID, nodeName types.NodeName) klog.V(2).Infof("AttachVolume volume=%q instance=%q request returned %v", disk.awsID, awsInstance.awsID, attachResponse) } - attachment, err := disk.waitForAttachmentStatus("attached", ec2Device) + attachment, err := disk.waitForAttachmentStatus("attached", awsInstance.awsID, ec2Device) if err != nil { if err == wait.ErrWaitTimeout { @@ -2416,7 +2426,7 @@ func (c *Cloud) DetachDisk(diskName KubernetesVolumeID, nodeName types.NodeName) return "", errors.New("no response from DetachVolume") } - attachment, err := diskInfo.disk.waitForAttachmentStatus("detached", "") + attachment, err := diskInfo.disk.waitForAttachmentStatus("detached", awsInstance.awsID, "") if err != nil { return "", err }