mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 03:41:45 +00:00
Make AttachDisk idempotent again
if DescribeInstance tells us that Volume is attached, we enter waitForAttach loop without calling AttachDisk. But if DescribeVolume tell us - device is actually detached, then we don't wait for 20 minutes and return error early.
This commit is contained in:
parent
db28b0239a
commit
77e070fb59
@ -257,6 +257,14 @@ const (
|
||||
filterNodeLimit = 150
|
||||
)
|
||||
|
||||
const (
|
||||
// represents expected attachment status of a volume after attach
|
||||
volumeAttachedStatus = "attached"
|
||||
|
||||
// represents expected attachment status of a volume after detach
|
||||
volumeDetachedStatus = "detached"
|
||||
)
|
||||
|
||||
// awsTagNameMasterRoles is a set of well-known AWS tag names that indicate the instance is a master
|
||||
// The major consequence is that it is then not considered for AWS zone discovery for dynamic volume creation.
|
||||
var awsTagNameMasterRoles = sets.NewString("kubernetes.io/role/master", "k8s.io/role/master")
|
||||
@ -1967,7 +1975,6 @@ func (c *Cloud) getMountDevice(
|
||||
// AWS API returns consistent result next time (i.e. the volume is detached).
|
||||
status := volumeStatus[mappingVolumeID]
|
||||
klog.Warningf("Got assignment call for already-assigned volume: %s@%s, volume status: %s", mountDevice, mappingVolumeID, status)
|
||||
return mountDevice, false, fmt.Errorf("volume is still being detached from the node")
|
||||
}
|
||||
return mountDevice, true, nil
|
||||
}
|
||||
@ -2168,7 +2175,7 @@ func (c *Cloud) applyUnSchedulableTaint(nodeName types.NodeName, reason string)
|
||||
|
||||
// waitForAttachmentStatus polls until the attachment status is the expected value
|
||||
// On success, it returns the last attachment state.
|
||||
func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expectedDevice string) (*ec2.VolumeAttachment, error) {
|
||||
func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expectedDevice string, alreadyAttached bool) (*ec2.VolumeAttachment, error) {
|
||||
backoff := wait.Backoff{
|
||||
Duration: volumeAttachmentStatusPollDelay,
|
||||
Factor: volumeAttachmentStatusFactor,
|
||||
@ -2193,7 +2200,7 @@ func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expec
|
||||
if err != nil {
|
||||
// The VolumeNotFound error is special -- we don't need to wait for it to repeat
|
||||
if isAWSErrorVolumeNotFound(err) {
|
||||
if status == "detached" {
|
||||
if status == volumeDetachedStatus {
|
||||
// The disk doesn't exist, assume it's detached, log warning and stop waiting
|
||||
klog.Warningf("Waiting for volume %q to be detached but the volume does not exist", d.awsID)
|
||||
stateStr := "detached"
|
||||
@ -2202,7 +2209,7 @@ func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expec
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
if status == "attached" {
|
||||
if status == volumeAttachedStatus {
|
||||
// The disk doesn't exist, complain, give up waiting and report error
|
||||
klog.Warningf("Waiting for volume %q to be attached but the volume does not exist", d.awsID)
|
||||
return false, err
|
||||
@ -2237,7 +2244,7 @@ func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expec
|
||||
}
|
||||
}
|
||||
if attachmentStatus == "" {
|
||||
attachmentStatus = "detached"
|
||||
attachmentStatus = volumeDetachedStatus
|
||||
}
|
||||
if attachment != nil {
|
||||
// AWS eventual consistency can go back in time.
|
||||
@ -2266,6 +2273,13 @@ func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expec
|
||||
}
|
||||
}
|
||||
|
||||
// if we expected volume to be attached and it was reported as already attached via DescribeInstance call
|
||||
// but DescribeVolume told us volume is detached, we will short-circuit this long wait loop and return error
|
||||
// so as AttachDisk can be retried without waiting for 20 minutes.
|
||||
if (status == volumeAttachedStatus) && alreadyAttached && (attachmentStatus != status) {
|
||||
return false, fmt.Errorf("attachment of disk %q failed, expected device to be attached but was %s", d.name, attachmentStatus)
|
||||
}
|
||||
|
||||
if attachmentStatus == status {
|
||||
// Attachment is in requested state, finish waiting
|
||||
return true, nil
|
||||
@ -2411,7 +2425,7 @@ func (c *Cloud) AttachDisk(diskName KubernetesVolumeID, nodeName types.NodeName)
|
||||
klog.V(2).Infof("AttachVolume volume=%q instance=%q request returned %v", disk.awsID, awsInstance.awsID, attachResponse)
|
||||
}
|
||||
|
||||
attachment, err := disk.waitForAttachmentStatus("attached", awsInstance.awsID, ec2Device)
|
||||
attachment, err := disk.waitForAttachmentStatus("attached", awsInstance.awsID, ec2Device, alreadyAttached)
|
||||
|
||||
if err != nil {
|
||||
if err == wait.ErrWaitTimeout {
|
||||
@ -2489,7 +2503,7 @@ func (c *Cloud) DetachDisk(diskName KubernetesVolumeID, nodeName types.NodeName)
|
||||
return "", errors.New("no response from DetachVolume")
|
||||
}
|
||||
|
||||
attachment, err := diskInfo.disk.waitForAttachmentStatus("detached", awsInstance.awsID, "")
|
||||
attachment, err := diskInfo.disk.waitForAttachmentStatus("detached", awsInstance.awsID, "", false)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
@ -4797,7 +4811,7 @@ func setNodeDisk(
|
||||
}
|
||||
|
||||
func getInitialAttachDetachDelay(status string) time.Duration {
|
||||
if status == "detached" {
|
||||
if status == volumeDetachedStatus {
|
||||
return volumeDetachmentStatusInitialDelay
|
||||
}
|
||||
return volumeAttachmentStatusInitialDelay
|
||||
|
Loading…
Reference in New Issue
Block a user