mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-25 20:53:33 +00:00
Make AttachDisk idempotent again
if DescribeInstance tells us that Volume is attached, we enter waitForAttach loop without calling AttachDisk. But if DescribeVolume tell us - device is actually detached, then we don't wait for 20 minutes and return error early.
This commit is contained in:
parent
db28b0239a
commit
77e070fb59
@ -257,6 +257,14 @@ const (
|
|||||||
filterNodeLimit = 150
|
filterNodeLimit = 150
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
// represents expected attachment status of a volume after attach
|
||||||
|
volumeAttachedStatus = "attached"
|
||||||
|
|
||||||
|
// represents expected attachment status of a volume after detach
|
||||||
|
volumeDetachedStatus = "detached"
|
||||||
|
)
|
||||||
|
|
||||||
// awsTagNameMasterRoles is a set of well-known AWS tag names that indicate the instance is a master
|
// awsTagNameMasterRoles is a set of well-known AWS tag names that indicate the instance is a master
|
||||||
// The major consequence is that it is then not considered for AWS zone discovery for dynamic volume creation.
|
// The major consequence is that it is then not considered for AWS zone discovery for dynamic volume creation.
|
||||||
var awsTagNameMasterRoles = sets.NewString("kubernetes.io/role/master", "k8s.io/role/master")
|
var awsTagNameMasterRoles = sets.NewString("kubernetes.io/role/master", "k8s.io/role/master")
|
||||||
@ -1967,7 +1975,6 @@ func (c *Cloud) getMountDevice(
|
|||||||
// AWS API returns consistent result next time (i.e. the volume is detached).
|
// AWS API returns consistent result next time (i.e. the volume is detached).
|
||||||
status := volumeStatus[mappingVolumeID]
|
status := volumeStatus[mappingVolumeID]
|
||||||
klog.Warningf("Got assignment call for already-assigned volume: %s@%s, volume status: %s", mountDevice, mappingVolumeID, status)
|
klog.Warningf("Got assignment call for already-assigned volume: %s@%s, volume status: %s", mountDevice, mappingVolumeID, status)
|
||||||
return mountDevice, false, fmt.Errorf("volume is still being detached from the node")
|
|
||||||
}
|
}
|
||||||
return mountDevice, true, nil
|
return mountDevice, true, nil
|
||||||
}
|
}
|
||||||
@ -2168,7 +2175,7 @@ func (c *Cloud) applyUnSchedulableTaint(nodeName types.NodeName, reason string)
|
|||||||
|
|
||||||
// waitForAttachmentStatus polls until the attachment status is the expected value
|
// waitForAttachmentStatus polls until the attachment status is the expected value
|
||||||
// On success, it returns the last attachment state.
|
// On success, it returns the last attachment state.
|
||||||
func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expectedDevice string) (*ec2.VolumeAttachment, error) {
|
func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expectedDevice string, alreadyAttached bool) (*ec2.VolumeAttachment, error) {
|
||||||
backoff := wait.Backoff{
|
backoff := wait.Backoff{
|
||||||
Duration: volumeAttachmentStatusPollDelay,
|
Duration: volumeAttachmentStatusPollDelay,
|
||||||
Factor: volumeAttachmentStatusFactor,
|
Factor: volumeAttachmentStatusFactor,
|
||||||
@ -2193,7 +2200,7 @@ func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expec
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
// The VolumeNotFound error is special -- we don't need to wait for it to repeat
|
// The VolumeNotFound error is special -- we don't need to wait for it to repeat
|
||||||
if isAWSErrorVolumeNotFound(err) {
|
if isAWSErrorVolumeNotFound(err) {
|
||||||
if status == "detached" {
|
if status == volumeDetachedStatus {
|
||||||
// The disk doesn't exist, assume it's detached, log warning and stop waiting
|
// The disk doesn't exist, assume it's detached, log warning and stop waiting
|
||||||
klog.Warningf("Waiting for volume %q to be detached but the volume does not exist", d.awsID)
|
klog.Warningf("Waiting for volume %q to be detached but the volume does not exist", d.awsID)
|
||||||
stateStr := "detached"
|
stateStr := "detached"
|
||||||
@ -2202,7 +2209,7 @@ func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expec
|
|||||||
}
|
}
|
||||||
return true, nil
|
return true, nil
|
||||||
}
|
}
|
||||||
if status == "attached" {
|
if status == volumeAttachedStatus {
|
||||||
// The disk doesn't exist, complain, give up waiting and report error
|
// The disk doesn't exist, complain, give up waiting and report error
|
||||||
klog.Warningf("Waiting for volume %q to be attached but the volume does not exist", d.awsID)
|
klog.Warningf("Waiting for volume %q to be attached but the volume does not exist", d.awsID)
|
||||||
return false, err
|
return false, err
|
||||||
@ -2237,7 +2244,7 @@ func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expec
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if attachmentStatus == "" {
|
if attachmentStatus == "" {
|
||||||
attachmentStatus = "detached"
|
attachmentStatus = volumeDetachedStatus
|
||||||
}
|
}
|
||||||
if attachment != nil {
|
if attachment != nil {
|
||||||
// AWS eventual consistency can go back in time.
|
// AWS eventual consistency can go back in time.
|
||||||
@ -2266,6 +2273,13 @@ func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expec
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// if we expected volume to be attached and it was reported as already attached via DescribeInstance call
|
||||||
|
// but DescribeVolume told us volume is detached, we will short-circuit this long wait loop and return error
|
||||||
|
// so as AttachDisk can be retried without waiting for 20 minutes.
|
||||||
|
if (status == volumeAttachedStatus) && alreadyAttached && (attachmentStatus != status) {
|
||||||
|
return false, fmt.Errorf("attachment of disk %q failed, expected device to be attached but was %s", d.name, attachmentStatus)
|
||||||
|
}
|
||||||
|
|
||||||
if attachmentStatus == status {
|
if attachmentStatus == status {
|
||||||
// Attachment is in requested state, finish waiting
|
// Attachment is in requested state, finish waiting
|
||||||
return true, nil
|
return true, nil
|
||||||
@ -2411,7 +2425,7 @@ func (c *Cloud) AttachDisk(diskName KubernetesVolumeID, nodeName types.NodeName)
|
|||||||
klog.V(2).Infof("AttachVolume volume=%q instance=%q request returned %v", disk.awsID, awsInstance.awsID, attachResponse)
|
klog.V(2).Infof("AttachVolume volume=%q instance=%q request returned %v", disk.awsID, awsInstance.awsID, attachResponse)
|
||||||
}
|
}
|
||||||
|
|
||||||
attachment, err := disk.waitForAttachmentStatus("attached", awsInstance.awsID, ec2Device)
|
attachment, err := disk.waitForAttachmentStatus("attached", awsInstance.awsID, ec2Device, alreadyAttached)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if err == wait.ErrWaitTimeout {
|
if err == wait.ErrWaitTimeout {
|
||||||
@ -2489,7 +2503,7 @@ func (c *Cloud) DetachDisk(diskName KubernetesVolumeID, nodeName types.NodeName)
|
|||||||
return "", errors.New("no response from DetachVolume")
|
return "", errors.New("no response from DetachVolume")
|
||||||
}
|
}
|
||||||
|
|
||||||
attachment, err := diskInfo.disk.waitForAttachmentStatus("detached", awsInstance.awsID, "")
|
attachment, err := diskInfo.disk.waitForAttachmentStatus("detached", awsInstance.awsID, "", false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
@ -4797,7 +4811,7 @@ func setNodeDisk(
|
|||||||
}
|
}
|
||||||
|
|
||||||
func getInitialAttachDetachDelay(status string) time.Duration {
|
func getInitialAttachDetachDelay(status string) time.Duration {
|
||||||
if status == "detached" {
|
if status == volumeDetachedStatus {
|
||||||
return volumeDetachmentStatusInitialDelay
|
return volumeDetachmentStatusInitialDelay
|
||||||
}
|
}
|
||||||
return volumeAttachmentStatusInitialDelay
|
return volumeAttachmentStatusInitialDelay
|
||||||
|
Loading…
Reference in New Issue
Block a user