Merge pull request #120595 from jsafrane/fix-detach-uncertain

Mark a volume as uncertain-attached after detach error
This commit is contained in:
Kubernetes Prow Robot 2023-10-08 05:54:01 +02:00 committed by GitHub
commit d3559bf77f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 13 additions and 10 deletions

View File

@ -750,17 +750,15 @@ func Test_Run_OneVolumeDetachFailNodeWithReadWriteOnce(t *testing.T) {
// The first detach will be triggered after at least 50ms (maxWaitForUnmountDuration in test).
// Right before detach operation is performed, the volume will be first removed from being reported
// as attached on node status (RemoveVolumeFromReportAsAttached). After detach operation which is expected to fail,
// controller then added the volume back as attached.
// controller then treats the attachment as Uncertain.
// Here it sleeps 100ms so that detach should be triggered already at this point.
// verifyVolumeReportedAsAttachedToNode will check volume is in the list of volume attached that needs to be updated
// in node status. By calling this function (GetVolumesToReportAttached), node status should be updated, and the volume
// will not need to be updated until new changes are applied (detach is triggered again)
time.Sleep(100 * time.Millisecond)
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw, volumeAttachedCheckTimeout)
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateUncertain, asw)
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, false, asw, volumeAttachedCheckTimeout)
// Add a second pod which tries to attach the volume to the same node.
// After adding pod to the same node, detach will not be triggered any more.
// After adding pod to the same node, detach will not be triggered any more,
// the volume gets attached and reported as attached to the node.
generatedVolumeName, podAddErr = dsw.AddPod(types.UniquePodName(podName2), controllervolumetesting.NewPod(podName2, podName2), volumeSpec, nodeName1)
if podAddErr != nil {
t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
@ -768,6 +766,7 @@ func Test_Run_OneVolumeDetachFailNodeWithReadWriteOnce(t *testing.T) {
// Sleep 1s to verify no detach are triggered after second pod is added in the future.
time.Sleep(1000 * time.Millisecond)
verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw, volumeAttachedCheckTimeout)
// verifyVolumeNoStatusUpdateNeeded(t, logger, generatedVolumeName, nodeName1, asw)
// Add a third pod which tries to attach the volume to a different node.

View File

@ -506,9 +506,13 @@ func (og *operationGenerator) GenerateDetachVolumeFunc(
migrated := getMigratedStatusBySpec(volumeToDetach.VolumeSpec)
if err != nil {
// On failure, add volume back to ReportAsAttached list
actualStateOfWorld.AddVolumeToReportAsAttached(
logger, volumeToDetach.VolumeName, volumeToDetach.NodeName)
// On failure, mark the volume as uncertain. Attach() must succeed before adding the volume back
// to node status as attached.
uncertainError := actualStateOfWorld.MarkVolumeAsUncertain(
logger, volumeToDetach.VolumeName, volumeToDetach.VolumeSpec, volumeToDetach.NodeName)
if uncertainError != nil {
klog.Errorf("DetachVolume.MarkVolumeAsUncertain failed to add the volume %q to actual state after detach error: %s", volumeToDetach.VolumeName, uncertainError)
}
eventErr, detailedErr := volumeToDetach.GenerateError("DetachVolume.Detach failed", err)
return volumetypes.NewOperationContext(eventErr, detailedErr, migrated)
}