From 186ddce07b9f1ae9b2acbe0fdab2bdff37e17c14 Mon Sep 17 00:00:00 2001 From: ZhangKe10140699 Date: Mon, 18 Jul 2022 15:11:17 +0800 Subject: [PATCH] Fix problem in updating VolumeAttached in node status --- .../attachdetach/reconciler/reconciler.go | 3 + .../reconciler/reconciler_test.go | 60 +++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/pkg/controller/volume/attachdetach/reconciler/reconciler.go b/pkg/controller/volume/attachdetach/reconciler/reconciler.go index 92b2d49eadc..79e0df3614a 100644 --- a/pkg/controller/volume/attachdetach/reconciler/reconciler.go +++ b/pkg/controller/volume/attachdetach/reconciler/reconciler.go @@ -251,6 +251,9 @@ func (rc *reconciler) reconcile() { if err != nil { // Skip detaching this volume if unable to update node status klog.ErrorS(err, "UpdateNodeStatusForNode failed while attempting to report volume as attached", "volume", attachedVolume) + // Add volume back to ReportAsAttached if UpdateNodeStatusForNode call failed so that node status updater will add it back to VolumeAttached list. + // It is needed here too because DetachVolume is not call actually and we keep the data consistency for every reconcile. + rc.actualStateOfWorld.AddVolumeToReportAsAttached(attachedVolume.VolumeName, attachedVolume.NodeName) continue } diff --git a/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go b/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go index da6d988dcc8..73b2f77be35 100644 --- a/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go +++ b/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go @@ -613,6 +613,66 @@ func Test_Run_OneVolumeAttachAndDetachUncertainNodesWithReadWriteOnce(t *testing } +func Test_Run_UpdateNodeStatusFailBeforeOneVolumeDetachNodeWithReadWriteOnce(t *testing.T) { + // Arrange + volumePluginMgr, _ := volumetesting.GetTestVolumePluginMgr(t) + dsw := cache.NewDesiredStateOfWorld(volumePluginMgr) + asw := cache.NewActualStateOfWorld(volumePluginMgr) + fakeKubeClient := controllervolumetesting.CreateTestClient() + fakeRecorder := &record.FakeRecorder{} + fakeHandler := volumetesting.NewBlockVolumePathHandler() + ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator( + fakeKubeClient, + volumePluginMgr, + fakeRecorder, + fakeHandler)) + informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc()) + nodeLister := informerFactory.Core().V1().Nodes().Lister() + nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */) + rc := NewReconciler( + reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder) + reconciliationLoopFunc := rc.(*reconciler).reconciliationLoopFunc() + podName1 := "pod-uid1" + volumeName := v1.UniqueVolumeName("volume-name") + volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName) + volumeSpec.PersistentVolume.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce} + nodeName1 := k8stypes.NodeName("node-name1") + dsw.AddNode(nodeName1, false /*keepTerminatedPodVolumes*/) + + // Add the pod in which the volume is attached to the FailDetachNode + generatedVolumeName, podAddErr := dsw.AddPod(types.UniquePodName(podName1), controllervolumetesting.NewPod(podName1, podName1), volumeSpec, nodeName1) + if podAddErr != nil { + t.Fatalf("AddPod failed. Expected: Actual: <%v>", podAddErr) + } + + // Act + reconciliationLoopFunc() + + // Volume is added to asw, volume should be reported as attached to the node. + waitForVolumeAddedToNode(t, generatedVolumeName, nodeName1, asw) + verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw) + verifyVolumeReportedAsAttachedToNode(t, generatedVolumeName, nodeName1, true, asw) + + // Delete the pod + dsw.DeletePod(types.UniquePodName(podName1), generatedVolumeName, nodeName1) + + // Mock NodeStatusUpdate fail + rc.(*reconciler).nodeStatusUpdater = statusupdater.NewFakeNodeStatusUpdater(true /* returnError */) + reconciliationLoopFunc() + // The first detach will be triggered after at leaset 50ms (maxWaitForUnmountDuration in test). + time.Sleep(100 * time.Millisecond) + reconciliationLoopFunc() + // Right before detach operation is performed, the volume will be first removed from being reported + // as attached on node status (RemoveVolumeFromReportAsAttached). After UpdateNodeStatus operation which is expected to fail, + // controller then added the volume back as attached. + // verifyVolumeReportedAsAttachedToNode will check volume is in the list of volume attached that needs to be updated + // in node status. By calling this function (GetVolumesToReportAttached), node status should be updated, and the volume + // will not need to be updated until new changes are applied (detach is triggered again) + verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw) + verifyVolumeReportedAsAttachedToNode(t, generatedVolumeName, nodeName1, true, asw) + +} + func Test_Run_OneVolumeDetachFailNodeWithReadWriteOnce(t *testing.T) { // Arrange volumePluginMgr, _ := volumetesting.GetTestVolumePluginMgr(t)