diff --git a/cmd/kube-controller-manager/app/core.go b/cmd/kube-controller-manager/app/core.go
index 467487739ba..69e556eda5b 100644
--- a/cmd/kube-controller-manager/app/core.go
+++ b/cmd/kube-controller-manager/app/core.go
@@ -385,6 +385,7 @@ func startPersistentVolumeAttachDetachController(ctx context.Context, controller
 		GetDynamicPluginProber(controllerContext.ComponentConfig.PersistentVolumeBinderController.VolumeConfiguration),
 		controllerContext.ComponentConfig.AttachDetachController.DisableAttachDetachReconcilerSync,
 		controllerContext.ComponentConfig.AttachDetachController.ReconcilerSyncLoopPeriod.Duration,
+		controllerContext.ComponentConfig.AttachDetachController.DisableForceDetachOnTimeout,
 		attachdetach.DefaultTimerConfig,
 	)
 	if attachDetachControllerErr != nil {
diff --git a/cmd/kube-controller-manager/app/options/attachdetachcontroller.go b/cmd/kube-controller-manager/app/options/attachdetachcontroller.go
index 7628f33a3c6..2449ca0038d 100644
--- a/cmd/kube-controller-manager/app/options/attachdetachcontroller.go
+++ b/cmd/kube-controller-manager/app/options/attachdetachcontroller.go
@@ -37,6 +37,7 @@ func (o *AttachDetachControllerOptions) AddFlags(fs *pflag.FlagSet) {
 	fs.BoolVar(&o.DisableAttachDetachReconcilerSync, "disable-attach-detach-reconcile-sync", false, "Disable volume attach detach reconciler sync. Disabling this may cause volumes to be mismatched with pods. Use wisely.")
 	fs.DurationVar(&o.ReconcilerSyncLoopPeriod.Duration, "attach-detach-reconcile-sync-period", o.ReconcilerSyncLoopPeriod.Duration, "The reconciler sync wait time between volume attach detach. This duration must be larger than one second, and increasing this value from the default may allow for volumes to be mismatched with pods.")
+	fs.BoolVar(&o.DisableForceDetachOnTimeout, "disable-force-detach-on-timeout", false, "Prevent force detaching volumes based on maximum unmount time and node status. If this flag is set to true, the non-graceful node shutdown feature must be used to recover from node failure. See https://k8s.io/docs/storage-disable-force-detach-on-timeout/.")
 }
 
 // ApplyTo fills up AttachDetachController config with options.
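A note on how the new option reaches operators: the AddFlags hunk above binds it with pflag alongside the existing attach-detach flags. Below is a minimal, self-contained sketch of that binding; the adcOptions struct is a hypothetical stand-in for AttachDetachControllerOptions, and only the flag name and default mirror the patch.

package main

import (
	"fmt"

	"github.com/spf13/pflag"
)

// adcOptions is a hypothetical stand-in for AttachDetachControllerOptions.
type adcOptions struct {
	DisableForceDetachOnTimeout bool
}

func main() {
	o := &adcOptions{}
	fs := pflag.NewFlagSet("kube-controller-manager", pflag.ContinueOnError)
	// Same binding shape as the AddFlags hunk above: the default is false,
	// so force detach on timeout stays enabled unless the operator opts out.
	fs.BoolVar(&o.DisableForceDetachOnTimeout, "disable-force-detach-on-timeout", false,
		"Prevent force detaching volumes based on maximum unmount time and node status.")
	if err := fs.Parse([]string{"--disable-force-detach-on-timeout=true"}); err != nil {
		panic(err)
	}
	fmt.Println(o.DisableForceDetachOnTimeout) // true
}

Because the zero value is false, clusters that never set the flag keep the existing force-detach behavior.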
@@ -47,6 +48,7 @@ func (o *AttachDetachControllerOptions) ApplyTo(cfg *attachdetachconfig.AttachDe
 	cfg.DisableAttachDetachReconcilerSync = o.DisableAttachDetachReconcilerSync
 	cfg.ReconcilerSyncLoopPeriod = o.ReconcilerSyncLoopPeriod
+	cfg.DisableForceDetachOnTimeout = o.DisableForceDetachOnTimeout
 
 	return nil
 }
diff --git a/pkg/controller/volume/attachdetach/attach_detach_controller.go b/pkg/controller/volume/attachdetach/attach_detach_controller.go
index d1d91ea4583..936a2cf619b 100644
--- a/pkg/controller/volume/attachdetach/attach_detach_controller.go
+++ b/pkg/controller/volume/attachdetach/attach_detach_controller.go
@@ -120,6 +120,7 @@ func NewAttachDetachController(
 	prober volume.DynamicPluginProber,
 	disableReconciliationSync bool,
 	reconcilerSyncDuration time.Duration,
+	disableForceDetachOnTimeout bool,
 	timerConfig TimerConfig) (AttachDetachController, error) {
 
 	adc := &attachDetachController{
@@ -171,6 +172,7 @@ func NewAttachDetachController(
 		timerConfig.ReconcilerMaxWaitForUnmountDuration,
 		reconcilerSyncDuration,
 		disableReconciliationSync,
+		disableForceDetachOnTimeout,
 		adc.desiredStateOfWorld,
 		adc.actualStateOfWorld,
 		adc.attacherDetacher,
diff --git a/pkg/controller/volume/attachdetach/attach_detach_controller_test.go b/pkg/controller/volume/attachdetach/attach_detach_controller_test.go
index 5cb9fed9fff..c4d2f45a9b8 100644
--- a/pkg/controller/volume/attachdetach/attach_detach_controller_test.go
+++ b/pkg/controller/volume/attachdetach/attach_detach_controller_test.go
@@ -66,6 +66,7 @@ func Test_NewAttachDetachController_Positive(t *testing.T) {
 		nil, /* prober */
 		false,
 		5*time.Second,
+		false,
 		DefaultTimerConfig,
 	)
 
@@ -98,6 +99,7 @@ func Test_AttachDetachControllerStateOfWorldPopulators_Positive(t *testing.T) {
 		nil, /* prober */
 		false,
 		5*time.Second,
+		false,
 		DefaultTimerConfig,
 	)
 
@@ -222,6 +224,7 @@ func BenchmarkPopulateActualStateOfWorld(b *testing.B) {
 		nil, /* prober */
 		false,
 		5*time.Second,
+		false,
 		DefaultTimerConfig,
 	)
 
@@ -282,6 +285,7 @@ func attachDetachRecoveryTestCase(t *testing.T, extraPods1 []*v1.Pod, extraPods2
 		prober,
 		false,
 		1*time.Second,
+		false,
 		DefaultTimerConfig,
 	)
 
@@ -547,6 +551,7 @@ func volumeAttachmentRecoveryTestCase(t *testing.T, tc vaTest) {
 		nil, /* prober */
 		false,
 		1*time.Second,
+		false,
 		DefaultTimerConfig,
 	)
 	if err != nil {
diff --git a/pkg/controller/volume/attachdetach/config/types.go b/pkg/controller/volume/attachdetach/config/types.go
index aa3306933f5..3263a33fcd4 100644
--- a/pkg/controller/volume/attachdetach/config/types.go
+++ b/pkg/controller/volume/attachdetach/config/types.go
@@ -29,4 +29,8 @@ type AttachDetachControllerConfiguration struct {
 	// ReconcilerSyncLoopPeriod is the amount of time the reconciler sync states loop
 	// wait between successive executions. Is set to 60 sec by default.
 	ReconcilerSyncLoopPeriod metav1.Duration
+	// DisableForceDetachOnTimeout disables force detach when the maximum unmount
+	// time is exceeded. Is false by default, and thus force detach on timeout is
+	// enabled.
+	DisableForceDetachOnTimeout bool
 }
diff --git a/pkg/controller/volume/attachdetach/config/v1alpha1/zz_generated.conversion.go b/pkg/controller/volume/attachdetach/config/v1alpha1/zz_generated.conversion.go
index 6c68adb4460..e49ed4f81dd 100644
--- a/pkg/controller/volume/attachdetach/config/v1alpha1/zz_generated.conversion.go
+++ b/pkg/controller/volume/attachdetach/config/v1alpha1/zz_generated.conversion.go
@@ -62,12 +62,14 @@ func RegisterConversions(s *runtime.Scheme) error {
 
 func autoConvert_v1alpha1_AttachDetachControllerConfiguration_To_config_AttachDetachControllerConfiguration(in *v1alpha1.AttachDetachControllerConfiguration, out *config.AttachDetachControllerConfiguration, s conversion.Scope) error {
 	out.DisableAttachDetachReconcilerSync = in.DisableAttachDetachReconcilerSync
 	out.ReconcilerSyncLoopPeriod = in.ReconcilerSyncLoopPeriod
+	out.DisableForceDetachOnTimeout = in.DisableForceDetachOnTimeout
 	return nil
 }
 
 func autoConvert_config_AttachDetachControllerConfiguration_To_v1alpha1_AttachDetachControllerConfiguration(in *config.AttachDetachControllerConfiguration, out *v1alpha1.AttachDetachControllerConfiguration, s conversion.Scope) error {
 	out.DisableAttachDetachReconcilerSync = in.DisableAttachDetachReconcilerSync
 	out.ReconcilerSyncLoopPeriod = in.ReconcilerSyncLoopPeriod
+	out.DisableForceDetachOnTimeout = in.DisableForceDetachOnTimeout
 	return nil
 }
diff --git a/pkg/controller/volume/attachdetach/reconciler/reconciler.go b/pkg/controller/volume/attachdetach/reconciler/reconciler.go
index 45783e98ccc..ae01e4868cd 100644
--- a/pkg/controller/volume/attachdetach/reconciler/reconciler.go
+++ b/pkg/controller/volume/attachdetach/reconciler/reconciler.go
@@ -69,6 +69,7 @@ func NewReconciler(
 	maxWaitForUnmountDuration time.Duration,
 	syncDuration time.Duration,
 	disableReconciliationSync bool,
+	disableForceDetachOnTimeout bool,
 	desiredStateOfWorld cache.DesiredStateOfWorld,
 	actualStateOfWorld cache.ActualStateOfWorld,
 	attacherDetacher operationexecutor.OperationExecutor,
@@ -76,32 +77,34 @@ func NewReconciler(
 	nodeLister corelisters.NodeLister,
 	recorder record.EventRecorder) Reconciler {
 	return &reconciler{
-		loopPeriod:                loopPeriod,
-		maxWaitForUnmountDuration: maxWaitForUnmountDuration,
-		syncDuration:              syncDuration,
-		disableReconciliationSync: disableReconciliationSync,
-		desiredStateOfWorld:       desiredStateOfWorld,
-		actualStateOfWorld:        actualStateOfWorld,
-		attacherDetacher:          attacherDetacher,
-		nodeStatusUpdater:         nodeStatusUpdater,
-		nodeLister:                nodeLister,
-		timeOfLastSync:            time.Now(),
-		recorder:                  recorder,
+		loopPeriod:                  loopPeriod,
+		maxWaitForUnmountDuration:   maxWaitForUnmountDuration,
+		syncDuration:                syncDuration,
+		disableReconciliationSync:   disableReconciliationSync,
+		disableForceDetachOnTimeout: disableForceDetachOnTimeout,
+		desiredStateOfWorld:         desiredStateOfWorld,
+		actualStateOfWorld:          actualStateOfWorld,
+		attacherDetacher:            attacherDetacher,
+		nodeStatusUpdater:           nodeStatusUpdater,
+		nodeLister:                  nodeLister,
+		timeOfLastSync:              time.Now(),
+		recorder:                    recorder,
 	}
 }
 
 type reconciler struct {
-	loopPeriod                time.Duration
-	maxWaitForUnmountDuration time.Duration
-	syncDuration              time.Duration
-	desiredStateOfWorld       cache.DesiredStateOfWorld
-	actualStateOfWorld        cache.ActualStateOfWorld
-	attacherDetacher          operationexecutor.OperationExecutor
-	nodeStatusUpdater         statusupdater.NodeStatusUpdater
-	nodeLister                corelisters.NodeLister
-	timeOfLastSync            time.Time
-	disableReconciliationSync bool
-	recorder                  record.EventRecorder
+	loopPeriod                  time.Duration
+	maxWaitForUnmountDuration   time.Duration
+	syncDuration                time.Duration
+	desiredStateOfWorld         cache.DesiredStateOfWorld
+	actualStateOfWorld          cache.ActualStateOfWorld
+	attacherDetacher            operationexecutor.OperationExecutor
+	nodeStatusUpdater           statusupdater.NodeStatusUpdater
+	nodeLister                  corelisters.NodeLister
+	timeOfLastSync              time.Time
+	disableReconciliationSync   bool
+	disableForceDetachOnTimeout bool
+	recorder                    record.EventRecorder
 }
 
 func (rc *reconciler) Run(ctx context.Context) {
@@ -207,24 +210,29 @@ func (rc *reconciler) reconcile(ctx context.Context) {
 				logger.Error(err, "Cannot trigger detach because it fails to set detach request time with error")
 				continue
 			}
-			// Check whether timeout has reached the maximum waiting time
-			timeout := elapsedTime > rc.maxWaitForUnmountDuration
+			// Check whether the unmount drain timer has expired
+			maxWaitForUnmountDurationExpired := elapsedTime > rc.maxWaitForUnmountDuration
 
 			isHealthy, err := rc.nodeIsHealthy(attachedVolume.NodeName)
 			if err != nil {
 				logger.Error(err, "Failed to get health of node", "node", klog.KRef("", string(attachedVolume.NodeName)))
 			}
 
-			// Force detach volumes from unhealthy nodes after maxWaitForUnmountDuration.
-			forceDetach := !isHealthy && timeout
+			// Force detach volumes from unhealthy nodes after maxWaitForUnmountDuration, if force detach is enabled.
+			// The timeout condition must take the flag into account so that the correct metric is recorded below.
+			forceDetachTimeoutExpired := maxWaitForUnmountDurationExpired && !rc.disableForceDetachOnTimeout
+			if maxWaitForUnmountDurationExpired && rc.disableForceDetachOnTimeout {
+				logger.V(5).Info("Drain timeout expired for volume but disableForceDetachOnTimeout was set", "node", klog.KRef("", string(attachedVolume.NodeName)), "volumeName", attachedVolume.VolumeName)
+			}
+			forceDetach := !isHealthy && forceDetachTimeoutExpired
 
 			hasOutOfServiceTaint, err := rc.hasOutOfServiceTaint(attachedVolume.NodeName)
 			if err != nil {
 				logger.Error(err, "Failed to get taint specs for node", "node", klog.KRef("", string(attachedVolume.NodeName)))
 			}
 
-			// Check whether volume is still mounted. Skip detach if it is still mounted unless force detach timeout
-			// or the node has `node.kubernetes.io/out-of-service` taint.
+			// Check whether the volume is still mounted. Skip detach if it is still mounted, unless we have
+			// decided to force detach or the node has the `node.kubernetes.io/out-of-service` taint.
 			if attachedVolume.MountedByNode && !forceDetach && !hasOutOfServiceTaint {
 				logger.V(5).Info("Cannot detach volume because it is still mounted", "node", klog.KRef("", string(attachedVolume.NodeName)), "volumeName", attachedVolume.VolumeName)
 				continue
 			}
@@ -254,19 +262,19 @@ func (rc *reconciler) reconcile(ctx context.Context) {
 			}
 
 			// Trigger detach volume which requires verifying safe to detach step
-			// If timeout is true, skip verifySafeToDetach check
+			// If forceDetachTimeoutExpired is true, skip verifySafeToDetach check
 			// If the node has node.kubernetes.io/out-of-service taint with NoExecute effect, skip verifySafeToDetach check
 			logger.V(5).Info("Starting attacherDetacher.DetachVolume", "node", klog.KRef("", string(attachedVolume.NodeName)), "volumeName", attachedVolume.VolumeName)
 			if hasOutOfServiceTaint {
 				logger.V(4).Info("node has out-of-service taint", "node", klog.KRef("", string(attachedVolume.NodeName)))
 			}
-			verifySafeToDetach := !(timeout || hasOutOfServiceTaint)
+			verifySafeToDetach := !(forceDetachTimeoutExpired || hasOutOfServiceTaint)
 			err = rc.attacherDetacher.DetachVolume(logger, attachedVolume.AttachedVolume, verifySafeToDetach, rc.actualStateOfWorld)
 			if err == nil {
 				if verifySafeToDetach { // normal detach
 					logger.Info("attacherDetacher.DetachVolume started", "node", klog.KRef("", string(attachedVolume.NodeName)), "volumeName", attachedVolume.VolumeName)
 				} else { // force detach
-					if timeout {
+					if forceDetachTimeoutExpired {
 						metrics.RecordForcedDetachMetric(metrics.ForceDetachReasonTimeout)
 						logger.Info("attacherDetacher.DetachVolume started: this volume is not safe to detach, but maxWaitForUnmountDuration expired, force detaching",
 							"duration", rc.maxWaitForUnmountDuration,
diff --git a/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go b/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go
index 8e1aec04ed7..ff2fcb8b62f 100644
--- a/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go
+++ b/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go
@@ -75,7 +75,7 @@ func Test_Run_Positive_DoNothing(t *testing.T) {
 		fakeKubeClient, informerFactory.Core().V1().Nodes().Lister(), asw)
 	nodeLister := informerFactory.Core().V1().Nodes().Lister()
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
 
 	// Act
 	_, ctx := ktesting.NewTestContext(t)
@@ -111,7 +111,7 @@ func Test_Run_Positive_OneDesiredVolumeAttach(t *testing.T) {
 	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
 	nodeLister := informerFactory.Core().V1().Nodes().Lister()
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
 	podName := "pod-uid"
 	volumeName := v1.UniqueVolumeName("volume-name")
 	volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
@@ -165,7 +165,7 @@ func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithUnmountedVolume(t *te
 	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
 	nodeLister := informerFactory.Core().V1().Nodes().Lister()
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
 	podName := "pod-uid"
 	volumeName := v1.UniqueVolumeName("volume-name")
 	volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
@@ -243,7 +243,7 @@ func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithMountedVolume(t *test
 	nodeLister := informerFactory.Core().V1().Nodes().Lister()
 	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
 	podName := "pod-uid"
 	volumeName := v1.UniqueVolumeName("volume-name")
 	volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
@@ -322,7 +322,7 @@ func Test_Run_Negative_OneDesiredVolumeAttachThenDetachWithUnmountedVolumeUpdate
 	nodeLister := informerFactory.Core().V1().Nodes().Lister()
 	nsu := statusupdater.NewFakeNodeStatusUpdater(true /* returnError */)
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
 	podName := "pod-uid"
 	volumeName := v1.UniqueVolumeName("volume-name")
 	volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
@@ -400,7 +400,7 @@ func Test_Run_OneVolumeAttachAndDetachMultipleNodesWithReadWriteMany(t *testing.
 	informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
 	nodeLister := informerFactory.Core().V1().Nodes().Lister()
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
 	podName1 := "pod-uid1"
 	podName2 := "pod-uid2"
 	volumeName := v1.UniqueVolumeName("volume-name")
@@ -495,7 +495,7 @@ func Test_Run_OneVolumeAttachAndDetachMultipleNodesWithReadWriteOnce(t *testing.
 	nodeLister := informerFactory.Core().V1().Nodes().Lister()
 	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
 	podName1 := "pod-uid1"
 	podName2 := "pod-uid2"
 	volumeName := v1.UniqueVolumeName("volume-name")
@@ -588,7 +588,7 @@ func Test_Run_OneVolumeAttachAndDetachUncertainNodesWithReadWriteOnce(t *testing
 	nodeLister := informerFactory.Core().V1().Nodes().Lister()
 	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
 	podName1 := "pod-uid1"
 	podName2 := "pod-uid2"
 	volumeName := v1.UniqueVolumeName("volume-name")
@@ -655,7 +655,7 @@ func Test_Run_UpdateNodeStatusFailBeforeOneVolumeDetachNodeWithReadWriteOnce(t *
 	ctx, cancel := context.WithCancel(ctx)
 	defer cancel()
 	rc := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
 	reconciliationLoopFunc := rc.(*reconciler).reconciliationLoopFunc(ctx)
 	podName1 := "pod-uid1"
 	volumeName := v1.UniqueVolumeName("volume-name")
@@ -715,7 +715,7 @@ func Test_Run_OneVolumeDetachFailNodeWithReadWriteOnce(t *testing.T) {
 	nodeLister := informerFactory.Core().V1().Nodes().Lister()
 	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
 	podName1 := "pod-uid1"
 	podName2 := "pod-uid2"
 	podName3 := "pod-uid3"
@@ -802,7 +802,7 @@ func Test_Run_OneVolumeAttachAndDetachTimeoutNodesWithReadWriteOnce(t *testing.T
 	nodeLister := informerFactory.Core().V1().Nodes().Lister()
 	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
 	podName1 := "pod-uid1"
 	podName2 := "pod-uid2"
 	volumeName := v1.UniqueVolumeName("volume-name")
@@ -878,7 +878,7 @@ func Test_Run_OneVolumeDetachOnOutOfServiceTaintedNode(t *testing.T) {
 	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
 	nodeLister := informerFactory.Core().V1().Nodes().Lister()
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxLongWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad,
+		reconcilerLoopPeriod, maxLongWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad,
 		nsu, nodeLister, fakeRecorder)
 	podName1 := "pod-uid1"
 	volumeName1 := v1.UniqueVolumeName("volume-name1")
@@ -962,7 +962,7 @@ func Test_Run_OneVolumeDetachOnNoOutOfServiceTaintedNode(t *testing.T) {
 	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
 	nodeLister := informerFactory.Core().V1().Nodes().Lister()
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxLongWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad,
+		reconcilerLoopPeriod, maxLongWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad,
 		nsu, nodeLister, fakeRecorder)
 	podName1 := "pod-uid1"
 	volumeName1 := v1.UniqueVolumeName("volume-name1")
@@ -1039,7 +1039,7 @@ func Test_Run_OneVolumeDetachOnUnhealthyNode(t *testing.T) {
 	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
 	nodeLister := informerFactory.Core().V1().Nodes().Lister()
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad,
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad,
 		nsu, nodeLister, fakeRecorder)
 	podName1 := "pod-uid1"
 	volumeName1 := v1.UniqueVolumeName("volume-name1")
@@ -1110,6 +1110,147 @@ func Test_Run_OneVolumeDetachOnUnhealthyNode(t *testing.T) {
 	waitForDetachCallCount(t, 1 /* expectedDetachCallCount */, fakePlugin)
 }
 
+// Populates desiredStateOfWorld cache with one node/volume/pod tuple.
+// The reconciler is constructed with disableForceDetachOnTimeout set to true.
+// The node starts as healthy.
+//
+// Calls Run()
+// Verifies there is one attach call and no detach calls.
+// Deletes the pod from desiredStateOfWorld cache without first marking the node/volume as unmounted.
+// Verifies that the volume is NOT detached after maxWaitForUnmountDuration.
+// Marks the node as unhealthy.
+// Verifies that the volume is still not detached after maxWaitForUnmountDuration.
+//
+// Then applies the node.kubernetes.io/out-of-service taint.
+// Verifies that there is still just one attach call.
+// Verifies there is now one detach call.
+func Test_Run_OneVolumeDetachOnUnhealthyNodeWithForceDetachOnUnmountDisabled(t *testing.T) {
+	registerMetrics.Do(func() {
+		legacyregistry.MustRegister(metrics.ForceDetachMetricCounter)
+	})
+	// NOTE: This value is being pulled from a global variable, so it won't necessarily be 0 at the start of the test.
+	// For example, if Test_Run_OneVolumeDetachOnOutOfServiceTaintedNode runs before this test, then it will be 1.
+	initialForceDetachCount, err := metricstestutil.GetCounterMetricValue(metrics.ForceDetachMetricCounter.WithLabelValues(metrics.ForceDetachReasonOutOfService))
+	if err != nil {
+		t.Errorf("Error getting initialForceDetachCount")
+	}
+
+	// Arrange
+	volumePluginMgr, fakePlugin := volumetesting.GetTestVolumePluginMgr(t)
+	dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
+	asw := cache.NewActualStateOfWorld(volumePluginMgr)
+	fakeKubeClient := controllervolumetesting.CreateTestClient()
+	fakeRecorder := &record.FakeRecorder{}
+	fakeHandler := volumetesting.NewBlockVolumePathHandler()
+	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(
+		fakeKubeClient,
+		volumePluginMgr,
+		fakeRecorder,
+		fakeHandler))
+	informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
+	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
+	nodeLister := informerFactory.Core().V1().Nodes().Lister()
+	reconciler := NewReconciler(
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, true, dsw, asw, ad,
+		nsu, nodeLister, fakeRecorder)
+	podName1 := "pod-uid1"
+	volumeName1 := v1.UniqueVolumeName("volume-name1")
+	volumeSpec1 := controllervolumetesting.GetTestVolumeSpec(string(volumeName1), volumeName1)
+	nodeName1 := k8stypes.NodeName("worker-0")
+	node1 := &v1.Node{
+		ObjectMeta: metav1.ObjectMeta{Name: string(nodeName1)},
+		Status: v1.NodeStatus{
+			Conditions: []v1.NodeCondition{
+				{
+					Type:   v1.NodeReady,
+					Status: v1.ConditionTrue,
+				},
+			},
+		},
+	}
+	addErr := informerFactory.Core().V1().Nodes().Informer().GetStore().Add(node1)
+	if addErr != nil {
+		t.Fatalf("Add node failed. Expected: <no error> Actual: <%v>", addErr)
+	}
+	dsw.AddNode(nodeName1, false /*keepTerminatedPodVolumes*/)
+	volumeExists := dsw.VolumeExists(volumeName1, nodeName1)
+	if volumeExists {
+		t.Fatalf(
+			"Volume %q/node %q should not exist, but it does.",
+			volumeName1,
+			nodeName1)
+	}
+
+	generatedVolumeName, podErr := dsw.AddPod(types.UniquePodName(podName1), controllervolumetesting.NewPod(podName1,
+		podName1), volumeSpec1, nodeName1)
+	if podErr != nil {
+		t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podErr)
+	}
+
+	// Act
+	_, ctx := ktesting.NewTestContext(t)
+	ctx, cancel := context.WithCancel(ctx)
+	defer cancel()
+	go reconciler.Run(ctx)
+
+	// Assert
+	waitForNewAttacherCallCount(t, 1 /* expectedCallCount */, fakePlugin)
+	verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
+	waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
+	verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
+	waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)
+
+	// Act
+	// Delete the pod. The volume will NOT be detached even after maxWaitForUnmountDuration expires,
+	// because it is still mounted and the node is healthy.
+	dsw.DeletePod(types.UniquePodName(podName1), generatedVolumeName, nodeName1)
+	time.Sleep(maxWaitForUnmountDuration * 5)
+	// Assert
+	waitForNewDetacherCallCount(t, 0 /* expectedCallCount */, fakePlugin)
+	verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
+	waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
+	verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
+	waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)
+
+	// Act
+	// Mark the node unhealthy
+	node2 := node1.DeepCopy()
+	node2.Status.Conditions[0].Status = v1.ConditionFalse
+	updateErr := informerFactory.Core().V1().Nodes().Informer().GetStore().Update(node2)
+	if updateErr != nil {
+		t.Fatalf("Update node failed. Expected: <no error> Actual: <%v>", updateErr)
+	}
+	// Assert -- Detach was not triggered after maxWaitForUnmountDuration
+	waitForNewDetacherCallCount(t, 0 /* expectedCallCount */, fakePlugin)
+	verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
+	waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
+	verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
+	waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)
+
+	// Force detach metric due to out-of-service taint
+	// We shouldn't see any additional force detaches, so only consider the initial count
+	testForceDetachMetric(t, int(initialForceDetachCount), metrics.ForceDetachReasonOutOfService)
+
+	// Act
+	// Taint the node
+	node3 := node2.DeepCopy()
+	node3.Spec.Taints = append(node3.Spec.Taints, v1.Taint{Key: v1.TaintNodeOutOfService, Effect: v1.TaintEffectNoExecute})
+	updateErr = informerFactory.Core().V1().Nodes().Informer().GetStore().Update(node3)
+	if updateErr != nil {
+		t.Fatalf("Update node failed. Expected: <no error> Actual: <%v>", updateErr)
+	}
+	// Assert -- Detach was triggered by the out-of-service taint
+	waitForNewDetacherCallCount(t, 1 /* expectedCallCount */, fakePlugin)
+	verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
+	waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
+	verifyNewDetacherCallCount(t, false /* expectZeroNewDetacherCallCount */, fakePlugin)
+	waitForDetachCallCount(t, 1 /* expectedDetachCallCount */, fakePlugin)
+
+	// Force detach metric due to out-of-service taint
+	// We should see one more force detach, so consider the initial count + 1
+	testForceDetachMetric(t, int(initialForceDetachCount)+1, metrics.ForceDetachReasonOutOfService)
+}
+
 func Test_ReportMultiAttachError(t *testing.T) {
 	type nodeWithPods struct {
 		name k8stypes.NodeName
@@ -1172,7 +1313,7 @@ func Test_ReportMultiAttachError(t *testing.T) {
 	nodeLister := informerFactory.Core().V1().Nodes().Lister()
 	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
 	rc := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
 
 	nodes := []k8stypes.NodeName{}
 	for _, n := range test.nodes {
diff --git a/pkg/generated/openapi/zz_generated.openapi.go b/pkg/generated/openapi/zz_generated.openapi.go
index 84136e51c00..3f9c89b71f8 100644
--- a/pkg/generated/openapi/zz_generated.openapi.go
+++ b/pkg/generated/openapi/zz_generated.openapi.go
@@ -54651,8 +54651,16 @@ func schema_k8sio_kube_controller_manager_config_v1alpha1_AttachDetachController
 							Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Duration"),
 						},
 					},
+					"disableForceDetachOnTimeout": {
+						SchemaProps: spec.SchemaProps{
+							Description: "DisableForceDetachOnTimeout disables force detach when the maximum unmount time is exceeded. Is false by default, and thus force detach on timeout is enabled.",
+							Default:     false,
+							Type:        []string{"boolean"},
+							Format:      "",
+						},
+					},
 				},
-				Required: []string{"DisableAttachDetachReconcilerSync", "ReconcilerSyncLoopPeriod"},
+				Required: []string{"DisableAttachDetachReconcilerSync", "ReconcilerSyncLoopPeriod", "disableForceDetachOnTimeout"},
 			},
 		},
 		Dependencies: []string{
diff --git a/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/types.go b/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/types.go
index df201d90d2c..a00fe57c99b 100644
--- a/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/types.go
+++ b/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/types.go
@@ -179,6 +179,10 @@ type AttachDetachControllerConfiguration struct {
 	// ReconcilerSyncLoopPeriod is the amount of time the reconciler sync states loop
 	// wait between successive executions. Is set to 60 sec by default.
 	ReconcilerSyncLoopPeriod metav1.Duration
+	// DisableForceDetachOnTimeout disables force detach when the maximum unmount
+	// time is exceeded. Is false by default, and thus force detach on timeout is
+	// enabled.
+	DisableForceDetachOnTimeout bool `json:"disableForceDetachOnTimeout"`
 }
 
 // CSRSigningControllerConfiguration contains elements describing CSRSigningController.
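To summarize the semantic change before the integration-test wiring below: the reconcile() hunk above gates only the timeout-based force detach behind the new flag, leaving the out-of-service taint path untouched. A sketch restating those conditions as a pure function (the function and parameter names here are illustrative, not the controller's):

package reconcilersketch

import "time"

// detachDecision restates the reconcile() conditions from the hunk above:
// forceDetach mirrors `!isHealthy && forceDetachTimeoutExpired`, and
// verifySafeToDetach mirrors `!(forceDetachTimeoutExpired || hasOutOfServiceTaint)`.
func detachDecision(elapsed, maxWaitForUnmount time.Duration,
	nodeHealthy, hasOutOfServiceTaint, disableForceDetachOnTimeout bool) (forceDetach, verifySafeToDetach bool) {
	maxWaitExpired := elapsed > maxWaitForUnmount
	// The new flag gates only the timeout path; the out-of-service taint
	// path is untouched.
	forceDetachTimeoutExpired := maxWaitExpired && !disableForceDetachOnTimeout
	forceDetach = !nodeHealthy && forceDetachTimeoutExpired
	verifySafeToDetach = !(forceDetachTimeoutExpired || hasOutOfServiceTaint)
	return forceDetach, verifySafeToDetach
}

This is why the new unit test still observes a detach once the out-of-service taint is applied, even though disableForceDetachOnTimeout is true.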
diff --git a/test/integration/volume/attach_detach_test.go b/test/integration/volume/attach_detach_test.go
index c7d4164ce09..c1c6f92fdcc 100644
--- a/test/integration/volume/attach_detach_test.go
+++ b/test/integration/volume/attach_detach_test.go
@@ -441,6 +441,7 @@ func createAdClients(t *testing.T, server *kubeapiservertesting.TestServer, sync
 		nil, /* prober */
 		false,
 		5*time.Second,
+		false,
 		timers,
 	)
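As the flag help text notes, operators who set --disable-force-detach-on-timeout=true are expected to recover failed nodes through the non-graceful node shutdown flow, that is, by tainting the node out of service. A rough client-go sketch of that step (clientset construction elided; the node name is illustrative, and in practice this is usually done with kubectl taint after confirming the node is actually shut down):

package main

import (
	"context"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// taintOutOfService applies the node.kubernetes.io/out-of-service taint,
// which the reconciler treats as permission to detach volumes even when
// disable-force-detach-on-timeout is set.
func taintOutOfService(ctx context.Context, cs kubernetes.Interface, nodeName string) error {
	node, err := cs.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
	if err != nil {
		return err
	}
	node.Spec.Taints = append(node.Spec.Taints, v1.Taint{
		Key:    v1.TaintNodeOutOfService,
		Effect: v1.TaintEffectNoExecute,
	})
	_, err = cs.CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{})
	return err
}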