diff --git a/cmd/kube-controller-manager/app/controllermanager.go b/cmd/kube-controller-manager/app/controllermanager.go index d829beb1759..f4f0dfc962c 100644 --- a/cmd/kube-controller-manager/app/controllermanager.go +++ b/cmd/kube-controller-manager/app/controllermanager.go @@ -408,6 +408,10 @@ func StartControllers(controllers map[string]InitFunc, s *options.CMServer, root go volumeController.Run(stop) time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter)) + if s.ReconcilerSyncLoopPeriod.Duration < time.Second { + return fmt.Errorf("Duration time must be greater than one second as set via command line option attach-detach-reconcile-sync-period.") + } + attachDetachController, attachDetachControllerErr := attachdetach.NewAttachDetachController( clientBuilder.ClientOrDie("attachdetach-controller"), @@ -416,7 +420,10 @@ func StartControllers(controllers map[string]InitFunc, s *options.CMServer, root sharedInformers.PersistentVolumeClaims().Informer(), sharedInformers.PersistentVolumes().Informer(), cloud, - ProbeAttachableVolumePlugins(s.VolumeConfiguration)) + ProbeAttachableVolumePlugins(s.VolumeConfiguration), + s.DisableAttachDetachReconcilerSync, + s.ReconcilerSyncLoopPeriod.Duration, + ) if attachDetachControllerErr != nil { return fmt.Errorf("failed to start attach/detach controller: %v", attachDetachControllerErr) } diff --git a/cmd/kube-controller-manager/app/options/options.go b/cmd/kube-controller-manager/app/options/options.go index 3609c3abd31..1c1f07a60e4 100644 --- a/cmd/kube-controller-manager/app/options/options.go +++ b/cmd/kube-controller-manager/app/options/options.go @@ -86,15 +86,16 @@ func NewCMServer() *CMServer { }, FlexVolumePluginDir: "/usr/libexec/kubernetes/kubelet-plugins/volume/exec/", }, - ContentType: "application/vnd.kubernetes.protobuf", - KubeAPIQPS: 20.0, - KubeAPIBurst: 30, - LeaderElection: leaderelection.DefaultLeaderElectionConfiguration(), - ControllerStartInterval: metav1.Duration{Duration: 0 * 
time.Second}, - EnableGarbageCollector: true, - ConcurrentGCSyncs: 20, - ClusterSigningCertFile: "/etc/kubernetes/ca/ca.pem", - ClusterSigningKeyFile: "/etc/kubernetes/ca/ca.key", + ContentType: "application/vnd.kubernetes.protobuf", + KubeAPIQPS: 20.0, + KubeAPIBurst: 30, + LeaderElection: leaderelection.DefaultLeaderElectionConfiguration(), + ControllerStartInterval: metav1.Duration{Duration: 0 * time.Second}, + EnableGarbageCollector: true, + ConcurrentGCSyncs: 20, + ClusterSigningCertFile: "/etc/kubernetes/ca/ca.pem", + ClusterSigningKeyFile: "/etc/kubernetes/ca/ca.key", + ReconcilerSyncLoopPeriod: metav1.Duration{Duration: 5 * time.Second}, }, } s.LeaderElection.LeaderElect = true @@ -181,6 +182,8 @@ func (s *CMServer) AddFlags(fs *pflag.FlagSet) { fs.Float32Var(&s.SecondaryNodeEvictionRate, "secondary-node-eviction-rate", 0.01, "Number of nodes per second on which pods are deleted in case of node failure when a zone is unhealthy (see --unhealthy-zone-threshold for definition of healthy/unhealthy). Zone refers to entire cluster in non-multizone clusters. This value is implicitly overridden to 0 if the cluster size is smaller than --large-cluster-size-threshold.") fs.Int32Var(&s.LargeClusterSizeThreshold, "large-cluster-size-threshold", 50, "Number of nodes from which NodeController treats the cluster as large for the eviction logic purposes. --secondary-node-eviction-rate is implicitly overridden to 0 for clusters this size or smaller.") fs.Float32Var(&s.UnhealthyZoneThreshold, "unhealthy-zone-threshold", 0.55, "Fraction of Nodes in a zone which needs to be not Ready (minimum 3) for zone to be treated as unhealthy. ") + fs.BoolVar(&s.DisableAttachDetachReconcilerSync, "disable-attach-detach-reconcile-sync", false, "Disable volume attach detach reconciler sync. Disabling this may cause volumes to be mismatched with pods. 
Use wisely.") + fs.DurationVar(&s.ReconcilerSyncLoopPeriod.Duration, "attach-detach-reconcile-sync-period", s.ReconcilerSyncLoopPeriod.Duration, "The reconciler sync wait time between volume attach detach. This duration must be larger than one second, and increasing this value from the default may allow for volumes to be mismatched with pods.") leaderelection.BindFlags(&s.LeaderElection, fs) config.DefaultFeatureGate.AddFlag(fs) diff --git a/hack/verify-flags/known-flags.txt b/hack/verify-flags/known-flags.txt index 3b25ad6c185..d10620213c8 100644 --- a/hack/verify-flags/known-flags.txt +++ b/hack/verify-flags/known-flags.txt @@ -635,3 +635,5 @@ garbage-collector-enabled viper-config log-lines-total run-duration +attach-detach-reconcile-sync-period +disable-attach-detach-reconcile-sync diff --git a/pkg/apis/componentconfig/types.go b/pkg/apis/componentconfig/types.go index 56dad915739..560e4b42df4 100644 --- a/pkg/apis/componentconfig/types.go +++ b/pkg/apis/componentconfig/types.go @@ -772,6 +772,13 @@ type KubeControllerManagerConfiguration struct { // Zone is treated as unhealthy in nodeEvictionRate and secondaryNodeEvictionRate when at least // unhealthyZoneThreshold (no less than 3) of Nodes in the zone are NotReady UnhealthyZoneThreshold float32 + // Reconciler runs a periodic loop to reconcile the desired state of the world with + // the actual state of the world by triggering attach detach operations. + // This flag disables the reconciler sync when true. It is false by default, so the sync is enabled. + DisableAttachDetachReconcilerSync bool + // ReconcilerSyncLoopPeriod is the amount of time the reconciler sync states loop + // waits between successive executions. It is set to 5 sec by default. 
+ ReconcilerSyncLoopPeriod metav1.Duration } // VolumeConfiguration contains *all* enumerated flags meant to configure all volume diff --git a/pkg/controller/volume/attachdetach/attach_detach_controller.go b/pkg/controller/volume/attachdetach/attach_detach_controller.go index 7299137a7fe..945ea5cf0db 100644 --- a/pkg/controller/volume/attachdetach/attach_detach_controller.go +++ b/pkg/controller/volume/attachdetach/attach_detach_controller.go @@ -58,10 +58,6 @@ const ( // desiredStateOfWorldPopulatorLoopSleepPeriod is the amount of time the // DesiredStateOfWorldPopulator loop waits between successive executions desiredStateOfWorldPopulatorLoopSleepPeriod time.Duration = 1 * time.Minute - - // reconcilerSyncDuration is the amount of time the reconciler sync states loop - // wait between successive executions - reconcilerSyncDuration time.Duration = 5 * time.Second ) // AttachDetachController defines the operations supported by this controller. @@ -78,7 +74,9 @@ func NewAttachDetachController( pvcInformer kcache.SharedInformer, pvInformer kcache.SharedInformer, cloud cloudprovider.Interface, - plugins []volume.VolumePlugin) (AttachDetachController, error) { + plugins []volume.VolumePlugin, + disableReconciliationSync bool, + reconcilerSyncDuration time.Duration) (AttachDetachController, error) { // TODO: The default resyncPeriod for shared informers is 12 hours, this is // unacceptable for the attach/detach controller. 
For example, if a pod is // skipped because the node it is scheduled to didn't set its annotation in @@ -131,10 +129,13 @@ func NewAttachDetachController( false)) // flag for experimental binary check for volume mount adc.nodeStatusUpdater = statusupdater.NewNodeStatusUpdater( kubeClient, nodeInformer, adc.actualStateOfWorld) + + // Default these to values in options adc.reconciler = reconciler.NewReconciler( reconcilerLoopPeriod, reconcilerMaxWaitForUnmountDuration, reconcilerSyncDuration, + disableReconciliationSync, adc.desiredStateOfWorld, adc.actualStateOfWorld, adc.attacherDetacher, diff --git a/pkg/controller/volume/attachdetach/attach_detach_controller_test.go b/pkg/controller/volume/attachdetach/attach_detach_controller_test.go index c301acd2b66..defe9132c5a 100644 --- a/pkg/controller/volume/attachdetach/attach_detach_controller_test.go +++ b/pkg/controller/volume/attachdetach/attach_detach_controller_test.go @@ -41,7 +41,9 @@ func Test_NewAttachDetachController_Positive(t *testing.T) { pvcInformer, pvInformer, nil, /* cloud */ - nil /* plugins */) + nil, /* plugins */ + false, + time.Second*5) // Assert if err != nil { diff --git a/pkg/controller/volume/attachdetach/reconciler/reconciler.go b/pkg/controller/volume/attachdetach/reconciler/reconciler.go index b12bd113eb3..5bce52f92d6 100644 --- a/pkg/controller/volume/attachdetach/reconciler/reconciler.go +++ b/pkg/controller/volume/attachdetach/reconciler/reconciler.go @@ -57,6 +57,7 @@ func NewReconciler( loopPeriod time.Duration, maxWaitForUnmountDuration time.Duration, syncDuration time.Duration, + disableReconciliationSync bool, desiredStateOfWorld cache.DesiredStateOfWorld, actualStateOfWorld cache.ActualStateOfWorld, attacherDetacher operationexecutor.OperationExecutor, @@ -65,6 +66,7 @@ func NewReconciler( loopPeriod: loopPeriod, maxWaitForUnmountDuration: maxWaitForUnmountDuration, syncDuration: syncDuration, + disableReconciliationSync: disableReconciliationSync, desiredStateOfWorld: 
desiredStateOfWorld, actualStateOfWorld: actualStateOfWorld, attacherDetacher: attacherDetacher, @@ -82,18 +84,27 @@ type reconciler struct { attacherDetacher operationexecutor.OperationExecutor nodeStatusUpdater statusupdater.NodeStatusUpdater timeOfLastSync time.Time + disableReconciliationSync bool } func (rc *reconciler) Run(stopCh <-chan struct{}) { wait.Until(rc.reconciliationLoopFunc(), rc.loopPeriod, stopCh) } +// reconciliationLoopFunc returns the reconciler loop body. Its sync step can be disabled via the cli option disable-attach-detach-reconcile-sync. +// The sync periodically checks whether the attached volumes from actual state +// are still attached to the node and updates the status if they are not. func (rc *reconciler) reconciliationLoopFunc() func() { return func() { + rc.reconcile() - // reconciler periodically checks whether the attached volumes from actual state - // are still attached to the node and udpate the status if they are not. - if time.Since(rc.timeOfLastSync) > rc.syncDuration { + + if rc.disableReconciliationSync { + glog.V(5).Info("Skipping reconciling attached volumes still attached since it is disabled via the command line.") + } else if rc.syncDuration < time.Second { + glog.V(5).Info("Skipping reconciling attached volumes still attached since it is set to less than one second via the command line.") + } else if time.Since(rc.timeOfLastSync) > rc.syncDuration { + glog.V(5).Info("Starting reconciling attached volumes still attached") rc.sync() } } diff --git a/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go b/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go index fa3729bc9cb..69f4ad235c7 100644 --- a/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go +++ b/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go @@ -55,7 +55,7 @@ func Test_Run_Positive_DoNothing(t *testing.T) { nsu := statusupdater.NewNodeStatusUpdater( fakeKubeClient, nodeInformer, asw) reconciler := NewReconciler( - reconcilerLoopPeriod, maxWaitForUnmountDuration, 
syncLoopPeriod, dsw, asw, ad, nsu) + reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu) // Act ch := make(chan struct{}) @@ -83,7 +83,7 @@ func Test_Run_Positive_OneDesiredVolumeAttach(t *testing.T) { ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(fakeKubeClient, volumePluginMgr, fakeRecorder, false /* checkNodeCapabilitiesBeforeMount */)) nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */) reconciler := NewReconciler( - reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu) + reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu) podName := "pod-uid" volumeName := v1.UniqueVolumeName("volume-name") volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName) @@ -129,7 +129,7 @@ func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithUnmountedVolume(t *te ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(fakeKubeClient, volumePluginMgr, fakeRecorder, false /* checkNodeCapabilitiesBeforeMount */)) nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */) reconciler := NewReconciler( - reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu) + reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu) podName := "pod-uid" volumeName := v1.UniqueVolumeName("volume-name") volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName) @@ -196,7 +196,7 @@ func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithMountedVolume(t *test ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(fakeKubeClient, volumePluginMgr, fakeRecorder, false /* checkNodeCapabilitiesBeforeMount */)) nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */) reconciler := NewReconciler( - reconcilerLoopPeriod, 
maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu) + reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu) podName := "pod-uid" volumeName := v1.UniqueVolumeName("volume-name") volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName) @@ -263,7 +263,7 @@ func Test_Run_Negative_OneDesiredVolumeAttachThenDetachWithUnmountedVolumeUpdate ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(fakeKubeClient, volumePluginMgr, fakeRecorder, false /* checkNodeCapabilitiesBeforeMount */)) nsu := statusupdater.NewFakeNodeStatusUpdater(true /* returnError */) reconciler := NewReconciler( - reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu) + reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu) podName := "pod-uid" volumeName := v1.UniqueVolumeName("volume-name") volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName) diff --git a/test/integration/volume/attach_detach_test.go b/test/integration/volume/attach_detach_test.go index eb266bd24cd..34fa8f94582 100644 --- a/test/integration/volume/attach_detach_test.go +++ b/test/integration/volume/attach_detach_test.go @@ -195,7 +195,7 @@ func createAdClients(ns *v1.Namespace, t *testing.T, server *httptest.Server, sy nodeInformer := informers.NewNodeInformer(clientset.NewForConfigOrDie(restclient.AddUserAgent(&config, "node-informer")), resyncPeriod) pvcInformer := informers.NewNodeInformer(clientset.NewForConfigOrDie(restclient.AddUserAgent(&config, "pvc-informer")), resyncPeriod) pvInformer := informers.NewNodeInformer(clientset.NewForConfigOrDie(restclient.AddUserAgent(&config, "pv-informer")), resyncPeriod) - ctrl, err := attachdetach.NewAttachDetachController(testClient, podInformer, nodeInformer, pvcInformer, pvInformer, cloud, plugins) + ctrl, err := attachdetach.NewAttachDetachController(testClient, podInformer, 
nodeInformer, pvcInformer, pvInformer, cloud, plugins, false, time.Second*5) if err != nil { t.Fatalf("Error creating AttachDetach : %v", err) }