diff --git a/cmd/kube-controller-manager/app/controllermanager.go b/cmd/kube-controller-manager/app/controllermanager.go
index d829beb1759..baf3dbffcb3 100644
--- a/cmd/kube-controller-manager/app/controllermanager.go
+++ b/cmd/kube-controller-manager/app/controllermanager.go
@@ -408,6 +408,10 @@ func StartControllers(controllers map[string]InitFunc, s *options.CMServer, root
 	go volumeController.Run(stop)
 	time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))
 
+	if s.ReconcilerSyncLoopPeriod.Duration < time.Second {
+		return fmt.Errorf("duration set via command line option attach-detach-reconcile-period must be at least one second; one minute is recommended")
+	}
+
 	attachDetachController, attachDetachControllerErr :=
 		attachdetach.NewAttachDetachController(
 			clientBuilder.ClientOrDie("attachdetach-controller"),
@@ -416,7 +420,10 @@ func StartControllers(controllers map[string]InitFunc, s *options.CMServer, root
 			sharedInformers.PersistentVolumeClaims().Informer(),
 			sharedInformers.PersistentVolumes().Informer(),
 			cloud,
-			ProbeAttachableVolumePlugins(s.VolumeConfiguration))
+			ProbeAttachableVolumePlugins(s.VolumeConfiguration),
+			s.DisableAttachDetachReconcilerSync,
+			s.ReconcilerSyncLoopPeriod.Duration,
+		)
 	if attachDetachControllerErr != nil {
 		return fmt.Errorf("failed to start attach/detach controller: %v", attachDetachControllerErr)
 	}
diff --git a/cmd/kube-controller-manager/app/options/options.go b/cmd/kube-controller-manager/app/options/options.go
index 3609c3abd31..5c18f02062c 100644
--- a/cmd/kube-controller-manager/app/options/options.go
+++ b/cmd/kube-controller-manager/app/options/options.go
@@ -86,15 +86,16 @@ func NewCMServer() *CMServer {
 			},
 			FlexVolumePluginDir: "/usr/libexec/kubernetes/kubelet-plugins/volume/exec/",
 		},
-		ContentType:             "application/vnd.kubernetes.protobuf",
-		KubeAPIQPS:              20.0,
-		KubeAPIBurst:            30,
-		LeaderElection:          leaderelection.DefaultLeaderElectionConfiguration(),
-		ControllerStartInterval: metav1.Duration{Duration: 0 * time.Second},
-		EnableGarbageCollector:  true,
-		ConcurrentGCSyncs:       20,
-		ClusterSigningCertFile:  "/etc/kubernetes/ca/ca.pem",
-		ClusterSigningKeyFile:   "/etc/kubernetes/ca/ca.key",
+		ContentType:              "application/vnd.kubernetes.protobuf",
+		KubeAPIQPS:               20.0,
+		KubeAPIBurst:             30,
+		LeaderElection:           leaderelection.DefaultLeaderElectionConfiguration(),
+		ControllerStartInterval:  metav1.Duration{Duration: 0 * time.Second},
+		EnableGarbageCollector:   true,
+		ConcurrentGCSyncs:        20,
+		ClusterSigningCertFile:   "/etc/kubernetes/ca/ca.pem",
+		ClusterSigningKeyFile:    "/etc/kubernetes/ca/ca.key",
+		ReconcilerSyncLoopPeriod: metav1.Duration{Duration: 5 * time.Second},
 	},
 }
 s.LeaderElection.LeaderElect = true
@@ -181,6 +182,8 @@ func (s *CMServer) AddFlags(fs *pflag.FlagSet) {
 	fs.Float32Var(&s.SecondaryNodeEvictionRate, "secondary-node-eviction-rate", 0.01, "Number of nodes per second on which pods are deleted in case of node failure when a zone is unhealthy (see --unhealthy-zone-threshold for definition of healthy/unhealthy). Zone refers to entire cluster in non-multizone clusters. This value is implicitly overridden to 0 if the cluster size is smaller than --large-cluster-size-threshold.")
 	fs.Int32Var(&s.LargeClusterSizeThreshold, "large-cluster-size-threshold", 50, "Number of nodes from which NodeController treats the cluster as large for the eviction logic purposes. --secondary-node-eviction-rate is implicitly overridden to 0 for clusters this size or smaller.")
 	fs.Float32Var(&s.UnhealthyZoneThreshold, "unhealthy-zone-threshold", 0.55, "Fraction of Nodes in a zone which needs to be not Ready (minimum 3) for zone to be treated as unhealthy. ")
+	fs.BoolVar(&s.DisableAttachDetachReconcilerSync, "disable-attach-detach-reconcile", false, "Disable volume attach/detach reconciler sync. Disabling this may cause volumes to be mismatched with pods. Use wisely.")
+	fs.DurationVar(&s.ReconcilerSyncLoopPeriod.Duration, "attach-detach-reconcile-period", s.ReconcilerSyncLoopPeriod.Duration, "The reconciler sync wait time between volume attach/detach operations. This duration must be at least one second; increasing it above the default may delay the detection of volume mismatches.")
 	leaderelection.BindFlags(&s.LeaderElection, fs)
 
 	config.DefaultFeatureGate.AddFlag(fs)
diff --git a/hack/verify-flags/known-flags.txt b/hack/verify-flags/known-flags.txt
index 3b25ad6c185..9a9ecf6b191 100644
--- a/hack/verify-flags/known-flags.txt
+++ b/hack/verify-flags/known-flags.txt
@@ -635,3 +635,5 @@ garbage-collector-enabled
 viper-config
 log-lines-total
 run-duration
+disable-attach-detach-reconcile
+attach-detach-reconcile-period
diff --git a/pkg/apis/componentconfig/types.go b/pkg/apis/componentconfig/types.go
index 56dad915739..ad2a0993a87 100644
--- a/pkg/apis/componentconfig/types.go
+++ b/pkg/apis/componentconfig/types.go
@@ -772,6 +772,13 @@ type KubeControllerManagerConfiguration struct {
 	// Zone is treated as unhealthy in nodeEvictionRate and secondaryNodeEvictionRate when at least
 	// unhealthyZoneThreshold (no less than 3) of Nodes in the zone are NotReady
 	UnhealthyZoneThreshold float32
+	// Reconciler runs a periodic loop to reconcile the desired state of the world
+	// with the actual state of the world by triggering attach/detach operations.
+	// This flag disables that sync; it is false by default, so the sync is enabled.
+	DisableAttachDetachReconcilerSync bool
+	// ReconcilerSyncLoopPeriod is the amount of time the reconciler sync states loop
+	// waits between successive executions. It is set to 5 seconds by default.
+	ReconcilerSyncLoopPeriod metav1.Duration
 }
 
 // VolumeConfiguration contains *all* enumerated flags meant to configure all volume
diff --git a/pkg/controller/volume/attachdetach/attach_detach_controller.go b/pkg/controller/volume/attachdetach/attach_detach_controller.go
index 94a26d353a9..948c8cc7006 100644
--- a/pkg/controller/volume/attachdetach/attach_detach_controller.go
+++ b/pkg/controller/volume/attachdetach/attach_detach_controller.go
@@ -58,10 +58,6 @@ const (
 	// desiredStateOfWorldPopulatorLoopSleepPeriod is the amount of time the
 	// DesiredStateOfWorldPopulator loop waits between successive executions
 	desiredStateOfWorldPopulatorLoopSleepPeriod time.Duration = 1 * time.Minute
-
-	// reconcilerSyncDuration is the amount of time the reconciler sync states loop
-	// wait between successive executions
-	reconcilerSyncDuration time.Duration = 5 * time.Second
 )
 
 // AttachDetachController defines the operations supported by this controller.
@@ -78,7 +74,9 @@ func NewAttachDetachController(
 	pvcInformer kcache.SharedInformer,
 	pvInformer kcache.SharedInformer,
 	cloud cloudprovider.Interface,
-	plugins []volume.VolumePlugin) (AttachDetachController, error) {
+	plugins []volume.VolumePlugin,
+	disableReconciliation bool,
+	reconcilerSyncDuration time.Duration) (AttachDetachController, error) {
 	// TODO: The default resyncPeriod for shared informers is 12 hours, this is
 	// unacceptable for the attach/detach controller. For example, if a pod is
 	// skipped because the node it is scheduled to didn't set its annotation in
@@ -131,10 +129,13 @@ func NewAttachDetachController(
 			false)) // flag for experimental binary check for volume mount
 	adc.nodeStatusUpdater = statusupdater.NewNodeStatusUpdater(
 		kubeClient, nodeInformer, adc.actualStateOfWorld)
+
+	// These settings default to the values supplied via the controller-manager options.
 	adc.reconciler = reconciler.NewReconciler(
 		reconcilerLoopPeriod,
 		reconcilerMaxWaitForUnmountDuration,
 		reconcilerSyncDuration,
+		disableReconciliation,
 		adc.desiredStateOfWorld,
 		adc.actualStateOfWorld,
 		adc.attacherDetacher,
diff --git a/pkg/controller/volume/attachdetach/attach_detach_controller_test.go b/pkg/controller/volume/attachdetach/attach_detach_controller_test.go
index c301acd2b66..defe9132c5a 100644
--- a/pkg/controller/volume/attachdetach/attach_detach_controller_test.go
+++ b/pkg/controller/volume/attachdetach/attach_detach_controller_test.go
@@ -41,7 +41,9 @@ func Test_NewAttachDetachController_Positive(t *testing.T) {
 		pvcInformer,
 		pvInformer,
 		nil, /* cloud */
-		nil /* plugins */)
+		nil, /* plugins */
+		false,
+		time.Second*5)
 
 	// Assert
 	if err != nil {
diff --git a/pkg/controller/volume/attachdetach/reconciler/reconciler.go b/pkg/controller/volume/attachdetach/reconciler/reconciler.go
index b12bd113eb3..6537603f163 100644
--- a/pkg/controller/volume/attachdetach/reconciler/reconciler.go
+++ b/pkg/controller/volume/attachdetach/reconciler/reconciler.go
@@ -57,6 +57,7 @@ func NewReconciler(
 	loopPeriod time.Duration,
 	maxWaitForUnmountDuration time.Duration,
 	syncDuration time.Duration,
+	disableReconciliation bool,
 	desiredStateOfWorld cache.DesiredStateOfWorld,
 	actualStateOfWorld cache.ActualStateOfWorld,
 	attacherDetacher operationexecutor.OperationExecutor,
@@ -65,6 +66,7 @@ func NewReconciler(
 		loopPeriod:                loopPeriod,
 		maxWaitForUnmountDuration: maxWaitForUnmountDuration,
 		syncDuration:              syncDuration,
+		disableReconciliation:     disableReconciliation,
 		desiredStateOfWorld:       desiredStateOfWorld,
 		actualStateOfWorld:        actualStateOfWorld,
 		attacherDetacher:          attacherDetacher,
@@ -82,18 +84,27 @@ type reconciler struct {
 	attacherDetacher  operationexecutor.OperationExecutor
 	nodeStatusUpdater statusupdater.NodeStatusUpdater
 	timeOfLastSync    time.Time
+	disableReconciliation bool
 }
 
 func (rc *reconciler) Run(stopCh <-chan struct{}) {
 	wait.Until(rc.reconciliationLoopFunc(), rc.loopPeriod, stopCh)
 }
 
+// reconciliationLoopFunc returns the reconciliation loop function. It periodically
+// checks whether the attached volumes from the actual state are still attached to
+// the node and updates the status if they are not. This sync can be disabled via
+// the disable-attach-detach-reconcile command line option.
 func (rc *reconciler) reconciliationLoopFunc() func() {
 	return func() {
+		rc.reconcile()
-		// reconciler periodically checks whether the attached volumes from actual state
-		// are still attached to the node and udpate the status if they are not.
-		if time.Since(rc.timeOfLastSync) > rc.syncDuration {
+
+		if rc.disableReconciliation {
+			glog.V(5).Info("Skipping reconciling attached volumes still attached since it is disabled via the command line.")
+		} else if rc.syncDuration < time.Second {
+			glog.V(5).Info("Skipping reconciling attached volumes still attached since it is set to less than one second via the command line.")
+		} else if time.Since(rc.timeOfLastSync) > rc.syncDuration {
+			glog.V(5).Info("Starting reconciling attached volumes still attached")
 			rc.sync()
 		}
 	}
 }
diff --git a/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go b/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go
index fa3729bc9cb..69f4ad235c7 100644
--- a/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go
+++ b/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go
@@ -55,7 +55,7 @@ func Test_Run_Positive_DoNothing(t *testing.T) {
 	nsu := statusupdater.NewNodeStatusUpdater(
 		fakeKubeClient, nodeInformer, asw)
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu)
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu)
 
 	// Act
 	ch := make(chan struct{})
@@ -83,7 +83,7 @@ func Test_Run_Positive_OneDesiredVolumeAttach(t *testing.T) {
 	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(fakeKubeClient, volumePluginMgr, fakeRecorder, false /* checkNodeCapabilitiesBeforeMount */))
 	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu)
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu)
 	podName := "pod-uid"
 	volumeName := v1.UniqueVolumeName("volume-name")
 	volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
@@ -129,7 +129,7 @@ func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithUnmountedVolume(t *te
 	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(fakeKubeClient, volumePluginMgr, fakeRecorder, false /* checkNodeCapabilitiesBeforeMount */))
 	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu)
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu)
 	podName := "pod-uid"
 	volumeName := v1.UniqueVolumeName("volume-name")
 	volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
@@ -196,7 +196,7 @@ func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithMountedVolume(t *test
 	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(fakeKubeClient, volumePluginMgr, fakeRecorder, false /* checkNodeCapabilitiesBeforeMount */))
 	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu)
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu)
 	podName := "pod-uid"
 	volumeName := v1.UniqueVolumeName("volume-name")
 	volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
@@ -263,7 +263,7 @@ func Test_Run_Negative_OneDesiredVolumeAttachThenDetachWithUnmountedVolumeUpdate
 	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(fakeKubeClient, volumePluginMgr, fakeRecorder, false /* checkNodeCapabilitiesBeforeMount */))
 	nsu := statusupdater.NewFakeNodeStatusUpdater(true /* returnError */)
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu)
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu)
 	podName := "pod-uid"
 	volumeName := v1.UniqueVolumeName("volume-name")
 	volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
diff --git a/test/integration/volume/attach_detach_test.go b/test/integration/volume/attach_detach_test.go
index eb266bd24cd..34fa8f94582 100644
--- a/test/integration/volume/attach_detach_test.go
+++ b/test/integration/volume/attach_detach_test.go
@@ -195,7 +195,7 @@ func createAdClients(ns *v1.Namespace, t *testing.T, server *httptest.Server, sy
 	nodeInformer := informers.NewNodeInformer(clientset.NewForConfigOrDie(restclient.AddUserAgent(&config, "node-informer")), resyncPeriod)
 	pvcInformer := informers.NewNodeInformer(clientset.NewForConfigOrDie(restclient.AddUserAgent(&config, "pvc-informer")), resyncPeriod)
 	pvInformer := informers.NewNodeInformer(clientset.NewForConfigOrDie(restclient.AddUserAgent(&config, "pv-informer")), resyncPeriod)
-	ctrl, err := attachdetach.NewAttachDetachController(testClient, podInformer, nodeInformer, pvcInformer, pvInformer, cloud, plugins)
+	ctrl, err := attachdetach.NewAttachDetachController(testClient, podInformer, nodeInformer, pvcInformer, pvInformer, cloud, plugins, false, time.Second*5)
 	if err != nil {
 		t.Fatalf("Error creating AttachDetach : %v", err)
 	}
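Note on the gating added in reconciliationLoopFunc: the sync runs only when it is not disabled, the configured period is at least one second, and that period has elapsed since the last sync. The following self-contained Go sketch illustrates the same ordering of checks; the syncGate type and shouldSync helper are illustrative names only and are not part of the Kubernetes code base.

package main

import (
	"fmt"
	"time"
)

// syncGate mirrors the reconciler's sync-gating state: a disable switch,
// the configured sync period, and the time of the last sync.
type syncGate struct {
	disabled       bool
	syncDuration   time.Duration
	timeOfLastSync time.Time
}

// shouldSync reports whether a full sync should run now, using the same
// order of checks as the patched reconciliationLoopFunc.
func (g *syncGate) shouldSync(now time.Time) bool {
	if g.disabled {
		return false // sync disabled via --disable-attach-detach-reconcile
	}
	if g.syncDuration < time.Second {
		return false // periods under one second are rejected
	}
	return now.Sub(g.timeOfLastSync) > g.syncDuration
}

func main() {
	g := &syncGate{syncDuration: 5 * time.Second, timeOfLastSync: time.Now().Add(-10 * time.Second)}
	fmt.Println(g.shouldSync(time.Now())) // true: more than 5s since the last sync

	g.disabled = true
	fmt.Println(g.shouldSync(time.Now())) // false: gating disabled
}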