Merge pull request #39551 from chrislovecnm/reconciler-time-increases

Automatic merge from submit-queue (batch tested with PRs 39628, 39551, 38746, 38352, 39607) Increasing times on reconciling volumes fixing impact to AWS. **What this PR does / why we need it**: We are currently blocked by API timeouts with PV volumes. See https://github.com/kubernetes/kubernetes/issues/39526. This is a workaround, not a fix. **Special notes for your reviewer**: A second PR will be dropped with CLI cobra options in it, but we are starting with increasing the reconciliation periods. I am dropping this without major testing and will test on our AWS account. Will be marked WIP until I run smoke tests. **Release note**: ```release-note Provide kubernetes-controller-manager flags to control volume attach/detach reconciler sync. The duration of the syncs can be controlled, and the syncs can be shut off as well. ```
2025-08-30 15:02:23 +00:00 · 2017-01-10 11:54:15 -08:00 · 2017-01-10 11:54:15 -08:00 · 7c3fff1a95
commit 7c3fff1a95
parent fb256b2911 ac49139c9f
9 changed files with 58 additions and 25 deletions
--- a/cmd/kube-controller-manager/app/controllermanager.go
+++ b/cmd/kube-controller-manager/app/controllermanager.go
@ -408,6 +408,10 @@ func StartControllers(controllers map[string]InitFunc, s *options.CMServer, root
 	go volumeController.Run(stop)
 	time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))

+	if s.ReconcilerSyncLoopPeriod.Duration < time.Second {
+		return fmt.Errorf("Duration time must be greater than one second as set via command line option reconcile-sync-loop-period.")
+	}
+
 	attachDetachController, attachDetachControllerErr :=
 		attachdetach.NewAttachDetachController(
 			clientBuilder.ClientOrDie("attachdetach-controller"),
@ -416,7 +420,10 @@ func StartControllers(controllers map[string]InitFunc, s *options.CMServer, root
 			sharedInformers.PersistentVolumeClaims().Informer(),
 			sharedInformers.PersistentVolumes().Informer(),
 			cloud,
-			ProbeAttachableVolumePlugins(s.VolumeConfiguration))
+			ProbeAttachableVolumePlugins(s.VolumeConfiguration),
+			s.DisableAttachDetachReconcilerSync,
+			s.ReconcilerSyncLoopPeriod.Duration,
+		)
 	if attachDetachControllerErr != nil {
 		return fmt.Errorf("failed to start attach/detach controller: %v", attachDetachControllerErr)
 	}
--- a/cmd/kube-controller-manager/app/options/options.go
+++ b/cmd/kube-controller-manager/app/options/options.go
@ -86,15 +86,16 @@ func NewCMServer() *CMServer {
 				},
 				FlexVolumePluginDir: "/usr/libexec/kubernetes/kubelet-plugins/volume/exec/",
 			},
-			ContentType:             "application/vnd.kubernetes.protobuf",
-			KubeAPIQPS:              20.0,
-			KubeAPIBurst:            30,
-			LeaderElection:          leaderelection.DefaultLeaderElectionConfiguration(),
-			ControllerStartInterval: metav1.Duration{Duration: 0 * time.Second},
-			EnableGarbageCollector:  true,
-			ConcurrentGCSyncs:       20,
-			ClusterSigningCertFile:  "/etc/kubernetes/ca/ca.pem",
-			ClusterSigningKeyFile:   "/etc/kubernetes/ca/ca.key",
+			ContentType:              "application/vnd.kubernetes.protobuf",
+			KubeAPIQPS:               20.0,
+			KubeAPIBurst:             30,
+			LeaderElection:           leaderelection.DefaultLeaderElectionConfiguration(),
+			ControllerStartInterval:  metav1.Duration{Duration: 0 * time.Second},
+			EnableGarbageCollector:   true,
+			ConcurrentGCSyncs:        20,
+			ClusterSigningCertFile:   "/etc/kubernetes/ca/ca.pem",
+			ClusterSigningKeyFile:    "/etc/kubernetes/ca/ca.key",
+			ReconcilerSyncLoopPeriod: metav1.Duration{Duration: 5 * time.Second},
 		},
 	}
 	s.LeaderElection.LeaderElect = true
@ -181,6 +182,8 @@ func (s *CMServer) AddFlags(fs *pflag.FlagSet) {
 	fs.Float32Var(&s.SecondaryNodeEvictionRate, "secondary-node-eviction-rate", 0.01, "Number of nodes per second on which pods are deleted in case of node failure when a zone is unhealthy (see --unhealthy-zone-threshold for definition of healthy/unhealthy). Zone refers to entire cluster in non-multizone clusters. This value is implicitly overridden to 0 if the cluster size is smaller than --large-cluster-size-threshold.")
 	fs.Int32Var(&s.LargeClusterSizeThreshold, "large-cluster-size-threshold", 50, "Number of nodes from which NodeController treats the cluster as large for the eviction logic purposes. --secondary-node-eviction-rate is implicitly overridden to 0 for clusters this size or smaller.")
 	fs.Float32Var(&s.UnhealthyZoneThreshold, "unhealthy-zone-threshold", 0.55, "Fraction of Nodes in a zone which needs to be not Ready (minimum 3) for zone to be treated as unhealthy. ")
+	fs.BoolVar(&s.DisableAttachDetachReconcilerSync, "disable-attach-detach-reconcile-sync", false, "Disable volume attach detach reconciler sync. Disabling this may cause volumes to be mismatched with pods. Use wisely.")
+	fs.DurationVar(&s.ReconcilerSyncLoopPeriod.Duration, "attach-detach-reconcile-sync-period", s.ReconcilerSyncLoopPeriod.Duration, "The reconciler sync wait time between volume attach detach. This duration must be larger than one second, and increasing this value from the default may allow for volumes to be mismatched with pods.")

 	leaderelection.BindFlags(&s.LeaderElection, fs)
 	config.DefaultFeatureGate.AddFlag(fs)
--- a/hack/verify-flags/known-flags.txt
+++ b/hack/verify-flags/known-flags.txt
@ -635,3 +635,5 @@ garbage-collector-enabled
 viper-config
 log-lines-total
 run-duration
+attach-detach-reconcile-sync-period
+disable-attach-detach-reconcile-sync
--- a/pkg/apis/componentconfig/types.go
+++ b/pkg/apis/componentconfig/types.go
@ -772,6 +772,13 @@ type KubeControllerManagerConfiguration struct {
 	// Zone is treated as unhealthy in nodeEvictionRate and secondaryNodeEvictionRate when at least
 	// unhealthyZoneThreshold (no less than 3) of Nodes in the zone are NotReady
 	UnhealthyZoneThreshold float32
+	// Reconciler runs a periodic loop to reconcile the desired state of the world with
+	// the actual state of the world by triggering attach detach operations.
+	// This flag enables or disables reconcile.  Is false by default, and thus enabled.
+	DisableAttachDetachReconcilerSync bool
+	// ReconcilerSyncLoopPeriod is the amount of time the reconciler sync states loop
+	// waits between successive executions. Is set to 5 sec by default.
+	ReconcilerSyncLoopPeriod metav1.Duration
 }

 // VolumeConfiguration contains *all* enumerated flags meant to configure all volume
--- a/pkg/controller/volume/attachdetach/attach_detach_controller.go
+++ b/pkg/controller/volume/attachdetach/attach_detach_controller.go
@ -58,10 +58,6 @@ const (
 	// desiredStateOfWorldPopulatorLoopSleepPeriod is the amount of time the
 	// DesiredStateOfWorldPopulator loop waits between successive executions
 	desiredStateOfWorldPopulatorLoopSleepPeriod time.Duration = 1 * time.Minute
-
-	// reconcilerSyncDuration is the amount of time the reconciler sync states loop
-	// wait between successive executions
-	reconcilerSyncDuration time.Duration = 5 * time.Second
 )

 // AttachDetachController defines the operations supported by this controller.
@ -78,7 +74,9 @@ func NewAttachDetachController(
 	pvcInformer kcache.SharedInformer,
 	pvInformer kcache.SharedInformer,
 	cloud cloudprovider.Interface,
-	plugins []volume.VolumePlugin) (AttachDetachController, error) {
+	plugins []volume.VolumePlugin,
+	disableReconciliationSync bool,
+	reconcilerSyncDuration time.Duration) (AttachDetachController, error) {
 	// TODO: The default resyncPeriod for shared informers is 12 hours, this is
 	// unacceptable for the attach/detach controller. For example, if a pod is
 	// skipped because the node it is scheduled to didn't set its annotation in
@ -131,10 +129,13 @@ func NewAttachDetachController(
 			false)) // flag for experimental binary check for volume mount
 	adc.nodeStatusUpdater = statusupdater.NewNodeStatusUpdater(
 		kubeClient, nodeInformer, adc.actualStateOfWorld)
+
+	// Default these to values in options
 	adc.reconciler = reconciler.NewReconciler(
 		reconcilerLoopPeriod,
 		reconcilerMaxWaitForUnmountDuration,
 		reconcilerSyncDuration,
+		disableReconciliationSync,
 		adc.desiredStateOfWorld,
 		adc.actualStateOfWorld,
 		adc.attacherDetacher,
--- a/pkg/controller/volume/attachdetach/attach_detach_controller_test.go
+++ b/pkg/controller/volume/attachdetach/attach_detach_controller_test.go
@ -41,7 +41,9 @@ func Test_NewAttachDetachController_Positive(t *testing.T) {
 		pvcInformer,
 		pvInformer,
 		nil, /* cloud */
-		nil /* plugins */)
+		nil, /* plugins */
+		false,
+		time.Second*5)

 	// Assert
 	if err != nil {
--- a/pkg/controller/volume/attachdetach/reconciler/reconciler.go
+++ b/pkg/controller/volume/attachdetach/reconciler/reconciler.go
@ -57,6 +57,7 @@ func NewReconciler(
 	loopPeriod time.Duration,
 	maxWaitForUnmountDuration time.Duration,
 	syncDuration time.Duration,
+	disableReconciliationSync bool,
 	desiredStateOfWorld cache.DesiredStateOfWorld,
 	actualStateOfWorld cache.ActualStateOfWorld,
 	attacherDetacher operationexecutor.OperationExecutor,
@ -65,6 +66,7 @@ func NewReconciler(
 		loopPeriod:                loopPeriod,
 		maxWaitForUnmountDuration: maxWaitForUnmountDuration,
 		syncDuration:              syncDuration,
+		disableReconciliationSync: disableReconciliationSync,
 		desiredStateOfWorld:       desiredStateOfWorld,
 		actualStateOfWorld:        actualStateOfWorld,
 		attacherDetacher:          attacherDetacher,
@ -82,18 +84,27 @@ type reconciler struct {
 	attacherDetacher          operationexecutor.OperationExecutor
 	nodeStatusUpdater         statusupdater.NodeStatusUpdater
 	timeOfLastSync            time.Time
+	disableReconciliationSync bool
 }

 func (rc *reconciler) Run(stopCh <-chan struct{}) {
 	wait.Until(rc.reconciliationLoopFunc(), rc.loopPeriod, stopCh)
 }

+// reconciliationLoopFunc returns the loop body. Its sync step can be disabled via a CLI option.
+// The sync step periodically checks whether the attached volumes from actual state
+// are still attached to the node and updates the status if they are not.
 func (rc *reconciler) reconciliationLoopFunc() func() {
 	return func() {
+
 		rc.reconcile()
-		// reconciler periodically checks whether the attached volumes from actual state
-		// are still attached to the node and udpate the status if they are not.
-		if time.Since(rc.timeOfLastSync) > rc.syncDuration {
+
+		if rc.disableReconciliationSync {
+			glog.V(5).Info("Skipping reconciling attached volumes still attached since it is disabled via the command line.")
+		} else if rc.syncDuration < time.Second {
+			glog.V(5).Info("Skipping reconciling attached volumes still attached since it is set to less than one second via the command line.")
+		} else if time.Since(rc.timeOfLastSync) > rc.syncDuration {
+			glog.V(5).Info("Starting reconciling attached volumes still attached")
 			rc.sync()
 		}
 	}
--- a/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go
+++ b/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go
@ -55,7 +55,7 @@ func Test_Run_Positive_DoNothing(t *testing.T) {
 	nsu := statusupdater.NewNodeStatusUpdater(
 		fakeKubeClient, nodeInformer, asw)
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu)
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu)

 	// Act
 	ch := make(chan struct{})
@ -83,7 +83,7 @@ func Test_Run_Positive_OneDesiredVolumeAttach(t *testing.T) {
 	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(fakeKubeClient, volumePluginMgr, fakeRecorder, false /* checkNodeCapabilitiesBeforeMount */))
 	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu)
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu)
 	podName := "pod-uid"
 	volumeName := v1.UniqueVolumeName("volume-name")
 	volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
@ -129,7 +129,7 @@ func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithUnmountedVolume(t *te
 	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(fakeKubeClient, volumePluginMgr, fakeRecorder, false /* checkNodeCapabilitiesBeforeMount */))
 	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu)
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu)
 	podName := "pod-uid"
 	volumeName := v1.UniqueVolumeName("volume-name")
 	volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
@ -196,7 +196,7 @@ func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithMountedVolume(t *test
 	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(fakeKubeClient, volumePluginMgr, fakeRecorder, false /* checkNodeCapabilitiesBeforeMount */))
 	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu)
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu)
 	podName := "pod-uid"
 	volumeName := v1.UniqueVolumeName("volume-name")
 	volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
@ -263,7 +263,7 @@ func Test_Run_Negative_OneDesiredVolumeAttachThenDetachWithUnmountedVolumeUpdate
 	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(fakeKubeClient, volumePluginMgr, fakeRecorder, false /* checkNodeCapabilitiesBeforeMount */))
 	nsu := statusupdater.NewFakeNodeStatusUpdater(true /* returnError */)
 	reconciler := NewReconciler(
-		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu)
+		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu)
 	podName := "pod-uid"
 	volumeName := v1.UniqueVolumeName("volume-name")
 	volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
--- a/test/integration/volume/attach_detach_test.go
+++ b/test/integration/volume/attach_detach_test.go
@ -195,7 +195,7 @@ func createAdClients(ns *v1.Namespace, t *testing.T, server *httptest.Server, sy
 	nodeInformer := informers.NewNodeInformer(clientset.NewForConfigOrDie(restclient.AddUserAgent(&config, "node-informer")), resyncPeriod)
 	pvcInformer := informers.NewNodeInformer(clientset.NewForConfigOrDie(restclient.AddUserAgent(&config, "pvc-informer")), resyncPeriod)
 	pvInformer := informers.NewNodeInformer(clientset.NewForConfigOrDie(restclient.AddUserAgent(&config, "pv-informer")), resyncPeriod)
-	ctrl, err := attachdetach.NewAttachDetachController(testClient, podInformer, nodeInformer, pvcInformer, pvInformer, cloud, plugins)
+	ctrl, err := attachdetach.NewAttachDetachController(testClient, podInformer, nodeInformer, pvcInformer, pvInformer, cloud, plugins, false, time.Second*5)
 	if err != nil {
 		t.Fatalf("Error creating AttachDetach : %v", err)
 	}