Add the capability to control the attach/detach reconciler sync duration via controller-manager flags, and the option to shut off reconciliation.
This commit is contained in:
chrislovecnm 2017-01-06 15:24:51 -07:00
parent de59ede6b2
commit a973c38c7d
9 changed files with 58 additions and 25 deletions

View File

@@ -408,6 +408,10 @@ func StartControllers(controllers map[string]InitFunc, s *options.CMServer, root
go volumeController.Run(stop)
time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))
if s.ReconcilerSyncLoopPeriod.Duration < time.Second {
return fmt.Errorf("Duration time must be greater than one second as set via command line option reconcile-sync-loop-period. One minute is recommended.")
}
attachDetachController, attachDetachControllerErr :=
attachdetach.NewAttachDetachController(
clientBuilder.ClientOrDie("attachdetach-controller"),
@@ -416,7 +420,10 @@ func StartControllers(controllers map[string]InitFunc, s *options.CMServer, root
sharedInformers.PersistentVolumeClaims().Informer(),
sharedInformers.PersistentVolumes().Informer(),
cloud,
ProbeAttachableVolumePlugins(s.VolumeConfiguration))
ProbeAttachableVolumePlugins(s.VolumeConfiguration),
s.DisableAttachDetachReconcilerSync,
s.ReconcilerSyncLoopPeriod.Duration,
)
if attachDetachControllerErr != nil {
return fmt.Errorf("failed to start attach/detach controller: %v", attachDetachControllerErr)
}
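For context, here is a minimal standalone sketch of the guard added above. The helper name validateReconcilePeriod is illustrative, not part of this commit; the check itself mirrors the one-second floor enforced before the attach/detach controller is constructed.

package main

import (
	"fmt"
	"time"
)

// validateReconcilePeriod mirrors the guard above: periods under one
// second are rejected before the attach/detach controller is built.
func validateReconcilePeriod(d time.Duration) error {
	if d < time.Second {
		return fmt.Errorf("duration must be at least one second, got %v", d)
	}
	return nil
}

func main() {
	fmt.Println(validateReconcilePeriod(500 * time.Millisecond)) // rejected
	fmt.Println(validateReconcilePeriod(5 * time.Second))        // <nil>
}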

View File

@@ -95,6 +95,7 @@ func NewCMServer() *CMServer {
ConcurrentGCSyncs: 20,
ClusterSigningCertFile: "/etc/kubernetes/ca/ca.pem",
ClusterSigningKeyFile: "/etc/kubernetes/ca/ca.key",
ReconcilerSyncLoopPeriod: metav1.Duration{Duration: 5 * time.Second},
},
}
s.LeaderElection.LeaderElect = true
@@ -181,6 +182,8 @@ func (s *CMServer) AddFlags(fs *pflag.FlagSet) {
fs.Float32Var(&s.SecondaryNodeEvictionRate, "secondary-node-eviction-rate", 0.01, "Number of nodes per second on which pods are deleted in case of node failure when a zone is unhealthy (see --unhealthy-zone-threshold for definition of healthy/unhealthy). Zone refers to entire cluster in non-multizone clusters. This value is implicitly overridden to 0 if the cluster size is smaller than --large-cluster-size-threshold.")
fs.Int32Var(&s.LargeClusterSizeThreshold, "large-cluster-size-threshold", 50, "Number of nodes from which NodeController treats the cluster as large for the eviction logic purposes. --secondary-node-eviction-rate is implicitly overridden to 0 for clusters this size or smaller.")
fs.Float32Var(&s.UnhealthyZoneThreshold, "unhealthy-zone-threshold", 0.55, "Fraction of Nodes in a zone which needs to be not Ready (minimum 3) for zone to be treated as unhealthy. ")
fs.BoolVar(&s.DisableAttachDetachReconcilerSync, "disable-attach-detach-reconcile", false, "Disable volume attach detach reconciler sync. Disabling this may cause volumes to be mismatched with pods. Use wisely.")
fs.DurationVar(&s.ReconcilerSyncLoopPeriod.Duration, "attach-detach-reconcile-period", s.ReconcilerSyncLoopPeriod.Duration, "The reconciler sync wait time between volume attach detach. This duration must be larger than one second, and increasing this value from the default my allow for volume mismatches.")
leaderelection.BindFlags(&s.LeaderElection, fs)
config.DefaultFeatureGate.AddFlag(fs)
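As a rough usage sketch (assuming spf13/pflag, which the controller-manager's flag set is built on), the two new flags can be registered and parsed standalone as below. The flag names and defaults match the ones added above; the flag-set name and sample arguments are illustrative.

package main

import (
	"fmt"
	"time"

	"github.com/spf13/pflag"
)

func main() {
	fs := pflag.NewFlagSet("controller-manager", pflag.ContinueOnError)
	disable := fs.Bool("disable-attach-detach-reconcile", false,
		"Disable the volume attach/detach reconciler sync.")
	period := fs.Duration("attach-detach-reconcile-period", 5*time.Second,
		"The reconciler sync wait time between volume attach/detach operations.")

	// Equivalent of: kube-controller-manager --attach-detach-reconcile-period=1m
	if err := fs.Parse([]string{"--attach-detach-reconcile-period=1m"}); err != nil {
		panic(err)
	}
	fmt.Println(*disable, *period) // false 1m0s
}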

View File

@@ -635,3 +635,5 @@ garbage-collector-enabled
viper-config
log-lines-total
run-duration
disable-attach-detach-reconcile
attach-detach-reconcile-period

View File

@@ -772,6 +772,13 @@ type KubeControllerManagerConfiguration struct {
// Zone is treated as unhealthy in nodeEvictionRate and secondaryNodeEvictionRate when at least
// unhealthyZoneThreshold (no less than 3) of Nodes in the zone are NotReady
UnhealthyZoneThreshold float32
// Reconciler runs a periodic loop to reconcile the desired state of the world
// with the actual state of the world by triggering attach/detach operations.
// This flag enables or disables reconciliation. It is false by default, so
// reconciliation is enabled.
DisableAttachDetachReconcilerSync bool
// ReconcilerSyncLoopPeriod is the amount of time the reconciler sync states loop
// waits between successive executions. It is set to 5 seconds by default.
ReconcilerSyncLoopPeriod metav1.Duration
}
// VolumeConfiguration contains *all* enumerated flags meant to configure all volume
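ReconcilerSyncLoopPeriod uses metav1.Duration rather than a bare time.Duration so the value serializes as a human-readable string such as "5s". The real wrapper lives in the Kubernetes API machinery; the following self-contained re-implementation is a sketch for illustration only.

package main

import (
	"encoding/json"
	"fmt"
	"time"
)

// Duration sketches the metav1.Duration wrapper: it embeds time.Duration
// and (un)marshals the value as a duration string, not raw nanoseconds.
type Duration struct {
	time.Duration
}

func (d *Duration) UnmarshalJSON(b []byte) error {
	var s string
	if err := json.Unmarshal(b, &s); err != nil {
		return err
	}
	parsed, err := time.ParseDuration(s)
	if err != nil {
		return err
	}
	d.Duration = parsed
	return nil
}

func (d Duration) MarshalJSON() ([]byte, error) {
	return json.Marshal(d.Duration.String())
}

func main() {
	var d Duration
	if err := json.Unmarshal([]byte(`"5s"`), &d); err != nil {
		panic(err)
	}
	out, _ := json.Marshal(d)
	fmt.Println(d.Duration, string(out)) // 5s "5s"
}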

View File

@@ -58,10 +58,6 @@ const (
// desiredStateOfWorldPopulatorLoopSleepPeriod is the amount of time the
// DesiredStateOfWorldPopulator loop waits between successive executions
desiredStateOfWorldPopulatorLoopSleepPeriod time.Duration = 1 * time.Minute
// reconcilerSyncDuration is the amount of time the reconciler sync states loop
// waits between successive executions
reconcilerSyncDuration time.Duration = 5 * time.Second
)
// AttachDetachController defines the operations supported by this controller.
@@ -78,7 +74,9 @@ func NewAttachDetachController(
pvcInformer kcache.SharedInformer,
pvInformer kcache.SharedInformer,
cloud cloudprovider.Interface,
plugins []volume.VolumePlugin) (AttachDetachController, error) {
plugins []volume.VolumePlugin,
disableReconciliation bool,
reconcilerSyncDuration time.Duration) (AttachDetachController, error) {
// TODO: The default resyncPeriod for shared informers is 12 hours, this is
// unacceptable for the attach/detach controller. For example, if a pod is
// skipped because the node it is scheduled to didn't set its annotation in
@@ -131,10 +129,13 @@ func NewAttachDetachController(
false)) // flag for experimental binary check for volume mount
adc.nodeStatusUpdater = statusupdater.NewNodeStatusUpdater(
kubeClient, nodeInformer, adc.actualStateOfWorld)
// The sync period and disable flag are passed through from the controller-manager options.
adc.reconciler = reconciler.NewReconciler(
reconcilerLoopPeriod,
reconcilerMaxWaitForUnmountDuration,
reconcilerSyncDuration,
disableReconciliation,
adc.desiredStateOfWorld,
adc.actualStateOfWorld,
adc.attacherDetacher,

View File

@@ -41,7 +41,9 @@ func Test_NewAttachDetachController_Positive(t *testing.T) {
pvcInformer,
pvInformer,
nil, /* cloud */
nil /* plugins */)
nil, /* plugins */
false,
time.Second*5)
// Assert
if err != nil {

View File

@@ -57,6 +57,7 @@ func NewReconciler(
loopPeriod time.Duration,
maxWaitForUnmountDuration time.Duration,
syncDuration time.Duration,
disableReconciliation bool,
desiredStateOfWorld cache.DesiredStateOfWorld,
actualStateOfWorld cache.ActualStateOfWorld,
attacherDetacher operationexecutor.OperationExecutor,
@@ -65,6 +66,7 @@ func NewReconciler(
loopPeriod: loopPeriod,
maxWaitForUnmountDuration: maxWaitForUnmountDuration,
syncDuration: syncDuration,
disableReconciliation: disableReconciliation,
desiredStateOfWorld: desiredStateOfWorld,
actualStateOfWorld: actualStateOfWorld,
attacherDetacher: attacherDetacher,
@@ -82,18 +84,27 @@ type reconciler struct {
attacherDetacher operationexecutor.OperationExecutor
nodeStatusUpdater statusupdater.NodeStatusUpdater
timeOfLastSync time.Time
disableReconciliation bool
}
func (rc *reconciler) Run(stopCh <-chan struct{}) {
wait.Until(rc.reconciliationLoopFunc(), rc.loopPeriod, stopCh)
}
// reconciliationLoopFunc can be disabled via the cli option disableReconciliation.
// It periodically checks whether the attached volumes from the actual state
// are still attached to the node and updates the status if they are not.
func (rc *reconciler) reconciliationLoopFunc() func() {
return func() {
rc.reconcile()
// reconciler periodically checks whether the attached volumes from the actual state
// are still attached to the node and updates the status if they are not.
if time.Since(rc.timeOfLastSync) > rc.syncDuration {
if rc.disableReconciliation {
glog.V(5).Info("Skipping reconciling attached volumes still attached since it is disabled via the command line.")
} else if rc.syncDuration < time.Second {
glog.V(5).Info("Skipping reconciling attached volumes still attached since it is set to less than one second via the command line.")
} else if time.Since(rc.timeOfLastSync) > rc.syncDuration {
glog.V(5).Info("Starting reconciling attached volumes still attached")
rc.sync()
}
}
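To make the new gating easy to follow in isolation, here is a minimal, self-contained sketch of the loop body. The names loopSketch and maybeSync are illustrative, not from the commit; wait.Until in Run simply invokes this body every loopPeriod until the stop channel closes.

package main

import (
	"fmt"
	"time"
)

// loopSketch models the three-way gate added to reconciliationLoopFunc:
// the sync is skipped when reconciliation is disabled, when the configured
// period is below one second, or when the period has not yet elapsed.
type loopSketch struct {
	syncDuration          time.Duration
	timeOfLastSync        time.Time
	disableReconciliation bool
}

func (l *loopSketch) maybeSync() {
	switch {
	case l.disableReconciliation:
		fmt.Println("skipping sync: disabled via the command line")
	case l.syncDuration < time.Second:
		fmt.Println("skipping sync: period set below one second")
	case time.Since(l.timeOfLastSync) > l.syncDuration:
		fmt.Println("syncing attached volumes")
		l.timeOfLastSync = time.Now() // in the real code, sync() records this
	default:
		fmt.Println("within period: nothing to do")
	}
}

func main() {
	l := &loopSketch{syncDuration: time.Second}
	l.maybeSync() // zero timeOfLastSync is long past, so this syncs
	l.maybeSync() // called again immediately: within the period, no-op
}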

View File

@@ -55,7 +55,7 @@ func Test_Run_Positive_DoNothing(t *testing.T) {
nsu := statusupdater.NewNodeStatusUpdater(
fakeKubeClient, nodeInformer, asw)
reconciler := NewReconciler(
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu)
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu)
// Act
ch := make(chan struct{})
@@ -83,7 +83,7 @@ func Test_Run_Positive_OneDesiredVolumeAttach(t *testing.T) {
ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(fakeKubeClient, volumePluginMgr, fakeRecorder, false /* checkNodeCapabilitiesBeforeMount */))
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
reconciler := NewReconciler(
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu)
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu)
podName := "pod-uid"
volumeName := v1.UniqueVolumeName("volume-name")
volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
@@ -129,7 +129,7 @@ func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithUnmountedVolume(t *testing.T) {
ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(fakeKubeClient, volumePluginMgr, fakeRecorder, false /* checkNodeCapabilitiesBeforeMount */))
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
reconciler := NewReconciler(
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu)
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu)
podName := "pod-uid"
volumeName := v1.UniqueVolumeName("volume-name")
volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
@@ -196,7 +196,7 @@ func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithMountedVolume(t *testing.T) {
ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(fakeKubeClient, volumePluginMgr, fakeRecorder, false /* checkNodeCapabilitiesBeforeMount */))
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
reconciler := NewReconciler(
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu)
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu)
podName := "pod-uid"
volumeName := v1.UniqueVolumeName("volume-name")
volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
@@ -263,7 +263,7 @@ func Test_Run_Negative_OneDesiredVolumeAttachThenDetachWithUnmountedVolumeUpdate
ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(fakeKubeClient, volumePluginMgr, fakeRecorder, false /* checkNodeCapabilitiesBeforeMount */))
nsu := statusupdater.NewFakeNodeStatusUpdater(true /* returnError */)
reconciler := NewReconciler(
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu)
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu)
podName := "pod-uid"
volumeName := v1.UniqueVolumeName("volume-name")
volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)

View File

@@ -195,7 +195,7 @@ func createAdClients(ns *v1.Namespace, t *testing.T, server *httptest.Server, sy
nodeInformer := informers.NewNodeInformer(clientset.NewForConfigOrDie(restclient.AddUserAgent(&config, "node-informer")), resyncPeriod)
pvcInformer := informers.NewNodeInformer(clientset.NewForConfigOrDie(restclient.AddUserAgent(&config, "pvc-informer")), resyncPeriod)
pvInformer := informers.NewNodeInformer(clientset.NewForConfigOrDie(restclient.AddUserAgent(&config, "pv-informer")), resyncPeriod)
ctrl, err := attachdetach.NewAttachDetachController(testClient, podInformer, nodeInformer, pvcInformer, pvInformer, cloud, plugins)
ctrl, err := attachdetach.NewAttachDetachController(testClient, podInformer, nodeInformer, pvcInformer, pvInformer, cloud, plugins, false, time.Second*5)
if err != nil {
t.Fatalf("Error creating AttachDetach : %v", err)
}