Merge pull request #39551 from chrislovecnm/reconciler-time-increases

Automatic merge from submit-queue (batch tested with PRs 39628, 39551, 38746, 38352, 39607)

Increasing times on reconciling volumes fixing impact to AWS.

#**What this PR does / why we need it**:

We are currently blocked by API timeouts with PV volumes.  See https://github.com/kubernetes/kubernetes/issues/39526.  This is a workaround, not a fix.

**Special notes for your reviewer**:

A second PR will be dropped with CLI cobra options in it, but we are starting with increasing the reconciliation periods.  I am dropping this without major testing and will test on our AWS account. Will be marked WIP until I run smoke tests.

**Release note**:

```release-note
Provide kubernetes-controller-manager flags to control volume attach/detach reconciler sync.  The duration of the syncs can be controlled, and the syncs can be shut off as well. 
```
This commit is contained in:
Kubernetes Submit Queue 2017-01-10 11:54:15 -08:00 committed by GitHub
commit 7c3fff1a95
9 changed files with 58 additions and 25 deletions

View File

@ -408,6 +408,10 @@ func StartControllers(controllers map[string]InitFunc, s *options.CMServer, root
go volumeController.Run(stop)
time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))
if s.ReconcilerSyncLoopPeriod.Duration < time.Second {
return fmt.Errorf("Duration time must be greater than one second as set via command line option reconcile-sync-loop-period.")
}
attachDetachController, attachDetachControllerErr :=
attachdetach.NewAttachDetachController(
clientBuilder.ClientOrDie("attachdetach-controller"),
@ -416,7 +420,10 @@ func StartControllers(controllers map[string]InitFunc, s *options.CMServer, root
sharedInformers.PersistentVolumeClaims().Informer(),
sharedInformers.PersistentVolumes().Informer(),
cloud,
ProbeAttachableVolumePlugins(s.VolumeConfiguration))
ProbeAttachableVolumePlugins(s.VolumeConfiguration),
s.DisableAttachDetachReconcilerSync,
s.ReconcilerSyncLoopPeriod.Duration,
)
if attachDetachControllerErr != nil {
return fmt.Errorf("failed to start attach/detach controller: %v", attachDetachControllerErr)
}

View File

@ -86,15 +86,16 @@ func NewCMServer() *CMServer {
},
FlexVolumePluginDir: "/usr/libexec/kubernetes/kubelet-plugins/volume/exec/",
},
ContentType: "application/vnd.kubernetes.protobuf",
KubeAPIQPS: 20.0,
KubeAPIBurst: 30,
LeaderElection: leaderelection.DefaultLeaderElectionConfiguration(),
ControllerStartInterval: metav1.Duration{Duration: 0 * time.Second},
EnableGarbageCollector: true,
ConcurrentGCSyncs: 20,
ClusterSigningCertFile: "/etc/kubernetes/ca/ca.pem",
ClusterSigningKeyFile: "/etc/kubernetes/ca/ca.key",
ContentType: "application/vnd.kubernetes.protobuf",
KubeAPIQPS: 20.0,
KubeAPIBurst: 30,
LeaderElection: leaderelection.DefaultLeaderElectionConfiguration(),
ControllerStartInterval: metav1.Duration{Duration: 0 * time.Second},
EnableGarbageCollector: true,
ConcurrentGCSyncs: 20,
ClusterSigningCertFile: "/etc/kubernetes/ca/ca.pem",
ClusterSigningKeyFile: "/etc/kubernetes/ca/ca.key",
ReconcilerSyncLoopPeriod: metav1.Duration{Duration: 5 * time.Second},
},
}
s.LeaderElection.LeaderElect = true
@ -181,6 +182,8 @@ func (s *CMServer) AddFlags(fs *pflag.FlagSet) {
fs.Float32Var(&s.SecondaryNodeEvictionRate, "secondary-node-eviction-rate", 0.01, "Number of nodes per second on which pods are deleted in case of node failure when a zone is unhealthy (see --unhealthy-zone-threshold for definition of healthy/unhealthy). Zone refers to entire cluster in non-multizone clusters. This value is implicitly overridden to 0 if the cluster size is smaller than --large-cluster-size-threshold.")
fs.Int32Var(&s.LargeClusterSizeThreshold, "large-cluster-size-threshold", 50, "Number of nodes from which NodeController treats the cluster as large for the eviction logic purposes. --secondary-node-eviction-rate is implicitly overridden to 0 for clusters this size or smaller.")
fs.Float32Var(&s.UnhealthyZoneThreshold, "unhealthy-zone-threshold", 0.55, "Fraction of Nodes in a zone which needs to be not Ready (minimum 3) for zone to be treated as unhealthy. ")
fs.BoolVar(&s.DisableAttachDetachReconcilerSync, "disable-attach-detach-reconcile-sync", false, "Disable volume attach detach reconciler sync. Disabling this may cause volumes to be mismatched with pods. Use wisely.")
fs.DurationVar(&s.ReconcilerSyncLoopPeriod.Duration, "attach-detach-reconcile-sync-period", s.ReconcilerSyncLoopPeriod.Duration, "The reconciler sync wait time between volume attach detach. This duration must be larger than one second, and increasing this value from the default may allow for volumes to be mismatched with pods.")
leaderelection.BindFlags(&s.LeaderElection, fs)
config.DefaultFeatureGate.AddFlag(fs)

View File

@ -635,3 +635,5 @@ garbage-collector-enabled
viper-config
log-lines-total
run-duration
attach-detach-reconcile-sync-period
disable-attach-detach-reconcile-sync

View File

@ -772,6 +772,13 @@ type KubeControllerManagerConfiguration struct {
// Zone is treated as unhealthy in nodeEvictionRate and secondaryNodeEvictionRate when at least
// unhealthyZoneThreshold (no less than 3) of Nodes in the zone are NotReady
UnhealthyZoneThreshold float32
// Reconciler runs a periodic loop to reconcile the desired state of the with
// the actual state of the world by triggering attach detach operations.
// This flag enables or disables reconcile. Is false by default, and thus enabled.
DisableAttachDetachReconcilerSync bool
// ReconcilerSyncLoopPeriod is the amount of time the reconciler sync states loop
// wait between successive executions. Is set to 5 sec by default.
ReconcilerSyncLoopPeriod metav1.Duration
}
// VolumeConfiguration contains *all* enumerated flags meant to configure all volume

View File

@ -58,10 +58,6 @@ const (
// desiredStateOfWorldPopulatorLoopSleepPeriod is the amount of time the
// DesiredStateOfWorldPopulator loop waits between successive executions
desiredStateOfWorldPopulatorLoopSleepPeriod time.Duration = 1 * time.Minute
// reconcilerSyncDuration is the amount of time the reconciler sync states loop
// wait between successive executions
reconcilerSyncDuration time.Duration = 5 * time.Second
)
// AttachDetachController defines the operations supported by this controller.
@ -78,7 +74,9 @@ func NewAttachDetachController(
pvcInformer kcache.SharedInformer,
pvInformer kcache.SharedInformer,
cloud cloudprovider.Interface,
plugins []volume.VolumePlugin) (AttachDetachController, error) {
plugins []volume.VolumePlugin,
disableReconciliationSync bool,
reconcilerSyncDuration time.Duration) (AttachDetachController, error) {
// TODO: The default resyncPeriod for shared informers is 12 hours, this is
// unacceptable for the attach/detach controller. For example, if a pod is
// skipped because the node it is scheduled to didn't set its annotation in
@ -131,10 +129,13 @@ func NewAttachDetachController(
false)) // flag for experimental binary check for volume mount
adc.nodeStatusUpdater = statusupdater.NewNodeStatusUpdater(
kubeClient, nodeInformer, adc.actualStateOfWorld)
// Default these to values in options
adc.reconciler = reconciler.NewReconciler(
reconcilerLoopPeriod,
reconcilerMaxWaitForUnmountDuration,
reconcilerSyncDuration,
disableReconciliationSync,
adc.desiredStateOfWorld,
adc.actualStateOfWorld,
adc.attacherDetacher,

View File

@ -41,7 +41,9 @@ func Test_NewAttachDetachController_Positive(t *testing.T) {
pvcInformer,
pvInformer,
nil, /* cloud */
nil /* plugins */)
nil, /* plugins */
false,
time.Second*5)
// Assert
if err != nil {

View File

@ -57,6 +57,7 @@ func NewReconciler(
loopPeriod time.Duration,
maxWaitForUnmountDuration time.Duration,
syncDuration time.Duration,
disableReconciliationSync bool,
desiredStateOfWorld cache.DesiredStateOfWorld,
actualStateOfWorld cache.ActualStateOfWorld,
attacherDetacher operationexecutor.OperationExecutor,
@ -65,6 +66,7 @@ func NewReconciler(
loopPeriod: loopPeriod,
maxWaitForUnmountDuration: maxWaitForUnmountDuration,
syncDuration: syncDuration,
disableReconciliationSync: disableReconciliationSync,
desiredStateOfWorld: desiredStateOfWorld,
actualStateOfWorld: actualStateOfWorld,
attacherDetacher: attacherDetacher,
@ -82,18 +84,27 @@ type reconciler struct {
attacherDetacher operationexecutor.OperationExecutor
nodeStatusUpdater statusupdater.NodeStatusUpdater
timeOfLastSync time.Time
disableReconciliationSync bool
}
func (rc *reconciler) Run(stopCh <-chan struct{}) {
wait.Until(rc.reconciliationLoopFunc(), rc.loopPeriod, stopCh)
}
// reconciliationLoopFunc this can be disabled via cli option disableReconciliation.
// It periodically checks whether the attached volumes from actual state
// are still attached to the node and udpate the status if they are not.
func (rc *reconciler) reconciliationLoopFunc() func() {
return func() {
rc.reconcile()
// reconciler periodically checks whether the attached volumes from actual state
// are still attached to the node and udpate the status if they are not.
if time.Since(rc.timeOfLastSync) > rc.syncDuration {
if rc.disableReconciliationSync {
glog.V(5).Info("Skipping reconciling attached volumes still attached since it is disabled via the command line.")
} else if rc.syncDuration < time.Second {
glog.V(5).Info("Skipping reconciling attached volumes still attached since it is set to less than one second via the command line.")
} else if time.Since(rc.timeOfLastSync) > rc.syncDuration {
glog.V(5).Info("Starting reconciling attached volumes still attached")
rc.sync()
}
}

View File

@ -55,7 +55,7 @@ func Test_Run_Positive_DoNothing(t *testing.T) {
nsu := statusupdater.NewNodeStatusUpdater(
fakeKubeClient, nodeInformer, asw)
reconciler := NewReconciler(
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu)
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu)
// Act
ch := make(chan struct{})
@ -83,7 +83,7 @@ func Test_Run_Positive_OneDesiredVolumeAttach(t *testing.T) {
ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(fakeKubeClient, volumePluginMgr, fakeRecorder, false /* checkNodeCapabilitiesBeforeMount */))
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
reconciler := NewReconciler(
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu)
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu)
podName := "pod-uid"
volumeName := v1.UniqueVolumeName("volume-name")
volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
@ -129,7 +129,7 @@ func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithUnmountedVolume(t *te
ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(fakeKubeClient, volumePluginMgr, fakeRecorder, false /* checkNodeCapabilitiesBeforeMount */))
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
reconciler := NewReconciler(
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu)
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu)
podName := "pod-uid"
volumeName := v1.UniqueVolumeName("volume-name")
volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
@ -196,7 +196,7 @@ func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithMountedVolume(t *test
ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(fakeKubeClient, volumePluginMgr, fakeRecorder, false /* checkNodeCapabilitiesBeforeMount */))
nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
reconciler := NewReconciler(
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu)
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu)
podName := "pod-uid"
volumeName := v1.UniqueVolumeName("volume-name")
volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
@ -263,7 +263,7 @@ func Test_Run_Negative_OneDesiredVolumeAttachThenDetachWithUnmountedVolumeUpdate
ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(fakeKubeClient, volumePluginMgr, fakeRecorder, false /* checkNodeCapabilitiesBeforeMount */))
nsu := statusupdater.NewFakeNodeStatusUpdater(true /* returnError */)
reconciler := NewReconciler(
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, dsw, asw, ad, nsu)
reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, dsw, asw, ad, nsu)
podName := "pod-uid"
volumeName := v1.UniqueVolumeName("volume-name")
volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)

View File

@ -195,7 +195,7 @@ func createAdClients(ns *v1.Namespace, t *testing.T, server *httptest.Server, sy
nodeInformer := informers.NewNodeInformer(clientset.NewForConfigOrDie(restclient.AddUserAgent(&config, "node-informer")), resyncPeriod)
pvcInformer := informers.NewNodeInformer(clientset.NewForConfigOrDie(restclient.AddUserAgent(&config, "pvc-informer")), resyncPeriod)
pvInformer := informers.NewNodeInformer(clientset.NewForConfigOrDie(restclient.AddUserAgent(&config, "pv-informer")), resyncPeriod)
ctrl, err := attachdetach.NewAttachDetachController(testClient, podInformer, nodeInformer, pvcInformer, pvInformer, cloud, plugins)
ctrl, err := attachdetach.NewAttachDetachController(testClient, podInformer, nodeInformer, pvcInformer, pvInformer, cloud, plugins, false, time.Second*5)
if err != nil {
t.Fatalf("Error creating AttachDetach : %v", err)
}