From 68ebe29529b861cfdd075f76bad5643c58ece70f Mon Sep 17 00:00:00 2001 From: Indeed Date: Tue, 2 Mar 2021 15:29:22 -0800 Subject: [PATCH 1/6] fix leader migration options not applied to kube-controller-manager or cloud-controller-manager --- cmd/kube-controller-manager/app/options/options_test.go | 2 ++ staging/src/k8s.io/cloud-provider/options/options_test.go | 3 +++ staging/src/k8s.io/controller-manager/options/generic.go | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/cmd/kube-controller-manager/app/options/options_test.go b/cmd/kube-controller-manager/app/options/options_test.go index 83b85e3d25a..5b65358b479 100644 --- a/cmd/kube-controller-manager/app/options/options_test.go +++ b/cmd/kube-controller-manager/app/options/options_test.go @@ -35,6 +35,7 @@ import ( "k8s.io/component-base/metrics" cmconfig "k8s.io/controller-manager/config" cmoptions "k8s.io/controller-manager/options" + migration "k8s.io/controller-manager/pkg/leadermigration/options" kubecontrollerconfig "k8s.io/kubernetes/cmd/kube-controller-manager/app/config" kubectrlmgrconfig "k8s.io/kubernetes/pkg/controller/apis/config" csrsigningconfig "k8s.io/kubernetes/pkg/controller/certificates/signer/config" @@ -197,6 +198,7 @@ func TestAddFlags(t *testing.T) { EnableContentionProfiling: true, }, }, + LeaderMigration: &migration.LeaderMigrationOptions{}, }, KubeCloudShared: &cpoptions.KubeCloudSharedOptions{ KubeCloudSharedConfiguration: &cpconfig.KubeCloudSharedConfiguration{ diff --git a/staging/src/k8s.io/cloud-provider/options/options_test.go b/staging/src/k8s.io/cloud-provider/options/options_test.go index 0a7ace3aab8..d63169b6c79 100644 --- a/staging/src/k8s.io/cloud-provider/options/options_test.go +++ b/staging/src/k8s.io/cloud-provider/options/options_test.go @@ -31,6 +31,7 @@ import ( componentbaseconfig "k8s.io/component-base/config" cmconfig "k8s.io/controller-manager/config" cmoptions "k8s.io/controller-manager/options" + migration "k8s.io/controller-manager/pkg/leadermigration/options" ) func TestDefaultFlags(t *testing.T) { @@ -65,6 +66,7 @@ func TestDefaultFlags(t *testing.T) { EnableContentionProfiling: false, }, }, + LeaderMigration: &migration.LeaderMigrationOptions{}, }, KubeCloudShared: &KubeCloudSharedOptions{ KubeCloudSharedConfiguration: &cpconfig.KubeCloudSharedConfiguration{ @@ -203,6 +205,7 @@ func TestAddFlags(t *testing.T) { EnableContentionProfiling: true, }, }, + LeaderMigration: &migration.LeaderMigrationOptions{}, }, KubeCloudShared: &KubeCloudSharedOptions{ KubeCloudSharedConfiguration: &cpconfig.KubeCloudSharedConfiguration{ diff --git a/staging/src/k8s.io/controller-manager/options/generic.go b/staging/src/k8s.io/controller-manager/options/generic.go index a22369d63b5..bb7e8c7d411 100644 --- a/staging/src/k8s.io/controller-manager/options/generic.go +++ b/staging/src/k8s.io/controller-manager/options/generic.go @@ -42,7 +42,7 @@ func NewGenericControllerManagerConfigurationOptions(cfg *cmconfig.GenericContro o := &GenericControllerManagerConfigurationOptions{ GenericControllerManagerConfiguration: cfg, Debugging: RecommendedDebuggingOptions(), - LeaderMigration: nil, + LeaderMigration: &migration.LeaderMigrationOptions{}, } return o From 721b1822d6f2c35b75684eb9835af937cf91ae38 Mon Sep 17 00:00:00 2001 From: Indeed Date: Wed, 3 Mar 2021 18:08:45 -0800 Subject: [PATCH 2/6] implementation of leader migration. --- .../app/controllermanager.go | 187 +++++++++++++----- .../cloud-provider/app/controllermanager.go | 134 ++++++++++--- .../pkg/leadermigration/migrator.go | 67 +++++++ .../pkg/leadermigration/util.go | 24 +++ 4 files changed, 340 insertions(+), 72 deletions(-) create mode 100644 staging/src/k8s.io/controller-manager/pkg/leadermigration/migrator.go create mode 100644 staging/src/k8s.io/controller-manager/pkg/leadermigration/util.go diff --git a/cmd/kube-controller-manager/app/controllermanager.go b/cmd/kube-controller-manager/app/controllermanager.go index e1455969de0..22ab003ade3 100644 --- a/cmd/kube-controller-manager/app/controllermanager.go +++ b/cmd/kube-controller-manager/app/controllermanager.go @@ -61,6 +61,7 @@ import ( genericcontrollermanager "k8s.io/controller-manager/app" "k8s.io/controller-manager/pkg/clientbuilder" "k8s.io/controller-manager/pkg/informerfactory" + "k8s.io/controller-manager/pkg/leadermigration" "k8s.io/klog/v2" "k8s.io/kubernetes/cmd/kube-controller-manager/app/config" "k8s.io/kubernetes/cmd/kube-controller-manager/app/options" @@ -207,32 +208,17 @@ func Run(c *config.CompletedConfig, stopCh <-chan struct{}) error { } } - run := func(ctx context.Context) { - rootClientBuilder := clientbuilder.SimpleControllerClientBuilder{ - ClientConfig: c.Kubeconfig, - } - var clientBuilder clientbuilder.ControllerClientBuilder - if c.ComponentConfig.KubeCloudShared.UseServiceAccountCredentials { - if len(c.ComponentConfig.SAController.ServiceAccountKeyFile) == 0 { - // It's possible another controller process is creating the tokens for us. - // If one isn't, we'll timeout and exit when our client builder is unable to create the tokens. - klog.Warningf("--use-service-account-credentials was specified without providing a --service-account-private-key-file") - } + clientBuilder, rootClientBuilder := createClientBuilders(c) - clientBuilder = clientbuilder.NewDynamicClientBuilder( - restclient.AnonymousClientConfig(c.Kubeconfig), - c.Client.CoreV1(), - metav1.NamespaceSystem) - } else { - clientBuilder = rootClientBuilder - } + saTokenControllerInitFunc := serviceAccountTokenControllerStarter{rootClientBuilder: rootClientBuilder}.startServiceAccountTokenController + + run := func(ctx context.Context, startSATokenController InitFunc, filterFunc leadermigration.FilterFunc) { controllerContext, err := CreateControllerContext(c, rootClientBuilder, clientBuilder, ctx.Done()) if err != nil { klog.Fatalf("error building controller context: %v", err) } - saTokenControllerInitFunc := serviceAccountTokenControllerStarter{rootClientBuilder: rootClientBuilder}.startServiceAccountTokenController - - if err := StartControllers(controllerContext, saTokenControllerInitFunc, NewControllerInitializers(controllerContext.LoopMode), unsecuredMux); err != nil { + controllerInitializers := filterInitializers(NewControllerInitializers(controllerContext.LoopMode), filterFunc) + if err := StartControllers(controllerContext, startSATokenController, controllerInitializers, unsecuredMux); err != nil { klog.Fatalf("error starting controllers: %v", err) } @@ -243,8 +229,9 @@ func Run(c *config.CompletedConfig, stopCh <-chan struct{}) error { select {} } + // No leader election, run directly if !c.ComponentConfig.Generic.LeaderElection.LeaderElect { - run(context.TODO()) + run(context.TODO(), saTokenControllerInitFunc, nil) panic("unreachable") } @@ -256,33 +243,102 @@ func Run(c *config.CompletedConfig, stopCh <-chan struct{}) error { // add a uniquifier so that two processes on the same host don't accidentally both become active id = id + "_" + string(uuid.NewUUID()) - rl, err := resourcelock.NewFromKubeconfig(c.ComponentConfig.Generic.LeaderElection.ResourceLock, - c.ComponentConfig.Generic.LeaderElection.ResourceNamespace, - c.ComponentConfig.Generic.LeaderElection.ResourceName, - resourcelock.ResourceLockConfig{ - Identity: id, - EventRecorder: c.EventRecorder, - }, - c.Kubeconfig, - c.ComponentConfig.Generic.LeaderElection.RenewDeadline.Duration) - if err != nil { - klog.Fatalf("error creating lock: %v", err) + // leaderElectAndRun runs the leader election, and runs the callbacks once the leader lease is acquired. + leaderElectAndRun := func(resourceLock string, leaseName string, callbacks leaderelection.LeaderCallbacks) { + rl, err := resourcelock.NewFromKubeconfig(resourceLock, + c.ComponentConfig.Generic.LeaderElection.ResourceNamespace, + leaseName, + resourcelock.ResourceLockConfig{ + Identity: id, + EventRecorder: c.EventRecorder, + }, + c.Kubeconfig, + c.ComponentConfig.Generic.LeaderElection.RenewDeadline.Duration) + if err != nil { + klog.Fatalf("error creating lock: %v", err) + } + + leaderelection.RunOrDie(context.TODO(), leaderelection.LeaderElectionConfig{ + Lock: rl, + LeaseDuration: c.ComponentConfig.Generic.LeaderElection.LeaseDuration.Duration, + RenewDeadline: c.ComponentConfig.Generic.LeaderElection.RenewDeadline.Duration, + RetryPeriod: c.ComponentConfig.Generic.LeaderElection.RetryPeriod.Duration, + Callbacks: callbacks, + WatchDog: electionChecker, + Name: leaseName, + }) } - leaderelection.RunOrDie(context.TODO(), leaderelection.LeaderElectionConfig{ - Lock: rl, - LeaseDuration: c.ComponentConfig.Generic.LeaderElection.LeaseDuration.Duration, - RenewDeadline: c.ComponentConfig.Generic.LeaderElection.RenewDeadline.Duration, - RetryPeriod: c.ComponentConfig.Generic.LeaderElection.RetryPeriod.Duration, - Callbacks: leaderelection.LeaderCallbacks{ - OnStartedLeading: run, + // If leader migration is enabled, use the redirected initialization + // Check feature gate and configuration separately so that any error in configuration checking will not + // affect the result if the feature is not enabled. + if leadermigration.FeatureEnabled() && leadermigration.Enabled(&c.ComponentConfig.Generic) { + klog.Infof("starting leader migration") + + leaderMigrator := leadermigration.NewLeaderMigrator(&c.ComponentConfig.Generic.LeaderMigration, + "kube-controller-manager") + + // We need a channel to signal the start of Service Account Token Controller + // all migrated channel must start afterwards. + saTokenControllerStarted := make(chan struct{}) + + // Wrap saTokenControllerInitFunc to close the channel after returning. + wrappedSATokenControllerInitFunc := func(ctx ControllerContext) (http.Handler, bool, error) { + defer close(saTokenControllerStarted) + return saTokenControllerInitFunc(ctx) + } + + // Start the main lock, using LeaderElectionConfiguration for the type and name of the lease. + go leaderElectAndRun( + c.ComponentConfig.Generic.LeaderElection.ResourceLock, + c.ComponentConfig.Generic.LeaderElection.ResourceName, + leaderelection.LeaderCallbacks{ + OnStartedLeading: func(ctx context.Context) { + klog.Info("leader migration: starting main controllers.") + // saTokenController should only run under the main lock + run(ctx, wrappedSATokenControllerInitFunc, leaderMigrator.FilterFunc(false)) + }, + OnStoppedLeading: func() { + klog.Fatalf("leaderelection lost") + }, + }) + + // Wait for Service Account Token Controller to start before acquiring the migration lock. + // At this point, the main lock must have already been acquired, or the KCM process already exited. + // We wait for the main lock before acquiring the migration lock to prevent the situation + // where KCM instance A holds the main lock while KCM instance B holds the migration lock. + <-saTokenControllerStarted + + // Start the migration lock, using LeaderMigrationConfiguration for the type and name of the lease. + leaderElectAndRun( + c.ComponentConfig.Generic.LeaderMigration.ResourceLock, + c.ComponentConfig.Generic.LeaderMigration.LeaderName, + leaderelection.LeaderCallbacks{ + OnStartedLeading: func(ctx context.Context) { + klog.Info("leader migration: starting migrated controllers.") + // DO NOT start saTokenController under migration lock (passing nil) + run(ctx, nil, leaderMigrator.FilterFunc(true)) + }, + OnStoppedLeading: func() { + klog.Fatalf("migration leaderelection lost") + }, + }) + panic("unreachable") + } // end of leader migration + + // Normal leader election, without leader migration + leaderElectAndRun( + c.ComponentConfig.Generic.LeaderElection.ResourceLock, + c.ComponentConfig.Generic.LeaderElection.ResourceName, + leaderelection.LeaderCallbacks{ + OnStartedLeading: func(ctx context.Context) { + run(ctx, saTokenControllerInitFunc, nil) + }, OnStoppedLeading: func() { klog.Fatalf("leaderelection lost") }, - }, - WatchDog: electionChecker, - Name: "kube-controller-manager", - }) + }) + panic("unreachable") } @@ -504,8 +560,10 @@ func CreateControllerContext(s *config.CompletedConfig, rootClientBuilder, clien func StartControllers(ctx ControllerContext, startSATokenController InitFunc, controllers map[string]InitFunc, unsecuredMux *mux.PathRecorderMux) error { // Always start the SA token controller first using a full-power client, since it needs to mint tokens for the rest // If this fails, just return here and fail since other controllers won't be able to get credentials. - if _, _, err := startSATokenController(ctx); err != nil { - return err + if startSATokenController != nil { + if _, _, err := startSATokenController(ctx); err != nil { + return err + } } // Initialize the cloud provider with a reference to the clientBuilder only after token controller @@ -609,3 +667,40 @@ func readCA(file string) ([]byte, error) { return rootCA, err } + +// createClientBuilders creates clientBuilder and rootClientBuilder from the given configuration +func createClientBuilders(c *config.CompletedConfig) (clientBuilder clientbuilder.ControllerClientBuilder, rootClientBuilder clientbuilder.ControllerClientBuilder) { + rootClientBuilder = clientbuilder.SimpleControllerClientBuilder{ + ClientConfig: c.Kubeconfig, + } + if c.ComponentConfig.KubeCloudShared.UseServiceAccountCredentials { + if len(c.ComponentConfig.SAController.ServiceAccountKeyFile) == 0 { + // It's possible another controller process is creating the tokens for us. + // If one isn't, we'll timeout and exit when our client builder is unable to create the tokens. + klog.Warningf("--use-service-account-credentials was specified without providing a --service-account-private-key-file") + } + + clientBuilder = clientbuilder.NewDynamicClientBuilder( + restclient.AnonymousClientConfig(c.Kubeconfig), + c.Client.CoreV1(), + metav1.NamespaceSystem) + } else { + clientBuilder = rootClientBuilder + } + return +} + +// filterInitializers returns initializers that has filterFunc(name) == true. +// filterFunc can be nil, in which case the original initializers will be returned directly. +func filterInitializers(allInitializers map[string]InitFunc, filterFunc leadermigration.FilterFunc) map[string]InitFunc { + if filterFunc == nil { + return allInitializers + } + initializers := make(map[string]InitFunc) + for name, initFunc := range allInitializers { + if filterFunc(name) { + initializers[name] = initFunc + } + } + return initializers +} diff --git a/staging/src/k8s.io/cloud-provider/app/controllermanager.go b/staging/src/k8s.io/cloud-provider/app/controllermanager.go index 4acefc386a8..599367843ae 100644 --- a/staging/src/k8s.io/cloud-provider/app/controllermanager.go +++ b/staging/src/k8s.io/cloud-provider/app/controllermanager.go @@ -52,6 +52,7 @@ import ( genericcontrollermanager "k8s.io/controller-manager/app" "k8s.io/controller-manager/pkg/clientbuilder" "k8s.io/controller-manager/pkg/informerfactory" + "k8s.io/controller-manager/pkg/leadermigration" "k8s.io/klog/v2" ) @@ -173,7 +174,7 @@ func Run(c *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface } } - run := func(ctx context.Context) { + run := func(ctx context.Context, controllerInitializers map[string]InitFunc) { clientBuilder := clientbuilder.SimpleControllerClientBuilder{ ClientConfig: c.Kubeconfig, } @@ -187,7 +188,7 @@ func Run(c *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface } if !c.ComponentConfig.Generic.LeaderElection.LeaderElect { - run(context.TODO()) + run(context.TODO(), controllerInitializers) panic("unreachable") } @@ -199,35 +200,96 @@ func Run(c *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface // add a uniquifier so that two processes on the same host don't accidentally both become active id = id + "_" + string(uuid.NewUUID()) - // Lock required for leader election - rl, err := resourcelock.NewFromKubeconfig(c.ComponentConfig.Generic.LeaderElection.ResourceLock, - c.ComponentConfig.Generic.LeaderElection.ResourceNamespace, - c.ComponentConfig.Generic.LeaderElection.ResourceName, - resourcelock.ResourceLockConfig{ - Identity: id, - EventRecorder: c.EventRecorder, - }, - c.Kubeconfig, - c.ComponentConfig.Generic.LeaderElection.RenewDeadline.Duration) - if err != nil { - klog.Fatalf("error creating lock: %v", err) + // leaderElectAndRun runs the callbacks once the leader lease is acquired. + leaderElectAndRun := func(resourceLock string, leaseName string, callbacks leaderelection.LeaderCallbacks) { + // Lock required for leader election + rl, err := resourcelock.NewFromKubeconfig(resourceLock, + c.ComponentConfig.Generic.LeaderElection.ResourceNamespace, + leaseName, + resourcelock.ResourceLockConfig{ + Identity: id, + EventRecorder: c.EventRecorder, + }, + c.Kubeconfig, + c.ComponentConfig.Generic.LeaderElection.RenewDeadline.Duration) + if err != nil { + klog.Fatalf("error creating lock: %v", err) + } + + // Try and become the leader and start cloud controller manager loops + leaderelection.RunOrDie(context.TODO(), leaderelection.LeaderElectionConfig{ + Lock: rl, + LeaseDuration: c.ComponentConfig.Generic.LeaderElection.LeaseDuration.Duration, + RenewDeadline: c.ComponentConfig.Generic.LeaderElection.RenewDeadline.Duration, + RetryPeriod: c.ComponentConfig.Generic.LeaderElection.RetryPeriod.Duration, + Callbacks: callbacks, + WatchDog: electionChecker, + Name: leaseName, + }) } - // Try and become the leader and start cloud controller manager loops - leaderelection.RunOrDie(context.TODO(), leaderelection.LeaderElectionConfig{ - Lock: rl, - LeaseDuration: c.ComponentConfig.Generic.LeaderElection.LeaseDuration.Duration, - RenewDeadline: c.ComponentConfig.Generic.LeaderElection.RenewDeadline.Duration, - RetryPeriod: c.ComponentConfig.Generic.LeaderElection.RetryPeriod.Duration, - Callbacks: leaderelection.LeaderCallbacks{ - OnStartedLeading: run, + // If leader migration is enabled, use the redirected initialization + // Check feature gate and configuration separately so that any error in configuration checking will not + // affect the result if the feature is not enabled. + if leadermigration.FeatureEnabled() && leadermigration.Enabled(&c.ComponentConfig.Generic) { + klog.Info("starting leader migration") + + leaderMigrator := leadermigration.NewLeaderMigrator(&c.ComponentConfig.Generic.LeaderMigration, + "cloud-controller-manager") + + // Split initializers based on which lease they require, main lease or migration lease. + migratedInitializers, unmigratedInitializers := devideInitializers(controllerInitializers, leaderMigrator.FilterFunc(true)) + + // We need to signal acquiring the main lock before attempting to acquire the migration lock + // so that no instance will hold the migration lock but not the main lock at any point. + mainLockAcquired := make(chan struct{}) + + // Start the main lock. + go leaderElectAndRun(c.ComponentConfig.Generic.LeaderElection.ResourceLock, + c.ComponentConfig.Generic.LeaderElection.ResourceName, + leaderelection.LeaderCallbacks{ + OnStartedLeading: func(ctx context.Context) { + klog.Info("leader migration: starting main controllers.") + // signal acquiring the main lock + close(mainLockAcquired) + run(ctx, unmigratedInitializers) + }, + OnStoppedLeading: func() { + klog.Fatalf("leaderelection lost") + }, + }) + + // Wait for the main lease to be acquired before attempting the migration lease. + <-mainLockAcquired + + // Start the migration lock. + leaderElectAndRun(c.ComponentConfig.Generic.LeaderMigration.ResourceLock, + c.ComponentConfig.Generic.LeaderMigration.LeaderName, + leaderelection.LeaderCallbacks{ + OnStartedLeading: func(ctx context.Context) { + klog.Info("leader migration: starting migrated controllers.") + run(ctx, migratedInitializers) + }, + OnStoppedLeading: func() { + klog.Fatalf("migration leaderelection lost") + }, + }) + + panic("unreachable") + } // end of leader migration + + // Normal leader election, without leader migration + leaderElectAndRun(c.ComponentConfig.Generic.LeaderElection.ResourceLock, + c.ComponentConfig.Generic.LeaderElection.ResourceName, + leaderelection.LeaderCallbacks{ + OnStartedLeading: func(ctx context.Context) { + run(ctx, controllerInitializers) + }, OnStoppedLeading: func() { klog.Fatalf("leaderelection lost") }, - }, - WatchDog: electionChecker, - Name: "cloud-controller-manager", - }) + }) + panic("unreachable") } @@ -417,3 +479,23 @@ func ResyncPeriod(c *cloudcontrollerconfig.CompletedConfig) func() time.Duration return time.Duration(float64(c.ComponentConfig.Generic.MinResyncPeriod.Nanoseconds()) * factor) } } + +// devideInitializers applies filterFunc to each of the given intializiers. +// filtered contains all controllers that have filterFunc(name) == true +// rest contains all controllers that have filterFunc(name) == false +func devideInitializers(allInitializers map[string]InitFunc, + filterFunc leadermigration.FilterFunc) (filtered map[string]InitFunc, rest map[string]InitFunc) { + if filterFunc == nil { + return make(map[string]InitFunc), allInitializers + } + filtered = make(map[string]InitFunc) + rest = make(map[string]InitFunc) + for name, initFunc := range allInitializers { + if filterFunc(name) { + filtered[name] = initFunc + } else { + rest[name] = initFunc + } + } + return +} diff --git a/staging/src/k8s.io/controller-manager/pkg/leadermigration/migrator.go b/staging/src/k8s.io/controller-manager/pkg/leadermigration/migrator.go new file mode 100644 index 00000000000..f35a2a1c9f3 --- /dev/null +++ b/staging/src/k8s.io/controller-manager/pkg/leadermigration/migrator.go @@ -0,0 +1,67 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package leadermigration + +import ( + internal "k8s.io/controller-manager/config" +) + +// LeaderMigrator holds information required by the leader migration process. +type LeaderMigrator struct { + config *internal.LeaderMigrationConfiguration + migratedControllers map[string]bool + // component indicates the name of the control-plane component that uses leader migration, + // which should be a controller manager, i.e. kube-controller-manager or cloud-controller-manager + component string +} + +// FilterFunc takes a name of controller, returning whether the controller should be started. +type FilterFunc func(controllerName string) bool + +// NewLeaderMigrator creates a LeaderMigrator with given config for the given component. The component +// indicates which controller manager is requesting this leader migration, and it should be consistent +// with the component field of ControllerLeaderConfiguration. +func NewLeaderMigrator(config *internal.LeaderMigrationConfiguration, component string) *LeaderMigrator { + migratedControllers := make(map[string]bool) + for _, leader := range config.ControllerLeaders { + migratedControllers[leader.Name] = leader.Component == component + } + return &LeaderMigrator{ + config: config, + migratedControllers: migratedControllers, + component: component, + } +} + +// FilterFunc returns the filter function that, when migrated == true +// - returns true if the controller should start under the migration lock +// - returns false if the controller should start under the main lock +// when migrated == false, the result is inverted. +func (m *LeaderMigrator) FilterFunc(migrated bool) FilterFunc { + return func(controllerName string) bool { + shouldRun, ok := m.migratedControllers[controllerName] + if !ok { + // The controller is not included in the migration + // If the caller wants the controllers outside migration, then we should include it. + return !migrated + } + // The controller is included in the migration + // If the caller wants the controllers within migration, we should only include it + // if current component should run the controller + return migrated && shouldRun + } +} diff --git a/staging/src/k8s.io/controller-manager/pkg/leadermigration/util.go b/staging/src/k8s.io/controller-manager/pkg/leadermigration/util.go new file mode 100644 index 00000000000..773c750ee0b --- /dev/null +++ b/staging/src/k8s.io/controller-manager/pkg/leadermigration/util.go @@ -0,0 +1,24 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package leadermigration + +import config "k8s.io/controller-manager/config" + +// Enabled checks whether Leader Migration should be enabled, given the GenericControllerManagerConfiguration. +func Enabled(genericConfig *config.GenericControllerManagerConfiguration) bool { + return genericConfig.LeaderElection.LeaderElect && genericConfig.LeaderMigrationEnabled +} From 3362918f8f12f93c7316cb8ecd464c5325f0c6a7 Mon Sep 17 00:00:00 2001 From: Indeed Date: Tue, 9 Mar 2021 10:22:05 -0800 Subject: [PATCH 3/6] extract electAndRun to a top-level func. --- .../app/controllermanager.go | 61 +++++++++-------- .../cloud-provider/app/controllermanager.go | 66 ++++++++++--------- 2 files changed, 67 insertions(+), 60 deletions(-) diff --git a/cmd/kube-controller-manager/app/controllermanager.go b/cmd/kube-controller-manager/app/controllermanager.go index 22ab003ade3..3f0d248ecb4 100644 --- a/cmd/kube-controller-manager/app/controllermanager.go +++ b/cmd/kube-controller-manager/app/controllermanager.go @@ -243,32 +243,6 @@ func Run(c *config.CompletedConfig, stopCh <-chan struct{}) error { // add a uniquifier so that two processes on the same host don't accidentally both become active id = id + "_" + string(uuid.NewUUID()) - // leaderElectAndRun runs the leader election, and runs the callbacks once the leader lease is acquired. - leaderElectAndRun := func(resourceLock string, leaseName string, callbacks leaderelection.LeaderCallbacks) { - rl, err := resourcelock.NewFromKubeconfig(resourceLock, - c.ComponentConfig.Generic.LeaderElection.ResourceNamespace, - leaseName, - resourcelock.ResourceLockConfig{ - Identity: id, - EventRecorder: c.EventRecorder, - }, - c.Kubeconfig, - c.ComponentConfig.Generic.LeaderElection.RenewDeadline.Duration) - if err != nil { - klog.Fatalf("error creating lock: %v", err) - } - - leaderelection.RunOrDie(context.TODO(), leaderelection.LeaderElectionConfig{ - Lock: rl, - LeaseDuration: c.ComponentConfig.Generic.LeaderElection.LeaseDuration.Duration, - RenewDeadline: c.ComponentConfig.Generic.LeaderElection.RenewDeadline.Duration, - RetryPeriod: c.ComponentConfig.Generic.LeaderElection.RetryPeriod.Duration, - Callbacks: callbacks, - WatchDog: electionChecker, - Name: leaseName, - }) - } - // If leader migration is enabled, use the redirected initialization // Check feature gate and configuration separately so that any error in configuration checking will not // affect the result if the feature is not enabled. @@ -289,7 +263,7 @@ func Run(c *config.CompletedConfig, stopCh <-chan struct{}) error { } // Start the main lock, using LeaderElectionConfiguration for the type and name of the lease. - go leaderElectAndRun( + go leaderElectAndRun(c, id, electionChecker, c.ComponentConfig.Generic.LeaderElection.ResourceLock, c.ComponentConfig.Generic.LeaderElection.ResourceName, leaderelection.LeaderCallbacks{ @@ -310,7 +284,7 @@ func Run(c *config.CompletedConfig, stopCh <-chan struct{}) error { <-saTokenControllerStarted // Start the migration lock, using LeaderMigrationConfiguration for the type and name of the lease. - leaderElectAndRun( + leaderElectAndRun(c, id, electionChecker, c.ComponentConfig.Generic.LeaderMigration.ResourceLock, c.ComponentConfig.Generic.LeaderMigration.LeaderName, leaderelection.LeaderCallbacks{ @@ -327,7 +301,7 @@ func Run(c *config.CompletedConfig, stopCh <-chan struct{}) error { } // end of leader migration // Normal leader election, without leader migration - leaderElectAndRun( + leaderElectAndRun(c, id, electionChecker, c.ComponentConfig.Generic.LeaderElection.ResourceLock, c.ComponentConfig.Generic.LeaderElection.ResourceName, leaderelection.LeaderCallbacks{ @@ -690,6 +664,35 @@ func createClientBuilders(c *config.CompletedConfig) (clientBuilder clientbuilde return } +// leaderElectAndRun runs the leader election, and runs the callbacks once the leader lease is acquired. +// TODO: extract this function into staging/controller-manager +func leaderElectAndRun(c *config.CompletedConfig, lockIdentity string, electionChecker *leaderelection.HealthzAdaptor, resourceLock string, leaseName string, callbacks leaderelection.LeaderCallbacks) { + rl, err := resourcelock.NewFromKubeconfig(resourceLock, + c.ComponentConfig.Generic.LeaderElection.ResourceNamespace, + leaseName, + resourcelock.ResourceLockConfig{ + Identity: lockIdentity, + EventRecorder: c.EventRecorder, + }, + c.Kubeconfig, + c.ComponentConfig.Generic.LeaderElection.RenewDeadline.Duration) + if err != nil { + klog.Fatalf("error creating lock: %v", err) + } + + leaderelection.RunOrDie(context.TODO(), leaderelection.LeaderElectionConfig{ + Lock: rl, + LeaseDuration: c.ComponentConfig.Generic.LeaderElection.LeaseDuration.Duration, + RenewDeadline: c.ComponentConfig.Generic.LeaderElection.RenewDeadline.Duration, + RetryPeriod: c.ComponentConfig.Generic.LeaderElection.RetryPeriod.Duration, + Callbacks: callbacks, + WatchDog: electionChecker, + Name: leaseName, + }) + + panic("unreachable") +} + // filterInitializers returns initializers that has filterFunc(name) == true. // filterFunc can be nil, in which case the original initializers will be returned directly. func filterInitializers(allInitializers map[string]InitFunc, filterFunc leadermigration.FilterFunc) map[string]InitFunc { diff --git a/staging/src/k8s.io/cloud-provider/app/controllermanager.go b/staging/src/k8s.io/cloud-provider/app/controllermanager.go index 599367843ae..9d72a76ee21 100644 --- a/staging/src/k8s.io/cloud-provider/app/controllermanager.go +++ b/staging/src/k8s.io/cloud-provider/app/controllermanager.go @@ -200,34 +200,6 @@ func Run(c *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface // add a uniquifier so that two processes on the same host don't accidentally both become active id = id + "_" + string(uuid.NewUUID()) - // leaderElectAndRun runs the callbacks once the leader lease is acquired. - leaderElectAndRun := func(resourceLock string, leaseName string, callbacks leaderelection.LeaderCallbacks) { - // Lock required for leader election - rl, err := resourcelock.NewFromKubeconfig(resourceLock, - c.ComponentConfig.Generic.LeaderElection.ResourceNamespace, - leaseName, - resourcelock.ResourceLockConfig{ - Identity: id, - EventRecorder: c.EventRecorder, - }, - c.Kubeconfig, - c.ComponentConfig.Generic.LeaderElection.RenewDeadline.Duration) - if err != nil { - klog.Fatalf("error creating lock: %v", err) - } - - // Try and become the leader and start cloud controller manager loops - leaderelection.RunOrDie(context.TODO(), leaderelection.LeaderElectionConfig{ - Lock: rl, - LeaseDuration: c.ComponentConfig.Generic.LeaderElection.LeaseDuration.Duration, - RenewDeadline: c.ComponentConfig.Generic.LeaderElection.RenewDeadline.Duration, - RetryPeriod: c.ComponentConfig.Generic.LeaderElection.RetryPeriod.Duration, - Callbacks: callbacks, - WatchDog: electionChecker, - Name: leaseName, - }) - } - // If leader migration is enabled, use the redirected initialization // Check feature gate and configuration separately so that any error in configuration checking will not // affect the result if the feature is not enabled. @@ -245,7 +217,8 @@ func Run(c *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface mainLockAcquired := make(chan struct{}) // Start the main lock. - go leaderElectAndRun(c.ComponentConfig.Generic.LeaderElection.ResourceLock, + go leaderElectAndRun(c, id, electionChecker, + c.ComponentConfig.Generic.LeaderElection.ResourceLock, c.ComponentConfig.Generic.LeaderElection.ResourceName, leaderelection.LeaderCallbacks{ OnStartedLeading: func(ctx context.Context) { @@ -263,7 +236,8 @@ func Run(c *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface <-mainLockAcquired // Start the migration lock. - leaderElectAndRun(c.ComponentConfig.Generic.LeaderMigration.ResourceLock, + leaderElectAndRun(c, id, electionChecker, + c.ComponentConfig.Generic.LeaderMigration.ResourceLock, c.ComponentConfig.Generic.LeaderMigration.LeaderName, leaderelection.LeaderCallbacks{ OnStartedLeading: func(ctx context.Context) { @@ -279,7 +253,8 @@ func Run(c *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface } // end of leader migration // Normal leader election, without leader migration - leaderElectAndRun(c.ComponentConfig.Generic.LeaderElection.ResourceLock, + leaderElectAndRun(c, id, electionChecker, + c.ComponentConfig.Generic.LeaderElection.ResourceLock, c.ComponentConfig.Generic.LeaderElection.ResourceName, leaderelection.LeaderCallbacks{ OnStartedLeading: func(ctx context.Context) { @@ -480,6 +455,35 @@ func ResyncPeriod(c *cloudcontrollerconfig.CompletedConfig) func() time.Duration } } +// leaderElectAndRun runs the leader election, and runs the callbacks once the leader lease is acquired. +// TODO: extract this function into staging/controller-manager +func leaderElectAndRun(c *cloudcontrollerconfig.CompletedConfig, lockIdentity string, electionChecker *leaderelection.HealthzAdaptor, resourceLock string, leaseName string, callbacks leaderelection.LeaderCallbacks) { + rl, err := resourcelock.NewFromKubeconfig(resourceLock, + c.ComponentConfig.Generic.LeaderElection.ResourceNamespace, + leaseName, + resourcelock.ResourceLockConfig{ + Identity: lockIdentity, + EventRecorder: c.EventRecorder, + }, + c.Kubeconfig, + c.ComponentConfig.Generic.LeaderElection.RenewDeadline.Duration) + if err != nil { + klog.Fatalf("error creating lock: %v", err) + } + + leaderelection.RunOrDie(context.TODO(), leaderelection.LeaderElectionConfig{ + Lock: rl, + LeaseDuration: c.ComponentConfig.Generic.LeaderElection.LeaseDuration.Duration, + RenewDeadline: c.ComponentConfig.Generic.LeaderElection.RenewDeadline.Duration, + RetryPeriod: c.ComponentConfig.Generic.LeaderElection.RetryPeriod.Duration, + Callbacks: callbacks, + WatchDog: electionChecker, + Name: leaseName, + }) + + panic("unreachable") +} + // devideInitializers applies filterFunc to each of the given intializiers. // filtered contains all controllers that have filterFunc(name) == true // rest contains all controllers that have filterFunc(name) == false From e8479414abd4ea7c7ccf80566b002820f9d1dbc8 Mon Sep 17 00:00:00 2001 From: Indeed Date: Tue, 9 Mar 2021 11:17:49 -0800 Subject: [PATCH 4/6] extract common code for the main lock. --- .../app/controllermanager.go | 84 +++++++++--------- .../cloud-provider/app/controllermanager.go | 88 +++++++++---------- .../pkg/leadermigration/migrator.go | 5 ++ .../pkg/leadermigration/util.go | 3 +- 4 files changed, 90 insertions(+), 90 deletions(-) diff --git a/cmd/kube-controller-manager/app/controllermanager.go b/cmd/kube-controller-manager/app/controllermanager.go index 3f0d248ecb4..81c66c1969b 100644 --- a/cmd/kube-controller-manager/app/controllermanager.go +++ b/cmd/kube-controller-manager/app/controllermanager.go @@ -243,48 +243,57 @@ func Run(c *config.CompletedConfig, stopCh <-chan struct{}) error { // add a uniquifier so that two processes on the same host don't accidentally both become active id = id + "_" + string(uuid.NewUUID()) - // If leader migration is enabled, use the redirected initialization - // Check feature gate and configuration separately so that any error in configuration checking will not - // affect the result if the feature is not enabled. - if leadermigration.FeatureEnabled() && leadermigration.Enabled(&c.ComponentConfig.Generic) { + // leaderMigrator will be non-nil if and only if Leader Migration is enabled. + var leaderMigrator *leadermigration.LeaderMigrator = nil + + // startSATokenController will be original saTokenControllerInitFunc if leader migration is not enabled. + startSATokenController := saTokenControllerInitFunc + + // If leader migration is enabled, create the LeaderMigrator and prepare for migration + if leadermigration.Enabled(&c.ComponentConfig.Generic) { klog.Infof("starting leader migration") - leaderMigrator := leadermigration.NewLeaderMigrator(&c.ComponentConfig.Generic.LeaderMigration, + leaderMigrator = leadermigration.NewLeaderMigrator(&c.ComponentConfig.Generic.LeaderMigration, "kube-controller-manager") - // We need a channel to signal the start of Service Account Token Controller - // all migrated channel must start afterwards. - saTokenControllerStarted := make(chan struct{}) - - // Wrap saTokenControllerInitFunc to close the channel after returning. - wrappedSATokenControllerInitFunc := func(ctx ControllerContext) (http.Handler, bool, error) { - defer close(saTokenControllerStarted) + // Wrap saTokenControllerInitFunc to signal readiness for migration after starting + // the controller. + startSATokenController = func(ctx ControllerContext) (http.Handler, bool, error) { + defer close(leaderMigrator.MigrationReady) return saTokenControllerInitFunc(ctx) } + } - // Start the main lock, using LeaderElectionConfiguration for the type and name of the lease. - go leaderElectAndRun(c, id, electionChecker, - c.ComponentConfig.Generic.LeaderElection.ResourceLock, - c.ComponentConfig.Generic.LeaderElection.ResourceName, - leaderelection.LeaderCallbacks{ - OnStartedLeading: func(ctx context.Context) { + // Start the main lock + go leaderElectAndRun(c, id, electionChecker, + c.ComponentConfig.Generic.LeaderElection.ResourceLock, + c.ComponentConfig.Generic.LeaderElection.ResourceName, + leaderelection.LeaderCallbacks{ + OnStartedLeading: func(ctx context.Context) { + var filterFunc leadermigration.FilterFunc = nil + if leaderMigrator != nil { + // If leader migration is enabled, we should start only non-migrated controllers + // for the main lock. + filterFunc = leaderMigrator.FilterFunc(false) klog.Info("leader migration: starting main controllers.") - // saTokenController should only run under the main lock - run(ctx, wrappedSATokenControllerInitFunc, leaderMigrator.FilterFunc(false)) - }, - OnStoppedLeading: func() { - klog.Fatalf("leaderelection lost") - }, - }) + } + run(ctx, startSATokenController, filterFunc) + }, + OnStoppedLeading: func() { + klog.Fatalf("leaderelection lost") + }, + }) + // If Leader Migration is enabled, proceed to attempt the migration lock. + if leaderMigrator != nil { // Wait for Service Account Token Controller to start before acquiring the migration lock. // At this point, the main lock must have already been acquired, or the KCM process already exited. // We wait for the main lock before acquiring the migration lock to prevent the situation // where KCM instance A holds the main lock while KCM instance B holds the migration lock. - <-saTokenControllerStarted + <-leaderMigrator.MigrationReady - // Start the migration lock, using LeaderMigrationConfiguration for the type and name of the lease. - leaderElectAndRun(c, id, electionChecker, + // Start the migration lock. + go leaderElectAndRun(c, id, electionChecker, c.ComponentConfig.Generic.LeaderMigration.ResourceLock, c.ComponentConfig.Generic.LeaderMigration.LeaderName, leaderelection.LeaderCallbacks{ @@ -297,23 +306,9 @@ func Run(c *config.CompletedConfig, stopCh <-chan struct{}) error { klog.Fatalf("migration leaderelection lost") }, }) - panic("unreachable") - } // end of leader migration + } - // Normal leader election, without leader migration - leaderElectAndRun(c, id, electionChecker, - c.ComponentConfig.Generic.LeaderElection.ResourceLock, - c.ComponentConfig.Generic.LeaderElection.ResourceName, - leaderelection.LeaderCallbacks{ - OnStartedLeading: func(ctx context.Context) { - run(ctx, saTokenControllerInitFunc, nil) - }, - OnStoppedLeading: func() { - klog.Fatalf("leaderelection lost") - }, - }) - - panic("unreachable") + select {} } // ControllerContext defines the context object for controller @@ -695,6 +690,7 @@ func leaderElectAndRun(c *config.CompletedConfig, lockIdentity string, electionC // filterInitializers returns initializers that has filterFunc(name) == true. // filterFunc can be nil, in which case the original initializers will be returned directly. +// InitFunc is local to kube-controller-manager, and thus filterInitializers has to be local too. func filterInitializers(allInitializers map[string]InitFunc, filterFunc leadermigration.FilterFunc) map[string]InitFunc { if filterFunc == nil { return allInitializers diff --git a/staging/src/k8s.io/cloud-provider/app/controllermanager.go b/staging/src/k8s.io/cloud-provider/app/controllermanager.go index 9d72a76ee21..57ca0eb04a6 100644 --- a/staging/src/k8s.io/cloud-provider/app/controllermanager.go +++ b/staging/src/k8s.io/cloud-provider/app/controllermanager.go @@ -200,43 +200,55 @@ func Run(c *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface // add a uniquifier so that two processes on the same host don't accidentally both become active id = id + "_" + string(uuid.NewUUID()) + // leaderMigrator will be non-nil if and only if Leader Migration is enabled. + var leaderMigrator *leadermigration.LeaderMigrator = nil + + // Leader migration requires splitting initializers for the main and the migration lock + // If leader migration is not enabled, these two will be unused. + var migratedInitializers, unmigratedInitializers map[string]InitFunc + // If leader migration is enabled, use the redirected initialization // Check feature gate and configuration separately so that any error in configuration checking will not // affect the result if the feature is not enabled. - if leadermigration.FeatureEnabled() && leadermigration.Enabled(&c.ComponentConfig.Generic) { + if leadermigration.Enabled(&c.ComponentConfig.Generic) { klog.Info("starting leader migration") - leaderMigrator := leadermigration.NewLeaderMigrator(&c.ComponentConfig.Generic.LeaderMigration, + leaderMigrator = leadermigration.NewLeaderMigrator(&c.ComponentConfig.Generic.LeaderMigration, "cloud-controller-manager") // Split initializers based on which lease they require, main lease or migration lease. - migratedInitializers, unmigratedInitializers := devideInitializers(controllerInitializers, leaderMigrator.FilterFunc(true)) + migratedInitializers, unmigratedInitializers = divideInitializers(controllerInitializers, leaderMigrator.FilterFunc(true)) + } - // We need to signal acquiring the main lock before attempting to acquire the migration lock - // so that no instance will hold the migration lock but not the main lock at any point. - mainLockAcquired := make(chan struct{}) - - // Start the main lock. - go leaderElectAndRun(c, id, electionChecker, - c.ComponentConfig.Generic.LeaderElection.ResourceLock, - c.ComponentConfig.Generic.LeaderElection.ResourceName, - leaderelection.LeaderCallbacks{ - OnStartedLeading: func(ctx context.Context) { + // Start the main lock + go leaderElectAndRun(c, id, electionChecker, + c.ComponentConfig.Generic.LeaderElection.ResourceLock, + c.ComponentConfig.Generic.LeaderElection.ResourceName, + leaderelection.LeaderCallbacks{ + OnStartedLeading: func(ctx context.Context) { + initializers := controllerInitializers + if leaderMigrator != nil { + // If leader migration is enabled, we should start only non-migrated controllers + // for the main lock. + initializers = unmigratedInitializers klog.Info("leader migration: starting main controllers.") - // signal acquiring the main lock - close(mainLockAcquired) - run(ctx, unmigratedInitializers) - }, - OnStoppedLeading: func() { - klog.Fatalf("leaderelection lost") - }, - }) + // Signal the main lock is acquired, and thus migration lock is ready to attempt. + close(leaderMigrator.MigrationReady) + } + run(ctx, initializers) + }, + OnStoppedLeading: func() { + klog.Fatalf("leaderelection lost") + }, + }) - // Wait for the main lease to be acquired before attempting the migration lease. - <-mainLockAcquired + // If Leader Migration is enabled, proceed to attempt the migration lock. + if leaderMigrator != nil { + // Wait for the signal of main lock being acquired. + <-leaderMigrator.MigrationReady // Start the migration lock. - leaderElectAndRun(c, id, electionChecker, + go leaderElectAndRun(c, id, electionChecker, c.ComponentConfig.Generic.LeaderMigration.ResourceLock, c.ComponentConfig.Generic.LeaderMigration.LeaderName, leaderelection.LeaderCallbacks{ @@ -248,24 +260,9 @@ func Run(c *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface klog.Fatalf("migration leaderelection lost") }, }) + } - panic("unreachable") - } // end of leader migration - - // Normal leader election, without leader migration - leaderElectAndRun(c, id, electionChecker, - c.ComponentConfig.Generic.LeaderElection.ResourceLock, - c.ComponentConfig.Generic.LeaderElection.ResourceName, - leaderelection.LeaderCallbacks{ - OnStartedLeading: func(ctx context.Context) { - run(ctx, controllerInitializers) - }, - OnStoppedLeading: func() { - klog.Fatalf("leaderelection lost") - }, - }) - - panic("unreachable") + select {} } // startControllers starts the cloud specific controller loops. @@ -484,10 +481,11 @@ func leaderElectAndRun(c *cloudcontrollerconfig.CompletedConfig, lockIdentity st panic("unreachable") } -// devideInitializers applies filterFunc to each of the given intializiers. -// filtered contains all controllers that have filterFunc(name) == true -// rest contains all controllers that have filterFunc(name) == false -func devideInitializers(allInitializers map[string]InitFunc, +// divideInitializers applies filterFunc to each of the given intializiers. +// filtered contains all controllers that have filterFunc(name) == true +// rest contains all controllers that have filterFunc(name) == false +// InitFunc is local to cloud-controller-manager, and thus divideInitializers has to be local too. +func divideInitializers(allInitializers map[string]InitFunc, filterFunc leadermigration.FilterFunc) (filtered map[string]InitFunc, rest map[string]InitFunc) { if filterFunc == nil { return make(map[string]InitFunc), allInitializers diff --git a/staging/src/k8s.io/controller-manager/pkg/leadermigration/migrator.go b/staging/src/k8s.io/controller-manager/pkg/leadermigration/migrator.go index f35a2a1c9f3..a1001ed9e00 100644 --- a/staging/src/k8s.io/controller-manager/pkg/leadermigration/migrator.go +++ b/staging/src/k8s.io/controller-manager/pkg/leadermigration/migrator.go @@ -22,6 +22,10 @@ import ( // LeaderMigrator holds information required by the leader migration process. type LeaderMigrator struct { + // MigrationReady is closed after the coontroller manager finishes preparing for the migration lock. + // After this point, the leader migration process will proceed to acquire the migration lock. + MigrationReady chan struct{} + config *internal.LeaderMigrationConfiguration migratedControllers map[string]bool // component indicates the name of the control-plane component that uses leader migration, @@ -41,6 +45,7 @@ func NewLeaderMigrator(config *internal.LeaderMigrationConfiguration, component migratedControllers[leader.Name] = leader.Component == component } return &LeaderMigrator{ + MigrationReady: make(chan struct{}), config: config, migratedControllers: migratedControllers, component: component, diff --git a/staging/src/k8s.io/controller-manager/pkg/leadermigration/util.go b/staging/src/k8s.io/controller-manager/pkg/leadermigration/util.go index 773c750ee0b..83eacc0df24 100644 --- a/staging/src/k8s.io/controller-manager/pkg/leadermigration/util.go +++ b/staging/src/k8s.io/controller-manager/pkg/leadermigration/util.go @@ -19,6 +19,7 @@ package leadermigration import config "k8s.io/controller-manager/config" // Enabled checks whether Leader Migration should be enabled, given the GenericControllerManagerConfiguration. +// It considers the feature gate first, and will always return false if the feature gate is not enabled. func Enabled(genericConfig *config.GenericControllerManagerConfiguration) bool { - return genericConfig.LeaderElection.LeaderElect && genericConfig.LeaderMigrationEnabled + return FeatureEnabled() && genericConfig.LeaderElection.LeaderElect && genericConfig.LeaderMigrationEnabled } From ba47f60e4b9061e586235a818502f6054b58b17f Mon Sep 17 00:00:00 2001 From: Indeed Date: Tue, 9 Mar 2021 13:22:53 -0800 Subject: [PATCH 5/6] change filter to return a FilterResult. --- .../app/controllermanager.go | 18 +-- .../cloud-provider/app/controllermanager.go | 34 ++--- .../pkg/leadermigration/filter.go | 35 ++++++ .../pkg/leadermigration/migrator.go | 55 ++++---- .../pkg/leadermigration/migrator_test.go | 119 ++++++++++++++++++ 5 files changed, 196 insertions(+), 65 deletions(-) create mode 100644 staging/src/k8s.io/controller-manager/pkg/leadermigration/filter.go create mode 100644 staging/src/k8s.io/controller-manager/pkg/leadermigration/migrator_test.go diff --git a/cmd/kube-controller-manager/app/controllermanager.go b/cmd/kube-controller-manager/app/controllermanager.go index 81c66c1969b..36508b0a6ca 100644 --- a/cmd/kube-controller-manager/app/controllermanager.go +++ b/cmd/kube-controller-manager/app/controllermanager.go @@ -212,12 +212,12 @@ func Run(c *config.CompletedConfig, stopCh <-chan struct{}) error { saTokenControllerInitFunc := serviceAccountTokenControllerStarter{rootClientBuilder: rootClientBuilder}.startServiceAccountTokenController - run := func(ctx context.Context, startSATokenController InitFunc, filterFunc leadermigration.FilterFunc) { + run := func(ctx context.Context, startSATokenController InitFunc, filterFunc leadermigration.FilterFunc, requiredResult leadermigration.FilterResult) { controllerContext, err := CreateControllerContext(c, rootClientBuilder, clientBuilder, ctx.Done()) if err != nil { klog.Fatalf("error building controller context: %v", err) } - controllerInitializers := filterInitializers(NewControllerInitializers(controllerContext.LoopMode), filterFunc) + controllerInitializers := filterInitializers(NewControllerInitializers(controllerContext.LoopMode), filterFunc, requiredResult) if err := StartControllers(controllerContext, startSATokenController, controllerInitializers, unsecuredMux); err != nil { klog.Fatalf("error starting controllers: %v", err) } @@ -231,7 +231,7 @@ func Run(c *config.CompletedConfig, stopCh <-chan struct{}) error { // No leader election, run directly if !c.ComponentConfig.Generic.LeaderElection.LeaderElect { - run(context.TODO(), saTokenControllerInitFunc, nil) + run(context.TODO(), saTokenControllerInitFunc, nil, leadermigration.ControllerNonMigrated) panic("unreachable") } @@ -274,10 +274,10 @@ func Run(c *config.CompletedConfig, stopCh <-chan struct{}) error { if leaderMigrator != nil { // If leader migration is enabled, we should start only non-migrated controllers // for the main lock. - filterFunc = leaderMigrator.FilterFunc(false) + filterFunc = leaderMigrator.FilterFunc klog.Info("leader migration: starting main controllers.") } - run(ctx, startSATokenController, filterFunc) + run(ctx, startSATokenController, filterFunc, leadermigration.ControllerNonMigrated) }, OnStoppedLeading: func() { klog.Fatalf("leaderelection lost") @@ -300,7 +300,7 @@ func Run(c *config.CompletedConfig, stopCh <-chan struct{}) error { OnStartedLeading: func(ctx context.Context) { klog.Info("leader migration: starting migrated controllers.") // DO NOT start saTokenController under migration lock (passing nil) - run(ctx, nil, leaderMigrator.FilterFunc(true)) + run(ctx, nil, leaderMigrator.FilterFunc, leadermigration.ControllerMigrated) }, OnStoppedLeading: func() { klog.Fatalf("migration leaderelection lost") @@ -688,16 +688,16 @@ func leaderElectAndRun(c *config.CompletedConfig, lockIdentity string, electionC panic("unreachable") } -// filterInitializers returns initializers that has filterFunc(name) == true. +// filterInitializers returns initializers that has filterFunc(name) == expected. // filterFunc can be nil, in which case the original initializers will be returned directly. // InitFunc is local to kube-controller-manager, and thus filterInitializers has to be local too. -func filterInitializers(allInitializers map[string]InitFunc, filterFunc leadermigration.FilterFunc) map[string]InitFunc { +func filterInitializers(allInitializers map[string]InitFunc, filterFunc leadermigration.FilterFunc, expected leadermigration.FilterResult) map[string]InitFunc { if filterFunc == nil { return allInitializers } initializers := make(map[string]InitFunc) for name, initFunc := range allInitializers { - if filterFunc(name) { + if filterFunc(name) == expected { initializers[name] = initFunc } } diff --git a/staging/src/k8s.io/cloud-provider/app/controllermanager.go b/staging/src/k8s.io/cloud-provider/app/controllermanager.go index 57ca0eb04a6..82d9ab2da1c 100644 --- a/staging/src/k8s.io/cloud-provider/app/controllermanager.go +++ b/staging/src/k8s.io/cloud-provider/app/controllermanager.go @@ -203,10 +203,6 @@ func Run(c *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface // leaderMigrator will be non-nil if and only if Leader Migration is enabled. var leaderMigrator *leadermigration.LeaderMigrator = nil - // Leader migration requires splitting initializers for the main and the migration lock - // If leader migration is not enabled, these two will be unused. - var migratedInitializers, unmigratedInitializers map[string]InitFunc - // If leader migration is enabled, use the redirected initialization // Check feature gate and configuration separately so that any error in configuration checking will not // affect the result if the feature is not enabled. @@ -215,9 +211,6 @@ func Run(c *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface leaderMigrator = leadermigration.NewLeaderMigrator(&c.ComponentConfig.Generic.LeaderMigration, "cloud-controller-manager") - - // Split initializers based on which lease they require, main lease or migration lease. - migratedInitializers, unmigratedInitializers = divideInitializers(controllerInitializers, leaderMigrator.FilterFunc(true)) } // Start the main lock @@ -230,7 +223,7 @@ func Run(c *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface if leaderMigrator != nil { // If leader migration is enabled, we should start only non-migrated controllers // for the main lock. - initializers = unmigratedInitializers + initializers = filterInitializers(controllerInitializers, leaderMigrator.FilterFunc, leadermigration.ControllerNonMigrated) klog.Info("leader migration: starting main controllers.") // Signal the main lock is acquired, and thus migration lock is ready to attempt. close(leaderMigrator.MigrationReady) @@ -254,7 +247,7 @@ func Run(c *cloudcontrollerconfig.CompletedConfig, cloud cloudprovider.Interface leaderelection.LeaderCallbacks{ OnStartedLeading: func(ctx context.Context) { klog.Info("leader migration: starting migrated controllers.") - run(ctx, migratedInitializers) + run(ctx, filterInitializers(controllerInitializers, leaderMigrator.FilterFunc, leadermigration.ControllerMigrated)) }, OnStoppedLeading: func() { klog.Fatalf("migration leaderelection lost") @@ -481,23 +474,18 @@ func leaderElectAndRun(c *cloudcontrollerconfig.CompletedConfig, lockIdentity st panic("unreachable") } -// divideInitializers applies filterFunc to each of the given intializiers. -// filtered contains all controllers that have filterFunc(name) == true -// rest contains all controllers that have filterFunc(name) == false -// InitFunc is local to cloud-controller-manager, and thus divideInitializers has to be local too. -func divideInitializers(allInitializers map[string]InitFunc, - filterFunc leadermigration.FilterFunc) (filtered map[string]InitFunc, rest map[string]InitFunc) { +// filterInitializers returns initializers that has filterFunc(name) == expected. +// filterFunc can be nil, in which case the original initializers will be returned directly. +// InitFunc is local to cloud-controller-manager, and thus filterInitializers has to be local too. +func filterInitializers(allInitializers map[string]InitFunc, filterFunc leadermigration.FilterFunc, expected leadermigration.FilterResult) map[string]InitFunc { if filterFunc == nil { - return make(map[string]InitFunc), allInitializers + return allInitializers } - filtered = make(map[string]InitFunc) - rest = make(map[string]InitFunc) + initializers := make(map[string]InitFunc) for name, initFunc := range allInitializers { - if filterFunc(name) { - filtered[name] = initFunc - } else { - rest[name] = initFunc + if filterFunc(name) == expected { + initializers[name] = initFunc } } - return + return initializers } diff --git a/staging/src/k8s.io/controller-manager/pkg/leadermigration/filter.go b/staging/src/k8s.io/controller-manager/pkg/leadermigration/filter.go new file mode 100644 index 00000000000..34496a1c14d --- /dev/null +++ b/staging/src/k8s.io/controller-manager/pkg/leadermigration/filter.go @@ -0,0 +1,35 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package leadermigration + +// FilterResult indicates whether and how the controller manager should start the controller. +type FilterResult int32 + +const ( + // ControllerOwned indicates that the controller is owned by another controller manager + // and thus should NOT be started by this controller manager. + ControllerUnowned = iota + // ControllerMigrated indicates that the controller manager should start this controller + // with thte migration lock. + ControllerMigrated + // ControllerNonMigrated indicates that the controller manager should start this controller + // with thte main lock. + ControllerNonMigrated +) + +// FilterFunc takes a name of controller, returning a FilterResult indicating how to start controller. +type FilterFunc func(controllerName string) FilterResult diff --git a/staging/src/k8s.io/controller-manager/pkg/leadermigration/migrator.go b/staging/src/k8s.io/controller-manager/pkg/leadermigration/migrator.go index a1001ed9e00..3ff19e3c1d0 100644 --- a/staging/src/k8s.io/controller-manager/pkg/leadermigration/migrator.go +++ b/staging/src/k8s.io/controller-manager/pkg/leadermigration/migrator.go @@ -22,21 +22,15 @@ import ( // LeaderMigrator holds information required by the leader migration process. type LeaderMigrator struct { - // MigrationReady is closed after the coontroller manager finishes preparing for the migration lock. + // MigrationReady is closed after the controller manager finishes preparing for the migration lock. // After this point, the leader migration process will proceed to acquire the migration lock. MigrationReady chan struct{} - config *internal.LeaderMigrationConfiguration - migratedControllers map[string]bool - // component indicates the name of the control-plane component that uses leader migration, - // which should be a controller manager, i.e. kube-controller-manager or cloud-controller-manager - component string + // FilterFunc returns a FilterResult telling the controller manager what to do with the controller. + FilterFunc FilterFunc } -// FilterFunc takes a name of controller, returning whether the controller should be started. -type FilterFunc func(controllerName string) bool - -// NewLeaderMigrator creates a LeaderMigrator with given config for the given component. The component +// NewLeaderMigrator creates a LeaderMigrator with given config for the given component. component // indicates which controller manager is requesting this leader migration, and it should be consistent // with the component field of ControllerLeaderConfiguration. func NewLeaderMigrator(config *internal.LeaderMigrationConfiguration, component string) *LeaderMigrator { @@ -45,28 +39,23 @@ func NewLeaderMigrator(config *internal.LeaderMigrationConfiguration, component migratedControllers[leader.Name] = leader.Component == component } return &LeaderMigrator{ - MigrationReady: make(chan struct{}), - config: config, - migratedControllers: migratedControllers, - component: component, - } -} - -// FilterFunc returns the filter function that, when migrated == true -// - returns true if the controller should start under the migration lock -// - returns false if the controller should start under the main lock -// when migrated == false, the result is inverted. -func (m *LeaderMigrator) FilterFunc(migrated bool) FilterFunc { - return func(controllerName string) bool { - shouldRun, ok := m.migratedControllers[controllerName] - if !ok { - // The controller is not included in the migration - // If the caller wants the controllers outside migration, then we should include it. - return !migrated - } - // The controller is included in the migration - // If the caller wants the controllers within migration, we should only include it - // if current component should run the controller - return migrated && shouldRun + MigrationReady: make(chan struct{}), + FilterFunc: func(controllerName string) FilterResult { + shouldRun, ok := migratedControllers[controllerName] + if ok { + // The controller is included in the migration + if shouldRun { + // If the controller manager should run the controller, + // start it in the migration lock. + return ControllerMigrated + } + // Otherwise, the controller should be started by + // some other controller manager. + return ControllerUnowned + } + // The controller is not included in the migration, + // and should be started in the main lock. + return ControllerNonMigrated + }, } } diff --git a/staging/src/k8s.io/controller-manager/pkg/leadermigration/migrator_test.go b/staging/src/k8s.io/controller-manager/pkg/leadermigration/migrator_test.go new file mode 100644 index 00000000000..104990405dc --- /dev/null +++ b/staging/src/k8s.io/controller-manager/pkg/leadermigration/migrator_test.go @@ -0,0 +1,119 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package leadermigration + +import ( + "testing" + + internal "k8s.io/controller-manager/config" +) + +func TestLeaderMigratorFilterFunc(t *testing.T) { + fromConfig := &internal.LeaderMigrationConfiguration{ + ResourceLock: "leases", + LeaderName: "cloud-provider-extraction-migration", + ControllerLeaders: []internal.ControllerLeaderConfiguration{ + { + Name: "route", + Component: "kube-controller-manager", + }, { + Name: "service", + Component: "kube-controller-manager", + }, { + Name: "cloud-node-lifecycle", + Component: "kube-controller-manager", + }, + }, + } + toConfig := &internal.LeaderMigrationConfiguration{ + ResourceLock: "leases", + LeaderName: "cloud-provider-extraction-migration", + ControllerLeaders: []internal.ControllerLeaderConfiguration{ + { + Name: "route", + Component: "cloud-controller-manager", + }, { + Name: "service", + Component: "cloud-controller-manager", + }, { + Name: "cloud-node-lifecycle", + Component: "cloud-controller-manager", + }, + }, + } + for _, tc := range []struct { + name string + config *internal.LeaderMigrationConfiguration + component string + migrated bool + expectResult map[string]FilterResult + }{ + { + name: "from config, kcm", + config: fromConfig, + component: "kube-controller-manager", + expectResult: map[string]FilterResult{ + "deployment": ControllerNonMigrated, + "route": ControllerMigrated, + "service": ControllerMigrated, + "cloud-node-lifecycle": ControllerMigrated, + }, + }, + { + name: "from config, ccm", + config: fromConfig, + component: "cloud-controller-manager", + expectResult: map[string]FilterResult{ + "cloud-node": ControllerNonMigrated, + "route": ControllerUnowned, + "service": ControllerUnowned, + "cloud-node-lifecycle": ControllerUnowned, + }, + }, + { + name: "to config, kcm", + config: toConfig, + component: "kube-controller-manager", + expectResult: map[string]FilterResult{ + "deployment": ControllerNonMigrated, + "route": ControllerUnowned, + "service": ControllerUnowned, + "cloud-node-lifecycle": ControllerUnowned, + }, + }, + { + name: "to config, ccm", + config: toConfig, + component: "cloud-controller-manager", + expectResult: map[string]FilterResult{ + "cloud-node": ControllerNonMigrated, + "route": ControllerMigrated, + "service": ControllerMigrated, + "cloud-node-lifecycle": ControllerMigrated, + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + migrator := NewLeaderMigrator(tc.config, tc.component) + for name, expected := range tc.expectResult { + if result := migrator.FilterFunc(name); expected != result { + t.Errorf("controller %s, expect %v, got %v", name, expected, result) + } + } + }) + } +} From 2a73fdf9ea144bb8b4981516755f5e2510994376 Mon Sep 17 00:00:00 2001 From: Indeed Date: Tue, 9 Mar 2021 13:52:59 -0800 Subject: [PATCH 6/6] refactor run to use a callback instead. --- .../app/controllermanager.go | 45 ++++++++++--------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/cmd/kube-controller-manager/app/controllermanager.go b/cmd/kube-controller-manager/app/controllermanager.go index 36508b0a6ca..d18bf67732b 100644 --- a/cmd/kube-controller-manager/app/controllermanager.go +++ b/cmd/kube-controller-manager/app/controllermanager.go @@ -212,12 +212,13 @@ func Run(c *config.CompletedConfig, stopCh <-chan struct{}) error { saTokenControllerInitFunc := serviceAccountTokenControllerStarter{rootClientBuilder: rootClientBuilder}.startServiceAccountTokenController - run := func(ctx context.Context, startSATokenController InitFunc, filterFunc leadermigration.FilterFunc, requiredResult leadermigration.FilterResult) { + run := func(ctx context.Context, startSATokenController InitFunc, initializersFunc ControllerInitializersFunc) { + controllerContext, err := CreateControllerContext(c, rootClientBuilder, clientBuilder, ctx.Done()) if err != nil { klog.Fatalf("error building controller context: %v", err) } - controllerInitializers := filterInitializers(NewControllerInitializers(controllerContext.LoopMode), filterFunc, requiredResult) + controllerInitializers := initializersFunc(controllerContext.LoopMode) if err := StartControllers(controllerContext, startSATokenController, controllerInitializers, unsecuredMux); err != nil { klog.Fatalf("error starting controllers: %v", err) } @@ -231,7 +232,7 @@ func Run(c *config.CompletedConfig, stopCh <-chan struct{}) error { // No leader election, run directly if !c.ComponentConfig.Generic.LeaderElection.LeaderElect { - run(context.TODO(), saTokenControllerInitFunc, nil, leadermigration.ControllerNonMigrated) + run(context.TODO(), saTokenControllerInitFunc, NewControllerInitializers) panic("unreachable") } @@ -270,14 +271,14 @@ func Run(c *config.CompletedConfig, stopCh <-chan struct{}) error { c.ComponentConfig.Generic.LeaderElection.ResourceName, leaderelection.LeaderCallbacks{ OnStartedLeading: func(ctx context.Context) { - var filterFunc leadermigration.FilterFunc = nil + initializersFunc := NewControllerInitializers if leaderMigrator != nil { // If leader migration is enabled, we should start only non-migrated controllers // for the main lock. - filterFunc = leaderMigrator.FilterFunc + initializersFunc = createInitializersFunc(leaderMigrator.FilterFunc, leadermigration.ControllerNonMigrated) klog.Info("leader migration: starting main controllers.") } - run(ctx, startSATokenController, filterFunc, leadermigration.ControllerNonMigrated) + run(ctx, startSATokenController, initializersFunc) }, OnStoppedLeading: func() { klog.Fatalf("leaderelection lost") @@ -299,8 +300,8 @@ func Run(c *config.CompletedConfig, stopCh <-chan struct{}) error { leaderelection.LeaderCallbacks{ OnStartedLeading: func(ctx context.Context) { klog.Info("leader migration: starting migrated controllers.") - // DO NOT start saTokenController under migration lock (passing nil) - run(ctx, nil, leaderMigrator.FilterFunc, leadermigration.ControllerMigrated) + // DO NOT start saTokenController under migration lock + run(ctx, nil, createInitializersFunc(leaderMigrator.FilterFunc, leadermigration.ControllerMigrated)) }, OnStoppedLeading: func() { klog.Fatalf("migration leaderelection lost") @@ -368,6 +369,12 @@ func (c ControllerContext) IsControllerEnabled(name string) bool { // The bool indicates whether the controller was enabled. type InitFunc func(ctx ControllerContext) (debuggingHandler http.Handler, enabled bool, err error) +// ControllerInitializersFunc is used to create a collection of initializers +// given the loopMode. +type ControllerInitializersFunc func(loopMode ControllerLoopMode) (initializers map[string]InitFunc) + +var _ ControllerInitializersFunc = NewControllerInitializers + // KnownControllers returns all known controllers's name func KnownControllers() []string { ret := sets.StringKeySet(NewControllerInitializers(IncludeCloudLoops)) @@ -688,18 +695,16 @@ func leaderElectAndRun(c *config.CompletedConfig, lockIdentity string, electionC panic("unreachable") } -// filterInitializers returns initializers that has filterFunc(name) == expected. -// filterFunc can be nil, in which case the original initializers will be returned directly. -// InitFunc is local to kube-controller-manager, and thus filterInitializers has to be local too. -func filterInitializers(allInitializers map[string]InitFunc, filterFunc leadermigration.FilterFunc, expected leadermigration.FilterResult) map[string]InitFunc { - if filterFunc == nil { - return allInitializers - } - initializers := make(map[string]InitFunc) - for name, initFunc := range allInitializers { - if filterFunc(name) == expected { - initializers[name] = initFunc +// createInitializersFunc creates a initializersFunc that returns all initializer +// with expected as the result after filtering through filterFunc. +func createInitializersFunc(filterFunc leadermigration.FilterFunc, expected leadermigration.FilterResult) ControllerInitializersFunc { + return func(loopMode ControllerLoopMode) map[string]InitFunc { + initializers := make(map[string]InitFunc) + for name, initializer := range NewControllerInitializers(loopMode) { + if filterFunc(name) == expected { + initializers[name] = initializer + } } + return initializers } - return initializers }