From 79cc4033c791cf0da7e9f57f3ab3dac2a3a8664e Mon Sep 17 00:00:00 2001 From: Mikhail Mazurskiy Date: Mon, 12 Feb 2018 20:48:53 +1100 Subject: [PATCH 1/3] Cancellable leader election with channels Kubernetes-commit: 1d99fff1acb1503755b94d4c72e6dedd35c2d249 --- tools/leaderelection/leaderelection.go | 79 +++++++++++++++++++------- 1 file changed, 58 insertions(+), 21 deletions(-) diff --git a/tools/leaderelection/leaderelection.go b/tools/leaderelection/leaderelection.go index aed55574..cf56069d 100644 --- a/tools/leaderelection/leaderelection.go +++ b/tools/leaderelection/leaderelection.go @@ -51,6 +51,7 @@ package leaderelection import ( "fmt" "reflect" + "sync" "time" "k8s.io/apimachinery/pkg/api/errors" @@ -145,26 +146,28 @@ type LeaderElector struct { } // Run starts the leader election loop -func (le *LeaderElector) Run() { +func (le *LeaderElector) Run(stop <-chan struct{}) { defer func() { runtime.HandleCrash() le.config.Callbacks.OnStoppedLeading() }() - le.acquire() - stop := make(chan struct{}) - go le.config.Callbacks.OnStartedLeading(stop) - le.renew() - close(stop) + if !le.acquire(stop) { + return // stop signalled done + } + internalStop := make(chan struct{}) + defer close(internalStop) + go le.config.Callbacks.OnStartedLeading(internalStop) + le.renew(stop) } // RunOrDie starts a client with the provided config or panics if the config // fails to validate. -func RunOrDie(lec LeaderElectionConfig) { +func RunOrDie(stop <-chan struct{}, lec LeaderElectionConfig) { le, err := NewLeaderElector(lec) if err != nil { panic(err) } - le.Run() + le.Run(stop) } // GetLeader returns the identity of the last observed leader or returns the empty string if @@ -178,13 +181,23 @@ func (le *LeaderElector) IsLeader() bool { return le.observedRecord.HolderIdentity == le.config.Lock.Identity() } -// acquire loops calling tryAcquireOrRenew and returns immediately when tryAcquireOrRenew succeeds. 
-func (le *LeaderElector) acquire() { - stop := make(chan struct{}) +// acquire loops calling tryAcquireOrRenew and returns true immediately when tryAcquireOrRenew succeeds. +// Returns false if stop signals done. +func (le *LeaderElector) acquire(stop <-chan struct{}) bool { + tmpStop := make(chan struct{}) + once := sync.Once{} + go func() { + select { + case <-stop: + once.Do(func() { close(tmpStop) }) + case <-tmpStop: + } + }() + succeeded := false desc := le.config.Lock.Describe() glog.Infof("attempting to acquire leader lease %v...", desc) wait.JitterUntil(func() { - succeeded := le.tryAcquireOrRenew() + succeeded = le.tryAcquireOrRenew() le.maybeReportTransition() if !succeeded { glog.V(4).Infof("failed to acquire lease %v", desc) @@ -192,17 +205,41 @@ func (le *LeaderElector) acquire() { } le.config.Lock.RecordEvent("became leader") glog.Infof("successfully acquired lease %v", desc) - close(stop) - }, le.config.RetryPeriod, JitterFactor, true, stop) + once.Do(func() { close(tmpStop) }) + }, le.config.RetryPeriod, JitterFactor, true, tmpStop) + return succeeded } -// renew loops calling tryAcquireOrRenew and returns immediately when tryAcquireOrRenew fails. -func (le *LeaderElector) renew() { - stop := make(chan struct{}) +// renew loops calling tryAcquireOrRenew and returns immediately when tryAcquireOrRenew fails or ctx signals done. 
+func (le *LeaderElector) renew(stop <-chan struct{}) { + tmpStop := make(chan struct{}) + once := sync.Once{} + go func() { + select { + case <-stop: + once.Do(func() { close(tmpStop) }) + case <-tmpStop: + } + }() wait.Until(func() { - err := wait.Poll(le.config.RetryPeriod, le.config.RenewDeadline, func() (bool, error) { + // PollUntil() sleeps for "interval" duration before calling the function so we need to increase the timeout by le.config.RetryPeriod + t := time.NewTimer(le.config.RetryPeriod + le.config.RenewDeadline) + defer t.Stop() + internalStop := make(chan struct{}) + internalOnce := sync.Once{} + defer internalOnce.Do(func() { close(internalStop) }) + go func() { + select { + case <-tmpStop: + internalOnce.Do(func() { close(internalStop) }) + case <-t.C: + internalOnce.Do(func() { close(internalStop) }) + case <-internalStop: + } + }() + err := wait.PollUntil(le.config.RetryPeriod, func() (bool, error) { return le.tryAcquireOrRenew(), nil - }) + }, internalStop) le.maybeReportTransition() desc := le.config.Lock.Describe() if err == nil { @@ -211,8 +248,8 @@ func (le *LeaderElector) renew() { } le.config.Lock.RecordEvent("stopped leading") glog.Infof("failed to renew lease %v: %v", desc, err) - close(stop) - }, 0, stop) + once.Do(func() { close(tmpStop) }) + }, 0, tmpStop) } // tryAcquireOrRenew tries to acquire a leader lease if it is not already acquired, From ad39df114e7a8863546b7114c8541c9dd2efbec5 Mon Sep 17 00:00:00 2001 From: Mikhail Mazurskiy Date: Mon, 12 Feb 2018 21:02:56 +1100 Subject: [PATCH 2/3] Cancellable leader election with context Kubernetes-commit: dc32a341c01ec122f54604e9fdbdf9b77d2e19e3 --- tools/leaderelection/leaderelection.go | 76 +++++++++----------------- 1 file changed, 25 insertions(+), 51 deletions(-) diff --git a/tools/leaderelection/leaderelection.go b/tools/leaderelection/leaderelection.go index cf56069d..63c29189 100644 --- a/tools/leaderelection/leaderelection.go +++ b/tools/leaderelection/leaderelection.go @@ -49,9 
+49,9 @@ limitations under the License. package leaderelection import ( + "context" "fmt" "reflect" - "sync" "time" "k8s.io/apimachinery/pkg/api/errors" @@ -120,7 +120,7 @@ type LeaderElectionConfig struct { // * OnChallenge() type LeaderCallbacks struct { // OnStartedLeading is called when a LeaderElector client starts leading - OnStartedLeading func(stop <-chan struct{}) + OnStartedLeading func(context.Context) // OnStoppedLeading is called when a LeaderElector client stops leading OnStoppedLeading func() // OnNewLeader is called when the client observes a leader that is @@ -146,28 +146,28 @@ type LeaderElector struct { } // Run starts the leader election loop -func (le *LeaderElector) Run(stop <-chan struct{}) { +func (le *LeaderElector) Run(ctx context.Context) { defer func() { runtime.HandleCrash() le.config.Callbacks.OnStoppedLeading() }() - if !le.acquire(stop) { - return // stop signalled done + if !le.acquire(ctx) { + return // ctx signalled done } - internalStop := make(chan struct{}) - defer close(internalStop) - go le.config.Callbacks.OnStartedLeading(internalStop) - le.renew(stop) + ctx, cancel := context.WithCancel(ctx) + defer cancel() + go le.config.Callbacks.OnStartedLeading(ctx) + le.renew(ctx) } // RunOrDie starts a client with the provided config or panics if the config // fails to validate. -func RunOrDie(stop <-chan struct{}, lec LeaderElectionConfig) { +func RunOrDie(ctx context.Context, lec LeaderElectionConfig) { le, err := NewLeaderElector(lec) if err != nil { panic(err) } - le.Run(stop) + le.Run(ctx) } // GetLeader returns the identity of the last observed leader or returns the empty string if @@ -182,17 +182,10 @@ func (le *LeaderElector) IsLeader() bool { } // acquire loops calling tryAcquireOrRenew and returns true immediately when tryAcquireOrRenew succeeds. -// Returns false if stop signals done. 
-func (le *LeaderElector) acquire(stop <-chan struct{}) bool { - tmpStop := make(chan struct{}) - once := sync.Once{} - go func() { - select { - case <-stop: - once.Do(func() { close(tmpStop) }) - case <-tmpStop: - } - }() +// Returns false if ctx signals done. +func (le *LeaderElector) acquire(ctx context.Context) bool { + ctx, cancel := context.WithCancel(ctx) + defer cancel() succeeded := false desc := le.config.Lock.Describe() glog.Infof("attempting to acquire leader lease %v...", desc) @@ -205,41 +198,22 @@ func (le *LeaderElector) acquire(stop <-chan struct{}) bool { } le.config.Lock.RecordEvent("became leader") glog.Infof("successfully acquired lease %v", desc) - once.Do(func() { close(tmpStop) }) - }, le.config.RetryPeriod, JitterFactor, true, tmpStop) + cancel() + }, le.config.RetryPeriod, JitterFactor, true, ctx.Done()) return succeeded } // renew loops calling tryAcquireOrRenew and returns immediately when tryAcquireOrRenew fails or ctx signals done. -func (le *LeaderElector) renew(stop <-chan struct{}) { - tmpStop := make(chan struct{}) - once := sync.Once{} - go func() { - select { - case <-stop: - once.Do(func() { close(tmpStop) }) - case <-tmpStop: - } - }() +func (le *LeaderElector) renew(ctx context.Context) { + ctx, cancel := context.WithCancel(ctx) + defer cancel() wait.Until(func() { // PollUntil() sleeps for "interval" duration before calling the function so we need to increase the timeout by le.config.RetryPeriod - t := time.NewTimer(le.config.RetryPeriod + le.config.RenewDeadline) - defer t.Stop() - internalStop := make(chan struct{}) - internalOnce := sync.Once{} - defer internalOnce.Do(func() { close(internalStop) }) - go func() { - select { - case <-tmpStop: - internalOnce.Do(func() { close(internalStop) }) - case <-t.C: - internalOnce.Do(func() { close(internalStop) }) - case <-internalStop: - } - }() + timeoutCtx, timeoutCancel := context.WithTimeout(ctx, le.config.RetryPeriod+le.config.RenewDeadline) + defer timeoutCancel() err := 
wait.PollUntil(le.config.RetryPeriod, func() (bool, error) { return le.tryAcquireOrRenew(), nil - }, internalStop) + }, timeoutCtx.Done()) le.maybeReportTransition() desc := le.config.Lock.Describe() if err == nil { @@ -248,8 +222,8 @@ func (le *LeaderElector) renew(stop <-chan struct{}) { } le.config.Lock.RecordEvent("stopped leading") glog.Infof("failed to renew lease %v: %v", desc, err) - once.Do(func() { close(tmpStop) }) - }, 0, tmpStop) + cancel() + }, 0, ctx.Done()) } // tryAcquireOrRenew tries to acquire a leader lease if it is not already acquired, From f6745953479db3e3c620dbac668e27aceb3c8f63 Mon Sep 17 00:00:00 2001 From: Mikhail Mazurskiy Date: Thu, 7 Jun 2018 14:33:03 +1000 Subject: [PATCH 3/3] Use context.TODO() to be explicit that cancellation is not implemented PollImmediateUntil runs the renew attempt without an initial sleep, so the outer wait.Until period is raised from 0 to RetryPeriod; otherwise every successful renew would immediately start the next one and the renew loop would spin. Kubernetes-commit: 102090d1f12f9b00571d440470ba040e0632fbe0 --- tools/leaderelection/leaderelection.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/leaderelection/leaderelection.go b/tools/leaderelection/leaderelection.go index 63c29189..233dae53 100644 --- a/tools/leaderelection/leaderelection.go +++ b/tools/leaderelection/leaderelection.go @@ -208,10 +208,9 @@ func (le *LeaderElector) renew(ctx context.Context) { ctx, cancel := context.WithCancel(ctx) defer cancel() wait.Until(func() { - // PollUntil() sleeps for "interval" duration before calling the function so we need to increase the timeout by le.config.RetryPeriod - timeoutCtx, timeoutCancel := context.WithTimeout(ctx, le.config.RetryPeriod+le.config.RenewDeadline) + timeoutCtx, timeoutCancel := context.WithTimeout(ctx, le.config.RenewDeadline) defer timeoutCancel() - err := wait.PollUntil(le.config.RetryPeriod, func() (bool, error) { + err := wait.PollImmediateUntil(le.config.RetryPeriod, func() (bool, error) { return le.tryAcquireOrRenew(), nil }, timeoutCtx.Done()) le.maybeReportTransition() @@ -223,7 +222,7 @@ func (le *LeaderElector) renew(ctx context.Context) { le.config.Lock.RecordEvent("stopped leading") glog.Infof("failed to renew lease %v: %v", desc, err) cancel() - }, 0, ctx.Done()) + }, le.config.RetryPeriod, ctx.Done()) } // tryAcquireOrRenew tries to acquire a leader lease if it is not already acquired,