From 500bfe00cdbec423ebb23467d3d49967d4f8ea8f Mon Sep 17 00:00:00 2001 From: Keisuke Ishigami Date: Thu, 10 Jul 2025 02:59:21 +0900 Subject: [PATCH 1/5] Prevent the failure of releasing the lock by updating the resource version in case of a resource conflict Kubernetes-commit: 271233a62aeab02eb0f7a6567d3ddba6d09a4f21 --- tools/leaderelection/leaderelection.go | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/tools/leaderelection/leaderelection.go b/tools/leaderelection/leaderelection.go index c3c1d9be..35afc372 100644 --- a/tools/leaderelection/leaderelection.go +++ b/tools/leaderelection/leaderelection.go @@ -306,17 +306,37 @@ func (le *LeaderElector) renew(ctx context.Context) { // release attempts to release the leader lease if we have acquired it. func (le *LeaderElector) release() bool { + ctx := context.Background() + + // 1. obtain the electionRecord + oldLeaderElectionRecord, oldLeaderElectionRawRecord, err := le.config.Lock.Get(ctx) + if err != nil { + if !errors.IsNotFound(err) { + klog.Errorf("error retrieving resource lock %v: %v", le.config.Lock.Describe(), err) + return false + } + klog.Infof("lease lock not found: %v", le.config.Lock.Describe()) + return false + } + // 2. Record obtained, check the Identity & Time + if !bytes.Equal(le.observedRawRecord, oldLeaderElectionRawRecord) { + le.setObservedRecord(oldLeaderElectionRecord) + + le.observedRawRecord = oldLeaderElectionRawRecord + } + if !le.IsLeader() { return true } now := metav1.NewTime(le.clock.Now()) leaderElectionRecord := rl.LeaderElectionRecord{ - LeaderTransitions: le.observedRecord.LeaderTransitions, + LeaderTransitions: oldLeaderElectionRecord.LeaderTransitions, LeaseDurationSeconds: 1, RenewTime: now, AcquireTime: now, } - timeoutCtx, timeoutCancel := context.WithTimeout(context.Background(), le.config.RenewDeadline) + + timeoutCtx, timeoutCancel := context.WithTimeout(ctx, le.config.RenewDeadline) defer timeoutCancel() if err := le.config.Lock.Update(timeoutCtx, leaderElectionRecord); err != nil { klog.Errorf("Failed to release lock: %v", err) From b62c1cc29f6cf1b5076c84e05849973031afd341 Mon Sep 17 00:00:00 2001 From: Keisuke Ishigami Date: Mon, 14 Jul 2025 23:17:19 +0900 Subject: [PATCH 2/5] add test which checks release method calls Get Kubernetes-commit: f20be4c094cdc037e54d755dcd6292991cee701d --- tools/leaderelection/leaderelection_test.go | 53 +++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/tools/leaderelection/leaderelection_test.go b/tools/leaderelection/leaderelection_test.go index 211f11df..8d692b90 100644 --- a/tools/leaderelection/leaderelection_test.go +++ b/tools/leaderelection/leaderelection_test.go @@ -677,6 +677,59 @@ func TestReleaseLeaseLeases(t *testing.T) { testReleaseLease(t, "leases") } +// TestReleaseMethodCallsGet test release method calls Get +func TestReleaseMethodCallsGet(t *testing.T) { + objectType := "leases" + getCalled := false + + lockMeta := metav1.ObjectMeta{Namespace: "foo", Name: "bar"} + recorder := record.NewFakeRecorder(100) + resourceLockConfig := rl.ResourceLockConfig{ + Identity: "baz", + EventRecorder: recorder, + } + c := &fake.Clientset{} + c.AddReactor("get", objectType, func(action fakeclient.Action) (bool, runtime.Object, error) { + // flag to check if Get is called + getCalled = true + return true, createLockObject(t, objectType, action.GetNamespace(), action.(fakeclient.GetAction).GetName(), &rl.LeaderElectionRecord{ + HolderIdentity: "baz", + LeaseDurationSeconds: 10, + }), nil + }) + c.AddReactor("update", objectType, func(action fakeclient.Action) (bool, runtime.Object, error) { + return true, action.(fakeclient.UpdateAction).GetObject(), nil + }) + + lock := &rl.LeaseLock{ + LeaseMeta: lockMeta, + LockConfig: resourceLockConfig, + Client: c.CoordinationV1(), + } + lec := LeaderElectionConfig{ + Lock: lock, + LeaseDuration: 10 * time.Second, + Callbacks: LeaderCallbacks{ + OnNewLeader: func(l string) {}, + }, + } + observedRawRecord := GetRawRecordOrDie(t, objectType, rl.LeaderElectionRecord{HolderIdentity: "baz"}) + le := &LeaderElector{ + config: lec, + observedRecord: rl.LeaderElectionRecord{HolderIdentity: "baz"}, + observedRawRecord: observedRawRecord, + observedTime: time.Now(), + clock: clock.RealClock{}, + metrics: globalMetricsFactory.newLeaderMetrics(), + } + + le.release() + + if !getCalled { + t.Errorf("release method does not call Get") + } +} + func TestReleaseOnCancellation_Leases(t *testing.T) { testReleaseOnCancellation(t, "leases") } From 3633ea23d32da9dfa4119ab7774313a3ea3e062c Mon Sep 17 00:00:00 2001 From: Keisuke Ishigami Date: Tue, 15 Jul 2025 01:07:25 +0900 Subject: [PATCH 3/5] chore Kubernetes-commit: 09ec6f744833fb72a319013ae25cf0286cdd125e --- tools/leaderelection/leaderelection.go | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tools/leaderelection/leaderelection.go b/tools/leaderelection/leaderelection.go index 35afc372..f7d9f882 100644 --- a/tools/leaderelection/leaderelection.go +++ b/tools/leaderelection/leaderelection.go @@ -308,8 +308,8 @@ func (le *LeaderElector) renew(ctx context.Context) { func (le *LeaderElector) release() bool { ctx := context.Background() - // 1. obtain the electionRecord - oldLeaderElectionRecord, oldLeaderElectionRawRecord, err := le.config.Lock.Get(ctx) + // update the resourceVersion of lease + oldLeaderElectionRecord, _, err := le.config.Lock.Get(ctx) if err != nil { if !errors.IsNotFound(err) { klog.Errorf("error retrieving resource lock %v: %v", le.config.Lock.Describe(), err) @@ -318,12 +318,6 @@ func (le *LeaderElector) release() bool { klog.Infof("lease lock not found: %v", le.config.Lock.Describe()) return false } - // 2. Record obtained, check the Identity & Time - if !bytes.Equal(le.observedRawRecord, oldLeaderElectionRawRecord) { - le.setObservedRecord(oldLeaderElectionRecord) - - le.observedRawRecord = oldLeaderElectionRawRecord - } if !le.IsLeader() { return true From be4c8bc00266129880a1841bca5b92a432753d4b Mon Sep 17 00:00:00 2001 From: Keisuke Ishigami Date: Tue, 15 Jul 2025 01:07:43 +0900 Subject: [PATCH 4/5] fix test Kubernetes-commit: 9a7dddf5a713e109418f3726514fdf60246ada3b --- tools/leaderelection/leaderelection_test.go | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tools/leaderelection/leaderelection_test.go b/tools/leaderelection/leaderelection_test.go index 8d692b90..eff46177 100644 --- a/tools/leaderelection/leaderelection_test.go +++ b/tools/leaderelection/leaderelection_test.go @@ -557,7 +557,7 @@ func testReleaseLease(t *testing.T, objectType string) { verb: "get", objectType: objectType, reaction: func(action fakeclient.Action) (handled bool, ret runtime.Object, err error) { - return true, nil, errors.NewNotFound(action.(fakeclient.GetAction).GetResource().GroupResource(), action.(fakeclient.GetAction).GetName()) + return true, createLockObject(t, objectType, action.GetNamespace(), action.(fakeclient.GetAction).GetName(), &rl.LeaderElectionRecord{HolderIdentity: "baz"}), nil }, }, { @@ -574,6 +574,13 @@ func testReleaseLease(t *testing.T, objectType string) { return true, action.(fakeclient.UpdateAction).GetObject(), nil }, }, + { + verb: "get", + objectType: objectType, + reaction: func(action fakeclient.Action) (handled bool, ret runtime.Object, err error) { + return true, nil, errors.NewNotFound(action.(fakeclient.GetAction).GetResource().GroupResource(), action.(fakeclient.GetAction).GetName()) + }, + }, }, expectSuccess: true, outHolder: "", @@ -844,9 +851,11 @@ func testReleaseOnCancellation(t *testing.T, objectType string) { if lockObj != nil { // Third and more get (first create, second renew) should return our canceled error // FakeClient doesn't do anything with the context so we're doing this ourselves - if gets >= 3 { - close(onRenewCalled) - <-onRenewResume + if gets >= 4 { + if gets == 4 { + close(onRenewCalled) + <-onRenewResume + } return true, nil, context.Canceled } return true, lockObj, nil From e36950929bf7595ba8f3adac31ee1b43d6b32a4e Mon Sep 17 00:00:00 2001 From: Keisuke Ishigami Date: Thu, 17 Jul 2025 22:33:32 +0900 Subject: [PATCH 5/5] set the timeout to Get method Kubernetes-commit: 300c7b815a2eb9598c53c0b386715650f5052804 --- tools/leaderelection/leaderelection.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/leaderelection/leaderelection.go b/tools/leaderelection/leaderelection.go index f7d9f882..07180630 100644 --- a/tools/leaderelection/leaderelection.go +++ b/tools/leaderelection/leaderelection.go @@ -307,9 +307,10 @@ func (le *LeaderElector) renew(ctx context.Context) { // release attempts to release the leader lease if we have acquired it. func (le *LeaderElector) release() bool { ctx := context.Background() - + timeoutCtx, timeoutCancel := context.WithTimeout(ctx, le.config.RenewDeadline) + defer timeoutCancel() // update the resourceVersion of lease - oldLeaderElectionRecord, _, err := le.config.Lock.Get(ctx) + oldLeaderElectionRecord, _, err := le.config.Lock.Get(timeoutCtx) if err != nil { if !errors.IsNotFound(err) { klog.Errorf("error retrieving resource lock %v: %v", le.config.Lock.Describe(), err) @@ -329,9 +330,6 @@ func (le *LeaderElector) release() bool { RenewTime: now, AcquireTime: now, } - - timeoutCtx, timeoutCancel := context.WithTimeout(ctx, le.config.RenewDeadline) - defer timeoutCancel() if err := le.config.Lock.Update(timeoutCtx, leaderElectionRecord); err != nil { klog.Errorf("Failed to release lock: %v", err) return false