Merge pull request #119661 from cartermckinnon/lease-leak-node-not-found

Handle errors when preparing lease for update
This commit is contained in:
Kubernetes Prow Robot 2023-08-21 11:09:23 -07:00 committed by GitHub
commit 92b7905143
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 33 additions and 15 deletions

View File

@ -65,9 +65,8 @@ type controller struct {
latestLease *coordinationv1.Lease
// newLeasePostProcessFunc allows customizing a lease object (e.g. setting OwnerReference)
// before every time the lease is created/refreshed(updated). Note that an error will block
// a lease CREATE, causing the controller to retry next time, but an error won't block a
// lease UPDATE.
// before every time the lease is created/refreshed(updated).
// Note that an error will block the lease operation.
newLeasePostProcessFunc ProcessLeaseFunc
}
@ -184,17 +183,21 @@ func (c *controller) ensureLease(ctx context.Context) (*coordinationv1.Lease, bo
// call this once you're sure the lease has been created
func (c *controller) retryUpdateLease(ctx context.Context, base *coordinationv1.Lease) error {
for i := 0; i < maxUpdateRetries; i++ {
leaseToUpdate, _ := c.newLease(base)
lease, err := c.leaseClient.Update(ctx, leaseToUpdate, metav1.UpdateOptions{})
if err == nil {
c.latestLease = lease
return nil
}
klog.FromContext(ctx).Error(err, "Failed to update lease")
// OptimisticLockError requires getting the newer version of lease to proceed.
if apierrors.IsConflict(err) {
base, _ = c.backoffEnsureLease(ctx)
continue
leaseToUpdate, err := c.newLease(base)
if err != nil {
klog.FromContext(ctx).Error(err, "Failed to prepare lease")
} else {
lease, err := c.leaseClient.Update(ctx, leaseToUpdate, metav1.UpdateOptions{})
if err == nil {
c.latestLease = lease
return nil
}
klog.FromContext(ctx).Error(err, "Failed to update lease")
// OptimisticLockError requires getting the newer version of lease to proceed.
if apierrors.IsConflict(err) {
base, _ = c.backoffEnsureLease(ctx)
continue
}
}
if i > 0 && c.onRepeatedHeartbeatFailure != nil {
c.onRepeatedHeartbeatFailure()

View File

@ -227,6 +227,7 @@ func TestRetryUpdateNodeLease(t *testing.T) {
getReactor func(action clienttesting.Action) (bool, runtime.Object, error)
onRepeatedHeartbeatFailure func()
expectErr bool
client *fake.Clientset
}{
{
desc: "no errors",
@ -236,6 +237,7 @@ func TestRetryUpdateNodeLease(t *testing.T) {
getReactor: nil,
onRepeatedHeartbeatFailure: nil,
expectErr: false,
client: fake.NewSimpleClientset(node),
},
{
desc: "connection errors",
@ -245,6 +247,7 @@ func TestRetryUpdateNodeLease(t *testing.T) {
getReactor: nil,
onRepeatedHeartbeatFailure: nil,
expectErr: true,
client: fake.NewSimpleClientset(node),
},
{
desc: "optimistic lock errors",
@ -267,12 +270,24 @@ func TestRetryUpdateNodeLease(t *testing.T) {
},
onRepeatedHeartbeatFailure: func() { t.Fatalf("onRepeatedHeartbeatFailure called") },
expectErr: false,
client: fake.NewSimpleClientset(node),
},
{
desc: "node not found errors",
updateReactor: func(action clienttesting.Action) (bool, runtime.Object, error) {
t.Fatalf("lease was updated when node does not exist!")
return true, nil, nil
},
getReactor: nil,
onRepeatedHeartbeatFailure: nil,
expectErr: true,
client: fake.NewSimpleClientset(),
},
}
for _, tc := range cases {
t.Run(tc.desc, func(t *testing.T) {
_, ctx := ktesting.NewTestContext(t)
cl := fake.NewSimpleClientset(node)
cl := tc.client
if tc.updateReactor != nil {
cl.PrependReactor("update", "leases", tc.updateReactor)
}