diff --git a/pkg/kubelet/nodelease/controller.go b/pkg/kubelet/nodelease/controller.go index 2ac6bfcf92b..8d71e08d72d 100644 --- a/pkg/kubelet/nodelease/controller.go +++ b/pkg/kubelet/nodelease/controller.go @@ -57,6 +57,9 @@ type controller struct { renewInterval time.Duration clock clock.Clock onRepeatedHeartbeatFailure func() + + // latestLease is the latest node lease which Kubelet updated or created + latestLease *coordinationv1.Lease } // NewController constructs and returns a controller @@ -87,7 +90,25 @@ func (c *controller) Run(stopCh <-chan struct{}) { } func (c *controller) sync() { + if c.latestLease != nil { + // As long as node lease is not (or very rarely) updated by any other agent than Kubelet, + // we can optimistically assume it didn't change since our last update and try updating + // based on the version from that time. Thanks to it we avoid GET call and reduce load + // on etcd and kube-apiserver. + // If at some point other agents will also be frequently updating the Lease object, this + // can result in performance degradation, because we will end up with calling additional + // GET/PUT - at this point this whole "if" should be removed. + lease, err := c.leaseClient.Update(c.newLease(c.latestLease)) + if err == nil { + c.latestLease = lease + return + } + + klog.Infof("failed to update lease using latest lease, fallback to ensure lease, err: %v", err) + } + lease, created := c.backoffEnsureLease() + c.latestLease = lease // we don't need to update the lease if we just created it if !created { if err := c.retryUpdateLease(lease); err != nil { @@ -143,8 +164,9 @@ func (c *controller) ensureLease() (*coordinationv1.Lease, bool, error) { // call this once you're sure the lease has been created func (c *controller) retryUpdateLease(base *coordinationv1.Lease) error { for i := 0; i < maxUpdateRetries; i++ { - _, err := c.leaseClient.Update(c.newLease(base)) + lease, err := c.leaseClient.Update(c.newLease(base)) if err == nil { + c.latestLease = lease return nil } klog.Errorf("failed to update node lease, error: %v", err) diff --git a/pkg/kubelet/nodelease/controller_test.go b/pkg/kubelet/nodelease/controller_test.go index 6e43ea0daae..a2eaea5ac11 100644 --- a/pkg/kubelet/nodelease/controller_test.go +++ b/pkg/kubelet/nodelease/controller_test.go @@ -17,6 +17,7 @@ limitations under the License. package nodelease import ( + "errors" "fmt" "testing" "time" @@ -282,3 +283,118 @@ func TestRetryUpdateLease(t *testing.T) { }) } } + +func TestUpdateUsingLatestLease(t *testing.T) { + nodeName := "foo" + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + UID: types.UID("foo-uid"), + }, + } + + notFoundErr := apierrors.NewNotFound(coordinationv1.Resource("lease"), nodeName) + internalErr := apierrors.NewInternalError(errors.New("unreachable code")) + + makeLease := func(name, resourceVersion string) *coordinationv1.Lease { + return &coordinationv1.Lease{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: corev1.NamespaceNodeLease, + Name: name, + ResourceVersion: resourceVersion, + }, + } + } + + cases := []struct { + desc string + latestLease *coordinationv1.Lease + updateReactor func(action clienttesting.Action) (bool, runtime.Object, error) + getReactor func(action clienttesting.Action) (bool, runtime.Object, error) + createReactor func(action clienttesting.Action) (bool, runtime.Object, error) + expectLeaseResourceVersion string + }{ + { + desc: "latestLease is nil and need to create", + latestLease: nil, + updateReactor: nil, + getReactor: func(action clienttesting.Action) (bool, runtime.Object, error) { + return true, nil, notFoundErr + }, + createReactor: func(action clienttesting.Action) (bool, runtime.Object, error) { + return true, makeLease(nodeName, "1"), nil + }, + expectLeaseResourceVersion: "1", + }, + { + desc: "latestLease is nil and need to update", + latestLease: nil, + updateReactor: func(action clienttesting.Action) (bool, runtime.Object, error) { + return true, makeLease(nodeName, "2"), nil + }, + getReactor: func(action clienttesting.Action) (bool, runtime.Object, error) { + return true, makeLease(nodeName, "1"), nil + }, + expectLeaseResourceVersion: "2", + }, + { + desc: "latestLease exist and need to update", + latestLease: makeLease(nodeName, "1"), + updateReactor: func(action clienttesting.Action) (bool, runtime.Object, error) { + return true, makeLease(nodeName, "2"), nil + }, + expectLeaseResourceVersion: "2", + }, + { + desc: "update with latest lease failed", + latestLease: makeLease(nodeName, "1"), + updateReactor: func() func(action clienttesting.Action) (bool, runtime.Object, error) { + i := 0 + return func(action clienttesting.Action) (bool, runtime.Object, error) { + i++ + switch i { + case 1: + return true, nil, notFoundErr + case 2: + return true, makeLease(nodeName, "3"), nil + default: + t.Fatalf("unexpect call update lease") + return true, nil, internalErr + } + } + }(), + getReactor: func(action clienttesting.Action) (bool, runtime.Object, error) { + return true, makeLease(nodeName, "2"), nil + }, + expectLeaseResourceVersion: "3", + }, + } + for _, tc := range cases { + t.Run(tc.desc, func(t *testing.T) { + cl := fake.NewSimpleClientset(node) + if tc.updateReactor != nil { + cl.PrependReactor("update", "leases", tc.updateReactor) + } + if tc.getReactor != nil { + cl.PrependReactor("get", "leases", tc.getReactor) + } + if tc.createReactor != nil { + cl.PrependReactor("create", "leases", tc.createReactor) + } + c := &controller{ + clock: clock.NewFakeClock(time.Now()), + client: cl, + leaseClient: cl.CoordinationV1().Leases(corev1.NamespaceNodeLease), + holderIdentity: node.Name, + leaseDurationSeconds: 10, + latestLease: tc.latestLease, + } + + c.sync() + + if tc.expectLeaseResourceVersion != c.latestLease.ResourceVersion { + t.Fatalf("latestLease RV got %v, expected %v", c.latestLease.ResourceVersion, tc.expectLeaseResourceVersion) + } + }) + } +}