From 28fffe71c469243912df266944ca702d7f1ba41f Mon Sep 17 00:00:00 2001 From: "Lubomir I. Ivanov" Date: Tue, 16 Dec 2025 19:09:01 +0100 Subject: [PATCH] kubeadm: always retry Patch() Node API calls The PatchNodeOnce function has historically exited early in scenarios where we Get a Node object, but the next Patch API call on the same Node object fails. This can happen in setups that are under a lot of resource pressure or in different network timeout scenarios. Instead of exiting early and allowlisting certain errors, always retry on any Patch error. This aligns with the general idea that kubeadm retries *all* API calls. --- cmd/kubeadm/app/util/apiclient/idempotency.go | 5 +- .../app/util/apiclient/idempotency_test.go | 48 ------------------- 2 files changed, 1 insertion(+), 52 deletions(-) diff --git a/cmd/kubeadm/app/util/apiclient/idempotency.go b/cmd/kubeadm/app/util/apiclient/idempotency.go index 1e97385d676..d02b1b9c406 100644 --- a/cmd/kubeadm/app/util/apiclient/idempotency.go +++ b/cmd/kubeadm/app/util/apiclient/idempotency.go @@ -192,10 +192,7 @@ func PatchNodeOnce(client clientset.Interface, nodeName string, patchFn func(*v1 if _, err := client.CoreV1().Nodes().Patch(ctx, n.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}); err != nil { *lastError = errors.Wrapf(err, "error patching Node %q", n.Name) - if apierrors.IsTimeout(err) || apierrors.IsConflict(err) || apierrors.IsServerTimeout(err) || apierrors.IsServiceUnavailable(err) { - return false, nil - } - return false, *lastError + return false, nil } return true, nil diff --git a/cmd/kubeadm/app/util/apiclient/idempotency_test.go b/cmd/kubeadm/app/util/apiclient/idempotency_test.go index 02a3a2ddcb5..a9fac6b5ab8 100644 --- a/cmd/kubeadm/app/util/apiclient/idempotency_test.go +++ b/cmd/kubeadm/app/util/apiclient/idempotency_test.go @@ -386,54 +386,6 @@ func TestPatchNodeOnce(t *testing.T) { }, success: false, }, - { - name: "patch node when timeout", - lookupName: "testnode", 
node: v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "testnode", - Labels: map[string]string{v1.LabelHostname: ""}, - }, - }, - success: false, - fakeError: apierrors.NewTimeoutError("fake timeout", -1), - }, - { - name: "patch node when conflict", - lookupName: "testnode", - node: v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "testnode", - Labels: map[string]string{v1.LabelHostname: ""}, - }, - }, - success: false, - fakeError: apierrors.NewConflict(schema.GroupResource{}, "fake conflict", nil), - }, - { - name: "patch node when there is a server timeout", - lookupName: "testnode", - node: v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "testnode", - Labels: map[string]string{v1.LabelHostname: ""}, - }, - }, - success: false, - fakeError: apierrors.NewServerTimeout(schema.GroupResource{}, "fake server timeout", 1), - }, - { - name: "patch node when the service is unavailable", - lookupName: "testnode", - node: v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "testnode", - Labels: map[string]string{v1.LabelHostname: ""}, - }, - }, - success: false, - fakeError: apierrors.NewServiceUnavailable("fake service unavailable"), - }, { name: "patch node failed with unknown error", lookupName: "testnode",