From 4e36355c5de4733994637dfa3e176caeed765aa1 Mon Sep 17 00:00:00 2001 From: "Lubomir I. Ivanov" Date: Tue, 16 Dec 2025 19:09:01 +0100 Subject: [PATCH] kubeadm: always retry Patch() Node API calls The PatchNodeOnce function has historically exited early in scenarios where we Get a Node object, but the next Patch API call on the same Node object fails. This can happen in setups that are under a lot of resource pressure or in different network timeout scenarios. Instead of exiting early and allowlisting certain errors, always retry on any Patch error. This aligns with the general idea that kubeadm retries *all* API calls. --- cmd/kubeadm/app/util/apiclient/idempotency.go | 5 +- .../app/util/apiclient/idempotency_test.go | 48 ------------------- 2 files changed, 1 insertion(+), 52 deletions(-) diff --git a/cmd/kubeadm/app/util/apiclient/idempotency.go b/cmd/kubeadm/app/util/apiclient/idempotency.go index e524e47c416..3ece15582a7 100644 --- a/cmd/kubeadm/app/util/apiclient/idempotency.go +++ b/cmd/kubeadm/app/util/apiclient/idempotency.go @@ -193,10 +193,7 @@ func PatchNodeOnce(client clientset.Interface, nodeName string, patchFn func(*v1 if _, err := client.CoreV1().Nodes().Patch(ctx, n.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}); err != nil { *lastError = errors.Wrapf(err, "error patching Node %q", n.Name) - if apierrors.IsTimeout(err) || apierrors.IsConflict(err) || apierrors.IsServerTimeout(err) || apierrors.IsServiceUnavailable(err) { - return false, nil - } - return false, *lastError + return false, nil } return true, nil diff --git a/cmd/kubeadm/app/util/apiclient/idempotency_test.go b/cmd/kubeadm/app/util/apiclient/idempotency_test.go index ec0caf78465..a7c349426cb 100644 --- a/cmd/kubeadm/app/util/apiclient/idempotency_test.go +++ b/cmd/kubeadm/app/util/apiclient/idempotency_test.go @@ -386,54 +386,6 @@ func TestPatchNodeOnce(t *testing.T) { }, success: false, }, - { - name: "patch node when timeout", - lookupName: "testnode", - 
node: v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "testnode", - Labels: map[string]string{v1.LabelHostname: ""}, - }, - }, - success: false, - fakeError: apierrors.NewTimeoutError("fake timeout", -1), - }, - { - name: "patch node when conflict", - lookupName: "testnode", - node: v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "testnode", - Labels: map[string]string{v1.LabelHostname: ""}, - }, - }, - success: false, - fakeError: apierrors.NewConflict(schema.GroupResource{}, "fake conflict", nil), - }, - { - name: "patch node when there is a server timeout", - lookupName: "testnode", - node: v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "testnode", - Labels: map[string]string{v1.LabelHostname: ""}, - }, - }, - success: false, - fakeError: apierrors.NewServerTimeout(schema.GroupResource{}, "fake server timeout", 1), - }, - { - name: "patch node when the service is unavailable", - lookupName: "testnode", - node: v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "testnode", - Labels: map[string]string{v1.LabelHostname: ""}, - }, - }, - success: false, - fakeError: apierrors.NewServiceUnavailable("fake service unavailable"), - }, { name: "patch node failed with unknown error", lookupName: "testnode",