kubeadm: always retry Patch() Node API calls

The PatchNodeOnce function has historically exited early
in scenarios where we Get a Node object, but the next Patch
API call on the same Node object fails. This can happen
in setups that are under a lot of resource pressure
or in different network timeout scenarios.

Instead of exiting early and allowlisting certain errors,
always retry on any Patch error. This aligns with the
general idea that kubeadm retries *all* API calls.
This commit is contained in:
Lubomir I. Ivanov
2025-12-16 19:09:01 +01:00
parent 39a3f49b80
commit 28fffe71c4
2 changed files with 1 additions and 52 deletions

View File

@@ -192,10 +192,7 @@ func PatchNodeOnce(client clientset.Interface, nodeName string, patchFn func(*v1
if _, err := client.CoreV1().Nodes().Patch(ctx, n.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}); err != nil {
*lastError = errors.Wrapf(err, "error patching Node %q", n.Name)
if apierrors.IsTimeout(err) || apierrors.IsConflict(err) || apierrors.IsServerTimeout(err) || apierrors.IsServiceUnavailable(err) {
return false, nil
}
return false, *lastError
return false, nil
}
return true, nil

View File

@@ -386,54 +386,6 @@ func TestPatchNodeOnce(t *testing.T) {
},
success: false,
},
{
name: "patch node when timeout",
lookupName: "testnode",
node: v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "testnode",
Labels: map[string]string{v1.LabelHostname: ""},
},
},
success: false,
fakeError: apierrors.NewTimeoutError("fake timeout", -1),
},
{
name: "patch node when conflict",
lookupName: "testnode",
node: v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "testnode",
Labels: map[string]string{v1.LabelHostname: ""},
},
},
success: false,
fakeError: apierrors.NewConflict(schema.GroupResource{}, "fake conflict", nil),
},
{
name: "patch node when there is a server timeout",
lookupName: "testnode",
node: v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "testnode",
Labels: map[string]string{v1.LabelHostname: ""},
},
},
success: false,
fakeError: apierrors.NewServerTimeout(schema.GroupResource{}, "fake server timeout", 1),
},
{
name: "patch node when the service is unavailable",
lookupName: "testnode",
node: v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "testnode",
Labels: map[string]string{v1.LabelHostname: ""},
},
},
success: false,
fakeError: apierrors.NewServiceUnavailable("fake service unavailable"),
},
{
name: "patch node failed with unknown error",
lookupName: "testnode",