From 83bb5d570580a3f477737fec5c24ba8fc3554264 Mon Sep 17 00:00:00 2001 From: Natasha Sarkar Date: Thu, 27 Mar 2025 09:56:40 -0500 Subject: [PATCH] deflake restart count assertions in in-place resize tests (#131055) --- test/e2e/common/node/pod_resize.go | 9 +++++---- test/e2e/framework/pod/resize.go | 18 ++++++++++++++++++ test/e2e/node/pod_resize.go | 6 ++++-- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/test/e2e/common/node/pod_resize.go b/test/e2e/common/node/pod_resize.go index 1a05f90e9fa..8548a829227 100644 --- a/test/e2e/common/node/pod_resize.go +++ b/test/e2e/common/node/pod_resize.go @@ -1200,13 +1200,14 @@ func doPodResizeTests() { patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name, types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") framework.ExpectNoError(pErr, fmt.Sprintf("failed to patch pod for %s", opStr)) + expected := e2epod.UpdateExpectedContainerRestarts(ctx, patchedPod, expectedContainers) ginkgo.By(fmt.Sprintf("verifying pod patched for %s", opStr)) - e2epod.VerifyPodResources(patchedPod, expectedContainers) + e2epod.VerifyPodResources(patchedPod, expected) ginkgo.By(fmt.Sprintf("waiting for %s to be actuated", opStr)) - resizedPod := e2epod.WaitForPodResizeActuation(ctx, f, podClient, newPod, expectedContainers) - e2epod.ExpectPodResized(ctx, f, resizedPod, expectedContainers) + resizedPod := e2epod.WaitForPodResizeActuation(ctx, f, podClient, newPod, expected) + e2epod.ExpectPodResized(ctx, f, resizedPod, expected) } patchAndVerify(tc.patchString, tc.expected, "resize") @@ -1219,7 +1220,7 @@ func doPodResizeTests() { gomega.Expect(c.Name).To(gomega.Equal(tc.expected[i].Name), "test case containers & expectations should be in the same order") // Resizes that trigger a restart should trigger a second restart when rolling back. 
- rollbackContainers[i].RestartCount = tc.expected[i].RestartCount * 2 + rollbackContainers[i].RestartCount = tc.expected[i].RestartCount } rbPatchStr, err := e2epod.ResizeContainerPatch(tc.containers) diff --git a/test/e2e/framework/pod/resize.go b/test/e2e/framework/pod/resize.go index 5a7afbded02..719ba6339cd 100644 --- a/test/e2e/framework/pod/resize.go +++ b/test/e2e/framework/pod/resize.go @@ -505,6 +505,24 @@ func ResizeContainerPatch(containers []ResizableContainerInfo) (string, error) { return string(patchBytes), nil } +// UpdateExpectedContainerRestarts updates the RestartCounts in expectedContainers by +// adding them to the existing RestartCounts in the containerStatuses of the provided pod. +// This reduces the flakiness of the RestartCount assertions by grabbing the current +// restart count right before the resize operation, and verifying the expected increment (0 or 1) +// rather than the absolute count. +func UpdateExpectedContainerRestarts(ctx context.Context, pod *v1.Pod, expectedContainers []ResizableContainerInfo) []ResizableContainerInfo { + initialRestarts := make(map[string]int32) + newExpectedContainers := []ResizableContainerInfo{} + for _, ctr := range pod.Status.ContainerStatuses { + initialRestarts[ctr.Name] = ctr.RestartCount + } + for i, ctr := range expectedContainers { + newExpectedContainers = append(newExpectedContainers, expectedContainers[i]) + newExpectedContainers[i].RestartCount += initialRestarts[ctr.Name] + } + return newExpectedContainers +} + func formatErrors(err error) error { // Put each error on a new line for readability. 
var agg utilerrors.Aggregate diff --git a/test/e2e/node/pod_resize.go b/test/e2e/node/pod_resize.go index 4f39457ff25..ae072f71412 100644 --- a/test/e2e/node/pod_resize.go +++ b/test/e2e/node/pod_resize.go @@ -155,6 +155,7 @@ func doPodResizeAdmissionPluginsTests() { patchedPod, pErr := f.ClientSet.CoreV1().Pods(newPods[0].Namespace).Patch(ctx, newPods[0].Name, types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{}, "resize") framework.ExpectNoError(pErr, "failed to patch pod for resize") + expected = e2epod.UpdateExpectedContainerRestarts(ctx, patchedPod, expected) ginkgo.By("verifying pod patched for resize within resource quota") e2epod.VerifyPodResources(patchedPod, expected) @@ -405,8 +406,9 @@ func doPodResizeSchedulerTests(f *framework.Framework) { ginkgo.By(fmt.Sprintf("TEST3: Verify pod '%s' is resized successfully after pod deletion '%s' and '%s", testPod1.Name, testPod2.Name, testPod3.Name)) expected := []e2epod.ResizableContainerInfo{ { - Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: testPod1CPUQuantity.String(), CPULim: testPod1CPUQuantity.String()}, + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: testPod1CPUQuantity.String(), CPULim: testPod1CPUQuantity.String()}, + RestartCount: testPod1.Status.ContainerStatuses[0].RestartCount, }, } resizedPod := e2epod.WaitForPodResizeActuation(ctx, f, podClient, testPod1, expected)