From e03d0f60ef27a777de278bb8969acf6c134b68f3 Mon Sep 17 00:00:00 2001
From: Laura Lorenz
Date: Tue, 12 Nov 2024 21:48:28 +0000
Subject: [PATCH] Orient tests to run faster, but tolerate infra slowdowns up to 5 minutes

Signed-off-by: Laura Lorenz
---
 test/e2e_node/container_restart_test.go | 14 ++++++++------
 test/e2e_node/image_pull_test.go        |  9 ++++-----
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/test/e2e_node/container_restart_test.go b/test/e2e_node/container_restart_test.go
index b7b5eacc4d1..5a2d530f3f2 100644
--- a/test/e2e_node/container_restart_test.go
+++ b/test/e2e_node/container_restart_test.go
@@ -27,6 +27,7 @@ import (
 
 	"github.com/onsi/ginkgo/v2"
 	"github.com/onsi/gomega"
+	"github.com/pkg/errors"
 	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
 
 	v1 "k8s.io/api/core/v1"
@@ -59,7 +60,7 @@ var _ = SIGDescribe("Container Restart", feature.CriProxy, framework.WithSerial(
 
 		ginkgo.It("Container restart backs off.", func(ctx context.Context) {
 			// 0s, 0s, 10s, 30s, 70s, 150s, 310s
-			doTest(ctx, f, 5, containerName, 7)
+			doTest(ctx, f, 3, containerName, 7)
 		})
 	})
 
@@ -82,8 +83,8 @@ var _ = SIGDescribe("Container Restart", feature.CriProxy, framework.WithSerial(
 		})
 
 		ginkgo.It("Alternate restart backs off.", func(ctx context.Context) {
-			// 0s, 0s, 10s, 30s, 60s, 90s, 120s, 150, 180, 210)
-			doTest(ctx, f, 7, containerName, 10)
+			// 0s, 0s, 10s, 30s, 60s, 90s, 120s, 150s, 180s, 210s, 240s, 270s, 300s
+			doTest(ctx, f, 3, containerName, 13)
 		})
 	})
 })
@@ -94,8 +95,9 @@ func doTest(ctx context.Context, f *framework.Framework, targetRestarts int, con
 	podErr := e2epod.WaitForPodContainerToFail(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 0, "CrashLoopBackOff", 1*time.Minute)
 	gomega.Expect(podErr).To(gomega.HaveOccurred())
 
-	// Wait for 210s worth of backoffs to occur so we can confirm the backoff growth.
-	podErr = e2epod.WaitForContainerRestartedNTimes(ctx, f.ClientSet, f.Namespace.Name, pod.Name, containerName, 210*time.Second, targetRestarts)
+	// Hard wait 30 seconds, enough for targetRestarts in the best case; the longer timeout below tolerates slow infra.
+	time.Sleep(30 * time.Second)
+	podErr = e2epod.WaitForContainerRestartedNTimes(ctx, f.ClientSet, f.Namespace.Name, pod.Name, containerName, 5*time.Minute, targetRestarts)
 	gomega.Expect(podErr).ShouldNot(gomega.HaveOccurred(), "Expected container to repeatedly back off container failures")
 
 	r, err := extractObservedBackoff(ctx, f, pod.Name, containerName)
@@ -117,7 +119,7 @@
 			}
 		}
 	}
-	return r, nil
+	return r, errors.Errorf("Could not find container status for container %s in pod %s", containerName, podName)
 }
 
 func newFailAlwaysPod() *v1.Pod {
diff --git a/test/e2e_node/image_pull_test.go b/test/e2e_node/image_pull_test.go
index d759a883143..fce685e5264 100644
--- a/test/e2e_node/image_pull_test.go
+++ b/test/e2e_node/image_pull_test.go
@@ -257,16 +257,15 @@ var _ = SIGDescribe("Pull Image", feature.CriProxy, framework.WithSerial(), func
 			isExpectedErrMsg := strings.Contains(eventMsg, expectedErr.Error())
 			gomega.Expect(isExpectedErrMsg).To(gomega.BeTrueBecause("we injected an exception into the PullImage interface of the cri proxy"))
 
-			// Wait for ~60s worth of backoffs to occur so we can confirm the backoff growth.
-			podErr = e2epod.WaitForPodContainerStarted(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 0, 1*time.Minute)
+			podErr = e2epod.WaitForPodContainerStarted(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 0, 30*time.Second)
 			gomega.Expect(podErr).To(gomega.HaveOccurred(), "Expected container not to start from repeatedly backing off image pulls")
 
 			e, err := getImagePullAttempts(ctx, f, pod.Name)
 			framework.ExpectNoError(err)
-			// 3 would take 10s best case
+			// 3 would take 10s best case.
 			gomega.Expect(e.Count).Should(gomega.BeNumerically(">", 3))
-			// 6 would take 150s best case
-			gomega.Expect(e.Count).Should(gomega.BeNumerically("<=", 6))
+			// 7 would take 310s best case, so allow up to 7 in case the infra went slow.
+			gomega.Expect(e.Count).Should(gomega.BeNumerically("<=", 7))
 		})
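
For context on the schedules cited in the comments: both tests assume kubelet-style capped exponential backoff, starting at 10s and doubling up to a cap (300s by default; apparently 30s under the alternate kubelet config this file exercises). Below is a minimal sketch of that model. It is illustrative only; cumulativeRestarts is a hypothetical helper, not kubelet or e2e framework code, and the immediate first restart is an assumption read off the "0s, 0s, ..." comments above.

package main

import (
	"fmt"
	"time"
)

// cumulativeRestarts models capped exponential backoff for a crashing
// container: one start at 0s, one immediate restart, then waits of
// initial, 2*initial, ... capped at maxBackoff before each later restart.
// It returns the cumulative time of each of the n starts (n >= 2 assumed).
func cumulativeRestarts(n int, initial, maxBackoff time.Duration) []time.Duration {
	times := []time.Duration{0, 0} // first start plus the immediate restart
	backoff, elapsed := initial, time.Duration(0)
	for len(times) < n {
		elapsed += backoff
		times = append(times, elapsed)
		backoff = min(2*backoff, maxBackoff) // min builtin requires Go 1.21+
	}
	return times
}

func main() {
	// Default 300s cap: prints [0s 0s 10s 30s 1m10s 2m30s 5m10s],
	// i.e. the 0s, 0s, 10s, 30s, 70s, 150s, 310s schedule above.
	fmt.Println(cumulativeRestarts(7, 10*time.Second, 300*time.Second))
	// 30s cap (alternate config): 13 starts ending at 5m0s, matching
	// the 0s ... 300s schedule the alternate test now asserts.
	fmt.Println(cumulativeRestarts(13, 10*time.Second, 30*time.Second))
}

The same capped-doubling model is behind the image pull bounds: per the comments, more than 3 pull attempts fit in the shortened 30s wait, while a 7th attempt would take roughly 310s even in the best case, which is why the upper bound is relaxed to 7 to absorb slow infra.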