From f913b7afe88352d4589d78bf4ed38db45a9cbc68 Mon Sep 17 00:00:00 2001 From: Laura Lorenz Date: Tue, 5 Nov 2024 01:59:38 +0000 Subject: [PATCH 01/18] Adding imagepull backoff test Signed-off-by: Laura Lorenz --- test/e2e_node/criproxy_test.go | 77 ++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/test/e2e_node/criproxy_test.go b/test/e2e_node/criproxy_test.go index 949a2379cbc..0e32e9f94f2 100644 --- a/test/e2e_node/criproxy_test.go +++ b/test/e2e_node/criproxy_test.go @@ -82,6 +82,41 @@ var _ = SIGDescribe(feature.CriProxy, framework.WithSerial(), func() { isExpectedErrMsg := strings.Contains(eventMsg, expectedErr.Error()) gomega.Expect(isExpectedErrMsg).To(gomega.BeTrueBecause("we injected an exception into the PullImage interface of the cri proxy")) }) + ginkgo.It("Image pull retry backs off on error.", func(ctx context.Context) { + expectedErr := fmt.Errorf("PullImage failed") + err := addCRIProxyInjector(func(apiName string) error { + if apiName == criproxy.PullImage { + return expectedErr + } + return nil + }) + framework.ExpectNoError(err) + + pod := e2epod.NewPodClient(f).Create(ctx, newPullImageAlwaysPod()) + podErr := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "ImagePullBackOff", 1*time.Minute, func(pod *v1.Pod) (bool, error) { + if len(pod.Status.ContainerStatuses) > 0 && pod.Status.Reason == images.ErrImagePullBackOff.Error() { + return true, nil + } + return false, nil + }) + gomega.Expect(podErr).To(gomega.HaveOccurred()) + + eventMsg, err := getFailedToPullImageMsg(ctx, f, pod.Name) + framework.ExpectNoError(err) + isExpectedErrMsg := strings.Contains(eventMsg, expectedErr.Error()) + gomega.Expect(isExpectedErrMsg).To(gomega.BeTrueBecause("we injected an exception into the PullImage interface of the cri proxy")) + + // remove error so after backoff we will succeed + resetCRIProxyInjector() + + podErr = e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, pod) + framework.ExpectNoError(podErr) + + durations, err := getImageBackOffDurations(ctx, f, pod.Name) + framework.ExpectNoError(err) + gomega.Expect(durations[0]).Should(gomega.BeNumerically("~", time.Duration(10*time.Second), time.Duration(2*time.Second))) + + }) }) ginkgo.Context("Inject a pull image timeout exception into the CriProxy", func() { @@ -133,6 +168,48 @@ func getFailedToPullImageMsg(ctx context.Context, f *framework.Framework, podNam return "", fmt.Errorf("failed to find FailedToPullImage event for pod: %s", podName) } +func getImageBackOffDurations(ctx context.Context, f *framework.Framework, podName string) ([]time.Duration, error) { + events, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, err + } + + var backoffs []time.Duration + + type BackOffRecord struct { + podName string + initialEventTime time.Time + backoffEventTimes []time.Time + duration time.Duration + } + + records := make(map[int]*BackOffRecord) + var backoffCount int + var pullTime time.Time + var r *BackOffRecord + for _, event := range events.Items { + if event.InvolvedObject.Name == podName { + switch event.Reason { + case kubeletevents.PullingImage: + if !pullTime.IsZero() { + if event.FirstTimestamp.Time.After(pullTime) { + r = records[backoffCount] + r.duration = r.initialEventTime.Sub(r.backoffEventTimes[len(r.backoffEventTimes)-1]) + backoffs = append(backoffs, r.duration) + backoffCount++ + } + } + pullTime = event.FirstTimestamp.Time + records[backoffCount].initialEventTime = pullTime + case 
kubeletevents.BackOffPullImage: + current := records[backoffCount].backoffEventTimes + current = append(current, event.FirstTimestamp.Time) + } + } + } + return backoffs, nil +} + func getPodImagePullDuration(ctx context.Context, f *framework.Framework, podName string) (time.Duration, error) { events, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{}) if err != nil { From 6337a28a682126c4e8b243c40face3821341c787 Mon Sep 17 00:00:00 2001 From: Laura Lorenz Date: Tue, 5 Nov 2024 02:36:30 +0000 Subject: [PATCH 02/18] Organize into its own context Signed-off-by: Laura Lorenz --- test/e2e_node/criproxy_test.go | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/test/e2e_node/criproxy_test.go b/test/e2e_node/criproxy_test.go index 0e32e9f94f2..21bd79bc2f8 100644 --- a/test/e2e_node/criproxy_test.go +++ b/test/e2e_node/criproxy_test.go @@ -82,6 +82,20 @@ var _ = SIGDescribe(feature.CriProxy, framework.WithSerial(), func() { isExpectedErrMsg := strings.Contains(eventMsg, expectedErr.Error()) gomega.Expect(isExpectedErrMsg).To(gomega.BeTrueBecause("we injected an exception into the PullImage interface of the cri proxy")) }) + }) + + ginkgo.Context("Inject a pull image error exception, then reset it, into the CriProxy", func() { + ginkgo.BeforeEach(func() { + if err := resetCRIProxyInjector(); err != nil { + ginkgo.Skip("Skip the test since the CRI Proxy is undefined.") + } + }) + + ginkgo.AfterEach(func() { + err := resetCRIProxyInjector() + framework.ExpectNoError(err) + }) + ginkgo.It("Image pull retry backs off on error.", func(ctx context.Context) { expectedErr := fmt.Errorf("PullImage failed") err := addCRIProxyInjector(func(apiName string) error { @@ -107,7 +121,7 @@ var _ = SIGDescribe(feature.CriProxy, framework.WithSerial(), func() { gomega.Expect(isExpectedErrMsg).To(gomega.BeTrueBecause("we injected an exception into the PullImage interface of the cri proxy")) // remove error so after backoff we will succeed - resetCRIProxyInjector() + _ = resetCRIProxyInjector() podErr = e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, pod) framework.ExpectNoError(podErr) @@ -189,11 +203,13 @@ func getImageBackOffDurations(ctx context.Context, f *framework.Framework, podNa var r *BackOffRecord for _, event := range events.Items { if event.InvolvedObject.Name == podName { + switch event.Reason { case kubeletevents.PullingImage: if !pullTime.IsZero() { if event.FirstTimestamp.Time.After(pullTime) { r = records[backoffCount] + r.podName = podName r.duration = r.initialEventTime.Sub(r.backoffEventTimes[len(r.backoffEventTimes)-1]) backoffs = append(backoffs, r.duration) backoffCount++ From 6ef05dbd012c31cb2d74f046e5b5ecb3110211f1 Mon Sep 17 00:00:00 2001 From: Laura Lorenz Date: Tue, 5 Nov 2024 06:09:06 +0000 Subject: [PATCH 03/18] The idea of how this test should work Signed-off-by: Laura Lorenz --- test/e2e_node/criproxy_test.go | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/test/e2e_node/criproxy_test.go b/test/e2e_node/criproxy_test.go index 21bd79bc2f8..7de25fa3553 100644 --- a/test/e2e_node/criproxy_test.go +++ b/test/e2e_node/criproxy_test.go @@ -97,6 +97,7 @@ var _ = SIGDescribe(feature.CriProxy, framework.WithSerial(), func() { }) ginkgo.It("Image pull retry backs off on error.", func(ctx context.Context) { + // inject PullImage failed to trigger backoff expectedErr := fmt.Errorf("PullImage failed") err := addCRIProxyInjector(func(apiName string) error { if apiName == criproxy.PullImage { 
@@ -120,15 +121,22 @@ var _ = SIGDescribe(feature.CriProxy, framework.WithSerial(), func() { isExpectedErrMsg := strings.Contains(eventMsg, expectedErr.Error()) gomega.Expect(isExpectedErrMsg).To(gomega.BeTrueBecause("we injected an exception into the PullImage interface of the cri proxy")) - // remove error so after backoff we will succeed + // Wait for ~40s worth of backoffs to occur so we can confirm the backoff growth. + podErr = e2epod.WaitForPodContainerStarted(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 0, 40*time.Second) + gomega.Expect(podErr).To(gomega.HaveOccurred(), "Expected container not to start from repeadedly backing off image pulls") + + // Remove error so after next backoff we will succeed. _ = resetCRIProxyInjector() podErr = e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, pod) framework.ExpectNoError(podErr) + // Parse observed backoffs (TODO: don't use Events, but logs) and compare to expectations durations, err := getImageBackOffDurations(ctx, f, pod.Name) framework.ExpectNoError(err) + gomega.Expect(durations).Error().ShouldNot(gomega.BeNil(), "Should have observed backoffs in Pod event log") gomega.Expect(durations[0]).Should(gomega.BeNumerically("~", time.Duration(10*time.Second), time.Duration(2*time.Second))) + // TODO: and check the next set of durations are 2x, etc }) }) @@ -191,25 +199,26 @@ func getImageBackOffDurations(ctx context.Context, f *framework.Framework, podNa var backoffs []time.Duration type BackOffRecord struct { - podName string initialEventTime time.Time backoffEventTimes []time.Time duration time.Duration } records := make(map[int]*BackOffRecord) + records[0] = &BackOffRecord{} var backoffCount int var pullTime time.Time var r *BackOffRecord + // I'm doing this here for events but really it needs to be off kubelet logs or some kind of synchronous watch + // Because the events normalize to the latest + // But this is the idea for _, event := range events.Items { if event.InvolvedObject.Name == podName { - switch event.Reason { case kubeletevents.PullingImage: if !pullTime.IsZero() { if event.FirstTimestamp.Time.After(pullTime) { r = records[backoffCount] - r.podName = podName r.duration = r.initialEventTime.Sub(r.backoffEventTimes[len(r.backoffEventTimes)-1]) backoffs = append(backoffs, r.duration) backoffCount++ From 2479d91f2a443850c29c02e0cf64faed6c620cc2 Mon Sep 17 00:00:00 2001 From: Laura Lorenz Date: Mon, 11 Nov 2024 22:34:15 +0000 Subject: [PATCH 04/18] Fix test to count pull tries Signed-off-by: Laura Lorenz --- test/e2e_node/criproxy_test.go | 97 ++++++++++++++-------------------- 1 file changed, 41 insertions(+), 56 deletions(-) diff --git a/test/e2e_node/criproxy_test.go b/test/e2e_node/criproxy_test.go index 7de25fa3553..7438c0ca150 100644 --- a/test/e2e_node/criproxy_test.go +++ b/test/e2e_node/criproxy_test.go @@ -84,22 +84,22 @@ var _ = SIGDescribe(feature.CriProxy, framework.WithSerial(), func() { }) }) - ginkgo.Context("Inject a pull image error exception, then reset it, into the CriProxy", func() { + ginkgo.Context("Image pull backoff", func() { ginkgo.BeforeEach(func() { - if err := resetCRIProxyInjector(); err != nil { + if err := resetCRIProxyInjector(e2eCriProxy); err != nil { ginkgo.Skip("Skip the test since the CRI Proxy is undefined.") } }) ginkgo.AfterEach(func() { - err := resetCRIProxyInjector() + err := resetCRIProxyInjector(e2eCriProxy) framework.ExpectNoError(err) }) ginkgo.It("Image pull retry backs off on error.", func(ctx context.Context) { // inject PullImage failed to trigger backoff expectedErr := 
fmt.Errorf("PullImage failed") - err := addCRIProxyInjector(func(apiName string) error { + err := addCRIProxyInjector(e2eCriProxy, func(apiName string) error { if apiName == criproxy.PullImage { return expectedErr } @@ -107,7 +107,8 @@ var _ = SIGDescribe(feature.CriProxy, framework.WithSerial(), func() { }) framework.ExpectNoError(err) - pod := e2epod.NewPodClient(f).Create(ctx, newPullImageAlwaysPod()) + pod := e2epod.NewPodClient(f).Create(ctx, newImageBackoffPod()) + framework.Logf("pod name: %s", pod.Name) podErr := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "ImagePullBackOff", 1*time.Minute, func(pod *v1.Pod) (bool, error) { if len(pod.Status.ContainerStatuses) > 0 && pod.Status.Reason == images.ErrImagePullBackOff.Error() { return true, nil @@ -121,22 +122,16 @@ var _ = SIGDescribe(feature.CriProxy, framework.WithSerial(), func() { isExpectedErrMsg := strings.Contains(eventMsg, expectedErr.Error()) gomega.Expect(isExpectedErrMsg).To(gomega.BeTrueBecause("we injected an exception into the PullImage interface of the cri proxy")) - // Wait for ~40s worth of backoffs to occur so we can confirm the backoff growth. - podErr = e2epod.WaitForPodContainerStarted(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 0, 40*time.Second) + // Wait for ~60s worth of backoffs to occur so we can confirm the backoff growth. + podErr = e2epod.WaitForPodContainerStarted(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 0, 1*time.Minute) gomega.Expect(podErr).To(gomega.HaveOccurred(), "Expected container not to start from repeadedly backing off image pulls") - // Remove error so after next backoff we will succeed. - _ = resetCRIProxyInjector() - - podErr = e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, pod) - framework.ExpectNoError(podErr) - - // Parse observed backoffs (TODO: don't use Events, but logs) and compare to expectations - durations, err := getImageBackOffDurations(ctx, f, pod.Name) + e, err := getImageBackoffs(ctx, f, pod.Name) framework.ExpectNoError(err) - gomega.Expect(durations).Error().ShouldNot(gomega.BeNil(), "Should have observed backoffs in Pod event log") - gomega.Expect(durations[0]).Should(gomega.BeNumerically("~", time.Duration(10*time.Second), time.Duration(2*time.Second))) - // TODO: and check the next set of durations are 2x, etc + // 3 would take 10s best case + gomega.Expect(e.Count).Should(gomega.BeNumerically(">", 3)) + // 6 would take 150s best case + gomega.Expect(e.Count).Should(gomega.BeNumerically("<=", 6)) }) }) @@ -190,49 +185,20 @@ func getFailedToPullImageMsg(ctx context.Context, f *framework.Framework, podNam return "", fmt.Errorf("failed to find FailedToPullImage event for pod: %s", podName) } -func getImageBackOffDurations(ctx context.Context, f *framework.Framework, podName string) ([]time.Duration, error) { - events, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{}) +func getImageBackoffs(ctx context.Context, f *framework.Framework, podName string) (v1.Event, error) { + + event := v1.Event{} + e, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{}) if err != nil { - return nil, err + return event, err } - var backoffs []time.Duration - - type BackOffRecord struct { - initialEventTime time.Time - backoffEventTimes []time.Time - duration time.Duration - } - - records := make(map[int]*BackOffRecord) - records[0] = &BackOffRecord{} - var backoffCount int - var pullTime time.Time - var r *BackOffRecord - // I'm doing this here for events but really it needs to be off 
kubelet logs or some kind of synchronous watch - // Because the events normalize to the latest - // But this is the idea - for _, event := range events.Items { - if event.InvolvedObject.Name == podName { - switch event.Reason { - case kubeletevents.PullingImage: - if !pullTime.IsZero() { - if event.FirstTimestamp.Time.After(pullTime) { - r = records[backoffCount] - r.duration = r.initialEventTime.Sub(r.backoffEventTimes[len(r.backoffEventTimes)-1]) - backoffs = append(backoffs, r.duration) - backoffCount++ - } - } - pullTime = event.FirstTimestamp.Time - records[backoffCount].initialEventTime = pullTime - case kubeletevents.BackOffPullImage: - current := records[backoffCount].backoffEventTimes - current = append(current, event.FirstTimestamp.Time) - } + for _, event := range e.Items { + if event.InvolvedObject.Name == podName && event.Reason == kubeletevents.PullingImage { + return event, nil } } - return backoffs, nil + return event, nil } func getPodImagePullDuration(ctx context.Context, f *framework.Framework, podName string) (time.Duration, error) { @@ -260,6 +226,25 @@ func getPodImagePullDuration(ctx context.Context, f *framework.Framework, podNam return endTime.Sub(startTime), nil } +func newImageBackoffPod() *v1.Pod { + podName := "image-backoff" + string(uuid.NewUUID()) + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Image: imageutils.GetPauseImageName(), + Name: podName, + ImagePullPolicy: v1.PullAlways, + }, + }, + }, + } + return pod +} + func newPullImageAlwaysPod() *v1.Pod { podName := "cri-proxy-test-" + string(uuid.NewUUID()) pod := &v1.Pod{ From fc4ac5efebd97124b7c608e0fe61ee9eb13c3f74 Mon Sep 17 00:00:00 2001 From: Laura Lorenz Date: Tue, 12 Nov 2024 01:26:48 +0000 Subject: [PATCH 05/18] Move image pull backoff test to be with other image pull tests Signed-off-by: Laura Lorenz --- test/e2e_node/criproxy_test.go | 71 -------------------------------- test/e2e_node/image_pull_test.go | 55 +++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 71 deletions(-) diff --git a/test/e2e_node/criproxy_test.go b/test/e2e_node/criproxy_test.go index 7438c0ca150..93f87a93f76 100644 --- a/test/e2e_node/criproxy_test.go +++ b/test/e2e_node/criproxy_test.go @@ -96,44 +96,6 @@ var _ = SIGDescribe(feature.CriProxy, framework.WithSerial(), func() { framework.ExpectNoError(err) }) - ginkgo.It("Image pull retry backs off on error.", func(ctx context.Context) { - // inject PullImage failed to trigger backoff - expectedErr := fmt.Errorf("PullImage failed") - err := addCRIProxyInjector(e2eCriProxy, func(apiName string) error { - if apiName == criproxy.PullImage { - return expectedErr - } - return nil - }) - framework.ExpectNoError(err) - - pod := e2epod.NewPodClient(f).Create(ctx, newImageBackoffPod()) - framework.Logf("pod name: %s", pod.Name) - podErr := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "ImagePullBackOff", 1*time.Minute, func(pod *v1.Pod) (bool, error) { - if len(pod.Status.ContainerStatuses) > 0 && pod.Status.Reason == images.ErrImagePullBackOff.Error() { - return true, nil - } - return false, nil - }) - gomega.Expect(podErr).To(gomega.HaveOccurred()) - - eventMsg, err := getFailedToPullImageMsg(ctx, f, pod.Name) - framework.ExpectNoError(err) - isExpectedErrMsg := strings.Contains(eventMsg, expectedErr.Error()) - gomega.Expect(isExpectedErrMsg).To(gomega.BeTrueBecause("we injected an exception into the PullImage interface of the cri proxy")) - - // Wait for ~60s worth 
of backoffs to occur so we can confirm the backoff growth. - podErr = e2epod.WaitForPodContainerStarted(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 0, 1*time.Minute) - gomega.Expect(podErr).To(gomega.HaveOccurred(), "Expected container not to start from repeadedly backing off image pulls") - - e, err := getImageBackoffs(ctx, f, pod.Name) - framework.ExpectNoError(err) - // 3 would take 10s best case - gomega.Expect(e.Count).Should(gomega.BeNumerically(">", 3)) - // 6 would take 150s best case - gomega.Expect(e.Count).Should(gomega.BeNumerically("<=", 6)) - - }) }) ginkgo.Context("Inject a pull image timeout exception into the CriProxy", func() { @@ -185,21 +147,7 @@ func getFailedToPullImageMsg(ctx context.Context, f *framework.Framework, podNam return "", fmt.Errorf("failed to find FailedToPullImage event for pod: %s", podName) } -func getImageBackoffs(ctx context.Context, f *framework.Framework, podName string) (v1.Event, error) { - event := v1.Event{} - e, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{}) - if err != nil { - return event, err - } - - for _, event := range e.Items { - if event.InvolvedObject.Name == podName && event.Reason == kubeletevents.PullingImage { - return event, nil - } - } - return event, nil -} func getPodImagePullDuration(ctx context.Context, f *framework.Framework, podName string) (time.Duration, error) { events, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{}) @@ -226,25 +174,6 @@ func getPodImagePullDuration(ctx context.Context, f *framework.Framework, podNam return endTime.Sub(startTime), nil } -func newImageBackoffPod() *v1.Pod { - podName := "image-backoff" + string(uuid.NewUUID()) - pod := &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: podName, - }, - Spec: v1.PodSpec{ - Containers: []v1.Container{ - { - Image: imageutils.GetPauseImageName(), - Name: podName, - ImagePullPolicy: v1.PullAlways, - }, - }, - }, - } - return pod -} - func newPullImageAlwaysPod() *v1.Pod { podName := "cri-proxy-test-" + string(uuid.NewUUID()) pod := &v1.Pod{ diff --git a/test/e2e_node/image_pull_test.go b/test/e2e_node/image_pull_test.go index 21e4a113a88..ecaa67e733d 100644 --- a/test/e2e_node/image_pull_test.go +++ b/test/e2e_node/image_pull_test.go @@ -34,6 +34,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" kubeletevents "k8s.io/kubernetes/pkg/kubelet/events" + "k8s.io/kubernetes/pkg/kubelet/images" "k8s.io/kubernetes/test/e2e/feature" "k8s.io/kubernetes/test/e2e/framework" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" @@ -230,6 +231,45 @@ var _ = SIGDescribe("Pull Image", feature.CriProxy, framework.WithSerial(), func }) }) + + ginkgo.It("Image pull retry backs off on error.", func(ctx context.Context) { + // inject PullImage failed to trigger backoff + expectedErr := fmt.Errorf("PullImage failed") + err := addCRIProxyInjector(e2eCriProxy, func(apiName string) error { + if apiName == criproxy.PullImage { + return expectedErr + } + return nil + }) + framework.ExpectNoError(err) + + pod := e2epod.NewPodClient(f).Create(ctx, newPullImageAlwaysPod()) + podErr := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "ImagePullBackOff", 1*time.Minute, func(pod *v1.Pod) (bool, error) { + if len(pod.Status.ContainerStatuses) > 0 && pod.Status.Reason == images.ErrImagePullBackOff.Error() { + return true, nil + } + return false, nil + }) + gomega.Expect(podErr).To(gomega.HaveOccurred()) + + eventMsg, err := 
getFailedToPullImageMsg(ctx, f, pod.Name) + framework.ExpectNoError(err) + isExpectedErrMsg := strings.Contains(eventMsg, expectedErr.Error()) + gomega.Expect(isExpectedErrMsg).To(gomega.BeTrueBecause("we injected an exception into the PullImage interface of the cri proxy")) + + // Wait for ~60s worth of backoffs to occur so we can confirm the backoff growth. + podErr = e2epod.WaitForPodContainerStarted(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 0, 1*time.Minute) + gomega.Expect(podErr).To(gomega.HaveOccurred(), "Expected container not to start from repeadedly backing off image pulls") + + e, err := getImagePullAttempts(ctx, f, pod.Name) + framework.ExpectNoError(err) + // 3 would take 10s best case + gomega.Expect(e.Count).Should(gomega.BeNumerically(">", 3)) + // 6 would take 150s best case + gomega.Expect(e.Count).Should(gomega.BeNumerically("<=", 6)) + + }) + }) func getPodImagePullDurations(ctx context.Context, f *framework.Framework, testpods []*v1.Pod) (map[string]*pulledStruct, map[string]metav1.Time, map[string]metav1.Time, error) { @@ -343,3 +383,18 @@ func getDurationsFromPulledEventMsg(msg string) (*pulledStruct, error) { pulledIncludeWaitingDuration: pulledIncludeWaitingDuration, }, nil } + +func getImagePullAttempts(ctx context.Context, f *framework.Framework, podName string) (v1.Event, error) { + event := v1.Event{} + e, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{}) + if err != nil { + return event, err + } + + for _, event := range e.Items { + if event.InvolvedObject.Name == podName && event.Reason == kubeletevents.PullingImage { + return event, nil + } + } + return event, nil +} From 15bae1eadf74c4d12adcb50c8eea48e2a429a36d Mon Sep 17 00:00:00 2001 From: Laura Lorenz Date: Tue, 12 Nov 2024 04:30:46 +0000 Subject: [PATCH 06/18] Add container restart test too Signed-off-by: Laura Lorenz --- test/e2e/framework/pod/wait.go | 15 +++ test/e2e_node/container_restart_test.go | 120 ++++++++++++++++++++++++ test/utils/image/manifest.go | 5 + 3 files changed, 140 insertions(+) create mode 100644 test/e2e_node/container_restart_test.go diff --git a/test/e2e/framework/pod/wait.go b/test/e2e/framework/pod/wait.go index 537d59c3224..40cf4db53b4 100644 --- a/test/e2e/framework/pod/wait.go +++ b/test/e2e/framework/pod/wait.go @@ -868,3 +868,18 @@ func WaitForContainerTerminated(ctx context.Context, c clientset.Interface, name return false, nil }) } + +// WaitForContainerRestartedNTimes waits for the given Pod container to have restarted N times +func WaitForContainerRestartedNTimes(ctx context.Context, c clientset.Interface, namespace, podName, containerName string, timeout time.Duration, target int) error { + conditionDesc := fmt.Sprintf("container %s restarted %d times", containerName, target) + return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) { + for _, statuses := range [][]v1.ContainerStatus{pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses, pod.Status.EphemeralContainerStatuses} { + for _, cs := range statuses { + if cs.Name == containerName { + return cs.RestartCount >= int32(target), nil + } + } + } + return false, nil + }) +} diff --git a/test/e2e_node/container_restart_test.go b/test/e2e_node/container_restart_test.go new file mode 100644 index 00000000000..631d29e3bf5 --- /dev/null +++ b/test/e2e_node/container_restart_test.go @@ -0,0 +1,120 @@ +package e2enode + +import ( + "context" + "time" + + imageutils "k8s.io/kubernetes/test/utils/image" + + 
"github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/uuid" + kubeletevents "k8s.io/kubernetes/pkg/kubelet/events" + "k8s.io/kubernetes/test/e2e/feature" + "k8s.io/kubernetes/test/e2e/framework" + e2epod "k8s.io/kubernetes/test/e2e/framework/pod" + admissionapi "k8s.io/pod-security-admission/api" +) + +var _ = SIGDescribe("Container Restart", feature.CriProxy, framework.WithSerial(), func() { + f := framework.NewDefaultFramework("container-restart") + f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged + + ginkgo.Context("Container restart backs off", func() { + + ginkgo.BeforeEach(func() { + if err := resetCRIProxyInjector(e2eCriProxy); err != nil { + ginkgo.Skip("Skip the test since the CRI Proxy is undefined.") + } + }) + + ginkgo.AfterEach(func() { + err := resetCRIProxyInjector(e2eCriProxy) + framework.ExpectNoError(err) + }) + + ginkgo.It("Container restart backs off.", func(ctx context.Context) { + // 3 would take 10s best case, 6 would take 150s best case + doTest(ctx, f, 5, time.Duration(80*time.Second), time.Duration(10*time.Second)) + }) + }) + + ginkgo.Context("Alternate container restart backs off as expected", func() { + + tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) { + initialConfig.CrashLoopBackOff.MaxContainerRestartPeriod = &metav1.Duration{Duration: time.Duration(30 * time.Second)} + initialConfig.FeatureGates = map[string]bool{"KubeletCrashLoopBackOffMax": true} + }) + + ginkgo.BeforeEach(func() { + if err := resetCRIProxyInjector(e2eCriProxy); err != nil { + ginkgo.Skip("Skip the test since the CRI Proxy is undefined.") + } + }) + + ginkgo.AfterEach(func() { + err := resetCRIProxyInjector(e2eCriProxy) + framework.ExpectNoError(err) + }) + + ginkgo.It("Alternate restart backs off.", func(ctx context.Context) { + doTest(ctx, f, 7, time.Duration(120*time.Second), time.Duration(10*time.Second)) + }) + }) +}) + +func doTest(ctx context.Context, f *framework.Framework, maxRestarts int, target time.Duration, threshold time.Duration) { + + pod := e2epod.NewPodClient(f).Create(ctx, newFailAlwaysPod()) + podErr := e2epod.WaitForPodContainerToFail(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 0, "CrashLoopBackOff", 1*time.Minute) + gomega.Expect(podErr).To(gomega.HaveOccurred()) + + // Wait for 120s worth of backoffs to occur so we can confirm the backoff growth. 
+ podErr = e2epod.WaitForContainerRestartedNTimes(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "restart", 150*time.Second, maxRestarts) + gomega.Expect(podErr).ShouldNot(gomega.HaveOccurred(), "Expected container to repeatedly back off container failures") + + d, err := getContainerRetryDuration(ctx, f, pod.Name) + framework.ExpectNoError(err) + + gomega.Expect(d).Should(gomega.BeNumerically("~", target, threshold)) +} + +func getContainerRetryDuration(ctx context.Context, f *framework.Framework, podName string) (time.Duration, error) { + + var d time.Duration + e, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{}) + if err != nil { + return d, err + } + + for _, event := range e.Items { + if event.InvolvedObject.Name == podName && event.Reason == kubeletevents.StartedContainer { + return event.LastTimestamp.Time.Sub(event.FirstTimestamp.Time), nil + } + } + return d, nil +} + +func newFailAlwaysPod() *v1.Pod { + podName := "container-restart" + string(uuid.NewUUID()) + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "restart", + Image: imageutils.GetBusyBoxImageName(), + ImagePullPolicy: v1.PullAlways, + Command: []string{"exit 1"}, + }, + }, + }, + } + return pod +} diff --git a/test/utils/image/manifest.go b/test/utils/image/manifest.go index 2bc34b09292..a027e7892d8 100644 --- a/test/utils/image/manifest.go +++ b/test/utils/image/manifest.go @@ -355,6 +355,11 @@ func GetPauseImageName() string { return GetE2EImage(Pause) } +// GetBusyBoxImageName returns the busybox image name with proper version +func GetBusyBoxImageName() string { + return GetE2EImage(BusyBox) +} + // ReplaceRegistryInImageURL replaces the registry in the image URL with a custom one based // on the configured registries. 
func ReplaceRegistryInImageURL(imageURL string) (string, error) { From bad037b505efd2a56ac41f7135438d7c6ec37d2f Mon Sep 17 00:00:00 2001 From: Laura Lorenz Date: Tue, 12 Nov 2024 04:48:10 +0000 Subject: [PATCH 07/18] Formatting Signed-off-by: Laura Lorenz --- test/e2e_node/criproxy_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/e2e_node/criproxy_test.go b/test/e2e_node/criproxy_test.go index 93f87a93f76..63339f321f5 100644 --- a/test/e2e_node/criproxy_test.go +++ b/test/e2e_node/criproxy_test.go @@ -147,8 +147,6 @@ func getFailedToPullImageMsg(ctx context.Context, f *framework.Framework, podNam return "", fmt.Errorf("failed to find FailedToPullImage event for pod: %s", podName) } - - func getPodImagePullDuration(ctx context.Context, f *framework.Framework, podName string) (time.Duration, error) { events, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{}) if err != nil { From f032068ef75ae34fdc0bc406b91b05df19c22365 Mon Sep 17 00:00:00 2001 From: Laura Lorenz Date: Tue, 12 Nov 2024 07:04:32 +0000 Subject: [PATCH 08/18] Focus on restart numbers instead of timing Signed-off-by: Laura Lorenz --- test/e2e/framework/pod/wait.go | 2 +- test/e2e_node/container_restart_test.go | 45 +++++++++++++------------ 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/test/e2e/framework/pod/wait.go b/test/e2e/framework/pod/wait.go index 40cf4db53b4..6e69f8ec487 100644 --- a/test/e2e/framework/pod/wait.go +++ b/test/e2e/framework/pod/wait.go @@ -871,7 +871,7 @@ func WaitForContainerTerminated(ctx context.Context, c clientset.Interface, name // WaitForContainerRestartedNTimes waits for the given Pod container to have restarted N times func WaitForContainerRestartedNTimes(ctx context.Context, c clientset.Interface, namespace, podName, containerName string, timeout time.Duration, target int) error { - conditionDesc := fmt.Sprintf("container %s restarted %d times", containerName, target) + conditionDesc := fmt.Sprintf("container %s restarted at least %d times", containerName, target) return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) { for _, statuses := range [][]v1.ContainerStatus{pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses, pod.Status.EphemeralContainerStatuses} { for _, cs := range statuses { diff --git a/test/e2e_node/container_restart_test.go b/test/e2e_node/container_restart_test.go index 631d29e3bf5..503b3b2d0e2 100644 --- a/test/e2e_node/container_restart_test.go +++ b/test/e2e_node/container_restart_test.go @@ -13,13 +13,14 @@ import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/uuid" - kubeletevents "k8s.io/kubernetes/pkg/kubelet/events" "k8s.io/kubernetes/test/e2e/feature" "k8s.io/kubernetes/test/e2e/framework" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" admissionapi "k8s.io/pod-security-admission/api" ) +const containerName = "restarts" + var _ = SIGDescribe("Container Restart", feature.CriProxy, framework.WithSerial(), func() { f := framework.NewDefaultFramework("container-restart") f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged @@ -38,8 +39,8 @@ var _ = SIGDescribe("Container Restart", feature.CriProxy, framework.WithSerial( }) ginkgo.It("Container restart backs off.", func(ctx context.Context) { - // 3 would take 10s best case, 6 would take 150s best case - doTest(ctx, f, 5, time.Duration(80*time.Second), time.Duration(10*time.Second)) + // 0s, 0s, 10s, 30s, 70s, 150s, 310s + 
doTest(ctx, f, 5, containerName, 7) }) }) @@ -62,41 +63,42 @@ var _ = SIGDescribe("Container Restart", feature.CriProxy, framework.WithSerial( }) ginkgo.It("Alternate restart backs off.", func(ctx context.Context) { - doTest(ctx, f, 7, time.Duration(120*time.Second), time.Duration(10*time.Second)) + // 0s, 0s, 10s, 30s, 60s, 90s, 120s, 150, 180, 210) + doTest(ctx, f, 7, containerName, 10) }) }) }) -func doTest(ctx context.Context, f *framework.Framework, maxRestarts int, target time.Duration, threshold time.Duration) { +func doTest(ctx context.Context, f *framework.Framework, targetRestarts int, containerName string, maxRestarts int) { pod := e2epod.NewPodClient(f).Create(ctx, newFailAlwaysPod()) podErr := e2epod.WaitForPodContainerToFail(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 0, "CrashLoopBackOff", 1*time.Minute) gomega.Expect(podErr).To(gomega.HaveOccurred()) - // Wait for 120s worth of backoffs to occur so we can confirm the backoff growth. - podErr = e2epod.WaitForContainerRestartedNTimes(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "restart", 150*time.Second, maxRestarts) + // Wait for 150s worth of backoffs to occur so we can confirm the backoff growth. + podErr = e2epod.WaitForContainerRestartedNTimes(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "restart", 150*time.Second, targetRestarts) gomega.Expect(podErr).ShouldNot(gomega.HaveOccurred(), "Expected container to repeatedly back off container failures") - d, err := getContainerRetryDuration(ctx, f, pod.Name) + r, err := extractObservedBackoff(ctx, f, pod.Name, containerName) framework.ExpectNoError(err) - gomega.Expect(d).Should(gomega.BeNumerically("~", target, threshold)) + gomega.Expect(r).Should(gomega.BeNumerically("<=", maxRestarts)) } -func getContainerRetryDuration(ctx context.Context, f *framework.Framework, podName string) (time.Duration, error) { - - var d time.Duration - e, err := f.ClientSet.CoreV1().Events(f.Namespace.Name).List(ctx, metav1.ListOptions{}) +func extractObservedBackoff(ctx context.Context, f *framework.Framework, podName string, containerName string) (int32, error) { + var r int32 + pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(ctx, podName, metav1.GetOptions{}) if err != nil { - return d, err + return r, err } - - for _, event := range e.Items { - if event.InvolvedObject.Name == podName && event.Reason == kubeletevents.StartedContainer { - return event.LastTimestamp.Time.Sub(event.FirstTimestamp.Time), nil + for _, statuses := range [][]v1.ContainerStatus{pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses, pod.Status.EphemeralContainerStatuses} { + for _, cs := range statuses { + if cs.Name == containerName { + return r, nil + } } } - return d, nil + return r, nil } func newFailAlwaysPod() *v1.Pod { @@ -108,10 +110,9 @@ func newFailAlwaysPod() *v1.Pod { Spec: v1.PodSpec{ Containers: []v1.Container{ { - Name: "restart", + Name: containerName, Image: imageutils.GetBusyBoxImageName(), - ImagePullPolicy: v1.PullAlways, - Command: []string{"exit 1"}, + ImagePullPolicy: v1.PullIfNotPresent, }, }, }, From e6059d7386479770ab5588dd33cc168b5c76e2e2 Mon Sep 17 00:00:00 2001 From: Laura Lorenz Date: Tue, 12 Nov 2024 19:48:38 +0000 Subject: [PATCH 09/18] Fix typecheck and verify Signed-off-by: Laura Lorenz --- test/e2e_node/container_restart_test.go | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/test/e2e_node/container_restart_test.go b/test/e2e_node/container_restart_test.go index 503b3b2d0e2..17445ae7ada 100644 --- 
a/test/e2e_node/container_restart_test.go +++ b/test/e2e_node/container_restart_test.go @@ -1,3 +1,22 @@ +//go:build linux +// +build linux + +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package e2enode import ( From 2732d57e33fd8c45a6f4856fce52f7c8ed8073c9 Mon Sep 17 00:00:00 2001 From: Laura Lorenz Date: Tue, 12 Nov 2024 18:04:08 +0000 Subject: [PATCH 10/18] Missed refactor of container name here Signed-off-by: Laura Lorenz --- test/e2e_node/container_restart_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/e2e_node/container_restart_test.go b/test/e2e_node/container_restart_test.go index 17445ae7ada..97c908ef250 100644 --- a/test/e2e_node/container_restart_test.go +++ b/test/e2e_node/container_restart_test.go @@ -94,8 +94,8 @@ func doTest(ctx context.Context, f *framework.Framework, targetRestarts int, con podErr := e2epod.WaitForPodContainerToFail(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 0, "CrashLoopBackOff", 1*time.Minute) gomega.Expect(podErr).To(gomega.HaveOccurred()) - // Wait for 150s worth of backoffs to occur so we can confirm the backoff growth. - podErr = e2epod.WaitForContainerRestartedNTimes(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "restart", 150*time.Second, targetRestarts) + // Wait for 210s worth of backoffs to occur so we can confirm the backoff growth. 
+ podErr = e2epod.WaitForContainerRestartedNTimes(ctx, f.ClientSet, f.Namespace.Name, pod.Name, containerName, 210*time.Second, targetRestarts) gomega.Expect(podErr).ShouldNot(gomega.HaveOccurred(), "Expected container to repeatedly back off container failures") r, err := extractObservedBackoff(ctx, f, pod.Name, containerName) From 1da8ca816eb8f7303b10508cdcbc4e0553285fd5 Mon Sep 17 00:00:00 2001 From: Laura Lorenz Date: Tue, 12 Nov 2024 20:00:11 +0000 Subject: [PATCH 11/18] Extract restart number properly Signed-off-by: Laura Lorenz --- test/e2e_node/container_restart_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e_node/container_restart_test.go b/test/e2e_node/container_restart_test.go index 97c908ef250..b7b5eacc4d1 100644 --- a/test/e2e_node/container_restart_test.go +++ b/test/e2e_node/container_restart_test.go @@ -113,7 +113,7 @@ func extractObservedBackoff(ctx context.Context, f *framework.Framework, podName for _, statuses := range [][]v1.ContainerStatus{pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses, pod.Status.EphemeralContainerStatuses} { for _, cs := range statuses { if cs.Name == containerName { - return r, nil + return cs.RestartCount, nil } } } From d293c5088fa110516cd2aec295f13ca1209e15e3 Mon Sep 17 00:00:00 2001 From: Laura Lorenz Date: Tue, 12 Nov 2024 21:12:20 +0000 Subject: [PATCH 12/18] Fix spelling Signed-off-by: Laura Lorenz --- test/e2e_node/image_pull_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e_node/image_pull_test.go b/test/e2e_node/image_pull_test.go index ecaa67e733d..d759a883143 100644 --- a/test/e2e_node/image_pull_test.go +++ b/test/e2e_node/image_pull_test.go @@ -259,7 +259,7 @@ var _ = SIGDescribe("Pull Image", feature.CriProxy, framework.WithSerial(), func // Wait for ~60s worth of backoffs to occur so we can confirm the backoff growth. 
podErr = e2epod.WaitForPodContainerStarted(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 0, 1*time.Minute) - gomega.Expect(podErr).To(gomega.HaveOccurred(), "Expected container not to start from repeadedly backing off image pulls") + gomega.Expect(podErr).To(gomega.HaveOccurred(), "Expected container not to start from repeatedly backing off image pulls") e, err := getImagePullAttempts(ctx, f, pod.Name) framework.ExpectNoError(err) From e03d0f60ef27a777de278bb8969acf6c134b68f3 Mon Sep 17 00:00:00 2001 From: Laura Lorenz Date: Tue, 12 Nov 2024 21:48:28 +0000 Subject: [PATCH 13/18] Orient tests to run faster, but tolerate infra slowdowns up to 5 minutes Signed-off-by: Laura Lorenz --- test/e2e_node/container_restart_test.go | 14 ++++++++------ test/e2e_node/image_pull_test.go | 9 ++++----- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/test/e2e_node/container_restart_test.go b/test/e2e_node/container_restart_test.go index b7b5eacc4d1..5a2d530f3f2 100644 --- a/test/e2e_node/container_restart_test.go +++ b/test/e2e_node/container_restart_test.go @@ -27,6 +27,7 @@ import ( "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" + "github.com/pkg/errors" kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" v1 "k8s.io/api/core/v1" @@ -59,7 +60,7 @@ var _ = SIGDescribe("Container Restart", feature.CriProxy, framework.WithSerial( ginkgo.It("Container restart backs off.", func(ctx context.Context) { // 0s, 0s, 10s, 30s, 70s, 150s, 310s - doTest(ctx, f, 5, containerName, 7) + doTest(ctx, f, 3, containerName, 7) }) }) @@ -82,8 +83,8 @@ var _ = SIGDescribe("Container Restart", feature.CriProxy, framework.WithSerial( }) ginkgo.It("Alternate restart backs off.", func(ctx context.Context) { - // 0s, 0s, 10s, 30s, 60s, 90s, 120s, 150, 180, 210) - doTest(ctx, f, 7, containerName, 10) + // 0s, 0s, 10s, 30s, 60s, 90s, 120s, 150s, 180s, 210s, 240s, 270s, 300s + doTest(ctx, f, 3, containerName, 13) }) }) }) @@ -94,8 +95,9 @@ func doTest(ctx context.Context, f *framework.Framework, targetRestarts int, con podErr := e2epod.WaitForPodContainerToFail(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 0, "CrashLoopBackOff", 1*time.Minute) gomega.Expect(podErr).To(gomega.HaveOccurred()) - // Wait for 210s worth of backoffs to occur so we can confirm the backoff growth. - podErr = e2epod.WaitForContainerRestartedNTimes(ctx, f.ClientSet, f.Namespace.Name, pod.Name, containerName, 210*time.Second, targetRestarts) + // Hard wait 30 seconds for targetRestarts in the best case; longer timeout later will handle if infra was slow. 
+ time.Sleep(30 * time.Second) + podErr = e2epod.WaitForContainerRestartedNTimes(ctx, f.ClientSet, f.Namespace.Name, pod.Name, containerName, 5*time.Minute, targetRestarts) gomega.Expect(podErr).ShouldNot(gomega.HaveOccurred(), "Expected container to repeatedly back off container failures") r, err := extractObservedBackoff(ctx, f, pod.Name, containerName) @@ -117,7 +119,7 @@ func extractObservedBackoff(ctx context.Context, f *framework.Framework, podName } } } - return r, nil + return r, errors.Errorf("Could not find container status for container %s in pod %s", containerName, podName) } func newFailAlwaysPod() *v1.Pod { diff --git a/test/e2e_node/image_pull_test.go b/test/e2e_node/image_pull_test.go index d759a883143..fce685e5264 100644 --- a/test/e2e_node/image_pull_test.go +++ b/test/e2e_node/image_pull_test.go @@ -257,16 +257,15 @@ var _ = SIGDescribe("Pull Image", feature.CriProxy, framework.WithSerial(), func isExpectedErrMsg := strings.Contains(eventMsg, expectedErr.Error()) gomega.Expect(isExpectedErrMsg).To(gomega.BeTrueBecause("we injected an exception into the PullImage interface of the cri proxy")) - // Wait for ~60s worth of backoffs to occur so we can confirm the backoff growth. - podErr = e2epod.WaitForPodContainerStarted(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 0, 1*time.Minute) + podErr = e2epod.WaitForPodContainerStarted(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 0, 30*time.Second) gomega.Expect(podErr).To(gomega.HaveOccurred(), "Expected container not to start from repeatedly backing off image pulls") e, err := getImagePullAttempts(ctx, f, pod.Name) framework.ExpectNoError(err) - // 3 would take 10s best case + // 3 would take 10s best case. gomega.Expect(e.Count).Should(gomega.BeNumerically(">", 3)) - // 6 would take 150s best case - gomega.Expect(e.Count).Should(gomega.BeNumerically("<=", 6)) + // 7 would take 310s best case, if the infra went slow. + gomega.Expect(e.Count).Should(gomega.BeNumerically("<=", 7)) }) From 285d433dea79e6838cdd0648c36a2a82fd398179 Mon Sep 17 00:00:00 2001 From: Laura Lorenz Date: Tue, 12 Nov 2024 22:48:17 +0000 Subject: [PATCH 14/18] Clearer image pull test and utils Signed-off-by: Laura Lorenz --- .../k8s.io/kubectl/pkg/util/podutils/podutils.go | 9 ++++++--- test/e2e/framework/pod/wait.go | 16 +++++----------- test/e2e_node/container_restart_test.go | 2 +- test/e2e_node/image_pull_test.go | 4 ++-- 4 files changed, 14 insertions(+), 17 deletions(-) diff --git a/staging/src/k8s.io/kubectl/pkg/util/podutils/podutils.go b/staging/src/k8s.io/kubectl/pkg/util/podutils/podutils.go index dfefdef4567..9e457f406d3 100644 --- a/staging/src/k8s.io/kubectl/pkg/util/podutils/podutils.go +++ b/staging/src/k8s.io/kubectl/pkg/util/podutils/podutils.go @@ -191,7 +191,10 @@ func podReadyTime(pod *corev1.Pod) *metav1.Time { return &metav1.Time{} } -func maxContainerRestarts(pod *corev1.Pod) (regularRestarts, sidecarRestarts int) { +// MaxContainerRestarts iterates through all the normal containers and sidecar +// containers in a Pod object and reports the highest restart count observed per +// category. +func MaxContainerRestarts(pod *corev1.Pod) (regularRestarts, sidecarRestarts int) { for _, c := range pod.Status.ContainerStatuses { regularRestarts = max(regularRestarts, int(c.RestartCount)) } @@ -214,8 +217,8 @@ func maxContainerRestarts(pod *corev1.Pod) (regularRestarts, sidecarRestarts int // false: pj has a higher container restart count. // nil: Both have the same container restart count. 
func compareMaxContainerRestarts(pi *corev1.Pod, pj *corev1.Pod) *bool { - regularRestartsI, sidecarRestartsI := maxContainerRestarts(pi) - regularRestartsJ, sidecarRestartsJ := maxContainerRestarts(pj) + regularRestartsI, sidecarRestartsI := MaxContainerRestarts(pi) + regularRestartsJ, sidecarRestartsJ := MaxContainerRestarts(pj) if regularRestartsI != regularRestartsJ { res := regularRestartsI > regularRestartsJ return &res diff --git a/test/e2e/framework/pod/wait.go b/test/e2e/framework/pod/wait.go index 6e69f8ec487..7a416c475c7 100644 --- a/test/e2e/framework/pod/wait.go +++ b/test/e2e/framework/pod/wait.go @@ -869,17 +869,11 @@ func WaitForContainerTerminated(ctx context.Context, c clientset.Interface, name }) } -// WaitForContainerRestartedNTimes waits for the given Pod container to have restarted N times -func WaitForContainerRestartedNTimes(ctx context.Context, c clientset.Interface, namespace, podName, containerName string, timeout time.Duration, target int) error { - conditionDesc := fmt.Sprintf("container %s restarted at least %d times", containerName, target) +// WaitForContainerRestartedNTimes waits for a container in the Pod to have restarted N times +func WaitForContainerRestartedNTimes(ctx context.Context, c clientset.Interface, namespace string, podName string, timeout time.Duration, target int) error { + conditionDesc := fmt.Sprintf("A container in pod %s restarted at least %d times", podName, target) return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) { - for _, statuses := range [][]v1.ContainerStatus{pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses, pod.Status.EphemeralContainerStatuses} { - for _, cs := range statuses { - if cs.Name == containerName { - return cs.RestartCount >= int32(target), nil - } - } - } - return false, nil + r, _ := podutils.MaxContainerRestarts(pod) + return r >= target, nil }) } diff --git a/test/e2e_node/container_restart_test.go b/test/e2e_node/container_restart_test.go index 5a2d530f3f2..5fdbae30a11 100644 --- a/test/e2e_node/container_restart_test.go +++ b/test/e2e_node/container_restart_test.go @@ -97,7 +97,7 @@ func doTest(ctx context.Context, f *framework.Framework, targetRestarts int, con // Hard wait 30 seconds for targetRestarts in the best case; longer timeout later will handle if infra was slow. 
time.Sleep(30 * time.Second) - podErr = e2epod.WaitForContainerRestartedNTimes(ctx, f.ClientSet, f.Namespace.Name, pod.Name, containerName, 5*time.Minute, targetRestarts) + podErr = e2epod.WaitForContainerRestartedNTimes(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 5*time.Minute, targetRestarts) gomega.Expect(podErr).ShouldNot(gomega.HaveOccurred(), "Expected container to repeatedly back off container failures") r, err := extractObservedBackoff(ctx, f, pod.Name, containerName) diff --git a/test/e2e_node/image_pull_test.go b/test/e2e_node/image_pull_test.go index fce685e5264..28c393f2a96 100644 --- a/test/e2e_node/image_pull_test.go +++ b/test/e2e_node/image_pull_test.go @@ -257,8 +257,8 @@ var _ = SIGDescribe("Pull Image", feature.CriProxy, framework.WithSerial(), func isExpectedErrMsg := strings.Contains(eventMsg, expectedErr.Error()) gomega.Expect(isExpectedErrMsg).To(gomega.BeTrueBecause("we injected an exception into the PullImage interface of the cri proxy")) - podErr = e2epod.WaitForPodContainerStarted(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 0, 30*time.Second) - gomega.Expect(podErr).To(gomega.HaveOccurred(), "Expected container not to start from repeatedly backing off image pulls") + // Hard wait 30 seconds for image pulls to repeatedly back off. + time.Sleep(30 * time.Second) e, err := getImagePullAttempts(ctx, f, pod.Name) framework.ExpectNoError(err) From 8e7b2af712a79db423b7bfdf2a7bf35f75542c13 Mon Sep 17 00:00:00 2001 From: Laura Lorenz Date: Tue, 12 Nov 2024 23:26:04 +0000 Subject: [PATCH 15/18] Use a better util Signed-off-by: Laura Lorenz --- .../src/k8s.io/kubectl/pkg/util/podutils/podutils.go | 9 +++------ test/e2e/framework/pod/wait.go | 12 ++++++++---- test/e2e_node/container_restart_test.go | 2 +- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/staging/src/k8s.io/kubectl/pkg/util/podutils/podutils.go b/staging/src/k8s.io/kubectl/pkg/util/podutils/podutils.go index 9e457f406d3..dfefdef4567 100644 --- a/staging/src/k8s.io/kubectl/pkg/util/podutils/podutils.go +++ b/staging/src/k8s.io/kubectl/pkg/util/podutils/podutils.go @@ -191,10 +191,7 @@ func podReadyTime(pod *corev1.Pod) *metav1.Time { return &metav1.Time{} } -// MaxContainerRestarts iterates through all the normal containers and sidecar -// containers in a Pod object and reports the highest restart count observed per -// category. -func MaxContainerRestarts(pod *corev1.Pod) (regularRestarts, sidecarRestarts int) { +func maxContainerRestarts(pod *corev1.Pod) (regularRestarts, sidecarRestarts int) { for _, c := range pod.Status.ContainerStatuses { regularRestarts = max(regularRestarts, int(c.RestartCount)) } @@ -217,8 +214,8 @@ func MaxContainerRestarts(pod *corev1.Pod) (regularRestarts, sidecarRestarts int // false: pj has a higher container restart count. // nil: Both have the same container restart count. 
func compareMaxContainerRestarts(pi *corev1.Pod, pj *corev1.Pod) *bool { - regularRestartsI, sidecarRestartsI := MaxContainerRestarts(pi) - regularRestartsJ, sidecarRestartsJ := MaxContainerRestarts(pj) + regularRestartsI, sidecarRestartsI := maxContainerRestarts(pi) + regularRestartsJ, sidecarRestartsJ := maxContainerRestarts(pj) if regularRestartsI != regularRestartsJ { res := regularRestartsI > regularRestartsJ return &res diff --git a/test/e2e/framework/pod/wait.go b/test/e2e/framework/pod/wait.go index 7a416c475c7..5cbaaab8a25 100644 --- a/test/e2e/framework/pod/wait.go +++ b/test/e2e/framework/pod/wait.go @@ -37,6 +37,7 @@ import ( apitypes "k8s.io/apimachinery/pkg/types" clientset "k8s.io/client-go/kubernetes" "k8s.io/kubectl/pkg/util/podutils" + podv1util "k8s.io/kubernetes/pkg/api/v1/pod" "k8s.io/kubernetes/test/e2e/framework" testutils "k8s.io/kubernetes/test/utils" "k8s.io/kubernetes/test/utils/format" @@ -869,11 +870,14 @@ func WaitForContainerTerminated(ctx context.Context, c clientset.Interface, name }) } -// WaitForContainerRestartedNTimes waits for a container in the Pod to have restarted N times -func WaitForContainerRestartedNTimes(ctx context.Context, c clientset.Interface, namespace string, podName string, timeout time.Duration, target int) error { +// WaitForContainerRestartedNTimes waits for the given normal container in the Pod to have restarted N times +func WaitForContainerRestartedNTimes(ctx context.Context, c clientset.Interface, namespace string, podName string, containerName string, timeout time.Duration, target int) error { conditionDesc := fmt.Sprintf("A container in pod %s restarted at least %d times", podName, target) return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) { - r, _ := podutils.MaxContainerRestarts(pod) - return r >= target, nil + cs, found := podv1util.GetContainerStatus(pod.Status.ContainerStatuses, containerName) + if !found { + return false, fmt.Errorf("could not find container %s in pod %s", containerName, podName) + } + return cs.RestartCount >= int32(target), nil }) } diff --git a/test/e2e_node/container_restart_test.go b/test/e2e_node/container_restart_test.go index 5fdbae30a11..5a2d530f3f2 100644 --- a/test/e2e_node/container_restart_test.go +++ b/test/e2e_node/container_restart_test.go @@ -97,7 +97,7 @@ func doTest(ctx context.Context, f *framework.Framework, targetRestarts int, con // Hard wait 30 seconds for targetRestarts in the best case; longer timeout later will handle if infra was slow. 
time.Sleep(30 * time.Second) - podErr = e2epod.WaitForContainerRestartedNTimes(ctx, f.ClientSet, f.Namespace.Name, pod.Name, 5*time.Minute, targetRestarts) + podErr = e2epod.WaitForContainerRestartedNTimes(ctx, f.ClientSet, f.Namespace.Name, pod.Name, containerName, 5*time.Minute, targetRestarts) gomega.Expect(podErr).ShouldNot(gomega.HaveOccurred(), "Expected container to repeatedly back off container failures") r, err := extractObservedBackoff(ctx, f, pod.Name, containerName) From 529d5ba9d33a09664718db9697fce9b2a93948c5 Mon Sep 17 00:00:00 2001 From: Laura Lorenz Date: Tue, 12 Nov 2024 23:34:57 +0000 Subject: [PATCH 16/18] Don't overly indirect image name Signed-off-by: Laura Lorenz --- test/e2e_node/container_restart_test.go | 2 +- test/utils/image/manifest.go | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/test/e2e_node/container_restart_test.go b/test/e2e_node/container_restart_test.go index 5a2d530f3f2..48404b5e110 100644 --- a/test/e2e_node/container_restart_test.go +++ b/test/e2e_node/container_restart_test.go @@ -132,7 +132,7 @@ func newFailAlwaysPod() *v1.Pod { Containers: []v1.Container{ { Name: containerName, - Image: imageutils.GetBusyBoxImageName(), + Image: imageutils.GetE2EImage(imageutils.BusyBox), ImagePullPolicy: v1.PullIfNotPresent, }, }, diff --git a/test/utils/image/manifest.go b/test/utils/image/manifest.go index a027e7892d8..2bc34b09292 100644 --- a/test/utils/image/manifest.go +++ b/test/utils/image/manifest.go @@ -355,11 +355,6 @@ func GetPauseImageName() string { return GetE2EImage(Pause) } -// GetBusyBoxImageName returns the busybox image name with proper version -func GetBusyBoxImageName() string { - return GetE2EImage(BusyBox) -} - // ReplaceRegistryInImageURL replaces the registry in the image URL with a custom one based // on the configured registries. 
func ReplaceRegistryInImageURL(imageURL string) (string, error) { From 59f98580861e81441054b893efdcaeb0eac1a488 Mon Sep 17 00:00:00 2001 From: Laura Lorenz Date: Tue, 12 Nov 2024 23:59:30 +0000 Subject: [PATCH 17/18] Move function specific to container restart test inline Signed-off-by: Laura Lorenz --- test/e2e/framework/pod/wait.go | 13 ------------- test/e2e_node/container_restart_test.go | 15 ++++++++++++++- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/test/e2e/framework/pod/wait.go b/test/e2e/framework/pod/wait.go index 5cbaaab8a25..537d59c3224 100644 --- a/test/e2e/framework/pod/wait.go +++ b/test/e2e/framework/pod/wait.go @@ -37,7 +37,6 @@ import ( apitypes "k8s.io/apimachinery/pkg/types" clientset "k8s.io/client-go/kubernetes" "k8s.io/kubectl/pkg/util/podutils" - podv1util "k8s.io/kubernetes/pkg/api/v1/pod" "k8s.io/kubernetes/test/e2e/framework" testutils "k8s.io/kubernetes/test/utils" "k8s.io/kubernetes/test/utils/format" @@ -869,15 +868,3 @@ func WaitForContainerTerminated(ctx context.Context, c clientset.Interface, name return false, nil }) } - -// WaitForContainerRestartedNTimes waits for the given normal container in the Pod to have restarted N times -func WaitForContainerRestartedNTimes(ctx context.Context, c clientset.Interface, namespace string, podName string, containerName string, timeout time.Duration, target int) error { - conditionDesc := fmt.Sprintf("A container in pod %s restarted at least %d times", podName, target) - return WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) { - cs, found := podv1util.GetContainerStatus(pod.Status.ContainerStatuses, containerName) - if !found { - return false, fmt.Errorf("could not find container %s in pod %s", containerName, podName) - } - return cs.RestartCount >= int32(target), nil - }) -} diff --git a/test/e2e_node/container_restart_test.go b/test/e2e_node/container_restart_test.go index 48404b5e110..086ce2d7d4c 100644 --- a/test/e2e_node/container_restart_test.go +++ b/test/e2e_node/container_restart_test.go @@ -21,8 +21,10 @@ package e2enode import ( "context" + "fmt" "time" + podv1util "k8s.io/kubernetes/pkg/api/v1/pod" imageutils "k8s.io/kubernetes/test/utils/image" "github.com/onsi/ginkgo/v2" @@ -97,7 +99,7 @@ func doTest(ctx context.Context, f *framework.Framework, targetRestarts int, con // Hard wait 30 seconds for targetRestarts in the best case; longer timeout later will handle if infra was slow. 
time.Sleep(30 * time.Second) - podErr = e2epod.WaitForContainerRestartedNTimes(ctx, f.ClientSet, f.Namespace.Name, pod.Name, containerName, 5*time.Minute, targetRestarts) + podErr = waitForContainerRestartedNTimes(ctx, f, f.Namespace.Name, pod.Name, containerName, 5*time.Minute, targetRestarts) gomega.Expect(podErr).ShouldNot(gomega.HaveOccurred(), "Expected container to repeatedly back off container failures") r, err := extractObservedBackoff(ctx, f, pod.Name, containerName) @@ -140,3 +142,14 @@ func newFailAlwaysPod() *v1.Pod { } return pod } + +func waitForContainerRestartedNTimes(ctx context.Context, f *framework.Framework, namespace string, podName string, containerName string, timeout time.Duration, target int) error { + conditionDesc := fmt.Sprintf("A container in pod %s restarted at least %d times", podName, target) + return e2epod.WaitForPodCondition(ctx, f.ClientSet, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) { + cs, found := podv1util.GetContainerStatus(pod.Status.ContainerStatuses, containerName) + if !found { + return false, fmt.Errorf("could not find container %s in pod %s", containerName, podName) + } + return cs.RestartCount >= int32(target), nil + }) +} From 9ab0d81d7667dd456a887dffe3c85f602de89fc7 Mon Sep 17 00:00:00 2001 From: Laura Lorenz Date: Wed, 13 Nov 2024 01:39:58 +0000 Subject: [PATCH 18/18] Now that sleep is shorter, only expect to reach 3 within 30s Focused too much on the container restart one in commit that fixed that Signed-off-by: Laura Lorenz --- test/e2e_node/image_pull_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e_node/image_pull_test.go b/test/e2e_node/image_pull_test.go index 28c393f2a96..a80795f8f37 100644 --- a/test/e2e_node/image_pull_test.go +++ b/test/e2e_node/image_pull_test.go @@ -263,7 +263,7 @@ var _ = SIGDescribe("Pull Image", feature.CriProxy, framework.WithSerial(), func e, err := getImagePullAttempts(ctx, f, pod.Name) framework.ExpectNoError(err) // 3 would take 10s best case. - gomega.Expect(e.Count).Should(gomega.BeNumerically(">", 3)) + gomega.Expect(e.Count).Should(gomega.BeNumerically(">=", 3)) // 7 would take 310s best case, if the infra went slow. gomega.Expect(e.Count).Should(gomega.BeNumerically("<=", 7))
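
Note on the expected restart timings: the comments in these patches ("0s, 0s, 10s, 30s, 70s, 150s, 310s" for the default backoff and "0s, 0s, 10s, 30s, 60s, 90s, ..." for the 30s-capped alternate) follow from an exponential backoff that starts at 10s, doubles per retry, and is capped at a maximum period (300s by default, 30s with MaxContainerRestartPeriod set as above). The following is a minimal illustrative sketch, not kubelet's actual backoff implementation; the function and file names are invented for this example, and it ignores the extra immediate restart that accounts for the second leading 0s in the test comments.

// backoff_schedule_sketch.go (hypothetical, for illustration only)
package main

import (
	"fmt"
	"time"
)

// backoffSchedule returns the cumulative times at which successive restarts
// (or image pull retries) are expected to happen, assuming the delay starts
// at `initial`, doubles after every attempt, and never exceeds `max`.
func backoffSchedule(initial, max time.Duration, restarts int) []time.Duration {
	var schedule []time.Duration
	elapsed := time.Duration(0)
	delay := initial
	for i := 0; i < restarts; i++ {
		schedule = append(schedule, elapsed)
		elapsed += delay
		delay *= 2
		if delay > max {
			delay = max
		}
	}
	return schedule
}

func main() {
	// Default kubelet-style backoff: 10s initial, 300s cap.
	// Prints [0s 10s 30s 1m10s 2m30s 5m10s ...], i.e. 0s, 10s, 30s, 70s, 150s, 310s.
	fmt.Println(backoffSchedule(10*time.Second, 300*time.Second, 7))

	// Alternate config from the second test: 10s initial, 30s cap.
	// Prints 0s, 10s, 30s, 60s, 90s, 120s, ... (linear once the cap is hit).
	fmt.Println(backoffSchedule(10*time.Second, 30*time.Second, 13))
}

Under these assumptions, waiting roughly 30s of wall clock (as the hard sleeps above do) is enough to observe at least 3 attempts in the best case, while the upper bounds of 7 and 13 restarts correspond to about 310s and 300s of cumulative backoff respectively, which is why the tests tolerate up to 5 minutes for slow infrastructure.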