From b4a8861af306b8b4feb8723254cf10af4206a076 Mon Sep 17 00:00:00 2001
From: Elana Hashman
Date: Thu, 18 Nov 2021 14:57:22 -0800
Subject: [PATCH 1/2] Tweak resource requests for Kubelet restart test

---
 test/e2e_node/restart_test.go | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/test/e2e_node/restart_test.go b/test/e2e_node/restart_test.go
index 505ec85a4b2..d1e7af884eb 100644
--- a/test/e2e_node/restart_test.go
+++ b/test/e2e_node/restart_test.go
@@ -150,6 +150,10 @@ var _ = SIGDescribe("Restart [Serial] [Slow] [Disruptive]", func() {
 		if numCpus < 1 {
 			e2eskipper.Skipf("insufficient CPU available for kubelet restart test")
 		}
+		if numCpus > 18 {
+			// 950m * 19 = 18050m (18.05 CPU) -> not enough to block the scheduling of another 950m pod
+			e2eskipper.Skipf("test will return false positives on a machine with >18 cores")
+		}
 
 		// create as many restartNever pods as there are allocatable CPU
 		// nodes; if they are not correctly accounted for as terminated
@@ -161,7 +165,7 @@ var _ = SIGDescribe("Restart [Serial] [Slow] [Disruptive]", func() {
 			pod.Spec.RestartPolicy = "Never"
 			pod.Spec.Containers[0].Command = []string{"echo", "hi"}
 			pod.Spec.Containers[0].Resources.Limits = v1.ResourceList{
-				v1.ResourceCPU: resource.MustParse("1"),
+				v1.ResourceCPU: resource.MustParse("950m"), // leave a little room for other workloads
 			}
 		}
 		createBatchPodWithRateControl(f, restartNeverPods, podCreationInterval)

From 6ddf86d422a3ca2d6547bd2b0fe03202e23f8817 Mon Sep 17 00:00:00 2001
From: Elana Hashman
Date: Thu, 18 Nov 2021 14:59:01 -0800
Subject: [PATCH 2/2] Set startTimeout back to 3m, restore wait loop at end of
 test

---
 test/e2e_node/restart_test.go | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/test/e2e_node/restart_test.go b/test/e2e_node/restart_test.go
index d1e7af884eb..f3ee800678d 100644
--- a/test/e2e_node/restart_test.go
+++ b/test/e2e_node/restart_test.go
@@ -73,7 +73,7 @@ var _ = SIGDescribe("Restart [Serial] [Slow] [Disruptive]", func() {
 		podCount            = 100
 		podCreationInterval = 100 * time.Millisecond
 		recoverTimeout      = 5 * time.Minute
-		startTimeout        = 5 * time.Minute
+		startTimeout        = 3 * time.Minute
 		// restartCount is chosen so even with minPods we exhaust the default
 		// allocation of a /24.
 		minPods = 50
@@ -165,7 +165,7 @@ var _ = SIGDescribe("Restart [Serial] [Slow] [Disruptive]", func() {
 			pod.Spec.RestartPolicy = "Never"
 			pod.Spec.Containers[0].Command = []string{"echo", "hi"}
 			pod.Spec.Containers[0].Resources.Limits = v1.ResourceList{
-				v1.ResourceCPU: resource.MustParse("950m"), // leave a little room for other workloads
+				v1.ResourceCPU: resource.MustParse("950m"), // leave a little room for other workloads
 			}
 		}
 		createBatchPodWithRateControl(f, restartNeverPods, podCreationInterval)
@@ -203,9 +203,11 @@ var _ = SIGDescribe("Restart [Serial] [Slow] [Disruptive]", func() {
 		// restart may think these old pods are consuming CPU and we
 		// will get an OutOfCpu error.
 		ginkgo.By("verifying restartNever pods succeed and restartAlways pods stay running")
-		postRestartRunningPods := waitForPods(f, numAllPods, recoverTimeout)
-		if len(postRestartRunningPods) < numAllPods {
-			framework.Failf("less pods are running after node restart, got %d but expected %d", len(postRestartRunningPods), numAllPods)
+		for start := time.Now(); time.Since(start) < startTimeout; time.Sleep(10 * time.Second) {
+			postRestartRunningPods := waitForPods(f, numAllPods, recoverTimeout)
+			if len(postRestartRunningPods) < numAllPods {
+				framework.Failf("less pods are running after node restart, got %d but expected %d", len(postRestartRunningPods), numAllPods)
+			}
 		}
 	})
 })
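
For context, the millicore arithmetic behind the new 950m limit and the >18-core skip in patch 1 can be sketched as a small standalone Go program. This is an illustration, not code from the patch: it assumes the node's allocatable CPU is numCpus whole cores, as the test's numCpus variable implies, and only reuses resource.MustParse and Quantity.MilliValue from k8s.io/apimachinery.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	perPod := resource.MustParse("950m") // limit requested by each restartNever pod

	for _, numCpus := range []int64{2, 18, 19} {
		allocatable := numCpus * 1000         // allocatable CPU in millicores
		used := perPod.MilliValue() * numCpus // numCpus pods at 950m each
		free := allocatable - used

		// The test only detects the terminated-pod accounting bug if the node
		// is too full to admit one more 950m pod; at 19+ cores, 950m stays free
		// and the test would pass regardless, hence the skip above 18 cores.
		blocked := free < perPod.MilliValue()
		fmt.Printf("cores=%2d used=%6dm free=%5dm blocks another 950m pod: %v\n",
			numCpus, used, free, blocked)
	}
}

Running this prints blocked=true for 2 and 18 cores but false for 19, which is the boundary the numCpus > 18 guard encodes.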