Merge pull request #37718 from dashpole/e2e_node_timeout

Automatic merge from submit-queue (batch tested with PRs 36419, 38330, 37718, 38244, 38375)

adjusted timeouts for inode eviction and garbage collection tests

Inode eviction tests appear to run slower on CoreOS than on the other operating systems I tested.
I adjusted the test timeout from 10 to 30 minutes to compensate.
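For context, these checks poll with Gomega's Eventually, so the timeout only controls how long the assertion keeps retrying before the spec fails. A minimal sketch of the pattern, assuming the test file's usual Gomega dot-imports; the actual declaration of evictionTestTimeout is not part of the excerpt below, so the values here simply restate the prose above:

    const (
        evictionTestTimeout  = 30 * time.Minute // raised from 10 * time.Minute
        evictionPollInterval = 5 * time.Second
    )

    // Gomega re-runs the function every evictionPollInterval until it returns nil
    // or evictionTestTimeout elapses, at which point the spec fails.
    Eventually(func() error {
        // ... check via the API server that the pods causing inode pressure were evicted ...
        return nil
    }, evictionTestTimeout, evictionPollInterval).Should(BeNil())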

Garbage collection tests also flake occasionally due to timeouts.
I adjusted the timeout for runtime commands from 2 to 3 minutes, and removed an unused constant.
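The constant changed here is garbageCollectDuration in the first hunk below. A rough sketch of how such a timeout typically bounds an Eventually poll in these node e2e tests; the actual poll body is not part of this excerpt, so treat the check itself as a placeholder:

    // Wait up to garbageCollectDuration (now 3 minutes) for the runtime to
    // finish garbage collection, checking every runtimePollInterval.
    Eventually(func() error {
        // ... ask the runtime for the remaining containers and compare against
        // the expected post-GC state ...
        return nil
    }, garbageCollectDuration, runtimePollInterval).Should(BeNil())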

cc: @Random-Liu
Commit a4a306ac81 by Kubernetes Submit Queue, 2016-12-08 17:13:56 -08:00, committed via GitHub
2 changed files with 40 additions and 15 deletions


@@ -38,10 +38,9 @@ const (
maxTotalContainers = -1
defaultRuntimeRequestTimeoutDuration = 1 * time.Minute
garbageCollectDuration = 2 * time.Minute
garbageCollectDuration = 3 * time.Minute
setupDuration = 10 * time.Minute
runtimePollInterval = 10 * time.Second
deleteTimeout = 4 * time.Minute
)
type testPodSpec struct {


@@ -33,6 +33,9 @@ import (
const (
postTestConditionMonitoringPeriod = 2 * time.Minute
evictionPollInterval = 5 * time.Second
// pressure conditions often surface after evictions because of a delay in the propagation of metrics to pressure
// we wait this period after evictions to make sure that we wait out this delay
pressureDelay = 20 * time.Second
)
var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive]", func() {
@@ -203,26 +206,36 @@ func runEvictionTest(f *framework.Framework, testCondition string, podTestSpecs
}
return fmt.Errorf("pods that caused %s have not been evicted.", testCondition)
}, evictionTestTimeout, evictionPollInterval).Should(BeNil())
})
AfterEach(func() {
// We observe pressure from the API server. The eviction manager observes pressure from the kubelet internal stats.
// This means the eviction manager will observe pressure before we will, creating a delay between when the eviction manager
// evicts a pod, and when we observe the pressure by querying the API server. Add a delay here to account for this.
By("making sure pressure from test has surfaced before continuing")
time.Sleep(pressureDelay)
By("making sure conditions eventually return to normal")
Eventually(func() bool {
Eventually(func() error {
hasPressure, err := hasPressureCondition(f, testCondition)
framework.ExpectNoError(err, fmt.Sprintf("checking if we have %s", testCondition))
return hasPressure
}, evictionTestTimeout, evictionPollInterval).Should(BeFalse())
if hasPressure {
return fmt.Errorf("Conditions havent returned to normal, we still have %s", testCondition)
}
return nil
}, evictionTestTimeout, evictionPollInterval).Should(BeNil())
By("making sure conditions do not return")
Consistently(func() bool {
Consistently(func() error {
hasPressure, err := hasPressureCondition(f, testCondition)
framework.ExpectNoError(err, fmt.Sprintf("checking if we have %s", testCondition))
return hasPressure
}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(BeFalse())
if hasPressure {
return fmt.Errorf("%s dissappeared and then reappeared", testCondition)
}
return nil
}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(BeNil())
By("making sure we can start a new pod after the test")
podName := "test-admit-pod"
f.PodClient().Create(&v1.Pod{
f.PodClient().CreateSync(&v1.Pod{
ObjectMeta: v1.ObjectMeta{
Name: podName,
},
@@ -230,15 +243,28 @@ func runEvictionTest(f *framework.Framework, testCondition string, podTestSpecs
RestartPolicy: v1.RestartPolicyNever,
Containers: []v1.Container{
{
Image: "gcr.io/google_containers/busybox:1.24",
Image: framework.GetPauseImageNameForHostArch(),
Name: podName,
},
},
},
})
if CurrentGinkgoTestDescription().Failed && framework.TestContext.DumpLogsOnFailure {
logPodEvents(f)
logNodeEvents(f)
})
AfterEach(func() {
By("deleting pods")
for _, spec := range podTestSpecs {
By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
f.PodClient().DeleteSync(spec.pod.Name, &v1.DeleteOptions{}, podDisappearTimeout)
}
if CurrentGinkgoTestDescription().Failed {
if framework.TestContext.DumpLogsOnFailure {
logPodEvents(f)
logNodeEvents(f)
}
By("sleeping to allow for cleanup of test")
time.Sleep(postTestConditionMonitoringPeriod)
}
})
})
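A note on the assertion change interleaved above (the rendered diff has lost its +/- markers): the polls that used to return a bool and assert Should(BeFalse()) now return an error and assert Should(BeNil()). The wait behaves the same, but on failure Gomega prints the returned error, which names the pressure condition involved, instead of only reporting that a boolean was unexpectedly true. Reconstructed side by side from the lines above:

    // before: failure output only says a true value was expected to be false
    Consistently(func() bool {
        hasPressure, err := hasPressureCondition(f, testCondition)
        framework.ExpectNoError(err, fmt.Sprintf("checking if we have %s", testCondition))
        return hasPressure
    }, postTestConditionMonitoringPeriod, evictionPollInterval).Should(BeFalse())

    // after: failure output includes which condition reappeared
    Consistently(func() error {
        hasPressure, err := hasPressureCondition(f, testCondition)
        framework.ExpectNoError(err, fmt.Sprintf("checking if we have %s", testCondition))
        if hasPressure {
            return fmt.Errorf("%s disappeared and then reappeared", testCondition)
        }
        return nil
    }, postTestConditionMonitoringPeriod, evictionPollInterval).Should(BeNil())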