From dbb34a04cc6df48f4dd2a1064828e5675c68b777 Mon Sep 17 00:00:00 2001
From: Ayato Tokubi
Date: Mon, 27 Jan 2025 19:29:35 +0000
Subject: [PATCH] Reduce the number of processes used in e2e to prevent unexpected OOM

Signed-off-by: Ayato Tokubi
---
 test/e2e_node/eviction_test.go | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/test/e2e_node/eviction_test.go b/test/e2e_node/eviction_test.go
index c430645f3b7..cfe57e4f713 100644
--- a/test/e2e_node/eviction_test.go
+++ b/test/e2e_node/eviction_test.go
@@ -473,13 +473,16 @@ var _ = SIGDescribe("PriorityPidEvictionOrdering", framework.WithSlow(), framewo
 	highPriorityClassName := f.BaseName + "-high-priority"
 	highPriority := int32(999999999)
 
-	processes := 30000
+	// Apparently there is a threshold at around 10,000+. If it's over the threshold,
+	// the processes are likely to be oom-killed instead of evicted.
+	// One test can have at most two pidConsumingPods at a time not to cause oom-kill.
+	processes := 5000
 
 	// if criStats is true, PodAndContainerStatsFromCRI will use data from cri instead of cadvisor for kubelet to get pid count of pods
 	for _, criStats := range []bool{true, false} {
 		ginkgo.Context(fmt.Sprintf("when we run containers with PodAndContainerStatsFromCRI=%v that should cause %s", criStats, expectedNodeCondition), func() {
 			tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
-				pidsConsumed := int64(10000)
+				pidsConsumed := int64(4000)
 				summary := eventuallyGetSummary(ctx)
 				availablePids := *(summary.Node.Rlimit.MaxPID) - *(summary.Node.Rlimit.NumOfRunningProcesses)
 				initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalPIDAvailable): fmt.Sprintf("%d", availablePids-pidsConsumed)}
@@ -524,7 +527,7 @@ var _ = SIGDescribe("PriorityPidEvictionOrdering", framework.WithSlow(), framewo
 
 	f.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition)+"; baseline scenario to verify DisruptionTarget is added", func() {
 		tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
-			pidsConsumed := int64(10000)
+			pidsConsumed := int64(4000)
 			summary := eventuallyGetSummary(ctx)
 			availablePids := *(summary.Node.Rlimit.MaxPID) - *(summary.Node.Rlimit.NumOfRunningProcesses)
 			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalPIDAvailable): fmt.Sprintf("%d", availablePids-pidsConsumed)}
@@ -1049,8 +1052,9 @@ func diskConsumingPod(name string, diskConsumedMB int, volumeSource *v1.VolumeSo
 }
 
 func pidConsumingPod(name string, numProcesses int) *v1.Pod {
-	// Each iteration forks once, but creates two processes
-	return podWithCommand(nil, v1.ResourceRequirements{}, numProcesses/2, name, "(while true; do /bin/sleep 5; done)&")
+	// Slowing down the iteration speed to prevent a race condition where eviction may occur
+	// before the correct number of processes is captured in the stats during a sudden surge in processes.
+	return podWithCommand(nil, v1.ResourceRequirements{}, numProcesses, name, "/bin/sleep 0.01; (/bin/sleep 3600)&")
 }
 
 // podWithCommand returns a pod with the provided volumeSource and resourceRequirements.
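
A minimal sketch (not part of the patch) of the eviction arithmetic the new constants rely on. The maxPID and baseline process values below are assumed for illustration only; in the real test they come from eventuallyGetSummary, and the threshold computation mirrors the EvictionHard setup in the diff above.

package main

import "fmt"

func main() {
	// Assumed node values (eventuallyGetSummary would report the real ones).
	maxPID := int64(32768)         // assumed kernel.pid_max
	runningProcesses := int64(500) // assumed baseline process count on the node
	pidsConsumed := int64(4000)    // value set by the patch
	processesPerPod := int64(5000) // value set by the patch

	// Threshold computed the same way as in the test:
	availablePids := maxPID - runningProcesses
	evictionHard := availablePids - pidsConsumed
	fmt.Printf("pid.available eviction threshold: %d\n", evictionHard)

	// One pidConsumingPod adds roughly 5000 processes, pushing pid.available
	// below the threshold, while the total stays well under the ~10,000+ range
	// where, per the patch comment, oom-kill tends to win the race against eviction.
	fmt.Printf("pid.available with one pod running: %d\n", availablePids-processesPerPod)
}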