From dbb34a04cc6df48f4dd2a1064828e5675c68b777 Mon Sep 17 00:00:00 2001
From: Ayato Tokubi
Date: Mon, 27 Jan 2025 19:29:35 +0000
Subject: [PATCH] Reduce the number of processes used in e2e to prevent unexpected OOM

Signed-off-by: Ayato Tokubi
---
 test/e2e_node/eviction_test.go | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/test/e2e_node/eviction_test.go b/test/e2e_node/eviction_test.go
index c430645f3b7..cfe57e4f713 100644
--- a/test/e2e_node/eviction_test.go
+++ b/test/e2e_node/eviction_test.go
@@ -473,13 +473,16 @@ var _ = SIGDescribe("PriorityPidEvictionOrdering", framework.WithSlow(), framewo
 	highPriorityClassName := f.BaseName + "-high-priority"
 	highPriority := int32(999999999)
 
-	processes := 30000
+	// Apparently there is a threshold at around 10,000+. If it's over the threshold,
+	// the processes are likely to be oom-killed instead of evicted.
+	// One test can have at most two pidConsumingPods at a time not to cause oom-kill.
+	processes := 5000
 
 	// if criStats is true, PodAndContainerStatsFromCRI will use data from cri instead of cadvisor for kubelet to get pid count of pods
 	for _, criStats := range []bool{true, false} {
 		ginkgo.Context(fmt.Sprintf("when we run containers with PodAndContainerStatsFromCRI=%v that should cause %s", criStats, expectedNodeCondition), func() {
 			tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
-				pidsConsumed := int64(10000)
+				pidsConsumed := int64(4000)
 				summary := eventuallyGetSummary(ctx)
 				availablePids := *(summary.Node.Rlimit.MaxPID) - *(summary.Node.Rlimit.NumOfRunningProcesses)
 				initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalPIDAvailable): fmt.Sprintf("%d", availablePids-pidsConsumed)}
@@ -524,7 +527,7 @@ var _ = SIGDescribe("PriorityPidEvictionOrdering", framework.WithSlow(), framewo
 
 	f.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition)+"; baseline scenario to verify DisruptionTarget is added", func() {
 		tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
-			pidsConsumed := int64(10000)
+			pidsConsumed := int64(4000)
 			summary := eventuallyGetSummary(ctx)
 			availablePids := *(summary.Node.Rlimit.MaxPID) - *(summary.Node.Rlimit.NumOfRunningProcesses)
 			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalPIDAvailable): fmt.Sprintf("%d", availablePids-pidsConsumed)}
@@ -1049,8 +1052,9 @@ func diskConsumingPod(name string, diskConsumedMB int, volumeSource *v1.VolumeSo
 }
 
 func pidConsumingPod(name string, numProcesses int) *v1.Pod {
-	// Each iteration forks once, but creates two processes
-	return podWithCommand(nil, v1.ResourceRequirements{}, numProcesses/2, name, "(while true; do /bin/sleep 5; done)&")
+	// Slowing down the iteration speed to prevent a race condition where eviction may occur
+	// before the correct number of processes is captured in the stats during a sudden surge in processes.
+	return podWithCommand(nil, v1.ResourceRequirements{}, numProcesses, name, "/bin/sleep 0.01; (/bin/sleep 3600)&")
 }
 
 // podWithCommand returns a pod with the provided volumeSource and resourceRequirements.
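
A minimal sketch (not part of the patch) of the eviction arithmetic the new constants rely on. The maxPID and baseline process values below are assumed for illustration only; in the real test they come from eventuallyGetSummary, and the threshold computation mirrors the EvictionHard setup in the diff above.

package main

import "fmt"

func main() {
	// Assumed node values (eventuallyGetSummary would report the real ones).
	maxPID := int64(32768)         // assumed kernel.pid_max
	runningProcesses := int64(500) // assumed baseline process count on the node
	pidsConsumed := int64(4000)    // value set by the patch
	processesPerPod := int64(5000) // value set by the patch

	// Threshold computed the same way as in the test:
	availablePids := maxPID - runningProcesses
	evictionHard := availablePids - pidsConsumed
	fmt.Printf("pid.available eviction threshold: %d\n", evictionHard)

	// One pidConsumingPod adds roughly 5000 processes, pushing pid.available
	// below the threshold, while the total stays well under the ~10,000+ range
	// where, per the patch comment, oom-kill tends to win the race against eviction.
	fmt.Printf("pid.available with one pod running: %d\n", availablePids-processesPerPod)
}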