Merge pull request #129845 from bitoku/fix-flake

Reduce the number of processes used in e2e to prevent unexpected OOM
Kubernetes Prow Robot 2025-02-12 13:16:20 -08:00 committed by GitHub
commit 5d57d0c110


@@ -473,13 +473,16 @@ var _ = SIGDescribe("PriorityPidEvictionOrdering", framework.WithSlow(), framewo
 	highPriorityClassName := f.BaseName + "-high-priority"
 	highPriority := int32(999999999)
-	processes := 30000
+	// Apparently there is a threshold at around 10,000+. If it's over the threshold,
+	// the processes are likely to be oom-killed instead of evicted.
+	// One test can have at most two pidConsumingPods at a time not to cause oom-kill.
+	processes := 5000
 	// if criStats is true, PodAndContainerStatsFromCRI will use data from cri instead of cadvisor for kubelet to get pid count of pods
 	for _, criStats := range []bool{true, false} {
 		ginkgo.Context(fmt.Sprintf("when we run containers with PodAndContainerStatsFromCRI=%v that should cause %s", criStats, expectedNodeCondition), func() {
 			tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
-				pidsConsumed := int64(10000)
+				pidsConsumed := int64(4000)
 				summary := eventuallyGetSummary(ctx)
 				availablePids := *(summary.Node.Rlimit.MaxPID) - *(summary.Node.Rlimit.NumOfRunningProcesses)
 				initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalPIDAvailable): fmt.Sprintf("%d", availablePids-pidsConsumed)}
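For reference, the kubelet configuration in the hunk above derives the pid.available hard-eviction threshold from the node summary. A minimal sketch of that arithmetic follows; the helper name is hypothetical and only mirrors the calculation visible in the diff:

package main

import "fmt"

// pidAvailableThreshold mirrors the calculation above: the hard-eviction
// threshold is set pidsConsumed below the node's currently free PID count,
// so a pod only has to spawn roughly pidsConsumed processes to trip eviction,
// well short of the real PID ceiling where the kernel would start oom-killing.
func pidAvailableThreshold(maxPID, runningProcesses, pidsConsumed int64) string {
	availablePids := maxPID - runningProcesses
	return fmt.Sprintf("%d", availablePids-pidsConsumed)
}

With pidsConsumed lowered from 10000 to 4000 and each pod now spawning 5000 processes, a single pidConsumingPod still crosses the threshold, while two pods running at once stay below the ~10,000 range where, per the comment above, oom-kill tends to win the race against eviction.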
@@ -524,7 +527,7 @@ var _ = SIGDescribe("PriorityPidEvictionOrdering", framework.WithSlow(), framewo
 	f.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition)+"; baseline scenario to verify DisruptionTarget is added", func() {
 		tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
-			pidsConsumed := int64(10000)
+			pidsConsumed := int64(4000)
 			summary := eventuallyGetSummary(ctx)
 			availablePids := *(summary.Node.Rlimit.MaxPID) - *(summary.Node.Rlimit.NumOfRunningProcesses)
 			initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalPIDAvailable): fmt.Sprintf("%d", availablePids-pidsConsumed)}
@@ -1049,8 +1052,9 @@ func diskConsumingPod(name string, diskConsumedMB int, volumeSource *v1.VolumeSo
 }
 
 func pidConsumingPod(name string, numProcesses int) *v1.Pod {
-	// Each iteration forks once, but creates two processes
-	return podWithCommand(nil, v1.ResourceRequirements{}, numProcesses/2, name, "(while true; do /bin/sleep 5; done)&")
+	// Slowing down the iteration speed to prevent a race condition where eviction may occur
+	// before the correct number of processes is captured in the stats during a sudden surge in processes.
+	return podWithCommand(nil, v1.ResourceRequirements{}, numProcesses, name, "/bin/sleep 0.01; (/bin/sleep 3600)&")
 }
 
 // podWithCommand returns a pod with the provided volumeSource and resourceRequirements.
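The pidConsumingPod change adjusts both the iteration count and the per-iteration command. Below is a rough sketch of the resulting shell workload, assuming podWithCommand wraps the command in a fixed-count loop; the actual wrapper lives elsewhere in this file and is not shown in the diff:

package main

import "fmt"

// approxPidConsumerScript is illustrative only. Before the change, numProcesses/2
// iterations each ran "(while true; do /bin/sleep 5; done)&", leaving a subshell
// plus a sleep (two processes) behind. After the change, numProcesses iterations
// each run "/bin/sleep 0.01; (/bin/sleep 3600)&": the 10ms pause throttles the
// fork rate so the stats pipeline can report the PID count before eviction fires,
// and each iteration leaves roughly one long-lived process.
func approxPidConsumerScript(iterations int, perIteration string) string {
	return fmt.Sprintf("i=0; while [ $i -lt %d ]; do %s; i=$((i+1)); done", iterations, perIteration)
}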