From d2dbc583a066855afc4fc0b09d1874c82c645497 Mon Sep 17 00:00:00 2001
From: Dixita Narang
Date: Wed, 28 Jun 2023 21:52:19 +0000
Subject: [PATCH] Add coverage for the OOM kill scenario caused by node
 allocatable memory limits when pod-level memory limits are not set

---
 test/e2e_node/oomkiller_linux_test.go | 58 +++++++++++++++++++++++++--
 1 file changed, 55 insertions(+), 3 deletions(-)

diff --git a/test/e2e_node/oomkiller_linux_test.go b/test/e2e_node/oomkiller_linux_test.go
index d6353e45642..3c709a66618 100644
--- a/test/e2e_node/oomkiller_linux_test.go
+++ b/test/e2e_node/oomkiller_linux_test.go
@@ -23,6 +23,7 @@ import (
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
 	admissionapi "k8s.io/pod-security-admission/api"
@@ -37,6 +38,28 @@ type testCase struct {
 	oomTargetContainerName string
 }
 
+// KubeReservedMemory is the default fraction of node memory capacity to be
+// reserved for K8s system components.
+const KubeReservedMemory = 0.35
+
+var _ = SIGDescribe("OOMKiller for pod using more memory than node allocatable [LinuxOnly] [Serial]", func() {
+	f := framework.NewDefaultFramework("nodeallocatable-oomkiller-test")
+	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
+
+	testCases := []testCase{
+		{
+			name:                   "single process container without resource limits",
+			oomTargetContainerName: "oomkill-nodeallocatable-container",
+			podSpec: getOOMTargetPod("oomkill-nodeallocatable-pod", "oomkill-nodeallocatable-container",
+				getOOMTargetContainerWithoutLimit),
+		},
+	}
+
+	for _, testCase := range testCases {
+		runOomKillerTest(f, testCase, KubeReservedMemory)
+	}
+})
+
 var _ = SIGDescribe("OOMKiller [LinuxOnly] [NodeConformance]", func() {
 	f := framework.NewDefaultFramework("oomkiller-test")
 	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
@@ -67,12 +90,26 @@ var _ = SIGDescribe("OOMKiller [LinuxOnly] [NodeConformance]", func() {
 		})
 	}
 	for _, tc := range testCases {
-		runOomKillerTest(f, tc)
+		runOomKillerTest(f, tc, 0)
 	}
 })
 
-func runOomKillerTest(f *framework.Framework, testCase testCase) {
+func runOomKillerTest(f *framework.Framework, testCase testCase, kubeReservedMemory float64) {
 	ginkgo.Context(testCase.name, func() {
+		// Set kube-reserved memory in the kubelet configuration.
+		if kubeReservedMemory > 0 {
+			tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
+				if initialConfig.KubeReserved == nil {
+					initialConfig.KubeReserved = map[string]string{}
+				}
+				// A race has been observed between system OOM kills and cgroup OOM kills when node
+				// allocatable memory is equal to node capacity. Hence, reserve a fraction of the
+				// node's memory capacity for K8s components so that node allocatable memory stays
+				// below node capacity and OOM kills occur at the cgroup level, not the system level.
+				initialConfig.KubeReserved["memory"] = fmt.Sprintf("%d", int(kubeReservedMemory*getLocalNode(context.TODO(), f).Status.Capacity.Memory().AsApproximateFloat64()))
+			})
+		}
+
 		ginkgo.BeforeEach(func() {
 			ginkgo.By("setting up the pod to be used in the test")
 			e2epod.NewPodClient(f).Create(context.TODO(), testCase.podSpec)
@@ -101,7 +138,7 @@ func runOomKillerTest(f *framework.Framework, testCase testCase) {
 func verifyReasonForOOMKilledContainer(pod *v1.Pod, oomTargetContainerName string) {
 	container := e2epod.FindContainerStatusInPod(pod, oomTargetContainerName)
 	if container == nil {
-		framework.Failf("OOM target pod %q, container %q does not have the expected state terminated", pod.Name, container.Name)
+		framework.Failf("OOM target pod %q, container %q does not have the expected state terminated", pod.Name, oomTargetContainerName)
 	}
 	if container.State.Terminated == nil {
 		framework.Failf("OOM target pod %q, container %q is not in the terminated state", pod.Name, container.Name)
@@ -191,3 +228,18 @@ func getOOMTargetContainerMultiProcess(name string) v1.Container {
 		},
 	}
 }
+
+// getOOMTargetContainerWithoutLimit returns a container with a single process that attempts to allocate more
+// memory than the node allocatable while having no resource limits set.
+func getOOMTargetContainerWithoutLimit(name string) v1.Container {
+	return v1.Container{
+		Name:  name,
+		Image: busyboxImage,
+		Command: []string{
+			"sh",
+			"-c",
+			// Use dd to attempt to allocate a huge block of memory that exceeds the node allocatable memory.
+			"sleep 5 && dd if=/dev/zero of=/dev/null iflag=fullblock count=10 bs=10G",
+		},
+	}
+}
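
Note: the kube-reserved change in this patch works because the kubelet derives node allocatable
from capacity minus reservations, roughly Allocatable = Capacity - KubeReserved - SystemReserved -
HardEvictionThreshold. Below is a minimal, standalone Go sketch of that arithmetic, not part of
the patch: the 8Gi node size and the variable names are assumptions for illustration; the test
itself reads the real capacity from getLocalNode(...).Status.Capacity.Memory().

package main

import "fmt"

func main() {
	// Assumed example capacity of 8Gi in bytes; the test reads the real value
	// from the node object instead of hardcoding it.
	capacity := float64(8 << 30)

	// Mirrors KubeReservedMemory = 0.35 from the patch: 35% of capacity is
	// handed to the kubelet as kube-reserved memory.
	kubeReserved := int64(0.35 * capacity)

	// Ignoring system-reserved and eviction thresholds for simplicity,
	// allocatable drops to roughly 65% of capacity. A pod can therefore exceed
	// the node-level pods cgroup limit while physical memory is still free,
	// which makes the cgroup OOM killer fire before a system-wide OOM.
	allocatable := int64(capacity) - kubeReserved
	fmt.Printf("capacity=%d kube-reserved=%d allocatable=%d\n", int64(capacity), kubeReserved, allocatable)
}

With these example numbers, allocatable comes out near 5.2Gi on an 8Gi node, which is why the
test pod can be OOM-killed at the cgroup level without ever exhausting the machine.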
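Note: in getOOMTargetContainerWithoutLimit, the dd invocation with bs=10G makes dd allocate a
single 10GiB transfer buffer in user space, so the process quickly exceeds the pods-cgroup memory
limit derived from node allocatable, and the kernel's cgroup OOM killer terminates it with
SIGKILL. By shell convention that surfaces as exit code 137 (128 + signal 9). A small Go sketch
of that convention follows; the helper name is mine, not from the test file.

package main

import "fmt"

// isOOMKillExitCode reports whether an exit code is consistent with a process
// killed by SIGKILL, as the OOM killer does: 128 + signal number, SIGKILL = 9.
func isOOMKillExitCode(code int) bool {
	const sigkill = 9
	return code == 128+sigkill // 137
}

func main() {
	fmt.Println(isOOMKillExitCode(137)) // true: consistent with an OOM kill
	fmt.Println(isOOMKillExitCode(1))   // false: an ordinary failure
}

The e2e test asserts the OOM kill through the container status (the Terminated state and its
reason) rather than through this exit-code check alone, as the verifyReasonForOOMKilledContainer
hunk above shows.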