diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go b/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go
index 4c753b466f3..6db16bd03d1 100644
--- a/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_container_linux.go
@@ -215,6 +215,15 @@ func (m *kubeGenericRuntimeManager) calculateLinuxResources(cpuRequest, cpuLimit
 		resources.CpuPeriod = cpuPeriod
 	}
 
+	// runc requires cgroupv2 for unified mode
+	if libcontainercgroups.IsCgroup2UnifiedMode() {
+		resources.Unified = map[string]string{
+			// Ask the kernel to kill all processes in the container cgroup in case of OOM.
+			// See memory.oom.group in https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html for
+			// more info.
+			"memory.oom.group": "1",
+		}
+	}
 	return &resources
 }
 
diff --git a/test/e2e_node/oomkiller_test.go b/test/e2e_node/oomkiller_linux_test.go
similarity index 62%
rename from test/e2e_node/oomkiller_test.go
rename to test/e2e_node/oomkiller_linux_test.go
index 6dfce311c2b..fb92133d2f2 100644
--- a/test/e2e_node/oomkiller_test.go
+++ b/test/e2e_node/oomkiller_linux_test.go
@@ -19,7 +19,6 @@ package e2enode
 import (
 	"context"
 	"fmt"
-	"time"
 
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
@@ -29,35 +28,52 @@ import (
 	admissionapi "k8s.io/pod-security-admission/api"
 
 	"github.com/onsi/ginkgo/v2"
+	libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
 )
 
 type testCase struct {
+	name                   string
 	podSpec                *v1.Pod
 	oomTargetContainerName string
 }
 
-const PodOOMKilledTimeout = 2 * time.Minute
-
 var _ = SIGDescribe("OOMKiller [LinuxOnly] [NodeConformance]", func() {
 	f := framework.NewDefaultFramework("oomkiller-test")
 	f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged
 
-	containerName := "oomkill-target-container"
-	oomPodSpec := getOOMTargetPod("oomkill-target-pod", containerName)
-	runOomKillerTest(f, testCase{podSpec: oomPodSpec, oomTargetContainerName: containerName})
+	testCases := []testCase{{
+		name:                   "single process container",
+		oomTargetContainerName: "oomkill-single-target-container",
+		podSpec: getOOMTargetPod("oomkill-target-pod", "oomkill-single-target-container",
+			getOOMTargetContainer),
+	}}
+
+	// If using cgroup v2, we set memory.oom.group=1 for the container cgroup so that any process which gets OOM killed
+	// in the process, causes all processes in the container to get OOM killed
+	if libcontainercgroups.IsCgroup2UnifiedMode() {
+		testCases = append(testCases, testCase{
+			name:                   "multi process container",
+			oomTargetContainerName: "oomkill-multi-target-container",
+			podSpec: getOOMTargetPod("oomkill-target-pod", "oomkill-multi-target-container",
+				getOOMTargetContainerMultiProcess),
+		})
+	}
+	for _, tc := range testCases {
+		runOomKillerTest(f, tc)
+	}
 })
 
 func runOomKillerTest(f *framework.Framework, testCase testCase) {
-	ginkgo.Context("", func() {
+	ginkgo.Context(testCase.name, func() {
 		ginkgo.BeforeEach(func() {
 			ginkgo.By("setting up the pod to be used in the test")
 			e2epod.NewPodClient(f).Create(context.TODO(), testCase.podSpec)
 		})
 
 		ginkgo.It("The containers terminated by OOM killer should have the reason set to OOMKilled", func() {
-			ginkgo.By("Waiting for the pod to be failed")
-			e2epod.WaitForPodTerminatedInNamespace(context.TODO(), f.ClientSet, testCase.podSpec.Name, "", f.Namespace.Name)
+			err := e2epod.WaitForPodTerminatedInNamespace(context.TODO(), f.ClientSet, testCase.podSpec.Name, "", f.Namespace.Name)
+			framework.ExpectNoError(err, "Failed waiting for pod to terminate, %s/%s", f.Namespace.Name, testCase.podSpec.Name)
 
 			ginkgo.By("Fetching the latest pod status")
 			pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(context.TODO(), testCase.podSpec.Name, metav1.GetOptions{})
@@ -88,7 +104,7 @@ func verifyReasonForOOMKilledContainer(pod *v1.Pod, oomTargetContainerName strin
 		fmt.Sprintf("pod: %q, container: %q has unexpected reason: %q", pod.Name, container.Name, container.State.Terminated.Reason))
 }
 
-func getOOMTargetPod(podName string, ctnName string) *v1.Pod {
+func getOOMTargetPod(podName string, ctnName string, createContainer func(name string) v1.Container) *v1.Pod {
 	return &v1.Pod{
 		ObjectMeta: metav1.ObjectMeta{
 			Name: podName,
@@ -96,12 +112,14 @@ func getOOMTargetPod(podName string, ctnName string) *v1.Pod {
 		Spec: v1.PodSpec{
 			RestartPolicy: v1.RestartPolicyNever,
 			Containers: []v1.Container{
-				getOOMTargetContainer(ctnName),
+				createContainer(ctnName),
 			},
 		},
 	}
 }
 
+// getOOMTargetContainer returns a container with a single process, which attempts to allocate more memory than is
+// allowed by the container memory limit.
 func getOOMTargetContainer(name string) v1.Container {
 	return v1.Container{
 		Name: name,
@@ -122,3 +140,26 @@
 		},
 	}
 }
+
+// getOOMTargetContainerMultiProcess returns a container with two processes, one of which attempts to allocate more
+// memory than is allowed by the container memory limit, and a second process which just sleeps.
+func getOOMTargetContainerMultiProcess(name string) v1.Container {
+	return v1.Container{
+		Name:  name,
+		Image: busyboxImage,
+		Command: []string{
+			"sh",
+			"-c",
+			// use the dd tool to attempt to allocate 20M in a block which exceeds the limit
+			"dd if=/dev/zero of=/dev/null bs=20M & sleep 86400",
+		},
+		Resources: v1.ResourceRequirements{
+			Requests: v1.ResourceList{
+				v1.ResourceMemory: resource.MustParse("15Mi"),
+			},
+			Limits: v1.ResourceList{
+				v1.ResourceMemory: resource.MustParse("15Mi"),
+			},
+		},
+	}
+}
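Note (not part of the patch above): a minimal sketch of how the effect of "memory.oom.group": "1" could be verified on a cgroup v2 node. The program name and the example cgroup path in the comment are illustrative assumptions; the actual cgroup directory of a container depends on the cgroup driver and container runtime in use.

	// checkoomgroup reads memory.oom.group from a container cgroup directory
	// (cgroup v2 only) and reports whether group OOM kills are enabled.
	package main

	import (
		"fmt"
		"os"
		"strings"
	)

	func main() {
		if len(os.Args) != 2 {
			fmt.Fprintln(os.Stderr, "usage: checkoomgroup <container-cgroup-dir>")
			os.Exit(2)
		}
		// Hypothetical example path: /sys/fs/cgroup/kubepods.slice/.../cri-containerd-<id>.scope
		data, err := os.ReadFile(os.Args[1] + "/memory.oom.group")
		if err != nil {
			fmt.Fprintln(os.Stderr, "failed to read memory.oom.group:", err)
			os.Exit(1)
		}
		if strings.TrimSpace(string(data)) == "1" {
			fmt.Println("memory.oom.group=1: an OOM kill takes down every process in the cgroup")
		} else {
			fmt.Println("memory.oom.group=0: the kernel may kill a single process and leave the others running")
		}
	}

The Unified map set by the kubelet change is passed to the container runtime through the CRI, and the OCI runtime applies each key/value pair to the container's cgroup v2 interface files, which is why the multi-process test case above is only registered when IsCgroup2UnifiedMode() reports true.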