From e64576af0acbbcafbe86a92108d9352e2bb3a2bf Mon Sep 17 00:00:00 2001 From: ravisantoshgudimetla Date: Wed, 3 Mar 2021 16:34:27 -0500 Subject: [PATCH] Only system-node-critical pods should be OOM Killed last --- pkg/kubelet/qos/policy.go | 4 ++-- pkg/kubelet/qos/policy_test.go | 27 ++++++++++++++++++++++++--- pkg/kubelet/types/pod_update.go | 5 +++++ 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/pkg/kubelet/qos/policy.go b/pkg/kubelet/qos/policy.go index fc428921348..93d0934c280 100644 --- a/pkg/kubelet/qos/policy.go +++ b/pkg/kubelet/qos/policy.go @@ -38,8 +38,8 @@ const ( // and 1000. Containers with higher OOM scores are killed if the system runs out of memory. // See https://lwn.net/Articles/391222/ for more information. func GetContainerOOMScoreAdjust(pod *v1.Pod, container *v1.Container, memoryCapacity int64) int { - if types.IsCriticalPod(pod) { - // Critical pods should be the last to get killed. + if types.IsNodeCriticalPod(pod) { + // Only node-critical pods should be the last to get killed. 
return guaranteedOOMScoreAdj } diff --git a/pkg/kubelet/qos/policy_test.go b/pkg/kubelet/qos/policy_test.go index 19b2f912b4f..3519f25b113 100644 --- a/pkg/kubelet/qos/policy_test.go +++ b/pkg/kubelet/qos/policy_test.go @@ -139,9 +139,24 @@ var ( systemCritical = scheduling.SystemCriticalPriority - critical = v1.Pod{ + clusterCritical = v1.Pod{ Spec: v1.PodSpec{ - Priority: &systemCritical, + PriorityClassName: scheduling.SystemClusterCritical, + Priority: &systemCritical, + Containers: []v1.Container{ + { + Resources: v1.ResourceRequirements{}, + }, + }, + }, + } + + systemNodeCritical = scheduling.SystemCriticalPriority + 1000 + + nodeCritical = v1.Pod{ + Spec: v1.PodSpec{ + PriorityClassName: scheduling.SystemNodeCritical, + Priority: &systemNodeCritical, Containers: []v1.Container{ { Resources: v1.ResourceRequirements{}, @@ -203,7 +218,13 @@ func TestGetContainerOOMScoreAdjust(t *testing.T) { highOOMScoreAdj: 3, }, { - pod: &critical, + pod: &clusterCritical, + memoryCapacity: 4000000000, + lowOOMScoreAdj: 1000, + highOOMScoreAdj: 1000, + }, + { + pod: &nodeCritical, memoryCapacity: 4000000000, lowOOMScoreAdj: -997, highOOMScoreAdj: -997, diff --git a/pkg/kubelet/types/pod_update.go b/pkg/kubelet/types/pod_update.go index 2cb27019283..9420eef1eb3 100644 --- a/pkg/kubelet/types/pod_update.go +++ b/pkg/kubelet/types/pod_update.go @@ -184,3 +184,8 @@ func Preemptable(preemptor, preemptee *v1.Pod) bool { func IsCriticalPodBasedOnPriority(priority int32) bool { return priority >= scheduling.SystemCriticalPriority } + +// IsNodeCriticalPod checks if the given pod is a system-node-critical pod. +func IsNodeCriticalPod(pod *v1.Pod) bool { + return IsCriticalPod(pod) && (pod.Spec.PriorityClassName == scheduling.SystemNodeCritical) +}