Adding OOM Score adjustment formula changes that takes pod level resources into account

Signed-off-by: ndixita <ndixita@google.com>
This commit is contained in:
ndixita 2024-10-31 21:46:53 +00:00
parent 5ea57fb3b4
commit 5a64597d2e
3 changed files with 490 additions and 4 deletions

View File

@ -27,6 +27,7 @@ package qos // import "k8s.io/kubernetes/pkg/kubelet/qos"
import (
v1 "k8s.io/api/core/v1"
resourcehelper "k8s.io/component-helpers/resource"
)
// minRegularContainerMemory returns the minimum memory resource quantity
@ -41,3 +42,30 @@ func minRegularContainerMemory(pod v1.Pod) int64 {
}
return memoryValue
}
// remainingPodMemReqPerContainer calculates the remaining pod memory request per
// container by:
// 1. Taking the total pod memory requests
// 2. Subtracting total container memory requests from pod memory requests
// 3. Dividing the remainder by the number of containers.
// This gives us the additional memory request that is not allocated to any
// containers in the pod. This value will be divided equally among all containers to
// calculate oom score adjusment.
// See https://github.com/kubernetes/enhancements/blob/master/keps/sig-node/2837-pod-level-resource-spec/README.md#oom-score-adjustment
// for more details.
func remainingPodMemReqPerContainer(pod *v1.Pod) int64 {
var remainingMemory int64
if pod.Spec.Resources.Requests.Memory().IsZero() {
return remainingMemory
}
numContainers := len(pod.Spec.Containers) + len(pod.Spec.InitContainers)
// Aggregated requests of all containers.
aggrContainerReqs := resourcehelper.AggregateContainerRequests(pod, resourcehelper.PodResourcesOptions{})
remainingMemory = pod.Spec.Resources.Requests.Memory().Value() - aggrContainerReqs.Memory().Value()
remainingMemoryPerContainer := remainingMemory / int64(numContainers)
return remainingMemoryPerContainer
}

View File

@ -19,6 +19,7 @@ package qos
import (
v1 "k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
resourcehelper "k8s.io/component-helpers/resource"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/types"
@ -63,14 +64,41 @@ func GetContainerOOMScoreAdjust(pod *v1.Pod, container *v1.Container, memoryCapa
// which use more than their request will have an OOM score of 1000 and will be prime
// targets for OOM kills.
// Note that this is a heuristic, it won't work if a container has many small processes.
memoryRequest := container.Resources.Requests.Memory().Value()
oomScoreAdjust := 1000 - (1000*memoryRequest)/memoryCapacity
containerMemReq := container.Resources.Requests.Memory().Value()
var oomScoreAdjust, remainingReqPerContainer int64
// When PodLevelResources feature is enabled, the OOM score adjustment formula is modified
// to account for pod-level memory requests. Any extra pod memory request that's
// not allocated to the containers is divided equally among all containers and
// added to their individual memory requests when calculating the OOM score
// adjustment. Otherwise, only container-level memory requests are used. See
// https://github.com/kubernetes/enhancements/blob/master/keps/sig-node/2837-pod-level-resource-spec/README.md#oom-score-adjustment
// for more details.
if utilfeature.DefaultFeatureGate.Enabled(features.PodLevelResources) &&
resourcehelper.IsPodLevelRequestsSet(pod) {
// TODO(ndixita): Refactor to use this formula in all cases, as
// remainingReqPerContainer will be 0 when pod-level resources are not set.
remainingReqPerContainer = remainingPodMemReqPerContainer(pod)
oomScoreAdjust = 1000 - (1000 * (containerMemReq + remainingReqPerContainer) / memoryCapacity)
} else {
oomScoreAdjust = 1000 - (1000*containerMemReq)/memoryCapacity
}
// adapt the sidecarContainer memoryRequest for OOM ADJ calculation
// calculate the oom score adjustment based on: max-memory( currentSideCarContainer , min-memory(regular containers) ) .
if utilfeature.DefaultFeatureGate.Enabled(features.SidecarContainers) && isSidecarContainer(pod, container) {
// check min memory quantity in regular containers
minMemoryRequest := minRegularContainerMemory(*pod)
// When calculating minMemoryOomScoreAdjust for sidecar containers with PodLevelResources enabled,
// we add the per-container share of unallocated pod memory requests to the minimum memory request.
// This ensures the OOM score adjustment i.e. minMemoryOomScoreAdjust
// calculation remains consistent
// with how we handle pod-level memory requests for regular containers.
if utilfeature.DefaultFeatureGate.Enabled(features.PodLevelResources) &&
resourcehelper.IsPodLevelRequestsSet(pod) {
minMemoryRequest += remainingReqPerContainer
}
minMemoryOomScoreAdjust := 1000 - (1000*minMemoryRequest)/memoryCapacity
// the OOM adjustment for sidecar container will match
// or fall below the OOM score adjustment of regular containers in the Pod.

View File

@ -177,8 +177,10 @@ var (
},
},
}
sampleDefaultMemRequest = resource.MustParse(strconv.FormatInt(standardMemoryAmount/8, 10))
sampleDefaultMemLimit = resource.MustParse(strconv.FormatInt(1000+(standardMemoryAmount/8), 10))
sampleDefaultMemRequest = resource.MustParse(strconv.FormatInt(standardMemoryAmount/8, 10))
sampleDefaultMemLimit = resource.MustParse(strconv.FormatInt(1000+(standardMemoryAmount/8), 10))
sampleDefaultPodMemRequest = resource.MustParse(strconv.FormatInt(standardMemoryAmount/4, 10))
sampleDefaultPodMemLimit = resource.MustParse(strconv.FormatInt(1000+(standardMemoryAmount/4), 10))
sampleContainer = v1.Container{
Name: "main-1",
@ -300,6 +302,347 @@ var (
},
},
}
// Pod definitions with their resource specifications are defined in this section.
// TODO(ndixita): cleanup the tests to create a method that generates pod
// definitions based on input resource parameters, replacing the current
// approach of individual pod variables.
guaranteedPodResourcesNoContainerResources = v1.Pod{
Spec: v1.PodSpec{
Resources: &v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultPodMemRequest,
},
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultPodMemRequest,
},
},
Containers: []v1.Container{
{
Name: "no-request-limit-1",
Resources: v1.ResourceRequirements{},
},
{
Name: "no-request-limit-2",
Resources: v1.ResourceRequirements{},
},
},
},
}
guaranteedPodResourcesEqualContainerRequests = v1.Pod{
Spec: v1.PodSpec{
Resources: &v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultPodMemRequest,
},
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultPodMemRequest,
},
},
Containers: []v1.Container{
{
Name: "guaranteed-container-1",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultMemRequest,
},
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultMemRequest,
},
},
},
{
Name: "guaranteed-container-2",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultMemRequest,
},
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultMemRequest,
},
},
},
},
},
}
guaranteedPodResourcesUnequalContainerRequests = v1.Pod{
Spec: v1.PodSpec{
Resources: &v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultPodMemRequest,
},
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultPodMemRequest,
},
},
Containers: []v1.Container{
{
Name: "burstable-container",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("3m"),
v1.ResourceMemory: sampleDefaultMemRequest,
},
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultMemLimit,
},
},
},
{
Name: "best-effort-container",
Resources: v1.ResourceRequirements{},
},
},
},
}
burstablePodResourcesNoContainerResources = v1.Pod{
Spec: v1.PodSpec{
Resources: &v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultPodMemRequest,
},
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultPodMemLimit,
},
},
Containers: []v1.Container{
{
Name: "no-request-limit-1",
Resources: v1.ResourceRequirements{},
},
{
Name: "no-request-limit-2",
Resources: v1.ResourceRequirements{},
},
},
},
}
burstablePodResourcesEqualContainerRequests = v1.Pod{
Spec: v1.PodSpec{
Resources: &v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultPodMemRequest,
},
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultPodMemLimit,
},
},
Containers: []v1.Container{
{
Name: "guaranteed-container-1",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultMemRequest,
},
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultMemRequest,
},
},
},
{
Name: "guaranteed-container-2",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultMemRequest,
},
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultMemRequest,
},
},
},
},
},
}
burstablePodResourcesUnequalContainerRequests = v1.Pod{
Spec: v1.PodSpec{
Resources: &v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultPodMemRequest,
},
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultPodMemLimit,
},
},
Containers: []v1.Container{
{
Name: "burstable-container",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("3m"),
v1.ResourceMemory: sampleDefaultMemRequest,
},
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultMemRequest,
},
},
},
{
Name: "best-effort-container",
Resources: v1.ResourceRequirements{},
},
},
},
}
burstablePodResourcesNoContainerResourcesWithSidecar = v1.Pod{
Spec: v1.PodSpec{
Resources: &v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultPodMemRequest,
},
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultPodMemLimit,
},
},
Containers: []v1.Container{
{
Name: "no-request-limit",
Resources: v1.ResourceRequirements{},
},
},
InitContainers: []v1.Container{
{
Name: "no-request-limit-sidecar",
Resources: v1.ResourceRequirements{},
RestartPolicy: &restartPolicyAlways,
},
},
},
}
burstablePodResourcesEqualContainerRequestsWithSidecar = v1.Pod{
Spec: v1.PodSpec{
Resources: &v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultPodMemRequest,
},
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultPodMemLimit,
},
},
Containers: []v1.Container{
{
Name: "burstable-container",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultMemRequest,
},
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultMemLimit,
},
},
},
},
InitContainers: []v1.Container{
{
Name: "burstable-sidecar",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultMemRequest,
},
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultMemLimit,
},
},
RestartPolicy: &restartPolicyAlways,
},
},
},
}
burstablePodResourcesUnequalContainerRequestsWithSidecar = v1.Pod{
Spec: v1.PodSpec{
Resources: &v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: resource.MustParse("2000000000"),
},
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultPodMemLimit,
},
},
Containers: []v1.Container{
{
Name: "burstable-container-1",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: resource.MustParse("1000000000"),
},
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultPodMemLimit,
},
},
},
{
Name: "burstable-container-2",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: resource.MustParse("500000000"),
},
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultPodMemLimit,
},
},
},
},
InitContainers: []v1.Container{
{
Name: "burstable-sidecar",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: resource.MustParse("200000000"),
},
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("5m"),
v1.ResourceMemory: sampleDefaultPodMemLimit,
},
},
RestartPolicy: &restartPolicyAlways,
},
},
},
}
)
type lowHighOOMScoreAdjTest struct {
@ -311,6 +654,7 @@ type oomTest struct {
memoryCapacity int64
lowHighOOMScoreAdj map[string]lowHighOOMScoreAdjTest // [container-name] : min and max oom_score_adj score the container should be assigned.
sidecarContainersFeatureEnabled bool
podLevelResourcesFeatureEnabled bool
}
func TestGetContainerOOMScoreAdjust(t *testing.T) {
@ -425,10 +769,96 @@ func TestGetContainerOOMScoreAdjust(t *testing.T) {
},
sidecarContainersFeatureEnabled: true,
},
"guaranteed-pod-resources-no-container-resources": {
pod: &guaranteedPodResourcesNoContainerResources,
lowHighOOMScoreAdj: map[string]lowHighOOMScoreAdjTest{
"no-request-limit-1": {lowOOMScoreAdj: -997, highOOMScoreAdj: -997},
"no-request-limit-2": {lowOOMScoreAdj: -997, highOOMScoreAdj: -997},
},
memoryCapacity: 4000000000,
podLevelResourcesFeatureEnabled: true,
},
"guaranteed-pod-resources-equal-container-resources": {
pod: &guaranteedPodResourcesEqualContainerRequests,
lowHighOOMScoreAdj: map[string]lowHighOOMScoreAdjTest{
"guaranteed-container-1": {lowOOMScoreAdj: -997, highOOMScoreAdj: -997},
"guaranteed-container-2": {lowOOMScoreAdj: -997, highOOMScoreAdj: -997},
},
memoryCapacity: 4000000000,
podLevelResourcesFeatureEnabled: true,
},
"guaranteed-pod-resources-unequal-container-requests": {
pod: &guaranteedPodResourcesUnequalContainerRequests,
lowHighOOMScoreAdj: map[string]lowHighOOMScoreAdjTest{
"burstable-container": {lowOOMScoreAdj: -997, highOOMScoreAdj: -997},
"best-effort-container": {lowOOMScoreAdj: -997, highOOMScoreAdj: -997},
},
memoryCapacity: 4000000000,
podLevelResourcesFeatureEnabled: true,
},
"burstable-pod-resources-no-container-resources": {
pod: &burstablePodResourcesNoContainerResources,
lowHighOOMScoreAdj: map[string]lowHighOOMScoreAdjTest{
"no-request-limit-1": {lowOOMScoreAdj: 750, highOOMScoreAdj: 750},
"no-request-limit-2": {lowOOMScoreAdj: 750, highOOMScoreAdj: 750},
},
memoryCapacity: 4000000000,
podLevelResourcesFeatureEnabled: true,
},
"burstable-pod-resources-equal-container-requests": {
pod: &burstablePodResourcesEqualContainerRequests,
lowHighOOMScoreAdj: map[string]lowHighOOMScoreAdjTest{
"guaranteed-container-1": {lowOOMScoreAdj: 750, highOOMScoreAdj: 750},
"guaranteed-container-2": {lowOOMScoreAdj: 750, highOOMScoreAdj: 750},
},
memoryCapacity: 4000000000,
podLevelResourcesFeatureEnabled: true,
},
"burstable-pod-resources-unequal-container-requests": {
pod: &burstablePodResourcesUnequalContainerRequests,
lowHighOOMScoreAdj: map[string]lowHighOOMScoreAdjTest{
"burstable-container": {lowOOMScoreAdj: 625, highOOMScoreAdj: 625},
"best-effort-container": {lowOOMScoreAdj: 875, highOOMScoreAdj: 875},
},
memoryCapacity: 4000000000,
podLevelResourcesFeatureEnabled: true,
},
"burstable-pod-resources-no-container-resources-with-sidecar": {
pod: &burstablePodResourcesNoContainerResourcesWithSidecar,
lowHighOOMScoreAdj: map[string]lowHighOOMScoreAdjTest{
"no-request-limit": {lowOOMScoreAdj: 750, highOOMScoreAdj: 750},
"no-request-limit-sidecar": {lowOOMScoreAdj: 750, highOOMScoreAdj: 750},
},
memoryCapacity: 4000000000,
podLevelResourcesFeatureEnabled: true,
sidecarContainersFeatureEnabled: true,
},
"burstable-pod-resources-equal-container-requests-with-sidecar": {
pod: &burstablePodResourcesEqualContainerRequestsWithSidecar,
lowHighOOMScoreAdj: map[string]lowHighOOMScoreAdjTest{
"burstable-container": {lowOOMScoreAdj: 750, highOOMScoreAdj: 750},
"burstable-sidecar": {lowOOMScoreAdj: 750, highOOMScoreAdj: 750},
},
memoryCapacity: 4000000000,
podLevelResourcesFeatureEnabled: true,
sidecarContainersFeatureEnabled: true,
},
"burstable-pod-resources-unequal-container-requests-with-sidecar": {
pod: &burstablePodResourcesUnequalContainerRequestsWithSidecar,
lowHighOOMScoreAdj: map[string]lowHighOOMScoreAdjTest{
"burstable-container-1": {lowOOMScoreAdj: 725, highOOMScoreAdj: 725},
"burstable-container-2": {lowOOMScoreAdj: 850, highOOMScoreAdj: 850},
"burstable-sidecar": {lowOOMScoreAdj: 850, highOOMScoreAdj: 850},
},
memoryCapacity: 4000000000,
podLevelResourcesFeatureEnabled: true,
sidecarContainersFeatureEnabled: true,
},
}
for name, test := range oomTests {
t.Run(name, func(t *testing.T) {
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SidecarContainers, test.sidecarContainersFeatureEnabled)
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodLevelResources, test.podLevelResourcesFeatureEnabled)
listContainers := test.pod.Spec.InitContainers
listContainers = append(listContainers, test.pod.Spec.Containers...)
for _, container := range listContainers {