diff --git a/pkg/kubelet/qos/helpers.go b/pkg/kubelet/qos/helpers.go index 2b327e5a7d3..17ed12e3b33 100644 --- a/pkg/kubelet/qos/helpers.go +++ b/pkg/kubelet/qos/helpers.go @@ -27,6 +27,7 @@ package qos // import "k8s.io/kubernetes/pkg/kubelet/qos" import ( v1 "k8s.io/api/core/v1" + resourcehelper "k8s.io/component-helpers/resource" ) // minRegularContainerMemory returns the minimum memory resource quantity @@ -41,3 +42,30 @@ func minRegularContainerMemory(pod v1.Pod) int64 { } return memoryValue } + +// remainingPodMemReqPerContainer calculates the remaining pod memory request per +// container by: +// 1. Taking the total pod memory requests +// 2. Subtracting total container memory requests from pod memory requests +// 3. Dividing the remainder by the number of containers. +// This gives us the additional memory request that is not allocated to any +// containers in the pod. This value will be divided equally among all containers to +// calculate oom score adjusment. +// See https://github.com/kubernetes/enhancements/blob/master/keps/sig-node/2837-pod-level-resource-spec/README.md#oom-score-adjustment +// for more details. +func remainingPodMemReqPerContainer(pod *v1.Pod) int64 { + var remainingMemory int64 + if pod.Spec.Resources.Requests.Memory().IsZero() { + return remainingMemory + } + + numContainers := len(pod.Spec.Containers) + len(pod.Spec.InitContainers) + + // Aggregated requests of all containers. + aggrContainerReqs := resourcehelper.AggregateContainerRequests(pod, resourcehelper.PodResourcesOptions{}) + + remainingMemory = pod.Spec.Resources.Requests.Memory().Value() - aggrContainerReqs.Memory().Value() + + remainingMemoryPerContainer := remainingMemory / int64(numContainers) + return remainingMemoryPerContainer +} diff --git a/pkg/kubelet/qos/policy.go b/pkg/kubelet/qos/policy.go index c4beb95d410..f12da0e5bfb 100644 --- a/pkg/kubelet/qos/policy.go +++ b/pkg/kubelet/qos/policy.go @@ -19,6 +19,7 @@ package qos import ( v1 "k8s.io/api/core/v1" utilfeature "k8s.io/apiserver/pkg/util/feature" + resourcehelper "k8s.io/component-helpers/resource" v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/kubelet/types" @@ -63,14 +64,41 @@ func GetContainerOOMScoreAdjust(pod *v1.Pod, container *v1.Container, memoryCapa // which use more than their request will have an OOM score of 1000 and will be prime // targets for OOM kills. // Note that this is a heuristic, it won't work if a container has many small processes. - memoryRequest := container.Resources.Requests.Memory().Value() - oomScoreAdjust := 1000 - (1000*memoryRequest)/memoryCapacity + containerMemReq := container.Resources.Requests.Memory().Value() + + var oomScoreAdjust, remainingReqPerContainer int64 + // When PodLevelResources feature is enabled, the OOM score adjustment formula is modified + // to account for pod-level memory requests. Any extra pod memory request that's + // not allocated to the containers is divided equally among all containers and + // added to their individual memory requests when calculating the OOM score + // adjustment. Otherwise, only container-level memory requests are used. See + // https://github.com/kubernetes/enhancements/blob/master/keps/sig-node/2837-pod-level-resource-spec/README.md#oom-score-adjustment + // for more details. + if utilfeature.DefaultFeatureGate.Enabled(features.PodLevelResources) && + resourcehelper.IsPodLevelRequestsSet(pod) { + // TODO(ndixita): Refactor to use this formula in all cases, as + // remainingReqPerContainer will be 0 when pod-level resources are not set. + remainingReqPerContainer = remainingPodMemReqPerContainer(pod) + oomScoreAdjust = 1000 - (1000 * (containerMemReq + remainingReqPerContainer) / memoryCapacity) + } else { + oomScoreAdjust = 1000 - (1000*containerMemReq)/memoryCapacity + } // adapt the sidecarContainer memoryRequest for OOM ADJ calculation // calculate the oom score adjustment based on: max-memory( currentSideCarContainer , min-memory(regular containers) ) . if utilfeature.DefaultFeatureGate.Enabled(features.SidecarContainers) && isSidecarContainer(pod, container) { // check min memory quantity in regular containers minMemoryRequest := minRegularContainerMemory(*pod) + + // When calculating minMemoryOomScoreAdjust for sidecar containers with PodLevelResources enabled, + // we add the per-container share of unallocated pod memory requests to the minimum memory request. + // This ensures the OOM score adjustment i.e. minMemoryOomScoreAdjust + // calculation remains consistent + // with how we handle pod-level memory requests for regular containers. + if utilfeature.DefaultFeatureGate.Enabled(features.PodLevelResources) && + resourcehelper.IsPodLevelRequestsSet(pod) { + minMemoryRequest += remainingReqPerContainer + } minMemoryOomScoreAdjust := 1000 - (1000*minMemoryRequest)/memoryCapacity // the OOM adjustment for sidecar container will match // or fall below the OOM score adjustment of regular containers in the Pod. diff --git a/pkg/kubelet/qos/policy_test.go b/pkg/kubelet/qos/policy_test.go index c09cb1e6891..a7a7980f3f9 100644 --- a/pkg/kubelet/qos/policy_test.go +++ b/pkg/kubelet/qos/policy_test.go @@ -177,8 +177,10 @@ var ( }, }, } - sampleDefaultMemRequest = resource.MustParse(strconv.FormatInt(standardMemoryAmount/8, 10)) - sampleDefaultMemLimit = resource.MustParse(strconv.FormatInt(1000+(standardMemoryAmount/8), 10)) + sampleDefaultMemRequest = resource.MustParse(strconv.FormatInt(standardMemoryAmount/8, 10)) + sampleDefaultMemLimit = resource.MustParse(strconv.FormatInt(1000+(standardMemoryAmount/8), 10)) + sampleDefaultPodMemRequest = resource.MustParse(strconv.FormatInt(standardMemoryAmount/4, 10)) + sampleDefaultPodMemLimit = resource.MustParse(strconv.FormatInt(1000+(standardMemoryAmount/4), 10)) sampleContainer = v1.Container{ Name: "main-1", @@ -300,6 +302,347 @@ var ( }, }, } + + // Pod definitions with their resource specifications are defined in this section. + // TODO(ndixita): cleanup the tests to create a method that generates pod + // definitions based on input resource parameters, replacing the current + // approach of individual pod variables. + guaranteedPodResourcesNoContainerResources = v1.Pod{ + Spec: v1.PodSpec{ + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultPodMemRequest, + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultPodMemRequest, + }, + }, + Containers: []v1.Container{ + { + Name: "no-request-limit-1", + Resources: v1.ResourceRequirements{}, + }, + { + Name: "no-request-limit-2", + Resources: v1.ResourceRequirements{}, + }, + }, + }, + } + + guaranteedPodResourcesEqualContainerRequests = v1.Pod{ + Spec: v1.PodSpec{ + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultPodMemRequest, + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultPodMemRequest, + }, + }, + Containers: []v1.Container{ + { + Name: "guaranteed-container-1", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultMemRequest, + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultMemRequest, + }, + }, + }, + { + Name: "guaranteed-container-2", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultMemRequest, + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultMemRequest, + }, + }, + }, + }, + }, + } + + guaranteedPodResourcesUnequalContainerRequests = v1.Pod{ + Spec: v1.PodSpec{ + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultPodMemRequest, + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultPodMemRequest, + }, + }, + Containers: []v1.Container{ + { + Name: "burstable-container", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("3m"), + v1.ResourceMemory: sampleDefaultMemRequest, + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultMemLimit, + }, + }, + }, + { + Name: "best-effort-container", + Resources: v1.ResourceRequirements{}, + }, + }, + }, + } + + burstablePodResourcesNoContainerResources = v1.Pod{ + Spec: v1.PodSpec{ + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultPodMemRequest, + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultPodMemLimit, + }, + }, + Containers: []v1.Container{ + { + Name: "no-request-limit-1", + Resources: v1.ResourceRequirements{}, + }, + { + Name: "no-request-limit-2", + Resources: v1.ResourceRequirements{}, + }, + }, + }, + } + + burstablePodResourcesEqualContainerRequests = v1.Pod{ + Spec: v1.PodSpec{ + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultPodMemRequest, + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultPodMemLimit, + }, + }, + Containers: []v1.Container{ + { + Name: "guaranteed-container-1", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultMemRequest, + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultMemRequest, + }, + }, + }, + { + Name: "guaranteed-container-2", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultMemRequest, + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultMemRequest, + }, + }, + }, + }, + }, + } + + burstablePodResourcesUnequalContainerRequests = v1.Pod{ + Spec: v1.PodSpec{ + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultPodMemRequest, + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultPodMemLimit, + }, + }, + Containers: []v1.Container{ + { + Name: "burstable-container", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("3m"), + v1.ResourceMemory: sampleDefaultMemRequest, + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultMemRequest, + }, + }, + }, + { + Name: "best-effort-container", + Resources: v1.ResourceRequirements{}, + }, + }, + }, + } + + burstablePodResourcesNoContainerResourcesWithSidecar = v1.Pod{ + Spec: v1.PodSpec{ + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultPodMemRequest, + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultPodMemLimit, + }, + }, + Containers: []v1.Container{ + { + Name: "no-request-limit", + Resources: v1.ResourceRequirements{}, + }, + }, + InitContainers: []v1.Container{ + { + Name: "no-request-limit-sidecar", + Resources: v1.ResourceRequirements{}, + RestartPolicy: &restartPolicyAlways, + }, + }, + }, + } + + burstablePodResourcesEqualContainerRequestsWithSidecar = v1.Pod{ + Spec: v1.PodSpec{ + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultPodMemRequest, + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultPodMemLimit, + }, + }, + Containers: []v1.Container{ + { + Name: "burstable-container", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultMemRequest, + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultMemLimit, + }, + }, + }, + }, + InitContainers: []v1.Container{ + { + Name: "burstable-sidecar", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultMemRequest, + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultMemLimit, + }, + }, + RestartPolicy: &restartPolicyAlways, + }, + }, + }, + } + + burstablePodResourcesUnequalContainerRequestsWithSidecar = v1.Pod{ + Spec: v1.PodSpec{ + Resources: &v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: resource.MustParse("2000000000"), + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultPodMemLimit, + }, + }, + Containers: []v1.Container{ + { + Name: "burstable-container-1", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: resource.MustParse("1000000000"), + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultPodMemLimit, + }, + }, + }, + { + Name: "burstable-container-2", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: resource.MustParse("500000000"), + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultPodMemLimit, + }, + }, + }, + }, + InitContainers: []v1.Container{ + { + Name: "burstable-sidecar", + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: resource.MustParse("200000000"), + }, + Limits: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("5m"), + v1.ResourceMemory: sampleDefaultPodMemLimit, + }, + }, + RestartPolicy: &restartPolicyAlways, + }, + }, + }, + } ) type lowHighOOMScoreAdjTest struct { @@ -311,6 +654,7 @@ type oomTest struct { memoryCapacity int64 lowHighOOMScoreAdj map[string]lowHighOOMScoreAdjTest // [container-name] : min and max oom_score_adj score the container should be assigned. sidecarContainersFeatureEnabled bool + podLevelResourcesFeatureEnabled bool } func TestGetContainerOOMScoreAdjust(t *testing.T) { @@ -425,10 +769,96 @@ func TestGetContainerOOMScoreAdjust(t *testing.T) { }, sidecarContainersFeatureEnabled: true, }, + "guaranteed-pod-resources-no-container-resources": { + pod: &guaranteedPodResourcesNoContainerResources, + lowHighOOMScoreAdj: map[string]lowHighOOMScoreAdjTest{ + "no-request-limit-1": {lowOOMScoreAdj: -997, highOOMScoreAdj: -997}, + "no-request-limit-2": {lowOOMScoreAdj: -997, highOOMScoreAdj: -997}, + }, + memoryCapacity: 4000000000, + podLevelResourcesFeatureEnabled: true, + }, + "guaranteed-pod-resources-equal-container-resources": { + pod: &guaranteedPodResourcesEqualContainerRequests, + lowHighOOMScoreAdj: map[string]lowHighOOMScoreAdjTest{ + "guaranteed-container-1": {lowOOMScoreAdj: -997, highOOMScoreAdj: -997}, + "guaranteed-container-2": {lowOOMScoreAdj: -997, highOOMScoreAdj: -997}, + }, + memoryCapacity: 4000000000, + podLevelResourcesFeatureEnabled: true, + }, + "guaranteed-pod-resources-unequal-container-requests": { + pod: &guaranteedPodResourcesUnequalContainerRequests, + lowHighOOMScoreAdj: map[string]lowHighOOMScoreAdjTest{ + "burstable-container": {lowOOMScoreAdj: -997, highOOMScoreAdj: -997}, + "best-effort-container": {lowOOMScoreAdj: -997, highOOMScoreAdj: -997}, + }, + memoryCapacity: 4000000000, + podLevelResourcesFeatureEnabled: true, + }, + "burstable-pod-resources-no-container-resources": { + pod: &burstablePodResourcesNoContainerResources, + lowHighOOMScoreAdj: map[string]lowHighOOMScoreAdjTest{ + "no-request-limit-1": {lowOOMScoreAdj: 750, highOOMScoreAdj: 750}, + "no-request-limit-2": {lowOOMScoreAdj: 750, highOOMScoreAdj: 750}, + }, + memoryCapacity: 4000000000, + podLevelResourcesFeatureEnabled: true, + }, + "burstable-pod-resources-equal-container-requests": { + pod: &burstablePodResourcesEqualContainerRequests, + lowHighOOMScoreAdj: map[string]lowHighOOMScoreAdjTest{ + "guaranteed-container-1": {lowOOMScoreAdj: 750, highOOMScoreAdj: 750}, + "guaranteed-container-2": {lowOOMScoreAdj: 750, highOOMScoreAdj: 750}, + }, + memoryCapacity: 4000000000, + podLevelResourcesFeatureEnabled: true, + }, + "burstable-pod-resources-unequal-container-requests": { + pod: &burstablePodResourcesUnequalContainerRequests, + lowHighOOMScoreAdj: map[string]lowHighOOMScoreAdjTest{ + "burstable-container": {lowOOMScoreAdj: 625, highOOMScoreAdj: 625}, + "best-effort-container": {lowOOMScoreAdj: 875, highOOMScoreAdj: 875}, + }, + memoryCapacity: 4000000000, + podLevelResourcesFeatureEnabled: true, + }, + "burstable-pod-resources-no-container-resources-with-sidecar": { + pod: &burstablePodResourcesNoContainerResourcesWithSidecar, + lowHighOOMScoreAdj: map[string]lowHighOOMScoreAdjTest{ + "no-request-limit": {lowOOMScoreAdj: 750, highOOMScoreAdj: 750}, + "no-request-limit-sidecar": {lowOOMScoreAdj: 750, highOOMScoreAdj: 750}, + }, + memoryCapacity: 4000000000, + podLevelResourcesFeatureEnabled: true, + sidecarContainersFeatureEnabled: true, + }, + "burstable-pod-resources-equal-container-requests-with-sidecar": { + pod: &burstablePodResourcesEqualContainerRequestsWithSidecar, + lowHighOOMScoreAdj: map[string]lowHighOOMScoreAdjTest{ + "burstable-container": {lowOOMScoreAdj: 750, highOOMScoreAdj: 750}, + "burstable-sidecar": {lowOOMScoreAdj: 750, highOOMScoreAdj: 750}, + }, + memoryCapacity: 4000000000, + podLevelResourcesFeatureEnabled: true, + sidecarContainersFeatureEnabled: true, + }, + "burstable-pod-resources-unequal-container-requests-with-sidecar": { + pod: &burstablePodResourcesUnequalContainerRequestsWithSidecar, + lowHighOOMScoreAdj: map[string]lowHighOOMScoreAdjTest{ + "burstable-container-1": {lowOOMScoreAdj: 725, highOOMScoreAdj: 725}, + "burstable-container-2": {lowOOMScoreAdj: 850, highOOMScoreAdj: 850}, + "burstable-sidecar": {lowOOMScoreAdj: 850, highOOMScoreAdj: 850}, + }, + memoryCapacity: 4000000000, + podLevelResourcesFeatureEnabled: true, + sidecarContainersFeatureEnabled: true, + }, } for name, test := range oomTests { t.Run(name, func(t *testing.T) { featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.SidecarContainers, test.sidecarContainersFeatureEnabled) + featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodLevelResources, test.podLevelResourcesFeatureEnabled) listContainers := test.pod.Spec.InitContainers listContainers = append(listContainers, test.pod.Spec.Containers...) for _, container := range listContainers {