From d5d008a6bd4a73d9773ee4bdd861a93e1db8457f Mon Sep 17 00:00:00 2001 From: vinay kulkarni Date: Wed, 19 Mar 2025 06:33:27 +0000 Subject: [PATCH 1/8] Invoke UpdateContainerResources or trigger container restarts (for RestartContainer policy) when memory requests are resized --- .../kuberuntime/kuberuntime_manager.go | 15 +- .../kuberuntime/kuberuntime_manager_test.go | 90 +++++++ test/e2e/common/node/pod_resize.go | 240 +++++++++++------- test/e2e/framework/pod/resize.go | 15 +- test/e2e/framework/pod/utils.go | 19 ++ 5 files changed, 272 insertions(+), 107 deletions(-) diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go index 988e754b84e..909e83e854b 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -634,8 +634,8 @@ func (m *kubeGenericRuntimeManager) computePodResizeAction(pod *v1.Pod, containe return true } - determineContainerResize := func(rName v1.ResourceName, specValue, statusValue int64) (resize, restart bool) { - if specValue == statusValue { + determineContainerResize := func(rName v1.ResourceName, desiredValue, currentValue int64) (resize, restart bool) { + if desiredValue == currentValue { return false, false } for _, policy := range container.ResizePolicy { @@ -646,7 +646,7 @@ func (m *kubeGenericRuntimeManager) computePodResizeAction(pod *v1.Pod, containe // If a resource policy isn't set, the implicit default is NotRequired. return true, false } - markContainerForUpdate := func(rName v1.ResourceName, specValue, statusValue int64) { + markContainerForUpdate := func(rName v1.ResourceName, desiredValue, currentValue int64) { cUpdateInfo := containerToUpdateInfo{ container: &container, kubeContainerID: kubeContainerStatus.ID, @@ -655,18 +655,19 @@ func (m *kubeGenericRuntimeManager) computePodResizeAction(pod *v1.Pod, containe } // Order the container updates such that resource decreases are applied before increases switch { - case specValue > statusValue: // append + case desiredValue > currentValue: // append changes.ContainersToUpdate[rName] = append(changes.ContainersToUpdate[rName], cUpdateInfo) - case specValue < statusValue: // prepend + case desiredValue < currentValue: // prepend changes.ContainersToUpdate[rName] = append(changes.ContainersToUpdate[rName], containerToUpdateInfo{}) copy(changes.ContainersToUpdate[rName][1:], changes.ContainersToUpdate[rName]) changes.ContainersToUpdate[rName][0] = cUpdateInfo } } resizeMemLim, restartMemLim := determineContainerResize(v1.ResourceMemory, desiredResources.memoryLimit, currentResources.memoryLimit) + resizeMemReq, restartMemReq := determineContainerResize(v1.ResourceMemory, desiredResources.memoryRequest, currentResources.memoryRequest) resizeCPULim, restartCPULim := determineContainerResize(v1.ResourceCPU, desiredResources.cpuLimit, currentResources.cpuLimit) resizeCPUReq, restartCPUReq := determineContainerResize(v1.ResourceCPU, desiredResources.cpuRequest, currentResources.cpuRequest) - if restartCPULim || restartCPUReq || restartMemLim { + if restartCPULim || restartCPUReq || restartMemLim || restartMemReq { // resize policy requires this container to restart changes.ContainersToKill[kubeContainerStatus.ID] = containerToKillInfo{ name: kubeContainerStatus.Name, @@ -683,6 +684,8 @@ func (m *kubeGenericRuntimeManager) computePodResizeAction(pod *v1.Pod, containe } else { if resizeMemLim { markContainerForUpdate(v1.ResourceMemory, desiredResources.memoryLimit, currentResources.memoryLimit) + } else 
if resizeMemReq { + markContainerForUpdate(v1.ResourceMemory, desiredResources.memoryRequest, currentResources.memoryRequest) } if resizeCPULim { markContainerForUpdate(v1.ResourceCPU, desiredResources.cpuLimit, currentResources.cpuLimit) diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go index baf0a6de8f6..bba8fd0552a 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager_test.go @@ -2890,6 +2890,96 @@ func TestComputePodActionsForPodResize(t *testing.T) { return &pa }, }, + "Update container memory (requests only) with RestartContainer policy for memory": { + setupFn: func(pod *v1.Pod) { + c := &pod.Spec.Containers[2] + c.ResizePolicy = []v1.ContainerResizePolicy{cpuPolicyRestartNotRequired, memPolicyRestartRequired} + c.Resources = v1.ResourceRequirements{ + Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M}, + Requests: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M}, + } + setupActuatedResources(pod, c, v1.ResourceRequirements{ + Limits: v1.ResourceList{ + v1.ResourceCPU: cpu200m.DeepCopy(), + v1.ResourceMemory: mem200M.DeepCopy(), + }, + Requests: v1.ResourceList{ + v1.ResourceCPU: cpu100m.DeepCopy(), + v1.ResourceMemory: mem200M.DeepCopy(), + }, + }) + }, + getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions { + kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[2].Name) + killMap := make(map[kubecontainer.ContainerID]containerToKillInfo) + killMap[kcs.ID] = containerToKillInfo{ + container: &pod.Spec.Containers[2], + name: pod.Spec.Containers[2].Name, + } + pa := podActions{ + SandboxID: podStatus.SandboxStatuses[0].Id, + ContainersToStart: []int{2}, + ContainersToKill: killMap, + ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{}, + UpdatePodResources: true, + } + return &pa + }, + }, + "Update container memory (requests only) with RestartNotRequired policy for memory": { + setupFn: func(pod *v1.Pod) { + c := &pod.Spec.Containers[2] + c.ResizePolicy = []v1.ContainerResizePolicy{cpuPolicyRestartNotRequired, memPolicyRestartNotRequired} + c.Resources = v1.ResourceRequirements{ + Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M}, + Requests: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M}, + } + setupActuatedResources(pod, c, v1.ResourceRequirements{ + Limits: v1.ResourceList{ + v1.ResourceCPU: cpu200m.DeepCopy(), + v1.ResourceMemory: mem200M.DeepCopy(), + }, + Requests: v1.ResourceList{ + v1.ResourceCPU: cpu100m.DeepCopy(), + v1.ResourceMemory: mem200M.DeepCopy(), + }, + }) + }, + getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions { + kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[2].Name) + killMap := make(map[kubecontainer.ContainerID]containerToKillInfo) + killMap[kcs.ID] = containerToKillInfo{ + container: &pod.Spec.Containers[2], + name: pod.Spec.Containers[2].Name, + } + pa := podActions{ + SandboxID: podStatus.SandboxStatuses[0].Id, + ContainersToStart: []int{}, + ContainersToKill: getKillMap(pod, podStatus, []int{}), + ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{ + v1.ResourceMemory: { + { + container: &pod.Spec.Containers[2], + kubeContainerID: kcs.ID, + desiredContainerResources: containerResources{ + memoryLimit: mem200M.Value(), + memoryRequest: mem100M.Value(), + cpuLimit: cpu200m.MilliValue(), + cpuRequest: 
cpu100m.MilliValue(), + }, + currentContainerResources: &containerResources{ + memoryLimit: mem200M.Value(), + memoryRequest: mem200M.Value(), + cpuLimit: cpu200m.MilliValue(), + cpuRequest: cpu100m.MilliValue(), + }, + }, + }, + }, + } + return &pa + }, + }, } { t.Run(desc, func(t *testing.T) { pod, status := makeBasePodAndStatus() diff --git a/test/e2e/common/node/pod_resize.go b/test/e2e/common/node/pod_resize.go index f5c7a5b8313..07264dbf792 100644 --- a/test/e2e/common/node/pod_resize.go +++ b/test/e2e/common/node/pod_resize.go @@ -93,8 +93,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} - ]}}`, increasedCPU, increasedMem, increasedCPU, increasedMem), + {"name":"c1", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} + ]}}`, increasedCPU, increasedMem, increasedCPU, increasedMem), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -105,8 +105,7 @@ func doPodResizeTests() { }, }, { - name: "Guaranteed QoS pod, one container - decrease CPU only", - testRollback: true, + name: "Guaranteed QoS pod, one container - decrease CPU only", containers: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -116,8 +115,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"%s"},"limits":{"cpu":"%s"}}} - ]}}`, reducedCPU, reducedCPU), + {"name":"c1", "resources":{"requests":{"cpu":"%s"},"limits":{"cpu":"%s"}}} + ]}}`, reducedCPU, reducedCPU), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -126,6 +125,7 @@ func doPodResizeTests() { MemPolicy: &noRestart, }, }, + testRollback: true, }, { name: "Guaranteed QoS pod, three containers (c1, c2, c3) - increase: CPU (c1,c3), memory (c2, c3) ; decrease: CPU (c2)", @@ -150,10 +150,10 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"%s"},"limits":{"cpu":"%s"}}}, - {"name":"c2", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}}, - {"name":"c3", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} - ]}}`, + {"name":"c1", "resources":{"requests":{"cpu":"%s"},"limits":{"cpu":"%s"}}}, + {"name":"c2", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}}, + {"name":"c3", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} + ]}}`, increasedCPU, increasedCPU, offsetCPU(1, reducedCPU), offsetMemory(1, increasedMem), offsetCPU(1, reducedCPU), offsetMemory(1, increasedMem), offsetCPU(2, increasedCPU), offsetMemory(2, increasedMem), offsetCPU(2, increasedCPU), offsetMemory(2, increasedMem)), @@ -179,8 +179,7 @@ func doPodResizeTests() { }, }, { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease memory requests only", - testRollback: true, + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease memory requests only", containers: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -188,14 +187,15 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"memory":"%s"}}} - ]}}`, reducedMem), + {"name":"c1", "resources":{"requests":{"memory":"%s"}}} + ]}}`, reducedMem), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", Resources: 
&e2epod.ContainerResources{CPUReq: originalCPU, CPULim: originalCPULimit, MemReq: reducedMem, MemLim: originalMemLimit}, }, }, + testRollback: true, }, { name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase memory requests only", @@ -206,8 +206,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"memory":"%s"}}} - ]}}`, increasedMem), + {"name":"c1", "resources":{"requests":{"memory":"%s"}}} + ]}}`, increasedMem), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -224,8 +224,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"limits":{"memory":"%s"}}} - ]}}`, increasedMemLimit), + {"name":"c1", "resources":{"limits":{"memory":"%s"}}} + ]}}`, increasedMemLimit), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -234,8 +234,7 @@ func doPodResizeTests() { }, }, { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease CPU requests only", - testRollback: true, + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease CPU requests only", containers: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -243,18 +242,18 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"%s"}}} - ]}}`, reducedCPU), + {"name":"c1", "resources":{"requests":{"cpu":"%s"}}} + ]}}`, reducedCPU), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", Resources: &e2epod.ContainerResources{CPUReq: reducedCPU, CPULim: originalCPULimit, MemReq: originalMem, MemLim: originalMemLimit}, }, }, + testRollback: true, }, { - name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease CPU limits only", - testRollback: true, + name: "Burstable QoS pod, one container with cpu & memory requests + limits - decrease CPU limits only", containers: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -262,14 +261,15 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"limits":{"cpu":"%s"}}} - ]}}`, reducedCPULimit), + {"name":"c1", "resources":{"limits":{"cpu":"%s"}}} + ]}}`, reducedCPULimit), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", Resources: &e2epod.ContainerResources{CPUReq: originalCPU, CPULim: reducedCPULimit, MemReq: originalMem, MemLim: originalMemLimit}, }, }, + testRollback: true, }, { name: "Burstable QoS pod, one container with cpu & memory requests + limits - increase CPU requests only", @@ -280,8 +280,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"%s"}}} - ]}}`, increasedCPU), + {"name":"c1", "resources":{"requests":{"cpu":"%s"}}} + ]}}`, increasedCPU), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -298,8 +298,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"limits":{"cpu":"%s"}}} - ]}}`, increasedCPULimit), + {"name":"c1", "resources":{"limits":{"cpu":"%s"}}} + ]}}`, increasedCPULimit), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -316,8 +316,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"%s"},"limits":{"cpu":"%s"}}} - ]}}`, reducedCPU, reducedCPULimit), + {"name":"c1", 
"resources":{"requests":{"cpu":"%s"},"limits":{"cpu":"%s"}}} + ]}}`, reducedCPU, reducedCPULimit), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -334,8 +334,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"%s"},"limits":{"cpu":"%s"}}} - ]}}`, increasedCPU, increasedCPULimit), + {"name":"c1", "resources":{"requests":{"cpu":"%s"},"limits":{"cpu":"%s"}}} + ]}}`, increasedCPU, increasedCPULimit), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -352,8 +352,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"%s"},"limits":{"cpu":"%s"}}} - ]}}`, reducedCPU, increasedCPULimit), + {"name":"c1", "resources":{"requests":{"cpu":"%s"},"limits":{"cpu":"%s"}}} + ]}}`, reducedCPU, increasedCPULimit), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -370,8 +370,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"%s"},"limits":{"cpu":"%s"}}} - ]}}`, increasedCPU, reducedCPULimit), + {"name":"c1", "resources":{"requests":{"cpu":"%s"},"limits":{"cpu":"%s"}}} + ]}}`, increasedCPU, reducedCPULimit), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -388,8 +388,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"memory":"%s"},"limits":{"memory":"%s"}}} - ]}}`, increasedMem, increasedMemLimit), + {"name":"c1", "resources":{"requests":{"memory":"%s"},"limits":{"memory":"%s"}}} + ]}}`, increasedMem, increasedMemLimit), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -406,8 +406,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"memory":"%s"},"limits":{"memory":"%s"}}} - ]}}`, reducedMem, increasedMemLimit), + {"name":"c1", "resources":{"requests":{"memory":"%s"},"limits":{"memory":"%s"}}} + ]}}`, reducedMem, increasedMemLimit), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -424,8 +424,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"%s"},"limits":{"memory":"%s"}}} - ]}}`, reducedCPU, increasedMemLimit), + {"name":"c1", "resources":{"requests":{"cpu":"%s"},"limits":{"memory":"%s"}}} + ]}}`, reducedCPU, increasedMemLimit), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -442,8 +442,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"memory":"%s"},"limits":{"cpu":"%s"}}} - ]}}`, reducedMem, increasedCPULimit), + {"name":"c1", "resources":{"requests":{"memory":"%s"},"limits":{"cpu":"%s"}}} + ]}}`, reducedMem, increasedCPULimit), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -460,8 +460,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"memory":"%s"},"limits":{"cpu":"%s"}}} - ]}}`, increasedMem, reducedCPULimit), + {"name":"c1", "resources":{"requests":{"memory":"%s"},"limits":{"cpu":"%s"}}} + ]}}`, increasedMem, reducedCPULimit), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -478,8 +478,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"memory":"%s"}}} - ]}}`, reducedMem), + {"name":"c1", 
"resources":{"requests":{"memory":"%s"}}} + ]}}`, reducedMem), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -496,8 +496,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"%s"}}} - ]}}`, increasedCPU), + {"name":"c1", "resources":{"requests":{"cpu":"%s"}}} + ]}}`, increasedCPU), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -514,8 +514,8 @@ func doPodResizeTests() { }, }, patchString: `{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"1m"},"limits":{"cpu":"5m"}}} - ]}}`, + {"name":"c1", "resources":{"requests":{"cpu":"1m"},"limits":{"cpu":"5m"}}} + ]}}`, expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -524,8 +524,7 @@ func doPodResizeTests() { }, }, { - name: "Guaranteed QoS pod, one container - increase CPU (NotRequired) & memory (RestartContainer)", - testRollback: true, + name: "Guaranteed QoS pod, one container - increase CPU (NotRequired) & memory (RestartContainer)", containers: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -535,8 +534,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} - ]}}`, increasedCPU, increasedMem, increasedCPU, increasedMem), + {"name":"c1", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} + ]}}`, increasedCPU, increasedMem, increasedCPU, increasedMem), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -546,10 +545,34 @@ func doPodResizeTests() { RestartCount: 1, }, }, + testRollback: true, }, { - name: "Burstable QoS pod, one container - decrease CPU (NotRequired) & memory (RestartContainer)", + name: "Burstable QoS pod, one container - decrease CPU (NotRequired) & memory (RestartContainer)", + containers: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: originalCPU, CPULim: originalCPULimit, MemReq: originalMem, MemLim: originalMemLimit}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + }, + }, + patchString: fmt.Sprintf(`{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} + ]}}`, reducedCPU, reducedMem, reducedCPULimit, reducedMemLimit), + expected: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: reducedCPU, CPULim: reducedCPULimit, MemReq: reducedMem, MemLim: reducedMemLimit}, + CPUPolicy: &noRestart, + MemPolicy: &doRestart, + RestartCount: 1, + }, + }, testRollback: true, + }, + { + name: "Burstable QoS pod, one container - decrease memory request (RestartContainer memory resize policy)", containers: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -560,17 +583,40 @@ func doPodResizeTests() { }, patchString: fmt.Sprintf(`{"spec":{"containers":[ {"name":"c1", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} - ]}}`, reducedCPU, reducedMem, reducedCPULimit, reducedMemLimit), + ]}}`, originalCPU, reducedMem, originalCPULimit, originalMemLimit), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", - Resources: &e2epod.ContainerResources{CPUReq: reducedCPU, CPULim: reducedCPULimit, MemReq: reducedMem, MemLim: reducedMemLimit}, + Resources: &e2epod.ContainerResources{CPUReq: originalCPU, CPULim: originalCPULimit, MemReq: reducedMem, MemLim: originalMemLimit}, CPUPolicy: &noRestart, MemPolicy: &doRestart, 
RestartCount: 1, }, }, }, + { + name: "Burstable QoS pod, one container - increase memory request (NoRestart memory resize policy)", + containers: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: originalCPU, CPULim: originalCPULimit, MemReq: originalMem, MemLim: originalMemLimit}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + }, + }, + patchString: fmt.Sprintf(`{"spec":{"containers":[ + {"name":"c1", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} + ]}}`, originalCPU, increasedMem, originalCPULimit, originalMemLimit), + expected: []e2epod.ResizableContainerInfo{ + { + Name: "c1", + Resources: &e2epod.ContainerResources{CPUReq: originalCPU, CPULim: originalCPULimit, MemReq: increasedMem, MemLim: originalMemLimit}, + CPUPolicy: &noRestart, + MemPolicy: &noRestart, + RestartCount: 0, + }, + }, + }, { name: "Burstable QoS pod, three containers - increase c1 resources, no change for c2, decrease c3 resources (no net change for pod)", containers: []e2epod.ResizableContainerInfo{ @@ -594,9 +640,9 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}}, - {"name":"c3", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s"}}} - ]}}`, + {"name":"c1", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}}, + {"name":"c3", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s"}}} + ]}}`, increasedCPU, increasedMem, increasedCPULimit, increasedMemLimit, offsetCPU(2, reducedCPU), offsetMemory(2, reducedMem), offsetCPU(2, reducedCPULimit)), expected: []e2epod.ResizableContainerInfo{ @@ -643,9 +689,9 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s"}}}, - {"name":"c2", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} - ]}}`, + {"name":"c1", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s"}}}, + {"name":"c2", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} + ]}}`, reducedCPU, reducedMem, reducedCPULimit, offsetCPU(2, increasedCPU), offsetMemory(2, increasedMem), offsetCPU(2, increasedCPULimit), offsetMemory(2, increasedMemLimit)), expected: []e2epod.ResizableContainerInfo{ @@ -693,9 +739,9 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c2", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}}, - {"name":"c3", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} - ]}}`, + {"name":"c2", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}}, + {"name":"c3", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} + ]}}`, offsetCPU(1, increasedCPU), offsetMemory(1, increasedMem), offsetCPU(1, increasedCPULimit), offsetMemory(1, increasedMemLimit), reducedCPU, reducedMem, reducedCPULimit, reducedMemLimit), expected: []e2epod.ResizableContainerInfo{ @@ -736,8 +782,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} - ]}}`, increasedCPU, increasedMem, increasedCPU, 
increasedMem), + {"name":"c1", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} + ]}}`, increasedCPU, increasedMem, increasedCPU, increasedMem), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -766,8 +812,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c2", "resources":{"requests":{"cpu":"%s","memory":"%s"}}} - ]}}`, originalCPU, originalMem), + {"name":"c2", "resources":{"requests":{"cpu":"%s","memory":"%s"}}} + ]}}`, originalCPU, originalMem), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -800,8 +846,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c2", "resources":{"limits":{"cpu":"%s"}}} - ]}}`, originalCPULimit), + {"name":"c2", "resources":{"limits":{"cpu":"%s"}}} + ]}}`, originalCPULimit), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -829,8 +875,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"containers":[ - {"name":"c1", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} - ]}}`, increasedCPU, increasedMem, increasedCPU, increasedMem), + {"name":"c1", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} + ]}}`, increasedCPU, increasedMem, increasedCPU, increasedMem), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -877,8 +923,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"initContainers":[ - {"name":"c1-init", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} - ]}}`, increasedCPU, increasedMem, increasedCPU, increasedMem), + {"name":"c1-init", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} + ]}}`, increasedCPU, increasedMem, increasedCPU, increasedMem), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -911,8 +957,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"initContainers":[ - {"name":"c1-init", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} - ]}}`, reducedCPU, increasedMem, reducedCPU, increasedMem), + {"name":"c1-init", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} + ]}}`, reducedCPU, increasedMem, reducedCPU, increasedMem), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -941,8 +987,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"initContainers":[ - {"name":"c1-init", "resources":{"requests":{"cpu":"%s"},"limits":{"cpu":"%s"}}} - ]}}`, reducedCPU, reducedCPU), + {"name":"c1-init", "resources":{"requests":{"cpu":"%s"},"limits":{"cpu":"%s"}}} + ]}}`, reducedCPU, reducedCPU), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -971,8 +1017,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"initContainers":[ - {"name":"c1-init", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} - ]}}`, increasedCPU, increasedMem, increasedCPULimit, increasedMemLimit), + {"name":"c1-init", "resources":{"requests":{"cpu":"%s","memory":"%s"},"limits":{"cpu":"%s","memory":"%s"}}} + ]}}`, increasedCPU, increasedMem, increasedCPULimit, increasedMemLimit), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -1001,8 +1047,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"initContainers":[ - {"name":"c1-init", 
"resources":{"requests":{"cpu":"%s"},"limits":{"cpu":"%s"}}} - ]}}`, reducedCPU, reducedCPULimit), + {"name":"c1-init", "resources":{"requests":{"cpu":"%s"},"limits":{"cpu":"%s"}}} + ]}}`, reducedCPU, reducedCPULimit), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -1031,8 +1077,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"initContainers":[ - {"name":"c1-init", "resources":{"requests":{"cpu":"%s"},"limits":{"cpu":"%s"}}} - ]}}`, increasedCPU, increasedCPULimit), + {"name":"c1-init", "resources":{"requests":{"cpu":"%s"},"limits":{"cpu":"%s"}}} + ]}}`, increasedCPU, increasedCPULimit), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -1061,8 +1107,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"initContainers":[ - {"name":"c1-init", "resources":{"requests":{"memory":"%s"}}} - ]}}`, reducedMem), + {"name":"c1-init", "resources":{"requests":{"memory":"%s"}}} + ]}}`, reducedMem), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -1091,8 +1137,8 @@ func doPodResizeTests() { }, }, patchString: fmt.Sprintf(`{"spec":{"initContainers":[ - {"name":"c1-init", "resources":{"requests":{"memory":"%s"},"limits":{"memory":"%s"}}} - ]}}`, increasedMem, increasedMemLimit), + {"name":"c1-init", "resources":{"requests":{"memory":"%s"},"limits":{"memory":"%s"}}} + ]}}`, increasedMem, increasedMemLimit), expected: []e2epod.ResizableContainerInfo{ { Name: "c1", @@ -1394,13 +1440,13 @@ func doPodResizeErrorTests() { // Above tests are performed by doSheduletTests() and doPodResizeResourceQuotaTests() // in test/e2e/node/pod_resize.go -var _ = SIGDescribe("Pod InPlace Resize Container", feature.InPlacePodVerticalScaling, func() { +var _ = SIGDescribe("[InPlacePodResize] Pod InPlace Resize Container", feature.InPlacePodVerticalScaling, func() { f := framework.NewDefaultFramework("pod-resize-tests") ginkgo.BeforeEach(func(ctx context.Context) { - node, err := e2enode.GetRandomReadySchedulableNode(ctx, f.ClientSet) + _, err := e2enode.GetRandomReadySchedulableNode(ctx, f.ClientSet) framework.ExpectNoError(err) - if framework.NodeOSDistroIs("windows") || e2enode.IsARM64(node) { + if framework.NodeOSDistroIs("windows") { e2eskipper.Skipf("runtime does not support InPlacePodVerticalScaling -- skipping") } }) diff --git a/test/e2e/framework/pod/resize.go b/test/e2e/framework/pod/resize.go index 2edc5ed4c90..b077f62a9cd 100644 --- a/test/e2e/framework/pod/resize.go +++ b/test/e2e/framework/pod/resize.go @@ -23,6 +23,7 @@ import ( "fmt" "strconv" "strings" + "time" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -48,6 +49,7 @@ const ( Cgroupv2CPURequest string = "/sys/fs/cgroup/cpu.weight" CPUPeriod string = "100000" MinContainerRuntimeVersion string = "1.6.9" + MinimumGracePeriodSeconds int64 = 2 ) var ( @@ -167,6 +169,7 @@ func makeResizableContainer(tcInfo ResizableContainerInfo) v1.Container { func MakePodWithResizableContainers(ns, name, timeStamp string, tcInfo []ResizableContainerInfo) *v1.Pod { testInitContainers, testContainers := separateContainers(tcInfo) + var minGracePeriodSeconds int64 = MinimumGracePeriodSeconds pod := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: name, @@ -176,10 +179,11 @@ func MakePodWithResizableContainers(ns, name, timeStamp string, tcInfo []Resizab }, }, Spec: v1.PodSpec{ - OS: &v1.PodOS{Name: v1.Linux}, - InitContainers: testInitContainers, - Containers: testContainers, - RestartPolicy: v1.RestartPolicyOnFailure, + OS: &v1.PodOS{Name: v1.Linux}, + InitContainers: testInitContainers, 
+ Containers: testContainers, + RestartPolicy: v1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &minGracePeriodSeconds, }, } return pod @@ -350,6 +354,7 @@ func VerifyPodContainersCgroupValues(ctx context.Context, f *framework.Framework } errs = append(errs, VerifyCgroupValue(f, pod, ci.Name, cgroupCPULimit, expectedCPULimitString)) errs = append(errs, VerifyCgroupValue(f, pod, ci.Name, cgroupCPURequest, strconv.FormatInt(expectedCPUShares, 10))) + //TODO(vinaykul,InPlacePodVerticalScaling): Verify oom_score_adj when runc adds support for updating it } } return utilerrors.NewAggregate(errs) @@ -420,6 +425,8 @@ func WaitForPodResizeActuation(ctx context.Context, f *framework.Framework, podC })), ) + // Wait 2x termination grace period to catch any restarts - expected or not + time.Sleep(time.Duration(*pod.Spec.TerminationGracePeriodSeconds*2) * time.Second) resizedPod, err := framework.GetObject(podClient.Get, pod.Name, metav1.GetOptions{})(ctx) framework.ExpectNoError(err, "failed to get resized pod") return resizedPod diff --git a/test/e2e/framework/pod/utils.go b/test/e2e/framework/pod/utils.go index 25188e7b9c0..824c1ada969 100644 --- a/test/e2e/framework/pod/utils.go +++ b/test/e2e/framework/pod/utils.go @@ -294,6 +294,25 @@ func VerifyCgroupValue(f *framework.Framework, pod *v1.Pod, cName, cgPath, expec return nil } +// VerifyOomScoreAdjValue verifies that oom_score_adj for pid 1 (pidof init/systemd -> app) +// has the expected value in specified container of the pod. It execs into the container, +// reads the oom_score_adj value from procfs, and compares it against the expected value. +func VerifyOomScoreAdjValue(f *framework.Framework, pod *v1.Pod, cName, expectedOomScoreAdj string) error { + cmd := fmt.Sprintf("cat /proc/1/oom_score_adj") + framework.Logf("Namespace %s Pod %s Container %s - looking for oom_score_adj value %s", + pod.Namespace, pod.Name, cName, expectedOomScoreAdj) + oomScoreAdj, _, err := ExecCommandInContainerWithFullOutput(f, pod.Name, cName, "/bin/sh", "-c", cmd) + if err != nil { + return fmt.Errorf("failed to find expected value %s for container app process", expectedOomScoreAdj) + } + oomScoreAdj = strings.Trim(oomScoreAdj, "\n") + if oomScoreAdj != expectedOomScoreAdj { + return fmt.Errorf("oom_score_adj value %s not equal to expected %s", oomScoreAdj, expectedOomScoreAdj) + } + fmt.Printf("VDBG: POD: %s EXPECTED_OOM_ADJ %s ACTUAL_OOM_ADJ %s\n", pod.Name, expectedOomScoreAdj, oomScoreAdj) + return nil +} + // IsPodOnCgroupv2Node checks whether the pod is running on cgroupv2 node. 
// TODO: Deduplicate this function with NPD cluster e2e test: // https://github.com/kubernetes/kubernetes/blob/2049360379bcc5d6467769cef112e6e492d3d2f0/test/e2e/node/node_problem_detector.go#L369 From 917c4b310bd1bca18134ff0c64cd47596ef7fcdf Mon Sep 17 00:00:00 2001 From: vinay kulkarni Date: Wed, 19 Mar 2025 13:26:01 +0000 Subject: [PATCH 2/8] Fix lint issues, use kuberuntime's minGracePeriod const, boost container restart wait period --- pkg/kubelet/kuberuntime/kuberuntime_manager.go | 2 ++ test/e2e/common/node/pod_resize.go | 2 +- test/e2e/framework/pod/resize.go | 16 +++++++++++----- test/e2e/framework/pod/utils.go | 2 +- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go index 909e83e854b..4165b24af79 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -77,6 +77,7 @@ const ( kubeRuntimeAPIVersion = "0.1.0" // A minimal shutdown window for avoiding unnecessary SIGKILLs minimumGracePeriodInSeconds = 2 + MinimumGracePeriodInSeconds = int64(minimumGracePeriodInSeconds) // The expiration time of version cache. versionCacheTTL = 60 * time.Second @@ -84,6 +85,7 @@ const ( identicalErrorDelay = 1 * time.Minute // OpenTelemetry instrumentation scope name instrumentationScope = "k8s.io/kubernetes/pkg/kubelet/kuberuntime" + ) var ( diff --git a/test/e2e/common/node/pod_resize.go b/test/e2e/common/node/pod_resize.go index 07264dbf792..69822033d7f 100644 --- a/test/e2e/common/node/pod_resize.go +++ b/test/e2e/common/node/pod_resize.go @@ -1440,7 +1440,7 @@ func doPodResizeErrorTests() { // Above tests are performed by doSheduletTests() and doPodResizeResourceQuotaTests() // in test/e2e/node/pod_resize.go -var _ = SIGDescribe("[InPlacePodResize] Pod InPlace Resize Container", feature.InPlacePodVerticalScaling, func() { +var _ = SIGDescribe("Pod InPlace Resize Container", feature.InPlacePodVerticalScaling, func() { f := framework.NewDefaultFramework("pod-resize-tests") ginkgo.BeforeEach(func(ctx context.Context) { diff --git a/test/e2e/framework/pod/resize.go b/test/e2e/framework/pod/resize.go index b077f62a9cd..00465bfb93a 100644 --- a/test/e2e/framework/pod/resize.go +++ b/test/e2e/framework/pod/resize.go @@ -31,6 +31,7 @@ import ( utilerrors "k8s.io/apimachinery/pkg/util/errors" helpers "k8s.io/component-helpers/resource" kubecm "k8s.io/kubernetes/pkg/kubelet/cm" + "k8s.io/kubernetes/pkg/kubelet/kuberuntime" "k8s.io/kubernetes/test/e2e/framework" imageutils "k8s.io/kubernetes/test/utils/image" @@ -49,7 +50,7 @@ const ( Cgroupv2CPURequest string = "/sys/fs/cgroup/cpu.weight" CPUPeriod string = "100000" MinContainerRuntimeVersion string = "1.6.9" - MinimumGracePeriodSeconds int64 = 2 + MinRestartWaitPeriod int = 10 ) var ( @@ -169,7 +170,7 @@ func makeResizableContainer(tcInfo ResizableContainerInfo) v1.Container { func MakePodWithResizableContainers(ns, name, timeStamp string, tcInfo []ResizableContainerInfo) *v1.Pod { testInitContainers, testContainers := separateContainers(tcInfo) - var minGracePeriodSeconds int64 = MinimumGracePeriodSeconds + minGracePeriodSeconds := kuberuntime.MinimumGracePeriodInSeconds pod := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: name, @@ -354,7 +355,8 @@ func VerifyPodContainersCgroupValues(ctx context.Context, f *framework.Framework } errs = append(errs, VerifyCgroupValue(f, pod, ci.Name, cgroupCPULimit, expectedCPULimitString)) errs = append(errs, VerifyCgroupValue(f, pod, ci.Name, 
cgroupCPURequest, strconv.FormatInt(expectedCPUShares, 10))) - //TODO(vinaykul,InPlacePodVerticalScaling): Verify oom_score_adj when runc adds support for updating it + // TODO(vinaykul,InPlacePodVerticalScaling): Verify oom_score_adj when runc adds support for updating it + // See https://github.com/opencontainers/runc/pull/4669 } } return utilerrors.NewAggregate(errs) @@ -425,8 +427,12 @@ func WaitForPodResizeActuation(ctx context.Context, f *framework.Framework, podC })), ) - // Wait 2x termination grace period to catch any restarts - expected or not - time.Sleep(time.Duration(*pod.Spec.TerminationGracePeriodSeconds*2) * time.Second) + // Wait min(3 x termination grace period, MinRestartWaitPeriod) to catch any restarts - expected or not + containerRestartWaitPeriod := int(*pod.Spec.TerminationGracePeriodSeconds) * 3 + if containerRestartWaitPeriod < MinRestartWaitPeriod { + containerRestartWaitPeriod = MinRestartWaitPeriod + } + time.Sleep(time.Duration(containerRestartWaitPeriod) * time.Second) resizedPod, err := framework.GetObject(podClient.Get, pod.Name, metav1.GetOptions{})(ctx) framework.ExpectNoError(err, "failed to get resized pod") return resizedPod diff --git a/test/e2e/framework/pod/utils.go b/test/e2e/framework/pod/utils.go index 824c1ada969..1c4cb50d8de 100644 --- a/test/e2e/framework/pod/utils.go +++ b/test/e2e/framework/pod/utils.go @@ -298,7 +298,7 @@ func VerifyCgroupValue(f *framework.Framework, pod *v1.Pod, cName, cgPath, expec // has the expected value in specified container of the pod. It execs into the container, // reads the oom_score_adj value from procfs, and compares it against the expected value. func VerifyOomScoreAdjValue(f *framework.Framework, pod *v1.Pod, cName, expectedOomScoreAdj string) error { - cmd := fmt.Sprintf("cat /proc/1/oom_score_adj") + cmd := "cat /proc/1/oom_score_adj" framework.Logf("Namespace %s Pod %s Container %s - looking for oom_score_adj value %s", pod.Namespace, pod.Name, cName, expectedOomScoreAdj) oomScoreAdj, _, err := ExecCommandInContainerWithFullOutput(f, pod.Name, cName, "/bin/sh", "-c", cmd) From 951e33fdf94e2973264fcfe04a2abc96b3aead28 Mon Sep 17 00:00:00 2001 From: vinay kulkarni Date: Wed, 19 Mar 2025 15:13:44 +0000 Subject: [PATCH 3/8] Fix gofmt issues --- pkg/kubelet/kuberuntime/kuberuntime_manager.go | 1 - test/e2e/framework/pod/resize.go | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go index 4165b24af79..d44a05c1800 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -85,7 +85,6 @@ const ( identicalErrorDelay = 1 * time.Minute // OpenTelemetry instrumentation scope name instrumentationScope = "k8s.io/kubernetes/pkg/kubelet/kuberuntime" - ) var ( diff --git a/test/e2e/framework/pod/resize.go b/test/e2e/framework/pod/resize.go index 00465bfb93a..4403bdba0e3 100644 --- a/test/e2e/framework/pod/resize.go +++ b/test/e2e/framework/pod/resize.go @@ -50,7 +50,7 @@ const ( Cgroupv2CPURequest string = "/sys/fs/cgroup/cpu.weight" CPUPeriod string = "100000" MinContainerRuntimeVersion string = "1.6.9" - MinRestartWaitPeriod int = 10 + MinRestartWaitPeriod int = 10 ) var ( From 7f4b9a52db28ffdf5138c37c5f4f8b83c9df4596 Mon Sep 17 00:00:00 2001 From: vinay kulkarni Date: Wed, 19 Mar 2025 19:11:21 +0000 Subject: [PATCH 4/8] Consider memory requests in determining if resize is in progress, set termination grace period to 0 --- pkg/kubelet/kubelet.go | 3 +-- 
pkg/kubelet/kuberuntime/kuberuntime_manager.go | 1 - test/e2e/framework/pod/resize.go | 4 ++-- test/e2e/framework/pod/utils.go | 1 - 4 files changed, 3 insertions(+), 6 deletions(-) diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index ced300f6432..d104a68f624 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -2989,7 +2989,6 @@ func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod, podStatus *kubecontaine // for any running containers. Specifically, the following differences are ignored: // - Non-resizable containers: non-restartable init containers, ephemeral containers // - Non-resizable resources: only CPU & memory are resizable -// - Non-actuated resources: memory requests are not actuated // - Non-running containers: they will be sized correctly when (re)started func (kl *Kubelet) isPodResizeInProgress(allocatedPod *v1.Pod, podStatus *kubecontainer.PodStatus) bool { return !podutil.VisitContainers(&allocatedPod.Spec, podutil.InitContainers|podutil.Containers, @@ -3007,9 +3006,9 @@ func (kl *Kubelet) isPodResizeInProgress(allocatedPod *v1.Pod, podStatus *kubeco actuatedResources, _ := kl.allocationManager.GetActuatedResources(allocatedPod.UID, allocatedContainer.Name) allocatedResources := allocatedContainer.Resources - // Memory requests are excluded since they don't need to be actuated. return allocatedResources.Requests[v1.ResourceCPU].Equal(actuatedResources.Requests[v1.ResourceCPU]) && allocatedResources.Limits[v1.ResourceCPU].Equal(actuatedResources.Limits[v1.ResourceCPU]) && + allocatedResources.Requests[v1.ResourceMemory].Equal(actuatedResources.Requests[v1.ResourceMemory]) && allocatedResources.Limits[v1.ResourceMemory].Equal(actuatedResources.Limits[v1.ResourceMemory]) }) } diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go index d44a05c1800..909e83e854b 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -77,7 +77,6 @@ const ( kubeRuntimeAPIVersion = "0.1.0" // A minimal shutdown window for avoiding unnecessary SIGKILLs minimumGracePeriodInSeconds = 2 - MinimumGracePeriodInSeconds = int64(minimumGracePeriodInSeconds) // The expiration time of version cache. 
versionCacheTTL = 60 * time.Second diff --git a/test/e2e/framework/pod/resize.go b/test/e2e/framework/pod/resize.go index 4403bdba0e3..1103be02a80 100644 --- a/test/e2e/framework/pod/resize.go +++ b/test/e2e/framework/pod/resize.go @@ -31,7 +31,6 @@ import ( utilerrors "k8s.io/apimachinery/pkg/util/errors" helpers "k8s.io/component-helpers/resource" kubecm "k8s.io/kubernetes/pkg/kubelet/cm" - "k8s.io/kubernetes/pkg/kubelet/kuberuntime" "k8s.io/kubernetes/test/e2e/framework" imageutils "k8s.io/kubernetes/test/utils/image" @@ -170,7 +169,7 @@ func makeResizableContainer(tcInfo ResizableContainerInfo) v1.Container { func MakePodWithResizableContainers(ns, name, timeStamp string, tcInfo []ResizableContainerInfo) *v1.Pod { testInitContainers, testContainers := separateContainers(tcInfo) - minGracePeriodSeconds := kuberuntime.MinimumGracePeriodInSeconds + minGracePeriodSeconds := int64(0) pod := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: name, @@ -433,6 +432,7 @@ func WaitForPodResizeActuation(ctx context.Context, f *framework.Framework, podC containerRestartWaitPeriod = MinRestartWaitPeriod } time.Sleep(time.Duration(containerRestartWaitPeriod) * time.Second) + resizedPod, err := framework.GetObject(podClient.Get, pod.Name, metav1.GetOptions{})(ctx) framework.ExpectNoError(err, "failed to get resized pod") return resizedPod diff --git a/test/e2e/framework/pod/utils.go b/test/e2e/framework/pod/utils.go index 1c4cb50d8de..e4c122e0f7b 100644 --- a/test/e2e/framework/pod/utils.go +++ b/test/e2e/framework/pod/utils.go @@ -309,7 +309,6 @@ func VerifyOomScoreAdjValue(f *framework.Framework, pod *v1.Pod, cName, expected if oomScoreAdj != expectedOomScoreAdj { return fmt.Errorf("oom_score_adj value %s not equal to expected %s", oomScoreAdj, expectedOomScoreAdj) } - fmt.Printf("VDBG: POD: %s EXPECTED_OOM_ADJ %s ACTUAL_OOM_ADJ %s\n", pod.Name, expectedOomScoreAdj, oomScoreAdj) return nil } From 7fe7754e67f16c60a87fb031f0b3e2ac80d1f4b9 Mon Sep 17 00:00:00 2001 From: vinay kulkarni Date: Wed, 19 Mar 2025 21:27:32 +0000 Subject: [PATCH 5/8] Fix unit test, remove wait after resize --- pkg/kubelet/kubelet_test.go | 3 +-- test/e2e/framework/pod/resize.go | 9 --------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go index c8e3500d6d6..2fd50bb425b 100644 --- a/pkg/kubelet/kubelet_test.go +++ b/pkg/kubelet/kubelet_test.go @@ -3857,8 +3857,7 @@ func TestIsPodResizeInProgress(t *testing.T) { actuated: &testResources{100, 200, 150, 200}, isRunning: true, }}, - // Memory requests aren't actuated and should be ignored. 
- expectHasResize: false, + expectHasResize: true, }, { name: "simple resized container/cpu+mem req", containers: []testContainer{{ diff --git a/test/e2e/framework/pod/resize.go b/test/e2e/framework/pod/resize.go index 1103be02a80..78067dc8171 100644 --- a/test/e2e/framework/pod/resize.go +++ b/test/e2e/framework/pod/resize.go @@ -23,7 +23,6 @@ import ( "fmt" "strconv" "strings" - "time" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -49,7 +48,6 @@ const ( Cgroupv2CPURequest string = "/sys/fs/cgroup/cpu.weight" CPUPeriod string = "100000" MinContainerRuntimeVersion string = "1.6.9" - MinRestartWaitPeriod int = 10 ) var ( @@ -426,13 +424,6 @@ func WaitForPodResizeActuation(ctx context.Context, f *framework.Framework, podC })), ) - // Wait min(3 x termination grace period, MinRestartWaitPeriod) to catch any restarts - expected or not - containerRestartWaitPeriod := int(*pod.Spec.TerminationGracePeriodSeconds) * 3 - if containerRestartWaitPeriod < MinRestartWaitPeriod { - containerRestartWaitPeriod = MinRestartWaitPeriod - } - time.Sleep(time.Duration(containerRestartWaitPeriod) * time.Second) - resizedPod, err := framework.GetObject(podClient.Get, pod.Name, metav1.GetOptions{})(ctx) framework.ExpectNoError(err, "failed to get resized pod") return resizedPod From ec1b493a088ef6b2675a3e4696d47c06189f52a6 Mon Sep 17 00:00:00 2001 From: vinay kulkarni Date: Thu, 20 Mar 2025 01:46:28 +0000 Subject: [PATCH 6/8] Populate status memory requests from actuated resources --- pkg/kubelet/kubelet_pods.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go index adab3a58c34..07f43a68894 100644 --- a/pkg/kubelet/kubelet_pods.go +++ b/pkg/kubelet/kubelet_pods.go @@ -2105,6 +2105,13 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon } else { preserveOldResourcesValue(v1.ResourceCPU, oldStatus.Resources.Requests, resources.Requests) } + // TODO(tallclair,vinaykul,InPlacePodVerticalScaling): Investigate defaulting to actuated resources instead of allocated resources above + if _, exists := resources.Requests[v1.ResourceMemory]; exists { + // Get memory requests from actuated resources + if actuatedResources, found := kl.allocationManager.GetActuatedResources(pod.UID, allocatedContainer.Name); found { + resources.Requests[v1.ResourceMemory] = actuatedResources.Requests.Memory().DeepCopy() + } + } } return resources From 1208f25b3f4a068235cd516937095ec4b1aa9a5c Mon Sep 17 00:00:00 2001 From: vinay kulkarni Date: Thu, 20 Mar 2025 05:57:41 +0000 Subject: [PATCH 7/8] Verify oom_score_adj for containers that have been restarted in pod resize e2e --- pkg/kubelet/kubelet_pods.go | 2 +- test/e2e/framework/pod/resize.go | 34 +++++++++++++++++++++++++++----- test/e2e/framework/pod/utils.go | 15 ++++++++++++++ 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go index 07f43a68894..68da1ce1c48 100644 --- a/pkg/kubelet/kubelet_pods.go +++ b/pkg/kubelet/kubelet_pods.go @@ -2109,7 +2109,7 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon if _, exists := resources.Requests[v1.ResourceMemory]; exists { // Get memory requests from actuated resources if actuatedResources, found := kl.allocationManager.GetActuatedResources(pod.UID, allocatedContainer.Name); found { - resources.Requests[v1.ResourceMemory] = actuatedResources.Requests.Memory().DeepCopy() + resources.Requests[v1.ResourceMemory] = 
*actuatedResources.Requests.Memory() } } } diff --git a/test/e2e/framework/pod/resize.go b/test/e2e/framework/pod/resize.go index 78067dc8171..5a7afbded02 100644 --- a/test/e2e/framework/pod/resize.go +++ b/test/e2e/framework/pod/resize.go @@ -30,6 +30,7 @@ import ( utilerrors "k8s.io/apimachinery/pkg/util/errors" helpers "k8s.io/component-helpers/resource" kubecm "k8s.io/kubernetes/pkg/kubelet/cm" + kubeqos "k8s.io/kubernetes/pkg/kubelet/qos" "k8s.io/kubernetes/test/e2e/framework" imageutils "k8s.io/kubernetes/test/utils/image" @@ -359,22 +360,22 @@ func VerifyPodContainersCgroupValues(ctx context.Context, f *framework.Framework return utilerrors.NewAggregate(errs) } -func verifyPodRestarts(pod *v1.Pod, wantInfo []ResizableContainerInfo) error { +func verifyPodRestarts(f *framework.Framework, pod *v1.Pod, wantInfo []ResizableContainerInfo) error { ginkgo.GinkgoHelper() initCtrStatuses, ctrStatuses := separateContainerStatuses(wantInfo) errs := []error{} - if err := verifyContainerRestarts(pod.Status.InitContainerStatuses, initCtrStatuses); err != nil { + if err := verifyContainerRestarts(f, pod, pod.Status.InitContainerStatuses, initCtrStatuses); err != nil { errs = append(errs, err) } - if err := verifyContainerRestarts(pod.Status.ContainerStatuses, ctrStatuses); err != nil { + if err := verifyContainerRestarts(f, pod, pod.Status.ContainerStatuses, ctrStatuses); err != nil { errs = append(errs, err) } return utilerrors.NewAggregate(errs) } -func verifyContainerRestarts(gotStatuses []v1.ContainerStatus, wantStatuses []v1.ContainerStatus) error { +func verifyContainerRestarts(f *framework.Framework, pod *v1.Pod, gotStatuses []v1.ContainerStatus, wantStatuses []v1.ContainerStatus) error { ginkgo.GinkgoHelper() if len(gotStatuses) != len(wantStatuses) { @@ -386,11 +387,34 @@ func verifyContainerRestarts(gotStatuses []v1.ContainerStatus, wantStatuses []v1 for i, gotStatus := range gotStatuses { if gotStatus.RestartCount != wantStatuses[i].RestartCount { errs = append(errs, fmt.Errorf("unexpected number of restarts for container %s: got %d, want %d", gotStatus.Name, gotStatus.RestartCount, wantStatuses[i].RestartCount)) + } else if gotStatus.RestartCount > 0 { + err := verifyOomScoreAdj(f, pod, gotStatus.Name) + if err != nil { + errs = append(errs, err) + } } } return utilerrors.NewAggregate(errs) } +func verifyOomScoreAdj(f *framework.Framework, pod *v1.Pod, containerName string) error { + container := FindContainerInPod(pod, containerName) + if container == nil { + return fmt.Errorf("failed to find container %s in pod %s", containerName, pod.Name) + } + + node, err := f.ClientSet.CoreV1().Nodes().Get(context.Background(), pod.Spec.NodeName, metav1.GetOptions{}) + if err != nil { + return err + } + + nodeMemoryCapacity := node.Status.Capacity[v1.ResourceMemory] + oomScoreAdj := kubeqos.GetContainerOOMScoreAdjust(pod, container, int64(nodeMemoryCapacity.Value())) + expectedOomScoreAdj := strconv.FormatInt(int64(oomScoreAdj), 10) + + return VerifyOomScoreAdjValue(f, pod, container.Name, expectedOomScoreAdj) +} + func WaitForPodResizeActuation(ctx context.Context, f *framework.Framework, podClient *PodClient, pod *v1.Pod, expectedContainers []ResizableContainerInfo) *v1.Pod { ginkgo.GinkgoHelper() // Wait for resize to complete. 
@@ -440,7 +464,7 @@ func ExpectPodResized(ctx context.Context, f *framework.Framework, resizedPod *v if resourceErrs := VerifyPodStatusResources(resizedPod, expectedContainers); resourceErrs != nil { errs = append(errs, fmt.Errorf("container status resources don't match expected: %w", formatErrors(resourceErrs))) } - if restartErrs := verifyPodRestarts(resizedPod, expectedContainers); restartErrs != nil { + if restartErrs := verifyPodRestarts(f, resizedPod, expectedContainers); restartErrs != nil { errs = append(errs, fmt.Errorf("container restart counts don't match expected: %w", formatErrors(restartErrs))) } diff --git a/test/e2e/framework/pod/utils.go b/test/e2e/framework/pod/utils.go index e4c122e0f7b..fd080e1598f 100644 --- a/test/e2e/framework/pod/utils.go +++ b/test/e2e/framework/pod/utils.go @@ -256,6 +256,21 @@ func FindPodConditionByType(podStatus *v1.PodStatus, conditionType v1.PodConditi return nil } +// FindContainerByName finds the v1.Container in a pod by its name in the provided pod +func FindContainerInPod(pod *v1.Pod, containerName string) *v1.Container { + for _, container := range pod.Spec.InitContainers { + if container.Name == containerName { + return &container + } + } + for _, container := range pod.Spec.Containers { + if container.Name == containerName { + return &container + } + } + return nil +} + // FindContainerStatusInPod finds a container status by its name in the provided pod func FindContainerStatusInPod(pod *v1.Pod, containerName string) *v1.ContainerStatus { for _, containerStatus := range pod.Status.InitContainerStatuses { From d62e766dad190262bbd44a86619a0371172a1b6a Mon Sep 17 00:00:00 2001 From: vinay kulkarni Date: Thu, 20 Mar 2025 16:13:45 +0000 Subject: [PATCH 8/8] Fix function comment --- test/e2e/framework/pod/utils.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/framework/pod/utils.go b/test/e2e/framework/pod/utils.go index fd080e1598f..5658c9cb6c6 100644 --- a/test/e2e/framework/pod/utils.go +++ b/test/e2e/framework/pod/utils.go @@ -256,7 +256,7 @@ func FindPodConditionByType(podStatus *v1.PodStatus, conditionType v1.PodConditi return nil } -// FindContainerByName finds the v1.Container in a pod by its name in the provided pod +// FindContainerInPod finds the container in a pod by its name func FindContainerInPod(pod *v1.Pod, containerName string) *v1.Container { for _, container := range pod.Spec.InitContainers { if container.Name == containerName {
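The core behavioral change in this series (patch 1, reinforced by patch 4) is that memory requests are now handled like the other resizable resources: a changed memory request either results in an UpdateContainerResources call or, when the container's memory resize policy is RestartContainer, a container restart. Below is a minimal standalone Go sketch of that per-resource decision with simplified names and values (decide stands in for determineContainerResize); it is an illustration, not the kubelet code itself.

    // Standalone sketch: per-resource resize decision once memory requests
    // participate, assuming the NotRequired/RestartContainer semantics used
    // by the patch (names and types simplified for illustration).
    package main

    import "fmt"

    type restartPolicy string

    const (
    	notRequired      restartPolicy = "NotRequired"
    	restartContainer restartPolicy = "RestartContainer"
    )

    // decide reports whether a resource needs to be resized and whether the
    // container must be restarted to apply it.
    func decide(policy restartPolicy, desired, current int64) (resize, restart bool) {
    	if desired == current {
    		return false, false
    	}
    	return true, policy == restartContainer
    }

    func main() {
    	// Memory request shrinks from 200M to 100M under RestartContainer:
    	// the change is applied by restarting the container.
    	fmt.Println(decide(restartContainer, 100_000_000, 200_000_000)) // true true

    	// Same change under NotRequired: applied in place via
    	// UpdateContainerResources, no restart.
    	fmt.Println(decide(notRequired, 100_000_000, 200_000_000)) // true false
    }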