From 1208f25b3f4a068235cd516937095ec4b1aa9a5c Mon Sep 17 00:00:00 2001 From: vinay kulkarni Date: Thu, 20 Mar 2025 05:57:41 +0000 Subject: [PATCH] Verify oom_score_adj for containers that have been restarted in pod resize e2e --- pkg/kubelet/kubelet_pods.go | 2 +- test/e2e/framework/pod/resize.go | 34 +++++++++++++++++++++++++++----- test/e2e/framework/pod/utils.go | 15 ++++++++++++++ 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/pkg/kubelet/kubelet_pods.go b/pkg/kubelet/kubelet_pods.go index 07f43a68894..68da1ce1c48 100644 --- a/pkg/kubelet/kubelet_pods.go +++ b/pkg/kubelet/kubelet_pods.go @@ -2109,7 +2109,7 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon if _, exists := resources.Requests[v1.ResourceMemory]; exists { // Get memory requests from actuated resources if actuatedResources, found := kl.allocationManager.GetActuatedResources(pod.UID, allocatedContainer.Name); found { - resources.Requests[v1.ResourceMemory] = actuatedResources.Requests.Memory().DeepCopy() + resources.Requests[v1.ResourceMemory] = *actuatedResources.Requests.Memory() } } } diff --git a/test/e2e/framework/pod/resize.go b/test/e2e/framework/pod/resize.go index 78067dc8171..5a7afbded02 100644 --- a/test/e2e/framework/pod/resize.go +++ b/test/e2e/framework/pod/resize.go @@ -30,6 +30,7 @@ import ( utilerrors "k8s.io/apimachinery/pkg/util/errors" helpers "k8s.io/component-helpers/resource" kubecm "k8s.io/kubernetes/pkg/kubelet/cm" + kubeqos "k8s.io/kubernetes/pkg/kubelet/qos" "k8s.io/kubernetes/test/e2e/framework" imageutils "k8s.io/kubernetes/test/utils/image" @@ -359,22 +360,22 @@ func VerifyPodContainersCgroupValues(ctx context.Context, f *framework.Framework return utilerrors.NewAggregate(errs) } -func verifyPodRestarts(pod *v1.Pod, wantInfo []ResizableContainerInfo) error { +func verifyPodRestarts(f *framework.Framework, pod *v1.Pod, wantInfo []ResizableContainerInfo) error { ginkgo.GinkgoHelper() initCtrStatuses, ctrStatuses := 
separateContainerStatuses(wantInfo) errs := []error{} - if err := verifyContainerRestarts(pod.Status.InitContainerStatuses, initCtrStatuses); err != nil { + if err := verifyContainerRestarts(f, pod, pod.Status.InitContainerStatuses, initCtrStatuses); err != nil { errs = append(errs, err) } - if err := verifyContainerRestarts(pod.Status.ContainerStatuses, ctrStatuses); err != nil { + if err := verifyContainerRestarts(f, pod, pod.Status.ContainerStatuses, ctrStatuses); err != nil { errs = append(errs, err) } return utilerrors.NewAggregate(errs) } -func verifyContainerRestarts(gotStatuses []v1.ContainerStatus, wantStatuses []v1.ContainerStatus) error { +func verifyContainerRestarts(f *framework.Framework, pod *v1.Pod, gotStatuses []v1.ContainerStatus, wantStatuses []v1.ContainerStatus) error { ginkgo.GinkgoHelper() if len(gotStatuses) != len(wantStatuses) { @@ -386,11 +387,34 @@ func verifyContainerRestarts(gotStatuses []v1.ContainerStatus, wantStatuses []v1 for i, gotStatus := range gotStatuses { if gotStatus.RestartCount != wantStatuses[i].RestartCount { errs = append(errs, fmt.Errorf("unexpected number of restarts for container %s: got %d, want %d", gotStatus.Name, gotStatus.RestartCount, wantStatuses[i].RestartCount)) + } else if gotStatus.RestartCount > 0 { + err := verifyOomScoreAdj(f, pod, gotStatus.Name) + if err != nil { + errs = append(errs, err) + } } } return utilerrors.NewAggregate(errs) } +func verifyOomScoreAdj(f *framework.Framework, pod *v1.Pod, containerName string) error { + container := FindContainerInPod(pod, containerName) + if container == nil { + return fmt.Errorf("failed to find container %s in pod %s", containerName, pod.Name) + } + + node, err := f.ClientSet.CoreV1().Nodes().Get(context.Background(), pod.Spec.NodeName, metav1.GetOptions{}) + if err != nil { + return err + } + + nodeMemoryCapacity := node.Status.Capacity[v1.ResourceMemory] + oomScoreAdj := kubeqos.GetContainerOOMScoreAdjust(pod, container, int64(nodeMemoryCapacity.Value())) + 
expectedOomScoreAdj := strconv.FormatInt(int64(oomScoreAdj), 10) + + return VerifyOomScoreAdjValue(f, pod, container.Name, expectedOomScoreAdj) +} + func WaitForPodResizeActuation(ctx context.Context, f *framework.Framework, podClient *PodClient, pod *v1.Pod, expectedContainers []ResizableContainerInfo) *v1.Pod { ginkgo.GinkgoHelper() // Wait for resize to complete. @@ -440,7 +464,7 @@ func ExpectPodResized(ctx context.Context, f *framework.Framework, resizedPod *v if resourceErrs := VerifyPodStatusResources(resizedPod, expectedContainers); resourceErrs != nil { errs = append(errs, fmt.Errorf("container status resources don't match expected: %w", formatErrors(resourceErrs))) } - if restartErrs := verifyPodRestarts(resizedPod, expectedContainers); restartErrs != nil { + if restartErrs := verifyPodRestarts(f, resizedPod, expectedContainers); restartErrs != nil { errs = append(errs, fmt.Errorf("container restart counts don't match expected: %w", formatErrors(restartErrs))) } diff --git a/test/e2e/framework/pod/utils.go b/test/e2e/framework/pod/utils.go index e4c122e0f7b..fd080e1598f 100644 --- a/test/e2e/framework/pod/utils.go +++ b/test/e2e/framework/pod/utils.go @@ -256,6 +256,21 @@ func FindPodConditionByType(podStatus *v1.PodStatus, conditionType v1.PodConditi return nil } +// FindContainerInPod finds a container by its name in the provided pod +func FindContainerInPod(pod *v1.Pod, containerName string) *v1.Container { + for _, container := range pod.Spec.InitContainers { + if container.Name == containerName { + return &container + } + } + for _, container := range pod.Spec.Containers { + if container.Name == containerName { + return &container + } + } + return nil +} + // FindContainerStatusInPod finds a container status by its name in the provided pod func FindContainerStatusInPod(pod *v1.Pod, containerName string) *v1.ContainerStatus { for _, containerStatus := range pod.Status.InitContainerStatuses {