diff --git a/test/e2e/common/node/pod_level_resources.go b/test/e2e/common/node/pod_level_resources.go index cf5c1f5b5e7..3dd58ff051c 100644 --- a/test/e2e/common/node/pod_level_resources.go +++ b/test/e2e/common/node/pod_level_resources.go @@ -29,12 +29,17 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" utilerrors "k8s.io/apimachinery/pkg/util/errors" + v1resource "k8s.io/kubernetes/pkg/api/v1/resource" + v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" kubecm "k8s.io/kubernetes/pkg/kubelet/cm" "k8s.io/kubernetes/test/e2e/feature" "k8s.io/kubernetes/test/e2e/framework" e2enode "k8s.io/kubernetes/test/e2e/framework/node" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" + + utils "k8s.io/kubernetes/test/utils" + imageutils "k8s.io/kubernetes/test/utils/image" admissionapi "k8s.io/pod-security-admission/api" ) @@ -43,9 +48,14 @@ const ( cgroupv2CPUWeight string = "cpu.weight" cgroupv2CPULimit string = "cpu.max" cgroupv2MemLimit string = "memory.max" - cgroupFsPath string = "/sys/fs/cgroup" - CPUPeriod string = "100000" - mountPath string = "/sysfscgroup" + + cgroupv2HugeTLBPrefix string = "hugetlb" + cgroupv2HugeTLBRsvd string = "rsvd" + + cgroupFsPath string = "/sys/fs/cgroup" + mountPath string = "/sysfscgroup" + + CPUPeriod string = "100000" ) var ( @@ -69,6 +79,7 @@ var _ = SIGDescribe("Pod Level Resources", framework.WithSerial(), feature.PodLe e2eskipper.Skipf("not supported on cgroupv1 -- skipping") } }) + podLevelResourcesTests(f) }) @@ -104,7 +115,7 @@ func isCgroupv2Node(f *framework.Framework, ctx context.Context) bool { func makeObjectMetadata(name, namespace string) metav1.ObjectMeta { return metav1.ObjectMeta{ - Name: "testpod", Namespace: namespace, + Name: name, Namespace: namespace, Labels: map[string]string{"time": strconv.Itoa(time.Now().Nanosecond())}, } } @@ -113,11 +124,16 @@ type containerInfo struct { Name string Resources 
*resourceInfo } + type resourceInfo struct { - CPUReq string - CPULim string - MemReq string - MemLim string + CPUReq string + CPULim string + MemReq string + MemLim string + HugePagesReq2Mi string + HugePagesLim2Mi string + HugePagesReq1Gi string + HugePagesLim1Gi string } func makeContainer(info containerInfo) v1.Container { @@ -140,7 +156,7 @@ func makeContainer(info containerInfo) v1.Container { func getResourceRequirements(info *resourceInfo) v1.ResourceRequirements { var res v1.ResourceRequirements if info != nil { - if info.CPUReq != "" || info.MemReq != "" { + if info.CPUReq != "" || info.MemReq != "" || info.HugePagesReq2Mi != "" || info.HugePagesReq1Gi != "" { res.Requests = make(v1.ResourceList) } if info.CPUReq != "" { @@ -149,8 +165,14 @@ func getResourceRequirements(info *resourceInfo) v1.ResourceRequirements { if info.MemReq != "" { res.Requests[v1.ResourceMemory] = resource.MustParse(info.MemReq) } + if info.HugePagesReq2Mi != "" { + res.Requests[v1.ResourceHugePagesPrefix+"2Mi"] = resource.MustParse(info.HugePagesReq2Mi) + } + if info.HugePagesReq1Gi != "" { + res.Requests[v1.ResourceHugePagesPrefix+"1Gi"] = resource.MustParse(info.HugePagesReq1Gi) + } - if info.CPULim != "" || info.MemLim != "" { + if info.CPULim != "" || info.MemLim != "" || info.HugePagesLim2Mi != "" || info.HugePagesLim1Gi != "" { res.Limits = make(v1.ResourceList) } if info.CPULim != "" { @@ -159,6 +181,12 @@ func getResourceRequirements(info *resourceInfo) v1.ResourceRequirements { if info.MemLim != "" { res.Limits[v1.ResourceMemory] = resource.MustParse(info.MemLim) } + if info.HugePagesLim2Mi != "" { + res.Limits[v1.ResourceHugePagesPrefix+"2Mi"] = resource.MustParse(info.HugePagesLim2Mi) + } + if info.HugePagesLim1Gi != "" { + res.Limits[v1.ResourceHugePagesPrefix+"1Gi"] = resource.MustParse(info.HugePagesLim1Gi) + } } return res } @@ -211,7 +239,7 @@ func verifyQoS(gotPod v1.Pod, expectedQoS v1.PodQOSClass) { } // TODO(ndixita): dedup the conversion logic in pod resize 
test and move to helpers/utils. -func verifyPodCgroups(ctx context.Context, f *framework.Framework, pod *v1.Pod, info *resourceInfo) error { +func verifyPodCgroups(f *framework.Framework, pod *v1.Pod, info *resourceInfo) error { ginkgo.GinkgoHelper() cmd := fmt.Sprintf("find %s -name '*%s*'", mountPath, strings.ReplaceAll(string(pod.UID), "-", "_")) framework.Logf("Namespace %s Pod %s - looking for Pod cgroup directory path: %q", f.Namespace, pod.Name, cmd) @@ -247,6 +275,70 @@ func verifyPodCgroups(ctx context.Context, f *framework.Framework, pod *v1.Pod, if err != nil { errs = append(errs, fmt.Errorf("failed to verify memory limit cgroup value: %w", err)) } + + // Verify cgroup limits for all the hugepage sizes in the pod + for resourceName, resourceAmount := range expectedResources.Limits { + if !v1resource.IsHugePageResourceName(resourceName) { + continue + } + + pageSize, err := v1helper.HugePageSizeFromResourceName(resourceName) + if err != nil { + errs = append(errs, fmt.Errorf("encountered error while obtaining hugepage size: %w", err)) + } + + sizeString, err := v1helper.HugePageUnitSizeFromByteSize(pageSize.Value()) + if err != nil { + errs = append(errs, fmt.Errorf("encountered error while obtaining hugepage unit size: %w", err)) + } + + hugepageCgroupv2Limits := []string{ + fmt.Sprintf("%s.%s.max", cgroupv2HugeTLBPrefix, sizeString), + fmt.Sprintf("%s.%s.%s.max", cgroupv2HugeTLBPrefix, sizeString, cgroupv2HugeTLBRsvd), + } + expectedHugepageLim := strconv.FormatInt(resourceAmount.Value(), 10) + + for _, hugepageCgroupv2Limit := range hugepageCgroupv2Limits { + hugepageLimCgPath := fmt.Sprintf("%s/%s", podCgPath, hugepageCgroupv2Limit) + err = e2epod.VerifyCgroupValue(f, pod, pod.Spec.Containers[0].Name, hugepageLimCgPath, expectedHugepageLim) + if err != nil { + errs = append(errs, fmt.Errorf("failed to verify hugepage limit cgroup value: %w, path: %s", err, hugepageLimCgPath)) + } + } + } + + return utilerrors.NewAggregate(errs) +} + +func 
verifyContainersCgroupLimits(f *framework.Framework, pod *v1.Pod) error { + var errs []error + for _, container := range pod.Spec.Containers { + if pod.Spec.Resources == nil { + continue + } + + if pod.Spec.Resources.Limits.Memory() != nil && container.Resources.Limits.Memory() == nil { + expectedCgroupMemLimit := strconv.FormatInt(pod.Spec.Resources.Limits.Memory().Value(), 10) + err := e2epod.VerifyCgroupValue(f, pod, container.Name, fmt.Sprintf("%s/%s", cgroupFsPath, cgroupv2MemLimit), expectedCgroupMemLimit) + if err != nil { + errs = append(errs, fmt.Errorf("failed to verify memory limit cgroup value: %w", err)) + } + } + + if pod.Spec.Resources.Limits.Cpu() != nil && container.Resources.Limits.Cpu() == nil { + cpuQuota := kubecm.MilliCPUToQuota(pod.Spec.Resources.Limits.Cpu().MilliValue(), kubecm.QuotaPeriod) + expectedCPULimit := strconv.FormatInt(cpuQuota, 10) + expectedCPULimit = fmt.Sprintf("%s %s", expectedCPULimit, CPUPeriod) + err := e2epod.VerifyCgroupValue(f, pod, container.Name, fmt.Sprintf("%s/%s", cgroupFsPath, cgroupv2CPULimit), expectedCPULimit) + if err != nil { + errs = append(errs, fmt.Errorf("failed to verify cpu limit cgroup value: %w", err)) + } + } + + // TODO(KevinTMtz) - Check for all hugepages for the pod, for this it is + // required to enable the Containerd Cgroup value, because if not, HugeTLB + // cgroup values will be just set to max + } return utilerrors.NewAggregate(errs) } @@ -257,7 +349,7 @@ func podLevelResourcesTests(f *framework.Framework) { // and limits for the pod. If pod-level resource specifications // are specified, totalPodResources is equal to pod-level resources. // Otherwise, it is calculated by aggregating resource requests and - // limits from all containers within the pod.. + // limits from all containers within the pod. 
totalPodResources *resourceInfo } @@ -266,6 +358,7 @@ func podLevelResourcesTests(f *framework.Framework) { podResources *resourceInfo containers []containerInfo expected expectedPodConfig + hugepages map[string]int } tests := []testCase{ @@ -349,10 +442,108 @@ func podLevelResourcesTests(f *framework.Framework) { totalPodResources: &resourceInfo{CPUReq: "50m", CPULim: "100m", MemReq: "50Mi", MemLim: "100Mi"}, }, }, + { + name: "Guaranteed QoS pod hugepages, no container resources, single page size", + podResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "100Mi", MemLim: "100Mi", HugePagesLim2Mi: "10Mi"}, + containers: []containerInfo{{Name: "c1"}, {Name: "c2"}}, + expected: expectedPodConfig{ + qos: v1.PodQOSGuaranteed, + totalPodResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "100Mi", MemLim: "100Mi", HugePagesReq2Mi: "10Mi", HugePagesLim2Mi: "10Mi"}, + }, + hugepages: map[string]int{ + v1.ResourceHugePagesPrefix + "2Mi": 5, + }, + }, + { + name: "Burstable QoS pod hugepages, container resources, single page size", + podResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "50Mi", MemLim: "100Mi", HugePagesLim2Mi: "10Mi"}, + containers: []containerInfo{{Name: "c1", Resources: &resourceInfo{CPUReq: "20m", CPULim: "50m", HugePagesLim2Mi: "4Mi"}}, {Name: "c2"}}, + expected: expectedPodConfig{ + qos: v1.PodQOSBurstable, + totalPodResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "50Mi", MemLim: "100Mi", HugePagesReq2Mi: "10Mi", HugePagesLim2Mi: "10Mi"}, + }, + hugepages: map[string]int{ + v1.ResourceHugePagesPrefix + "2Mi": 5, + }, + }, + { + name: "Burstable QoS pod hugepages, container resources, single page size, pod level does not specify hugepages", + podResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "50Mi", MemLim: "100Mi"}, + containers: []containerInfo{{Name: "c1", Resources: &resourceInfo{CPUReq: "20m", CPULim: "50m", HugePagesLim2Mi: "4Mi"}}, {Name: "c2"}}, + expected: 
expectedPodConfig{ + qos: v1.PodQOSBurstable, + totalPodResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "50Mi", MemLim: "100Mi", HugePagesReq2Mi: "4Mi", HugePagesLim2Mi: "4Mi"}, + }, + hugepages: map[string]int{ + v1.ResourceHugePagesPrefix + "2Mi": 2, + }, + }, + { + name: "Guaranteed QoS pod hugepages, no container resources, multiple page size", + podResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "100Mi", MemLim: "100Mi", HugePagesLim2Mi: "10Mi", HugePagesLim1Gi: "1Gi"}, + containers: []containerInfo{{Name: "c1"}, {Name: "c2"}}, + expected: expectedPodConfig{ + qos: v1.PodQOSGuaranteed, + totalPodResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "100Mi", MemLim: "100Mi", HugePagesReq2Mi: "10Mi", HugePagesLim2Mi: "10Mi", HugePagesReq1Gi: "1Gi", HugePagesLim1Gi: "1Gi"}, + }, + hugepages: map[string]int{ + v1.ResourceHugePagesPrefix + "2Mi": 5, + v1.ResourceHugePagesPrefix + "1Gi": 1, + }, + }, + { + name: "Burstable QoS pod hugepages, container resources, multiple page size", + podResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "50Mi", MemLim: "100Mi", HugePagesLim2Mi: "10Mi", HugePagesLim1Gi: "1Gi"}, + containers: []containerInfo{{Name: "c1", Resources: &resourceInfo{CPUReq: "20m", CPULim: "50m", HugePagesLim2Mi: "4Mi", HugePagesLim1Gi: "1Gi"}}, {Name: "c2"}}, + expected: expectedPodConfig{ + qos: v1.PodQOSBurstable, + totalPodResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "50Mi", MemLim: "100Mi", HugePagesReq2Mi: "10Mi", HugePagesLim2Mi: "10Mi", HugePagesReq1Gi: "1Gi", HugePagesLim1Gi: "1Gi"}, + }, + hugepages: map[string]int{ + v1.ResourceHugePagesPrefix + "2Mi": 5, + v1.ResourceHugePagesPrefix + "1Gi": 1, + }, + }, + { + name: "Burstable QoS pod hugepages, container resources, multiple page size, pod level does not specify hugepages", + podResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "50Mi", MemLim: "100Mi"}, + containers: []containerInfo{{Name: "c1", 
Resources: &resourceInfo{CPUReq: "20m", CPULim: "50m", HugePagesLim2Mi: "4Mi", HugePagesLim1Gi: "1Gi"}}, {Name: "c2"}}, + expected: expectedPodConfig{ + qos: v1.PodQOSBurstable, + totalPodResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "50Mi", MemLim: "100Mi", HugePagesReq2Mi: "4Mi", HugePagesLim2Mi: "4Mi", HugePagesReq1Gi: "1Gi", HugePagesLim1Gi: "1Gi"}, + }, + hugepages: map[string]int{ + v1.ResourceHugePagesPrefix + "2Mi": 2, + v1.ResourceHugePagesPrefix + "1Gi": 1, + }, + }, + { + name: "Burstable QoS pod hugepages, container resources, different page size between pod and container level", + podResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "50Mi", MemLim: "100Mi", HugePagesLim2Mi: "10Mi"}, + containers: []containerInfo{{Name: "c1", Resources: &resourceInfo{CPUReq: "20m", CPULim: "50m", HugePagesLim1Gi: "1Gi"}}, {Name: "c2"}}, + expected: expectedPodConfig{ + qos: v1.PodQOSBurstable, + totalPodResources: &resourceInfo{CPUReq: "100m", CPULim: "100m", MemReq: "50Mi", MemLim: "100Mi", HugePagesReq2Mi: "10Mi", HugePagesLim2Mi: "10Mi", HugePagesReq1Gi: "1Gi", HugePagesLim1Gi: "1Gi"}, + }, + hugepages: map[string]int{ + v1.ResourceHugePagesPrefix + "2Mi": 5, + v1.ResourceHugePagesPrefix + "1Gi": 1, + }, + }, } for _, tc := range tests { ginkgo.It(tc.name, func(ctx context.Context) { + // Pre-allocate hugepages in the node + if tc.hugepages != nil { + utils.SetHugepages(ctx, tc.hugepages) + + ginkgo.By("restarting kubelet to pick up pre-allocated hugepages") + utils.RestartKubelet(ctx, false) + + utils.WaitForHugepages(ctx, f, tc.hugepages) + } + podMetadata := makeObjectMetadata("testpod", f.Namespace.Name) testPod := makePod(&podMetadata, tc.podResources, tc.containers) @@ -367,7 +558,7 @@ func podLevelResourcesTests(f *framework.Framework) { verifyQoS(*pod, tc.expected.qos) ginkgo.By("verifying pod cgroup values") - err := verifyPodCgroups(ctx, f, pod, tc.expected.totalPodResources) + err := verifyPodCgroups(f, pod, 
tc.expected.totalPodResources) framework.ExpectNoError(err, "failed to verify pod's cgroup values: %v", err) ginkgo.By("verifying containers cgroup limits are same as pod container's cgroup limits") @@ -377,32 +568,16 @@ func podLevelResourcesTests(f *framework.Framework) { ginkgo.By("deleting pods") delErr := e2epod.DeletePodWithWait(ctx, f.ClientSet, pod) framework.ExpectNoError(delErr, "failed to delete pod %s", delErr) + + // Release pre-allocated hugepages + if tc.hugepages != nil { + utils.ReleaseHugepages(ctx, tc.hugepages) + + ginkgo.By("restarting kubelet to pick up pre-allocated hugepages") + utils.RestartKubelet(ctx, true) + + utils.WaitForHugepages(ctx, f, tc.hugepages) + } }) } } - -func verifyContainersCgroupLimits(f *framework.Framework, pod *v1.Pod) error { - var errs []error - for _, container := range pod.Spec.Containers { - if pod.Spec.Resources != nil && pod.Spec.Resources.Limits.Memory() != nil && - container.Resources.Limits.Memory() == nil { - expectedCgroupMemLimit := strconv.FormatInt(pod.Spec.Resources.Limits.Memory().Value(), 10) - err := e2epod.VerifyCgroupValue(f, pod, container.Name, fmt.Sprintf("%s/%s", cgroupFsPath, cgroupv2MemLimit), expectedCgroupMemLimit) - if err != nil { - errs = append(errs, fmt.Errorf("failed to verify memory limit cgroup value: %w", err)) - } - } - - if pod.Spec.Resources != nil && pod.Spec.Resources.Limits.Cpu() != nil && - container.Resources.Limits.Cpu() == nil { - cpuQuota := kubecm.MilliCPUToQuota(pod.Spec.Resources.Limits.Cpu().MilliValue(), kubecm.QuotaPeriod) - expectedCPULimit := strconv.FormatInt(cpuQuota, 10) - expectedCPULimit = fmt.Sprintf("%s %s", expectedCPULimit, CPUPeriod) - err := e2epod.VerifyCgroupValue(f, pod, container.Name, fmt.Sprintf("%s/%s", cgroupFsPath, cgroupv2CPULimit), expectedCPULimit) - if err != nil { - errs = append(errs, fmt.Errorf("failed to verify cpu limit cgroup value: %w", err)) - } - } - } - return utilerrors.NewAggregate(errs) -} diff --git 
a/test/e2e_node/hugepages_test.go b/test/e2e_node/hugepages_test.go index 4330f026cd1..bc2bd6474ed 100644 --- a/test/e2e_node/hugepages_test.go +++ b/test/e2e_node/hugepages_test.go @@ -19,10 +19,6 @@ package e2enode import ( "context" "fmt" - "os" - "os/exec" - "strconv" - "strings" "time" "github.com/onsi/ginkgo/v2" @@ -37,7 +33,7 @@ import ( "k8s.io/kubernetes/test/e2e/feature" "k8s.io/kubernetes/test/e2e/framework" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" - e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" + testutils "k8s.io/kubernetes/test/utils" admissionapi "k8s.io/pod-security-admission/api" ) @@ -119,64 +115,6 @@ func makePodToVerifyHugePages(baseName string, hugePagesLimit resource.Quantity, return pod } -// configureHugePages attempts to allocate hugepages of the specified size -func configureHugePages(hugepagesSize int, hugepagesCount int, numaNodeID *int) error { - // Compact memory to make bigger contiguous blocks of memory available - // before allocating huge pages. - // https://www.kernel.org/doc/Documentation/sysctl/vm.txt - if _, err := os.Stat("/proc/sys/vm/compact_memory"); err == nil { - if err := exec.Command("/bin/sh", "-c", "echo 1 > /proc/sys/vm/compact_memory").Run(); err != nil { - return err - } - } - - // e.g. hugepages/hugepages-2048kB/nr_hugepages - hugepagesSuffix := fmt.Sprintf("hugepages/hugepages-%dkB/%s", hugepagesSize, hugepagesCapacityFile) - - // e.g. /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages - hugepagesFile := fmt.Sprintf("/sys/kernel/mm/%s", hugepagesSuffix) - if numaNodeID != nil { - // e.g. /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages - hugepagesFile = fmt.Sprintf("/sys/devices/system/node/node%d/%s", *numaNodeID, hugepagesSuffix) - } - - // Reserve number of hugepages - // e.g. 
/bin/sh -c "echo 5 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages" - command := fmt.Sprintf("echo %d > %s", hugepagesCount, hugepagesFile) - if err := exec.Command("/bin/sh", "-c", command).Run(); err != nil { - return err - } - - // verify that the number of hugepages was updated - // e.g. /bin/sh -c "cat /sys/kernel/mm/hugepages/hugepages-2048kB/vm.nr_hugepages" - command = fmt.Sprintf("cat %s", hugepagesFile) - outData, err := exec.Command("/bin/sh", "-c", command).Output() - if err != nil { - return err - } - - numHugePages, err := strconv.Atoi(strings.TrimSpace(string(outData))) - if err != nil { - return err - } - - framework.Logf("Hugepages total is set to %v", numHugePages) - if numHugePages == hugepagesCount { - return nil - } - - return fmt.Errorf("expected hugepages %v, but found %v", hugepagesCount, numHugePages) -} - -// isHugePageAvailable returns true if hugepages of the specified size is available on the host -func isHugePageAvailable(hugepagesSize int) bool { - path := fmt.Sprintf("%s-%dkB/%s", hugepagesDirPrefix, hugepagesSize, hugepagesCapacityFile) - if _, err := os.Stat(path); err != nil { - return false - } - return true -} - func getHugepagesTestPod(f *framework.Framework, limits v1.ResourceList, mounts []v1.VolumeMount, volumes []v1.Volume) *v1.Pod { return &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ @@ -262,66 +200,6 @@ var _ = SIGDescribe("HugePages", framework.WithSerial(), feature.HugePages, func hugepages map[string]int ) - setHugepages := func(ctx context.Context) { - for hugepagesResource, count := range hugepages { - size := resourceToSize[hugepagesResource] - ginkgo.By(fmt.Sprintf("Verifying hugepages %d are supported", size)) - if !isHugePageAvailable(size) { - e2eskipper.Skipf("skipping test because hugepages of size %d not supported", size) - return - } - - ginkgo.By(fmt.Sprintf("Configuring the host to reserve %d of pre-allocated hugepages of size %d", count, size)) - gomega.Eventually(ctx, func() error { - if err := 
configureHugePages(size, count, nil); err != nil { - return err - } - return nil - }, 30*time.Second, framework.Poll).Should(gomega.BeNil()) - } - } - - waitForHugepages := func(ctx context.Context) { - ginkgo.By("Waiting for hugepages resource to become available on the local node") - gomega.Eventually(ctx, func(ctx context.Context) error { - node, err := f.ClientSet.CoreV1().Nodes().Get(ctx, framework.TestContext.NodeName, metav1.GetOptions{}) - if err != nil { - return err - } - - for hugepagesResource, count := range hugepages { - capacity, ok := node.Status.Capacity[v1.ResourceName(hugepagesResource)] - if !ok { - return fmt.Errorf("the node does not have the resource %s", hugepagesResource) - } - - size, succeed := capacity.AsInt64() - if !succeed { - return fmt.Errorf("failed to convert quantity to int64") - } - - expectedSize := count * resourceToSize[hugepagesResource] * 1024 - if size != int64(expectedSize) { - return fmt.Errorf("the actual size %d is different from the expected one %d", size, expectedSize) - } - } - return nil - }, time.Minute, framework.Poll).Should(gomega.BeNil()) - } - - releaseHugepages := func(ctx context.Context) { - ginkgo.By("Releasing hugepages") - gomega.Eventually(ctx, func() error { - for hugepagesResource := range hugepages { - command := fmt.Sprintf("echo 0 > %s-%dkB/%s", hugepagesDirPrefix, resourceToSize[hugepagesResource], hugepagesCapacityFile) - if err := exec.Command("/bin/sh", "-c", command).Run(); err != nil { - return err - } - } - return nil - }, 30*time.Second, framework.Poll).Should(gomega.BeNil()) - } - runHugePagesTests := func() { ginkgo.It("should set correct hugetlb mount and limit under the container cgroup", func(ctx context.Context) { ginkgo.By("getting mounts for the test pod") @@ -349,12 +227,12 @@ var _ = SIGDescribe("HugePages", framework.WithSerial(), feature.HugePages, func // setup ginkgo.JustBeforeEach(func(ctx context.Context) { - setHugepages(ctx) + testutils.SetHugepages(ctx, hugepages) 
ginkgo.By("restarting kubelet to pick up pre-allocated hugepages") restartKubelet(ctx, true) - waitForHugepages(ctx) + testutils.WaitForHugepages(ctx, f, hugepages) pod := getHugepagesTestPod(f, limits, mounts, volumes) @@ -367,12 +245,12 @@ var _ = SIGDescribe("HugePages", framework.WithSerial(), feature.HugePages, func ginkgo.By(fmt.Sprintf("deleting test pod %s", testpod.Name)) e2epod.NewPodClient(f).DeleteSync(ctx, testpod.Name, metav1.DeleteOptions{}, f.Timeouts.PodDelete) - releaseHugepages(ctx) + testutils.ReleaseHugepages(ctx, hugepages) ginkgo.By("restarting kubelet to pick up pre-allocated hugepages") restartKubelet(ctx, true) - waitForHugepages(ctx) + testutils.WaitForHugepages(ctx, f, hugepages) }) ginkgo.Context("with the resources requests that contain only one hugepages resource ", func() { diff --git a/test/e2e_node/memory_manager_test.go b/test/e2e_node/memory_manager_test.go index 1ff4688d59c..15764bb6925 100644 --- a/test/e2e_node/memory_manager_test.go +++ b/test/e2e_node/memory_manager_test.go @@ -42,6 +42,7 @@ import ( "k8s.io/kubernetes/test/e2e/feature" "k8s.io/kubernetes/test/e2e/framework" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" + testutils "k8s.io/kubernetes/test/utils" admissionapi "k8s.io/pod-security-admission/api" "k8s.io/utils/cpuset" "k8s.io/utils/pointer" @@ -314,7 +315,7 @@ var _ = SIGDescribe("Memory Manager", framework.WithDisruptive(), framework.With } if is2MiHugepagesSupported == nil { - is2MiHugepagesSupported = pointer.BoolPtr(isHugePageAvailable(hugepagesSize2M)) + is2MiHugepagesSupported = pointer.BoolPtr(testutils.IsHugePageAvailable(hugepagesSize2M)) } if len(allNUMANodes) == 0 { @@ -325,7 +326,7 @@ var _ = SIGDescribe("Memory Manager", framework.WithDisruptive(), framework.With if *is2MiHugepagesSupported { ginkgo.By("Configuring hugepages") gomega.Eventually(ctx, func() error { - return configureHugePages(hugepagesSize2M, hugepages2MiCount, pointer.IntPtr(0)) + return 
testutils.ConfigureHugePages(hugepagesSize2M, hugepages2MiCount, pointer.IntPtr(0)) }, 30*time.Second, framework.Poll).Should(gomega.BeNil()) } }) @@ -358,7 +359,7 @@ var _ = SIGDescribe("Memory Manager", framework.WithDisruptive(), framework.With ginkgo.By("Releasing allocated hugepages") gomega.Eventually(ctx, func() error { // configure hugepages on the NUMA node 0 to avoid hugepages split across NUMA nodes - return configureHugePages(hugepagesSize2M, 0, pointer.IntPtr(0)) + return testutils.ConfigureHugePages(hugepagesSize2M, 0, pointer.IntPtr(0)) }, 90*time.Second, 15*time.Second).ShouldNot(gomega.HaveOccurred(), "failed to release hugepages") } }) diff --git a/test/utils/node.go b/test/utils/node.go index 3483088aa72..bcd4bab3c72 100644 --- a/test/utils/node.go +++ b/test/utils/node.go @@ -16,7 +16,39 @@ limitations under the License. package utils -import v1 "k8s.io/api/core/v1" +import ( + "context" + "fmt" + "os" + "os/exec" + "regexp" + "strconv" + "strings" + "time" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "k8s.io/kubernetes/test/e2e/framework" +) + +const ( + hugepagesCapacityFile = "nr_hugepages" + hugepagesDirPrefix = "/sys/kernel/mm/hugepages/hugepages" + + hugepagesSize2M = 2048 + hugepagesSize1G = 1048576 +) + +var ( + resourceToSize = map[string]int{ + v1.ResourceHugePagesPrefix + "2Mi": hugepagesSize2M, + v1.ResourceHugePagesPrefix + "1Gi": hugepagesSize1G, + } +) // GetNodeCondition extracts the provided condition from the given status and returns that. // Returns nil and -1 if the condition is not present, and the index of the located condition. 
@@ -31,3 +63,158 @@ func GetNodeCondition(status *v1.NodeStatus, conditionType v1.NodeConditionType) } return -1, nil } + +func SetHugepages(ctx context.Context, hugepages map[string]int) { + for hugepagesResource, count := range hugepages { + size := resourceToSize[hugepagesResource] + ginkgo.By(fmt.Sprintf("Verifying hugepages %d are supported", size)) + if !IsHugePageAvailable(size) { + skipf("skipping test because hugepages of size %d not supported", size) + return + } + + ginkgo.By(fmt.Sprintf("Configuring the host to reserve %d of pre-allocated hugepages of size %d", count, size)) + gomega.Eventually(ctx, func() error { + if err := ConfigureHugePages(size, count, nil); err != nil { + return err + } + return nil + }, 30*time.Second, framework.Poll).Should(gomega.BeNil()) + } +} + +func IsHugePageAvailable(size int) bool { + // e.g. /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + hugepagesFile := fmt.Sprintf("/sys/kernel/mm/hugepages/hugepages-%dkB/nr_hugepages", size) + if _, err := os.Stat(hugepagesFile); err != nil { + framework.Logf("Hugepages file %s not found: %v", hugepagesFile, err) + return false + } + return true +} + +// ConfigureHugePages attempts to allocate hugepages of the specified size +func ConfigureHugePages(hugepagesSize int, hugepagesCount int, numaNodeID *int) error { + // Compact memory to make bigger contiguous blocks of memory available + // before allocating huge pages. + // https://www.kernel.org/doc/Documentation/sysctl/vm.txt + if _, err := os.Stat("/proc/sys/vm/compact_memory"); err == nil { + if err := exec.Command("/bin/sh", "-c", "echo 1 > /proc/sys/vm/compact_memory").Run(); err != nil { + return err + } + } + + // e.g. hugepages/hugepages-2048kB/nr_hugepages + hugepagesSuffix := fmt.Sprintf("hugepages/hugepages-%dkB/%s", hugepagesSize, hugepagesCapacityFile) + + // e.g. 
/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + hugepagesFile := fmt.Sprintf("/sys/kernel/mm/%s", hugepagesSuffix) + if numaNodeID != nil { + // e.g. /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages + hugepagesFile = fmt.Sprintf("/sys/devices/system/node/node%d/%s", *numaNodeID, hugepagesSuffix) + } + + // Reserve number of hugepages + // e.g. /bin/sh -c "echo 5 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages" + command := fmt.Sprintf("echo %d > %s", hugepagesCount, hugepagesFile) + if err := exec.Command("/bin/sh", "-c", command).Run(); err != nil { + return err + } + + // verify that the number of hugepages was updated + // e.g. /bin/sh -c "cat /sys/kernel/mm/hugepages/hugepages-2048kB/vm.nr_hugepages" + command = fmt.Sprintf("cat %s", hugepagesFile) + outData, err := exec.Command("/bin/sh", "-c", command).Output() + if err != nil { + return err + } + + numHugePages, err := strconv.Atoi(strings.TrimSpace(string(outData))) + if err != nil { + return err + } + + framework.Logf("Hugepages total is set to %v", numHugePages) + if numHugePages == hugepagesCount { + return nil + } + + return fmt.Errorf("expected hugepages %v, but found %v", hugepagesCount, numHugePages) +} + +// TODO(KevinTMtz) - Deduplicate from test/e2e_node/util.go:restartKubelet +func RestartKubelet(ctx context.Context, running bool) { + kubeletServiceName := FindKubeletServiceName(running) + // reset the kubelet service start-limit-hit + stdout, err := exec.CommandContext(ctx, "sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput() + framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %s", err, string(stdout)) + + stdout, err = exec.CommandContext(ctx, "sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput() + framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %s", err, string(stdout)) +} + +func FindKubeletServiceName(running bool) string { + cmdLine := []string{ + 
"systemctl", "list-units", "*kubelet*", + } + if running { + cmdLine = append(cmdLine, "--state=running") + } + stdout, err := exec.Command("sudo", cmdLine...).CombinedOutput() + framework.ExpectNoError(err) + regex := regexp.MustCompile("(kubelet-\\w+)") + matches := regex.FindStringSubmatch(string(stdout)) + gomega.Expect(matches).ToNot(gomega.BeEmpty(), "Found more than one kubelet service running: %q", stdout) + kubeletServiceName := matches[0] + framework.Logf("Get running kubelet with systemctl: %v, %v", string(stdout), kubeletServiceName) + return kubeletServiceName +} + +func WaitForHugepages(ctx context.Context, f *framework.Framework, hugepages map[string]int) { + ginkgo.By("Waiting for hugepages resource to become available on the local node") + gomega.Eventually(ctx, func(ctx context.Context) error { + node, err := f.ClientSet.CoreV1().Nodes().Get(ctx, framework.TestContext.NodeName, metav1.GetOptions{}) + if err != nil { + return err + } + + for hugepagesResource, count := range hugepages { + capacity, ok := node.Status.Capacity[v1.ResourceName(hugepagesResource)] + if !ok { + return fmt.Errorf("the node does not have the resource %s", hugepagesResource) + } + + size, succeed := capacity.AsInt64() + if !succeed { + return fmt.Errorf("failed to convert quantity to int64") + } + + expectedSize := count * resourceToSize[hugepagesResource] * 1024 + if size != int64(expectedSize) { + return fmt.Errorf("the actual size %d is different from the expected one %d", size, expectedSize) + } + } + return nil + }, time.Minute, framework.Poll).Should(gomega.BeNil()) +} + +func ReleaseHugepages(ctx context.Context, hugepages map[string]int) { + ginkgo.By("Releasing hugepages") + gomega.Eventually(ctx, func() error { + for hugepagesResource := range hugepages { + command := fmt.Sprintf("echo 0 > %s-%dkB/%s", hugepagesDirPrefix, resourceToSize[hugepagesResource], hugepagesCapacityFile) + if err := exec.Command("/bin/sh", "-c", command).Run(); err != nil { + return err + 
} + } + return nil + }, 30*time.Second, framework.Poll).Should(gomega.BeNil()) +} + +// TODO(KevinTMtz) - Deduplicate from test/e2e/framework/skipper/skipper.go:Skipf +func skipf(format string, args ...any) { + msg := fmt.Sprintf(format, args...) + ginkgo.Skip(msg, 2) + + panic("unreachable") +}