All code must use the context from Ginkgo when doing API calls or polling for a change; otherwise the code will not return immediately when the test is aborted.
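For example, a polling helper that follows this rule threads the Ginkgo-provided context into both gomega.Eventually and the API call, so the poll loop stops as soon as the spec is aborted. This is a minimal sketch (not part of the file itself), assuming the f, framework, and constant names defined later in this file:

	ginkgo.It("waits for the hugepages resource", func(ctx context.Context) {
		gomega.Eventually(ctx, func(ctx context.Context) error {
			node, err := f.ClientSet.CoreV1().Nodes().Get(ctx, framework.TestContext.NodeName, metav1.GetOptions{})
			if err != nil {
				return err
			}
			if _, ok := node.Status.Capacity[hugepagesResourceName2Mi]; !ok {
				return fmt.Errorf("hugepages-2Mi not reported yet")
			}
			return nil
		}, time.Minute, framework.Poll).Should(gomega.Succeed())
	})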
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2enode

import (
	"context"
	"fmt"
	"os"
	"os/exec"
	"strconv"
	"strings"
	"time"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/uuid"
	"k8s.io/kubernetes/pkg/kubelet/cm"
	"k8s.io/kubernetes/test/e2e/framework"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
	admissionapi "k8s.io/pod-security-admission/api"
)

const (
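	// hugepages sizes are expressed in kB, matching the sysfs directory names (e.g. hugepages-2048kB)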
	hugepagesSize2M          = 2048
	hugepagesSize1G          = 1048576
	hugepagesDirPrefix       = "/sys/kernel/mm/hugepages/hugepages"
	hugepagesCapacityFile    = "nr_hugepages"
	hugepagesResourceName2Mi = "hugepages-2Mi"
	hugepagesResourceName1Gi = "hugepages-1Gi"
	hugepagesCgroup2MB       = "hugetlb.2MB"
	hugepagesCgroup1GB       = "hugetlb.1GB"
	mediumHugepages          = "HugePages"
	mediumHugepages2Mi       = "HugePages-2Mi"
	mediumHugepages1Gi       = "HugePages-1Gi"
)

var (
	resourceToSize = map[string]int{
		hugepagesResourceName2Mi: hugepagesSize2M,
		hugepagesResourceName1Gi: hugepagesSize1G,
	}
	resourceToCgroup = map[string]string{
		hugepagesResourceName2Mi: hugepagesCgroup2MB,
		hugepagesResourceName1Gi: hugepagesCgroup1GB,
	}
)

// makePodToVerifyHugePages returns a pod that verifies the hugetlb limit applied to the specified cgroup
func makePodToVerifyHugePages(baseName string, hugePagesLimit resource.Quantity, hugepagesCgroup string) *v1.Pod {
	// convert the cgroup name to its literal form
	cgroupName := cm.NewCgroupName(cm.RootCgroupName, defaultNodeAllocatableCgroup, baseName)
	cgroupFsName := ""
	if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" {
		cgroupFsName = cgroupName.ToSystemd()
	} else {
		cgroupFsName = cgroupName.ToCgroupfs()
	}

	hugetlbLimitFile := ""
	// the command below compares the expected limit against the actual value read from the pod cgroup's hugetlb limit file,
	// e.g. hugetlb.2MB.max on cgroup v2 or hugetlb.2MB.limit_in_bytes on cgroup v1
	if IsCgroup2UnifiedMode() {
		hugetlbLimitFile = fmt.Sprintf("/tmp/%s/%s.max", cgroupFsName, hugepagesCgroup)
	} else {
		hugetlbLimitFile = fmt.Sprintf("/tmp/hugetlb/%s/%s.limit_in_bytes", cgroupFsName, hugepagesCgroup)
	}

	command := fmt.Sprintf("expected=%v; actual=$(cat %v); if [ \"$expected\" -ne \"$actual\" ]; then exit 1; fi; ", hugePagesLimit.Value(), hugetlbLimitFile)
	framework.Logf("Pod to run command: %v", command)
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: "pod" + string(uuid.NewUUID()),
		},
		Spec: v1.PodSpec{
			RestartPolicy: v1.RestartPolicyNever,
			Containers: []v1.Container{
				{
					Image:   busyboxImage,
					Name:    "container" + string(uuid.NewUUID()),
					Command: []string{"sh", "-c", command},
					VolumeMounts: []v1.VolumeMount{
						{
							Name:      "sysfscgroup",
							MountPath: "/tmp",
						},
					},
				},
			},
			Volumes: []v1.Volume{
				{
					Name: "sysfscgroup",
					VolumeSource: v1.VolumeSource{
						HostPath: &v1.HostPathVolumeSource{Path: "/sys/fs/cgroup"},
					},
				},
			},
		},
	}
	return pod
}

// configureHugePages attempts to reserve the requested number of hugepages of the specified size,
// optionally restricted to the given NUMA node
func configureHugePages(hugepagesSize int, hugepagesCount int, numaNodeID *int) error {
	// Compact memory to make bigger contiguous blocks of memory available
	// before allocating huge pages.
	// https://www.kernel.org/doc/Documentation/sysctl/vm.txt
	if _, err := os.Stat("/proc/sys/vm/compact_memory"); err == nil {
		if err := exec.Command("/bin/sh", "-c", "echo 1 > /proc/sys/vm/compact_memory").Run(); err != nil {
			return err
		}
	}

	// e.g. hugepages/hugepages-2048kB/nr_hugepages
	hugepagesSuffix := fmt.Sprintf("hugepages/hugepages-%dkB/%s", hugepagesSize, hugepagesCapacityFile)

	// e.g. /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
	hugepagesFile := fmt.Sprintf("/sys/kernel/mm/%s", hugepagesSuffix)
	if numaNodeID != nil {
		// e.g. /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
		hugepagesFile = fmt.Sprintf("/sys/devices/system/node/node%d/%s", *numaNodeID, hugepagesSuffix)
	}

	// Reserve the requested number of hugepages
	// e.g. /bin/sh -c "echo 5 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages"
	command := fmt.Sprintf("echo %d > %s", hugepagesCount, hugepagesFile)
	if err := exec.Command("/bin/sh", "-c", command).Run(); err != nil {
		return err
	}

	// verify that the number of hugepages was updated
	// e.g. /bin/sh -c "cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages"
	command = fmt.Sprintf("cat %s", hugepagesFile)
	outData, err := exec.Command("/bin/sh", "-c", command).Output()
	if err != nil {
		return err
	}

	numHugePages, err := strconv.Atoi(strings.TrimSpace(string(outData)))
	if err != nil {
		return err
	}

	framework.Logf("Hugepages total is set to %v", numHugePages)
	if numHugePages == hugepagesCount {
		return nil
	}

	return fmt.Errorf("expected hugepages %v, but found %v", hugepagesCount, numHugePages)
}

// isHugePageAvailable returns true if hugepages of the specified size are available on the host
func isHugePageAvailable(hugepagesSize int) bool {
	path := fmt.Sprintf("%s-%dkB/%s", hugepagesDirPrefix, hugepagesSize, hugepagesCapacityFile)
	if _, err := os.Stat(path); err != nil {
		return false
	}
	return true
}

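// getHugepagesTestPod returns a pod that requests the given hugepages limits and mounts the provided hugepages volumes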
func getHugepagesTestPod(f *framework.Framework, limits v1.ResourceList, mounts []v1.VolumeMount, volumes []v1.Volume) *v1.Pod {
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			GenerateName: "hugepages-",
			Namespace:    f.Namespace.Name,
		},
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{
					Name:  "container" + string(uuid.NewUUID()),
					Image: busyboxImage,
					Resources: v1.ResourceRequirements{
						Limits: limits,
					},
					Command:      []string{"sleep", "3600"},
					VolumeMounts: mounts,
				},
			},
			Volumes: volumes,
		},
	}
}

// Serial because the test updates kubelet configuration.
var _ = SIGDescribe("HugePages [Serial] [Feature:HugePages][NodeSpecialFeature:HugePages]", func() {
	f := framework.NewDefaultFramework("hugepages-test")
	f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged

	ginkgo.It("should remove resources for huge page sizes no longer supported", func(ctx context.Context) {
		ginkgo.By("mimicking support for 9Mi of 3Mi huge page memory by patching the node status")
		patch := []byte(`[{"op": "add", "path": "/status/capacity/hugepages-3Mi", "value": "9Mi"}, {"op": "add", "path": "/status/allocatable/hugepages-3Mi", "value": "9Mi"}]`)
		result := f.ClientSet.CoreV1().RESTClient().Patch(types.JSONPatchType).Resource("nodes").Name(framework.TestContext.NodeName).SubResource("status").Body(patch).Do(ctx)
		framework.ExpectNoError(result.Error(), "while patching")

		node, err := f.ClientSet.CoreV1().Nodes().Get(ctx, framework.TestContext.NodeName, metav1.GetOptions{})
		framework.ExpectNoError(err, "while getting node status")

		ginkgo.By("Verifying that the node now supports huge pages with size 3Mi")
		value, ok := node.Status.Capacity["hugepages-3Mi"]
		framework.ExpectEqual(ok, true, "capacity should contain resource hugepages-3Mi")
		framework.ExpectEqual(value.String(), "9Mi", "huge pages with size 3Mi should be supported")

		ginkgo.By("restarting the kubelet and verifying that huge pages with size 3Mi are not supported")
		restartKubelet(true)

		ginkgo.By("verifying that the hugepages-3Mi resource is no longer present")
		gomega.Eventually(ctx, func() bool {
			node, err = f.ClientSet.CoreV1().Nodes().Get(ctx, framework.TestContext.NodeName, metav1.GetOptions{})
			framework.ExpectNoError(err, "while getting node status")
			_, isPresent := node.Status.Capacity["hugepages-3Mi"]
			return isPresent
		}, 30*time.Second, framework.Poll).Should(gomega.Equal(false))
	})

	ginkgo.It("should add resources for new huge page sizes on kubelet restart", func(ctx context.Context) {
		ginkgo.By("Stopping kubelet")
		startKubelet := stopKubelet()
		ginkgo.By(`Patching away support for hugepage resource "hugepages-2Mi"`)
		patch := []byte(`[{"op": "remove", "path": "/status/capacity/hugepages-2Mi"}, {"op": "remove", "path": "/status/allocatable/hugepages-2Mi"}]`)
		result := f.ClientSet.CoreV1().RESTClient().Patch(types.JSONPatchType).Resource("nodes").Name(framework.TestContext.NodeName).SubResource("status").Body(patch).Do(ctx)
		framework.ExpectNoError(result.Error(), "while patching")

		ginkgo.By("Starting kubelet again")
		startKubelet()

		ginkgo.By("verifying that the hugepages-2Mi resource is present")
		gomega.Eventually(ctx, func() bool {
			node, err := f.ClientSet.CoreV1().Nodes().Get(ctx, framework.TestContext.NodeName, metav1.GetOptions{})
			framework.ExpectNoError(err, "while getting node status")
			_, isPresent := node.Status.Capacity["hugepages-2Mi"]
			return isPresent
		}, 30*time.Second, framework.Poll).Should(gomega.Equal(true))
	})

	ginkgo.When("starting the pod", func() {
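		// populated by each Context's BeforeEach below and consumed by the shared setup and specs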
		var (
			testpod   *v1.Pod
			limits    v1.ResourceList
			mounts    []v1.VolumeMount
			volumes   []v1.Volume
			hugepages map[string]int
		)

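		// setHugepages reserves the requested number of pre-allocated hugepages on the host, skipping the test if a size is unsupported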
		setHugepages := func(ctx context.Context) {
			for hugepagesResource, count := range hugepages {
				size := resourceToSize[hugepagesResource]
				ginkgo.By(fmt.Sprintf("Verifying hugepages %d are supported", size))
				if !isHugePageAvailable(size) {
					e2eskipper.Skipf("skipping test because hugepages of size %d not supported", size)
					return
				}

				ginkgo.By(fmt.Sprintf("Configuring the host to reserve %d of pre-allocated hugepages of size %d", count, size))
				gomega.Eventually(ctx, func() error {
					if err := configureHugePages(size, count, nil); err != nil {
						return err
					}
					return nil
				}, 30*time.Second, framework.Poll).Should(gomega.BeNil())
			}
		}

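		// waitForHugepages waits until the node reports the expected capacity for every hugepages resource under test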
		waitForHugepages := func(ctx context.Context) {
			ginkgo.By("Waiting for hugepages resource to become available on the local node")
			gomega.Eventually(ctx, func(ctx context.Context) error {
				node, err := f.ClientSet.CoreV1().Nodes().Get(ctx, framework.TestContext.NodeName, metav1.GetOptions{})
				if err != nil {
					return err
				}

				for hugepagesResource, count := range hugepages {
					capacity, ok := node.Status.Capacity[v1.ResourceName(hugepagesResource)]
					if !ok {
						return fmt.Errorf("the node does not have the resource %s", hugepagesResource)
					}

					size, succeed := capacity.AsInt64()
					if !succeed {
						return fmt.Errorf("failed to convert quantity to int64")
					}

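					// resourceToSize is in kB, while node capacity is reported in bytes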
					expectedSize := count * resourceToSize[hugepagesResource] * 1024
					if size != int64(expectedSize) {
						return fmt.Errorf("the actual size %d is different from the expected one %d", size, expectedSize)
					}
				}
				return nil
			}, time.Minute, framework.Poll).Should(gomega.BeNil())
		}

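		// releaseHugepages returns the reserved hugepages to the host by writing 0 to the corresponding sysfs files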
		releaseHugepages := func(ctx context.Context) {
			ginkgo.By("Releasing hugepages")
			gomega.Eventually(ctx, func() error {
				for hugepagesResource := range hugepages {
					command := fmt.Sprintf("echo 0 > %s-%dkB/%s", hugepagesDirPrefix, resourceToSize[hugepagesResource], hugepagesCapacityFile)
					if err := exec.Command("/bin/sh", "-c", command).Run(); err != nil {
						return err
					}
				}
				return nil
			}, 30*time.Second, framework.Poll).Should(gomega.BeNil())
		}

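		// runHugePagesTests registers the spec that verifies the hugetlb mounts and the pod-level cgroup limits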
		runHugePagesTests := func() {
			ginkgo.It("should set correct hugetlb mount and limit under the container cgroup", func(ctx context.Context) {
				ginkgo.By("getting mounts for the test pod")
				command := []string{"mount"}
				out := e2epod.ExecCommandInContainer(f, testpod.Name, testpod.Spec.Containers[0].Name, command...)

				for _, mount := range mounts {
					ginkgo.By(fmt.Sprintf("checking that the hugetlb mount %s exists under the container", mount.MountPath))
					gomega.Expect(out).To(gomega.ContainSubstring(mount.MountPath))
				}

				for resourceName := range hugepages {
					verifyPod := makePodToVerifyHugePages(
						"pod"+string(testpod.UID),
						testpod.Spec.Containers[0].Resources.Limits[v1.ResourceName(resourceName)],
						resourceToCgroup[resourceName],
					)
					ginkgo.By("checking if the expected hugetlb settings were applied")
					e2epod.NewPodClient(f).Create(ctx, verifyPod)
					err := e2epod.WaitForPodSuccessInNamespace(ctx, f.ClientSet, verifyPod.Name, f.Namespace.Name)
					framework.ExpectNoError(err)
				}
			})
		}

		// setup
		ginkgo.JustBeforeEach(func(ctx context.Context) {
			setHugepages(ctx)

			ginkgo.By("restarting kubelet to pick up pre-allocated hugepages")
			restartKubelet(true)

			waitForHugepages(ctx)

			pod := getHugepagesTestPod(f, limits, mounts, volumes)

			ginkgo.By("running a test pod that requests hugepages")
			testpod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
		})

		// use JustAfterEach because the framework tears down the client in AfterEach
		ginkgo.JustAfterEach(func(ctx context.Context) {
			ginkgo.By(fmt.Sprintf("deleting test pod %s", testpod.Name))
			e2epod.NewPodClient(f).DeleteSync(ctx, testpod.Name, metav1.DeleteOptions{}, 2*time.Minute)

			releaseHugepages(ctx)

			ginkgo.By("restarting kubelet to pick up the released hugepages")
			restartKubelet(true)

			waitForHugepages(ctx)
		})

		ginkgo.Context("with resource requests that contain only one hugepages resource", func() {
			ginkgo.Context("with the backward compatible API", func() {
				ginkgo.BeforeEach(func() {
					limits = v1.ResourceList{
						v1.ResourceCPU:           resource.MustParse("10m"),
						v1.ResourceMemory:        resource.MustParse("100Mi"),
						hugepagesResourceName2Mi: resource.MustParse("6Mi"),
					}
					mounts = []v1.VolumeMount{
						{
							Name:      "hugepages",
							MountPath: "/hugepages",
						},
					}
					volumes = []v1.Volume{
						{
							Name: "hugepages",
							VolumeSource: v1.VolumeSource{
								EmptyDir: &v1.EmptyDirVolumeSource{
									Medium: mediumHugepages,
								},
							},
						},
					}
					hugepages = map[string]int{hugepagesResourceName2Mi: 5}
				})
				// run tests
				runHugePagesTests()
			})

			ginkgo.Context("with the new API", func() {
				ginkgo.BeforeEach(func() {
					limits = v1.ResourceList{
						v1.ResourceCPU:           resource.MustParse("10m"),
						v1.ResourceMemory:        resource.MustParse("100Mi"),
						hugepagesResourceName2Mi: resource.MustParse("6Mi"),
					}
					mounts = []v1.VolumeMount{
						{
							Name:      "hugepages-2mi",
							MountPath: "/hugepages-2Mi",
						},
					}
					volumes = []v1.Volume{
						{
							Name: "hugepages-2mi",
							VolumeSource: v1.VolumeSource{
								EmptyDir: &v1.EmptyDirVolumeSource{
									Medium: mediumHugepages2Mi,
								},
							},
						},
					}
					hugepages = map[string]int{hugepagesResourceName2Mi: 5}
				})

				runHugePagesTests()
			})

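			// reset the expected counts to zero so that the When-level JustAfterEach verifies the hugepages were released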
			ginkgo.JustAfterEach(func() {
				hugepages = map[string]int{hugepagesResourceName2Mi: 0}
			})
		})

		ginkgo.Context("with resource requests that contain multiple hugepages resources", func() {
			ginkgo.BeforeEach(func() {
				hugepages = map[string]int{
					hugepagesResourceName2Mi: 5,
					hugepagesResourceName1Gi: 1,
				}
				limits = v1.ResourceList{
					v1.ResourceCPU:           resource.MustParse("10m"),
					v1.ResourceMemory:        resource.MustParse("100Mi"),
					hugepagesResourceName2Mi: resource.MustParse("6Mi"),
					hugepagesResourceName1Gi: resource.MustParse("1Gi"),
				}
				mounts = []v1.VolumeMount{
					{
						Name:      "hugepages-2mi",
						MountPath: "/hugepages-2Mi",
					},
					{
						Name:      "hugepages-1gi",
						MountPath: "/hugepages-1Gi",
					},
				}
				volumes = []v1.Volume{
					{
						Name: "hugepages-2mi",
						VolumeSource: v1.VolumeSource{
							EmptyDir: &v1.EmptyDirVolumeSource{
								Medium: mediumHugepages2Mi,
							},
						},
					},
					{
						Name: "hugepages-1gi",
						VolumeSource: v1.VolumeSource{
							EmptyDir: &v1.EmptyDirVolumeSource{
								Medium: mediumHugepages1Gi,
							},
						},
					},
				}
			})

			runHugePagesTests()

			ginkgo.JustAfterEach(func() {
				hugepages = map[string]int{
					hugepagesResourceName2Mi: 0,
					hugepagesResourceName1Gi: 0,
				}
			})
		})
	})
})