diff --git a/test/e2e/nodefeature/nodefeature.go b/test/e2e/nodefeature/nodefeature.go
index ffa53ce8d42..aceb19ed4cb 100644
--- a/test/e2e/nodefeature/nodefeature.go
+++ b/test/e2e/nodefeature/nodefeature.go
@@ -104,6 +104,9 @@ var (
 	// TODO: document the feature (owning SIG, when to use this feature for a test)
 	SidecarContainers = framework.WithNodeFeature(framework.ValidNodeFeatures.Add("SidecarContainers"))
 
+	// KubeletSeparateDiskGC (owned by SIG Node) marks the e2e tests for split image/container filesystem garbage collection (KEP-4191).
+	KubeletSeparateDiskGC = framework.WithNodeFeature(framework.ValidNodeFeatures.Add("KubeletSeparateDiskGC"))
+
 	// TODO: document the feature (owning SIG, when to use this feature for a test)
 	SystemNodeCriticalPod = framework.WithNodeFeature(framework.ValidNodeFeatures.Add("SystemNodeCriticalPod"))
diff --git a/test/e2e_node/split_disk_test.go b/test/e2e_node/split_disk_test.go
new file mode 100644
index 00000000000..3ba77dd3f45
--- /dev/null
+++ b/test/e2e_node/split_disk_test.go
@@ -0,0 +1,315 @@
+/*
+Copyright 2024 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2enode
+
+import (
+	"context"
+	"fmt"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"time"
+
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
+	"k8s.io/kubernetes/pkg/features"
+	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
+	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
+	kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
+	"k8s.io/kubernetes/test/e2e/framework"
+	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
+	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
+	"k8s.io/kubernetes/test/e2e/nodefeature"
+	imageutils "k8s.io/kubernetes/test/utils/image"
+	admissionapi "k8s.io/pod-security-admission/api"
+
+	"github.com/onsi/ginkgo/v2"
+	"github.com/onsi/gomega"
+)
+
+var _ = SIGDescribe("KubeletSeparateDiskGC", nodefeature.KubeletSeparateDiskGC, func() {
+	f := framework.NewDefaultFramework("split-disk-test")
+	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
+	pressureTimeout := 10 * time.Minute
+	expectedNodeCondition := v1.NodeDiskPressure
+
+	ginkgo.BeforeEach(func(ctx context.Context) {
+		e2eskipper.SkipUnlessFeatureGateEnabled(features.KubeletSeparateDiskGC)
+		if !hasSplitFileSystem(ctx) {
+			ginkgo.Skip("skipping test: node does not have a split filesystem (images and containers share one filesystem)")
+		}
+	})
+
+	f.It("should display different stats for imageFs and containerFs", func(ctx context.Context) {
+		summary := eventuallyGetSummary(ctx)
+		gomega.Expect(summary.Node.Fs.AvailableBytes).ToNot(gomega.Equal(summary.Node.Runtime.ImageFs.AvailableBytes))
+		gomega.Expect(summary.Node.Fs.CapacityBytes).ToNot(gomega.Equal(summary.Node.Runtime.ImageFs.CapacityBytes))
+		// Node.Fs represents the root filesystem, where /var/lib/kubelet is located.
+		// Since graphroot is left at its default in storage.conf, the writeable container
+		// layers live on that same filesystem, so Node.Fs should match Runtime.ContainerFs.
+		gomega.Expect(summary.Node.Fs.AvailableBytes).To(gomega.Equal(summary.Node.Runtime.ContainerFs.AvailableBytes))
+		gomega.Expect(summary.Node.Fs.CapacityBytes).To(gomega.Equal(summary.Node.Runtime.ContainerFs.CapacityBytes))
+	})
+
+	f.Context("when there is disk pressure", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), func() {
+		f.Context("on imageFs", func() {
+			tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
+				initialConfig.EvictionHard = map[string]string{
+					string(evictionapi.SignalNodeFsAvailable):      "30%",
+					string(evictionapi.SignalContainerFsAvailable): "30%",
+					string(evictionapi.SignalImageFsAvailable):     "30%",
+				}
+				initialConfig.EvictionMinimumReclaim = map[string]string{}
+				ginkgo.By(fmt.Sprintf("EvictionHard %s", initialConfig.EvictionHard))
+			})
+
+			runImageFsPressureTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
+				{
+					evictionPriority: 1,
+					pod:              innocentPod(),
+				},
+			})
+		})
+
+		f.Context("on containerFs", func() {
+			expectedStarvedResource := v1.ResourceEphemeralStorage
+			diskTestInMb := 5000
+
+			tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
+				initialConfig.EvictionHard = map[string]string{
+					string(evictionapi.SignalNodeFsAvailable):  "30%",
+					string(evictionapi.SignalImageFsAvailable): "30%",
+				}
+				initialConfig.EvictionMinimumReclaim = map[string]string{}
+				ginkgo.By(fmt.Sprintf("EvictionHard %s", initialConfig.EvictionHard))
+			})
+			runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
+				{
+					// This pod should exceed the disk capacity on nodeFs, since it writes a large
+					// amount of data to its writeable layer.
+					evictionPriority: 1,
+					pod: diskConsumingPod("container-emptydir-disk-limit", diskTestInMb, nil,
+						v1.ResourceRequirements{}),
+				},
+			})
+		})
+	})
+})
+
+// runImageFsPressureTest is similar to the eviction tests but skips the checks on eviction itself,
+// since the goal is to induce disk pressure on the imageFs filesystem.
+func runImageFsPressureTest(f *framework.Framework, pressureTimeout time.Duration, expectedNodeCondition v1.NodeConditionType, logFunc func(ctx context.Context), testSpecs []podEvictSpec) {
+	// Place the remainder of the test within a context so that the kubelet config is set before and after the test.
+	ginkgo.Context("", func() {
+		ginkgo.BeforeEach(func(ctx context.Context) {
+			// Reduce memory usage in the allocatable cgroup so that we do not trigger MemoryPressure.
+			reduceAllocatableMemoryUsageIfCgroupv1()
+			// Nodes do not immediately report local storage capacity,
+			// so wait a little to allow pods requesting local storage to be scheduled.
+			time.Sleep(30 * time.Second)
+			ginkgo.By("setting up pods to be used by tests")
+			pods := []*v1.Pod{}
+			for _, spec := range testSpecs {
+				pods = append(pods, spec.pod)
+			}
+			e2epod.NewPodClient(f).CreateBatch(ctx, pods)
+		})
+
+		ginkgo.It("should evict all of the correct pods", func(ctx context.Context) {
+			_, is, err := getCRIClient()
+			framework.ExpectNoError(err)
+			resp, err := is.ImageFsInfo(ctx)
+			framework.ExpectNoError(err)
+			gomega.Expect(resp.ImageFilesystems).NotTo(gomega.BeEmpty())
+			gomega.Expect(resp.ImageFilesystems[0].FsId).NotTo(gomega.BeNil())
+			diskToPressure := filepath.Dir(resp.ImageFilesystems[0].FsId.Mountpoint)
+			ginkgo.By(fmt.Sprintf("Got imageFs directory: %s", diskToPressure))
+			imagesLenBeforeGC := 1
+			sizeOfPressure := "8000"
+			gomega.Eventually(ctx, func(ctx context.Context) error {
+				images, err := is.ListImages(ctx, &runtimeapi.ImageFilter{})
+				imagesLenBeforeGC = len(images)
+				return err
+			}, 1*time.Minute, evictionPollInterval).Should(gomega.Succeed())
+			ginkgo.By(fmt.Sprintf("Number of images found before GC was %d", imagesLenBeforeGC))
+			ginkgo.By(fmt.Sprintf("Inducing disk pressure on %s with size %s MB", diskToPressure, sizeOfPressure))
+			gomega.Expect(runDDOnFilesystem(diskToPressure, sizeOfPressure)).Should(gomega.Succeed())
+			ginkgo.By(fmt.Sprintf("Waiting for node to have NodeCondition: %s", expectedNodeCondition))
+
+			gomega.Eventually(ctx, func(ctx context.Context) error {
+				logFunc(ctx)
+				if expectedNodeCondition == noPressure || hasNodeCondition(ctx, f, expectedNodeCondition) {
+					return nil
+				}
+				return fmt.Errorf("NodeCondition: %s not encountered", expectedNodeCondition)
+			}, pressureTimeout, evictionPollInterval).Should(gomega.BeNil())
+
+			ginkgo.By("Waiting for evictions to occur")
+			gomega.Eventually(ctx, func(ctx context.Context) error {
+				if expectedNodeCondition != noPressure {
+					if hasNodeCondition(ctx, f, expectedNodeCondition) {
+						framework.Logf("Node has condition: %s", expectedNodeCondition)
+					} else {
+						framework.Logf("Node does NOT have condition: %s", expectedNodeCondition)
+					}
+				}
+				logKubeletLatencyMetrics(ctx, kubeletmetrics.EvictionStatsAgeKey)
+				logFunc(ctx)
+				return verifyEvictionOrdering(ctx, f, testSpecs)
+			}, pressureTimeout, evictionPollInterval).Should(gomega.Succeed())
+
+			ginkgo.By("checking for the expected pod conditions for evicted pods")
+			verifyPodConditions(ctx, f, testSpecs)
+
+			gomega.Eventually(ctx, func(ctx context.Context) error {
+				images, err := is.ListImages(ctx, &runtimeapi.ImageFilter{})
+				if err != nil {
+					return err
+				}
+				imagesLenAfterGC := len(images)
+				if imagesLenAfterGC < imagesLenBeforeGC {
+					return nil
+				}
+				return fmt.Errorf("garbage collection of images should have occurred. before: %d after: %d", imagesLenBeforeGC, imagesLenAfterGC)
+			}, pressureTimeout, evictionPollInterval).Should(gomega.Succeed())
+
+			gomega.Expect(removeDiskPressure(diskToPressure)).Should(gomega.Succeed(), "removing disk pressure should not fail")
+
+			ginkgo.By("making sure pressure from the test has surfaced before continuing")
+
+			ginkgo.By(fmt.Sprintf("Waiting for NodeCondition: %s to no longer exist on the node", expectedNodeCondition))
+			gomega.Eventually(ctx, func(ctx context.Context) error {
+				logFunc(ctx)
+				logKubeletLatencyMetrics(ctx, kubeletmetrics.EvictionStatsAgeKey)
+				if expectedNodeCondition != noPressure && hasNodeCondition(ctx, f, expectedNodeCondition) {
+					return fmt.Errorf("conditions haven't returned to normal, node still has: %s", expectedNodeCondition)
+				}
+				return nil
+			}, pressureTimeout, evictionPollInterval).Should(gomega.BeNil())
+
+			ginkgo.By("checking for stable, pressure-free condition without unexpected pod failures")
+			gomega.Consistently(ctx, func(ctx context.Context) error {
+				if expectedNodeCondition != noPressure && hasNodeCondition(ctx, f, expectedNodeCondition) {
+					return fmt.Errorf("condition %s disappeared and then reappeared", expectedNodeCondition)
+				}
+				logFunc(ctx)
+				logKubeletLatencyMetrics(ctx, kubeletmetrics.EvictionStatsAgeKey)
+				return verifyEvictionOrdering(ctx, f, testSpecs)
+			}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(gomega.Succeed())
+		})
+
+		ginkgo.AfterEach(func(ctx context.Context) {
+			prePullImagesIfNecessary := func() {
+				if expectedNodeCondition == v1.NodeDiskPressure && framework.TestContext.PrepullImages {
+					// The disk eviction test may cause the pre-pulled images to be evicted,
+					// so pull those images again to ensure this test does not affect subsequent tests.
+					err := PrePullAllImages()
+					framework.ExpectNoError(err)
+				}
+			}
+			// Run the image pre-pull in a `defer` so that images are pulled even when the subsequent assertions fail.
+			defer prePullImagesIfNecessary()
+
+			ginkgo.By("deleting pods")
+			for _, spec := range testSpecs {
+				ginkgo.By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
+				e2epod.NewPodClient(f).DeleteSync(ctx, spec.pod.Name, metav1.DeleteOptions{}, 10*time.Minute)
+			}
+
+			// In case a test fails before verifying that the NodeCondition no longer exists on the node,
+			// wait here for the NodeCondition to disappear.
+			ginkgo.By(fmt.Sprintf("making sure NodeCondition %s no longer exists on the node", expectedNodeCondition))
+			gomega.Eventually(ctx, func(ctx context.Context) error {
+				if expectedNodeCondition != noPressure && hasNodeCondition(ctx, f, expectedNodeCondition) {
+					return fmt.Errorf("conditions haven't returned to normal, node still has: %s", expectedNodeCondition)
+				}
+				return nil
+			}, pressureDisappearTimeout, evictionPollInterval).Should(gomega.BeNil())
+
+			reduceAllocatableMemoryUsageIfCgroupv1()
+			ginkgo.By("making sure we have all the required images for testing")
+			prePullImagesIfNecessary()
+
+			// Ensure that the NodeCondition hasn't returned after pulling images.
+			ginkgo.By(fmt.Sprintf("making sure NodeCondition %s doesn't reappear after pulling images", expectedNodeCondition))
+			gomega.Eventually(ctx, func(ctx context.Context) error {
+				if expectedNodeCondition != noPressure && hasNodeCondition(ctx, f, expectedNodeCondition) {
+					return fmt.Errorf("conditions haven't returned to normal, node still has: %s", expectedNodeCondition)
+				}
+				return nil
+			}, pressureDisappearTimeout, evictionPollInterval).Should(gomega.BeNil())
+
+			ginkgo.By("making sure we can start a new pod after the test")
+			podName := "test-admit-pod"
+			e2epod.NewPodClient(f).CreateSync(ctx, &v1.Pod{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: podName,
+				},
+				Spec: v1.PodSpec{
+					RestartPolicy: v1.RestartPolicyNever,
+					Containers: []v1.Container{
+						{
+							Image: imageutils.GetPauseImageName(),
+							Name:  podName,
+						},
+					},
+				},
+			})
+
+			if ginkgo.CurrentSpecReport().Failed() {
+				if framework.TestContext.DumpLogsOnFailure {
+					logPodEvents(ctx, f)
+					logNodeEvents(ctx, f)
+				}
+			}
+		})
+	})
+}
+
+// runDDOnFilesystem fills the given filesystem with sizeOfPressure megabytes of zeroes to induce disk pressure.
+func runDDOnFilesystem(diskToPressure, sizeOfPressure string) error {
+	ddArgs := fmt.Sprintf("if=/dev/zero of=%s/file.txt bs=1M count=%s", diskToPressure, sizeOfPressure)
+	ginkgo.By(fmt.Sprintf("running dd with %s", ddArgs))
+	cmd := exec.Command("dd", strings.Split(ddArgs, " ")...)
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		framework.Logf("dd failed, output: %s, error: %v", string(output), err)
+	}
+	return err
+}
+
+// removeDiskPressure deletes the file written by runDDOnFilesystem to relieve the disk pressure.
+func removeDiskPressure(diskToPressure string) error {
+	fileToRemove := fmt.Sprintf("%s/file.txt", diskToPressure)
+	ginkgo.By(fmt.Sprintf("calling rm %s", fileToRemove))
+	cmd := exec.Command("rm", fileToRemove)
+	_, err := cmd.CombinedOutput()
+	return err
+}
+
+// hasSplitFileSystem reports whether the container runtime keeps images and writeable
+// container layers on different filesystems, based on the mountpoints returned by CRI.
+func hasSplitFileSystem(ctx context.Context) bool {
+	_, is, err := getCRIClient()
+	framework.ExpectNoError(err)
+	resp, err := is.ImageFsInfo(ctx)
+	framework.ExpectNoError(err)
+	if len(resp.ContainerFilesystems) == 0 || len(resp.ImageFilesystems) == 0 {
+		return false
+	}
+	if resp.ContainerFilesystems[0].FsId != nil && resp.ImageFilesystems[0].FsId != nil {
+		return resp.ContainerFilesystems[0].FsId.Mountpoint != resp.ImageFilesystems[0].FsId.Mountpoint
+	}
+	return false
+}
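For readers who want to check a node's eligibility for these tests by hand, the same CRI ImageFsInfo call that hasSplitFileSystem relies on can be queried directly. The sketch below is illustrative only and is not part of the patch; it assumes the suite's getCRIClient helper and the nil-safe protobuf getters on the CRI response types, and it simply logs the two mountpoints that the skip logic compares.

// logFilesystemLayout is a hypothetical debugging helper (not in the patch above) that logs
// the image and container filesystem mountpoints the runtime reports over CRI. A node only
// exercises the KubeletSeparateDiskGC tests when the two mountpoints differ.
func logFilesystemLayout(ctx context.Context) {
	_, is, err := getCRIClient() // test-suite helper, as used by hasSplitFileSystem
	framework.ExpectNoError(err)
	resp, err := is.ImageFsInfo(ctx)
	framework.ExpectNoError(err)
	if len(resp.ImageFilesystems) == 0 || len(resp.ContainerFilesystems) == 0 {
		framework.Logf("runtime did not report image/container filesystem usage")
		return
	}
	// GetFsId/GetMountpoint are the nil-safe getters generated for the CRI protobuf types.
	imageFs := resp.ImageFilesystems[0].GetFsId().GetMountpoint()
	containerFs := resp.ContainerFilesystems[0].GetFsId().GetMountpoint()
	framework.Logf("imageFs mountpoint: %q, containerFs mountpoint: %q, split filesystem: %t",
		imageFs, containerFs, imageFs != containerFs)
}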