Merge pull request #125417 from bitoku/splitfs
KEP-4191 (Split Image Filesystem): add end-to-end tests
Commit: ad72be434d
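
For intuition: a "split image filesystem" node is one where the container runtime keeps its image store on a different filesystem (device) than the container writable layers and the kubelet root directory. The tests in this change detect the split through the CRI ImageFsInfo call (see hasSplitFileSystem below); purely as an illustration, here is a standalone, Linux-only sketch that compares stat(2) device IDs of two hypothetical paths. The paths and the helper name are assumptions for the sketch, not part of this change.

package main

import (
    "fmt"
    "syscall"
)

// onSameFilesystem reports whether two paths live on the same filesystem
// by comparing the device IDs returned by stat(2). Linux-only sketch.
func onSameFilesystem(a, b string) (bool, error) {
    var sa, sb syscall.Stat_t
    if err := syscall.Stat(a, &sa); err != nil {
        return false, err
    }
    if err := syscall.Stat(b, &sb); err != nil {
        return false, err
    }
    return sa.Dev == sb.Dev, nil
}

func main() {
    // Hypothetical locations: kubelet root dir vs. an image store on a separate mount.
    same, err := onSameFilesystem("/var/lib/kubelet", "/var/lib/containers/storage")
    if err != nil {
        fmt.Println("stat failed:", err)
        return
    }
    fmt.Println("split filesystem:", !same)
}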
@@ -104,6 +104,9 @@ var (
    // TODO: document the feature (owning SIG, when to use this feature for a test)
    SidecarContainers = framework.WithNodeFeature(framework.ValidNodeFeatures.Add("SidecarContainers"))

    // Sig-node: add e2e tests for KEP-4191
    KubeletSeparateDiskGC = framework.WithNodeFeature(framework.ValidNodeFeatures.Add("KubeletSeparateDiskGC"))

    // TODO: document the feature (owning SIG, when to use this feature for a test)
    SystemNodeCriticalPod = framework.WithNodeFeature(framework.ValidNodeFeatures.Add("SystemNodeCriticalPod"))

test/e2e_node/split_disk_test.go (new file, 315 lines)
@@ -0,0 +1,315 @@
/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2enode

import (
    "context"
    "fmt"
    "os/exec"
    "path/filepath"
    "strings"
    "time"

    v1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
    "k8s.io/kubernetes/pkg/features"
    kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
    evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
    kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
    "k8s.io/kubernetes/test/e2e/framework"
    e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
    "k8s.io/kubernetes/test/e2e/nodefeature"
    imageutils "k8s.io/kubernetes/test/utils/image"
    admissionapi "k8s.io/pod-security-admission/api"

    "github.com/onsi/ginkgo/v2"
    "github.com/onsi/gomega"
)

var _ = SIGDescribe("KubeletSeparateDiskGC", nodefeature.KubeletSeparateDiskGC, func() {
    f := framework.NewDefaultFramework("split-disk-test")
    f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
    pressureTimeout := 10 * time.Minute
    expectedNodeCondition := v1.NodeDiskPressure

    ginkgo.BeforeEach(func(ctx context.Context) {
        e2eskipper.SkipUnlessFeatureGateEnabled(features.KubeletSeparateDiskGC)
        if !hasSplitFileSystem(ctx) {
            ginkgo.Skip("node does not have a split filesystem")
        }
    })

    f.It("should display different stats for imageFs and containerFs", func(ctx context.Context) {
        summary := eventuallyGetSummary(ctx)
        gomega.Expect(summary.Node.Fs.AvailableBytes).ToNot(gomega.Equal(summary.Node.Runtime.ImageFs.AvailableBytes))
        gomega.Expect(summary.Node.Fs.CapacityBytes).ToNot(gomega.Equal(summary.Node.Runtime.ImageFs.CapacityBytes))
        // Node.Fs represents the root filesystem, where /var/lib/kubelet is located.
        // Since graphroot is left as the default in storage.conf, it uses the same filesystem as rootfs,
        // so Node.Fs should match Runtime.ContainerFs.
        gomega.Expect(summary.Node.Fs.AvailableBytes).To(gomega.Equal(summary.Node.Runtime.ContainerFs.AvailableBytes))
        gomega.Expect(summary.Node.Fs.CapacityBytes).To(gomega.Equal(summary.Node.Runtime.ContainerFs.CapacityBytes))
    })

    f.Context("when there is disk pressure", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), func() {
        f.Context("on imageFs", func() {
            tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
                initialConfig.EvictionHard = map[string]string{
                    string(evictionapi.SignalNodeFsAvailable):      "30%",
                    string(evictionapi.SignalContainerFsAvailable): "30%",
                    string(evictionapi.SignalImageFsAvailable):     "30%",
                }
                initialConfig.EvictionMinimumReclaim = map[string]string{}
                ginkgo.By(fmt.Sprintf("EvictionHard %s", initialConfig.EvictionHard))
            })

            runImageFsPressureTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
                {
                    evictionPriority: 1,
                    pod:              innocentPod(),
                },
            })
        })

        f.Context("on containerFs", func() {
            expectedStarvedResource := v1.ResourceEphemeralStorage
            diskTestInMb := 5000

            tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
                initialConfig.EvictionHard = map[string]string{
                    string(evictionapi.SignalNodeFsAvailable):  "30%",
                    string(evictionapi.SignalImageFsAvailable): "30%",
                }
                initialConfig.EvictionMinimumReclaim = map[string]string{}
                ginkgo.By(fmt.Sprintf("EvictionHard %s", initialConfig.EvictionHard))
            })
            runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
                {
                    // This pod should exceed disk capacity on nodeFs, since it writes a lot to its writable layer.
                    evictionPriority: 1,
                    pod: diskConsumingPod("container-emptydir-disk-limit", diskTestInMb, nil,
                        v1.ResourceRequirements{}),
                },
            })
        })
    })
})

// runImageFsPressureTest is similar to the eviction tests but skips the checks on eviction itself,
// as we want to induce disk pressure on the imageFs filesystem.
func runImageFsPressureTest(f *framework.Framework, pressureTimeout time.Duration, expectedNodeCondition v1.NodeConditionType, logFunc func(ctx context.Context), testSpecs []podEvictSpec) {
    // Place the remainder of the test within a context so that the kubelet config is set before and after the test.
    ginkgo.Context("", func() {
        ginkgo.BeforeEach(func(ctx context.Context) {
            // Reduce memory usage in the allocatable cgroup to ensure we do not have MemoryPressure.
            reduceAllocatableMemoryUsageIfCgroupv1()
            // Nodes do not immediately report local storage capacity,
            // so wait a little to allow pods requesting local storage to be scheduled.
            time.Sleep(30 * time.Second)
            ginkgo.By("setting up pods to be used by tests")
            pods := []*v1.Pod{}
            for _, spec := range testSpecs {
                pods = append(pods, spec.pod)
            }
            e2epod.NewPodClient(f).CreateBatch(ctx, pods)
        })

        ginkgo.It("should evict all of the correct pods", func(ctx context.Context) {
            _, is, err := getCRIClient()
            framework.ExpectNoError(err)
            resp, err := is.ImageFsInfo(ctx)
            framework.ExpectNoError(err)
            gomega.Expect(resp.ImageFilesystems).NotTo(gomega.BeEmpty())
            gomega.Expect(resp.ImageFilesystems[0].FsId).NotTo(gomega.BeNil())
            diskToPressure := filepath.Dir(resp.ImageFilesystems[0].FsId.Mountpoint)
            ginkgo.By(fmt.Sprintf("Got imageFs directory: %s", diskToPressure))
            imagesLenBeforeGC := 1
            sizeOfPressure := "8000"
            gomega.Eventually(ctx, func(ctx context.Context) error {
                images, err := is.ListImages(ctx, &runtimeapi.ImageFilter{})
                imagesLenBeforeGC = len(images)
                return err
            }, 1*time.Minute, evictionPollInterval).Should(gomega.Succeed())
            ginkgo.By(fmt.Sprintf("Number of images found before GC was %d", imagesLenBeforeGC))
            ginkgo.By(fmt.Sprintf("Induce disk pressure on %s with size %s", diskToPressure, sizeOfPressure))
            gomega.Expect(runDDOnFilesystem(diskToPressure, sizeOfPressure)).Should(gomega.Succeed())
            ginkgo.By(fmt.Sprintf("Waiting for node to have NodeCondition: %s", expectedNodeCondition))

            gomega.Eventually(ctx, func(ctx context.Context) error {
                logFunc(ctx)
                if expectedNodeCondition == noPressure || hasNodeCondition(ctx, f, expectedNodeCondition) {
                    return nil
                }
                return fmt.Errorf("NodeCondition: %s not encountered", expectedNodeCondition)
            }, pressureTimeout, evictionPollInterval).Should(gomega.BeNil())

            ginkgo.By("Waiting for evictions to occur")
            gomega.Eventually(ctx, func(ctx context.Context) error {
                if expectedNodeCondition != noPressure {
                    if hasNodeCondition(ctx, f, expectedNodeCondition) {
                        framework.Logf("Node has condition: %s", expectedNodeCondition)
                    } else {
                        framework.Logf("Node does NOT have condition: %s", expectedNodeCondition)
                    }
                }
                logKubeletLatencyMetrics(ctx, kubeletmetrics.EvictionStatsAgeKey)
                logFunc(ctx)
                return verifyEvictionOrdering(ctx, f, testSpecs)
            }, pressureTimeout, evictionPollInterval).Should(gomega.Succeed())

            ginkgo.By("checking for the expected pod conditions for evicted pods")
            verifyPodConditions(ctx, f, testSpecs)

            gomega.Eventually(ctx, func(ctx context.Context) error {
                images, err := is.ListImages(ctx, &runtimeapi.ImageFilter{})
                if err != nil {
                    return err
                }
                imagesLenAfterGC := len(images)
                if imagesLenAfterGC < imagesLenBeforeGC {
                    return nil
                }
                return fmt.Errorf("garbage collection of images should have occurred. before: %d after: %d", imagesLenBeforeGC, imagesLenAfterGC)
            }, pressureTimeout, evictionPollInterval).Should(gomega.Succeed())

            gomega.Expect(removeDiskPressure(diskToPressure)).Should(gomega.Succeed(), "removing disk pressure should not fail")

            ginkgo.By("making sure pressure from the test has surfaced before continuing")

            ginkgo.By(fmt.Sprintf("Waiting for NodeCondition: %s to no longer exist on the node", expectedNodeCondition))
            gomega.Eventually(ctx, func(ctx context.Context) error {
                logFunc(ctx)
                logKubeletLatencyMetrics(ctx, kubeletmetrics.EvictionStatsAgeKey)
                if expectedNodeCondition != noPressure && hasNodeCondition(ctx, f, expectedNodeCondition) {
                    return fmt.Errorf("conditions haven't returned to normal, node still has: %s", expectedNodeCondition)
                }
                return nil
            }, pressureTimeout, evictionPollInterval).Should(gomega.BeNil())

            ginkgo.By("checking for a stable, pressure-free condition without unexpected pod failures")
            gomega.Consistently(ctx, func(ctx context.Context) error {
                if expectedNodeCondition != noPressure && hasNodeCondition(ctx, f, expectedNodeCondition) {
                    return fmt.Errorf("condition %s disappeared and then reappeared", expectedNodeCondition)
                }
                logFunc(ctx)
                logKubeletLatencyMetrics(ctx, kubeletmetrics.EvictionStatsAgeKey)
                return verifyEvictionOrdering(ctx, f, testSpecs)
            }, postTestConditionMonitoringPeriod, evictionPollInterval).Should(gomega.Succeed())
        })

        ginkgo.AfterEach(func(ctx context.Context) {
            prePullImagesIfNecessary := func() {
                if expectedNodeCondition == v1.NodeDiskPressure && framework.TestContext.PrepullImages {
                    // The disk eviction test may cause the pre-pulled images to be evicted,
                    // so pre-pull those images again to ensure this test does not affect subsequent tests.
                    err := PrePullAllImages()
                    framework.ExpectNoError(err)
                }
            }
            // Run the image pre-pull in a defer to ensure that images are pulled even when the subsequent assertions fail.
            defer prePullImagesIfNecessary()

            ginkgo.By("deleting pods")
            for _, spec := range testSpecs {
                ginkgo.By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
                e2epod.NewPodClient(f).DeleteSync(ctx, spec.pod.Name, metav1.DeleteOptions{}, 10*time.Minute)
            }

            // In case a test fails before verifying that the NodeCondition no longer exists on the node,
            // we should wait for the NodeCondition to disappear here.
            ginkgo.By(fmt.Sprintf("making sure NodeCondition %s no longer exists on the node", expectedNodeCondition))
            gomega.Eventually(ctx, func(ctx context.Context) error {
                if expectedNodeCondition != noPressure && hasNodeCondition(ctx, f, expectedNodeCondition) {
                    return fmt.Errorf("conditions haven't returned to normal, node still has: %s", expectedNodeCondition)
                }
                return nil
            }, pressureDisappearTimeout, evictionPollInterval).Should(gomega.BeNil())

            reduceAllocatableMemoryUsageIfCgroupv1()
            ginkgo.By("making sure we have all the required images for testing")
            prePullImagesIfNecessary()

            // Ensure that the NodeCondition hasn't returned after pulling images.
            ginkgo.By(fmt.Sprintf("making sure NodeCondition %s doesn't exist again after pulling images", expectedNodeCondition))
            gomega.Eventually(ctx, func(ctx context.Context) error {
                if expectedNodeCondition != noPressure && hasNodeCondition(ctx, f, expectedNodeCondition) {
                    return fmt.Errorf("conditions haven't returned to normal, node still has: %s", expectedNodeCondition)
                }
                return nil
            }, pressureDisappearTimeout, evictionPollInterval).Should(gomega.BeNil())

            ginkgo.By("making sure we can start a new pod after the test")
            podName := "test-admit-pod"
            e2epod.NewPodClient(f).CreateSync(ctx, &v1.Pod{
                ObjectMeta: metav1.ObjectMeta{
                    Name: podName,
                },
                Spec: v1.PodSpec{
                    RestartPolicy: v1.RestartPolicyNever,
                    Containers: []v1.Container{
                        {
                            Image: imageutils.GetPauseImageName(),
                            Name:  podName,
                        },
                    },
                },
            })

            if ginkgo.CurrentSpecReport().Failed() {
                if framework.TestContext.DumpLogsOnFailure {
                    logPodEvents(ctx, f)
                    logNodeEvents(ctx, f)
                }
            }
        })
    })
}
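
// runDDOnFilesystem uses dd to write a file of sizeOfPressure one-megabyte blocks of zeros under
// diskToPressure, filling that filesystem enough to induce disk pressure.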
func runDDOnFilesystem(diskToPressure, sizeOfPressure string) error {
    ddArgs := fmt.Sprintf("if=/dev/zero of=%s/file.txt bs=1M count=%s", diskToPressure, sizeOfPressure)
    ginkgo.By(fmt.Sprintf("running dd with %s", ddArgs))
    cmd := exec.Command("dd", strings.Split(ddArgs, " ")...)
    output, err := cmd.CombinedOutput()
    if err != nil {
        fmt.Println(string(output))
        fmt.Println(err)
    }
    return err
}
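
// removeDiskPressure deletes the file written by runDDOnFilesystem so the induced disk pressure can clear.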
func removeDiskPressure(diskToPressure string) error {
    fileToRemove := fmt.Sprintf("%s/file.txt", diskToPressure)
    ginkgo.By(fmt.Sprintf("calling rm %s", fileToRemove))
    cmd := exec.Command("rm", fileToRemove)
    _, err := cmd.CombinedOutput()
    return err
}
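
// hasSplitFileSystem reports whether the CRI runtime places its image filesystem and its container
// (writable-layer) filesystem on different mountpoints, i.e. whether the node has a split filesystem.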
func hasSplitFileSystem(ctx context.Context) bool {
    _, is, err := getCRIClient()
    framework.ExpectNoError(err)
    resp, err := is.ImageFsInfo(ctx)
    framework.ExpectNoError(err)
    if len(resp.ContainerFilesystems) == 0 || len(resp.ImageFilesystems) == 0 {
        return false
    }
    if resp.ContainerFilesystems[0].FsId != nil && resp.ImageFilesystems[0].FsId != nil {
        return resp.ContainerFilesystems[0].FsId.Mountpoint != resp.ImageFilesystems[0].FsId.Mountpoint
    }
    return false
}