mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-31 07:20:13 +00:00)

Merge pull request #125417 from bitoku/splitfs

KEP-4191: Split Image Filesystem add end-to-end tests

commit ad72be434d
@@ -104,6 +104,9 @@ var (
 	// TODO: document the feature (owning SIG, when to use this feature for a test)
 	SidecarContainers = framework.WithNodeFeature(framework.ValidNodeFeatures.Add("SidecarContainers"))
 
+	// Sig-node: add e2e tests for KEP-4191
+	KubeletSeparateDiskGC = framework.WithNodeFeature(framework.ValidNodeFeatures.Add("KubeletSeparateDiskGC"))
+
 	// TODO: document the feature (owning SIG, when to use this feature for a test)
 	SystemNodeCriticalPod = framework.WithNodeFeature(framework.ValidNodeFeatures.Add("SystemNodeCriticalPod"))
 
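The new KubeletSeparateDiskGC node-feature label is consumed by the test file added below. As a minimal sketch of the gating pattern (excerpted from that spec, body elided): the label tags the suite for the e2e_node runner, while the feature gate itself is checked in a BeforeEach:

	var _ = SIGDescribe("KubeletSeparateDiskGC", nodefeature.KubeletSeparateDiskGC, func() {
		ginkgo.BeforeEach(func(ctx context.Context) {
			e2eskipper.SkipUnlessFeatureGateEnabled(features.KubeletSeparateDiskGC)
			// ... skip unless the node actually has a split filesystem ...
		})
		// ... stats and disk-pressure specs ...
	})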
test/e2e_node/split_disk_test.go  (new file, 315 lines)
@@ -0,0 +1,315 @@
/*
Copyright 2024 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2enode

import (
	"context"
	"fmt"
	"os/exec"
	"path/filepath"
	"strings"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
	"k8s.io/kubernetes/pkg/features"
	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
	kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
	"k8s.io/kubernetes/test/e2e/framework"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
	"k8s.io/kubernetes/test/e2e/nodefeature"
	imageutils "k8s.io/kubernetes/test/utils/image"
	admissionapi "k8s.io/pod-security-admission/api"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
)

var _ = SIGDescribe("KubeletSeparateDiskGC", nodefeature.KubeletSeparateDiskGC, func() {
	f := framework.NewDefaultFramework("split-disk-test")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
	pressureTimeout := 10 * time.Minute
	expectedNodeCondition := v1.NodeDiskPressure

	ginkgo.BeforeEach(func(ctx context.Context) {
		e2eskipper.SkipUnlessFeatureGateEnabled(features.KubeletSeparateDiskGC)
		if !hasSplitFileSystem(ctx) {
			ginkgo.Skip("the node does not have a split filesystem")
		}
	})

	f.It("should display different stats for imageFs and containerFs", func(ctx context.Context) {
		summary := eventuallyGetSummary(ctx)
		gomega.Expect(summary.Node.Fs.AvailableBytes).ToNot(gomega.Equal(summary.Node.Runtime.ImageFs.AvailableBytes))
		gomega.Expect(summary.Node.Fs.CapacityBytes).ToNot(gomega.Equal(summary.Node.Runtime.ImageFs.CapacityBytes))
		// Node.Fs represents the rootfs, where /var/lib/kubelet is located.
		// Since graphroot is left as the default in storage.conf, it uses the same filesystem location as the rootfs.
		// Therefore, Node.Fs should be the same as Runtime.ContainerFs.
		gomega.Expect(summary.Node.Fs.AvailableBytes).To(gomega.Equal(summary.Node.Runtime.ContainerFs.AvailableBytes))
		gomega.Expect(summary.Node.Fs.CapacityBytes).To(gomega.Equal(summary.Node.Runtime.ContainerFs.CapacityBytes))
	})

	f.Context("when there is disk pressure", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), func() {
		f.Context("on imageFs", func() {
			tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
				initialConfig.EvictionHard = map[string]string{
					string(evictionapi.SignalNodeFsAvailable):      "30%",
					string(evictionapi.SignalContainerFsAvailable): "30%",
					string(evictionapi.SignalImageFsAvailable):     "30%",
				}
				initialConfig.EvictionMinimumReclaim = map[string]string{}
				ginkgo.By(fmt.Sprintf("EvictionHard %s", initialConfig.EvictionHard))
			})

			runImageFsPressureTest(f, pressureTimeout, expectedNodeCondition, logDiskMetrics, []podEvictSpec{
				{
					evictionPriority: 1,
					pod:              innocentPod(),
				},
			})
		})

		f.Context("on containerFs", func() {
			expectedStarvedResource := v1.ResourceEphemeralStorage
			diskTestInMb := 5000

			tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
				initialConfig.EvictionHard = map[string]string{
					string(evictionapi.SignalNodeFsAvailable):  "30%",
					string(evictionapi.SignalImageFsAvailable): "30%",
				}
				initialConfig.EvictionMinimumReclaim = map[string]string{}
				ginkgo.By(fmt.Sprintf("EvictionHard %s", initialConfig.EvictionHard))
			})
			runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
				{
					// This pod should exceed disk capacity on nodeFs, since it writes a lot to its writable layer.
					evictionPriority: 1,
					pod: diskConsumingPod("container-emptydir-disk-limit", diskTestInMb, nil,
						v1.ResourceRequirements{}),
				},
			})
		})
	})
})

// runImageFsPressureTest is similar to the eviction tests, but it skips the checks on eviction itself,
// as we want to induce disk pressure on the imageFs filesystem.
func runImageFsPressureTest(f *framework.Framework, pressureTimeout time.Duration, expectedNodeCondition v1.NodeConditionType, logFunc func(ctx context.Context), testSpecs []podEvictSpec) {
	// Place the remainder of the test within a context so that the kubelet config is set before and after the test.
	ginkgo.Context("", func() {
		ginkgo.BeforeEach(func(ctx context.Context) {
			// Reduce memory usage in the allocatable cgroup to ensure we do not have MemoryPressure.
			reduceAllocatableMemoryUsageIfCgroupv1()
			// Nodes do not immediately report local storage capacity,
			// so wait a little to allow pods requesting local storage to be scheduled.
			time.Sleep(30 * time.Second)
			ginkgo.By("setting up pods to be used by tests")
			pods := []*v1.Pod{}
			for _, spec := range testSpecs {
				pods = append(pods, spec.pod)
			}
			e2epod.NewPodClient(f).CreateBatch(ctx, pods)
		})

		ginkgo.It("should evict all of the correct pods", func(ctx context.Context) {
			_, is, err := getCRIClient()
			framework.ExpectNoError(err)
			resp, err := is.ImageFsInfo(ctx)
			framework.ExpectNoError(err)
			gomega.Expect(resp.ImageFilesystems).NotTo(gomega.BeEmpty())
			gomega.Expect(resp.ImageFilesystems[0].FsId).NotTo(gomega.BeNil())
			diskToPressure := filepath.Dir(resp.ImageFilesystems[0].FsId.Mountpoint)
			ginkgo.By(fmt.Sprintf("Got imageFs directory: %s", diskToPressure))
			imagesLenBeforeGC := 1
			sizeOfPressure := "8000"
			gomega.Eventually(ctx, func(ctx context.Context) error {
				images, err := is.ListImages(ctx, &runtimeapi.ImageFilter{})
				imagesLenBeforeGC = len(images)
				return err
			}, 1*time.Minute, evictionPollInterval).Should(gomega.Succeed())
			ginkgo.By(fmt.Sprintf("Number of images found before GC was %d", imagesLenBeforeGC))
			ginkgo.By(fmt.Sprintf("Induce disk pressure on %s with size %s", diskToPressure, sizeOfPressure))
			gomega.Expect(runDDOnFilesystem(diskToPressure, sizeOfPressure)).Should(gomega.Succeed())
			ginkgo.By(fmt.Sprintf("Waiting for node to have NodeCondition: %s", expectedNodeCondition))

			gomega.Eventually(ctx, func(ctx context.Context) error {
				logFunc(ctx)
				if expectedNodeCondition == noPressure || hasNodeCondition(ctx, f, expectedNodeCondition) {
					return nil
				}
				return fmt.Errorf("NodeCondition: %s not encountered", expectedNodeCondition)
			}, pressureTimeout, evictionPollInterval).Should(gomega.BeNil())

			ginkgo.By("Waiting for evictions to occur")
			gomega.Eventually(ctx, func(ctx context.Context) error {
				if expectedNodeCondition != noPressure {
					if hasNodeCondition(ctx, f, expectedNodeCondition) {
						framework.Logf("Node has condition: %s", expectedNodeCondition)
					} else {
						framework.Logf("Node does NOT have condition: %s", expectedNodeCondition)
					}
				}
				logKubeletLatencyMetrics(ctx, kubeletmetrics.EvictionStatsAgeKey)
				logFunc(ctx)
				return verifyEvictionOrdering(ctx, f, testSpecs)
			}, pressureTimeout, evictionPollInterval).Should(gomega.Succeed())

			ginkgo.By("checking for the expected pod conditions for evicted pods")
			verifyPodConditions(ctx, f, testSpecs)

			gomega.Eventually(ctx, func(ctx context.Context) error {
				images, err := is.ListImages(ctx, &runtimeapi.ImageFilter{})
				if err != nil {
					return err
				}
				imagesLenAfterGC := len(images)
				if imagesLenAfterGC < imagesLenBeforeGC {
					return nil
				}
				return fmt.Errorf("garbage collection of images should have occurred. before: %d after: %d", imagesLenBeforeGC, imagesLenAfterGC)
			}, pressureTimeout, evictionPollInterval).Should(gomega.Succeed())

			gomega.Expect(removeDiskPressure(diskToPressure)).Should(gomega.Succeed(), "removing disk pressure should not fail")

			ginkgo.By("making sure pressure from the test has surfaced before continuing")

			ginkgo.By(fmt.Sprintf("Waiting for NodeCondition: %s to no longer exist on the node", expectedNodeCondition))
			gomega.Eventually(ctx, func(ctx context.Context) error {
				logFunc(ctx)
				logKubeletLatencyMetrics(ctx, kubeletmetrics.EvictionStatsAgeKey)
				if expectedNodeCondition != noPressure && hasNodeCondition(ctx, f, expectedNodeCondition) {
					return fmt.Errorf("conditions haven't returned to normal, node still has: %s", expectedNodeCondition)
				}
				return nil
			}, pressureTimeout, evictionPollInterval).Should(gomega.BeNil())

			ginkgo.By("checking for a stable, pressure-free condition without unexpected pod failures")
			gomega.Consistently(ctx, func(ctx context.Context) error {
				if expectedNodeCondition != noPressure && hasNodeCondition(ctx, f, expectedNodeCondition) {
					return fmt.Errorf("condition %s disappeared and then reappeared", expectedNodeCondition)
				}
				logFunc(ctx)
				logKubeletLatencyMetrics(ctx, kubeletmetrics.EvictionStatsAgeKey)
				return verifyEvictionOrdering(ctx, f, testSpecs)
			}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(gomega.Succeed())
		})

		ginkgo.AfterEach(func(ctx context.Context) {
			prePullImagesIfNecessary := func() {
				if expectedNodeCondition == v1.NodeDiskPressure && framework.TestContext.PrepullImages {
					// The disk eviction test may cause the pre-pulled images to be evicted,
					// so pre-pull those images again to ensure this test does not affect subsequent tests.
					err := PrePullAllImages()
					framework.ExpectNoError(err)
				}
			}
			// Run the image pre-pull in a `defer` so that images are pulled even when the subsequent assertions fail.
			defer prePullImagesIfNecessary()

			ginkgo.By("deleting pods")
			for _, spec := range testSpecs {
				ginkgo.By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
				e2epod.NewPodClient(f).DeleteSync(ctx, spec.pod.Name, metav1.DeleteOptions{}, 10*time.Minute)
			}

			// In case a test fails before verifying that the NodeCondition no longer exists on the node,
			// wait here for the NodeCondition to disappear.
			ginkgo.By(fmt.Sprintf("making sure NodeCondition %s no longer exists on the node", expectedNodeCondition))
			gomega.Eventually(ctx, func(ctx context.Context) error {
				if expectedNodeCondition != noPressure && hasNodeCondition(ctx, f, expectedNodeCondition) {
					return fmt.Errorf("conditions haven't returned to normal, node still has: %s", expectedNodeCondition)
				}
				return nil
			}, pressureDisappearTimeout, evictionPollInterval).Should(gomega.BeNil())

			reduceAllocatableMemoryUsageIfCgroupv1()
			ginkgo.By("making sure we have all the required images for testing")
			prePullImagesIfNecessary()

			// Ensure that the NodeCondition hasn't returned after pulling images.
			ginkgo.By(fmt.Sprintf("making sure NodeCondition %s doesn't exist again after pulling images", expectedNodeCondition))
			gomega.Eventually(ctx, func(ctx context.Context) error {
				if expectedNodeCondition != noPressure && hasNodeCondition(ctx, f, expectedNodeCondition) {
					return fmt.Errorf("conditions haven't returned to normal, node still has: %s", expectedNodeCondition)
				}
				return nil
			}, pressureDisappearTimeout, evictionPollInterval).Should(gomega.BeNil())

			ginkgo.By("making sure we can start a new pod after the test")
			podName := "test-admit-pod"
			e2epod.NewPodClient(f).CreateSync(ctx, &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name: podName,
				},
				Spec: v1.PodSpec{
					RestartPolicy: v1.RestartPolicyNever,
					Containers: []v1.Container{
						{
							Image: imageutils.GetPauseImageName(),
							Name:  podName,
						},
					},
				},
			})

			if ginkgo.CurrentSpecReport().Failed() {
				if framework.TestContext.DumpLogsOnFailure {
					logPodEvents(ctx, f)
					logNodeEvents(ctx, f)
				}
			}
		})
	})
}

// runDDOnFilesystem writes a zero-filled file of sizeOfPressure MiB into
// diskToPressure using dd, to induce disk pressure on that filesystem.
func runDDOnFilesystem(diskToPressure, sizeOfPressure string) error {
	ddArgs := fmt.Sprintf("if=/dev/zero of=%s/file.txt bs=1M count=%s", diskToPressure, sizeOfPressure)
	ginkgo.By(fmt.Sprintf("running dd with %s", ddArgs))
	cmd := exec.Command("dd", strings.Split(ddArgs, " ")...)
	output, err := cmd.CombinedOutput()
	if err != nil {
		fmt.Println(string(output))
		fmt.Println(err)
	}
	return err
}

// removeDiskPressure deletes the file created by runDDOnFilesystem to relieve
// the induced disk pressure.
func removeDiskPressure(diskToPressure string) error {
	fileToRemove := fmt.Sprintf("%s/file.txt", diskToPressure)
	ginkgo.By(fmt.Sprintf("calling rm %s", fileToRemove))
	cmd := exec.Command("rm", fileToRemove)
	_, err := cmd.CombinedOutput()
	return err
}

// hasSplitFileSystem reports whether the container runtime places images and
// writable container layers on different filesystems, based on the mountpoints
// returned by the CRI ImageFsInfo call.
func hasSplitFileSystem(ctx context.Context) bool {
	_, is, err := getCRIClient()
	framework.ExpectNoError(err)
	resp, err := is.ImageFsInfo(ctx)
	framework.ExpectNoError(err)
	if len(resp.ContainerFilesystems) == 0 || len(resp.ImageFilesystems) == 0 {
		return false
	}
	if resp.ContainerFilesystems[0].FsId != nil && resp.ImageFilesystems[0].FsId != nil {
		return resp.ContainerFilesystems[0].FsId.Mountpoint != resp.ImageFilesystems[0].FsId.Mountpoint
	}
	return false
}
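
For intuition on the fixed sizeOfPressure of 8000 (MiB) used by runImageFsPressureTest: DiskPressure is reported once imagefs.available falls below the 30% EvictionHard threshold configured above, so the write only needs to cover the gap between current availability and 30% of capacity. A minimal, hypothetical sketch of that arithmetic (bytesToWriteForPressure and the example sizes are illustrative, not part of this change):

	package main

	import "fmt"

	// bytesToWriteForPressure returns how many bytes must be written to a
	// filesystem so that its available space drops to an EvictionHard threshold
	// expressed as a fraction of capacity (e.g. 0.30 for "imagefs.available: 30%").
	func bytesToWriteForPressure(capacityBytes, availableBytes uint64, hardFraction float64) uint64 {
		threshold := uint64(float64(capacityBytes) * hardFraction)
		if availableBytes <= threshold {
			return 0 // the node is already under disk pressure
		}
		return availableBytes - threshold
	}

	func main() {
		const mib = uint64(1) << 20
		// Hypothetical 20 GiB imageFs with 10 GiB free and a 30% hard threshold:
		// about 4 GiB of writes are needed, so the fixed count=8000 (MiB) that
		// runDDOnFilesystem passes to dd comfortably crosses the threshold.
		needed := bytesToWriteForPressure(20*1024*mib, 10*1024*mib, 0.30)
		fmt.Printf("need to write about %d MiB\n", needed/mib)
	}

On a node whose imageFs is very large or mostly empty, 8000 MiB may not be enough to cross the threshold, and the test waits on the NodeDiskPressure condition with a generous pressureTimeout rather than asserting it immediately.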