Mirror of https://github.com/k3s-io/kubernetes.git, synced 2025-07-20 02:11:09 +00:00
e2e_node: Fix LocalStorage and PriorityLocalStorage eviction tests
Currently the storage eviction tests fail for a few reasons:

- They re-enter storage exhaustion after pulling the images during cleanup. To address this, the amount of storage the tests consume is increased, and a verification step is added (for future diagnosis) that disk pressure has not returned after the image pull.
- They were timing out: in practice, eviction seems to take just over 10 minutes on an n1-standard in many cases. I'm raising the timeouts to 15 minutes to provide some padding.

This should ideally bring these tests back to passing on CI, as they've now passed locally for me several times with the remote GCE env.

Follow-up work involves diagnosing why these evictions take so long, and restructuring the tests to be less finicky.
parent 9f7e079a5b
commit a8168ed543
@@ -167,17 +167,26 @@ var _ = SIGDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disruptive][NodeFeature:Eviction]", func() {
 // Disk pressure is induced by running pods which consume disk space.
 var _ = SIGDescribe("LocalStorageEviction [Slow] [Serial] [Disruptive][NodeFeature:Eviction]", func() {
     f := framework.NewDefaultFramework("localstorage-eviction-test")
-    pressureTimeout := 10 * time.Minute
+    pressureTimeout := 15 * time.Minute
     expectedNodeCondition := v1.NodeDiskPressure
     expectedStarvedResource := v1.ResourceEphemeralStorage
     ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
+
         tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
-            diskConsumed := resource.MustParse("200Mi")
             summary := eventuallyGetSummary()
-            availableBytes := *(summary.Node.Fs.AvailableBytes)
-            initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsAvailable): fmt.Sprintf("%d", availableBytes-uint64(diskConsumed.Value()))}
+
+            diskConsumedByTest := resource.MustParse("4Gi")
+            availableBytesOnSystem := *(summary.Node.Fs.AvailableBytes)
+            evictionThreshold := strconv.FormatUint(availableBytesOnSystem-uint64(diskConsumedByTest.Value()), 10)
+
+            if availableBytesOnSystem <= uint64(diskConsumedByTest.Value()) {
+                e2eskipper.Skipf("Too little disk free on the host for the LocalStorageEviction test to run")
+            }
+
+            initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsAvailable): evictionThreshold}
             initialConfig.EvictionMinimumReclaim = map[string]string{}
         })
+
         runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
             {
                 evictionPriority: 1,
@@ -201,7 +210,7 @@ var _ = SIGDescribe("LocalStorageSoftEviction [Slow] [Serial] [Disruptive][NodeFeature:Eviction]", func() {
     expectedStarvedResource := v1.ResourceEphemeralStorage
     ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
         tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
-            diskConsumed := resource.MustParse("200Mi")
+            diskConsumed := resource.MustParse("4Gi")
             summary := eventuallyGetSummary()
             availableBytes := *(summary.Node.Fs.AvailableBytes)
             if availableBytes <= uint64(diskConsumed.Value()) {
@@ -343,14 +352,14 @@ var _ = SIGDescribe("PriorityLocalStorageEvictionOrdering [Slow] [Serial] [Disruptive][NodeFeature:Eviction]", func() {
     f := framework.NewDefaultFramework("priority-disk-eviction-ordering-test")
     expectedNodeCondition := v1.NodeDiskPressure
     expectedStarvedResource := v1.ResourceEphemeralStorage
-    pressureTimeout := 10 * time.Minute
+    pressureTimeout := 15 * time.Minute
 
     highPriorityClassName := f.BaseName + "-high-priority"
     highPriority := int32(999999999)
 
     ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
         tempSetCurrentKubeletConfig(f, func(initialConfig *kubeletconfig.KubeletConfiguration) {
-            diskConsumed := resource.MustParse("350Mi")
+            diskConsumed := resource.MustParse("4Gi")
             summary := eventuallyGetSummary()
             availableBytes := *(summary.Node.Fs.AvailableBytes)
             if availableBytes <= uint64(diskConsumed.Value()) {
@@ -545,7 +554,7 @@ func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expe
 
             // In case a test fails before verifying that NodeCondition no longer exist on the node,
             // we should wait for the NodeCondition to disappear
-            ginkgo.By(fmt.Sprintf("making sure NodeCondition %s no longer exist on the node", expectedNodeCondition))
+            ginkgo.By(fmt.Sprintf("making sure NodeCondition %s no longer exists on the node", expectedNodeCondition))
             gomega.Eventually(func() error {
                 if expectedNodeCondition != noPressure && hasNodeCondition(f, expectedNodeCondition) {
                     return fmt.Errorf("Conditions haven't returned to normal, node still has %s", expectedNodeCondition)
@@ -557,6 +566,15 @@ func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expe
             ginkgo.By("making sure we have all the required images for testing")
             prePullImagesIfNeccecary()
 
+            // Ensure that the NodeCondition hasn't returned after pulling images
+            ginkgo.By(fmt.Sprintf("making sure NodeCondition %s doesn't exist again after pulling images", expectedNodeCondition))
+            gomega.Eventually(func() error {
+                if expectedNodeCondition != noPressure && hasNodeCondition(f, expectedNodeCondition) {
+                    return fmt.Errorf("Conditions haven't returned to normal, node still has %s", expectedNodeCondition)
+                }
+                return nil
+            }, pressureDisappearTimeout, evictionPollInterval).Should(gomega.BeNil())
+
             ginkgo.By("making sure we can start a new pod after the test")
             podName := "test-admit-pod"
             f.PodClient().CreateSync(&v1.Pod{
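The threshold arithmetic in the first hunk is the core of the fix: the nodefs.available hard-eviction threshold is set to the node's currently reported free space minus the 4Gi the test intends to consume, so the disk-consuming pods only need to write roughly 4Gi before NodeDiskPressure should appear. Below is a minimal standalone sketch of that calculation, assuming a hypothetical node reporting about 20 GiB free (the value is illustrative and not taken from the commit):

package main

import (
    "fmt"
    "strconv"

    "k8s.io/apimachinery/pkg/api/resource"
)

func main() {
    // Hypothetical value standing in for summary.Node.Fs.AvailableBytes (~20 GiB free).
    availableBytesOnSystem := uint64(20 * 1024 * 1024 * 1024)

    // Same arithmetic as the test: leave ~4Gi of headroom below the current free space.
    diskConsumedByTest := resource.MustParse("4Gi")
    if availableBytesOnSystem <= uint64(diskConsumedByTest.Value()) {
        fmt.Println("would skip: too little disk free on the host")
        return
    }
    evictionThreshold := strconv.FormatUint(availableBytesOnSystem-uint64(diskConsumedByTest.Value()), 10)

    // The kubelet would trigger eviction once nodefs.available falls below this
    // many bytes, i.e. after roughly 4Gi has been written by the test pods.
    fmt.Printf("nodefs.available hard-eviction threshold: %s bytes\n", evictionThreshold)
}

With 20 GiB free, the printed threshold would be 17179869184 bytes (16 GiB), leaving the test exactly the 4Gi it plans to write before the kubelet is expected to report disk pressure.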