diff --git a/test/e2e_node/BUILD b/test/e2e_node/BUILD
index c1a118189c4..964c4a4a0f6 100644
--- a/test/e2e_node/BUILD
+++ b/test/e2e_node/BUILD
@@ -60,6 +60,7 @@ go_test(
         "e2e_node_suite_test.go",
         "garbage_collector_test.go",
         "image_id_test.go",
+        "inode_eviction_test.go",
         "kubelet_test.go",
         "lifecycle_hook_test.go",
         "log_path_test.go",
diff --git a/test/e2e_node/inode_eviction_test.go b/test/e2e_node/inode_eviction_test.go
new file mode 100644
index 00000000000..61490dafe4e
--- /dev/null
+++ b/test/e2e_node/inode_eviction_test.go
@@ -0,0 +1,287 @@
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e_node
+
+import (
+	"fmt"
+	"time"
+
+	"k8s.io/kubernetes/pkg/api/v1"
+	"k8s.io/kubernetes/test/e2e/framework"
+
+	. "github.com/onsi/ginkgo"
+	. "github.com/onsi/gomega"
+)
+
+// Eviction Policy is described here:
+// https://github.com/kubernetes/kubernetes/blob/master/docs/proposals/kubelet-eviction.md
+
+const (
+	postTestConditionMonitoringPeriod = 2 * time.Minute
+	evictionPollInterval              = 5 * time.Second
+)
+
+var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive]", func() {
+	f := framework.NewDefaultFramework("inode-eviction-test")
+
+	podTestSpecs := []podTestSpec{
+		{
+			evictionPriority: 1, // This pod should be evicted before the normal memory usage pod
+			pod: v1.Pod{
+				ObjectMeta: v1.ObjectMeta{Name: "container-inode-hog-pod"},
+				Spec: v1.PodSpec{
+					RestartPolicy: v1.RestartPolicyNever,
+					Containers: []v1.Container{
+						{
+							Image: "gcr.io/google_containers/busybox:1.24",
+							Name:  "container-inode-hog-pod",
+							Command: []string{
+								"sh",
+								"-c", // Make 100 billion small files (more than we have inodes)
+								"i=0; while [[ $i -lt 100000000000 ]]; do touch smallfile$i.txt; sleep 0.001; i=$((i+=1)); done;",
+							},
+						},
+					},
+				},
+			},
+		},
+		{
+			evictionPriority: 1, // This pod should be evicted before the normal memory usage pod
+			pod: v1.Pod{
+				ObjectMeta: v1.ObjectMeta{Name: "volume-inode-hog-pod"},
+				Spec: v1.PodSpec{
+					RestartPolicy: v1.RestartPolicyNever,
+					Containers: []v1.Container{
+						{
+							Image: "gcr.io/google_containers/busybox:1.24",
+							Name:  "volume-inode-hog-pod",
+							Command: []string{
+								"sh",
+								"-c", // Make 100 billion small files (more than we have inodes)
+								"i=0; while [[ $i -lt 100000000000 ]]; do touch /test-empty-dir-mnt/smallfile$i.txt; sleep 0.001; i=$((i+=1)); done;",
+							},
+							VolumeMounts: []v1.VolumeMount{
+								{MountPath: "/test-empty-dir-mnt", Name: "test-empty-dir"},
+							},
+						},
+					},
+					Volumes: []v1.Volume{
+						{Name: "test-empty-dir", VolumeSource: v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}},
+					},
+				},
+			},
+		},
+		{
+			evictionPriority: 0, // This pod should never be evicted
+			pod: v1.Pod{
+				ObjectMeta: v1.ObjectMeta{Name: "normal-memory-usage-pod"},
+				Spec: v1.PodSpec{
+					RestartPolicy: v1.RestartPolicyNever,
+					Containers: []v1.Container{
+						{
+							Image: "gcr.io/google_containers/busybox:1.24",
+							Name:  "normal-memory-usage-pod",
+							Command: []string{
+								"sh",
+								"-c", // Make one big (5 GB) file
+								"dd if=/dev/urandom of=largefile bs=5000000000 count=1; while true; do sleep 5; done",
+							},
+						},
+					},
+				},
+			},
+		},
+	}
+	evictionTestTimeout := 10 * time.Minute
+	testCondition := "Disk Pressure due to Inodes"
+
+	runEvictionTest(f, testCondition, podTestSpecs, evictionTestTimeout, hasInodePressure)
+})
+
+// podTestSpec is used by runEvictionTest to specify a pod, and when that pod should be evicted relative to the other pods.
+type podTestSpec struct {
+	// 0 should never be evicted, 1 shouldn't evict before 2, etc.
+	// If two are ranked at 1, either is permitted to fail before the other.
+	// The test ends when all pods other than the priority 0 pods have been evicted.
+	evictionPriority int
+	pod              v1.Pod
+}
+
+// runEvictionTest sets up a testing environment given the provided pods, and checks a few things:
+// It ensures that the desired testCondition is actually triggered.
+// It ensures that evictionPriority 0 pods are not evicted.
+// It ensures that lower evictionPriority pods are always evicted before higher evictionPriority pods (2 evicted before 1, etc.).
+// It ensures that all lower evictionPriority pods are eventually evicted.
+// runEvictionTest then cleans up the testing environment by deleting the provided pods, and ensures that testCondition no longer exists.
+func runEvictionTest(f *framework.Framework, testCondition string, podTestSpecs []podTestSpec,
+	evictionTestTimeout time.Duration, hasPressureCondition func(*framework.Framework, string) (bool, error)) {
+
+	Context(fmt.Sprintf("when we run containers that should cause %s", testCondition), func() {
+
+		BeforeEach(func() {
+			By("setting up pods to be used by tests")
+			for _, spec := range podTestSpecs {
+				By(fmt.Sprintf("creating pod with container: %s", spec.pod.Name))
+				f.PodClient().CreateSync(&spec.pod)
+			}
+		})
+
+		It(fmt.Sprintf("should eventually see %s, and then evict all of the correct pods", testCondition), func() {
+			Eventually(func() error {
+				hasPressure, err := hasPressureCondition(f, testCondition)
+				framework.ExpectNoError(err, fmt.Sprintf("checking if we have %s", testCondition))
+				if hasPressure {
+					return nil
+				}
+				return fmt.Errorf("condition: %s not encountered", testCondition)
+			}, evictionTestTimeout, evictionPollInterval).Should(BeNil())
+
+			Eventually(func() error {
+				// Gather current information
+				updatedPodList, err := f.ClientSet.Core().Pods(f.Namespace.Name).List(v1.ListOptions{})
+				framework.ExpectNoError(err, "getting updated pod list")
+				updatedPods := updatedPodList.Items
+				for _, p := range updatedPods {
+					framework.Logf("fetching pod %s; phase=%v", p.Name, p.Status.Phase)
+				}
+				_, err = hasPressureCondition(f, testCondition)
+				framework.ExpectNoError(err, fmt.Sprintf("checking if we have %s", testCondition))
+
+				By("checking eviction ordering and ensuring important pods don't fail")
+				done := true
+				for _, priorityPodSpec := range podTestSpecs {
+					var priorityPod v1.Pod
+					for _, p := range updatedPods {
+						if p.Name == priorityPodSpec.pod.Name {
+							priorityPod = p
+						}
+					}
+					Expect(priorityPod).NotTo(BeNil())
+
+					// Check eviction ordering.
+					// Note: it is alright for a priority 1 and priority 2 pod (for example) to fail in the same round.
+					for _, lowPriorityPodSpec := range podTestSpecs {
+						var lowPriorityPod v1.Pod
+						for _, p := range updatedPods {
+							if p.Name == lowPriorityPodSpec.pod.Name {
+								lowPriorityPod = p
+							}
+						}
+						Expect(lowPriorityPod).NotTo(BeNil())
+						if priorityPodSpec.evictionPriority < lowPriorityPodSpec.evictionPriority && lowPriorityPod.Status.Phase == v1.PodRunning {
+							Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
+								fmt.Sprintf("%s pod failed before %s pod", priorityPodSpec.pod.Name, lowPriorityPodSpec.pod.Name))
+						}
+					}
+
+					// EvictionPriority 0 pods should not fail
+					if priorityPodSpec.evictionPriority == 0 {
+						Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
+							fmt.Sprintf("%s pod failed (and shouldn't have failed)", priorityPod.Name))
+					}
+
+					// If a pod that is not evictionPriority 0 has not been evicted, we are not done
+					if priorityPodSpec.evictionPriority != 0 && priorityPod.Status.Phase != v1.PodFailed {
+						done = false
+					}
+				}
+				if done {
+					return nil
+				}
+				return fmt.Errorf("pods that caused %s have not been evicted", testCondition)
+			}, evictionTestTimeout, evictionPollInterval).Should(BeNil())
+		})
+
+		AfterEach(func() {
+			By("making sure conditions eventually return to normal")
+			Eventually(func() bool {
+				hasPressure, err := hasPressureCondition(f, testCondition)
+				framework.ExpectNoError(err, fmt.Sprintf("checking if we have %s", testCondition))
+				return hasPressure
+			}, evictionTestTimeout, evictionPollInterval).Should(BeFalse())
+
+			By("making sure conditions do not return")
+			Consistently(func() bool {
+				hasPressure, err := hasPressureCondition(f, testCondition)
+				framework.ExpectNoError(err, fmt.Sprintf("checking if we have %s", testCondition))
+				return hasPressure
+			}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(BeFalse())
+
+			By("making sure we can start a new pod after the test")
+			podName := "test-admit-pod"
+			f.PodClient().Create(&v1.Pod{
+				ObjectMeta: v1.ObjectMeta{
+					Name: podName,
+				},
+				Spec: v1.PodSpec{
+					RestartPolicy: v1.RestartPolicyNever,
+					Containers: []v1.Container{
+						{
+							Image: "gcr.io/google_containers/busybox:1.24",
+							Name:  podName,
+						},
+					},
+				},
+			})
+			if CurrentGinkgoTestDescription().Failed && framework.TestContext.DumpLogsOnFailure {
+				logPodEvents(f)
+				logNodeEvents(f)
+			}
+		})
+	})
+}
+
+// hasInodePressure returns true if the node reports disk pressure (caused here by inode exhaustion), false otherwise.
+func hasInodePressure(f *framework.Framework, testCondition string) (bool, error) {
+
+	nodeList, err := f.ClientSet.Core().Nodes().List(v1.ListOptions{})
+	framework.ExpectNoError(err, "getting node list")
+	if len(nodeList.Items) != 1 {
+		return false, fmt.Errorf("expected 1 node, but saw %d. List: %v", len(nodeList.Items), nodeList.Items)
+	}
+
+	_, pressure := v1.GetNodeCondition(&nodeList.Items[0].Status, v1.NodeDiskPressure)
+	Expect(pressure).NotTo(BeNil())
+	hasPressure := pressure.Status == v1.ConditionTrue
+	By(fmt.Sprintf("checking if node has %s: %v", testCondition, hasPressure))
+
+	// Additional logging relating to inodes
+	summary, err := getNodeSummary()
+	if err != nil {
+		return false, err
+	}
+	if summary.Node.Runtime != nil && summary.Node.Runtime.ImageFs != nil && summary.Node.Runtime.ImageFs.Inodes != nil && summary.Node.Runtime.ImageFs.InodesFree != nil {
+		framework.Logf("imageFsInfo.Inodes: %d, imageFsInfo.InodesFree: %d", *summary.Node.Runtime.ImageFs.Inodes, *summary.Node.Runtime.ImageFs.InodesFree)
+	}
+	if summary.Node.Fs != nil && summary.Node.Fs.Inodes != nil && summary.Node.Fs.InodesFree != nil {
+		framework.Logf("rootFsInfo.Inodes: %d, rootFsInfo.InodesFree: %d", *summary.Node.Fs.Inodes, *summary.Node.Fs.InodesFree)
+	}
+	for _, pod := range summary.Pods {
+		framework.Logf("Pod: %s", pod.PodRef.Name)
+		for _, container := range pod.Containers {
+			if container.Rootfs != nil && container.Rootfs.InodesUsed != nil {
+				framework.Logf("--- summary Container: %s inodeUsage: %d", container.Name, *container.Rootfs.InodesUsed)
+			}
+		}
+		for _, volume := range pod.VolumeStats {
+			if volume.FsStats.InodesUsed != nil {
+				framework.Logf("--- summary Volume: %s inodeUsage: %d", volume.Name, *volume.FsStats.InodesUsed)
+			}
+		}
+	}
+	return hasPressure, nil
+}
diff --git a/test/test_owners.csv b/test/test_owners.csv
index b3d5adae724..838c3c1f706 100644
--- a/test/test_owners.csv
+++ b/test/test_owners.csv
@@ -961,3 +961,4 @@ k8s.io/kubernetes/test/integration/storageclasses,andyzheng0831,1
 k8s.io/kubernetes/test/integration/thirdparty,davidopp,1
 k8s.io/kubernetes/test/list,maisem,1
 kubelet Clean up pods on node kubelet should be able to delete * pods per node in *.,yujuhong,0
+"when we run containers that should cause * should eventually see *, and then evict all of the correct pods",Random-Liu,0