diff --git a/test/e2e/framework/test_context.go b/test/e2e/framework/test_context.go
index 2f0d5b6cf0d..76b3acbabd2 100644
--- a/test/e2e/framework/test_context.go
+++ b/test/e2e/framework/test_context.go
@@ -71,6 +71,8 @@ type TestContextType struct {
 	NodeName string
 	// Whether to enable the QoS Cgroup Hierarchy or not
 	CgroupsPerQOS bool
+	// The hard eviction thresholds
+	EvictionHard string
 }
 
 type CloudConfig struct {
@@ -150,4 +152,5 @@ func RegisterClusterFlags() {
 func RegisterNodeFlags() {
 	flag.StringVar(&TestContext.NodeName, "node-name", "", "Name of the node to run tests on (node e2e suite only).")
 	flag.BoolVar(&TestContext.CgroupsPerQOS, "cgroups-per-qos", false, "Enable creation of QoS cgroup hierarchy, if true top level QoS and pod cgroups are created.")
+	flag.StringVar(&TestContext.EvictionHard, "eviction-hard", "", "The hard eviction thresholds. If set, pods are evicted when the available amount of a specified resource drops below its threshold.")
 }
diff --git a/test/e2e_node/disk_eviction_test.go b/test/e2e_node/disk_eviction_test.go
new file mode 100644
index 00000000000..e32231d4235
--- /dev/null
+++ b/test/e2e_node/disk_eviction_test.go
@@ -0,0 +1,198 @@
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e_node
+
+import (
+	"fmt"
+	"os/exec"
+	"strings"
+	"time"
+
+	"k8s.io/kubernetes/pkg/api"
+	client "k8s.io/kubernetes/pkg/client/unversioned"
+	"k8s.io/kubernetes/pkg/kubelet/dockertools"
+	"k8s.io/kubernetes/pkg/util/uuid"
+	"k8s.io/kubernetes/test/e2e/framework"
+
+	. "github.com/onsi/ginkgo"
+	. "github.com/onsi/gomega"
+)
+
+const (
+	// podCheckInterval is the interval between pod status checks.
+	podCheckInterval = time.Second * 2
+
+	// dummyFile is the name prefix of the files the busy pod writes to fill the disk.
+	dummyFile = "dummy"
+)
+
+// TODO: Leverage dynamic kubelet settings, once implemented, so that this test only has to modify the kubelet eviction option.
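+//
+// The value of --eviction-hard is passed through verbatim to the kubelet (see
+// the e2e_service.go change below), so it follows the kubelet's flag syntax:
+// a comma-separated list of signal<quantity thresholds, for example
+// "memory.available<100Mi,nodefs.available<15Gi".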
+// To manually trigger the test on a node with disk space just over 15Gi:
+// make test-e2e-node FOCUS="hard eviction test" TEST_ARGS="--eviction-hard=nodefs.available<15Gi"
+var _ = framework.KubeDescribe("Kubelet Eviction Manager [Flaky] [Serial] [Disruptive]", func() {
+	f := framework.NewDefaultFramework("kubelet-eviction-manager")
+	var podClient *framework.PodClient
+	var c *client.Client
+	var n *api.Node
+
+	BeforeEach(func() {
+		podClient = f.PodClient()
+		c = f.Client
+		nodeList := framework.GetReadySchedulableNodesOrDie(c)
+		n = &nodeList.Items[0]
+	})
+
+	Describe("hard eviction test", func() {
+		Context("pod using the most disk space gets evicted when the node disk usage is above the eviction hard threshold", func() {
+			var busyPodName, idlePodName string
+			var containersToCleanUp map[string]bool
+
+			AfterEach(func() {
+				podClient.Delete(busyPodName, &api.DeleteOptions{})
+				podClient.Delete(idlePodName, &api.DeleteOptions{})
+				for container := range containersToCleanUp {
+					// TODO: make this cleanup container-runtime agnostic.
+					cmd := exec.Command("docker", "rm", "-f", strings.TrimPrefix(container, dockertools.DockerPrefix))
+					cmd.Run()
+				}
+			})
+
+			BeforeEach(func() {
+				if !evictionOptionIsSet() {
+					return
+				}
+
+				busyPodName = "to-evict" + string(uuid.NewUUID())
+				idlePodName = "idle" + string(uuid.NewUUID())
+				containersToCleanUp = make(map[string]bool)
+				podClient.Create(&api.Pod{
+					ObjectMeta: api.ObjectMeta{
+						Name: idlePodName,
+					},
+					Spec: api.PodSpec{
+						RestartPolicy: api.RestartPolicyNever,
+						Containers: []api.Container{
+							{
+								Image: ImageRegistry[pauseImage],
+								Name:  idlePodName,
+							},
+						},
+					},
+				})
+				podClient.Create(&api.Pod{
+					ObjectMeta: api.ObjectMeta{
+						Name: busyPodName,
+					},
+					Spec: api.PodSpec{
+						RestartPolicy: api.RestartPolicyNever,
+						Containers: []api.Container{
+							{
+								Image: ImageRegistry[busyBoxImage],
+								Name:  busyPodName,
+								// Fill the disk in 40MB increments until the hard eviction threshold is crossed.
+								Command: []string{"sh", "-c",
+									fmt.Sprintf("for NUM in `seq 1 1 1000`; do dd if=/dev/urandom of=%s.$NUM bs=4000000 count=10; sleep 3; done",
+										dummyFile)},
+							},
+						},
+					},
+				})
+			})
+
+			It("should evict the pod using the most disk space", func() {
+				if !evictionOptionIsSet() {
+					framework.Logf("test skipped because the eviction option is not set")
+					return
+				}
+
+				evictionOccurred := false
+				Eventually(func() error {
+					if !evictionOccurred {
+						podData, err := podClient.Get(busyPodName)
+						if err != nil {
+							return err
+						}
+						recordContainerId(containersToCleanUp, podData.Status.ContainerStatuses)
+
+						err = verifyPodEviction(podData)
+						if err != nil {
+							return err
+						}
+						if !nodeHasDiskPressure(f.Client) {
+							return fmt.Errorf("expected disk pressure condition to be set on the node")
+						}
+
+						podData, err = podClient.Get(idlePodName)
+						if err != nil {
+							return err
+						}
+						recordContainerId(containersToCleanUp, podData.Status.ContainerStatuses)
+
+						if podData.Status.Phase != api.PodRunning {
+							return fmt.Errorf("expected phase to be running. got %+v", podData.Status.Phase)
+						}
+
+						evictionOccurred = true
+					}
+
+					// Eviction frees the disk space consumed by the busy pod, so the node's disk pressure condition should eventually clear.
+					if nodeHasDiskPressure(f.Client) {
+						return fmt.Errorf("expected disk pressure condition to be relieved after eviction")
+					}
+					return nil
+				}, time.Minute*5, podCheckInterval).Should(BeNil())
+			})
+		})
+	})
+})
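+
+// verifyPodEviction checks that the pod has reached the terminal state the
+// eviction manager assigns to evicted pods: phase Failed with reason "Evicted".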
got %+v", podData.Status.Phase) + } + if podData.Status.Reason != "Evicted" { + return fmt.Errorf("expected failed reason to be evicted. got %+v", podData.Status.Reason) + } + return nil +} + +func nodeHasDiskPressure(c *client.Client) bool { + nodeList := framework.GetReadySchedulableNodesOrDie(c) + for _, condition := range nodeList.Items[0].Status.Conditions { + if condition.Type == api.NodeDiskPressure { + return condition.Status == api.ConditionTrue + } + } + return false +} + +func recordContainerId(containersToCleanUp map[string]bool, containerStatuses []api.ContainerStatus) { + for _, status := range containerStatuses { + containersToCleanUp[status.ContainerID] = true + } +} + +func evictionOptionIsSet() bool { + return len(framework.TestContext.EvictionHard) > 0 +} diff --git a/test/e2e_node/e2e_node_suite_test.go b/test/e2e_node/e2e_node_suite_test.go index 95193a9d540..5d5c99ab762 100644 --- a/test/e2e_node/e2e_node_suite_test.go +++ b/test/e2e_node/e2e_node_suite_test.go @@ -125,7 +125,7 @@ var _ = SynchronizedBeforeSuite(func() []byte { shared := &SharedContext{} if *startServices { - e2es = newE2eService(framework.TestContext.NodeName, framework.TestContext.CgroupsPerQOS, shared) + e2es = newE2eService(framework.TestContext.NodeName, framework.TestContext.CgroupsPerQOS, framework.TestContext.EvictionHard, shared) if err := e2es.start(); err != nil { Fail(fmt.Sprintf("Unable to start node services.\n%v", err)) } diff --git a/test/e2e_node/e2e_service.go b/test/e2e_node/e2e_service.go index d916c81665a..7db7b4a59a0 100644 --- a/test/e2e_node/e2e_service.go +++ b/test/e2e_node/e2e_service.go @@ -47,6 +47,7 @@ type e2eService struct { nodeName string logFiles map[string]logFileData cgroupsPerQOS bool + evictionHard string } type logFileData struct { @@ -61,7 +62,7 @@ const ( defaultEtcdPath = "/tmp/etcd" ) -func newE2eService(nodeName string, cgroupsPerQOS bool, context *SharedContext) *e2eService { +func newE2eService(nodeName string, cgroupsPerQOS bool, evictionHard string, context *SharedContext) *e2eService { // Special log files that need to be collected for additional debugging. var logFiles = map[string]logFileData{ "kern.log": {[]string{"/var/log/kern.log"}, []string{"-k"}}, @@ -73,6 +74,7 @@ func newE2eService(nodeName string, cgroupsPerQOS bool, context *SharedContext) nodeName: nodeName, logFiles: logFiles, cgroupsPerQOS: cgroupsPerQOS, + evictionHard: evictionHard, } } @@ -263,6 +265,8 @@ func (es *e2eService) startKubeletServer() (*killCmd, error) { "--file-check-frequency", "10s", // Check file frequently so tests won't wait too long "--v", LOG_VERBOSITY_LEVEL, "--logtostderr", "--pod-cidr=10.180.0.0/24", // Assign a fixed CIDR to the node because there is no node controller. + "--eviction-hard", es.evictionHard, + "--eviction-pressure-transition-period", "30s", ) if es.cgroupsPerQOS { cmdArgs = append(cmdArgs,