diff --git a/hack/jenkins/e2e.sh b/hack/jenkins/e2e.sh
index 225d9bb08bc..c8917972f2a 100755
--- a/hack/jenkins/e2e.sh
+++ b/hack/jenkins/e2e.sh
@@ -197,6 +197,7 @@ GCE_PARALLEL_SKIP_TESTS=(
     "Resource\susage\sof\ssystem\scontainers"
     "SchedulerPredicates"
     "resource\susage\stracking"
+    "NodeOutOfDisk"
     "${DISRUPTIVE_TESTS[@]}"
     )
 
diff --git a/test/e2e/es_cluster_logging.go b/test/e2e/es_cluster_logging.go
index d2a6bca38c9..41a3b890bab 100644
--- a/test/e2e/es_cluster_logging.go
+++ b/test/e2e/es_cluster_logging.go
@@ -185,7 +185,7 @@ func ClusterLevelLoggingWithElasticsearch(f *Framework) {
 	// Previous tests may have cause failures of some nodes. Let's skip
 	// 'Not Ready' nodes, just in case (there is no need to fail the test).
 	filterNodes(nodes, func(node api.Node) bool {
-		return isNodeReadySetAsExpected(&node, true)
+		return isNodeConditionSetAsExpected(&node, api.NodeReady, true)
 	})
 	if len(nodes.Items) < 2 {
 		Failf("Less than two nodes were found Ready: %d", len(nodes.Items))
diff --git a/test/e2e/networking.go b/test/e2e/networking.go
index 84a4fcacd19..00ef24588b5 100644
--- a/test/e2e/networking.go
+++ b/test/e2e/networking.go
@@ -145,7 +145,7 @@ var _ = Describe("Networking", func() {
 			// previous tests may have cause failures of some nodes. Let's skip
 			// 'Not Ready' nodes, just in case (there is no need to fail the test).
 			filterNodes(nodes, func(node api.Node) bool {
-				return isNodeReadySetAsExpected(&node, true)
+				return isNodeConditionSetAsExpected(&node, api.NodeReady, true)
 			})
 
 			if len(nodes.Items) == 0 {
diff --git a/test/e2e/nodeoutofdisk.go b/test/e2e/nodeoutofdisk.go
new file mode 100644
index 00000000000..2ff607a1a5e
--- /dev/null
+++ b/test/e2e/nodeoutofdisk.go
@@ -0,0 +1,245 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e
+
+import (
+	"encoding/json"
+	"fmt"
+	"time"
+
+	cadvisorapi "github.com/google/cadvisor/info/v1"
+	"k8s.io/kubernetes/pkg/api"
+	"k8s.io/kubernetes/pkg/api/resource"
+	client "k8s.io/kubernetes/pkg/client/unversioned"
+	"k8s.io/kubernetes/pkg/fields"
+	"k8s.io/kubernetes/pkg/labels"
+	"k8s.io/kubernetes/pkg/util/wait"
+
+	. "github.com/onsi/ginkgo"
+	. "github.com/onsi/gomega"
+)
+
+const (
+	mb = 1024 * 1024
+	gb = 1024 * mb
+
+	// TODO(madhusudancs): find a way to query kubelet's disk space manager to obtain this value. 256MB
+	// is the default that is set today. This test might break if the default value changes. This value
+	// can be configured by setting the "low-diskspace-threshold-mb" flag while starting a kubelet.
+	// However, kubelets are started as part of the cluster start up, once, before any e2e test is run,
+	// and remain unchanged until all the tests are run and the cluster is brought down. Changing the
+	// flag value affects all the e2e tests. So we are hard-coding this value for now.
+	lowDiskSpaceThreshold uint64 = 256 * mb
+
+	nodeOODTimeOut = 1 * time.Minute
+
+	numNodeOODPods = 3
+)
+
+// Plan:
+// 1. Fill disk space on all nodes except one. One node is left out so that we can schedule pods
+//    on that node. Arbitrarily choose that node to be node with index 0.
+// 2. Get the CPU capacity on the unfilled node.
+// 3. Divide the CPU capacity into one less than the number of pods we want to schedule. We want
+//    to schedule 3 pods, so divide CPU capacity by 2.
+// 4. Request the divided capacity for each pod.
+// 5. Observe that 2 of the pods schedule onto the node whose disk is not full, and the remaining
+//    pod stays pending and does not schedule onto the nodes whose disks are full nor the node
+//    with the other two pods, since there is not enough free CPU capacity there.
+// 6. Recover disk space from one of the nodes whose disk space was previously filled. Arbitrarily
+//    choose that node to be node with index 1.
+// 7. Observe that the pod in pending status schedules on that node.
+//
+var _ = Describe("NodeOutOfDisk", func() {
+	var c *client.Client
+	var unfilledNodeName, recoveredNodeName string
+	framework := Framework{BaseName: "node-outofdisk"}
+
+	BeforeEach(func() {
+		framework.beforeEach()
+		c = framework.Client
+
+		nodelist, err := listNodes(c, labels.Everything(), fields.Everything())
+		expectNoError(err, "Error retrieving nodes")
+		Expect(len(nodelist.Items)).To(BeNumerically(">", 1))
+
+		unfilledNodeName = nodelist.Items[0].Name
+		for _, node := range nodelist.Items[1:] {
+			fillDiskSpace(c, &node)
+		}
+	})
+
+	AfterEach(func() {
+		defer framework.afterEach()
+
+		nodelist, err := listNodes(c, labels.Everything(), fields.Everything())
+		expectNoError(err, "Error retrieving nodes")
+		Expect(len(nodelist.Items)).ToNot(BeZero())
+		for _, node := range nodelist.Items {
+			if unfilledNodeName == node.Name || recoveredNodeName == node.Name {
+				continue
+			}
+			recoverDiskSpace(c, &node)
+		}
+	})
+
+	It("runs out of disk space", func() {
+		unfilledNode, err := c.Nodes().Get(unfilledNodeName)
+		expectNoError(err)
+
+		By(fmt.Sprintf("Get CPU capacity on node %s", unfilledNode.Name))
+
+		milliCPU := unfilledNode.Status.Capacity.Cpu().MilliValue()
+		// Per pod CPU should be just enough to fit only (numNodeOODPods - 1) pods on the
+		// given node. We compute this value by dividing the available CPU capacity on the given
+		// node by (numNodeOODPods - 1) and subtracting ϵ from it.
+		podCPU := (milliCPU / (numNodeOODPods - 1)) - (milliCPU / 5)
+
+		ns := framework.Namespace.Name
+		podClient := c.Pods(ns)
+
+		By("Creating pods and waiting for all but one pod to be scheduled")
+
+		for i := 0; i < numNodeOODPods-1; i++ {
+			name := fmt.Sprintf("pod-node-outofdisk-%d", i)
+			createOutOfDiskPod(c, ns, name, podCPU)
+
+			expectNoError(framework.WaitForPodRunning(name))
+			pod, err := podClient.Get(name)
+			expectNoError(err)
+			Expect(pod.Spec.NodeName).To(Equal(unfilledNodeName))
+		}
+
+		pendingPodName := fmt.Sprintf("pod-node-outofdisk-%d", numNodeOODPods-1)
+		createOutOfDiskPod(c, ns, pendingPodName, podCPU)
+
+		By(fmt.Sprintf("Finding a failed scheduler event for pod %s", pendingPodName))
+		wait.Poll(2*time.Second, 5*time.Minute, func() (bool, error) {
+			schedEvents, err := c.Events(ns).List(
+				labels.Everything(),
+				fields.Set{
+					"involvedObject.kind":      "Pod",
+					"involvedObject.name":      pendingPodName,
+					"involvedObject.namespace": ns,
+					"source":                   "scheduler",
+					"reason":                   "FailedScheduling",
+				}.AsSelector())
+			expectNoError(err)
+
+			if len(schedEvents.Items) > 0 {
+				return true, nil
+			} else {
+				return false, nil
+			}
+		})
+
+		nodelist, err := listNodes(c, labels.Everything(), fields.Everything())
+		expectNoError(err, "Error retrieving nodes")
+		Expect(len(nodelist.Items)).To(BeNumerically(">", 1))
+
+		nodeToRecover := nodelist.Items[1]
+		Expect(nodeToRecover.Name).ToNot(Equal(unfilledNodeName))
+
+		By(fmt.Sprintf("Recovering disk space on node %s", nodeToRecover.Name))
+		recoverDiskSpace(c, &nodeToRecover)
+		recoveredNodeName = nodeToRecover.Name
+
+		By(fmt.Sprintf("Verifying that pod %s schedules on node %s", pendingPodName, recoveredNodeName))
+		expectNoError(framework.WaitForPodRunning(pendingPodName))
+		pendingPod, err := podClient.Get(pendingPodName)
+		expectNoError(err)
+		Expect(pendingPod.Spec.NodeName).To(Equal(recoveredNodeName))
+	})
+})
+
+// createOutOfDiskPod creates a pod in the given namespace with the requested amount of CPU.
+func createOutOfDiskPod(c *client.Client, ns, name string, milliCPU int64) {
+	podClient := c.Pods(ns)
+
+	pod := &api.Pod{
+		ObjectMeta: api.ObjectMeta{
+			Name: name,
+		},
+		Spec: api.PodSpec{
+			Containers: []api.Container{
+				{
+					Name:  "pause",
+					Image: "beta.gcr.io/google_containers/pause:2.0",
+					Resources: api.ResourceRequirements{
+						Requests: api.ResourceList{
+							// Request enough CPU to fit only two pods on a given node.
+							api.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
+						},
+					},
+				},
+			},
+		},
+	}
+
+	_, err := podClient.Create(pod)
+	expectNoError(err)
+}
+
+// availSize returns the available disk space on a given node by querying node stats which
+// is in turn obtained internally from cadvisor.
+func availSize(c *client.Client, node *api.Node) (uint64, error) {
+	statsResource := fmt.Sprintf("api/v1/proxy/nodes/%s/stats/", node.Name)
+	Logf("Querying stats for node %s using url %s", node.Name, statsResource)
+	res, err := c.Get().AbsPath(statsResource).Timeout(timeout).Do().Raw()
+	if err != nil {
+		return 0, fmt.Errorf("error querying cAdvisor API: %v", err)
+	}
+	ci := cadvisorapi.ContainerInfo{}
+	err = json.Unmarshal(res, &ci)
+	if err != nil {
+		return 0, fmt.Errorf("couldn't unmarshal container info: %v", err)
+	}
+	return ci.Stats[len(ci.Stats)-1].Filesystem[0].Available, nil
+}
+
+// fillDiskSpace fills the available disk space on a given node by creating a large file. The disk
+// space on the node is filled in such a way that the available space after filling the disk is just
+// below the lowDiskSpaceThreshold mark.
+func fillDiskSpace(c *client.Client, node *api.Node) {
+	avail, err := availSize(c, node)
+	expectNoError(err, "Node %s: couldn't obtain available disk size %v", node.Name, err)
+
+	fillSize := (avail - lowDiskSpaceThreshold + (100 * mb))
+
+	Logf("Node %s: disk space available %d bytes", node.Name, avail)
+	By(fmt.Sprintf("Node %s: creating a file of size %d bytes to fill the available disk space", node.Name, fillSize))
+
+	cmd := fmt.Sprintf("fallocate -l %d test.img", fillSize)
+	expectNoError(issueSSHCommand(cmd, testContext.Provider, node))
+
+	ood := waitForNodeToBe(c, node.Name, api.NodeOutOfDisk, true, nodeOODTimeOut)
+	Expect(ood).To(BeTrue(), "Node %s did not run out of disk within %v", node.Name, nodeOODTimeOut)
+
+	avail, err = availSize(c, node)
+	Logf("Node %s: disk space available %d bytes", node.Name, avail)
+	Expect(avail < lowDiskSpaceThreshold).To(BeTrue())
+}
+
+// recoverDiskSpace recovers disk space, filled by creating a large file, on a given node.
+func recoverDiskSpace(c *client.Client, node *api.Node) {
+	By(fmt.Sprintf("Recovering disk space on node %s", node.Name))
+	cmd := "rm -f test.img"
+	expectNoError(issueSSHCommand(cmd, testContext.Provider, node))
+
+	ood := waitForNodeToBe(c, node.Name, api.NodeOutOfDisk, false, nodeOODTimeOut)
+	Expect(ood).To(BeTrue(), "Node %s's out of disk condition status did not change to false within %v", node.Name, nodeOODTimeOut)
+}
diff --git a/test/e2e/reboot.go b/test/e2e/reboot.go
index ee44fe6992d..cec051a0a06 100644
--- a/test/e2e/reboot.go
+++ b/test/e2e/reboot.go
@@ -149,25 +149,6 @@ func testReboot(c *client.Client, rebootCmd string) {
 	}
 }
 
-func issueSSHCommand(node *api.Node, provider, cmd string) error {
-	Logf("Getting external IP address for %s", node.Name)
-	host := ""
-	for _, a := range node.Status.Addresses {
-		if a.Type == api.NodeExternalIP {
-			host = a.Address + ":22"
-			break
-		}
-	}
-	if host == "" {
-		return fmt.Errorf("couldn't find external IP address for node %s", node.Name)
-	}
-	Logf("Calling %s on %s", cmd, node.Name)
-	if _, _, code, err := SSH(cmd, host, provider); code != 0 || err != nil {
-		return fmt.Errorf("when running %s on %s, got %d and %v", cmd, node.Name, code, err)
-	}
-	return nil
-}
-
 // rebootNode takes node name on provider through the following steps using c:
 //   - ensures the node is ready
 //   - ensures all pods on the node are running and ready
@@ -222,7 +203,7 @@ func rebootNode(c *client.Client, provider, name, rebootCmd string) bool {
 	}
 
 	// Reboot the node.
-	if err = issueSSHCommand(node, provider, rebootCmd); err != nil {
+	if err = issueSSHCommand(rebootCmd, provider, node); err != nil {
 		Logf("Error while issuing ssh command: %v", err)
 		return false
 	}
diff --git a/test/e2e/resize_nodes.go b/test/e2e/resize_nodes.go
index 06cc68ec254..aa264203175 100644
--- a/test/e2e/resize_nodes.go
+++ b/test/e2e/resize_nodes.go
@@ -354,7 +354,7 @@ func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replica
 	}()
 
 	Logf("Waiting %v to ensure node %s is ready before beginning test...", resizeNodeReadyTimeout, node.Name)
-	if !waitForNodeToBe(c, node.Name, true, resizeNodeReadyTimeout) {
+	if !waitForNodeToBe(c, node.Name, api.NodeReady, true, resizeNodeReadyTimeout) {
 		Failf("Node %s did not become ready within %v", node.Name, resizeNodeReadyTimeout)
 	}
 
@@ -370,7 +370,7 @@ func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replica
 	}
 
 	Logf("Waiting %v for node %s to be not ready after simulated network failure", resizeNodeNotReadyTimeout, node.Name)
-	if !waitForNodeToBe(c, node.Name, false, resizeNodeNotReadyTimeout) {
+	if !waitForNodeToBe(c, node.Name, api.NodeReady, false, resizeNodeNotReadyTimeout) {
 		Failf("Node %s did not become not-ready within %v", node.Name, resizeNodeNotReadyTimeout)
 	}
 
diff --git a/test/e2e/util.go b/test/e2e/util.go
index 49a7ccdaf94..3f780c88543 100644
--- a/test/e2e/util.go
+++ b/test/e2e/util.go
@@ -1956,6 +1956,25 @@ func sshCore(cmd, host, provider string, verbose bool) (string, string, int, err
 	return stdout, stderr, code, err
 }
 
+func issueSSHCommand(cmd, provider string, node *api.Node) error {
+	Logf("Getting external IP address for %s", node.Name)
+	host := ""
+	for _, a := range node.Status.Addresses {
+		if a.Type == api.NodeExternalIP {
+			host = a.Address + ":22"
+			break
+		}
+	}
+	if host == "" {
+		return fmt.Errorf("couldn't find external IP address for node %s", node.Name)
+	}
+	Logf("Calling %s on %s", cmd, node.Name)
+	if _, _, code, err := SSH(cmd, host, provider); code != 0 || err != nil {
+		return fmt.Errorf("when running %s on %s, got %d and %v", cmd, node.Name, code, err)
+	}
+	return nil
+}
+
 // NewHostExecPodSpec returns the pod spec of hostexec pod
 func NewHostExecPodSpec(ns, name string) *api.Pod {
 	pod := &api.Pod{
@@ -2058,38 +2077,37 @@ func checkPodsRunningReady(c *client.Client, ns string, podNames []string, timeo
 
 // waitForNodeToBeReady returns whether node name is ready within timeout.
 func waitForNodeToBeReady(c *client.Client, name string, timeout time.Duration) bool {
-	return waitForNodeToBe(c, name, true, timeout)
+	return waitForNodeToBe(c, name, api.NodeReady, true, timeout)
 }
 
 // waitForNodeToBeNotReady returns whether node name is not ready (i.e. the
 // readiness condition is anything but ready, e.g false or unknown) within
 // timeout.
 func waitForNodeToBeNotReady(c *client.Client, name string, timeout time.Duration) bool {
-	return waitForNodeToBe(c, name, false, timeout)
+	return waitForNodeToBe(c, name, api.NodeReady, false, timeout)
 }
 
-func isNodeReadySetAsExpected(node *api.Node, wantReady bool) bool {
+func isNodeConditionSetAsExpected(node *api.Node, conditionType api.NodeConditionType, wantTrue bool) bool {
 	// Check the node readiness condition (logging all).
 	for i, cond := range node.Status.Conditions {
 		Logf("Node %s condition %d/%d: type: %v, status: %v, reason: %q, message: %q, last transition time: %v",
 			node.Name, i+1, len(node.Status.Conditions), cond.Type, cond.Status, cond.Reason, cond.Message, cond.LastTransitionTime)
-		// Ensure that the condition type is readiness and the status
-		// matches as desired.
-		if cond.Type == api.NodeReady && (cond.Status == api.ConditionTrue) == wantReady {
-			Logf("Successfully found node %s readiness to be %t", node.Name, wantReady)
+		// Ensure that the condition type and the status match as desired.
+		if cond.Type == conditionType && (cond.Status == api.ConditionTrue) == wantTrue {
+			Logf("Successfully found condition %s of node %s to be %t", conditionType, node.Name, wantTrue)
 			return true
 		}
 	}
 	return false
 }
 
-// waitForNodeToBe returns whether node name's readiness state matches wantReady
-// within timeout. If wantReady is true, it will ensure the node is ready; if
-// it's false, it ensures the node is in any state other than ready (e.g. not
-// ready or unknown).
-func waitForNodeToBe(c *client.Client, name string, wantReady bool, timeout time.Duration) bool {
-	Logf("Waiting up to %v for node %s readiness to be %t", timeout, name, wantReady)
+// waitForNodeToBe returns whether node name's condition state matches wantTrue
+// within timeout. If wantTrue is true, it will ensure the node condition status
+// is ConditionTrue; if it's false, it ensures the node condition is in any state
+// other than ConditionTrue (e.g. not true or unknown).
+func waitForNodeToBe(c *client.Client, name string, conditionType api.NodeConditionType, wantTrue bool, timeout time.Duration) bool {
+	Logf("Waiting up to %v for node %s condition %s to be %t", timeout, name, conditionType, wantTrue)
 	for start := time.Now(); time.Since(start) < timeout; time.Sleep(poll) {
 		node, err := c.Nodes().Get(name)
 		if err != nil {
@@ -2097,11 +2115,11 @@ func waitForNodeToBe(c *client.Client, name string, wantReady bool, timeout time
 			continue
 		}
 
-		if isNodeReadySetAsExpected(node, wantReady) {
+		if isNodeConditionSetAsExpected(node, conditionType, wantTrue) {
 			return true
 		}
 	}
-	Logf("Node %s didn't reach desired readiness (%t) within %v", name, wantReady, timeout)
+	Logf("Node %s didn't reach desired %s condition status (%t) within %v", name, conditionType, wantTrue, timeout)
 	return false
 }
 
@@ -2117,7 +2135,7 @@ func allNodesReady(c *client.Client, timeout time.Duration) error {
 			return false, err
 		}
 		for _, node := range nodes.Items {
-			if !isNodeReadySetAsExpected(&node, true) {
+			if !isNodeConditionSetAsExpected(&node, api.NodeReady, true) {
 				notReady = append(notReady, node)
 			}
 		}
@@ -2218,7 +2236,7 @@ func waitForClusterSize(c *client.Client, size int, timeout time.Duration) error
 
 		// Filter out not-ready nodes.
 		filterNodes(nodes, func(node api.Node) bool {
-			return isNodeReadySetAsExpected(&node, true)
+			return isNodeConditionSetAsExpected(&node, api.NodeReady, true)
 		})
 
 		numReady := len(nodes.Items)