diff --git a/hack/jenkins/e2e.sh b/hack/jenkins/e2e.sh index 8d3a0f0350d..8fd043b2f6f 100755 --- a/hack/jenkins/e2e.sh +++ b/hack/jenkins/e2e.sh @@ -105,6 +105,7 @@ GCE_PARALLEL_SKIP_TESTS=( "Nodes\sNetwork" "Nodes\sResize" "MaxPods" + "SchedulerPredicates" "Services.*restarting" "Shell.*services" ) diff --git a/test/e2e/max_pods.go b/test/e2e/max_pods.go deleted file mode 100644 index ace5dc9f7b3..00000000000 --- a/test/e2e/max_pods.go +++ /dev/null @@ -1,159 +0,0 @@ -/* -Copyright 2015 The Kubernetes Authors All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package e2e - -import ( - "fmt" - "strconv" - "time" - - "github.com/GoogleCloudPlatform/kubernetes/pkg/api" - "github.com/GoogleCloudPlatform/kubernetes/pkg/client" - "github.com/GoogleCloudPlatform/kubernetes/pkg/fields" - "github.com/GoogleCloudPlatform/kubernetes/pkg/labels" - "github.com/GoogleCloudPlatform/kubernetes/pkg/util" - - . "github.com/onsi/ginkgo" - . "github.com/onsi/gomega" -) - -var _ = Describe("MaxPods", func() { - var c *client.Client - var nodeCount int - var totalPodCapacity int64 - var RCName string - var ns string - var uuid string - - BeforeEach(func() { - var err error - c, err = loadClient() - expectNoError(err) - nodes, err := c.Nodes().List(labels.Everything(), fields.Everything()) - expectNoError(err) - nodeCount = len(nodes.Items) - Expect(nodeCount).NotTo(BeZero()) - - totalPodCapacity = 0 - for _, node := range nodes.Items { - podCapacity, found := node.Status.Capacity["pods"] - Expect(found).To(Equal(true)) - totalPodCapacity += podCapacity.Value() - } - - err = deleteTestingNS(c) - expectNoError(err) - - nsForTesting, err := createTestingNS("maxp", c) - ns = nsForTesting.Name - expectNoError(err) - uuid = string(util.NewUUID()) - }) - - AfterEach(func() { - rc, err := c.ReplicationControllers(ns).Get(RCName) - if err == nil && rc.Spec.Replicas != 0 { - By("Cleaning up the replication controller") - err := DeleteRC(c, ns, RCName) - expectNoError(err) - } - - By(fmt.Sprintf("Destroying namespace for this suite %v", ns)) - if err := c.Namespaces().Delete(ns); err != nil { - Failf("Couldn't delete ns %s", err) - } - }) - - // This test verifies that max-pods flag works as advertised. It assumes that cluster add-on pods stay stable - // and cannot be run in parallel with any other test that touches Nodes or Pods. It is so because to check - // if max-pods is working we need to fully saturate the cluster and keep it in this state for few seconds. 
- It("Validates MaxPods limit number of pods that are allowed to run.", func() { - pods, err := c.Pods(api.NamespaceAll).List(labels.Everything(), fields.Everything()) - expectNoError(err) - currentlyRunningPods := len(pods.Items) - podsNeededForSaturation := int(totalPodCapacity) - currentlyRunningPods - - RCName = "max-pods" + strconv.FormatInt(totalPodCapacity, 10) + "-" + uuid - - config := RCConfig{Client: c, - Image: "gcr.io/google_containers/pause:go", - Name: RCName, - Namespace: ns, - PollInterval: 10 * time.Second, - Replicas: int(podsNeededForSaturation), - } - - By(fmt.Sprintf("Starting additional %v Pods to fully saturate the cluster and trying to start a new one", podsNeededForSaturation)) - expectNoError(RunRC(config)) - - // Log the amount of pods running in the cluster. - // TODO: remove when test is fixed. - pods, err = c.Pods(api.NamespaceAll).List(labels.Everything(), fields.Set{ - "status.phase": "Running", - }.AsSelector()) - Logf("Observed %v running Pods. Need %v to fully saturate the cluster.", len(pods.Items), totalPodCapacity) - - _, err = c.Pods(ns).Create(&api.Pod{ - TypeMeta: api.TypeMeta{ - Kind: "Pod", - }, - ObjectMeta: api.ObjectMeta{ - Name: "additional-pod", - Labels: map[string]string{"name": "additional"}, - }, - Spec: api.PodSpec{ - Containers: []api.Container{ - { - Name: "additional-pod", - Image: "gcr.io/google_containers/pause:go", - }, - }, - }, - }) - expectNoError(err) - // Wait a bit to allow scheduler to do its thing - time.Sleep(10 * time.Second) - - allPods, err := c.Pods(api.NamespaceAll).List(labels.Everything(), fields.Everything()) - expectNoError(err) - runningPods := 0 - notRunningPods := make([]api.Pod, 0) - for _, pod := range allPods.Items { - if pod.Status.Phase == api.PodRunning { - runningPods += 1 - } else { - notRunningPods = append(notRunningPods, pod) - } - } - - schedEvents, err := c.Events(ns).List( - labels.Everything(), - fields.Set{ - "involvedObject.kind": "Pod", - "involvedObject.name": "additional-pod", - "involvedObject.namespace": ns, - "source": "scheduler", - "reason": "failedScheduling", - }.AsSelector()) - expectNoError(err) - - Expect(runningPods).To(Equal(int(totalPodCapacity))) - Expect(len(notRunningPods)).To(Equal(1)) - Expect(schedEvents.Items).ToNot(BeEmpty()) - Expect(notRunningPods[0].Name).To(Equal("additional-pod")) - }) -}) diff --git a/test/e2e/scheduler_predicates.go b/test/e2e/scheduler_predicates.go new file mode 100644 index 00000000000..ef309985a4a --- /dev/null +++ b/test/e2e/scheduler_predicates.go @@ -0,0 +1,336 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package e2e
+
+import (
+    "fmt"
+    "time"
+
+    "github.com/GoogleCloudPlatform/kubernetes/pkg/api"
+    "github.com/GoogleCloudPlatform/kubernetes/pkg/api/resource"
+    "github.com/GoogleCloudPlatform/kubernetes/pkg/client"
+    "github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
+    "github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
+    "github.com/GoogleCloudPlatform/kubernetes/pkg/util"
+
+    . "github.com/onsi/ginkgo"
+    . "github.com/onsi/gomega"
+)
+
+// startPods is a simplified version of RunRC: instead of creating a replication controller it
+// creates plain Pods from the given Pod definition, which is what is needed to exercise the
+// various scheduler predicates. It then waits, up to a timeout, for all new Pods to be running.
+func startPods(c *client.Client, replicas int, ns string, podNamePrefix string, pod api.Pod) {
+    pods, err := c.Pods(api.NamespaceAll).List(labels.Everything(), fields.Everything())
+    expectNoError(err)
+    podsRunningBefore := len(pods.Items)
+
+    for i := 0; i < replicas; i++ {
+        podName := fmt.Sprintf("%v-%v", podNamePrefix, i)
+        pod.ObjectMeta.Name = podName
+        pod.ObjectMeta.Labels["name"] = podName
+        pod.Spec.Containers[0].Name = podName
+        _, err = c.Pods(ns).Create(&pod)
+        expectNoError(err)
+    }
+
+    // Wait for the pods to start running.
+    timeout := 2 * time.Minute
+    startTime := time.Now()
+    currentlyRunningPods := 0
+    for podsRunningBefore+replicas != currentlyRunningPods {
+        allPods, err := c.Pods(api.NamespaceAll).List(labels.Everything(), fields.Everything())
+        expectNoError(err)
+        runningPods := 0
+        for _, pod := range allPods.Items {
+            if pod.Status.Phase == api.PodRunning {
+                runningPods += 1
+            }
+        }
+        currentlyRunningPods = runningPods
+        if startTime.Add(timeout).Before(time.Now()) {
+            break
+        }
+        time.Sleep(5 * time.Second)
+    }
+    Expect(currentlyRunningPods).To(Equal(podsRunningBefore + replicas))
+}
+
+// getRequestedCPU sums the CPU limits of all containers in the pod, in millicores.
+func getRequestedCPU(pod api.Pod) int64 {
+    var result int64
+    for _, container := range pod.Spec.Containers {
+        result += container.Resources.Limits.Cpu().MilliValue()
+    }
+    return result
+}
+
+// verifyResult checks that exactly one Pod is not running, that it is the Pod named podName,
+// and that the scheduler reported a failedScheduling event for it.
+func verifyResult(c *client.Client, podName string, ns string) {
+    allPods, err := c.Pods(api.NamespaceAll).List(labels.Everything(), fields.Everything())
+    expectNoError(err)
+    runningPods := 0
+    notRunningPods := make([]api.Pod, 0)
+    for _, pod := range allPods.Items {
+        if pod.Status.Phase == api.PodRunning {
+            runningPods += 1
+        } else {
+            notRunningPods = append(notRunningPods, pod)
+        }
+    }
+
+    schedEvents, err := c.Events(ns).List(
+        labels.Everything(),
+        fields.Set{
+            "involvedObject.kind":      "Pod",
+            "involvedObject.name":      podName,
+            "involvedObject.namespace": ns,
+            "source":                   "scheduler",
+            "reason":                   "failedScheduling",
+        }.AsSelector())
+    expectNoError(err)
+
+    printed := false
+    printOnce := func(msg string) string {
+        if !printed {
+            printed = true
+            return msg
+        } else {
+            return ""
+        }
+    }
+
+    Expect(len(notRunningPods)).To(Equal(1), printOnce(fmt.Sprintf("Pods found in the cluster: %#v", allPods)))
+    Expect(schedEvents.Items).ToNot(BeEmpty(), printOnce(fmt.Sprintf("Pods found in the cluster: %#v", allPods)))
+    Expect(notRunningPods[0].Name).To(Equal(podName), printOnce(fmt.Sprintf("Pods found in the cluster: %#v", allPods)))
+}
+
+var _ = Describe("SchedulerPredicates", func() {
+    var c *client.Client
+    var nodeList *api.NodeList
+    var nodeCount int
+    var totalPodCapacity int64
+    var RCName string
+    var ns string
+    var uuid string
+
+    BeforeEach(func() {
+        var err error
+        c, err = loadClient()
+        expectNoError(err)
+        nodeList, err = c.Nodes().List(labels.Everything(), fields.Everything())
+        expectNoError(err)
+        nodeCount = len(nodeList.Items)
+        Expect(nodeCount).NotTo(BeZero())
+
+        err = deleteTestingNS(c)
+        expectNoError(err)
+
+        nsForTesting, err := createTestingNS("sched-pred", c)
+        ns = nsForTesting.Name
+        expectNoError(err)
+        uuid = string(util.NewUUID())
+    })
+
+    AfterEach(func() {
+        rc, err := c.ReplicationControllers(ns).Get(RCName)
+        if err == nil && rc.Spec.Replicas != 0 {
+            By("Cleaning up the replication controller")
+            err := DeleteRC(c, ns, RCName)
+            expectNoError(err)
+        }
+
+        By(fmt.Sprintf("Destroying namespace for this suite %v", ns))
+        if err := c.Namespaces().Delete(ns); err != nil {
+            Failf("Couldn't delete ns %s", err)
+        }
+    })
+
+    // This test verifies that the max-pods flag works as advertised. It assumes that cluster add-on pods
+    // stay stable and it cannot be run in parallel with any other test that touches Nodes or Pods. This is
+    // because, to check that max-pods is enforced, we need to fully saturate the cluster and keep it in
+    // that state for a few seconds.
+    It("validates that MaxPods limits the number of pods that are allowed to run.", func() {
+        totalPodCapacity = 0
+
+        for _, node := range nodeList.Items {
+            podCapacity, found := node.Status.Capacity["pods"]
+            Expect(found).To(Equal(true))
+            totalPodCapacity += podCapacity.Value()
+            Logf("Node: %v", node)
+        }
+
+        pods, err := c.Pods(api.NamespaceAll).List(labels.Everything(), fields.Everything())
+        expectNoError(err)
+        currentlyRunningPods := len(pods.Items)
+        podsNeededForSaturation := int(totalPodCapacity) - currentlyRunningPods
+
+        By(fmt.Sprintf("Starting additional %v Pods to fully saturate the cluster max pods and trying to start another one", podsNeededForSaturation))
+
+        startPods(c, podsNeededForSaturation, ns, "maxp", api.Pod{
+            TypeMeta: api.TypeMeta{
+                Kind: "Pod",
+            },
+            ObjectMeta: api.ObjectMeta{
+                Name:   "",
+                Labels: map[string]string{"name": ""},
+            },
+            Spec: api.PodSpec{
+                Containers: []api.Container{
+                    {
+                        Name:  "",
+                        Image: "gcr.io/google_containers/pause:go",
+                    },
+                },
+            },
+        })
+
+        podName := "additional-pod"
+        _, err = c.Pods(ns).Create(&api.Pod{
+            TypeMeta: api.TypeMeta{
+                Kind: "Pod",
+            },
+            ObjectMeta: api.ObjectMeta{
+                Name:   podName,
+                Labels: map[string]string{"name": "additional"},
+            },
+            Spec: api.PodSpec{
+                Containers: []api.Container{
+                    {
+                        Name:  podName,
+                        Image: "gcr.io/google_containers/pause:go",
+                    },
+                },
+            },
+        })
+        expectNoError(err)
+        // Wait a bit to allow the scheduler to do its thing.
+        time.Sleep(10 * time.Second)
+
+        verifyResult(c, podName, ns)
+    })
+
+    // This test verifies that we do not allow scheduling of pods in a way that the sum of the limits of the
+    // pods placed on a machine is greater than that machine's capacity. It assumes that cluster add-on pods
+    // stay stable and it cannot be run in parallel with any other test that touches Nodes or Pods, because
+    // we need precise control over what is running in the cluster.
+ It("validates resource limits of pods that are allowed to run.", func() { + nodeToCapacityMap := make(map[string]int64) + for _, node := range nodeList.Items { + capacity, found := node.Status.Capacity["cpu"] + Expect(found).To(Equal(true)) + nodeToCapacityMap[node.Name] = capacity.MilliValue() + } + + pods, err := c.Pods(api.NamespaceAll).List(labels.Everything(), fields.Everything()) + expectNoError(err) + for _, pod := range pods.Items { + _, found := nodeToCapacityMap[pod.Spec.NodeName] + Expect(found).To(Equal(true)) + Logf("Pod %v requesting capacity %v on Node %v", pod.Name, getRequestedCPU(pod), pod.Spec.NodeName) + nodeToCapacityMap[pod.Spec.NodeName] -= getRequestedCPU(pod) + } + + var podsNeededForSaturation int + for name, leftCapacity := range nodeToCapacityMap { + Logf("Node: %v has capacity: %v", name, leftCapacity) + podsNeededForSaturation += (int)(leftCapacity / 100) + } + + By(fmt.Sprintf("Starting additional %v Pods to fully saturate the cluster CPU and trying to start another one", podsNeededForSaturation)) + + startPods(c, podsNeededForSaturation, ns, "overcommit", api.Pod{ + TypeMeta: api.TypeMeta{ + Kind: "Pod", + }, + ObjectMeta: api.ObjectMeta{ + Name: "", + Labels: map[string]string{"name": ""}, + }, + Spec: api.PodSpec{ + Containers: []api.Container{ + { + Name: "", + Image: "gcr.io/google_containers/pause:go", + Resources: api.ResourceRequirements{ + Limits: api.ResourceList{ + "cpu": *resource.NewMilliQuantity(100, "DecimalSI"), + }, + }, + }, + }, + }, + }) + + podName := "additional-pod" + _, err = c.Pods(ns).Create(&api.Pod{ + TypeMeta: api.TypeMeta{ + Kind: "Pod", + }, + ObjectMeta: api.ObjectMeta{ + Name: podName, + Labels: map[string]string{"name": "additional"}, + }, + Spec: api.PodSpec{ + Containers: []api.Container{ + { + Name: podName, + Image: "gcr.io/google_containers/pause:go", + Resources: api.ResourceRequirements{ + Limits: api.ResourceList{ + "cpu": *resource.NewMilliQuantity(100, "DecimalSI"), + }, + }, + }, + }, + }, + }) + expectNoError(err) + // Wait a bit to allow scheduler to do its thing + time.Sleep(10 * time.Second) + + verifyResult(c, podName, ns) + }) + + // Test Nodes does not have any label, hence it should be impossible to schedule Pod with + // nonempty Selector set. + It("validates that NodeSelector is respected.", func() { + By("Trying to schedule Pod with nonempty NodeSelector.") + podName := "restricted-pod" + + _, err := c.Pods(ns).Create(&api.Pod{ + TypeMeta: api.TypeMeta{ + Kind: "Pod", + }, + ObjectMeta: api.ObjectMeta{ + Name: podName, + Labels: map[string]string{"name": "restricted"}, + }, + Spec: api.PodSpec{ + Containers: []api.Container{ + { + Name: podName, + Image: "gcr.io/google_containers/pause:go", + }, + }, + NodeSelector: map[string]string{ + "label": "nonempty", + }, + }, + }) + expectNoError(err) + // Wait a bit to allow scheduler to do its thing + time.Sleep(10 * time.Second) + + verifyResult(c, podName, ns) + }) +})