Merge pull request #11151 from gmarek/max_pods_e2e

Auto commit by PR queue bot
This commit is contained in:
Alex Mohr 2015-08-04 15:11:13 -07:00
commit 2d81775a2d
3 changed files with 337 additions and 159 deletions

View File

@@ -105,6 +105,7 @@ GCE_PARALLEL_SKIP_TESTS=(
"Nodes\sNetwork"
"Nodes\sResize"
"MaxPods"
"SchedulerPredicates"
"Services.*restarting"
"Shell.*services"
)
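The skip entries above are regular expressions matched against Ginkgo test descriptions, so the new "SchedulerPredicates" entry keeps the suite added in this PR out of the parallel GCE run. As a minimal sketch of how such a list is consumed — assuming a harness that joins the patterns into a single skip regex, which is illustrative and not this repo's actual logic:

package main

import (
	"fmt"
	"regexp"
	"strings"
)

func main() {
	// Hypothetical mirror of the GCE_PARALLEL_SKIP_TESTS entries above.
	skips := []string{
		`Nodes\sNetwork`,
		`Nodes\sResize`,
		`MaxPods`,
		`SchedulerPredicates`,
		`Services.*restarting`,
		`Shell.*services`,
	}
	// Join the entries into one alternation and match it against a
	// Ginkgo test description, the way a skip flag would.
	skip := regexp.MustCompile(strings.Join(skips, "|"))
	fmt.Println(skip.MatchString("SchedulerPredicates validates that MaxPods limits the number of pods that are allowed to run.")) // true
}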

View File

@@ -1,159 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package e2e
import (
"fmt"
"strconv"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
var _ = Describe("MaxPods", func() {
var c *client.Client
var nodeCount int
var totalPodCapacity int64
var RCName string
var ns string
var uuid string
BeforeEach(func() {
var err error
c, err = loadClient()
expectNoError(err)
nodes, err := c.Nodes().List(labels.Everything(), fields.Everything())
expectNoError(err)
nodeCount = len(nodes.Items)
Expect(nodeCount).NotTo(BeZero())
totalPodCapacity = 0
for _, node := range nodes.Items {
podCapacity, found := node.Status.Capacity["pods"]
Expect(found).To(Equal(true))
totalPodCapacity += podCapacity.Value()
}
err = deleteTestingNS(c)
expectNoError(err)
nsForTesting, err := createTestingNS("maxp", c)
ns = nsForTesting.Name
expectNoError(err)
uuid = string(util.NewUUID())
})
AfterEach(func() {
rc, err := c.ReplicationControllers(ns).Get(RCName)
if err == nil && rc.Spec.Replicas != 0 {
By("Cleaning up the replication controller")
err := DeleteRC(c, ns, RCName)
expectNoError(err)
}
By(fmt.Sprintf("Destroying namespace for this suite %v", ns))
if err := c.Namespaces().Delete(ns); err != nil {
Failf("Couldn't delete ns %s", err)
}
})
// This test verifies that the max-pods flag works as advertised. It assumes that cluster add-on pods stay stable,
// and it cannot run in parallel with any other test that touches Nodes or Pods: checking that max-pods works
// requires fully saturating the cluster and keeping it in that state for a few seconds.
It("Validates MaxPods limit number of pods that are allowed to run.", func() {
pods, err := c.Pods(api.NamespaceAll).List(labels.Everything(), fields.Everything())
expectNoError(err)
currentlyRunningPods := len(pods.Items)
podsNeededForSaturation := int(totalPodCapacity) - currentlyRunningPods
RCName = "max-pods" + strconv.FormatInt(totalPodCapacity, 10) + "-" + uuid
config := RCConfig{Client: c,
Image: "gcr.io/google_containers/pause:go",
Name: RCName,
Namespace: ns,
PollInterval: 10 * time.Second,
Replicas: int(podsNeededForSaturation),
}
By(fmt.Sprintf("Starting additional %v Pods to fully saturate the cluster and trying to start a new one", podsNeededForSaturation))
expectNoError(RunRC(config))
// Log the number of pods running in the cluster.
// TODO: remove when the test is fixed.
pods, err = c.Pods(api.NamespaceAll).List(labels.Everything(), fields.Set{
"status.phase": "Running",
}.AsSelector())
expectNoError(err)
Logf("Observed %v running Pods. Need %v to fully saturate the cluster.", len(pods.Items), totalPodCapacity)
_, err = c.Pods(ns).Create(&api.Pod{
TypeMeta: api.TypeMeta{
Kind: "Pod",
},
ObjectMeta: api.ObjectMeta{
Name: "additional-pod",
Labels: map[string]string{"name": "additional"},
},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "additional-pod",
Image: "gcr.io/google_containers/pause:go",
},
},
},
})
expectNoError(err)
// Wait a bit to allow scheduler to do its thing
time.Sleep(10 * time.Second)
allPods, err := c.Pods(api.NamespaceAll).List(labels.Everything(), fields.Everything())
expectNoError(err)
runningPods := 0
notRunningPods := make([]api.Pod, 0)
for _, pod := range allPods.Items {
if pod.Status.Phase == api.PodRunning {
runningPods += 1
} else {
notRunningPods = append(notRunningPods, pod)
}
}
schedEvents, err := c.Events(ns).List(
labels.Everything(),
fields.Set{
"involvedObject.kind": "Pod",
"involvedObject.name": "additional-pod",
"involvedObject.namespace": ns,
"source": "scheduler",
"reason": "failedScheduling",
}.AsSelector())
expectNoError(err)
Expect(runningPods).To(Equal(int(totalPodCapacity)))
Expect(len(notRunningPods)).To(Equal(1))
Expect(schedEvents.Items).ToNot(BeEmpty())
Expect(notRunningPods[0].Name).To(Equal("additional-pod"))
})
})
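The MaxPods test removed above is not lost: it reappears below, folded into a new SchedulerPredicates suite, with the pod-creation and verification logic factored into the startPods and verifyResult helpers. The core of the verification is the scheduler-event query for the probe pod; as a restatement, here is that query pulled out into a standalone helper (the function name is hypothetical and it assumes the same imports as the file below, but the field selector is taken verbatim from the test):

// getScheduleFailureEvents returns the scheduler's failedScheduling
// events for the named pod; a non-empty result means the scheduler
// tried and failed to place the pod.
func getScheduleFailureEvents(c *client.Client, podName string, ns string) (*api.EventList, error) {
	return c.Events(ns).List(
		labels.Everything(),
		fields.Set{
			"involvedObject.kind":      "Pod",
			"involvedObject.name":      podName,
			"involvedObject.namespace": ns,
			"source":                   "scheduler",
			"reason":                   "failedScheduling",
		}.AsSelector())
}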

View File

@@ -0,0 +1,336 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package e2e
import (
"fmt"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/resource"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
// startPods is a simplified version of RunRC: it does not create a replication controller, but creates plain Pods
// from a caller-supplied Pod definition, which is what the callers need in order to exercise the various
// scheduler predicates.
func startPods(c *client.Client, replicas int, ns string, podNamePrefix string, pod api.Pod) {
pods, err := c.Pods(api.NamespaceAll).List(labels.Everything(), fields.Everything())
expectNoError(err)
podsRunningBefore := len(pods.Items)
for i := 0; i < replicas; i++ {
podName := fmt.Sprintf("%v-%v", podNamePrefix, i)
pod.ObjectMeta.Name = podName
pod.ObjectMeta.Labels["name"] = podName
pod.Spec.Containers[0].Name = podName
_, err = c.Pods(ns).Create(&pod)
expectNoError(err)
}
// Wait for pods to start running.
timeout := 2 * time.Minute
startTime := time.Now()
currentlyRunningPods := 0
for podsRunningBefore+replicas != currentlyRunningPods {
allPods, err := c.Pods(api.NamespaceAll).List(labels.Everything(), fields.Everything())
expectNoError(err)
runningPods := 0
for _, pod := range allPods.Items {
if pod.Status.Phase == api.PodRunning {
runningPods += 1
}
}
currentlyRunningPods = runningPods
if startTime.Add(timeout).Before(time.Now()) {
break
}
time.Sleep(5 * time.Second)
}
Expect(currentlyRunningPods).To(Equal(podsRunningBefore + replicas))
}
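// Note on the wait loop above: it counts Running pods across all
// namespaces, not just the test namespace, so unrelated pod churn would
// skew the count. That is why this suite assumes add-on pods stay stable
// and must not run in parallel with other tests that touch Nodes or Pods.
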
func getRequestedCPU(pod api.Pod) int64 {
var result int64
for _, container := range pod.Spec.Containers {
result += container.Resources.Limits.Cpu().MilliValue()
}
return result
}
func verifyResult(c *client.Client, podName string, ns string) {
allPods, err := c.Pods(api.NamespaceAll).List(labels.Everything(), fields.Everything())
expectNoError(err)
runningPods := 0
notRunningPods := make([]api.Pod, 0)
for _, pod := range allPods.Items {
if pod.Status.Phase == api.PodRunning {
runningPods += 1
} else {
notRunningPods = append(notRunningPods, pod)
}
}
schedEvents, err := c.Events(ns).List(
labels.Everything(),
fields.Set{
"involvedObject.kind": "Pod",
"involvedObject.name": podName,
"involvedObject.namespace": ns,
"source": "scheduler",
"reason": "failedScheduling",
}.AsSelector())
expectNoError(err)
printed := false
printOnce := func(msg string) string {
if !printed {
printed = true
return msg
} else {
return ""
}
}
Expect(len(notRunningPods)).To(Equal(1), printOnce(fmt.Sprintf("Pods found in the cluster: %#v", allPods)))
Expect(schedEvents.Items).ToNot(BeEmpty(), printOnce(fmt.Sprintf("Pods found in the cluster: %#v", allPods)))
Expect(notRunningPods[0].Name).To(Equal(podName), printOnce(fmt.Sprintf("Pods found in the cluster: %#v", allPods)))
}
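// verifyResult (above) asserts that exactly one pod in the cluster is not
// Running, that this pod is the probe pod named podName, and that the
// scheduler has emitted at least one failedScheduling event for it.
// printOnce attaches the full pod dump to only the first failing
// assertion, keeping failure output readable.
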
var _ = Describe("SchedulerPredicates", func() {
var c *client.Client
var nodeList *api.NodeList
var nodeCount int
var totalPodCapacity int64
var RCName string
var ns string
var uuid string
BeforeEach(func() {
var err error
c, err = loadClient()
expectNoError(err)
nodeList, err = c.Nodes().List(labels.Everything(), fields.Everything())
expectNoError(err)
nodeCount = len(nodeList.Items)
Expect(nodeCount).NotTo(BeZero())
err = deleteTestingNS(c)
expectNoError(err)
nsForTesting, err := createTestingNS("sched-pred", c)
ns = nsForTesting.Name
expectNoError(err)
uuid = string(util.NewUUID())
})
AfterEach(func() {
rc, err := c.ReplicationControllers(ns).Get(RCName)
if err == nil && rc.Spec.Replicas != 0 {
By("Cleaning up the replication controller")
err := DeleteRC(c, ns, RCName)
expectNoError(err)
}
By(fmt.Sprintf("Destroying namespace for this suite %v", ns))
if err := c.Namespaces().Delete(ns); err != nil {
Failf("Couldn't delete ns %s", err)
}
})
// This test verifies that the max-pods flag works as advertised. It assumes that cluster add-on pods stay stable,
// and it cannot run in parallel with any other test that touches Nodes or Pods: checking that max-pods works
// requires fully saturating the cluster and keeping it in that state for a few seconds.
It("validates MaxPods limit number of pods that are allowed to run.", func() {
totalPodCapacity = 0
for _, node := range nodeList.Items {
podCapacity, found := node.Status.Capacity["pods"]
Expect(found).To(Equal(true))
totalPodCapacity += podCapacity.Value()
Logf("Node: %v", node)
}
pods, err := c.Pods(api.NamespaceAll).List(labels.Everything(), fields.Everything())
expectNoError(err)
currentlyRunningPods := len(pods.Items)
podsNeededForSaturation := int(totalPodCapacity) - currentlyRunningPods
By(fmt.Sprintf("Starting additional %v Pods to fully saturate the cluster max pods and trying to start another one", podsNeededForSaturation))
startPods(c, podsNeededForSaturation, ns, "maxp", api.Pod{
TypeMeta: api.TypeMeta{
Kind: "Pod",
},
ObjectMeta: api.ObjectMeta{
Name: "",
Labels: map[string]string{"name": ""},
},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "",
Image: "gcr.io/google_containers/pause:go",
},
},
},
})
podName := "additional-pod"
_, err = c.Pods(ns).Create(&api.Pod{
TypeMeta: api.TypeMeta{
Kind: "Pod",
},
ObjectMeta: api.ObjectMeta{
Name: podName,
Labels: map[string]string{"name": "additional"},
},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: podName,
Image: "gcr.io/google_containers/pause:go",
},
},
},
})
expectNoError(err)
// Wait a bit to allow scheduler to do its thing
time.Sleep(10 * time.Second)
verifyResult(c, podName, ns)
})
// This test verifies that we don't allow scheduling of pods whose summed resource limits exceed a machine's
// capacity. It assumes that cluster add-on pods stay stable, and it cannot run in parallel with any other test
// that touches Nodes or Pods, because we need precise control over what is running in the cluster.
It("validates resource limits of pods that are allowed to run.", func() {
nodeToCapacityMap := make(map[string]int64)
for _, node := range nodeList.Items {
capacity, found := node.Status.Capacity["cpu"]
Expect(found).To(Equal(true))
nodeToCapacityMap[node.Name] = capacity.MilliValue()
}
pods, err := c.Pods(api.NamespaceAll).List(labels.Everything(), fields.Everything())
expectNoError(err)
for _, pod := range pods.Items {
_, found := nodeToCapacityMap[pod.Spec.NodeName]
Expect(found).To(Equal(true))
Logf("Pod %v requesting capacity %v on Node %v", pod.Name, getRequestedCPU(pod), pod.Spec.NodeName)
nodeToCapacityMap[pod.Spec.NodeName] -= getRequestedCPU(pod)
}
var podsNeededForSaturation int
for name, leftCapacity := range nodeToCapacityMap {
Logf("Node: %v has capacity: %v", name, leftCapacity)
podsNeededForSaturation += (int)(leftCapacity / 100)
}
By(fmt.Sprintf("Starting additional %v Pods to fully saturate the cluster CPU and trying to start another one", podsNeededForSaturation))
startPods(c, podsNeededForSaturation, ns, "overcommit", api.Pod{
TypeMeta: api.TypeMeta{
Kind: "Pod",
},
ObjectMeta: api.ObjectMeta{
Name: "",
Labels: map[string]string{"name": ""},
},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "",
Image: "gcr.io/google_containers/pause:go",
Resources: api.ResourceRequirements{
Limits: api.ResourceList{
"cpu": *resource.NewMilliQuantity(100, "DecimalSI"),
},
},
},
},
},
})
podName := "additional-pod"
_, err = c.Pods(ns).Create(&api.Pod{
TypeMeta: api.TypeMeta{
Kind: "Pod",
},
ObjectMeta: api.ObjectMeta{
Name: podName,
Labels: map[string]string{"name": "additional"},
},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: podName,
Image: "gcr.io/google_containers/pause:go",
Resources: api.ResourceRequirements{
Limits: api.ResourceList{
"cpu": *resource.NewMilliQuantity(100, "DecimalSI"),
},
},
},
},
},
})
expectNoError(err)
// Wait a bit to allow scheduler to do its thing
time.Sleep(10 * time.Second)
verifyResult(c, podName, ns)
})
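// Worked example of the saturation math above, with illustrative numbers
// (an assumption, not cluster output): on a 3-node cluster with 1000m of
// CPU per node and existing pods holding 300m, 200m, and 100m of limits,
// leftCapacity is 700m, 800m, and 900m, so podsNeededForSaturation is
// 7 + 8 + 9 = 24 pause pods at 100m each. The extra 100m "additional-pod"
// then fits on no node and must produce a failedScheduling event.
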
// Test nodes do not carry any labels, so it should be impossible to schedule a Pod with a
// nonempty NodeSelector.
It("validates that NodeSelector is respected.", func() {
By("Trying to schedule Pod with nonempty NodeSelector.")
podName := "restricted-pod"
_, err := c.Pods(ns).Create(&api.Pod{
TypeMeta: api.TypeMeta{
Kind: "Pod",
},
ObjectMeta: api.ObjectMeta{
Name: podName,
Labels: map[string]string{"name": "restricted"},
},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: podName,
Image: "gcr.io/google_containers/pause:go",
},
},
NodeSelector: map[string]string{
"label": "nonempty",
},
},
})
expectNoError(err)
// Wait a bit to allow scheduler to do its thing
time.Sleep(10 * time.Second)
verifyResult(c, podName, ns)
})
})
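
Since the test nodes carry no labels, any nonempty NodeSelector is unsatisfiable, which is what makes the last test deterministic. A minimal sketch of the predicate being exercised — the names are illustrative and this is not the scheduler's actual implementation:

package main

import "fmt"

// podMatchesNodeSelector sketches the node-selector predicate: every
// key/value pair in the pod's NodeSelector must be present verbatim in
// the node's labels.
func podMatchesNodeSelector(nodeLabels, nodeSelector map[string]string) bool {
	for k, v := range nodeSelector {
		if nodeLabels[k] != v {
			return false
		}
	}
	return true
}

func main() {
	unlabeledNode := map[string]string{}
	selector := map[string]string{"label": "nonempty"}
	// An unlabeled node can never satisfy a nonempty selector, so the
	// restricted-pod above stays unscheduled and triggers failedScheduling.
	fmt.Println(podMatchesNodeSelector(unlabeledNode, selector)) // false
}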