Fix a scheduler flaky e2e test

Bobby (Babak) Salamat 2017-09-28 00:44:04 -07:00
parent ce4afa8418
commit e4c8eefd41


@@ -39,7 +39,6 @@ import (
 )
 
 const maxNumberOfPods int64 = 10
-const minPodCPURequest int64 = 500
 
 var localStorageVersion = utilversion.MustParseSemantic("v1.8.0-beta.0")
 
@@ -227,14 +226,36 @@ var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
 		verifyResult(cs, podsNeededForSaturation, 1, ns)
 	})
 
-	// This test verifies we don't allow scheduling of pods in a way that sum of limits of pods is greater than machines capacity.
-	// It assumes that cluster add-on pods stay stable and cannot be run in parallel with any other test that touches Nodes or Pods.
+	// This test verifies we don't allow scheduling of pods in a way that sum of
+	// limits of pods is greater than machines capacity.
+	// It assumes that cluster add-on pods stay stable and cannot be run in parallel
+	// with any other test that touches Nodes or Pods.
 	// It is so because we need to have precise control on what's running in the cluster.
+	// Test scenario:
+	// 1. Find the amount CPU resources on each node.
+	// 2. Create one pod with affinity to each node that uses 70% of the node CPU.
+	// 3. Wait for the pods to be scheduled.
+	// 4. Create another pod with no affinity to any node that need 50% of the largest node CPU.
+	// 5. Make sure this additional pod is not scheduled.
 	It("validates resource limits of pods that are allowed to run [Conformance]", func() {
+		framework.WaitForStableCluster(cs, masterNodes)
 		nodeMaxAllocatable := int64(0)
 		nodeToAllocatableMap := make(map[string]int64)
 		for _, node := range nodeList.Items {
+			nodeReady := false
+			for _, condition := range node.Status.Conditions {
+				if condition.Type == v1.NodeReady && condition.Status == v1.ConditionTrue {
+					nodeReady = true
+					break
+				}
+			}
+			if !nodeReady {
+				continue
+			}
+			// Apply node label to each node
+			framework.AddOrUpdateLabelOnNode(cs, node.Name, "node", node.Name)
+			framework.ExpectNodeHasLabel(cs, node.Name, "node", node.Name)
+			// Find allocatable amount of CPU.
 			allocatable, found := node.Status.Allocatable[v1.ResourceCPU]
 			Expect(found).To(Equal(true))
 			nodeToAllocatableMap[node.Name] = allocatable.MilliValue()
@@ -242,7 +263,12 @@ var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
 				nodeMaxAllocatable = allocatable.MilliValue()
 			}
 		}
-		framework.WaitForStableCluster(cs, masterNodes)
+		// Clean up added labels after this test.
+		defer func() {
+			for nodeName := range nodeToAllocatableMap {
+				framework.RemoveLabelOffNode(cs, nodeName, "node")
+			}
+		}()
 
 		pods, err := cs.CoreV1().Pods(metav1.NamespaceAll).List(metav1.ListOptions{})
 		framework.ExpectNoError(err)
@@ -254,51 +280,60 @@ var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
 			}
 		}
-		var podsNeededForSaturation int
-
-		milliCpuPerPod := nodeMaxAllocatable / maxNumberOfPods
-		if milliCpuPerPod < minPodCPURequest {
-			milliCpuPerPod = minPodCPURequest
-		}
-		framework.Logf("Using pod capacity: %vm", milliCpuPerPod)
-		for name, leftAllocatable := range nodeToAllocatableMap {
-			framework.Logf("Node: %v has cpu allocatable: %vm", name, leftAllocatable)
-			podsNeededForSaturation += (int)(leftAllocatable / milliCpuPerPod)
-		}
-
-		By(fmt.Sprintf("Starting additional %v Pods to fully saturate the cluster CPU and trying to start another one", podsNeededForSaturation))
-
-		// As the pods are distributed randomly among nodes,
-		// it can easily happen that all nodes are saturated
-		// and there is no need to create additional pods.
-		// StartPods requires at least one pod to replicate.
-		if podsNeededForSaturation > 0 {
-			framework.ExpectNoError(testutils.StartPods(cs, podsNeededForSaturation, ns, "overcommit",
-				*initPausePod(f, pausePodConfig{
-					Name:   "",
-					Labels: map[string]string{"name": ""},
-					Resources: &v1.ResourceRequirements{
-						Limits: v1.ResourceList{
-							v1.ResourceCPU: *resource.NewMilliQuantity(milliCpuPerPod, "DecimalSI"),
-						},
-						Requests: v1.ResourceList{
-							v1.ResourceCPU: *resource.NewMilliQuantity(milliCpuPerPod, "DecimalSI"),
-						},
-					},
-				}), true, framework.Logf))
-		}
+		By("Starting Pods to consume most of the cluster CPU.")
+		// Create one pod per node that requires 70% of the node remaining CPU.
+		fillerPods := []*v1.Pod{}
+		for nodeName, cpu := range nodeToAllocatableMap {
+			requestedCPU := cpu * 7 / 10
+			fillerPods = append(fillerPods, createPausePod(f, pausePodConfig{
+				Name: "filler-pod-" + nodeName,
+				Resources: &v1.ResourceRequirements{
+					Limits: v1.ResourceList{
+						v1.ResourceCPU: *resource.NewMilliQuantity(requestedCPU, "DecimalSI"),
+					},
+					Requests: v1.ResourceList{
+						v1.ResourceCPU: *resource.NewMilliQuantity(requestedCPU, "DecimalSI"),
+					},
+				},
+				Affinity: &v1.Affinity{
+					NodeAffinity: &v1.NodeAffinity{
+						RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+							NodeSelectorTerms: []v1.NodeSelectorTerm{
+								{
+									MatchExpressions: []v1.NodeSelectorRequirement{
+										{
+											Key:      "node",
+											Operator: v1.NodeSelectorOpIn,
+											Values:   []string{nodeName},
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			}))
+		}
+
+		// Wait for filler pods to schedule.
+		for _, pod := range fillerPods {
+			framework.ExpectNoError(framework.WaitForPodRunningInNamespace(cs, pod))
+		}
+
+		By("Creating another pod that requires unavailable amount of CPU.")
+		// Create another pod that requires 50% of the largest node CPU resources.
+		// This pod should remain pending as at least 70% of CPU of other nodes in
+		// the cluster are already consumed.
 		podName := "additional-pod"
 		conf := pausePodConfig{
 			Name:   podName,
 			Labels: map[string]string{"name": "additional"},
 			Resources: &v1.ResourceRequirements{
 				Limits: v1.ResourceList{
-					v1.ResourceCPU: *resource.NewMilliQuantity(milliCpuPerPod, "DecimalSI"),
+					v1.ResourceCPU: *resource.NewMilliQuantity(nodeMaxAllocatable*5/10, "DecimalSI"),
 				},
 			},
 		}
 		WaitForSchedulerAfterAction(f, createPausePodAction(f, conf), podName, false)
-		verifyResult(cs, podsNeededForSaturation, 1, ns)
+		verifyResult(cs, len(fillerPods), 1, ns)
 	})
 
 	// Test Nodes does not have any label, hence it should be impossible to schedule Pod with
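
Why the new layout cannot flake, per the test scenario comment above: each filler pod is pinned to exactly one node by node affinity and requests 70% of that node's allocatable CPU, so no node keeps more than 30% free, while the probe pod asks for 50% of the largest node's CPU and therefore fits nowhere. A minimal standalone sketch of that arithmetic follows; the node names and allocatable millicore values are hypothetical and are not part of the commit.

package main

import "fmt"

// Hypothetical allocatable CPU per node, in millicores; the real test reads
// these values from node.Status.Allocatable[v1.ResourceCPU].
var nodeAllocatable = map[string]int64{
	"node-1": 2000,
	"node-2": 4000,
	"node-3": 8000,
}

func main() {
	// The "additional-pod" requests 50% of the largest node's CPU.
	largest := int64(0)
	for _, cpu := range nodeAllocatable {
		if cpu > largest {
			largest = cpu
		}
	}
	probe := largest * 5 / 10

	// Each filler pod requests 70% of its node, so at most 30% of any node
	// stays free, which is always less than the 50%-of-largest probe request.
	for name, cpu := range nodeAllocatable {
		free := cpu - cpu*7/10
		fmt.Printf("%s: free=%dm, probe needs %dm, fits=%v\n", name, free, probe, free >= probe)
	}
}

By contrast, the old code spread a computed number of identical pods across the cluster with StartPods, so a node could end up with enough spare CPU to fit the extra pod, which is the flakiness this commit removes.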