Mirror of https://github.com/k3s-io/kubernetes.git
Merge pull request #53169 from bsalamat/fix_sched_e2e
Automatic merge from submit-queue (batch tested with PRs 51311, 52575, 53169). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Fix a flaky scheduler e2e test

**What this PR does / why we need it**: Makes the scheduler e2e test that verifies the resource limit predicate more robust.

**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #53066

**Release note**:

```release-note
NONE
```

@kubernetes/sig-scheduling-pr-reviews
Commit: 9a7378f567
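The change below replaces the old strategy of starting just enough pause pods to saturate every node's remaining CPU with a simpler layout: one filler pod pinned to each node at 70% of that node's allocatable CPU, followed by one additional pod that asks for 50% of the largest node and must therefore stay pending. The standalone sketch below (node sizes are invented, not taken from the PR) only walks through that arithmetic:

```go
package main

import "fmt"

func main() {
	// Hypothetical allocatable CPU per node, in millicores (not values from the PR).
	nodeAllocatable := []int64{4000, 2000, 2000}

	// Step 2 of the test scenario: one filler pod per node, pinned by node
	// affinity, requesting 70% of that node's CPU, so at most ~30% of any
	// single node stays free.
	nodeMaxAllocatable := int64(0)
	for _, cpu := range nodeAllocatable {
		filler := cpu * 7 / 10
		fmt.Printf("node %dm: filler requests %dm, leaving %dm\n", cpu, filler, cpu-filler)
		if cpu > nodeMaxAllocatable {
			nodeMaxAllocatable = cpu
		}
	}

	// Step 4: the additional pod asks for 50% of the largest node. Since 50% of
	// the largest node always exceeds the ~30% left anywhere, the scheduler has
	// to leave it pending.
	extra := nodeMaxAllocatable * 5 / 10
	fmt.Printf("additional pod requests %dm; largest leftover is %dm\n",
		extra, nodeMaxAllocatable-nodeMaxAllocatable*7/10)
}
```

Because 50% of the largest node always exceeds the roughly 30% left on any node, the expected outcome no longer depends on how the scheduler happens to distribute pods, which the old code's own comment ("As the pods are distributed randomly among nodes...") flags as a source of variation.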
```diff
@@ -39,7 +39,6 @@ import (
 )
 
 const maxNumberOfPods int64 = 10
-const minPodCPURequest int64 = 500
 
 var localStorageVersion = utilversion.MustParseSemantic("v1.8.0-beta.0")
 
```
```diff
@@ -227,14 +226,36 @@ var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
 		verifyResult(cs, podsNeededForSaturation, 1, ns)
 	})
 
-	// This test verifies we don't allow scheduling of pods in a way that sum of limits of pods is greater than machines capacity.
-	// It assumes that cluster add-on pods stay stable and cannot be run in parallel with any other test that touches Nodes or Pods.
+	// This test verifies we don't allow scheduling of pods in a way that sum of
+	// limits of pods is greater than machines capacity.
+	// It assumes that cluster add-on pods stay stable and cannot be run in parallel
+	// with any other test that touches Nodes or Pods.
 	// It is so because we need to have precise control on what's running in the cluster.
+	// Test scenario:
+	// 1. Find the amount CPU resources on each node.
+	// 2. Create one pod with affinity to each node that uses 70% of the node CPU.
+	// 3. Wait for the pods to be scheduled.
+	// 4. Create another pod with no affinity to any node that need 50% of the largest node CPU.
+	// 5. Make sure this additional pod is not scheduled.
 	It("validates resource limits of pods that are allowed to run [Conformance]", func() {
+		framework.WaitForStableCluster(cs, masterNodes)
 		nodeMaxAllocatable := int64(0)
 
 		nodeToAllocatableMap := make(map[string]int64)
 		for _, node := range nodeList.Items {
+			nodeReady := false
+			for _, condition := range node.Status.Conditions {
+				if condition.Type == v1.NodeReady && condition.Status == v1.ConditionTrue {
+					nodeReady = true
+					break
+				}
+			}
+			if !nodeReady {
+				continue
+			}
+			// Apply node label to each node
+			framework.AddOrUpdateLabelOnNode(cs, node.Name, "node", node.Name)
+			framework.ExpectNodeHasLabel(cs, node.Name, "node", node.Name)
+			// Find allocatable amount of CPU.
 			allocatable, found := node.Status.Allocatable[v1.ResourceCPU]
 			Expect(found).To(Equal(true))
 			nodeToAllocatableMap[node.Name] = allocatable.MilliValue()
```
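In the hunk above, the new loop skips nodes whose `NodeReady` condition is not `True` before labeling them and recording their allocatable CPU. The helper below is a hypothetical, standalone restatement of that inline check (the test itself does not define such a function), just to show the condition fields involved:

```go
package sketch

import (
	v1 "k8s.io/api/core/v1"
)

// isNodeReady reports whether the node's NodeReady condition is True,
// mirroring the inline check added in the hunk above.
func isNodeReady(node v1.Node) bool {
	for _, condition := range node.Status.Conditions {
		if condition.Type == v1.NodeReady && condition.Status == v1.ConditionTrue {
			return true
		}
	}
	return false
}
```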
```diff
@@ -242,7 +263,12 @@ var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
 				nodeMaxAllocatable = allocatable.MilliValue()
 			}
 		}
-		framework.WaitForStableCluster(cs, masterNodes)
+		// Clean up added labels after this test.
+		defer func() {
+			for nodeName := range nodeToAllocatableMap {
+				framework.RemoveLabelOffNode(cs, nodeName, "node")
+			}
+		}()
 
 		pods, err := cs.CoreV1().Pods(metav1.NamespaceAll).List(metav1.ListOptions{})
 		framework.ExpectNoError(err)
```
```diff
@@ -254,51 +280,60 @@ var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
 			}
 		}
 
-		var podsNeededForSaturation int
-
-		milliCpuPerPod := nodeMaxAllocatable / maxNumberOfPods
-		if milliCpuPerPod < minPodCPURequest {
-			milliCpuPerPod = minPodCPURequest
-		}
-		framework.Logf("Using pod capacity: %vm", milliCpuPerPod)
-		for name, leftAllocatable := range nodeToAllocatableMap {
-			framework.Logf("Node: %v has cpu allocatable: %vm", name, leftAllocatable)
-			podsNeededForSaturation += (int)(leftAllocatable / milliCpuPerPod)
-		}
-
-		By(fmt.Sprintf("Starting additional %v Pods to fully saturate the cluster CPU and trying to start another one", podsNeededForSaturation))
-
-		// As the pods are distributed randomly among nodes,
-		// it can easily happen that all nodes are saturated
-		// and there is no need to create additional pods.
-		// StartPods requires at least one pod to replicate.
-		if podsNeededForSaturation > 0 {
-			framework.ExpectNoError(testutils.StartPods(cs, podsNeededForSaturation, ns, "overcommit",
-				*initPausePod(f, pausePodConfig{
-					Name: "",
-					Labels: map[string]string{"name": ""},
-					Resources: &v1.ResourceRequirements{
-						Limits: v1.ResourceList{
-							v1.ResourceCPU: *resource.NewMilliQuantity(milliCpuPerPod, "DecimalSI"),
-						},
-						Requests: v1.ResourceList{
-							v1.ResourceCPU: *resource.NewMilliQuantity(milliCpuPerPod, "DecimalSI"),
-						},
-					},
-				}), true, framework.Logf))
-		}
+		By("Starting Pods to consume most of the cluster CPU.")
+		// Create one pod per node that requires 70% of the node remaining CPU.
+		fillerPods := []*v1.Pod{}
+		for nodeName, cpu := range nodeToAllocatableMap {
+			requestedCPU := cpu * 7 / 10
+			fillerPods = append(fillerPods, createPausePod(f, pausePodConfig{
+				Name: "filler-pod-" + nodeName,
+				Resources: &v1.ResourceRequirements{
+					Limits: v1.ResourceList{
+						v1.ResourceCPU: *resource.NewMilliQuantity(requestedCPU, "DecimalSI"),
+					},
+					Requests: v1.ResourceList{
+						v1.ResourceCPU: *resource.NewMilliQuantity(requestedCPU, "DecimalSI"),
+					},
+				},
+				Affinity: &v1.Affinity{
+					NodeAffinity: &v1.NodeAffinity{
+						RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+							NodeSelectorTerms: []v1.NodeSelectorTerm{
+								{
+									MatchExpressions: []v1.NodeSelectorRequirement{
+										{
+											Key: "node",
+											Operator: v1.NodeSelectorOpIn,
+											Values: []string{nodeName},
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			}))
+		}
+		// Wait for filler pods to schedule.
+		for _, pod := range fillerPods {
+			framework.ExpectNoError(framework.WaitForPodRunningInNamespace(cs, pod))
+		}
+		By("Creating another pod that requires unavailable amount of CPU.")
+		// Create another pod that requires 50% of the largest node CPU resources.
+		// This pod should remain pending as at least 70% of CPU of other nodes in
+		// the cluster are already consumed.
 		podName := "additional-pod"
 		conf := pausePodConfig{
 			Name: podName,
 			Labels: map[string]string{"name": "additional"},
 			Resources: &v1.ResourceRequirements{
 				Limits: v1.ResourceList{
-					v1.ResourceCPU: *resource.NewMilliQuantity(milliCpuPerPod, "DecimalSI"),
+					v1.ResourceCPU: *resource.NewMilliQuantity(nodeMaxAllocatable*5/10, "DecimalSI"),
 				},
 			},
 		}
 		WaitForSchedulerAfterAction(f, createPausePodAction(f, conf), podName, false)
-		verifyResult(cs, podsNeededForSaturation, 1, ns)
+		verifyResult(cs, len(fillerPods), 1, ns)
 	})
 
 	// Test Nodes does not have any label, hence it should be impossible to schedule Pod with
```