Fix a scheduler flaky e2e test

Bobby (Babak) Salamat 2017-09-28 00:44:04 -07:00
parent ce4afa8418
commit e4c8eefd41


@@ -39,7 +39,6 @@ import (
 )
 
 const maxNumberOfPods int64 = 10
-const minPodCPURequest int64 = 500
 
 var localStorageVersion = utilversion.MustParseSemantic("v1.8.0-beta.0")
 
@@ -227,14 +226,36 @@ var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
 		verifyResult(cs, podsNeededForSaturation, 1, ns)
 	})
 
-	// This test verifies we don't allow scheduling of pods in a way that sum of limits of pods is greater than machines capacity.
-	// It assumes that cluster add-on pods stay stable and cannot be run in parallel with any other test that touches Nodes or Pods.
+	// This test verifies we don't allow scheduling of pods in a way that sum of
+	// limits of pods is greater than machines capacity.
+	// It assumes that cluster add-on pods stay stable and cannot be run in parallel
+	// with any other test that touches Nodes or Pods.
 	// It is so because we need to have precise control on what's running in the cluster.
+	// Test scenario:
+	// 1. Find the amount CPU resources on each node.
+	// 2. Create one pod with affinity to each node that uses 70% of the node CPU.
+	// 3. Wait for the pods to be scheduled.
+	// 4. Create another pod with no affinity to any node that need 50% of the largest node CPU.
+	// 5. Make sure this additional pod is not scheduled.
 	It("validates resource limits of pods that are allowed to run [Conformance]", func() {
+		framework.WaitForStableCluster(cs, masterNodes)
 		nodeMaxAllocatable := int64(0)
 		nodeToAllocatableMap := make(map[string]int64)
 		for _, node := range nodeList.Items {
+			nodeReady := false
+			for _, condition := range node.Status.Conditions {
+				if condition.Type == v1.NodeReady && condition.Status == v1.ConditionTrue {
+					nodeReady = true
+					break
+				}
+			}
+			if !nodeReady {
+				continue
+			}
+			// Apply node label to each node
+			framework.AddOrUpdateLabelOnNode(cs, node.Name, "node", node.Name)
+			framework.ExpectNodeHasLabel(cs, node.Name, "node", node.Name)
+			// Find allocatable amount of CPU.
 			allocatable, found := node.Status.Allocatable[v1.ResourceCPU]
 			Expect(found).To(Equal(true))
 			nodeToAllocatableMap[node.Name] = allocatable.MilliValue()
@@ -242,7 +263,12 @@ var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
 				nodeMaxAllocatable = allocatable.MilliValue()
 			}
 		}
-		framework.WaitForStableCluster(cs, masterNodes)
+		// Clean up added labels after this test.
+		defer func() {
+			for nodeName := range nodeToAllocatableMap {
+				framework.RemoveLabelOffNode(cs, nodeName, "node")
+			}
+		}()
 
 		pods, err := cs.CoreV1().Pods(metav1.NamespaceAll).List(metav1.ListOptions{})
 		framework.ExpectNoError(err)
@@ -254,51 +280,60 @@ var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
 			}
 		}
-		var podsNeededForSaturation int
-
-		milliCpuPerPod := nodeMaxAllocatable / maxNumberOfPods
-		if milliCpuPerPod < minPodCPURequest {
-			milliCpuPerPod = minPodCPURequest
-		}
-		framework.Logf("Using pod capacity: %vm", milliCpuPerPod)
-		for name, leftAllocatable := range nodeToAllocatableMap {
-			framework.Logf("Node: %v has cpu allocatable: %vm", name, leftAllocatable)
-			podsNeededForSaturation += (int)(leftAllocatable / milliCpuPerPod)
-		}
-
-		By(fmt.Sprintf("Starting additional %v Pods to fully saturate the cluster CPU and trying to start another one", podsNeededForSaturation))
-
-		// As the pods are distributed randomly among nodes,
-		// it can easily happen that all nodes are saturated
-		// and there is no need to create additional pods.
-		// StartPods requires at least one pod to replicate.
-		if podsNeededForSaturation > 0 {
-			framework.ExpectNoError(testutils.StartPods(cs, podsNeededForSaturation, ns, "overcommit",
-				*initPausePod(f, pausePodConfig{
-					Name:   "",
-					Labels: map[string]string{"name": ""},
-					Resources: &v1.ResourceRequirements{
-						Limits: v1.ResourceList{
-							v1.ResourceCPU: *resource.NewMilliQuantity(milliCpuPerPod, "DecimalSI"),
-						},
-						Requests: v1.ResourceList{
-							v1.ResourceCPU: *resource.NewMilliQuantity(milliCpuPerPod, "DecimalSI"),
-						},
-					},
-				}), true, framework.Logf))
-		}
+		By("Starting Pods to consume most of the cluster CPU.")
+		// Create one pod per node that requires 70% of the node remaining CPU.
+		fillerPods := []*v1.Pod{}
+		for nodeName, cpu := range nodeToAllocatableMap {
+			requestedCPU := cpu * 7 / 10
+			fillerPods = append(fillerPods, createPausePod(f, pausePodConfig{
+				Name: "filler-pod-" + nodeName,
+				Resources: &v1.ResourceRequirements{
+					Limits: v1.ResourceList{
+						v1.ResourceCPU: *resource.NewMilliQuantity(requestedCPU, "DecimalSI"),
+					},
+					Requests: v1.ResourceList{
+						v1.ResourceCPU: *resource.NewMilliQuantity(requestedCPU, "DecimalSI"),
+					},
+				},
+				Affinity: &v1.Affinity{
+					NodeAffinity: &v1.NodeAffinity{
+						RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+							NodeSelectorTerms: []v1.NodeSelectorTerm{
+								{
+									MatchExpressions: []v1.NodeSelectorRequirement{
+										{
+											Key:      "node",
+											Operator: v1.NodeSelectorOpIn,
+											Values:   []string{nodeName},
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			}))
+		}
+
+		// Wait for filler pods to schedule.
+		for _, pod := range fillerPods {
+			framework.ExpectNoError(framework.WaitForPodRunningInNamespace(cs, pod))
+		}
+
+		By("Creating another pod that requires unavailable amount of CPU.")
+		// Create another pod that requires 50% of the largest node CPU resources.
+		// This pod should remain pending as at least 70% of CPU of other nodes in
+		// the cluster are already consumed.
 		podName := "additional-pod"
 		conf := pausePodConfig{
 			Name:   podName,
 			Labels: map[string]string{"name": "additional"},
 			Resources: &v1.ResourceRequirements{
 				Limits: v1.ResourceList{
-					v1.ResourceCPU: *resource.NewMilliQuantity(milliCpuPerPod, "DecimalSI"),
+					v1.ResourceCPU: *resource.NewMilliQuantity(nodeMaxAllocatable*5/10, "DecimalSI"),
 				},
 			},
 		}
 		WaitForSchedulerAfterAction(f, createPausePodAction(f, conf), podName, false)
-		verifyResult(cs, podsNeededForSaturation, 1, ns)
+		verifyResult(cs, len(fillerPods), 1, ns)
 	})
 
 	// Test Nodes does not have any label, hence it should be impossible to schedule Pod with
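
Why the new layout cannot flake, per the test scenario comment above: each filler pod is pinned to exactly one node by node affinity and requests 70% of that node's allocatable CPU, so no node keeps more than 30% free, while the probe pod asks for 50% of the largest node's CPU and therefore fits nowhere. A minimal standalone sketch of that arithmetic follows; the node names and allocatable millicore values are hypothetical and are not part of the commit.

package main

import "fmt"

// Hypothetical allocatable CPU per node, in millicores; the real test reads
// these values from node.Status.Allocatable[v1.ResourceCPU].
var nodeAllocatable = map[string]int64{
	"node-1": 2000,
	"node-2": 4000,
	"node-3": 8000,
}

func main() {
	// The "additional-pod" requests 50% of the largest node's CPU.
	largest := int64(0)
	for _, cpu := range nodeAllocatable {
		if cpu > largest {
			largest = cpu
		}
	}
	probe := largest * 5 / 10

	// Each filler pod requests 70% of its node, so at most 30% of any node
	// stays free, which is always less than the 50%-of-largest probe request.
	for name, cpu := range nodeAllocatable {
		free := cpu - cpu*7/10
		fmt.Printf("%s: free=%dm, probe needs %dm, fits=%v\n", name, free, probe, free >= probe)
	}
}

By contrast, the old code spread a computed number of identical pods across the cluster with StartPods, so a node could end up with enough spare CPU to fit the extra pod, which is the flakiness this commit removes.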