Merge pull request #53169 from bsalamat/fix_sched_e2e

Automatic merge from submit-queue (batch tested with PRs 51311, 52575, 53169). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Fix a scheduler flaky e2e test

**What this PR does / why we need it**:
Makes a scheduler e2e test that verifies the resource limit predicate more robust.

**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #53066 

**Release note**:

```release-note
NONE
```

@kubernetes/sig-scheduling-pr-reviews
This commit is contained in:
Kubernetes Submit Queue 2017-09-28 23:18:22 -07:00 committed by GitHub
commit 9a7378f567

View File

@ -39,7 +39,6 @@ import (
)
const maxNumberOfPods int64 = 10
const minPodCPURequest int64 = 500
var localStorageVersion = utilversion.MustParseSemantic("v1.8.0-beta.0")
@ -227,14 +226,36 @@ var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
verifyResult(cs, podsNeededForSaturation, 1, ns)
})
// This test verifies we don't allow scheduling of pods in a way that sum of limits of pods is greater than machines capacity.
// It assumes that cluster add-on pods stay stable and cannot be run in parallel with any other test that touches Nodes or Pods.
// This test verifies we don't allow scheduling of pods in a way that the sum of
// the limits of pods is greater than the machine's capacity.
// It assumes that cluster add-on pods stay stable and cannot be run in parallel
// with any other test that touches Nodes or Pods.
// It is so because we need to have precise control on what's running in the cluster.
// Test scenario:
// 1. Find the amount of CPU resources on each node.
// 2. Create one pod with affinity to each node that uses 70% of the node CPU.
// 3. Wait for the pods to be scheduled.
// 4. Create another pod with no affinity to any node that needs 50% of the largest node CPU.
// 5. Make sure this additional pod is not scheduled.
It("validates resource limits of pods that are allowed to run [Conformance]", func() {
framework.WaitForStableCluster(cs, masterNodes)
nodeMaxAllocatable := int64(0)
nodeToAllocatableMap := make(map[string]int64)
for _, node := range nodeList.Items {
nodeReady := false
for _, condition := range node.Status.Conditions {
if condition.Type == v1.NodeReady && condition.Status == v1.ConditionTrue {
nodeReady = true
break
}
}
if !nodeReady {
continue
}
// Apply node label to each node
framework.AddOrUpdateLabelOnNode(cs, node.Name, "node", node.Name)
framework.ExpectNodeHasLabel(cs, node.Name, "node", node.Name)
// Find allocatable amount of CPU.
allocatable, found := node.Status.Allocatable[v1.ResourceCPU]
Expect(found).To(Equal(true))
nodeToAllocatableMap[node.Name] = allocatable.MilliValue()
@ -242,7 +263,12 @@ var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
nodeMaxAllocatable = allocatable.MilliValue()
}
}
framework.WaitForStableCluster(cs, masterNodes)
// Clean up added labels after this test.
defer func() {
for nodeName := range nodeToAllocatableMap {
framework.RemoveLabelOffNode(cs, nodeName, "node")
}
}()
pods, err := cs.CoreV1().Pods(metav1.NamespaceAll).List(metav1.ListOptions{})
framework.ExpectNoError(err)
@ -254,51 +280,60 @@ var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
}
}
var podsNeededForSaturation int
milliCpuPerPod := nodeMaxAllocatable / maxNumberOfPods
if milliCpuPerPod < minPodCPURequest {
milliCpuPerPod = minPodCPURequest
}
framework.Logf("Using pod capacity: %vm", milliCpuPerPod)
for name, leftAllocatable := range nodeToAllocatableMap {
framework.Logf("Node: %v has cpu allocatable: %vm", name, leftAllocatable)
podsNeededForSaturation += (int)(leftAllocatable / milliCpuPerPod)
}
By(fmt.Sprintf("Starting additional %v Pods to fully saturate the cluster CPU and trying to start another one", podsNeededForSaturation))
// As the pods are distributed randomly among nodes,
// it can easily happen that all nodes are saturated
// and there is no need to create additional pods.
// StartPods requires at least one pod to replicate.
if podsNeededForSaturation > 0 {
framework.ExpectNoError(testutils.StartPods(cs, podsNeededForSaturation, ns, "overcommit",
*initPausePod(f, pausePodConfig{
Name: "",
Labels: map[string]string{"name": ""},
Resources: &v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCpuPerPod, "DecimalSI"),
},
Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCpuPerPod, "DecimalSI"),
By("Starting Pods to consume most of the cluster CPU.")
// Create one pod per node that requires 70% of the node remaining CPU.
fillerPods := []*v1.Pod{}
for nodeName, cpu := range nodeToAllocatableMap {
requestedCPU := cpu * 7 / 10
fillerPods = append(fillerPods, createPausePod(f, pausePodConfig{
Name: "filler-pod-" + nodeName,
Resources: &v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(requestedCPU, "DecimalSI"),
},
Requests: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(requestedCPU, "DecimalSI"),
},
},
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "node",
Operator: v1.NodeSelectorOpIn,
Values: []string{nodeName},
},
},
},
},
},
},
}), true, framework.Logf))
},
}))
}
// Wait for filler pods to schedule.
for _, pod := range fillerPods {
framework.ExpectNoError(framework.WaitForPodRunningInNamespace(cs, pod))
}
By("Creating another pod that requires unavailable amount of CPU.")
// Create another pod that requires 50% of the largest node CPU resources.
// This pod should remain pending because at least 70% of the CPU of every
// node in the cluster is already consumed.
podName := "additional-pod"
conf := pausePodConfig{
Name: podName,
Labels: map[string]string{"name": "additional"},
Resources: &v1.ResourceRequirements{
Limits: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(milliCpuPerPod, "DecimalSI"),
v1.ResourceCPU: *resource.NewMilliQuantity(nodeMaxAllocatable*5/10, "DecimalSI"),
},
},
}
WaitForSchedulerAfterAction(f, createPausePodAction(f, conf), podName, false)
verifyResult(cs, podsNeededForSaturation, 1, ns)
verifyResult(cs, len(fillerPods), 1, ns)
})
// Test Nodes does not have any label, hence it should be impossible to schedule Pod with