Mirror of https://github.com/k3s-io/kubernetes.git
Merge pull request #53169 from bsalamat/fix_sched_e2e
Automatic merge from submit-queue (batch tested with PRs 51311, 52575, 53169). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Fix a flaky scheduler e2e test

**What this PR does / why we need it**: Makes the scheduler e2e test that verifies the resource limit predicate more robust.

**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #53066

**Release note**:

```release-note
NONE
```

@kubernetes/sig-scheduling-pr-reviews
Commit: 9a7378f567
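The change below replaces the old strategy of starting just enough pause pods to saturate every node's remaining CPU with a simpler layout: one filler pod pinned to each node at 70% of that node's allocatable CPU, followed by one additional pod that asks for 50% of the largest node and must therefore stay pending. The standalone sketch below (node sizes are invented, not taken from the PR) only walks through that arithmetic:

```go
package main

import "fmt"

func main() {
	// Hypothetical allocatable CPU per node, in millicores (not values from the PR).
	nodeAllocatable := []int64{4000, 2000, 2000}

	// Step 2 of the test scenario: one filler pod per node, pinned by node
	// affinity, requesting 70% of that node's CPU, so at most ~30% of any
	// single node stays free.
	nodeMaxAllocatable := int64(0)
	for _, cpu := range nodeAllocatable {
		filler := cpu * 7 / 10
		fmt.Printf("node %dm: filler requests %dm, leaving %dm\n", cpu, filler, cpu-filler)
		if cpu > nodeMaxAllocatable {
			nodeMaxAllocatable = cpu
		}
	}

	// Step 4: the additional pod asks for 50% of the largest node. Since 50% of
	// the largest node always exceeds the ~30% left anywhere, the scheduler has
	// to leave it pending.
	extra := nodeMaxAllocatable * 5 / 10
	fmt.Printf("additional pod requests %dm; largest leftover is %dm\n",
		extra, nodeMaxAllocatable-nodeMaxAllocatable*7/10)
}
```

Because 50% of the largest node always exceeds the roughly 30% left on any node, the expected outcome no longer depends on how the scheduler happens to distribute pods, which the old code's own comment ("As the pods are distributed randomly among nodes...") flags as a source of variation.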
```diff
@@ -39,7 +39,6 @@ import (
 )
 
 const maxNumberOfPods int64 = 10
-const minPodCPURequest int64 = 500
 
 var localStorageVersion = utilversion.MustParseSemantic("v1.8.0-beta.0")
 
```
```diff
@@ -227,14 +226,36 @@ var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
 		verifyResult(cs, podsNeededForSaturation, 1, ns)
 	})
 
-	// This test verifies we don't allow scheduling of pods in a way that sum of limits of pods is greater than machines capacity.
-	// It assumes that cluster add-on pods stay stable and cannot be run in parallel with any other test that touches Nodes or Pods.
+	// This test verifies we don't allow scheduling of pods in a way that sum of
+	// limits of pods is greater than machines capacity.
+	// It assumes that cluster add-on pods stay stable and cannot be run in parallel
+	// with any other test that touches Nodes or Pods.
 	// It is so because we need to have precise control on what's running in the cluster.
+	// Test scenario:
+	// 1. Find the amount CPU resources on each node.
+	// 2. Create one pod with affinity to each node that uses 70% of the node CPU.
+	// 3. Wait for the pods to be scheduled.
+	// 4. Create another pod with no affinity to any node that need 50% of the largest node CPU.
+	// 5. Make sure this additional pod is not scheduled.
 	It("validates resource limits of pods that are allowed to run [Conformance]", func() {
+		framework.WaitForStableCluster(cs, masterNodes)
 		nodeMaxAllocatable := int64(0)
 
 		nodeToAllocatableMap := make(map[string]int64)
 		for _, node := range nodeList.Items {
+			nodeReady := false
+			for _, condition := range node.Status.Conditions {
+				if condition.Type == v1.NodeReady && condition.Status == v1.ConditionTrue {
+					nodeReady = true
+					break
+				}
+			}
+			if !nodeReady {
+				continue
+			}
+			// Apply node label to each node
+			framework.AddOrUpdateLabelOnNode(cs, node.Name, "node", node.Name)
+			framework.ExpectNodeHasLabel(cs, node.Name, "node", node.Name)
+			// Find allocatable amount of CPU.
 			allocatable, found := node.Status.Allocatable[v1.ResourceCPU]
 			Expect(found).To(Equal(true))
 			nodeToAllocatableMap[node.Name] = allocatable.MilliValue()
```
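In the hunk above, the new loop skips nodes whose `NodeReady` condition is not `True` before labeling them and recording their allocatable CPU. The helper below is a hypothetical, standalone restatement of that inline check (the test itself does not define such a function), just to show the condition fields involved:

```go
package sketch

import (
	v1 "k8s.io/api/core/v1"
)

// isNodeReady reports whether the node's NodeReady condition is True,
// mirroring the inline check added in the hunk above.
func isNodeReady(node v1.Node) bool {
	for _, condition := range node.Status.Conditions {
		if condition.Type == v1.NodeReady && condition.Status == v1.ConditionTrue {
			return true
		}
	}
	return false
}
```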
```diff
@@ -242,7 +263,12 @@ var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
 				nodeMaxAllocatable = allocatable.MilliValue()
 			}
 		}
-		framework.WaitForStableCluster(cs, masterNodes)
+		// Clean up added labels after this test.
+		defer func() {
+			for nodeName := range nodeToAllocatableMap {
+				framework.RemoveLabelOffNode(cs, nodeName, "node")
+			}
+		}()
 
 		pods, err := cs.CoreV1().Pods(metav1.NamespaceAll).List(metav1.ListOptions{})
 		framework.ExpectNoError(err)
```
```diff
@@ -254,51 +280,60 @@ var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
 			}
 		}
 
-		var podsNeededForSaturation int
-
-		milliCpuPerPod := nodeMaxAllocatable / maxNumberOfPods
-		if milliCpuPerPod < minPodCPURequest {
-			milliCpuPerPod = minPodCPURequest
-		}
-		framework.Logf("Using pod capacity: %vm", milliCpuPerPod)
-		for name, leftAllocatable := range nodeToAllocatableMap {
-			framework.Logf("Node: %v has cpu allocatable: %vm", name, leftAllocatable)
-			podsNeededForSaturation += (int)(leftAllocatable / milliCpuPerPod)
-		}
-
-		By(fmt.Sprintf("Starting additional %v Pods to fully saturate the cluster CPU and trying to start another one", podsNeededForSaturation))
-
-		// As the pods are distributed randomly among nodes,
-		// it can easily happen that all nodes are saturated
-		// and there is no need to create additional pods.
-		// StartPods requires at least one pod to replicate.
-		if podsNeededForSaturation > 0 {
-			framework.ExpectNoError(testutils.StartPods(cs, podsNeededForSaturation, ns, "overcommit",
-				*initPausePod(f, pausePodConfig{
-					Name: "",
-					Labels: map[string]string{"name": ""},
-					Resources: &v1.ResourceRequirements{
-						Limits: v1.ResourceList{
-							v1.ResourceCPU: *resource.NewMilliQuantity(milliCpuPerPod, "DecimalSI"),
-						},
-						Requests: v1.ResourceList{
-							v1.ResourceCPU: *resource.NewMilliQuantity(milliCpuPerPod, "DecimalSI"),
-						},
-					},
-				}), true, framework.Logf))
-		}
+		By("Starting Pods to consume most of the cluster CPU.")
+		// Create one pod per node that requires 70% of the node remaining CPU.
+		fillerPods := []*v1.Pod{}
+		for nodeName, cpu := range nodeToAllocatableMap {
+			requestedCPU := cpu * 7 / 10
+			fillerPods = append(fillerPods, createPausePod(f, pausePodConfig{
+				Name: "filler-pod-" + nodeName,
+				Resources: &v1.ResourceRequirements{
+					Limits: v1.ResourceList{
+						v1.ResourceCPU: *resource.NewMilliQuantity(requestedCPU, "DecimalSI"),
+					},
+					Requests: v1.ResourceList{
+						v1.ResourceCPU: *resource.NewMilliQuantity(requestedCPU, "DecimalSI"),
+					},
+				},
+				Affinity: &v1.Affinity{
+					NodeAffinity: &v1.NodeAffinity{
+						RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+							NodeSelectorTerms: []v1.NodeSelectorTerm{
+								{
+									MatchExpressions: []v1.NodeSelectorRequirement{
+										{
+											Key: "node",
+											Operator: v1.NodeSelectorOpIn,
+											Values: []string{nodeName},
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			}))
+		}
+		// Wait for filler pods to schedule.
+		for _, pod := range fillerPods {
+			framework.ExpectNoError(framework.WaitForPodRunningInNamespace(cs, pod))
+		}
+		By("Creating another pod that requires unavailable amount of CPU.")
+		// Create another pod that requires 50% of the largest node CPU resources.
+		// This pod should remain pending as at least 70% of CPU of other nodes in
+		// the cluster are already consumed.
 		podName := "additional-pod"
 		conf := pausePodConfig{
 			Name: podName,
 			Labels: map[string]string{"name": "additional"},
 			Resources: &v1.ResourceRequirements{
 				Limits: v1.ResourceList{
-					v1.ResourceCPU: *resource.NewMilliQuantity(milliCpuPerPod, "DecimalSI"),
+					v1.ResourceCPU: *resource.NewMilliQuantity(nodeMaxAllocatable*5/10, "DecimalSI"),
 				},
 			},
 		}
 		WaitForSchedulerAfterAction(f, createPausePodAction(f, conf), podName, false)
-		verifyResult(cs, podsNeededForSaturation, 1, ns)
+		verifyResult(cs, len(fillerPods), 1, ns)
 	})
 
 	// Test Nodes does not have any label, hence it should be impossible to schedule Pod with
```