diff --git a/test/e2e/scheduling/preemption.go b/test/e2e/scheduling/preemption.go
index 02d30ad687b..934f5485249 100644
--- a/test/e2e/scheduling/preemption.go
+++ b/test/e2e/scheduling/preemption.go
@@ -35,7 +35,6 @@ import (
 	"k8s.io/apimachinery/pkg/watch"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/tools/cache"
-	v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
 	"k8s.io/kubernetes/pkg/apis/scheduling"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
@@ -55,6 +54,8 @@ type priorityPair struct {
 	value int32
 }
 
+var testExtendedResource = v1.ResourceName("scheduling.k8s.io/foo")
+
 var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 	var cs clientset.Interface
 	var nodeList *v1.NodeList
@@ -75,6 +76,10 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 		for _, pair := range priorityPairs {
 			cs.SchedulingV1().PriorityClasses().Delete(context.TODO(), pair.name, *metav1.NewDeleteOptions(0))
 		}
+		for _, node := range nodeList.Items {
+			delete(node.Status.Capacity, testExtendedResource)
+			cs.CoreV1().Nodes().UpdateStatus(context.TODO(), &node, metav1.UpdateOptions{})
+		}
 	})
 
 	ginkgo.BeforeEach(func() {
@@ -103,30 +108,20 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 	// the high priority pod.
 	ginkgo.It("validates basic preemption works", func() {
 		var podRes v1.ResourceList
+
 		// Create one pod per node that uses a lot of the node's resources.
-		ginkgo.By("Create pods that use 60% of node resources.")
+		ginkgo.By("Create pods that use 2/3 of node resources.")
 		pods := make([]*v1.Pod, 0, len(nodeList.Items))
-		allPods, err := cs.CoreV1().Pods(metav1.NamespaceAll).List(context.TODO(), metav1.ListOptions{})
-		framework.ExpectNoError(err)
+		// Now create victim pods on each of the node with lower priority
 		for i, node := range nodeList.Items {
-			currentCPUUsage, currentMemUsage := getCurrentPodUsageOnTheNode(node.Name, allPods.Items, podRequestedResource)
-			framework.Logf("Current cpu and memory usage %v, %v", currentCPUUsage, currentMemUsage)
-			cpuAllocatable, found := node.Status.Allocatable["cpu"]
-			framework.ExpectEqual(found, true)
-			milliCPU := cpuAllocatable.MilliValue()
-			milliCPU = int64(float64(milliCPU-currentCPUUsage) * float64(0.6))
-			memAllocatable, found := node.Status.Allocatable["memory"]
-			framework.ExpectEqual(found, true)
-			memory := memAllocatable.Value()
-			memory = int64(float64(memory-currentMemUsage) * float64(0.6))
-			// If a node is already heavily utilized let not's create a pod there.
-			if milliCPU <= 0 || memory <= 0 {
-				framework.Logf("Node is heavily utilized, let's not create a pod here")
-				continue
-			}
+			// Update each node to advertise 3 available extended resources
+			node.Status.Capacity[testExtendedResource] = resource.MustParse("3")
+			node, err := cs.CoreV1().Nodes().UpdateStatus(context.TODO(), &node, metav1.UpdateOptions{})
+			framework.ExpectNoError(err)
+
+			// Request 2 of the available resources for the victim pods
 			podRes = v1.ResourceList{}
-			podRes[v1.ResourceCPU] = *resource.NewMilliQuantity(int64(milliCPU), resource.DecimalSI)
-			podRes[v1.ResourceMemory] = *resource.NewQuantity(int64(memory), resource.BinarySI)
+			podRes[testExtendedResource] = resource.MustParse("2")
 
 			// make the first pod low priority and the rest medium priority.
 			priorityName := mediumPriorityClassName
@@ -138,8 +133,21 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 				PriorityClassName: priorityName,
 				Resources: &v1.ResourceRequirements{
 					Requests: podRes,
+					Limits:   podRes,
+				},
+				Affinity: &v1.Affinity{
+					NodeAffinity: &v1.NodeAffinity{
+						RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+							NodeSelectorTerms: []v1.NodeSelectorTerm{
+								{
+									MatchFields: []v1.NodeSelectorRequirement{
+										{Key: "metadata.name", Operator: v1.NodeSelectorOpIn, Values: []string{node.Name}},
+									},
+								},
+							},
+						},
+					},
 				},
-				NodeName: node.Name,
 			}))
 			framework.Logf("Created pod: %v", pods[i].Name)
 		}
@@ -162,8 +170,8 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 			PriorityClassName: highPriorityClassName,
 			Resources: &v1.ResourceRequirements{
 				Requests: podRes,
+				Limits:   podRes,
 			},
-			NodeName: pods[0].Spec.NodeName,
 		})
 
 		preemptedPod, err := cs.CoreV1().Pods(pods[0].Namespace).Get(context.TODO(), pods[0].Name, metav1.GetOptions{})
@@ -183,30 +191,18 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 	// this critical pod.
 	ginkgo.It("validates lower priority pod preemption by critical pod", func() {
 		var podRes v1.ResourceList
-		// Create one pod per node that uses a lot of the node's resources.
-		ginkgo.By("Create pods that use 60% of node resources.")
+
+		ginkgo.By("Create pods that use 2/3 of node resources.")
 		pods := make([]*v1.Pod, 0, len(nodeList.Items))
-		allPods, err := cs.CoreV1().Pods(metav1.NamespaceAll).List(context.TODO(), metav1.ListOptions{})
-		framework.ExpectNoError(err)
 		for i, node := range nodeList.Items {
-			currentCPUUsage, currentMemUsage := getCurrentPodUsageOnTheNode(node.Name, allPods.Items, podRequestedResource)
-			framework.Logf("Current cpu usage and memory usage is %v, %v", currentCPUUsage, currentMemUsage)
-			cpuAllocatable, found := node.Status.Allocatable["cpu"]
-			framework.ExpectEqual(found, true)
-			milliCPU := cpuAllocatable.MilliValue()
-			milliCPU = int64(float64(milliCPU-currentCPUUsage) * float64(0.6))
-			memAllocatable, found := node.Status.Allocatable["memory"]
-			framework.ExpectEqual(found, true)
-			memory := memAllocatable.Value()
-			memory = int64(float64(memory-currentMemUsage) * float64(0.6))
+			// Update each node to advertise 3 available extended resources
+			node.Status.Capacity[testExtendedResource] = resource.MustParse("3")
+			node, err := cs.CoreV1().Nodes().UpdateStatus(context.TODO(), &node, metav1.UpdateOptions{})
+			framework.ExpectNoError(err)
+
+			// Request 2 of the available resources for the victim pods
 			podRes = v1.ResourceList{}
-			// If a node is already heavily utilized let not's create a pod there.
-			if milliCPU <= 0 || memory <= 0 {
-				framework.Logf("Node is heavily utilized, let's not create a pod there")
-				continue
-			}
-			podRes[v1.ResourceCPU] = *resource.NewMilliQuantity(int64(milliCPU), resource.DecimalSI)
-			podRes[v1.ResourceMemory] = *resource.NewQuantity(int64(memory), resource.BinarySI)
+			podRes[testExtendedResource] = resource.MustParse("2")
 
 			// make the first pod low priority and the rest medium priority.
 			priorityName := mediumPriorityClassName
@@ -219,7 +215,19 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 				Resources: &v1.ResourceRequirements{
 					Requests: podRes,
 				},
-				NodeName: node.Name,
+				Affinity: &v1.Affinity{
+					NodeAffinity: &v1.NodeAffinity{
+						RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+							NodeSelectorTerms: []v1.NodeSelectorTerm{
+								{
+									MatchFields: []v1.NodeSelectorRequirement{
+										{Key: "metadata.name", Operator: v1.NodeSelectorOpIn, Values: []string{node.Name}},
+									},
+								},
+							},
+						},
+					},
+				},
 			}))
 			framework.Logf("Created pod: %v", pods[i].Name)
 		}
@@ -250,8 +258,8 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 			PriorityClassName: scheduling.SystemClusterCritical,
 			Resources: &v1.ResourceRequirements{
 				Requests: podRes,
+				Limits:   podRes,
 			},
-			NodeName: pods[0].Spec.NodeName,
 		})
 
 		defer func() {
@@ -720,17 +728,3 @@ func waitForPreemptingWithTimeout(f *framework.Framework, pod *v1.Pod, timeout t
 	})
 	framework.ExpectNoError(err, "pod %v/%v failed to preempt other pods", pod.Namespace, pod.Name)
 }
-
-func getCurrentPodUsageOnTheNode(nodeName string, pods []v1.Pod, resource *v1.ResourceRequirements) (int64, int64) {
-	totalRequestedCPUResource := resource.Requests.Cpu().MilliValue()
-	totalRequestedMemResource := resource.Requests.Memory().Value()
-	for _, pod := range pods {
-		if pod.Spec.NodeName != nodeName || v1qos.GetPodQOS(&pod) == v1.PodQOSBestEffort {
-			continue
-		}
-		result := getNonZeroRequests(&pod)
-		totalRequestedCPUResource += result.MilliCPU
-		totalRequestedMemResource += result.Memory
-	}
-	return totalRequestedCPUResource, totalRequestedMemResource
-}
diff --git a/test/e2e/scheduling/priorities.go b/test/e2e/scheduling/priorities.go
index 873e6a4a91e..e9919fbaace 100644
--- a/test/e2e/scheduling/priorities.go
+++ b/test/e2e/scheduling/priorities.go
@@ -465,16 +465,30 @@ func createBalancedPodForNodes(f *framework.Framework, cs clientset.Interface, n
 		needCreateResource[v1.ResourceMemory] = *resource.NewQuantity(int64((ratio-memFraction)*float64(memAllocatableVal)), resource.BinarySI)
 
-		err := testutils.StartPods(cs, 1, ns, string(uuid.NewUUID()),
-			*initPausePod(f, pausePodConfig{
-				Name:   "",
-				Labels: balancePodLabel,
-				Resources: &v1.ResourceRequirements{
-					Limits:   needCreateResource,
-					Requests: needCreateResource,
+		podConfig := &pausePodConfig{
+			Name:   "",
+			Labels: balancePodLabel,
+			Resources: &v1.ResourceRequirements{
+				Limits:   needCreateResource,
+				Requests: needCreateResource,
+			},
+			Affinity: &v1.Affinity{
+				NodeAffinity: &v1.NodeAffinity{
+					RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+						NodeSelectorTerms: []v1.NodeSelectorTerm{
+							{
+								MatchFields: []v1.NodeSelectorRequirement{
+									{Key: "metadata.name", Operator: v1.NodeSelectorOpIn, Values: []string{node.Name}},
+								},
+							},
+						},
+					},
 				},
-				NodeName: node.Name,
-			}), true, framework.Logf)
+			},
+		}
+
+		err := testutils.StartPods(cs, 1, ns, string(uuid.NewUUID()),
+			*initPausePod(f, *podConfig), true, framework.Logf)
 		if err != nil {
 			return err
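
Note on the pattern used throughout this patch: instead of setting pod.Spec.NodeName directly (which bypasses the scheduler, so priority and preemption never run), pods are pinned to a node with required node affinity on the metadata.name field, and the resource pressure is created with a test-only extended resource (scheduling.k8s.io/foo) rather than fractions of real CPU/memory, which depended on whatever else was running on the node. A minimal standalone sketch of the affinity helper, outside the patch; the package and function name (nodeAffinityForName) are illustrative, not part of this change:

package example // illustrative sketch only, not part of the patch

import (
	v1 "k8s.io/api/core/v1"
)

// nodeAffinityForName pins a pod to a single node by name using required node
// affinity on the metadata.name field. Unlike setting pod.Spec.NodeName, the pod
// still goes through the scheduler, so priority and preemption apply to it.
func nodeAffinityForName(nodeName string) *v1.Affinity {
	return &v1.Affinity{
		NodeAffinity: &v1.NodeAffinity{
			RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
				NodeSelectorTerms: []v1.NodeSelectorTerm{
					{
						MatchFields: []v1.NodeSelectorRequirement{
							{Key: "metadata.name", Operator: v1.NodeSelectorOpIn, Values: []string{nodeName}},
						},
					},
				},
			},
		},
	}
}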