From 028af0970f7f9f688ff8b44003eb2daa5a1568c9 Mon Sep 17 00:00:00 2001
From: Dave Chen
Date: Thu, 2 Jul 2020 18:33:58 +0800
Subject: [PATCH] Cut off the cost to run filter plugins when no victim pods
 are found

If no potential victims can be found, there is no need to evaluate the
node again, since its state didn't change. It's safe to return early and
thus keep the scheduler from running the filter plugins a second time.

NOTE: A node that is filtered out by the filter plugins could pass them
later if something changes on that node, e.g. pods terminating on it.
Previously, this was caught either by the regular `schedule` path or by
`preempt` (when pods are terminated between the time the scheduler tries
to schedule the pod and the time the preemption logic re-evaluates the
filter plugins). This case shouldn't be handled by preemption, though:
since the `schedule` routine runs continuously (its interval is "zero"),
letting `schedule` pick it up frees `preempt` from work that is
irrelevant to preemption.

For the above reason, a couple of test cases, as well as the logic that
checks for the existence of victim pods, are removed, since that
situation can no longer occur after this change.

Signed-off-by: Dave Chen
---
 .../defaultpreemption/default_preemption.go   | 13 +++--
 .../default_preemption_test.go                | 52 +++++--------------
 2 files changed, 20 insertions(+), 45 deletions(-)

diff --git a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go
index 2af50951d5e..7e876433e19 100644
--- a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go
+++ b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go
@@ -315,13 +315,6 @@ func pickOneNodeForPreemption(nodesToVictims map[string]*extenderv1.Victims) str
 	var minNodes1 []string
 	lenNodes1 := 0
 	for node, victims := range nodesToVictims {
-		if len(victims.Pods) == 0 {
-			// We found a node that doesn't need any preemption. Return it!
-			// This should happen rarely when one or more pods are terminated between
-			// the time that scheduler tries to schedule the pod and the time that
-			// preemption logic tries to find nodes for preemption.
-			return node
-		}
 		numPDBViolatingPods := victims.NumPDBViolations
 		if numPDBViolatingPods < minNumPDBViolatingPods {
 			minNumPDBViolatingPods = numPDBViolatingPods
@@ -488,6 +481,12 @@ func selectVictimsOnNode(
 			}
 		}
 	}
+
+	// No potential victims were found, so we don't need to evaluate the node again since its state didn't change.
+	if len(potentialVictims) == 0 {
+		return nil, 0, false
+	}
+
 	// If the new pod does not fit after removing all the lower priority pods,
 	// we are almost done and this node is not suitable for preemption. The only
 	// condition that we could check is if the "pod" is failing to schedule due to
diff --git a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go
index 8d480e2e493..a3407fcc6da 100644
--- a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go
+++ b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go
@@ -323,7 +323,7 @@ func TestSelectNodesForPreemption(t *testing.T) {
 			expectedNumFilterCalled: 4,
 		},
 		{
-			name: "a pod that would fit on the machines, but other pods running are higher priority",
+			name: "a pod that would fit on the machines, but other pods running are higher priority, no preemption would happen",
 			registerPlugins: []st.RegisterPluginFunc{
 				st.RegisterPluginAsExtensions(noderesources.FitName, noderesources.NewFit, "Filter", "PreFilter"),
 			},
@@ -334,7 +334,7 @@ func TestSelectNodesForPreemption(t *testing.T) {
 				st.MakePod().Name("p2").UID("p2").Node("node2").Priority(midPriority).Req(largeRes).Obj(),
 			},
 			expected:                map[string]*extenderv1.Victims{},
-			expectedNumFilterCalled: 2,
+			expectedNumFilterCalled: 0,
 		},
 		{
 			name: "medium priority pod is preempted, but lower priority one stays as it is small",
@@ -380,7 +380,7 @@ func TestSelectNodesForPreemption(t *testing.T) {
 					},
 				},
 			},
-			expectedNumFilterCalled: 5,
+			expectedNumFilterCalled: 4,
 		},
 		{
 			name: "mixed priority pods are preempted, pick later StartTime one when priorities are equal",
@@ -404,7 +404,7 @@ func TestSelectNodesForPreemption(t *testing.T) {
 					},
 				},
 			},
-			expectedNumFilterCalled: 5,
+			expectedNumFilterCalled: 4, // no preemption would happen on node2 and no filter call is counted.
 		},
 		{
 			name: "pod with anti-affinity is preempted",
@@ -428,9 +428,8 @@ func TestSelectNodesForPreemption(t *testing.T) {
 						PodAntiAffinityExists("foo", "hostname", st.PodAntiAffinityWithRequiredReq).Obj(),
 					},
 				},
-				"node2": {},
 			},
-			expectedNumFilterCalled: 4,
+			expectedNumFilterCalled: 3, // no preemption would happen on node2 and no filter call is counted.
 		},
 		{
 			name: "preemption to resolve pod topology spread filter failure",
@@ -457,7 +456,7 @@ func TestSelectNodesForPreemption(t *testing.T) {
 					Pods: []*v1.Pod{st.MakePod().Name("pod-b1").UID("pod-b1").Node("node-b").Label("foo", "").Priority(lowPriority).Obj()},
 				},
 			},
-			expectedNumFilterCalled: 6,
+			expectedNumFilterCalled: 5, // node-a (3), node-b (2), node-x (0)
 		},
 		{
 			name: "get Unschedulable in the preemption phase when the filter plugins filtering the nodes",
@@ -696,17 +695,6 @@ func TestPickOneNodeForPreemption(t *testing.T) {
 			},
 			expected: []string{"node1", "node2"},
 		},
-		{
-			name:           "a pod that fits on a machine with no preemption",
-			registerPlugin: st.RegisterPluginAsExtensions(noderesources.FitName, noderesources.NewFit, "Filter", "PreFilter"),
-			nodeNames:      []string{"node1", "node2", "node3"},
-			pod:            st.MakePod().Name("p").UID("p").Priority(highPriority).Req(largeRes).Obj(),
-			pods: []*v1.Pod{
-				st.MakePod().Name("p1").UID("p1").Node("node1").Priority(midPriority).Req(largeRes).StartTime(epochTime).Obj(),
-				st.MakePod().Name("p2").UID("p2").Node("node2").Priority(midPriority).Req(largeRes).StartTime(epochTime).Obj(),
-			},
-			expected: []string{"node3"},
-		},
 		{
 			name:           "machine with min highest priority pod is picked",
 			registerPlugin: st.RegisterPluginAsExtensions(noderesources.FitName, noderesources.NewFit, "Filter", "PreFilter"),
@@ -1088,19 +1076,6 @@ func TestPreempt(t *testing.T) {
 			expectedNode: "node1",
 			expectedPods: []string{"p1.1", "p1.2"},
 		},
-		{
-			name: "One node doesn't need any preemption",
-			pod:  st.MakePod().Name("p").UID("p").Priority(highPriority).Req(veryLargeRes).PreemptionPolicy(v1.PreemptLowerPriority).Obj(),
-			pods: []*v1.Pod{
-				st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Priority(lowPriority).Req(smallRes).Obj(),
-				st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Priority(lowPriority).Req(smallRes).Obj(),
-				st.MakePod().Name("p2.1").UID("p2.1").Node("node2").Priority(highPriority).Req(largeRes).Obj(),
-			},
-			nodeNames:      []string{"node1", "node2", "node3"},
-			registerPlugin: st.RegisterPluginAsExtensions(noderesources.FitName, noderesources.NewFit, "Filter", "PreFilter"),
-			expectedNode:   "node3",
-			expectedPods:   []string{},
-		},
 		{
 			name: "preemption for topology spread constraints",
 			pod: st.MakePod().Name("p").UID("p").Label("foo", "").Priority(highPriority).
@@ -1170,21 +1145,22 @@ func TestPreempt(t *testing.T) {
 			expectedPods: []string{"p1.1", "p1.2"},
 		},
 		{
-			name: "One scheduler extender allows only machine1, but it is not interested in given pod, otherwise node1 would have been chosen",
+			name: "One scheduler extender allows only machine1, but it is not interested in given pod, otherwise machine1 would have been chosen",
 			pod:  st.MakePod().Name("p").UID("p").Priority(highPriority).Req(veryLargeRes).PreemptionPolicy(v1.PreemptLowerPriority).Obj(),
 			pods: []*v1.Pod{
-				st.MakePod().Name("p1.1").UID("p1.1").Node("node1").Priority(midPriority).Req(smallRes).Obj(),
-				st.MakePod().Name("p1.2").UID("p1.2").Node("node1").Priority(lowPriority).Req(smallRes).Obj(),
-				st.MakePod().Name("p2.1").UID("p2.1").Node("node2").Priority(midPriority).Req(largeRes).Obj(),
+				st.MakePod().Name("p1.1").UID("p1.1").Node("machine1").Priority(midPriority).Req(smallRes).Obj(),
+				st.MakePod().Name("p1.2").UID("p1.2").Node("machine1").Priority(lowPriority).Req(smallRes).Obj(),
+				st.MakePod().Name("p2.1").UID("p2.1").Node("machine2").Priority(midPriority).Req(largeRes).Obj(),
 			},
-			nodeNames: []string{"node1", "node2", "node3"},
+			nodeNames: []string{"machine1", "machine2"},
 			extenders: []*st.FakeExtender{
 				{Predicates: []st.FitPredicate{st.Machine1PredicateExtender}, UnInterested: true},
 				{Predicates: []st.FitPredicate{st.TruePredicateExtender}},
 			},
 			registerPlugin: st.RegisterPluginAsExtensions(noderesources.FitName, noderesources.NewFit, "Filter", "PreFilter"),
-			expectedNode: "node3",
-			expectedPods: []string{},
+			// The sum of the victims' priorities on machine1 is larger than on machine2, so machine2 is chosen.
+			expectedNode: "machine2",
+			expectedPods: []string{"p2.1"},
 		},
 		{
 			name: "no preempting in pod",
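
Note: below is a minimal, self-contained Go sketch of the early-return pattern this patch adds to selectVictimsOnNode, and why expectedNumFilterCalled drops in the tests above. The types and helpers here (pod, nodeState, runFilters) are hypothetical stand-ins for illustration only; the real change operates on the scheduler framework's pod lists and filter plugins.

// Sketch of the early-return optimization; not kube-scheduler API.
package main

import "fmt"

type pod struct {
	name     string
	priority int
}

type nodeState struct {
	name string
	pods []pod
}

// runFilters stands in for the framework's filter-plugin pass; the real
// scheduler would re-run every registered Filter plugin here.
func runFilters(preemptor pod, node nodeState, victims []pod) bool {
	return true // pretend the preemptor fits once the victims are gone
}

// selectVictims mirrors the shape of selectVictimsOnNode: collect every
// pod with lower priority than the preemptor, and return early when there
// are none, because removing nothing cannot change the node's state, so
// re-running the filter plugins would fail exactly as normal scheduling did.
func selectVictims(preemptor pod, node nodeState) ([]pod, bool) {
	var potentialVictims []pod
	for _, p := range node.pods {
		if p.priority < preemptor.priority {
			potentialVictims = append(potentialVictims, p)
		}
	}
	// The optimization this patch adds: no victims, no filter calls.
	if len(potentialVictims) == 0 {
		return nil, false
	}
	// Only now pay for the (comparatively expensive) filter pass.
	if !runFilters(preemptor, node, potentialVictims) {
		return nil, false
	}
	return potentialVictims, true
}

func main() {
	preemptor := pod{name: "p", priority: 100}
	// node2 runs only a higher-priority pod, as in the updated test cases.
	node2 := nodeState{name: "node2", pods: []pod{{name: "p2.1", priority: 200}}}
	if _, ok := selectVictims(preemptor, node2); !ok {
		fmt.Println("node2 rejected with zero filter-plugin calls")
	}
}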