From fce28dbac9c8223d627db7dde17463826c61ea40 Mon Sep 17 00:00:00 2001 From: Yuan Chen Date: Tue, 27 Apr 2021 22:25:46 -0700 Subject: [PATCH] Prevent scheduler crashing in default preemption --- .../defaultpreemption/default_preemption.go | 25 ++++++++++++--- .../default_preemption_test.go | 32 ++++++------------- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go index 684adfd788e..70102739e2b 100644 --- a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go +++ b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go @@ -334,7 +334,7 @@ func dryRunPreemption(ctx context.Context, fh framework.Handle, nodeInfoCopy := potentialNodes[(int(offset)+i)%len(potentialNodes)].Clone() stateCopy := state.Clone() pods, numPDBViolations, status := selectVictimsOnNode(ctx, fh, stateCopy, pod, nodeInfoCopy, pdbs) - if status.IsSuccess() { + if status.IsSuccess() && len(pods) != 0 { victims := extenderv1.Victims{ Pods: pods, NumPDBViolations: int64(numPDBViolations), @@ -352,11 +352,14 @@ func dryRunPreemption(ctx context.Context, fh framework.Handle, if nvcSize > 0 && nvcSize+vcSize >= numCandidates { cancel() } - } else { - statusesLock.Lock() - nodeStatuses[nodeInfoCopy.Node().Name] = status - statusesLock.Unlock() + return } + if status.IsSuccess() && len(pods) == 0 { + status = framework.AsStatus(fmt.Errorf("expected at least one victim pod on node %q", nodeInfoCopy.Node().Name)) + } + statusesLock.Lock() + nodeStatuses[nodeInfoCopy.Node().Name] = status + statusesLock.Unlock() } fh.Parallelizer().Until(parallelCtx, len(potentialNodes), checkNode) return append(nonViolatingCandidates.get(), violatingCandidates.get()...), nodeStatuses @@ -391,6 +394,18 @@ func CallExtenders(extenders []framework.Extender, pod *v1.Pod, nodeLister frame } return nil, framework.AsStatus(err) } + // Check if the returned victims are valid. + for nodeName, victims := range nodeNameToVictims { + if victims == nil || len(victims.Pods) == 0 { + if extender.IsIgnorable() { + delete(nodeNameToVictims, nodeName) + klog.InfoS("Ignoring node without victims", "node", nodeName) + continue + } + return nil, framework.AsStatus(fmt.Errorf("expected at least one victim pod on node %q", nodeName)) + } + } + // Replace victimsMap with new result after preemption. So the // rest of extenders can continue use it as parameter. victimsMap = nodeNameToVictims diff --git a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go index b93789f4e8d..194c053a050 100644 --- a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go +++ b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go @@ -357,13 +357,9 @@ func TestDryRunPreemption(t *testing.T) { st.MakePod().Name("p1").UID("p1").Node("node1").Priority(midPriority).Obj(), st.MakePod().Name("p2").UID("p2").Node("node2").Priority(midPriority).Obj(), }, - expected: [][]Candidate{ - { - &candidate{victims: &extenderv1.Victims{}, name: "node1"}, - &candidate{victims: &extenderv1.Victims{}, name: "node2"}, - }, - }, - expectedNumFilterCalled: []int32{4}, + expected: [][]Candidate{{}}, + fakeFilterRC: framework.Unschedulable, + expectedNumFilterCalled: []int32{2}, }, { name: "a pod that fits on one node with no preemption", @@ -379,12 +375,9 @@ func TestDryRunPreemption(t *testing.T) { st.MakePod().Name("p1").UID("p1").Node("node1").Priority(midPriority).Obj(), st.MakePod().Name("p2").UID("p2").Node("node2").Priority(midPriority).Obj(), }, - expected: [][]Candidate{ - { - &candidate{victims: &extenderv1.Victims{}, name: "node1"}, - }, - }, - expectedNumFilterCalled: []int32{3}, + expected: [][]Candidate{{}}, + fakeFilterRC: framework.Unschedulable, + expectedNumFilterCalled: []int32{2}, }, { name: "a pod that fits on both nodes when lower priority pods are preempted", @@ -1072,16 +1065,6 @@ func TestSelectBestCandidate(t *testing.T) { pods []*v1.Pod expected []string // any of the items is valid }{ - { - name: "No node needs preemption", - registerPlugin: st.RegisterPluginAsExtensions(noderesources.FitName, noderesources.NewFit, "Filter", "PreFilter"), - nodeNames: []string{"node1"}, - pod: st.MakePod().Name("p").UID("p").Priority(highPriority).Req(largeRes).Obj(), - pods: []*v1.Pod{ - st.MakePod().Name("p1").UID("p1").Node("node1").Priority(midPriority).Req(smallRes).StartTime(epochTime).Obj(), - }, - expected: []string{"node1"}, - }, { name: "a pod that fits on both nodes when lower priority pods are preempted", registerPlugin: st.RegisterPluginAsExtensions(noderesources.FitName, noderesources.NewFit, "Filter", "PreFilter"), @@ -1263,6 +1246,9 @@ func TestSelectBestCandidate(t *testing.T) { offset, numCandidates := pl.getOffsetAndNumCandidates(int32(len(nodeInfos))) candidates, _ := dryRunPreemption(context.Background(), fwk, state, tt.pod, nodeInfos, nil, offset, numCandidates) s := SelectCandidate(candidates) + if s == nil || len(s.Name()) == 0 { + return + } found := false for _, nodeName := range tt.expected { if nodeName == s.Name() {