From b3741f344e76b0db1c16b026842accdcf648da7c Mon Sep 17 00:00:00 2001 From: He Jie Xu Date: Mon, 29 Jun 2020 16:43:59 +0800 Subject: [PATCH] The Pod is eligible to preempt when previous nominanted node is UnschedulableAndUnresolvable If the Pod's previous nominated node is UnschedulableAndUnresolvable from previous filtering, it should be considered for preemption again. --- .../defaultpreemption/default_preemption.go | 10 +++- .../default_preemption_test.go | 55 +++++++++++++++++++ pkg/scheduler/testing/wrappers.go | 6 ++ 3 files changed, 69 insertions(+), 2 deletions(-) diff --git a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go index f902b0425c2..2af50951d5e 100644 --- a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go +++ b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go @@ -106,7 +106,7 @@ func preempt(ctx context.Context, fh framework.FrameworkHandle, state *framework return "", err } - if !podEligibleToPreemptOthers(pod, fh.SnapshotSharedLister().NodeInfos()) { + if !podEligibleToPreemptOthers(pod, fh.SnapshotSharedLister().NodeInfos(), m[pod.Status.NominatedNodeName]) { klog.V(5).Infof("Pod %v/%v is not eligible for more preemption.", pod.Namespace, pod.Name) return "", nil } @@ -189,13 +189,19 @@ func preempt(ctx context.Context, fh framework.FrameworkHandle, state *framework // considered for preemption. // We look at the node that is nominated for this pod and as long as there are // terminating pods on the node, we don't consider this for preempting more pods. -func podEligibleToPreemptOthers(pod *v1.Pod, nodeInfos framework.NodeInfoLister) bool { +func podEligibleToPreemptOthers(pod *v1.Pod, nodeInfos framework.NodeInfoLister, nominatedNodeStatus *framework.Status) bool { if pod.Spec.PreemptionPolicy != nil && *pod.Spec.PreemptionPolicy == v1.PreemptNever { klog.V(5).Infof("Pod %v/%v is not eligible for preemption because it has a preemptionPolicy of %v", pod.Namespace, pod.Name, v1.PreemptNever) return false } nomNodeName := pod.Status.NominatedNodeName if len(nomNodeName) > 0 { + // If the pod's nominated node is considered as UnschedulableAndUnresolvable by the filters, + // then the pod should be considered for preempting again. + if nominatedNodeStatus.Code() == framework.UnschedulableAndUnresolvable { + return true + } + if nodeInfo, _ := nodeInfos.Get(nomNodeName); nodeInfo != nil { podPriority := podutil.GetPodPriority(pod) for _, p := range nodeInfo.Pods { diff --git a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go index bfd8d8c034a..2b7e4cd310e 100644 --- a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go +++ b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go @@ -889,6 +889,61 @@ func TestPickOneNodeForPreemption(t *testing.T) { } } +func TestPodEligibleToPreemptOthers(t *testing.T) { + tests := []struct { + name string + pod *v1.Pod + pods []*v1.Pod + nodes []string + nominatedNodeStatus *framework.Status + expected bool + }{ + { + name: "Pod with nominated node", + pod: st.MakePod().Name("p_with_nominated_node").UID("p").Priority(highPriority).NominatedNodeName("node1").Obj(), + pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Priority(lowPriority).Node("node1").Terminating().Obj()}, + nodes: []string{"node1"}, + nominatedNodeStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, tainttoleration.ErrReasonNotMatch), + expected: true, + }, + { + name: "Pod with nominated node, but without nominated node status", + pod: st.MakePod().Name("p_without_status").UID("p").Priority(highPriority).NominatedNodeName("node1").Obj(), + pods: []*v1.Pod{st.MakePod().Name("p1").UID("p1").Priority(lowPriority).Node("node1").Terminating().Obj()}, + nodes: []string{"node1"}, + nominatedNodeStatus: nil, + expected: false, + }, + { + name: "Pod without nominated node", + pod: st.MakePod().Name("p_without_nominated_node").UID("p").Priority(highPriority).Obj(), + pods: []*v1.Pod{}, + nodes: []string{}, + nominatedNodeStatus: nil, + expected: true, + }, + { + name: "Pod with 'PreemptNever' preemption policy", + pod: st.MakePod().Name("p_with_preempt_never_policy").UID("p").Priority(highPriority).PreemptionPolicy(v1.PreemptNever).Obj(), + pods: []*v1.Pod{}, + nodes: []string{}, + nominatedNodeStatus: nil, + expected: false, + }, + } + + for _, test := range tests { + var nodes []*v1.Node + for _, n := range test.nodes { + nodes = append(nodes, st.MakeNode().Name(n).Obj()) + } + snapshot := internalcache.NewSnapshot(test.pods, nodes) + if got := podEligibleToPreemptOthers(test.pod, snapshot.NodeInfos(), test.nominatedNodeStatus); got != test.expected { + t.Errorf("expected %t, got %t for pod: %s", test.expected, got, test.pod.Name) + } + } +} + func TestNodesWherePreemptionMightHelp(t *testing.T) { // Prepare 4 nodes names. nodeNames := []string{"node1", "node2", "node3", "node4"} diff --git a/pkg/scheduler/testing/wrappers.go b/pkg/scheduler/testing/wrappers.go index 5cf30532ded..72d8644d7d6 100644 --- a/pkg/scheduler/testing/wrappers.go +++ b/pkg/scheduler/testing/wrappers.go @@ -244,6 +244,12 @@ func (p *PodWrapper) StartTime(t metav1.Time) *PodWrapper { return p } +// NominatedNodeName sets `n` as the .Status.NominatedNodeName of the inner pod. +func (p *PodWrapper) NominatedNodeName(n string) *PodWrapper { + p.Status.NominatedNodeName = n + return p +} + // PodAffinityKind represents different kinds of PodAffinity. type PodAffinityKind int