diff --git a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go
index 9282626a0f3..a807ade5f6e 100644
--- a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go
+++ b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go
@@ -95,7 +95,12 @@ func (pl *DefaultPreemption) PostFilter(ctx context.Context, state *framework.Cy
 		State:     state,
 		Interface: pl,
 	}
-	return pe.Preempt(ctx, pod, m)
+
+	result, status := pe.Preempt(ctx, pod, m)
+	if status.Message() != "" {
+		return result, framework.NewStatus(status.Code(), "preemption: "+status.Message())
+	}
+	return result, status
 }
 
 // calculateNumCandidates returns the number of candidates the FindCandidates
@@ -222,16 +227,17 @@ func (pl *DefaultPreemption) SelectVictimsOnNode(
 	return victims, numViolatingVictim, framework.NewStatus(framework.Success)
 }
 
-// PodEligibleToPreemptOthers determines whether this pod should be considered
-// for preempting other pods or not. If this pod has already preempted other
+// PodEligibleToPreemptOthers reports whether this pod should be considered for
+// preempting other pods, and if not, returns the reason as the second return
+// value.
+// If this pod has a preemptionPolicy of Never or has already preempted other
 // pods and those are in their graceful termination period, it shouldn't be
 // considered for preemption.
 // We look at the node that is nominated for this pod and as long as there are
 // terminating pods on the node, we don't consider this for preempting more pods.
-func (pl *DefaultPreemption) PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNodeStatus *framework.Status) bool {
+func (pl *DefaultPreemption) PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNodeStatus *framework.Status) (bool, string) {
 	if pod.Spec.PreemptionPolicy != nil && *pod.Spec.PreemptionPolicy == v1.PreemptNever {
-		klog.V(5).InfoS("Pod is not eligible for preemption because it has a preemptionPolicy of Never", "pod", klog.KObj(pod))
-		return false
+		return false, "not eligible due to preemptionPolicy=Never."
 	}
 	nodeInfos := pl.fh.SnapshotSharedLister().NodeInfos()
 	nomNodeName := pod.Status.NominatedNodeName
@@ -239,7 +245,7 @@ func (pl *DefaultPreemption) PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNo
 	// If the pod's nominated node is considered as UnschedulableAndUnresolvable by the filters,
 	// then the pod should be considered for preempting again.
 	if nominatedNodeStatus.Code() == framework.UnschedulableAndUnresolvable {
-		return true
+		return true, ""
 	}
 
 	if nodeInfo, _ := nodeInfos.Get(nomNodeName); nodeInfo != nil {
@@ -247,12 +253,12 @@ func (pl *DefaultPreemption) PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNo
 		podPriority := corev1helpers.PodPriority(pod)
 		for _, p := range nodeInfo.Pods {
 			if p.Pod.DeletionTimestamp != nil && corev1helpers.PodPriority(p.Pod) < podPriority {
 				// There is a terminating pod on the nominated node.
-				return false
+				return false, "not eligible due to a terminating pod on the nominated node."
 			}
 		}
 	}
-	return true
+	return true, ""
 }
 
 // filterPodsWithPDBViolation groups the given "pods" into two groups of "violatingPods"
diff --git a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go
index cd55946c58d..9c1eb0ff3a2 100644
--- a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go
+++ b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go
@@ -177,7 +177,7 @@ func TestPostFilter(t *testing.T) {
 				"node1": framework.NewStatus(framework.Unschedulable),
 			},
 			wantResult: framework.NewPostFilterResultWithNominatedNode(""),
-			wantStatus: framework.NewStatus(framework.Unschedulable, "0/1 nodes are available: 1 No victims found on node node1 for preemptor pod p."),
+			wantStatus: framework.NewStatus(framework.Unschedulable, "preemption: 0/1 nodes are available: 1 No victims found on node node1 for preemptor pod p."),
 		},
 		{
 			name: "preemption should respect filteredNodesStatuses",
@@ -192,7 +192,7 @@ func TestPostFilter(t *testing.T) {
 				"node1": framework.NewStatus(framework.UnschedulableAndUnresolvable),
 			},
 			wantResult: framework.NewPostFilterResultWithNominatedNode(""),
-			wantStatus: framework.NewStatus(framework.Unschedulable, "0/1 nodes are available: 1 Preemption is not helpful for scheduling."),
+			wantStatus: framework.NewStatus(framework.Unschedulable, "preemption: 0/1 nodes are available: 1 Preemption is not helpful for scheduling."),
 		},
 		{
 			name: "pod can be made schedulable on one node",
@@ -247,7 +247,7 @@ func TestPostFilter(t *testing.T) {
 				"node2": framework.NewStatus(framework.Unschedulable),
 			},
 			wantResult: framework.NewPostFilterResultWithNominatedNode(""),
-			wantStatus: framework.NewStatus(framework.Unschedulable, "0/2 nodes are available: 2 Insufficient cpu."),
+			wantStatus: framework.NewStatus(framework.Unschedulable, "preemption: 0/2 nodes are available: 2 Insufficient cpu."),
 		},
 		{
 			name: "no candidate nodes found with mixed reasons, no lower priority pod and no enough CPU resource",
@@ -265,7 +265,7 @@ func TestPostFilter(t *testing.T) {
 				"node2": framework.NewStatus(framework.Unschedulable),
 			},
 			wantResult: framework.NewPostFilterResultWithNominatedNode(""),
-			wantStatus: framework.NewStatus(framework.Unschedulable, "0/2 nodes are available: 1 Insufficient cpu, 1 No victims found on node node1 for preemptor pod p."),
+			wantStatus: framework.NewStatus(framework.Unschedulable, "preemption: 0/2 nodes are available: 1 Insufficient cpu, 1 No victims found on node node1 for preemptor pod p."),
 		},
 		{
 			name: "no candidate nodes found with mixed reason, 2 UnschedulableAndUnresolvable nodes and 2 nodes don't have enough CPU resource",
@@ -285,7 +285,7 @@ func TestPostFilter(t *testing.T) {
 				"node4": framework.NewStatus(framework.UnschedulableAndUnresolvable),
 			},
 			wantResult: framework.NewPostFilterResultWithNominatedNode(""),
-			wantStatus: framework.NewStatus(framework.Unschedulable, "0/4 nodes are available: 2 Insufficient cpu, 2 Preemption is not helpful for scheduling."),
+			wantStatus: framework.NewStatus(framework.Unschedulable, "preemption: 0/4 nodes are available: 2 Insufficient cpu, 2 Preemption is not helpful for scheduling."),
 		},
 		{
 			name: "only one node but failed with TestPlugin",
@@ -297,7 +297,7 @@ func TestPostFilter(t *testing.T) {
 			nodes:                 []*v1.Node{st.MakeNode().Name("node1").Capacity(largeRes).Label("error", "true").Obj()},
 			filteredNodesStatuses: framework.NodeToStatusMap{"node1": framework.NewStatus(framework.Unschedulable)},
 			wantResult:            nil,
-			wantStatus:            framework.AsStatus(errors.New("running RemovePod on PreFilter plugin \"test-plugin\": failed to remove pod: p")),
+			wantStatus:            framework.AsStatus(errors.New("preemption: running RemovePod on PreFilter plugin \"test-plugin\": failed to remove pod: p")),
 		},
 		{
 			name: "one failed with TestPlugin and the other pass",
@@ -1447,7 +1447,7 @@ func TestPodEligibleToPreemptOthers(t *testing.T) {
 				t.Fatal(err)
 			}
 			pl := DefaultPreemption{fh: f}
-			if got := pl.PodEligibleToPreemptOthers(test.pod, test.nominatedNodeStatus); got != test.expected {
+			if got, _ := pl.PodEligibleToPreemptOthers(test.pod, test.nominatedNodeStatus); got != test.expected {
 				t.Errorf("expected %t, got %t for pod: %s", test.expected, got, test.pod.Name)
 			}
 		})
diff --git a/pkg/scheduler/framework/preemption/preemption.go b/pkg/scheduler/framework/preemption/preemption.go
index aeb66c8c3dd..9b9f4bebfa8 100644
--- a/pkg/scheduler/framework/preemption/preemption.go
+++ b/pkg/scheduler/framework/preemption/preemption.go
@@ -104,9 +104,9 @@ type Interface interface {
 	GetOffsetAndNumCandidates(nodes int32) (int32, int32)
 	// CandidatesToVictimsMap builds a map from the target node to a list of to-be-preempted Pods and the number of PDB violation.
 	CandidatesToVictimsMap(candidates []Candidate) map[string]*extenderv1.Victims
-	// PodEligibleToPreemptOthers determines whether this pod should be considered
-	// for preempting other pods or not.
-	PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNodeStatus *framework.Status) bool
+	// PodEligibleToPreemptOthers reports whether this pod should be considered for
+	// preempting other pods, and if not, returns the reason as the second return value.
+	PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNodeStatus *framework.Status) (bool, string)
 	// SelectVictimsOnNode finds minimum set of pods on the given node that should be preempted in order to make enough room
 	// for "pod" to be scheduled.
 	// Note that both `state` and `nodeInfo` are deep copied.
@@ -148,9 +148,9 @@ func (ev *Evaluator) Preempt(ctx context.Context, pod *v1.Pod, m framework.NodeT
 	}
 
 	// 1) Ensure the preemptor is eligible to preempt other pods.
-	if !ev.PodEligibleToPreemptOthers(pod, m[pod.Status.NominatedNodeName]) {
-		klog.V(5).InfoS("Pod is not eligible for more preemption", "pod", klog.KObj(pod))
-		return nil, framework.NewStatus(framework.Unschedulable)
+	if ok, msg := ev.PodEligibleToPreemptOthers(pod, m[pod.Status.NominatedNodeName]); !ok {
+		klog.V(5).InfoS("Pod is not eligible for preemption", "pod", klog.KObj(pod), "reason", msg)
+		return nil, framework.NewStatus(framework.Unschedulable, msg)
 	}
 
 	// 2) Find all preemption candidates.
@@ -182,7 +182,7 @@ func (ev *Evaluator) Preempt(ctx context.Context, pod *v1.Pod, m framework.NodeT
 	// 4) Find the best candidate.
 	bestCandidate := ev.SelectCandidate(candidates)
 	if bestCandidate == nil || len(bestCandidate.Name()) == 0 {
-		return nil, framework.NewStatus(framework.Unschedulable)
+		return nil, framework.NewStatus(framework.Unschedulable, "no candidate node for preemption")
 	}
 
 	// 5) Perform preparation work before nominating the selected candidate.
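Taken together, the changes above thread the eligibility reason from PodEligibleToPreemptOthers through Evaluator.Preempt and into the "preemption: "-prefixed status returned by PostFilter. The following is a minimal, self-contained sketch of that flow; Code, Status, NewStatus, and the two helper functions are simplified stand-ins invented for illustration, not the real framework API:

package main

import "fmt"

// Code and Status are simplified stand-ins for framework.Code and framework.Status.
type Code int

const Unschedulable Code = 2

type Status struct {
	code Code
	msg  string
}

func NewStatus(code Code, msg string) *Status { return &Status{code: code, msg: msg} }
func (s *Status) Code() Code                  { return s.code }
func (s *Status) Message() string             { return s.msg }

// podEligibleToPreemptOthers mirrors the new (bool, string) contract: the bool
// says whether preemption should be attempted, the string carries the reason
// when it should not.
func podEligibleToPreemptOthers(preemptNever bool) (bool, string) {
	if preemptNever {
		return false, "not eligible due to preemptionPolicy=Never."
	}
	return true, ""
}

// preempt mirrors step 1 of Evaluator.Preempt: an ineligible pod now yields an
// Unschedulable status that carries the reason instead of an empty message.
func preempt(preemptNever bool) *Status {
	if ok, msg := podEligibleToPreemptOthers(preemptNever); !ok {
		return NewStatus(Unschedulable, msg)
	}
	// Steps 2-5 (candidate search and selection) are elided; this sketch
	// reuses the step-4 failure message for the fall-through case.
	return NewStatus(Unschedulable, "no candidate node for preemption")
}

func main() {
	// DefaultPreemption.PostFilter prefixes any non-empty message with
	// "preemption: " so the origin of the reason is unambiguous.
	status := preempt(true)
	if status.Message() != "" {
		status = NewStatus(status.Code(), "preemption: "+status.Message())
	}
	fmt.Println(status.Message())
	// Output: preemption: not eligible due to preemptionPolicy=Never.
}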
diff --git a/pkg/scheduler/framework/preemption/preemption_test.go b/pkg/scheduler/framework/preemption/preemption_test.go
index 025f959c2ef..3139995329a 100644
--- a/pkg/scheduler/framework/preemption/preemption_test.go
+++ b/pkg/scheduler/framework/preemption/preemption_test.go
@@ -76,8 +76,8 @@ func (pl *FakePostFilterPlugin) CandidatesToVictimsMap(candidates []Candidate) m
 	return nil
 }
 
-func (pl *FakePostFilterPlugin) PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNodeStatus *framework.Status) bool {
-	return true
+func (pl *FakePostFilterPlugin) PodEligibleToPreemptOthers(pod *v1.Pod, nominatedNodeStatus *framework.Status) (bool, string) {
+	return true, ""
 }
 
 func TestNodesWherePreemptionMightHelp(t *testing.T) {
diff --git a/pkg/scheduler/framework/types.go b/pkg/scheduler/framework/types.go
index b3d23ad85fb..3a12a0a1996 100644
--- a/pkg/scheduler/framework/types.go
+++ b/pkg/scheduler/framework/types.go
@@ -215,6 +215,8 @@ type WeightedAffinityTerm struct {
 type Diagnosis struct {
 	NodeToStatusMap      NodeToStatusMap
 	UnschedulablePlugins sets.String
+	// PostFilterMsg records the messages returned from PostFilter plugins.
+	PostFilterMsg string
 }
 
 // FitError describes a fit error of a pod.
@@ -247,6 +249,10 @@ func (f *FitError) Error() string {
 		return reasonStrings
 	}
 	reasonMsg := fmt.Sprintf(NoNodeAvailableMsg+": %v.", f.NumAllNodes, strings.Join(sortReasonsHistogram(), ", "))
+	postFilterMsg := f.Diagnosis.PostFilterMsg
+	if postFilterMsg != "" {
+		reasonMsg += " " + postFilterMsg
+	}
 	return reasonMsg
 }
 
diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go
index 26ec436aa5f..fca985150cc 100644
--- a/pkg/scheduler/scheduler.go
+++ b/pkg/scheduler/scheduler.go
@@ -468,6 +468,7 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
 		if status.Code() == framework.Error {
 			klog.ErrorS(nil, "Status after running PostFilter plugins for pod", "pod", klog.KObj(pod), "status", status)
 		} else {
+			fitError.Diagnosis.PostFilterMsg = status.Message()
 			klog.V(5).InfoS("Status after running PostFilter plugins for pod", "pod", klog.KObj(pod), "status", status)
 		}
 		if result != nil {
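The last two files connect that status to what users actually see: scheduleOne records the PostFilter message on Diagnosis.PostFilterMsg, and FitError.Error() appends it to the familiar "0/N nodes are available" summary. Below is a minimal sketch of that path, again with simplified stand-in types; the real Error() builds the reason histogram from NodeToStatusMap, which is replaced here by a hard-coded reason string:

package main

import "fmt"

// Diagnosis and FitError are simplified stand-ins for the types in
// pkg/scheduler/framework/types.go.
type Diagnosis struct {
	PostFilterMsg string
}

type FitError struct {
	NumAllNodes int
	Diagnosis   Diagnosis
}

const NoNodeAvailableMsg = "0/%v nodes are available"

// Error mirrors the patched FitError.Error(): the PostFilter message, when
// present, is appended after the per-node reason histogram.
func (f *FitError) Error() string {
	reasonMsg := fmt.Sprintf(NoNodeAvailableMsg+": %v.", f.NumAllNodes, "3 Insufficient cpu")
	if postFilterMsg := f.Diagnosis.PostFilterMsg; postFilterMsg != "" {
		reasonMsg += " " + postFilterMsg
	}
	return reasonMsg
}

func main() {
	// scheduleOne stores the PostFilter status message on the diagnosis,
	// so the pod's scheduling event carries both failure sources.
	fitErr := &FitError{
		NumAllNodes: 3,
		Diagnosis:   Diagnosis{PostFilterMsg: "preemption: not eligible due to preemptionPolicy=Never."},
	}
	fmt.Println(fitErr.Error())
	// Output: 0/3 nodes are available: 3 Insufficient cpu. preemption: not eligible due to preemptionPolicy=Never.
}

The net effect is that a single event message now explains both why filtering failed on every node and why preemption could not help, instead of ending at the bare node histogram.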