diff --git a/pkg/scheduler/framework/interface.go b/pkg/scheduler/framework/interface.go index 5fd8bd86fcf..c5b7d89ceed 100644 --- a/pkg/scheduler/framework/interface.go +++ b/pkg/scheduler/framework/interface.go @@ -49,7 +49,8 @@ type NodeScore struct { Score int64 } -// NodeToStatusMap declares map from node name to its status. +// NodeToStatusMap contains the statuses of the Nodes where the incoming Pod was not schedulable. +// A PostFilter plugin that uses this map should interpret absent Nodes as UnschedulableAndUnresolvable. type NodeToStatusMap map[string]*Status // NodePluginScores is a struct with node name and scores for that node. @@ -448,6 +449,7 @@ type PostFilterPlugin interface { // If this scheduling cycle failed at PreFilter, all Nodes have the status from the rejector PreFilter plugin in NodeToStatusMap. // Note that the scheduling framework runs PostFilter plugins even when PreFilter returned UnschedulableAndUnresolvable. // In that case, NodeToStatusMap contains all Nodes with UnschedulableAndUnresolvable. + // If there is no entry in the NodeToStatus map, its implicit status is UnschedulableAndUnresolvable. // // Also, ignoring Nodes with UnschedulableAndUnresolvable is the responsibility of each PostFilter plugin, // meaning NodeToStatusMap obviously could have Nodes with UnschedulableAndUnresolvable diff --git a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go index 536ac111715..915016388cf 100644 --- a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go +++ b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go @@ -289,7 +289,8 @@ func TestPostFilter(t *testing.T) { st.MakeNode().Name("node4").Capacity(nodeRes).Obj(), }, filteredNodesStatuses: framework.NodeToStatusMap{ - "node3": framework.NewStatus(framework.UnschedulableAndUnresolvable), + "node1": framework.NewStatus(framework.Unschedulable), + "node2": framework.NewStatus(framework.Unschedulable), "node4": framework.NewStatus(framework.UnschedulableAndUnresolvable), }, wantResult: framework.NewPostFilterResultWithNominatedNode(""), @@ -1776,7 +1777,15 @@ func TestPreempt(t *testing.T) { State: state, Interface: &pl, } - res, status := pe.Preempt(ctx, test.pod, make(framework.NodeToStatusMap)) + + // so that these nodes are eligible for preemption, we set their status + // to Unschedulable. + nodeToStatusMap := make(framework.NodeToStatusMap, len(nodes)) + for _, n := range nodes { + nodeToStatusMap[n.Name] = framework.NewStatus(framework.Unschedulable) + } + + res, status := pe.Preempt(ctx, test.pod, nodeToStatusMap) if !status.IsSuccess() && !status.IsRejected() { t.Errorf("unexpected error in preemption: %v", status.AsError()) } diff --git a/pkg/scheduler/framework/preemption/preemption.go b/pkg/scheduler/framework/preemption/preemption.go index 376b6337e99..29864adb52f 100644 --- a/pkg/scheduler/framework/preemption/preemption.go +++ b/pkg/scheduler/framework/preemption/preemption.go @@ -415,15 +415,18 @@ func (ev *Evaluator) prepareCandidate(ctx context.Context, c Candidate, pod *v1. func nodesWherePreemptionMightHelp(nodes []*framework.NodeInfo, m framework.NodeToStatusMap) ([]*framework.NodeInfo, framework.NodeToStatusMap) { var potentialNodes []*framework.NodeInfo nodeStatuses := make(framework.NodeToStatusMap) + unresolvableStatus := framework.NewStatus(framework.UnschedulableAndUnresolvable, "Preemption is not helpful for scheduling") for _, node := range nodes { - name := node.Node().Name - // We rely on the status by each plugin - 'Unschedulable' or 'UnschedulableAndUnresolvable' - // to determine whether preemption may help or not on the node. - if m[name].Code() == framework.UnschedulableAndUnresolvable { - nodeStatuses[node.Node().Name] = framework.NewStatus(framework.UnschedulableAndUnresolvable, "Preemption is not helpful for scheduling") - continue + nodeName := node.Node().Name + // We only attempt preemption on nodes with status 'Unschedulable'. For + // diagnostic purposes, we propagate UnschedulableAndUnresolvable if either + // implied by absence in map or explicitly set. + status, ok := m[nodeName] + if status.Code() == framework.Unschedulable { + potentialNodes = append(potentialNodes, node) + } else if !ok || status.Code() == framework.UnschedulableAndUnresolvable { + nodeStatuses[nodeName] = unresolvableStatus } - potentialNodes = append(potentialNodes, node) } return potentialNodes, nodeStatuses } diff --git a/pkg/scheduler/framework/preemption/preemption_test.go b/pkg/scheduler/framework/preemption/preemption_test.go index 5ddf60c1997..0bf384239ac 100644 --- a/pkg/scheduler/framework/preemption/preemption_test.go +++ b/pkg/scheduler/framework/preemption/preemption_test.go @@ -147,6 +147,7 @@ func TestNodesWherePreemptionMightHelp(t *testing.T) { "node1": framework.NewStatus(framework.Unschedulable, interpodaffinity.ErrReasonAntiAffinityRulesNotMatch), "node2": framework.NewStatus(framework.UnschedulableAndUnresolvable, nodename.ErrReason), "node3": framework.NewStatus(framework.UnschedulableAndUnresolvable, nodeunschedulable.ErrReasonUnschedulable), + "node4": framework.NewStatus(framework.Unschedulable, "Unschedulable"), }, expected: sets.New("node1", "node4"), }, @@ -155,6 +156,8 @@ func TestNodesWherePreemptionMightHelp(t *testing.T) { nodesStatuses: framework.NodeToStatusMap{ "node1": framework.NewStatus(framework.UnschedulableAndUnresolvable, interpodaffinity.ErrReasonAffinityRulesNotMatch), "node2": framework.NewStatus(framework.Unschedulable, interpodaffinity.ErrReasonAntiAffinityRulesNotMatch), + "node3": framework.NewStatus(framework.Unschedulable, "Unschedulable"), + "node4": framework.NewStatus(framework.Unschedulable, "Unschedulable"), }, expected: sets.New("node2", "node3", "node4"), }, @@ -163,6 +166,8 @@ func TestNodesWherePreemptionMightHelp(t *testing.T) { nodesStatuses: framework.NodeToStatusMap{ "node1": framework.NewStatus(framework.UnschedulableAndUnresolvable, volumerestrictions.ErrReasonDiskConflict), "node2": framework.NewStatus(framework.Unschedulable, fmt.Sprintf("Insufficient %v", v1.ResourceMemory)), + "node3": framework.NewStatus(framework.Unschedulable, "Unschedulable"), + "node4": framework.NewStatus(framework.Unschedulable, "Unschedulable"), }, expected: sets.New("node2", "node3", "node4"), }, @@ -170,6 +175,9 @@ func TestNodesWherePreemptionMightHelp(t *testing.T) { name: "Node condition errors should be considered unresolvable", nodesStatuses: framework.NodeToStatusMap{ "node1": framework.NewStatus(framework.UnschedulableAndUnresolvable, nodeunschedulable.ErrReasonUnknownCondition), + "node2": framework.NewStatus(framework.Unschedulable, "Unschedulable"), + "node3": framework.NewStatus(framework.Unschedulable, "Unschedulable"), + "node4": framework.NewStatus(framework.Unschedulable, "Unschedulable"), }, expected: sets.New("node2", "node3", "node4"), }, @@ -179,6 +187,7 @@ func TestNodesWherePreemptionMightHelp(t *testing.T) { "node1": framework.NewStatus(framework.UnschedulableAndUnresolvable, volumezone.ErrReasonConflict), "node2": framework.NewStatus(framework.UnschedulableAndUnresolvable, string(volumebinding.ErrReasonNodeConflict)), "node3": framework.NewStatus(framework.UnschedulableAndUnresolvable, string(volumebinding.ErrReasonBindConflict)), + "node4": framework.NewStatus(framework.Unschedulable, "Unschedulable"), }, expected: sets.New("node4"), }, @@ -188,12 +197,14 @@ func TestNodesWherePreemptionMightHelp(t *testing.T) { "node1": framework.NewStatus(framework.Unschedulable, podtopologyspread.ErrReasonConstraintsNotMatch), "node2": framework.NewStatus(framework.UnschedulableAndUnresolvable, nodename.ErrReason), "node3": framework.NewStatus(framework.Unschedulable, podtopologyspread.ErrReasonConstraintsNotMatch), + "node4": framework.NewStatus(framework.Unschedulable, "Unschedulable"), }, expected: sets.New("node1", "node3", "node4"), }, { name: "UnschedulableAndUnresolvable status should be skipped but Unschedulable should be tried", nodesStatuses: framework.NodeToStatusMap{ + "node1": framework.NewStatus(framework.Unschedulable, ""), "node2": framework.NewStatus(framework.UnschedulableAndUnresolvable, ""), "node3": framework.NewStatus(framework.Unschedulable, ""), "node4": framework.NewStatus(framework.UnschedulableAndUnresolvable, ""), @@ -203,6 +214,7 @@ func TestNodesWherePreemptionMightHelp(t *testing.T) { { name: "ErrReasonNodeLabelNotMatch should not be tried as it indicates that the pod is unschedulable due to node doesn't have the required label", nodesStatuses: framework.NodeToStatusMap{ + "node1": framework.NewStatus(framework.Unschedulable, ""), "node2": framework.NewStatus(framework.UnschedulableAndUnresolvable, podtopologyspread.ErrReasonNodeLabelNotMatch), "node3": framework.NewStatus(framework.Unschedulable, ""), "node4": framework.NewStatus(framework.UnschedulableAndUnresolvable, ""), diff --git a/pkg/scheduler/framework/types.go b/pkg/scheduler/framework/types.go index 164e6aa4095..6856b78f270 100644 --- a/pkg/scheduler/framework/types.go +++ b/pkg/scheduler/framework/types.go @@ -325,9 +325,11 @@ const ExtenderName = "Extender" // Diagnosis records the details to diagnose a scheduling failure. type Diagnosis struct { - // NodeToStatusMap records the status of each node + // NodeToStatusMap records the status of each retriable node (status Unschedulable) // if they're rejected in PreFilter (via PreFilterResult) or Filter plugins. // Nodes that pass PreFilter/Filter plugins are not included in this map. + // While this map may contain UnschedulableAndUnresolvable statuses, the absence of + // a node should be interpreted as UnschedulableAndUnresolvable. NodeToStatusMap NodeToStatusMap // UnschedulablePlugins are plugins that returns Unschedulable or UnschedulableAndUnresolvable. UnschedulablePlugins sets.Set[string] diff --git a/pkg/scheduler/schedule_one.go b/pkg/scheduler/schedule_one.go index 81754db2a53..6b6992095d8 100644 --- a/pkg/scheduler/schedule_one.go +++ b/pkg/scheduler/schedule_one.go @@ -485,14 +485,12 @@ func (sched *Scheduler) findNodesThatFitPod(ctx context.Context, fwk framework.F nodes := allNodes if !preRes.AllNodes() { nodes = make([]*framework.NodeInfo, 0, len(preRes.NodeNames)) - for _, n := range allNodes { - if !preRes.NodeNames.Has(n.Node().Name) { - // We consider Nodes that are filtered out by PreFilterResult as rejected via UnschedulableAndUnresolvable. - // We have to record them in NodeToStatusMap so that they won't be considered as candidates in the preemption. - diagnosis.NodeToStatusMap[n.Node().Name] = framework.NewStatus(framework.UnschedulableAndUnresolvable, "node is filtered out by the prefilter result") - continue + for nodeName := range preRes.NodeNames { + // PreRes may return nodeName(s) which do not exist; we verify + // node exists in the Snapshot. + if nodeInfo, err := sched.nodeInfoSnapshot.Get(nodeName); err == nil { + nodes = append(nodes, nodeInfo) } - nodes = append(nodes, n) } } feasibleNodes, err := sched.findNodesThatPassFilters(ctx, fwk, state, pod, &diagnosis, nodes) diff --git a/pkg/scheduler/schedule_one_test.go b/pkg/scheduler/schedule_one_test.go index 546893c31be..81c08147623 100644 --- a/pkg/scheduler/schedule_one_test.go +++ b/pkg/scheduler/schedule_one_test.go @@ -2344,7 +2344,6 @@ func TestSchedulerSchedulePod(t *testing.T) { NumAllNodes: 2, Diagnosis: framework.Diagnosis{ NodeToStatusMap: framework.NodeToStatusMap{ - "node1": framework.NewStatus(framework.UnschedulableAndUnresolvable, "node is filtered out by the prefilter result"), "node2": framework.NewStatus(framework.Unschedulable, "injecting failure for pod test-prefilter").WithPlugin("FakeFilter"), }, UnschedulablePlugins: sets.New("FakeFilter"), @@ -2453,10 +2452,7 @@ func TestSchedulerSchedulePod(t *testing.T) { Pod: st.MakePod().Name("test-prefilter").UID("test-prefilter").Obj(), NumAllNodes: 2, Diagnosis: framework.Diagnosis{ - NodeToStatusMap: framework.NodeToStatusMap{ - "1": framework.NewStatus(framework.UnschedulableAndUnresolvable, "node is filtered out by the prefilter result"), - "2": framework.NewStatus(framework.UnschedulableAndUnresolvable, "node is filtered out by the prefilter result"), - }, + NodeToStatusMap: framework.NodeToStatusMap{}, }, }, },