From 09abd6be5a859a94f490b2dad34b3c173bd6c1d8 Mon Sep 17 00:00:00 2001 From: Kensei Nakada Date: Sun, 31 Dec 2023 06:52:31 +0000 Subject: [PATCH] address reviews --- pkg/scheduler/framework/interface.go | 3 ++ pkg/scheduler/framework/runtime/framework.go | 20 ++++----- .../framework/runtime/framework_test.go | 6 +-- pkg/scheduler/framework/types.go | 3 ++ pkg/scheduler/schedule_one.go | 12 +++--- pkg/scheduler/schedule_one_test.go | 43 ++++++++++++++++--- pkg/scheduler/scheduler.go | 1 + 7 files changed, 62 insertions(+), 26 deletions(-) diff --git a/pkg/scheduler/framework/interface.go b/pkg/scheduler/framework/interface.go index 65a115dc0d1..23f5d7e5bc8 100644 --- a/pkg/scheduler/framework/interface.go +++ b/pkg/scheduler/framework/interface.go @@ -400,6 +400,9 @@ type PreFilterPlugin interface { // plugins must return success or the pod will be rejected. PreFilter could optionally // return a PreFilterResult to influence which nodes to evaluate downstream. This is useful // for cases where it is possible to determine the subset of nodes to process in O(1) time. + // When PreFilterResult filters out some Nodes, the framework considers Nodes that are filtered out as getting "UnschedulableAndUnresolvable". + // i.e., those Nodes will be out of the candidates of the preemption. + // // When it returns Skip status, returned PreFilterResult and other fields in status are just ignored, // and coupled Filter plugin/PreFilterExtensions() will be skipped in this scheduling cycle. PreFilter(ctx context.Context, state *CycleState, p *v1.Pod) (*PreFilterResult, *Status) diff --git a/pkg/scheduler/framework/runtime/framework.go b/pkg/scheduler/framework/runtime/framework.go index e126d24c84d..19bbc861831 100644 --- a/pkg/scheduler/framework/runtime/framework.go +++ b/pkg/scheduler/framework/runtime/framework.go @@ -684,12 +684,12 @@ func (f *frameworkImpl) RunPreFilterPlugins(ctx context.Context, state *framewor } if !s.IsSuccess() { s.SetPlugin(pl.Name()) - if s.IsRejected() { - if s.Code() == framework.UnschedulableAndUnresolvable { - // In this case, the preemption shouldn't happen in this scheduling cycle. - // So, no need to execute all PreFilter. - return nil, s - } + if s.Code() == framework.UnschedulableAndUnresolvable { + // In this case, the preemption shouldn't happen in this scheduling cycle. + // So, no need to execute all PreFilter. + return nil, s + } + if s.Code() == framework.Unschedulable { // In this case, the preemption should happen later in this scheduling cycle. // So we need to execute all PreFilter. // https://github.com/kubernetes/kubernetes/issues/119770 @@ -707,11 +707,9 @@ func (f *frameworkImpl) RunPreFilterPlugins(ctx context.Context, state *framewor if len(pluginsWithNodes) == 1 { msg = fmt.Sprintf("node(s) didn't satisfy plugin %v", pluginsWithNodes[0]) } - // In this case, the preemption should happen later in this scheduling cycle. - // So we need to execute all PreFilter. - // https://github.com/kubernetes/kubernetes/issues/119770 - returnStatus = framework.NewStatus(framework.Unschedulable, msg) - continue + + // When PreFilterResult filters out Nodes, the framework considers Nodes that are filtered out as getting "UnschedulableAndUnresolvable". + return result, framework.NewStatus(framework.UnschedulableAndUnresolvable, msg) } } return result, returnStatus diff --git a/pkg/scheduler/framework/runtime/framework_test.go b/pkg/scheduler/framework/runtime/framework_test.go index 840eb633e68..b3e9912845b 100644 --- a/pkg/scheduler/framework/runtime/framework_test.go +++ b/pkg/scheduler/framework/runtime/framework_test.go @@ -1656,7 +1656,7 @@ func TestRunPreFilterPlugins(t *testing.T) { wantStatusCode: framework.UnschedulableAndUnresolvable, }, { - name: "all nodes are filtered out by prefilter result, but all other plugins are executed", + name: "all nodes are filtered out by prefilter result, but other plugins aren't executed because we consider all nodes are filtered out by UnschedulableAndUnresolvable", plugins: []*TestPlugin{ { name: "reject-all-nodes", @@ -1669,8 +1669,8 @@ func TestRunPreFilterPlugins(t *testing.T) { }, }, wantPreFilterResult: &framework.PreFilterResult{NodeNames: sets.New[string]()}, - wantSkippedPlugins: sets.New("skip"), - wantStatusCode: framework.Unschedulable, + wantSkippedPlugins: sets.New[string](), // "skip" plugin isn't executed. + wantStatusCode: framework.UnschedulableAndUnresolvable, }, } for _, tt := range tests { diff --git a/pkg/scheduler/framework/types.go b/pkg/scheduler/framework/types.go index 16ba95ff96f..7001ff9fd5a 100644 --- a/pkg/scheduler/framework/types.go +++ b/pkg/scheduler/framework/types.go @@ -290,6 +290,9 @@ const ExtenderName = "Extender" // Diagnosis records the details to diagnose a scheduling failure. type Diagnosis struct { + // NodeToStatusMap records the status of each node + // if they're rejected in PreFilter (via PreFilterResult) or Filter plugins. + // Nodes that pass PreFilter/Filter plugins are not included in this map. NodeToStatusMap NodeToStatusMap // UnschedulablePlugins are plugins that returns Unschedulable or UnschedulableAndUnresolvable. UnschedulablePlugins sets.Set[string] diff --git a/pkg/scheduler/schedule_one.go b/pkg/scheduler/schedule_one.go index c38aca0dd36..0af3b1f3528 100644 --- a/pkg/scheduler/schedule_one.go +++ b/pkg/scheduler/schedule_one.go @@ -485,12 +485,14 @@ func (sched *Scheduler) findNodesThatFitPod(ctx context.Context, fwk framework.F nodes := allNodes if !preRes.AllNodes() { nodes = make([]*framework.NodeInfo, 0, len(preRes.NodeNames)) - for n := range preRes.NodeNames { - nInfo, err := sched.nodeInfoSnapshot.NodeInfos().Get(n) - if err != nil { - return nil, diagnosis, err + for _, n := range allNodes { + if !preRes.NodeNames.Has(n.Node().Name) { + // We consider Nodes that are filtered out by PreFilterResult as rejected via UnschedulableAndUnresolvable. + // We have to record them in NodeToStatusMap so that they won't be considered as candidates in the preemption. + diagnosis.NodeToStatusMap[n.Node().Name] = framework.NewStatus(framework.UnschedulableAndUnresolvable, "node is filtered out by the prefilter result") + continue } - nodes = append(nodes, nInfo) + nodes = append(nodes, n) } } feasibleNodes, err := sched.findNodesThatPassFilters(ctx, fwk, state, pod, &diagnosis, nodes) diff --git a/pkg/scheduler/schedule_one_test.go b/pkg/scheduler/schedule_one_test.go index d83794f660f..a85873a5270 100644 --- a/pkg/scheduler/schedule_one_test.go +++ b/pkg/scheduler/schedule_one_test.go @@ -2227,7 +2227,7 @@ func TestSchedulerSchedulePod(t *testing.T) { nodes: []string{"node1", "node2", "node3"}, pod: st.MakePod().Name("test-prefilter").UID("test-prefilter").Obj(), wantNodes: sets.New("node2"), - wantEvaluatedNodes: ptr.To[int32](1), + wantEvaluatedNodes: ptr.To[int32](3), }, { name: "test prefilter plugin returning non-intersecting nodes", @@ -2254,9 +2254,9 @@ func TestSchedulerSchedulePod(t *testing.T) { NumAllNodes: 3, Diagnosis: framework.Diagnosis{ NodeToStatusMap: framework.NodeToStatusMap{ - "node1": framework.NewStatus(framework.Unschedulable, "node(s) didn't satisfy plugin(s) [FakePreFilter2 FakePreFilter3] simultaneously"), - "node2": framework.NewStatus(framework.Unschedulable, "node(s) didn't satisfy plugin(s) [FakePreFilter2 FakePreFilter3] simultaneously"), - "node3": framework.NewStatus(framework.Unschedulable, "node(s) didn't satisfy plugin(s) [FakePreFilter2 FakePreFilter3] simultaneously"), + "node1": framework.NewStatus(framework.UnschedulableAndUnresolvable, "node(s) didn't satisfy plugin(s) [FakePreFilter2 FakePreFilter3] simultaneously"), + "node2": framework.NewStatus(framework.UnschedulableAndUnresolvable, "node(s) didn't satisfy plugin(s) [FakePreFilter2 FakePreFilter3] simultaneously"), + "node3": framework.NewStatus(framework.UnschedulableAndUnresolvable, "node(s) didn't satisfy plugin(s) [FakePreFilter2 FakePreFilter3] simultaneously"), }, UnschedulablePlugins: sets.Set[string]{}, PreFilterMsg: "node(s) didn't satisfy plugin(s) [FakePreFilter2 FakePreFilter3] simultaneously", @@ -2284,13 +2284,42 @@ func TestSchedulerSchedulePod(t *testing.T) { NumAllNodes: 1, Diagnosis: framework.Diagnosis{ NodeToStatusMap: framework.NodeToStatusMap{ - "node1": framework.NewStatus(framework.Unschedulable, "node(s) didn't satisfy plugin FakePreFilter2"), + "node1": framework.NewStatus(framework.UnschedulableAndUnresolvable, "node(s) didn't satisfy plugin FakePreFilter2"), }, UnschedulablePlugins: sets.Set[string]{}, PreFilterMsg: "node(s) didn't satisfy plugin FakePreFilter2", }, }, }, + { + name: "test some nodes are filtered out by prefilter plugin and other are filtered out by filter plugin", + registerPlugins: []tf.RegisterPluginFunc{ + tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New), + tf.RegisterPreFilterPlugin( + "FakePreFilter", + tf.NewFakePreFilterPlugin("FakePreFilter", &framework.PreFilterResult{NodeNames: sets.New[string]("node2")}, nil), + ), + tf.RegisterFilterPlugin( + "FakeFilter", + tf.NewFakeFilterPlugin(map[string]framework.Code{"node2": framework.Unschedulable}), + ), + tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New), + }, + nodes: []string{"node1", "node2"}, + pod: st.MakePod().Name("test-prefilter").UID("test-prefilter").Obj(), + wErr: &framework.FitError{ + Pod: st.MakePod().Name("test-prefilter").UID("test-prefilter").Obj(), + NumAllNodes: 2, + Diagnosis: framework.Diagnosis{ + NodeToStatusMap: framework.NodeToStatusMap{ + "node1": framework.NewStatus(framework.UnschedulableAndUnresolvable, "node is filtered out by the prefilter result"), + "node2": framework.NewStatus(framework.Unschedulable, "injecting failure for pod test-prefilter").WithPlugin("FakeFilter"), + }, + UnschedulablePlugins: sets.New("FakeFilter"), + PreFilterMsg: "", + }, + }, + }, { name: "test prefilter plugin returning skip", registerPlugins: []tf.RegisterPluginFunc{ @@ -2356,7 +2385,7 @@ func TestSchedulerSchedulePod(t *testing.T) { wantEvaluatedNodes: ptr.To[int32](1), }, { - name: "test no score plugin, prefilter plugin returning 2 nodes, only 1 node is evaluated", + name: "test no score plugin, prefilter plugin returning 2 nodes, only 1 node is evaluated in Filter", registerPlugins: []tf.RegisterPluginFunc{ tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New), tf.RegisterPreFilterPlugin( @@ -2368,7 +2397,7 @@ func TestSchedulerSchedulePod(t *testing.T) { nodes: []string{"node1", "node2", "node3"}, pod: st.MakePod().Name("test-prefilter").UID("test-prefilter").Obj(), wantNodes: sets.New("node1", "node2"), - wantEvaluatedNodes: ptr.To[int32](1), + wantEvaluatedNodes: ptr.To[int32](2), }, } for _, test := range tests { diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index 84b73c44954..fdae0cf4847 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -139,6 +139,7 @@ type ScheduleResult struct { SuggestedHost string // The number of nodes the scheduler evaluated the pod against in the filtering // phase and beyond. + // Note that it contains the number of nodes that filtered out by PreFilterResult. EvaluatedNodes int // The number of nodes out of the evaluated ones that fit the pod. FeasibleNodes int