diff --git a/pkg/scheduler/core/generic_scheduler_test.go b/pkg/scheduler/core/generic_scheduler_test.go index 29536164404..4c4d28e35b6 100644 --- a/pkg/scheduler/core/generic_scheduler_test.go +++ b/pkg/scheduler/core/generic_scheduler_test.go @@ -611,7 +611,7 @@ func TestGenericScheduler(t *testing.T) { wErr: fmt.Errorf("error while running score plugin for pod \"2\": %+v", errPrioritize), }, { - name: "test even pods spread predicate - 2 nodes with maxskew=1", + name: "test podtopologyspread plugin - 2 nodes with maxskew=1", registerPlugins: []st.RegisterPluginFunc{ st.RegisterQueueSortPlugin(queuesort.Name, queuesort.New), st.RegisterPluginAsExtensions( @@ -659,7 +659,7 @@ func TestGenericScheduler(t *testing.T) { wErr: nil, }, { - name: "test even pods spread predicate - 3 nodes with maxskew=2", + name: "test podtopologyspread plugin - 3 nodes with maxskew=2", registerPlugins: []st.RegisterPluginFunc{ st.RegisterQueueSortPlugin(queuesort.Name, queuesort.New), st.RegisterPluginAsExtensions( @@ -854,7 +854,6 @@ func makeScheduler(nodes []*v1.Node, fns ...st.RegisterPluginFunc) *genericSched schedulerapi.DefaultPercentageOfNodesToScore, false) cache.UpdateSnapshot(s.(*genericScheduler).nodeInfoSnapshot) return s.(*genericScheduler) - } func TestFindFitAllError(t *testing.T) { diff --git a/pkg/scheduler/framework/plugins/podtopologyspread/filtering.go b/pkg/scheduler/framework/plugins/podtopologyspread/filtering.go index 4c59eb1fc56..9eeb9701fe2 100644 --- a/pkg/scheduler/framework/plugins/podtopologyspread/filtering.go +++ b/pkg/scheduler/framework/plugins/podtopologyspread/filtering.go @@ -255,7 +255,8 @@ func calPreFilterState(pod *v1.Pod, allNodes []*schedulernodeinfo.NodeInfo) (*pr matchTotal := int32(0) // nodeInfo.Pods() can be empty; or all pods don't fit for _, existingPod := range nodeInfo.Pods() { - if existingPod.Namespace != pod.Namespace { + // Bypass terminating Pod (see #87621). + if existingPod.DeletionTimestamp != nil || existingPod.Namespace != pod.Namespace { continue } if constraint.selector.Matches(labels.Set(existingPod.Labels)) { diff --git a/pkg/scheduler/framework/plugins/podtopologyspread/filtering_test.go b/pkg/scheduler/framework/plugins/podtopologyspread/filtering_test.go index 5e54837815f..115ac64afed 100644 --- a/pkg/scheduler/framework/plugins/podtopologyspread/filtering_test.go +++ b/pkg/scheduler/framework/plugins/podtopologyspread/filtering_test.go @@ -491,7 +491,7 @@ func TestPreFilterStateAddPod(t *testing.T) { }, }, { - name: "add a pod with mis-matched namespace doesn't change topologyKeyToMinPodsMap", + name: "add a pod in a different namespace doesn't change topologyKeyToMinPodsMap", preemptor: st.MakePod().Name("p").Label("foo", ""). SpreadConstraint(1, "node", hardSpread, st.MakeLabelSelector().Exists("foo").Obj()). Obj(), @@ -1036,7 +1036,7 @@ func TestSingleConstraint(t *testing.T) { }, }, { - name: "existing pods with mis-matched namespace doesn't count", + name: "existing pods in a different namespace do not count", pod: st.MakePod().Name("p").Label("foo", "").SpreadConstraint( 1, "zone", hardSpread, st.MakeLabelSelector().Exists("foo").Obj(), ).Obj(), @@ -1225,6 +1225,24 @@ func TestSingleConstraint(t *testing.T) { "node-y": false, }, }, + { + name: "terminating Pods should be excluded", + pod: st.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "node", hardSpread, st.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + st.MakeNode().Name("node-a").Label("node", "node-a").Obj(), + st.MakeNode().Name("node-b").Label("node", "node-b").Obj(), + }, + existingPods: []*v1.Pod{ + st.MakePod().Name("p-a").Node("node-a").Label("foo", "").Terminating().Obj(), + st.MakePod().Name("p-b").Node("node-b").Label("foo", "").Obj(), + }, + fits: map[string]bool{ + "node-a": true, + "node-b": false, + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { diff --git a/pkg/scheduler/framework/plugins/podtopologyspread/scoring.go b/pkg/scheduler/framework/plugins/podtopologyspread/scoring.go index 25b7c72e5e8..d57a889bc21 100644 --- a/pkg/scheduler/framework/plugins/podtopologyspread/scoring.go +++ b/pkg/scheduler/framework/plugins/podtopologyspread/scoring.go @@ -135,6 +135,10 @@ func (pl *PodTopologySpread) PostFilter( // indicates how many pods (on current node) match the . matchSum := int64(0) for _, existingPod := range nodeInfo.Pods() { + // Bypass terminating Pod (see #87621). + if existingPod.DeletionTimestamp != nil || existingPod.Namespace != pod.Namespace { + continue + } if c.selector.Matches(labels.Set(existingPod.Labels)) { matchSum++ } diff --git a/pkg/scheduler/framework/plugins/podtopologyspread/scoring_test.go b/pkg/scheduler/framework/plugins/podtopologyspread/scoring_test.go index 41433962f06..56bfe6b0b57 100644 --- a/pkg/scheduler/framework/plugins/podtopologyspread/scoring_test.go +++ b/pkg/scheduler/framework/plugins/podtopologyspread/scoring_test.go @@ -429,6 +429,44 @@ func TestPodTopologySpreadScore(t *testing.T) { {Name: "node-x", Score: 100}, }, }, + { + name: "existing pods in a different namespace do not count", + pod: st.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + existingPods: []*v1.Pod{ + st.MakePod().Name("p-a1").Namespace("ns1").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + st.MakePod().Name("p-b2").Node("node-b").Label("foo", "").Obj(), + }, + nodes: []*v1.Node{ + st.MakeNode().Name("node-a").Label("node", "node-a").Obj(), + st.MakeNode().Name("node-b").Label("node", "node-b").Obj(), + }, + want: []framework.NodeScore{ + {Name: "node-a", Score: 100}, + {Name: "node-b", Score: 50}, + }, + }, + { + name: "terminating Pods should be excluded", + pod: st.MakePod().Name("p").Label("foo", "").SpreadConstraint( + 1, "node", softSpread, st.MakeLabelSelector().Exists("foo").Obj(), + ).Obj(), + nodes: []*v1.Node{ + st.MakeNode().Name("node-a").Label("node", "node-a").Obj(), + st.MakeNode().Name("node-b").Label("node", "node-b").Obj(), + }, + existingPods: []*v1.Pod{ + st.MakePod().Name("p-a").Node("node-a").Label("foo", "").Terminating().Obj(), + st.MakePod().Name("p-b").Node("node-b").Label("foo", "").Obj(), + }, + want: []framework.NodeScore{ + {Name: "node-a", Score: 100}, + {Name: "node-b", Score: 0}, + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { diff --git a/pkg/scheduler/testing/wrappers.go b/pkg/scheduler/testing/wrappers.go index 48d4b6dcb9c..1fe47e723e0 100644 --- a/pkg/scheduler/testing/wrappers.go +++ b/pkg/scheduler/testing/wrappers.go @@ -171,6 +171,13 @@ func (p *PodWrapper) Priority(val int32) *PodWrapper { return p } +// Terminating sets the inner pod's deletionTimestamp to current timestamp. +func (p *PodWrapper) Terminating() *PodWrapper { + now := metav1.Now() + p.DeletionTimestamp = &now + return p +} + // ZeroTerminationGracePeriod sets the TerminationGracePeriodSeconds of the inner pod to zero. func (p *PodWrapper) ZeroTerminationGracePeriod() *PodWrapper { p.Spec.TerminationGracePeriodSeconds = &zero