diff --git a/pkg/scheduler/framework/plugins/podtopologyspread/plugin.go b/pkg/scheduler/framework/plugins/podtopologyspread/plugin.go index 5ce2420d49f..31eb457fc43 100644 --- a/pkg/scheduler/framework/plugins/podtopologyspread/plugin.go +++ b/pkg/scheduler/framework/plugins/podtopologyspread/plugin.go @@ -53,6 +53,7 @@ var systemDefaultConstraints = []v1.TopologySpreadConstraint{ // PodTopologySpread is a plugin that ensures pod's topologySpreadConstraints is satisfied. type PodTopologySpread struct { + systemDefaulted bool parallelizer parallelize.Parallelizer defaultConstraints []v1.TopologySpreadConstraint sharedLister framework.SharedLister @@ -97,6 +98,7 @@ func New(plArgs runtime.Object, h framework.Handle) (framework.Plugin, error) { } if args.DefaultingType == config.SystemDefaulting { pl.defaultConstraints = systemDefaultConstraints + pl.systemDefaulted = true } if len(pl.defaultConstraints) != 0 { if h.SharedInformerFactory() == nil { diff --git a/pkg/scheduler/framework/plugins/podtopologyspread/scoring.go b/pkg/scheduler/framework/plugins/podtopologyspread/scoring.go index f1b9ff7e6aa..5bc938f028e 100644 --- a/pkg/scheduler/framework/plugins/podtopologyspread/scoring.go +++ b/pkg/scheduler/framework/plugins/podtopologyspread/scoring.go @@ -56,7 +56,7 @@ func (s *preScoreState) Clone() framework.StateData { // 1) s.TopologyPairToPodCounts: keyed with both eligible topology pair and node names. // 2) s.IgnoredNodes: the set of nodes that shouldn't be scored. // 3) s.TopologyNormalizingWeight: The weight to be given to each constraint based on the number of values in a topology. -func (pl *PodTopologySpread) initPreScoreState(s *preScoreState, pod *v1.Pod, filteredNodes []*v1.Node) error { +func (pl *PodTopologySpread) initPreScoreState(s *preScoreState, pod *v1.Pod, filteredNodes []*v1.Node, requireAllTopologies bool) error { var err error if len(pod.Spec.TopologySpreadConstraints) > 0 { s.Constraints, err = filterTopologySpreadConstraints(pod.Spec.TopologySpreadConstraints, v1.ScheduleAnyway) @@ -74,7 +74,7 @@ func (pl *PodTopologySpread) initPreScoreState(s *preScoreState, pod *v1.Pod, fi } topoSize := make([]int, len(s.Constraints)) for _, node := range filteredNodes { - if !nodeLabelsMatchSpreadConstraints(node.Labels, s.Constraints) { + if requireAllTopologies && !nodeLabelsMatchSpreadConstraints(node.Labels, s.Constraints) { // Nodes which don't have all required topologyKeys present are ignored // when scoring later. s.IgnoredNodes.Insert(node.Name) @@ -125,7 +125,11 @@ func (pl *PodTopologySpread) PreScore( IgnoredNodes: sets.NewString(), TopologyPairToPodCounts: make(map[topologyPair]*int64), } - err = pl.initPreScoreState(state, pod, filteredNodes) + // Only require that nodes have all the topology labels if using + // non-system-default spreading rules. This allows nodes that don't have a + // zone label to still have hostname spreading. + requireAllTopologies := len(pod.Spec.TopologySpreadConstraints) > 0 || !pl.systemDefaulted + err = pl.initPreScoreState(state, pod, filteredNodes, requireAllTopologies) if err != nil { return framework.AsStatus(fmt.Errorf("calculating preScoreState: %w", err)) } @@ -147,7 +151,7 @@ func (pl *PodTopologySpread) PreScore( // (1) `node` should satisfy incoming pod's NodeSelector/NodeAffinity // (2) All topologyKeys need to be present in `node` match, _ := requiredNodeAffinity.Match(node) - if !match || !nodeLabelsMatchSpreadConstraints(node.Labels, state.Constraints) { + if !match || (requireAllTopologies && !nodeLabelsMatchSpreadConstraints(node.Labels, state.Constraints)) { return } diff --git a/pkg/scheduler/framework/plugins/podtopologyspread/scoring_test.go b/pkg/scheduler/framework/plugins/podtopologyspread/scoring_test.go index 562fb0255dd..fe5f9cbabe1 100644 --- a/pkg/scheduler/framework/plugins/podtopologyspread/scoring_test.go +++ b/pkg/scheduler/framework/plugins/podtopologyspread/scoring_test.go @@ -123,6 +123,10 @@ func TestPreScoreStateEmptyNodes(t *testing.T) { }, nodes: []*v1.Node{ st.MakeNode().Name("node-a").Label(v1.LabelHostname, "node-a").Label(v1.LabelTopologyZone, "mars").Obj(), + st.MakeNode().Name("node-b").Label(v1.LabelHostname, "node-b").Label(v1.LabelTopologyZone, "mars").Obj(), + // Nodes with no zone are not excluded. They are considered a separate zone. + st.MakeNode().Name("node-c").Label(v1.LabelHostname, "node-c").Obj(), + st.MakeNode().Name("node-d").Label(v1.LabelHostname, "node-d").Obj(), }, objs: []runtime.Object{ &appsv1.ReplicaSet{ObjectMeta: metav1.ObjectMeta{Namespace: "default", Name: "rs1"}, Spec: appsv1.ReplicaSetSpec{Selector: st.MakeLabelSelector().Exists("foo").Obj()}}, @@ -143,8 +147,9 @@ func TestPreScoreStateEmptyNodes(t *testing.T) { IgnoredNodes: sets.NewString(), TopologyPairToPodCounts: map[topologyPair]*int64{ {key: v1.LabelTopologyZone, value: "mars"}: pointer.Int64Ptr(0), + {key: v1.LabelTopologyZone, value: ""}: pointer.Int64Ptr(0), }, - TopologyNormalizingWeight: []float64{topologyNormalizingWeight(1), topologyNormalizingWeight(1)}, + TopologyNormalizingWeight: []float64{topologyNormalizingWeight(4), topologyNormalizingWeight(2)}, }, }, { @@ -277,6 +282,7 @@ func TestPodTopologySpreadScore(t *testing.T) { existingPods []*v1.Pod nodes []*v1.Node failedNodes []*v1.Node // nodes + failedNodes = all nodes + objs []runtime.Object want framework.NodeScoreList }{ // Explanation on the Legend: @@ -427,6 +433,40 @@ func TestPodTopologySpreadScore(t *testing.T) { {Name: "node-d", Score: 100}, }, }, + { + name: "system defaulting, nodes don't have zone, pods match service", + pod: st.MakePod().Name("p").Label("foo", "").Obj(), + existingPods: []*v1.Pod{ + // matching pods spread as 4/3/2/1. + st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-a3").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-a4").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + st.MakePod().Name("p-b2").Node("node-b").Label("foo", "").Obj(), + st.MakePod().Name("p-b3").Node("node-b").Label("foo", "").Obj(), + st.MakePod().Name("p-c1").Node("node-c").Label("foo", "").Obj(), + st.MakePod().Name("p-c2").Node("node-c").Label("foo", "").Obj(), + st.MakePod().Name("p-d1").Node("node-d").Label("foo", "").Obj(), + }, + nodes: []*v1.Node{ + st.MakeNode().Name("node-a").Label(v1.LabelHostname, "node-a").Obj(), + st.MakeNode().Name("node-b").Label(v1.LabelHostname, "node-b").Obj(), + st.MakeNode().Name("node-c").Label(v1.LabelHostname, "node-c").Obj(), + st.MakeNode().Name("node-d").Label(v1.LabelHostname, "node-d").Obj(), + }, + failedNodes: []*v1.Node{}, + objs: []runtime.Object{ + &v1.Service{Spec: v1.ServiceSpec{Selector: map[string]string{"foo": ""}}}, + }, + want: []framework.NodeScore{ + // Same scores as if we were using one spreading constraint. + {Name: "node-a", Score: 33}, + {Name: "node-b", Score: 55}, + {Name: "node-c", Score: 77}, + {Name: "node-d", Score: 100}, + }, + }, { // matching pods spread as 4/2/1/~3~ (node4 is not a candidate) name: "one constraint on node, 3 out of 4 nodes are candidates", @@ -686,10 +726,12 @@ func TestPodTopologySpreadScore(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) allNodes := append([]*v1.Node{}, tt.nodes...) allNodes = append(allNodes, tt.failedNodes...) state := framework.NewCycleState() - pl := plugintesting.SetupPlugin(t, New, &config.PodTopologySpreadArgs{DefaultingType: config.ListDefaulting}, cache.NewSnapshot(tt.existingPods, allNodes)) + pl := plugintesting.SetupPluginWithInformers(ctx, t, New, &config.PodTopologySpreadArgs{DefaultingType: config.SystemDefaulting}, cache.NewSnapshot(tt.existingPods, allNodes), tt.objs) p := pl.(*PodTopologySpread) status := p.PreScore(context.Background(), state, tt.pod, tt.nodes) @@ -700,14 +742,14 @@ func TestPodTopologySpreadScore(t *testing.T) { var gotList framework.NodeScoreList for _, n := range tt.nodes { nodeName := n.Name - score, status := p.Score(context.Background(), state, tt.pod, nodeName) + score, status := p.Score(ctx, state, tt.pod, nodeName) if !status.IsSuccess() { t.Errorf("unexpected error: %v", status) } gotList = append(gotList, framework.NodeScore{Name: nodeName, Score: score}) } - status = p.NormalizeScore(context.Background(), state, tt.pod, gotList) + status = p.NormalizeScore(ctx, state, tt.pod, gotList) if !status.IsSuccess() { t.Errorf("unexpected error: %v", status) }