From 762a7113a75a60cffefcbb52a88945be452c6b14 Mon Sep 17 00:00:00 2001
From: Wei Huang
Date: Thu, 25 Jul 2019 15:18:20 -0700
Subject: [PATCH] EvenPodsSpread: optimize Priority logic

---
 .../algorithm/priorities/even_pods_spread.go  | 127 +++++++++---------
 .../priorities/even_pods_spread_test.go       |  95 ++++++++++---
 2 files changed, 141 insertions(+), 81 deletions(-)

diff --git a/pkg/scheduler/algorithm/priorities/even_pods_spread.go b/pkg/scheduler/algorithm/priorities/even_pods_spread.go
index b02309ee3bb..121cc032df3 100644
--- a/pkg/scheduler/algorithm/priorities/even_pods_spread.go
+++ b/pkg/scheduler/algorithm/priorities/even_pods_spread.go
@@ -18,10 +18,10 @@ package priorities
 
 import (
 	"context"
+	"math"
 	"sync/atomic"
 
 	"k8s.io/api/core/v1"
-	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/client-go/util/workqueue"
 	"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
 	schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
@@ -37,45 +37,39 @@ type topologyPair struct {
 }
 
 type topologySpreadConstraintsMap struct {
-	// podCounts is keyed with node name, and valued with the number of matching pods.
-	podCounts map[string]*int64
-	// total number of matching pods on each qualified <topologyKey: value> pair
-	total int64
-	// topologyPairToNodeNames store the mapping from potential <topologyKey: value>
-	// pair to node names
-	topologyPairToNodeNames map[topologyPair][]string
+	// nodeNameToPodCounts is keyed with node name, and valued with the number of matching pods.
+	nodeNameToPodCounts map[string]int64
+	// topologyPairToPodCounts is keyed with topologyPair, and valued with the number of matching pods.
+	topologyPairToPodCounts map[topologyPair]*int64
 }
 
-func newTopologySpreadConstraintsMap(len int) *topologySpreadConstraintsMap {
+func newTopologySpreadConstraintsMap() *topologySpreadConstraintsMap {
 	return &topologySpreadConstraintsMap{
-		podCounts:               make(map[string]*int64, len),
-		topologyPairToNodeNames: make(map[topologyPair][]string),
+		nodeNameToPodCounts:     make(map[string]int64),
+		topologyPairToPodCounts: make(map[topologyPair]*int64),
 	}
 }
 
+// Note: the <nodes> passed in are the "filtered" nodes which have passed Predicates.
+// This function iterates over the nodes to filter out those which don't have the required topologyKey(s),
+// and initializes two maps:
+// 1) t.topologyPairToPodCounts: keyed with each eligible topology pair, valued with a fresh *int64 counter.
+// 2) t.nodeNameToPodCounts: keyed with each eligible node name, valued with 0.
 func (t *topologySpreadConstraintsMap) initialize(pod *v1.Pod, nodes []*v1.Node) {
 	constraints := getSoftTopologySpreadConstraints(pod)
 	for _, node := range nodes {
-		match := true
-		var pairs []topologyPair
+		if !predicates.NodeLabelsMatchSpreadConstraints(node.Labels, constraints) {
+			continue
+		}
 		for _, constraint := range constraints {
-			tpKey := constraint.TopologyKey
-			if _, ok := node.Labels[tpKey]; !ok {
-				// Current node isn't qualified for the soft constraints,
-				// so break here and the node will hold default value (nil).
-				match = false
-				break
+			pair := topologyPair{key: constraint.TopologyKey, value: node.Labels[constraint.TopologyKey]}
+			if t.topologyPairToPodCounts[pair] == nil {
+				t.topologyPairToPodCounts[pair] = new(int64)
 			}
-			pairs = append(pairs, topologyPair{key: tpKey, value: node.Labels[tpKey]})
 		}
-		if match {
-			for _, pair := range pairs {
-				t.topologyPairToNodeNames[pair] = append(t.topologyPairToNodeNames[pair], node.Name)
-			}
-			t.podCounts[node.Name] = new(int64)
-		}
-		// For those nodes which don't have all required topologyKeys present, it's intentional to
-		// leave podCounts[nodeName] as nil, so that we're able to score these nodes to 0 afterwards.
+		t.nodeNameToPodCounts[node.Name] = 0
+		// For those nodes which don't have all required topologyKeys present, it's intentional to keep
+		// those entries absent in nodeNameToPodCounts, so that we're able to score them to 0 afterwards.
 	}
}
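As an illustrative aside (not part of the patch), here is a minimal standalone sketch of the bookkeeping that the new initialize() builds. The types and inputs are hypothetical stand-ins for the scheduler's v1.Node and constraint structs; only the map-building technique mirrors the hunk above.

package main

import "fmt"

// topologyPair mirrors the patch's key type; everything else is simplified.
type topologyPair struct{ key, value string }

func main() {
	// Topology keys of the pod's soft constraints (assumed input).
	topologyKeys := []string{"zone", "node"}
	// Node name -> labels; node-x lacks "zone", echoing the test fixtures.
	nodeLabels := map[string]map[string]string{
		"node-a": {"zone": "zone1", "node": "node-a"},
		"node-b": {"zone": "zone1", "node": "node-b"},
		"node-x": {"node": "node-x"}, // missing a required topologyKey => ineligible
	}

	nodeNameToPodCounts := make(map[string]int64)
	topologyPairToPodCounts := make(map[topologyPair]*int64)

	for name, labels := range nodeLabels {
		eligible := true
		for _, key := range topologyKeys {
			if _, ok := labels[key]; !ok {
				eligible = false
				break
			}
		}
		if !eligible {
			// Intentionally absent from nodeNameToPodCounts: scored 0 later.
			continue
		}
		for _, key := range topologyKeys {
			pair := topologyPair{key: key, value: labels[key]}
			if topologyPairToPodCounts[pair] == nil {
				topologyPairToPodCounts[pair] = new(int64)
			}
		}
		nodeNameToPodCounts[name] = 0
	}

	// 2 eligible nodes; 3 distinct pairs ({zone, zone1} is shared by node-a and node-b).
	fmt.Println(len(nodeNameToPodCounts), len(topologyPairToPodCounts)) // 2 3
}

Sharing one *int64 counter per topology pair, instead of one count per node, is what lets the counting pass below aggregate with a single atomic add per (node, constraint).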
@@ -83,11 +77,11 @@ func (t *topologySpreadConstraintsMap) initialize(pod *v1.Pod, nodes []*v1.Node)
 // that are with WhenUnsatisfiable=ScheduleAnyway (a.k.a soft constraint).
 // The function works as below:
 // 1) In all nodes, calculate the number of pods which match <pod>'s soft topology spread constraints.
-// 2) Sum up the number to each node in <nodes> which has corresponding topologyPair present.
+// 2) Group the number calculated in 1) by topologyPair, and sum up to corresponding candidate nodes.
 // 3) Finally normalize the number to 0~10. The node with the highest score is the most preferred.
 // Note: Symmetry is not applicable. We only weigh how incomingPod matches existingPod.
 // Whether existingPod matches incomingPod doesn't contribute to the final score.
-// This is different with the Affinity API.
+// This is different from the Affinity API.
 func CalculateEvenPodsSpreadPriority(pod *v1.Pod, nodeNameToInfo map[string]*schedulernodeinfo.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
 	result := make(schedulerapi.HostPriorityList, len(nodes))
 	// return if incoming pod doesn't have soft topology spread constraints.
@@ -96,7 +90,7 @@ func CalculateEvenPodsSpreadPriority(pod *v1.Pod, nodeNameToInfo map[string]*sch
 		return result, nil
 	}
 
-	t := newTopologySpreadConstraintsMap(len(nodes))
+	t := newTopologySpreadConstraintsMap()
 	t.initialize(pod, nodes)
 
 	allNodeNames := make([]string, 0, len(nodeNameToInfo))
@@ -106,7 +100,7 @@ func CalculateEvenPodsSpreadPriority(pod *v1.Pod, nodeNameToInfo map[string]*sch
 	errCh := schedutil.NewErrorChannel()
 	ctx, cancel := context.WithCancel(context.Background())
 
-	processNode := func(i int) {
+	processAllNode := func(i int) {
 		nodeInfo := nodeNameToInfo[allNodeNames[i]]
 		node := nodeInfo.Node()
 		if node == nil {
@@ -118,58 +112,63 @@ func CalculateEvenPodsSpreadPriority(pod *v1.Pod, nodeNameToInfo map[string]*sch
 			!predicates.NodeLabelsMatchSpreadConstraints(node.Labels, constraints) {
 			return
 		}
-		// It's enough to use topologyKey as the "key" of the map.
-		matchCount := make(map[string]int64)
-		for _, existingPod := range nodeInfo.Pods() {
-			podLabelSet := labels.Set(existingPod.Labels)
-			// Matching on constraints is calculated independently.
-			for _, constraint := range constraints {
-				match, err := predicates.PodMatchesSpreadConstraint(podLabelSet, constraint)
+
+		for _, constraint := range constraints {
+			pair := topologyPair{key: constraint.TopologyKey, value: node.Labels[constraint.TopologyKey]}
+			// If current topology pair is not associated with any candidate node,
+			// continue to avoid unnecessary calculation.
+			if t.topologyPairToPodCounts[pair] == nil {
+				continue
+			}
+
+			// <matchSum> indicates how many pods (on current node) match the <constraint>.
+			matchSum := int64(0)
+			for _, existingPod := range nodeInfo.Pods() {
+				match, err := predicates.PodMatchesSpreadConstraint(existingPod.Labels, constraint)
 				if err != nil {
 					errCh.SendErrorWithCancel(err, cancel)
 					return
 				}
 				if match {
-					matchCount[constraint.TopologyKey]++
+					matchSum++
 				}
 			}
-		}
-		// Keys in t.podCounts have been ensured to contain "filtered" nodes only.
-		for _, constraint := range constraints {
-			tpKey := constraint.TopologyKey
-			pair := topologyPair{key: tpKey, value: node.Labels[tpKey]}
-			// For each <pair>, all matched nodes get the credit of summed matchCount.
-			// And we add matchCount to <t.total> to reverse the final score later.
-			for _, nodeName := range t.topologyPairToNodeNames[pair] {
-				atomic.AddInt64(t.podCounts[nodeName], matchCount[tpKey])
-				atomic.AddInt64(&t.total, matchCount[tpKey])
-			}
+			atomic.AddInt64(t.topologyPairToPodCounts[pair], matchSum)
 		}
 	}
-	workqueue.ParallelizeUntil(ctx, 16, len(allNodeNames), processNode)
+	workqueue.ParallelizeUntil(ctx, 16, len(allNodeNames), processAllNode)
 	if err := errCh.ReceiveError(); err != nil {
 		return nil, err
 	}
 
-	var maxCount, minCount int64
+	var minCount int64 = math.MaxInt64
+	// <total> sums up the number of matching pods on each qualified topology pair
+	var total int64
 	for _, node := range nodes {
-		if t.podCounts[node.Name] == nil {
+		if _, ok := t.nodeNameToPodCounts[node.Name]; !ok {
 			continue
 		}
-		// reverse
-		count := t.total - *t.podCounts[node.Name]
-		if count > maxCount {
-			maxCount = count
-		} else if count < minCount {
-			minCount = count
-		}
-		t.podCounts[node.Name] = &count
+
+		// For each present <pair>, current node gets a credit of <matchSum>.
+		// And we add <matchSum> to <total> to reverse the final score later.
+		for _, constraint := range constraints {
+			if tpVal, ok := node.Labels[constraint.TopologyKey]; ok {
+				pair := topologyPair{key: constraint.TopologyKey, value: tpVal}
+				matchSum := *t.topologyPairToPodCounts[pair]
+				t.nodeNameToPodCounts[node.Name] += matchSum
+				total += matchSum
+			}
+		}
+		if t.nodeNameToPodCounts[node.Name] < minCount {
+			minCount = t.nodeNameToPodCounts[node.Name]
+		}
 	}
+
 	// calculate final priority score for each node
 	// TODO(Huang-Wei): in alpha version, we keep the formula as simple as possible.
 	// current version ranks the nodes properly, but it doesn't take MaxSkew into
 	// consideration, we may come up with a better formula in the future.
-	maxMinDiff := maxCount - minCount
+	maxMinDiff := total - minCount
 	for i := range nodes {
 		node := nodes[i]
 		result[i].Host = node.Name
@@ -182,7 +181,7 @@ func CalculateEvenPodsSpreadPriority(pod *v1.Pod, nodeNameToInfo map[string]*sch
 			}(&result[i].Score, node.Name)
 		}
 
-		if t.podCounts[node.Name] == nil {
+		if _, ok := t.nodeNameToPodCounts[node.Name]; !ok {
 			result[i].Score = 0
 			continue
 		}
@@ -190,7 +189,7 @@ func CalculateEvenPodsSpreadPriority(pod *v1.Pod, nodeNameToInfo map[string]*sch
 			result[i].Score = schedulerapi.MaxPriority
 			continue
 		}
-		fScore := float64(schedulerapi.MaxPriority) * (float64(*t.podCounts[node.Name]-minCount) / float64(maxMinDiff))
+		fScore := float64(schedulerapi.MaxPriority) * (float64(total-t.nodeNameToPodCounts[node.Name]) / float64(maxMinDiff))
 		result[i].Score = int(fScore)
 	}
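As a worked example of the new scoring path (an editorial sketch, not scheduler code), the program below re-computes the "2/1/0/3" spread from the tests: total = 6 and minCount = 0, so maxMinDiff = 6 and each node scores MaxPriority * (total - count) / maxMinDiff. The constant and the counts map are hypothetical stand-ins for schedulerapi.MaxPriority and the aggregated per-node credits.

package main

import (
	"fmt"
	"math"
)

func main() {
	const maxPriority = 10 // stands in for schedulerapi.MaxPriority

	// Matching pods per candidate node, as in the "2/1/0/3" test comment.
	counts := map[string]int64{"node-a": 2, "node-b": 1, "node-x": 0, "node-y": 3}

	var total int64
	var minCount int64 = math.MaxInt64
	for _, c := range counts {
		total += c
		if c < minCount {
			minCount = c
		}
	}
	maxMinDiff := total - minCount // 6 - 0 = 6

	for _, name := range []string{"node-a", "node-b", "node-x", "node-y"} {
		// Reverse: fewer matching pods => higher score.
		score := int(float64(maxPriority) * float64(total-counts[name]) / float64(maxMinDiff))
		fmt.Printf("%s: %d\n", name, score) // 6, 8, 10, 5
	}
}

The branch in the last hunk above that short-circuits to schedulerapi.MaxPriority presumably covers the degenerate case where total equals minCount, so the division below it never sees a zero maxMinDiff.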
diff --git a/pkg/scheduler/algorithm/priorities/even_pods_spread_test.go b/pkg/scheduler/algorithm/priorities/even_pods_spread_test.go
index 3e859032597..b7da0dbe69d 100644
--- a/pkg/scheduler/algorithm/priorities/even_pods_spread_test.go
+++ b/pkg/scheduler/algorithm/priorities/even_pods_spread_test.go
@@ -29,10 +29,11 @@ import (
 
 func Test_topologySpreadConstraintsMap_initialize(t *testing.T) {
 	tests := []struct {
-		name  string
-		pod   *v1.Pod
-		nodes []*v1.Node
-		want  map[topologyPair][]string
+		name                string
+		pod                 *v1.Pod
+		nodes               []*v1.Node
+		wantNodeNameMap     map[string]int64
+		wantTopologyPairMap map[topologyPair]*int64
 	}{
 		{
 			name: "normal case",
 			pod: st.MakePod().Name("p").Label("foo", "").
 				SpreadConstraint(1, "zone", softSpread, st.MakeLabelSelector().Exists("foo").Obj()).
 				SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("foo").Obj()).
 				Obj(),
 			nodes: []*v1.Node{
 				st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(),
 				st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(),
 				st.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(),
 			},
-			want: map[topologyPair][]string{
-				{key: "zone", value: "zone1"}:  {"node-a", "node-b"},
-				{key: "zone", value: "zone2"}:  {"node-x"},
-				{key: "node", value: "node-a"}: {"node-a"},
-				{key: "node", value: "node-b"}: {"node-b"},
-				{key: "node", value: "node-x"}: {"node-x"},
+			wantNodeNameMap: map[string]int64{
+				"node-a": 0,
+				"node-b": 0,
+				"node-x": 0,
+			},
+			wantTopologyPairMap: map[topologyPair]*int64{
+				{key: "zone", value: "zone1"}:  new(int64),
+				{key: "zone", value: "zone2"}:  new(int64),
+				{key: "node", value: "node-a"}: new(int64),
+				{key: "node", value: "node-b"}: new(int64),
+				{key: "node", value: "node-x"}: new(int64),
 			},
 		},
 		{
 			name: "node-x doesn't have label topologyKey",
 			pod: st.MakePod().Name("p").Label("foo", "").
 				SpreadConstraint(1, "zone", softSpread, st.MakeLabelSelector().Exists("foo").Obj()).
 				SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("foo").Obj()).
 				Obj(),
 			nodes: []*v1.Node{
 				st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(),
 				st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(),
 				st.MakeNode().Name("node-x").Label("node", "node-x").Obj(),
 			},
-			want: map[topologyPair][]string{
-				{key: "zone", value: "zone1"}:  {"node-a", "node-b"},
-				{key: "node", value: "node-a"}: {"node-a"},
-				{key: "node", value: "node-b"}: {"node-b"},
+			wantNodeNameMap: map[string]int64{
+				"node-a": 0,
+				"node-b": 0,
+			},
+			wantTopologyPairMap: map[topologyPair]*int64{
+				{key: "zone", value: "zone1"}:  new(int64),
+				{key: "node", value: "node-a"}: new(int64),
+				{key: "node", value: "node-b"}: new(int64),
 			},
 		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			tMap := newTopologySpreadConstraintsMap(len(tt.nodes))
+			tMap := newTopologySpreadConstraintsMap()
 			tMap.initialize(tt.pod, tt.nodes)
-			if !reflect.DeepEqual(tMap.topologyPairToNodeNames, tt.want) {
-				t.Errorf("initilize().topologyPairToNodeNames = %#v, want %#v", tMap.topologyPairToNodeNames, tt.want)
+			if !reflect.DeepEqual(tMap.nodeNameToPodCounts, tt.wantNodeNameMap) {
+				t.Errorf("initialize().nodeNameToPodCounts = %#v, want %#v",
+					tMap.nodeNameToPodCounts, tt.wantNodeNameMap)
+			}
+			if !reflect.DeepEqual(tMap.topologyPairToPodCounts, tt.wantTopologyPairMap) {
+				t.Errorf("initialize().topologyPairToPodCounts = %#v, want %#v", tMap.topologyPairToPodCounts, tt.wantTopologyPairMap)
 			}
 		})
 	}
 }
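A side note on the assertion style above (editorial, not part of the patch): the expected topologyPairToPodCounts values are built with new(int64), which compares as intended because reflect.DeepEqual treats two pointers as deeply equal when they point to deeply equal values, not only when the addresses coincide. A small self-contained demonstration:

package main

import (
	"fmt"
	"reflect"
)

type topologyPair struct{ key, value string }

func main() {
	got := map[topologyPair]*int64{{key: "zone", value: "zone1"}: new(int64)}
	want := map[topologyPair]*int64{{key: "zone", value: "zone1"}: new(int64)}

	// Distinct pointers, but both point at 0, so the maps are deeply equal.
	fmt.Println(reflect.DeepEqual(got, want)) // true

	two := int64(2)
	want[topologyPair{key: "zone", value: "zone1"}] = &two
	fmt.Println(reflect.DeepEqual(got, want)) // false: 0 != 2
}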
@@ -134,6 +147,24 @@ func TestCalculateEvenPodsSpreadPriority(t *testing.T) {
 				{Host: "node-a", Score: 10},
 			},
 		},
+		{
+			name: "one constraint on node, all nodes have the same number of matching pods",
+			pod: st.MakePod().Name("p").Label("foo", "").
+				SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("foo").Obj()).
+				Obj(),
+			existingPods: []*v1.Pod{
+				st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(),
+				st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(),
+			},
+			nodes: []*v1.Node{
+				st.MakeNode().Name("node-a").Label("node", "node-a").Obj(),
+				st.MakeNode().Name("node-b").Label("node", "node-b").Obj(),
+			},
+			want: []schedulerapi.HostPriority{
+				{Host: "node-a", Score: 10},
+				{Host: "node-b", Score: 10},
+			},
+		},
 		{
 			// matching pods spread as 2/1/0/3, total = 6
 			// after reversing, it's 4/5/6/3
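The test case added in the next hunk exercises two constraints with different label selectors. As an editorial sketch (simplified types; the per-pair match counts are hard-coded from that test's own comments, not computed from pods), each node's credit is the sum of the counts of the topology pairs it belongs to:

package main

import "fmt"

type topologyPair struct{ key, value string }

func main() {
	// Match counts per topology pair, taken from the test comments below:
	// zone ("foo" selector): zone1=0, zone2=2; node ("bar" selector): node-b=1, node-y=1.
	pairCounts := map[topologyPair]int64{
		{key: "zone", value: "zone1"}:  0,
		{key: "zone", value: "zone2"}:  2,
		{key: "node", value: "node-a"}: 0,
		{key: "node", value: "node-b"}: 1,
		{key: "node", value: "node-x"}: 0,
		{key: "node", value: "node-y"}: 1,
	}
	nodeLabels := map[string]map[string]string{
		"node-a": {"zone": "zone1", "node": "node-a"},
		"node-b": {"zone": "zone1", "node": "node-b"},
		"node-x": {"zone": "zone2", "node": "node-x"},
		"node-y": {"zone": "zone2", "node": "node-y"},
	}

	for _, name := range []string{"node-a", "node-b", "node-x", "node-y"} {
		var credit int64
		for key, value := range nodeLabels[name] {
			credit += pairCounts[topologyPair{key: key, value: value}]
		}
		fmt.Printf("%s: %d\n", name, credit) // 0, 1, 2, 3
	}
}

Summing the credits gives 0/1/2/3 with a total of 6; reversing to 6/5/4/3 and normalizing as in the earlier sketch yields exactly the 10/8/6/5 the test expects.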
@@ -338,6 +369,36 @@ func TestCalculateEvenPodsSpreadPriority(t *testing.T) {
 				{Host: "node-y", Score: 8},
 			},
 		},
+		{
+			// For the first constraint (zone): the matching pods spread as 0/0/2/2
+			// For the second constraint (node): the matching pods spread as 0/1/0/1
+			// summing them up gives: 0/1/2/3, and the total number is 6.
+			// after reversing, it's 6/5/4/3.
+			// so scores = 60/6, 50/6, 40/6, 30/6
+			name: "two constraints on zone and node, with different labelSelectors, some nodes have 0 pods",
+			pod: st.MakePod().Name("p").Label("foo", "").Label("bar", "").
+				SpreadConstraint(1, "zone", softSpread, st.MakeLabelSelector().Exists("foo").Obj()).
+				SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("bar").Obj()).
+				Obj(),
+			existingPods: []*v1.Pod{
+				st.MakePod().Name("p-b1").Node("node-b").Label("bar", "").Obj(),
+				st.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(),
+				st.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Label("bar", "").Obj(),
+			},
+			nodes: []*v1.Node{
+				st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(),
+				st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(),
+				st.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(),
+				st.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(),
+			},
+			failedNodes: []*v1.Node{},
+			want: []schedulerapi.HostPriority{
+				{Host: "node-a", Score: 10},
+				{Host: "node-b", Score: 8},
+				{Host: "node-x", Score: 6},
+				{Host: "node-y", Score: 5},
+			},
+		},
 		{
 			// For the first constraint (zone): the matching pods spread as 2/2/1/~1~
 			// For the second constraint (node): the matching pods spread as 0/1/0/~1~