From 4aa92bac73f33dd88aa4135fb5c4699cd452569f Mon Sep 17 00:00:00 2001 From: Gavin Date: Thu, 2 Nov 2017 15:08:38 +0800 Subject: [PATCH 1/3] Refactoring of priority function(CaculateSpreadPriority) by using map/reduce pattern --- .../priorities/selector_spreading.go | 192 ++++++++---------- 1 file changed, 88 insertions(+), 104 deletions(-) diff --git a/plugin/pkg/scheduler/algorithm/priorities/selector_spreading.go b/plugin/pkg/scheduler/algorithm/priorities/selector_spreading.go index 721531f7e32..940813f2cb9 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/selector_spreading.go +++ b/plugin/pkg/scheduler/algorithm/priorities/selector_spreading.go @@ -17,12 +17,10 @@ limitations under the License. package priorities import ( - "sync" + "fmt" "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" - "k8s.io/client-go/util/workqueue" utilnode "k8s.io/kubernetes/pkg/util/node" "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm" schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" @@ -46,149 +44,135 @@ func NewSelectorSpreadPriority( serviceLister algorithm.ServiceLister, controllerLister algorithm.ControllerLister, replicaSetLister algorithm.ReplicaSetLister, - statefulSetLister algorithm.StatefulSetLister) algorithm.PriorityFunction { + statefulSetLister algorithm.StatefulSetLister) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) { selectorSpread := &SelectorSpread{ serviceLister: serviceLister, controllerLister: controllerLister, replicaSetLister: replicaSetLister, statefulSetLister: statefulSetLister, } - return selectorSpread.CalculateSpreadPriority + return selectorSpread.CalculateSpreadPriorityMap, selectorSpread.CalculateSpreadPriorityReduce } -// Returns selectors of services, RCs and RSs matching the given pod. -func getSelectors(pod *v1.Pod, sl algorithm.ServiceLister, cl algorithm.ControllerLister, rsl algorithm.ReplicaSetLister, ssl algorithm.StatefulSetLister) []labels.Selector { - var selectors []labels.Selector - if services, err := sl.GetPodServices(pod); err == nil { - for _, service := range services { - selectors = append(selectors, labels.SelectorFromSet(service.Spec.Selector)) - } - } - if rcs, err := cl.GetPodControllers(pod); err == nil { - for _, rc := range rcs { - selectors = append(selectors, labels.SelectorFromSet(rc.Spec.Selector)) - } - } - if rss, err := rsl.GetPodReplicaSets(pod); err == nil { - for _, rs := range rss { - if selector, err := metav1.LabelSelectorAsSelector(rs.Spec.Selector); err == nil { - selectors = append(selectors, selector) - } - } - } - if sss, err := ssl.GetPodStatefulSets(pod); err == nil { - for _, ss := range sss { - if selector, err := metav1.LabelSelectorAsSelector(ss.Spec.Selector); err == nil { - selectors = append(selectors, selector) - } - } - } - return selectors -} - -func (s *SelectorSpread) getSelectors(pod *v1.Pod) []labels.Selector { - return getSelectors(pod, s.serviceLister, s.controllerLister, s.replicaSetLister, s.statefulSetLister) -} - -// CalculateSpreadPriority spreads pods across hosts and zones, considering pods belonging to the same service or replication controller. +// CalculateSpreadPriorityMap spreads pods across hosts, considering pods belonging to the same service or replication controller. // When a pod is scheduled, it looks for services, RCs or RSs that match the pod, then finds existing pods that match those selectors. // It favors nodes that have fewer existing matching pods. // i.e. 
it pushes the scheduler towards a node where there's the smallest number of // pods which match the same service, RC or RS selectors as the pod being scheduled. -// Where zone information is included on the nodes, it favors nodes in zones with fewer existing matching pods. -func (s *SelectorSpread) CalculateSpreadPriority(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) { - selectors := s.getSelectors(pod) +func (s *SelectorSpread) CalculateSpreadPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) { + var selectors []labels.Selector + node := nodeInfo.Node() + if node == nil { + return schedulerapi.HostPriority{}, fmt.Errorf("node not found") + } - // Count similar pods by node - countsByNodeName := make(map[string]float64, len(nodes)) - countsByZone := make(map[string]float64, 10) - maxCountByNodeName := float64(0) - countsByNodeNameLock := sync.Mutex{} + priorityMeta, ok := meta.(*priorityMetadata) + if ok { + selectors = priorityMeta.podSelectors + } else { + selectors = getSelectors(pod, s.serviceLister, s.controllerLister, s.replicaSetLister, s.statefulSetLister) + } + + if len(selectors) == 0 { + return schedulerapi.HostPriority{ + Host: node.Name, + Score: int(0), + }, nil + } + + count := float64(0) + for _, nodePod := range nodeInfo.Pods() { + if pod.Namespace != nodePod.Namespace { + continue + } + // When we are replacing a failed pod, we often see the previous + // deleted version while scheduling the replacement. + // Ignore the previous deleted version for spreading purposes + // (it can still be considered for resource restrictions etc.) + if nodePod.DeletionTimestamp != nil { + glog.V(4).Infof("skipping pending-deleted pod: %s/%s", nodePod.Namespace, nodePod.Name) + continue + } + matches := false + for _, selector := range selectors { + if selector.Matches(labels.Set(nodePod.ObjectMeta.Labels)) { + matches = true + break + } + } + if matches { + count++ + } + } + return schedulerapi.HostPriority{ + Host: node.Name, + Score: int(count), + }, nil +} + +// CalculateSpreadPriorityReduce calculates the source of each node based on the number of existing matching pods on the node +// where zone information is included on the nodes, it favors nodes in zones with fewer existing matching pods. +func (s *SelectorSpread) CalculateSpreadPriorityReduce(pod *v1.Pod, meta interface{}, nodeNameToInfo map[string]*schedulercache.NodeInfo, result schedulerapi.HostPriorityList) error { + var selectors []labels.Selector + countsByZone := make(map[string]int, 10) + maxCountByZone := int(0) + maxCountByNodeName := int(0) + + priorityMeta, ok := meta.(*priorityMetadata) + if ok { + selectors = priorityMeta.podSelectors + } else { + selectors = getSelectors(pod, s.serviceLister, s.controllerLister, s.replicaSetLister, s.statefulSetLister) + } if len(selectors) > 0 { - processNodeFunc := func(i int) { - nodeName := nodes[i].Name - count := float64(0) - for _, nodePod := range nodeNameToInfo[nodeName].Pods() { - if pod.Namespace != nodePod.Namespace { - continue - } - // When we are replacing a failed pod, we often see the previous - // deleted version while scheduling the replacement. - // Ignore the previous deleted version for spreading purposes - // (it can still be considered for resource restrictions etc.) 
- if nodePod.DeletionTimestamp != nil { - glog.V(4).Infof("skipping pending-deleted pod: %s/%s", nodePod.Namespace, nodePod.Name) - continue - } - matches := false - for _, selector := range selectors { - if selector.Matches(labels.Set(nodePod.ObjectMeta.Labels)) { - matches = true - break - } - } - if matches { - count++ - } + for i := range result { + if result[i].Score > maxCountByNodeName { + maxCountByNodeName = result[i].Score } - zoneId := utilnode.GetZoneKey(nodes[i]) - - countsByNodeNameLock.Lock() - defer countsByNodeNameLock.Unlock() - countsByNodeName[nodeName] = count - if count > maxCountByNodeName { - maxCountByNodeName = count - } - if zoneId != "" { - countsByZone[zoneId] += count + zoneId := utilnode.GetZoneKey(nodeNameToInfo[result[i].Host].Node()) + if zoneId == "" { + continue } + countsByZone[zoneId] += result[i].Score + } + } + + for zoneId := range countsByZone { + if countsByZone[zoneId] > maxCountByZone { + maxCountByZone = countsByZone[zoneId] } - workqueue.Parallelize(16, len(nodes), processNodeFunc) } - // Aggregate by-zone information - // Compute the maximum number of pods hosted in any zone haveZones := len(countsByZone) != 0 - maxCountByZone := float64(0) - for _, count := range countsByZone { - if count > maxCountByZone { - maxCountByZone = count - } - } - result := make(schedulerapi.HostPriorityList, 0, len(nodes)) - //score int - scale of 0-maxPriority - // 0 being the lowest priority and maxPriority being the highest - for _, node := range nodes { + for i := range result { // initializing to the default/max node score of maxPriority fScore := float64(schedulerapi.MaxPriority) if maxCountByNodeName > 0 { - fScore = float64(schedulerapi.MaxPriority) * ((maxCountByNodeName - countsByNodeName[node.Name]) / maxCountByNodeName) + fScore = float64(schedulerapi.MaxPriority) * (float64(maxCountByNodeName-result[i].Score) / float64(maxCountByNodeName)) } - // If there is zone information present, incorporate it if haveZones { - zoneId := utilnode.GetZoneKey(node) + zoneId := utilnode.GetZoneKey(nodeNameToInfo[result[i].Host].Node()) if zoneId != "" { zoneScore := float64(schedulerapi.MaxPriority) if maxCountByZone > 0 { - zoneScore = float64(schedulerapi.MaxPriority) * ((maxCountByZone - countsByZone[zoneId]) / maxCountByZone) + zoneScore = float64(schedulerapi.MaxPriority) * (float64(maxCountByZone-countsByZone[zoneId]) / float64(maxCountByZone)) } fScore = (fScore * (1.0 - zoneWeighting)) + (zoneWeighting * zoneScore) } } - - result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: int(fScore)}) + result[i].Score = int(fScore) if glog.V(10) { // We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is // not logged. There is visible performance gain from it. 
glog.V(10).Infof( - "%v -> %v: SelectorSpreadPriority, Score: (%d)", pod.Name, node.Name, int(fScore), + "%v -> %v: SelectorSpreadPriority, Score: (%d)", pod.Name, result[i].Host, int(fScore), ) } } - return result, nil + return nil } type ServiceAntiAffinity struct { From bed435deca66d436c11aab05906cdecc8e8298e7 Mon Sep 17 00:00:00 2001 From: Gavin Date: Thu, 2 Nov 2017 15:09:06 +0800 Subject: [PATCH 2/3] compute pod selectors in priority meta data producer --- .../balanced_resource_allocation_test.go | 2 +- .../priorities/image_locality_test.go | 2 +- .../priorities/least_requested_test.go | 2 +- .../algorithm/priorities/metadata.go | 55 ++++++++++++++++++- .../algorithm/priorities/metadata_test.go | 10 +++- .../priorities/most_requested_test.go | 2 +- .../priorities/node_affinity_test.go | 2 +- .../algorithm/priorities/node_label_test.go | 6 +- .../priorities/node_prefer_avoid_pods_test.go | 2 +- .../priorities/selector_spreading_test.go | 34 +++++++++--- .../priorities/taint_toleration_test.go | 2 +- .../algorithm/priorities/test_util.go | 6 +- .../algorithmprovider/defaults/defaults.go | 8 +-- .../scheduler/core/generic_scheduler_test.go | 26 ++++++--- 14 files changed, 125 insertions(+), 34 deletions(-) diff --git a/plugin/pkg/scheduler/algorithm/priorities/balanced_resource_allocation_test.go b/plugin/pkg/scheduler/algorithm/priorities/balanced_resource_allocation_test.go index 777be1b1499..381ff05307a 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/balanced_resource_allocation_test.go +++ b/plugin/pkg/scheduler/algorithm/priorities/balanced_resource_allocation_test.go @@ -253,7 +253,7 @@ func TestBalancedResourceAllocation(t *testing.T) { for _, test := range tests { nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes) - list, err := priorityFunction(BalancedResourceAllocationMap, nil)(test.pod, nodeNameToInfo, test.nodes) + list, err := priorityFunction(BalancedResourceAllocationMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes) if err != nil { t.Errorf("unexpected error: %v", err) } diff --git a/plugin/pkg/scheduler/algorithm/priorities/image_locality_test.go b/plugin/pkg/scheduler/algorithm/priorities/image_locality_test.go index c2e0feabbdc..f957e86375b 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/image_locality_test.go +++ b/plugin/pkg/scheduler/algorithm/priorities/image_locality_test.go @@ -161,7 +161,7 @@ func TestImageLocalityPriority(t *testing.T) { for _, test := range tests { nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes) - list, err := priorityFunction(ImageLocalityPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes) + list, err := priorityFunction(ImageLocalityPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes) if err != nil { t.Errorf("unexpected error: %v", err) } diff --git a/plugin/pkg/scheduler/algorithm/priorities/least_requested_test.go b/plugin/pkg/scheduler/algorithm/priorities/least_requested_test.go index 08e083361b4..f71ef43d1bc 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/least_requested_test.go +++ b/plugin/pkg/scheduler/algorithm/priorities/least_requested_test.go @@ -253,7 +253,7 @@ func TestLeastRequested(t *testing.T) { for _, test := range tests { nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes) - list, err := priorityFunction(LeastRequestedPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes) + list, err := priorityFunction(LeastRequestedPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes) if err != nil { 
t.Errorf("unexpected error: %v", err) } diff --git a/plugin/pkg/scheduler/algorithm/priorities/metadata.go b/plugin/pkg/scheduler/algorithm/priorities/metadata.go index 6f3818eb530..1e16c4aad4d 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/metadata.go +++ b/plugin/pkg/scheduler/algorithm/priorities/metadata.go @@ -18,26 +18,79 @@ package priorities import ( "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm" "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" ) +type PriorityMetadataFactory struct { + serviceLister algorithm.ServiceLister + controllerLister algorithm.ControllerLister + replicaSetLister algorithm.ReplicaSetLister + statefulSetLister algorithm.StatefulSetLister +} + +func NewPriorityMetadataFactory(serviceLister algorithm.ServiceLister, controllerLister algorithm.ControllerLister, replicaSetLister algorithm.ReplicaSetLister, statefulSetLister algorithm.StatefulSetLister) algorithm.MetadataProducer { + factory := &PriorityMetadataFactory{ + serviceLister: serviceLister, + controllerLister: controllerLister, + replicaSetLister: replicaSetLister, + statefulSetLister: statefulSetLister, + } + return factory.PriorityMetadata +} + // priorityMetadata is a type that is passed as metadata for priority functions type priorityMetadata struct { nonZeroRequest *schedulercache.Resource podTolerations []v1.Toleration affinity *v1.Affinity + podSelectors []labels.Selector } // PriorityMetadata is a MetadataProducer. Node info can be nil. -func PriorityMetadata(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo) interface{} { +func (pmf *PriorityMetadataFactory) PriorityMetadata(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo) interface{} { // If we cannot compute metadata, just return nil if pod == nil { return nil } tolerationsPreferNoSchedule := getAllTolerationPreferNoSchedule(pod.Spec.Tolerations) + podSelectors := getSelectors(pod, pmf.serviceLister, pmf.controllerLister, pmf.replicaSetLister, pmf.statefulSetLister) return &priorityMetadata{ nonZeroRequest: getNonZeroRequests(pod), podTolerations: tolerationsPreferNoSchedule, affinity: pod.Spec.Affinity, + podSelectors: podSelectors, } } + +// getSelectors returns selectors of services, RCs and RSs matching the given pod. 
+func getSelectors(pod *v1.Pod, sl algorithm.ServiceLister, cl algorithm.ControllerLister, rsl algorithm.ReplicaSetLister, ssl algorithm.StatefulSetLister) []labels.Selector { + var selectors []labels.Selector + if services, err := sl.GetPodServices(pod); err == nil { + for _, service := range services { + selectors = append(selectors, labels.SelectorFromSet(service.Spec.Selector)) + } + } + if rcs, err := cl.GetPodControllers(pod); err == nil { + for _, rc := range rcs { + selectors = append(selectors, labels.SelectorFromSet(rc.Spec.Selector)) + } + } + if rss, err := rsl.GetPodReplicaSets(pod); err == nil { + for _, rs := range rss { + if selector, err := metav1.LabelSelectorAsSelector(rs.Spec.Selector); err == nil { + selectors = append(selectors, selector) + } + } + } + if sss, err := ssl.GetPodStatefulSets(pod); err == nil { + for _, ss := range sss { + if selector, err := metav1.LabelSelectorAsSelector(ss.Spec.Selector); err == nil { + selectors = append(selectors, selector) + } + } + } + return selectors +} diff --git a/plugin/pkg/scheduler/algorithm/priorities/metadata_test.go b/plugin/pkg/scheduler/algorithm/priorities/metadata_test.go index b8fd653ba24..0258d6e0100 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/metadata_test.go +++ b/plugin/pkg/scheduler/algorithm/priorities/metadata_test.go @@ -20,11 +20,14 @@ import ( "reflect" "testing" + apps "k8s.io/api/apps/v1beta1" "k8s.io/api/core/v1" + extensions "k8s.io/api/extensions/v1beta1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util" "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" + schedulertesting "k8s.io/kubernetes/plugin/pkg/scheduler/testing" ) func TestPriorityMetadata(t *testing.T) { @@ -123,8 +126,13 @@ func TestPriorityMetadata(t *testing.T) { test: "Produce a priorityMetadata with specified requests", }, } + mataDataProducer := NewPriorityMetadataFactory( + schedulertesting.FakeServiceLister([]*v1.Service{}), + schedulertesting.FakeControllerLister([]*v1.ReplicationController{}), + schedulertesting.FakeReplicaSetLister([]*extensions.ReplicaSet{}), + schedulertesting.FakeStatefulSetLister([]*apps.StatefulSet{})) for _, test := range tests { - ptData := PriorityMetadata(test.pod, nil) + ptData := mataDataProducer(test.pod, nil) if !reflect.DeepEqual(test.expected, ptData) { t.Errorf("%s: expected %#v, got %#v", test.test, test.expected, ptData) } diff --git a/plugin/pkg/scheduler/algorithm/priorities/most_requested_test.go b/plugin/pkg/scheduler/algorithm/priorities/most_requested_test.go index a77692b4af9..0cffea5a33b 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/most_requested_test.go +++ b/plugin/pkg/scheduler/algorithm/priorities/most_requested_test.go @@ -210,7 +210,7 @@ func TestMostRequested(t *testing.T) { for _, test := range tests { nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes) - list, err := priorityFunction(MostRequestedPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes) + list, err := priorityFunction(MostRequestedPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes) if err != nil { t.Errorf("unexpected error: %v", err) } diff --git a/plugin/pkg/scheduler/algorithm/priorities/node_affinity_test.go b/plugin/pkg/scheduler/algorithm/priorities/node_affinity_test.go index 9d425661a92..f5474134364 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/node_affinity_test.go +++ 
b/plugin/pkg/scheduler/algorithm/priorities/node_affinity_test.go @@ -167,7 +167,7 @@ func TestNodeAffinityPriority(t *testing.T) { for _, test := range tests { nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes) - nap := priorityFunction(CalculateNodeAffinityPriorityMap, CalculateNodeAffinityPriorityReduce) + nap := priorityFunction(CalculateNodeAffinityPriorityMap, CalculateNodeAffinityPriorityReduce, nil) list, err := nap(test.pod, nodeNameToInfo, test.nodes) if err != nil { t.Errorf("unexpected error: %v", err) diff --git a/plugin/pkg/scheduler/algorithm/priorities/node_label_test.go b/plugin/pkg/scheduler/algorithm/priorities/node_label_test.go index fbced34e336..7acc6ea7076 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/node_label_test.go +++ b/plugin/pkg/scheduler/algorithm/priorities/node_label_test.go @@ -108,7 +108,11 @@ func TestNewNodeLabelPriority(t *testing.T) { for _, test := range tests { nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes) - list, err := priorityFunction(NewNodeLabelPriority(test.label, test.presence))(nil, nodeNameToInfo, test.nodes) + labelPrioritizer := &NodeLabelPrioritizer{ + label: test.label, + presence: test.presence, + } + list, err := priorityFunction(labelPrioritizer.CalculateNodeLabelPriorityMap, nil, nil)(nil, nodeNameToInfo, test.nodes) if err != nil { t.Errorf("unexpected error: %v", err) } diff --git a/plugin/pkg/scheduler/algorithm/priorities/node_prefer_avoid_pods_test.go b/plugin/pkg/scheduler/algorithm/priorities/node_prefer_avoid_pods_test.go index a18ddcc03d6..0766b9e5488 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/node_prefer_avoid_pods_test.go +++ b/plugin/pkg/scheduler/algorithm/priorities/node_prefer_avoid_pods_test.go @@ -142,7 +142,7 @@ func TestNodePreferAvoidPriority(t *testing.T) { for _, test := range tests { nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes) - list, err := priorityFunction(CalculateNodePreferAvoidPodsPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes) + list, err := priorityFunction(CalculateNodePreferAvoidPodsPriorityMap, nil, nil)(test.pod, nodeNameToInfo, test.nodes) if err != nil { t.Errorf("unexpected error: %v", err) } diff --git a/plugin/pkg/scheduler/algorithm/priorities/selector_spreading_test.go b/plugin/pkg/scheduler/algorithm/priorities/selector_spreading_test.go index bec4afe7808..0e7ed19ff9f 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/selector_spreading_test.go +++ b/plugin/pkg/scheduler/algorithm/priorities/selector_spreading_test.go @@ -338,17 +338,26 @@ func TestSelectorSpreadPriority(t *testing.T) { }, } - for _, test := range tests { - nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, nil) + for i, test := range tests { + nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, makeNodeList(test.nodes)) selectorSpread := SelectorSpread{ serviceLister: schedulertesting.FakeServiceLister(test.services), controllerLister: schedulertesting.FakeControllerLister(test.rcs), replicaSetLister: schedulertesting.FakeReplicaSetLister(test.rss), statefulSetLister: schedulertesting.FakeStatefulSetLister(test.sss), } - list, err := selectorSpread.CalculateSpreadPriority(test.pod, nodeNameToInfo, makeNodeList(test.nodes)) + + mataDataProducer := NewPriorityMetadataFactory( + schedulertesting.FakeServiceLister(test.services), + schedulertesting.FakeControllerLister(test.rcs), + schedulertesting.FakeReplicaSetLister(test.rss), + schedulertesting.FakeStatefulSetLister(test.sss)) + 
mataData := mataDataProducer(test.pod, nodeNameToInfo) + + ttp := priorityFunction(selectorSpread.CalculateSpreadPriorityMap, selectorSpread.CalculateSpreadPriorityReduce, mataData) + list, err := ttp(test.pod, nodeNameToInfo, makeNodeList(test.nodes)) if err != nil { - t.Errorf("unexpected error: %v", err) + t.Errorf("unexpected error: %v index : %d\n", err, i) } if !reflect.DeepEqual(test.expectedList, list) { t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list) @@ -544,6 +553,7 @@ func TestZoneSelectorSpreadPriority(t *testing.T) { buildPod(nodeMachine1Zone2, labels1, controllerRef("ReplicationController", "name", "abc123")), buildPod(nodeMachine1Zone3, labels1, controllerRef("ReplicationController", "name", "abc123")), }, + //nodes: []string{nodeMachine1Zone3, nodeMachine1Zone2, nodeMachine1Zone3}, rcs: []*v1.ReplicationController{{Spec: v1.ReplicationControllerSpec{Selector: labels1}}}, expectedList: []schedulerapi.HostPriority{ // Note that because we put two pods on the same node (nodeMachine1Zone3), @@ -564,17 +574,25 @@ func TestZoneSelectorSpreadPriority(t *testing.T) { }, } - for _, test := range tests { - nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, nil) + for i, test := range tests { + nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, makeLabeledNodeList(labeledNodes)) selectorSpread := SelectorSpread{ serviceLister: schedulertesting.FakeServiceLister(test.services), controllerLister: schedulertesting.FakeControllerLister(test.rcs), replicaSetLister: schedulertesting.FakeReplicaSetLister(test.rss), statefulSetLister: schedulertesting.FakeStatefulSetLister(test.sss), } - list, err := selectorSpread.CalculateSpreadPriority(test.pod, nodeNameToInfo, makeLabeledNodeList(labeledNodes)) + + mataDataProducer := NewPriorityMetadataFactory( + schedulertesting.FakeServiceLister(test.services), + schedulertesting.FakeControllerLister(test.rcs), + schedulertesting.FakeReplicaSetLister(test.rss), + schedulertesting.FakeStatefulSetLister(test.sss)) + mataData := mataDataProducer(test.pod, nodeNameToInfo) + ttp := priorityFunction(selectorSpread.CalculateSpreadPriorityMap, selectorSpread.CalculateSpreadPriorityReduce, mataData) + list, err := ttp(test.pod, nodeNameToInfo, makeLabeledNodeList(labeledNodes)) if err != nil { - t.Errorf("unexpected error: %v", err) + t.Errorf("unexpected error: %v index : %d", err, i) } // sort the two lists to avoid failures on account of different ordering sort.Sort(test.expectedList) diff --git a/plugin/pkg/scheduler/algorithm/priorities/taint_toleration_test.go b/plugin/pkg/scheduler/algorithm/priorities/taint_toleration_test.go index 50e0b4d36f9..f54ce45613c 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/taint_toleration_test.go +++ b/plugin/pkg/scheduler/algorithm/priorities/taint_toleration_test.go @@ -227,7 +227,7 @@ func TestTaintAndToleration(t *testing.T) { } for _, test := range tests { nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes) - ttp := priorityFunction(ComputeTaintTolerationPriorityMap, ComputeTaintTolerationPriorityReduce) + ttp := priorityFunction(ComputeTaintTolerationPriorityMap, ComputeTaintTolerationPriorityReduce, nil) list, err := ttp(test.pod, nodeNameToInfo, test.nodes) if err != nil { t.Errorf("%s, unexpected error: %v", test.test, err) diff --git a/plugin/pkg/scheduler/algorithm/priorities/test_util.go b/plugin/pkg/scheduler/algorithm/priorities/test_util.go index 9eb26f2d93c..312c7619410 100644 --- 
a/plugin/pkg/scheduler/algorithm/priorities/test_util.go +++ b/plugin/pkg/scheduler/algorithm/priorities/test_util.go @@ -41,18 +41,18 @@ func makeNode(node string, milliCPU, memory int64) *v1.Node { } } -func priorityFunction(mapFn algorithm.PriorityMapFunction, reduceFn algorithm.PriorityReduceFunction) algorithm.PriorityFunction { +func priorityFunction(mapFn algorithm.PriorityMapFunction, reduceFn algorithm.PriorityReduceFunction, mataData interface{}) algorithm.PriorityFunction { return func(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) { result := make(schedulerapi.HostPriorityList, 0, len(nodes)) for i := range nodes { - hostResult, err := mapFn(pod, nil, nodeNameToInfo[nodes[i].Name]) + hostResult, err := mapFn(pod, mataData, nodeNameToInfo[nodes[i].Name]) if err != nil { return nil, err } result = append(result, hostResult) } if reduceFn != nil { - if err := reduceFn(pod, nil, nodeNameToInfo, result); err != nil { + if err := reduceFn(pod, mataData, nodeNameToInfo, result); err != nil { return nil, err } } diff --git a/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go b/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go index f6aca517eb5..d9242cf172f 100644 --- a/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go +++ b/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go @@ -46,7 +46,7 @@ func init() { }) factory.RegisterPriorityMetadataProducerFactory( func(args factory.PluginFactoryArgs) algorithm.MetadataProducer { - return priorities.PriorityMetadata + return priorities.NewPriorityMetadataFactory(args.ServiceLister, args.ControllerLister, args.ReplicaSetLister, args.StatefulSetLister) }) registerAlgorithmProvider(defaultPredicates(), defaultPriorities()) @@ -90,13 +90,12 @@ func init() { factory.RegisterPriorityConfigFactory( "ServiceSpreadingPriority", factory.PriorityConfigFactory{ - Function: func(args factory.PluginFactoryArgs) algorithm.PriorityFunction { + MapReduceFunction: func(args factory.PluginFactoryArgs) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) { return priorities.NewSelectorSpreadPriority(args.ServiceLister, algorithm.EmptyControllerLister{}, algorithm.EmptyReplicaSetLister{}, algorithm.EmptyStatefulSetLister{}) }, Weight: 1, }, ) - // EqualPriority is a prioritizer function that gives an equal weight of one to all nodes // Register the priority function so that its available // but do not include it as part of the default priorities @@ -213,12 +212,13 @@ func defaultPriorities() sets.String { factory.RegisterPriorityConfigFactory( "SelectorSpreadPriority", factory.PriorityConfigFactory{ - Function: func(args factory.PluginFactoryArgs) algorithm.PriorityFunction { + MapReduceFunction: func(args factory.PluginFactoryArgs) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) { return priorities.NewSelectorSpreadPriority(args.ServiceLister, args.ControllerLister, args.ReplicaSetLister, args.StatefulSetLister) }, Weight: 1, }, ), + // pods should be placed in the same topological domain (e.g. same node, same rack, same zone, same power domain, etc.) // as some other pods, or, conversely, should not be placed in the same topological domain as some other pods. 
factory.RegisterPriorityConfigFactory( diff --git a/plugin/pkg/scheduler/core/generic_scheduler_test.go b/plugin/pkg/scheduler/core/generic_scheduler_test.go index ce4ff1300dc..e5890daa38e 100644 --- a/plugin/pkg/scheduler/core/generic_scheduler_test.go +++ b/plugin/pkg/scheduler/core/generic_scheduler_test.go @@ -522,18 +522,26 @@ func TestZeroRequest(t *testing.T) { priorityConfigs := []algorithm.PriorityConfig{ {Map: algorithmpriorities.LeastRequestedPriorityMap, Weight: 1}, {Map: algorithmpriorities.BalancedResourceAllocationMap, Weight: 1}, - { - Function: algorithmpriorities.NewSelectorSpreadPriority( - schedulertesting.FakeServiceLister([]*v1.Service{}), - schedulertesting.FakeControllerLister([]*v1.ReplicationController{}), - schedulertesting.FakeReplicaSetLister([]*extensions.ReplicaSet{}), - schedulertesting.FakeStatefulSetLister([]*apps.StatefulSet{})), - Weight: 1, - }, } + selectorSpreadPriorityMap, selectorSpreadPriorityReduce := algorithmpriorities.NewSelectorSpreadPriority( + schedulertesting.FakeServiceLister([]*v1.Service{}), + schedulertesting.FakeControllerLister([]*v1.ReplicationController{}), + schedulertesting.FakeReplicaSetLister([]*extensions.ReplicaSet{}), + schedulertesting.FakeStatefulSetLister([]*apps.StatefulSet{})) + pc := algorithm.PriorityConfig{Map: selectorSpreadPriorityMap, Reduce: selectorSpreadPriorityReduce, Weight: 1} + priorityConfigs = append(priorityConfigs, pc) + nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes) + + mataDataProducer := algorithmpriorities.NewPriorityMetadataFactory( + schedulertesting.FakeServiceLister([]*v1.Service{}), + schedulertesting.FakeControllerLister([]*v1.ReplicationController{}), + schedulertesting.FakeReplicaSetLister([]*extensions.ReplicaSet{}), + schedulertesting.FakeStatefulSetLister([]*apps.StatefulSet{})) + mataData := mataDataProducer(test.pod, nodeNameToInfo) + list, err := PrioritizeNodes( - test.pod, nodeNameToInfo, algorithm.EmptyMetadataProducer, priorityConfigs, + test.pod, nodeNameToInfo, mataData, priorityConfigs, schedulertesting.FakeNodeLister(test.nodes), []algorithm.SchedulerExtender{}) if err != nil { t.Errorf("unexpected error: %v", err) From 8fa59aa9b08b3a8b46376a35dfa955d6450a14a8 Mon Sep 17 00:00:00 2001 From: Gavin Date: Sun, 19 Nov 2017 11:08:02 +0800 Subject: [PATCH 3/3] address review comments --- .../priorities/selector_spreading.go | 54 +++++++++---------- .../priorities/selector_spreading_test.go | 1 - .../algorithmprovider/defaults/defaults.go | 1 - 3 files changed, 26 insertions(+), 30 deletions(-) diff --git a/plugin/pkg/scheduler/algorithm/priorities/selector_spreading.go b/plugin/pkg/scheduler/algorithm/priorities/selector_spreading.go index 940813f2cb9..aa195b0e4f8 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/selector_spreading.go +++ b/plugin/pkg/scheduler/algorithm/priorities/selector_spreading.go @@ -54,11 +54,13 @@ func NewSelectorSpreadPriority( return selectorSpread.CalculateSpreadPriorityMap, selectorSpread.CalculateSpreadPriorityReduce } -// CalculateSpreadPriorityMap spreads pods across hosts, considering pods belonging to the same service or replication controller. -// When a pod is scheduled, it looks for services, RCs or RSs that match the pod, then finds existing pods that match those selectors. +// CalculateSpreadPriorityMap spreads pods across hosts, considering pods +// belonging to the same service,RC,RS or StatefulSet. 
+// When a pod is scheduled, it looks for services, RCs,RSs and StatefulSets that match the pod, +// then finds existing pods that match those selectors. // It favors nodes that have fewer existing matching pods. // i.e. it pushes the scheduler towards a node where there's the smallest number of -// pods which match the same service, RC or RS selectors as the pod being scheduled. +// pods which match the same service, RC,RSs or StatefulSets selectors as the pod being scheduled. func (s *SelectorSpread) CalculateSpreadPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) { var selectors []labels.Selector node := nodeInfo.Node() @@ -80,7 +82,7 @@ func (s *SelectorSpread) CalculateSpreadPriorityMap(pod *v1.Pod, meta interface{ }, nil } - count := float64(0) + count := int(0) for _, nodePod := range nodeInfo.Pods() { if pod.Namespace != nodePod.Namespace { continue @@ -110,32 +112,24 @@ func (s *SelectorSpread) CalculateSpreadPriorityMap(pod *v1.Pod, meta interface{ }, nil } -// CalculateSpreadPriorityReduce calculates the source of each node based on the number of existing matching pods on the node -// where zone information is included on the nodes, it favors nodes in zones with fewer existing matching pods. +// CalculateSpreadPriorityReduce calculates the source of each node +// based on the number of existing matching pods on the node +// where zone information is included on the nodes, it favors nodes +// in zones with fewer existing matching pods. func (s *SelectorSpread) CalculateSpreadPriorityReduce(pod *v1.Pod, meta interface{}, nodeNameToInfo map[string]*schedulercache.NodeInfo, result schedulerapi.HostPriorityList) error { - var selectors []labels.Selector countsByZone := make(map[string]int, 10) maxCountByZone := int(0) maxCountByNodeName := int(0) - priorityMeta, ok := meta.(*priorityMetadata) - if ok { - selectors = priorityMeta.podSelectors - } else { - selectors = getSelectors(pod, s.serviceLister, s.controllerLister, s.replicaSetLister, s.statefulSetLister) - } - - if len(selectors) > 0 { - for i := range result { - if result[i].Score > maxCountByNodeName { - maxCountByNodeName = result[i].Score - } - zoneId := utilnode.GetZoneKey(nodeNameToInfo[result[i].Host].Node()) - if zoneId == "" { - continue - } - countsByZone[zoneId] += result[i].Score + for i := range result { + if result[i].Score > maxCountByNodeName { + maxCountByNodeName = result[i].Score } + zoneId := utilnode.GetZoneKey(nodeNameToInfo[result[i].Host].Node()) + if zoneId == "" { + continue + } + countsByZone[zoneId] += result[i].Score } for zoneId := range countsByZone { @@ -146,19 +140,23 @@ func (s *SelectorSpread) CalculateSpreadPriorityReduce(pod *v1.Pod, meta interfa haveZones := len(countsByZone) != 0 + maxCountByNodeNameFloat64 := float64(maxCountByNodeName) + maxCountByZoneFloat64 := float64(maxCountByZone) + MaxPriorityFloat64 := float64(schedulerapi.MaxPriority) + for i := range result { // initializing to the default/max node score of maxPriority - fScore := float64(schedulerapi.MaxPriority) + fScore := MaxPriorityFloat64 if maxCountByNodeName > 0 { - fScore = float64(schedulerapi.MaxPriority) * (float64(maxCountByNodeName-result[i].Score) / float64(maxCountByNodeName)) + fScore = MaxPriorityFloat64 * (float64(maxCountByNodeName-result[i].Score) / maxCountByNodeNameFloat64) } // If there is zone information present, incorporate it if haveZones { zoneId := utilnode.GetZoneKey(nodeNameToInfo[result[i].Host].Node()) if zoneId != "" { - zoneScore 
:= float64(schedulerapi.MaxPriority) + zoneScore := MaxPriorityFloat64 if maxCountByZone > 0 { - zoneScore = float64(schedulerapi.MaxPriority) * (float64(maxCountByZone-countsByZone[zoneId]) / float64(maxCountByZone)) + zoneScore = MaxPriorityFloat64 * (float64(maxCountByZone-countsByZone[zoneId]) / maxCountByZoneFloat64) } fScore = (fScore * (1.0 - zoneWeighting)) + (zoneWeighting * zoneScore) } diff --git a/plugin/pkg/scheduler/algorithm/priorities/selector_spreading_test.go b/plugin/pkg/scheduler/algorithm/priorities/selector_spreading_test.go index 0e7ed19ff9f..d3cb19cb635 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/selector_spreading_test.go +++ b/plugin/pkg/scheduler/algorithm/priorities/selector_spreading_test.go @@ -553,7 +553,6 @@ func TestZoneSelectorSpreadPriority(t *testing.T) { buildPod(nodeMachine1Zone2, labels1, controllerRef("ReplicationController", "name", "abc123")), buildPod(nodeMachine1Zone3, labels1, controllerRef("ReplicationController", "name", "abc123")), }, - //nodes: []string{nodeMachine1Zone3, nodeMachine1Zone2, nodeMachine1Zone3}, rcs: []*v1.ReplicationController{{Spec: v1.ReplicationControllerSpec{Selector: labels1}}}, expectedList: []schedulerapi.HostPriority{ // Note that because we put two pods on the same node (nodeMachine1Zone3), diff --git a/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go b/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go index d9242cf172f..99d80e566d7 100644 --- a/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go +++ b/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go @@ -218,7 +218,6 @@ func defaultPriorities() sets.String { Weight: 1, }, ), - // pods should be placed in the same topological domain (e.g. same node, same rack, same zone, same power domain, etc.) // as some other pods, or, conversely, should not be placed in the same topological domain as some other pods. factory.RegisterPriorityConfigFactory(
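
For readers following the refactor, the sketch below shows how the map/reduce split introduced in PATCH 1/3 (and tightened in PATCH 3/3) fits together outside the scheduler's own packages: the map step counts matching pods on a single node, and the reduce step turns those raw counts into 0..MaxPriority scores, blending in a per-zone score when zone labels are present. The pod/nodeInfo/hostPriority types are simplified stand-ins for v1.Pod, schedulercache.NodeInfo and schedulerapi.HostPriority, and maxPriority = 10 and zoneWeighting = 2/3 are assumed values standing in for the upstream constants; this is an illustration of the pattern, not the real scheduler API.

package main

import "fmt"

// Simplified stand-ins for the types the patch works with (v1.Pod,
// schedulercache.NodeInfo, schedulerapi.HostPriority).
type pod struct {
	labels map[string]string
}

type nodeInfo struct {
	name string
	zone string
	pods []pod
}

type hostPriority struct {
	host  string
	score int
}

// Assumed values standing in for schedulerapi.MaxPriority and the package's
// zoneWeighting constant.
const (
	maxPriority   = 10
	zoneWeighting = 2.0 / 3.0
)

// mapPriority mirrors CalculateSpreadPriorityMap: score one node independently
// by counting the pods on it that match the incoming pod's selector.
func mapPriority(sel map[string]string, ni nodeInfo) hostPriority {
	count := 0
	for _, p := range ni.pods {
		matches := true
		for k, v := range sel {
			if p.labels[k] != v {
				matches = false
				break
			}
		}
		if matches {
			count++
		}
	}
	return hostPriority{host: ni.name, score: count}
}

// reducePriority mirrors CalculateSpreadPriorityReduce: normalize raw counts to
// 0..maxPriority, blending in a per-zone score when zone labels are known.
func reducePriority(nodes map[string]nodeInfo, result []hostPriority) {
	maxCountByNode, maxCountByZone := 0, 0
	countsByZone := map[string]int{}
	for _, r := range result {
		if r.score > maxCountByNode {
			maxCountByNode = r.score
		}
		if zone := nodes[r.host].zone; zone != "" {
			countsByZone[zone] += r.score
		}
	}
	for _, c := range countsByZone {
		if c > maxCountByZone {
			maxCountByZone = c
		}
	}
	for i := range result {
		fScore := float64(maxPriority)
		if maxCountByNode > 0 {
			fScore = float64(maxPriority) * float64(maxCountByNode-result[i].score) / float64(maxCountByNode)
		}
		if zone := nodes[result[i].host].zone; zone != "" {
			zoneScore := float64(maxPriority)
			if maxCountByZone > 0 {
				zoneScore = float64(maxPriority) * float64(maxCountByZone-countsByZone[zone]) / float64(maxCountByZone)
			}
			fScore = fScore*(1.0-zoneWeighting) + zoneWeighting*zoneScore
		}
		result[i].score = int(fScore)
	}
}

func main() {
	sel := map[string]string{"app": "web"}
	nodes := map[string]nodeInfo{
		"node1": {name: "node1", zone: "zone-a", pods: []pod{{labels: sel}}},
		"node2": {name: "node2", zone: "zone-b"},
	}
	result := []hostPriority{}
	for _, ni := range nodes {
		result = append(result, mapPriority(sel, ni))
	}
	reducePriority(nodes, result)
	fmt.Println(result) // the empty node in the other zone ends up with the higher score
}

Compared with the removed CalculateSpreadPriority, each map call here needs only its own node's info, so the countsByNodeName map, its mutex and the workqueue.Parallelize call disappear; the single reduce pass does the cross-node and cross-zone aggregation instead.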
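
PATCH 2/3 moves the selector lookup into the priority metadata producer so that getSelectors runs once per pod being scheduled rather than once per node, with the map/reduce functions falling back to a direct lookup when the metadata is absent. Below is a minimal sketch of that factory-returns-a-producer pattern, using hypothetical lister and metadata types in place of algorithm.ServiceLister and the unexported priorityMetadata; it is an illustration under those assumptions, not the scheduler's real API.

package main

import "fmt"

// Hypothetical stand-ins for the scheduler's listers and selector type.
type selector map[string]string

type serviceLister interface {
	selectorsForPod(podName string) []selector
}

type fakeServiceLister map[string][]selector

func (f fakeServiceLister) selectorsForPod(podName string) []selector {
	return f[podName]
}

// priorityMetadata carries data that should be computed once per scheduling
// cycle and then shared by every per-node map call.
type priorityMetadata struct {
	podSelectors []selector
}

type metadataFactory struct {
	services serviceLister
}

// newMetadataProducer mirrors NewPriorityMetadataFactory: it captures the
// listers and hands back the factory's producer method as a closure.
func newMetadataProducer(services serviceLister) func(podName string) *priorityMetadata {
	factory := &metadataFactory{services: services}
	return factory.produce
}

func (m *metadataFactory) produce(podName string) *priorityMetadata {
	return &priorityMetadata{podSelectors: m.services.selectorsForPod(podName)}
}

func main() {
	produce := newMetadataProducer(fakeServiceLister{"web-0": {{"app": "web"}}})
	meta := produce("web-0") // selectors resolved once per pod ...
	for _, node := range []string{"node1", "node2"} {
		// ... and reused for every node instead of hitting the listers again.
		fmt.Println(node, meta.podSelectors)
	}
}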