Optimize selector spreading
commit ae6b66207a
parent b9d13c5dbd
@@ -24,7 +24,7 @@ import (
     "k8s.io/kubernetes/pkg/api/unversioned"
     "k8s.io/kubernetes/pkg/labels"
     utilnode "k8s.io/kubernetes/pkg/util/node"
-    utilruntime "k8s.io/kubernetes/pkg/util/runtime"
+    "k8s.io/kubernetes/pkg/util/workqueue"
     "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
     schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
     "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
@@ -32,7 +32,7 @@ import (
 
 // The maximum priority value to give to a node
 // Prioritiy values range from 0-maxPriority
-const maxPriority = 10
+const maxPriority float32 = 10
 
 // When zone information is present, give 2/3 of the weighting to zone spreading, 1/3 to node spreading
 // TODO: Any way to justify this weighting?
@@ -62,21 +62,18 @@ func NewSelectorSpreadPriority(podLister algorithm.PodLister, serviceLister algo
 // pods which match the same service selectors or RC selectors as the pod being scheduled.
 // Where zone information is included on the nodes, it favors nodes in zones with fewer existing matching pods.
 func (s *SelectorSpread) CalculateSpreadPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) {
-    selectors := make([]labels.Selector, 0)
-    services, err := s.serviceLister.GetPodServices(pod)
-    if err == nil {
+    selectors := make([]labels.Selector, 0, 3)
+    if services, err := s.serviceLister.GetPodServices(pod); err == nil {
         for _, service := range services {
             selectors = append(selectors, labels.SelectorFromSet(service.Spec.Selector))
         }
     }
-    rcs, err := s.controllerLister.GetPodControllers(pod)
-    if err == nil {
+    if rcs, err := s.controllerLister.GetPodControllers(pod); err == nil {
         for _, rc := range rcs {
             selectors = append(selectors, labels.SelectorFromSet(rc.Spec.Selector))
         }
     }
-    rss, err := s.replicaSetLister.GetPodReplicaSets(pod)
-    if err == nil {
+    if rss, err := s.replicaSetLister.GetPodReplicaSets(pod); err == nil {
         for _, rs := range rss {
             if selector, err := unversioned.LabelSelectorAsSelector(rs.Spec.Selector); err == nil {
                 selectors = append(selectors, selector)
@@ -90,32 +87,15 @@ func (s *SelectorSpread) CalculateSpreadPriority(pod *api.Pod, nodeNameToInfo ma
     }
 
     // Count similar pods by node
-    countsByNodeName := map[string]int{}
+    countsByNodeName := make(map[string]float32, len(nodes))
+    countsByZone := make(map[string]float32, 10)
+    maxCountByNodeName := float32(0)
     countsByNodeNameLock := sync.Mutex{}
 
     if len(selectors) > 0 {
-        // Create a number of go-routines that will be computing number
-        // of "similar" pods for given nodes.
-        workers := 16
-        toProcess := make(chan string, len(nodes))
-        for i := range nodes {
-            toProcess <- nodes[i].Name
-        }
-        close(toProcess)
-
-        // TODO: Use Parallelize.
-        wg := sync.WaitGroup{}
-        wg.Add(workers)
-        for i := 0; i < workers; i++ {
-            go func() {
-                defer utilruntime.HandleCrash()
-                defer wg.Done()
-                for {
-                    nodeName, ok := <-toProcess
-                    if !ok {
-                        return
-                    }
-                    count := 0
+        processNodeFunc := func(i int) {
+            nodeName := nodes[i].Name
+            count := float32(0)
             for _, nodePod := range nodeNameToInfo[nodeName].Pods() {
                 if pod.Namespace != nodePod.Namespace {
                     continue
@@ -139,47 +119,25 @@ func (s *SelectorSpread) CalculateSpreadPriority(pod *api.Pod, nodeNameToInfo ma
                     count++
                 }
             }
+            zoneId := utilnode.GetZoneKey(nodes[i])
 
-                    func() {
             countsByNodeNameLock.Lock()
             defer countsByNodeNameLock.Unlock()
             countsByNodeName[nodeName] = count
-                    }()
-                }
-            }()
-        }
-        wg.Wait()
-    }
-
-    // Aggregate by-node information
-    // Compute the maximum number of pods hosted on any node
-    maxCountByNodeName := 0
-    for _, count := range countsByNodeName {
             if count > maxCountByNodeName {
                 maxCountByNodeName = count
             }
-    }
-
-    // Count similar pods by zone, if zone information is present
-    countsByZone := map[string]int{}
-    for _, node := range nodes {
-        count, found := countsByNodeName[node.Name]
-        if !found {
-            continue
-        }
-
-        zoneId := utilnode.GetZoneKey(node)
-        if zoneId == "" {
-            continue
-        }
-
+            if zoneId != "" {
                 countsByZone[zoneId] += count
             }
+        }
+        workqueue.Parallelize(16, len(nodes), processNodeFunc)
+    }
 
     // Aggregate by-zone information
     // Compute the maximum number of pods hosted in any zone
     haveZones := len(countsByZone) != 0
-    maxCountByZone := 0
+    maxCountByZone := float32(0)
     for _, count := range countsByZone {
         if count > maxCountByZone {
             maxCountByZone = count
@@ -191,25 +149,29 @@ func (s *SelectorSpread) CalculateSpreadPriority(pod *api.Pod, nodeNameToInfo ma
     // 0 being the lowest priority and maxPriority being the highest
     for _, node := range nodes {
         // initializing to the default/max node score of maxPriority
-        fScore := float32(maxPriority)
+        fScore := maxPriority
         if maxCountByNodeName > 0 {
-            fScore = maxPriority * (float32(maxCountByNodeName-countsByNodeName[node.Name]) / float32(maxCountByNodeName))
+            fScore = maxPriority * ((maxCountByNodeName - countsByNodeName[node.Name]) / maxCountByNodeName)
         }
 
         // If there is zone information present, incorporate it
         if haveZones {
             zoneId := utilnode.GetZoneKey(node)
             if zoneId != "" {
-                zoneScore := maxPriority * (float32(maxCountByZone-countsByZone[zoneId]) / float32(maxCountByZone))
+                zoneScore := maxPriority * ((maxCountByZone - countsByZone[zoneId]) / maxCountByZone)
                 fScore = (fScore * (1.0 - zoneWeighting)) + (zoneWeighting * zoneScore)
             }
         }
 
         result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: int(fScore)})
+        if glog.V(10) {
+            // We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
+            // not logged. There is visible performance gain from it.
             glog.V(10).Infof(
                 "%v -> %v: SelectorSpreadPriority, Score: (%d)", pod.Name, node.Name, int(fScore),
             )
+        }
     }
     return result, nil
 }
 
@@ -234,8 +196,7 @@ func NewServiceAntiAffinityPriority(podLister algorithm.PodLister, serviceLister
 func (s *ServiceAntiAffinity) CalculateAntiAffinityPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) {
     var nsServicePods []*api.Pod
 
-    services, err := s.serviceLister.GetPodServices(pod)
-    if err == nil {
+    if services, err := s.serviceLister.GetPodServices(pod); err == nil {
         // just use the first service and get the other pods within the service
         // TODO: a separate predicate can be created that tries to handle all services for the pod
         selector := labels.SelectorFromSet(services[0].Spec.Selector)
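
The heart of the change is replacing the hand-rolled worker pool (a channel of node names, 16 goroutines, and a WaitGroup) with a single call to workqueue.Parallelize, which takes a worker count, the number of work pieces, and a function that processes one piece by index. Below is a minimal, self-contained sketch of that pattern; the parallelize helper is a simplified stand-in written for this example, not the actual k8s.io/kubernetes/pkg/util/workqueue implementation, and the per-node work is a placeholder.

package main

import (
	"fmt"
	"sync"
)

// parallelize is a stand-in for workqueue.Parallelize: it spreads
// `pieces` independent work items over `workers` goroutines.
func parallelize(workers, pieces int, doWorkPiece func(piece int)) {
	toProcess := make(chan int, pieces)
	for i := 0; i < pieces; i++ {
		toProcess <- i
	}
	close(toProcess)

	var wg sync.WaitGroup
	wg.Add(workers)
	for i := 0; i < workers; i++ {
		go func() {
			defer wg.Done()
			for piece := range toProcess {
				doWorkPiece(piece)
			}
		}()
	}
	wg.Wait()
}

func main() {
	nodes := []string{"node-a", "node-b", "node-c", "node-d"}
	counts := make([]int, len(nodes))

	// Each piece handles exactly one node, so writes to counts[i] never race.
	parallelize(16, len(nodes), func(i int) {
		counts[i] = len(nodes[i]) // placeholder for "count matching pods on this node"
	})
	fmt.Println(counts)
}

In the diff above the results go into shared maps rather than a per-index slice, so processNodeFunc still takes countsByNodeNameLock around its writes; the simplification is in the goroutine plumbing, not the locking.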
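The scoring arithmetic itself is unchanged in spirit: a node's score falls linearly from maxPriority (no matching pods) down to 0 (the most crowded node), and when zone labels are present the zone score is blended in with the 2/3 zoneWeighting defined at the top of the file; switching the counts to float32 simply removes the per-iteration casts. A small sketch of that formula with made-up counts follows; the spreadScore helper and the sample numbers are illustrative, not part of the commit.

package main

import "fmt"

const maxPriority float32 = 10

// zoneWeighting mirrors the 2/3 zone / 1/3 node split used by the priority.
const zoneWeighting float32 = 2.0 / 3.0

// spreadScore reproduces the scoring arithmetic from the diff above.
func spreadScore(countOnNode, maxCountByNode, countInZone, maxCountByZone float32) float32 {
	fScore := maxPriority
	if maxCountByNode > 0 {
		fScore = maxPriority * ((maxCountByNode - countOnNode) / maxCountByNode)
	}
	if maxCountByZone > 0 {
		zoneScore := maxPriority * ((maxCountByZone - countInZone) / maxCountByZone)
		fScore = fScore*(1.0-zoneWeighting) + zoneWeighting*zoneScore
	}
	return fScore
}

func main() {
	// Hypothetical counts: 1 matching pod on the node (max 3 on any node),
	// 4 in its zone (max 6 in any zone).
	// node part: 10*(2/3)=6.67, zone part: 10*(2/6)=3.33
	// blended: 6.67*(1/3) + 3.33*(2/3) = 4.44, truncated to 4.
	fmt.Printf("score: %d\n", int(spreadScore(1, 3, 4, 6)))
}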