Mirror of https://github.com/k3s-io/kubernetes.git
Parallelize computing selector spreading priority in scheduler.
Parent: a690c2ca76
Commit: ebcc8f737c
@@ -17,10 +17,13 @@ limitations under the License.
 package priorities
 
 import (
+	"sync"
+
 	"github.com/golang/glog"
 	"k8s.io/kubernetes/pkg/api"
 	"k8s.io/kubernetes/pkg/api/unversioned"
 	"k8s.io/kubernetes/pkg/labels"
+	utilruntime "k8s.io/kubernetes/pkg/util/runtime"
 	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
 	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
 	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
@@ -79,8 +82,6 @@ func getZoneKey(node *api.Node) string {
 // pods which match the same service selectors or RC selectors as the pod being scheduled.
 // Where zone information is included on the nodes, it favors nodes in zones with fewer existing matching pods.
 func (s *SelectorSpread) CalculateSpreadPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) {
-	var nsPods []*api.Pod
-
 	selectors := make([]labels.Selector, 0)
 	services, err := s.serviceLister.GetPodServices(pod)
 	if err == nil {
@@ -103,19 +104,6 @@ func (s *SelectorSpread) CalculateSpreadPriority(pod *api.Pod, nodeNameToInfo ma
 		}
 	}
 
-	if len(selectors) > 0 {
-		pods, err := s.podLister.List(labels.Everything())
-		if err != nil {
-			return nil, err
-		}
-		// consider only the pods that belong to the same namespace
-		for _, nsPod := range pods {
-			if nsPod.Namespace == pod.Namespace {
-				nsPods = append(nsPods, nsPod)
-			}
-		}
-	}
-
 	nodes, err := nodeLister.List()
 	if err != nil {
 		return nil, err
@@ -123,26 +111,63 @@ func (s *SelectorSpread) CalculateSpreadPriority(pod *api.Pod, nodeNameToInfo ma
 
 	// Count similar pods by node
 	countsByNodeName := map[string]int{}
-	for _, pod := range nsPods {
-		// When we are replacing a failed pod, we often see the previous deleted version
-		// while scheduling the replacement. Ignore the previous deleted version for spreading
-		// purposes (it can still be considered for resource restrictions etc.)
-		if pod.DeletionTimestamp != nil {
-			glog.V(2).Infof("skipping pending-deleted pod: %s/%s", pod.Namespace, pod.Name)
-			continue
-		}
-		matches := false
-		for _, selector := range selectors {
-			if selector.Matches(labels.Set(pod.ObjectMeta.Labels)) {
-				matches = true
-				break
-			}
-		}
-		if !matches {
-			continue
-		}
+	countsByNodeNameLock := sync.Mutex{}
 
-		countsByNodeName[pod.Spec.NodeName]++
+	if len(selectors) > 0 {
+		// Create a number of go-routines that will be computing number
+		// of "similar" pods for given nodes.
+		workers := 16
+		toProcess := make(chan string, len(nodes.Items))
+		for i := range nodes.Items {
+			toProcess <- nodes.Items[i].Name
+		}
+		close(toProcess)
+
+		wg := sync.WaitGroup{}
+		wg.Add(workers)
+		for i := 0; i < workers; i++ {
+			go func() {
+				defer utilruntime.HandleCrash()
+				defer wg.Done()
+				for {
+					nodeName, ok := <-toProcess
+					if !ok {
+						return
+					}
+					count := 0
+					for _, nodePod := range nodeNameToInfo[nodeName].Pods() {
+						if pod.Namespace != nodePod.Namespace {
+							continue
+						}
+						// When we are replacing a failed pod, we often see the previous
+						// deleted version while scheduling the replacement.
+						// Ignore the previous deleted version for spreading purposes
+						// (it can still be considered for resource restrictions etc.)
+						if nodePod.DeletionTimestamp != nil {
+							glog.V(4).Infof("skipping pending-deleted pod: %s/%s", nodePod.Namespace, nodePod.Name)
+							continue
+						}
+						matches := false
+						for _, selector := range selectors {
+							if selector.Matches(labels.Set(nodePod.ObjectMeta.Labels)) {
+								matches = true
+								break
+							}
+						}
+						if matches {
+							count++
+						}
+					}
+
+					func() {
+						countsByNodeNameLock.Lock()
+						defer countsByNodeNameLock.Unlock()
+						countsByNodeName[nodeName] = count
+					}()
+				}
+			}()
+		}
+		wg.Wait()
 	}
 
 	// Aggregate by-node information
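Note: the following is a minimal, self-contained sketch of the worker-pool pattern this commit introduces, not the scheduler code itself. The helper countMatchingPods and the sample data are hypothetical stand-ins for the per-node selector matching against nodeNameToInfo; only the channel / WaitGroup / mutex structure mirrors the diff above.

	package main

	import (
		"fmt"
		"sync"
	)

	// countMatchingPods stands in for the per-node work each worker performs
	// in the diff (iterating the node's pods and matching selectors).
	func countMatchingPods(nodeName string, podsByNode map[string][]string) int {
		return len(podsByNode[nodeName])
	}

	func main() {
		// Illustrative input: pods already indexed by node name.
		podsByNode := map[string][]string{
			"node-1": {"web-1", "web-2"},
			"node-2": {"web-3"},
			"node-3": {},
		}
		nodeNames := []string{"node-1", "node-2", "node-3"}

		// Buffer the channel for all items and close it up front, so workers
		// simply receive until the channel is drained.
		workers := 16
		toProcess := make(chan string, len(nodeNames))
		for _, name := range nodeNames {
			toProcess <- name
		}
		close(toProcess)

		countsByNodeName := map[string]int{}
		countsByNodeNameLock := sync.Mutex{}

		wg := sync.WaitGroup{}
		wg.Add(workers)
		for i := 0; i < workers; i++ {
			go func() {
				defer wg.Done()
				for nodeName := range toProcess {
					count := countMatchingPods(nodeName, podsByNode)

					// Only the map write is serialized; the counting above
					// runs concurrently across workers.
					countsByNodeNameLock.Lock()
					countsByNodeName[nodeName] = count
					countsByNodeNameLock.Unlock()
				}
			}()
		}
		wg.Wait()

		fmt.Println(countsByNodeName)
	}

Because the channel is buffered for every node name and closed before the workers start, no producer goroutine is needed and each worker exits as soon as the channel drains; the mutex guards only the write into countsByNodeName, so the per-node counting itself proceeds in parallel across the workers.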