Refactoring of priority function (CalculateSpreadPriority) by using map/reduce pattern
commit 4aa92bac73
parent ce910f249d
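This change converts the selector-spreading priority from a single all-nodes function into a map/reduce pair: the map step scores one node at a time (counting already-running pods that match the pod's selectors), and the reduce step post-processes the whole host list (normalizing raw counts into scores and folding in zone spreading). Below is a minimal, self-contained sketch of how such a pair can be driven. The types and the runPriority driver are simplified stand-ins for illustration only, not the scheduler's real algorithm.PriorityMapFunction/PriorityReduceFunction plumbing.

// map_reduce_priority_sketch.go
// Simplified stand-in types; the real scheduler uses v1.Pod, schedulercache.NodeInfo,
// schedulerapi.HostPriority, and so on.
package main

import "fmt"

type hostPriority struct {
	Host  string
	Score int
}

// Hypothetical simplified signatures mirroring the map/reduce split:
// map scores one node in isolation, reduce post-processes the whole list.
type priorityMapFunc func(nodeName string) (hostPriority, error)
type priorityReduceFunc func(result []hostPriority) error

// runPriority drives a map/reduce pair the way a generic scheduler loop might:
// call map once per node (parallelizable), then reduce once over the full list.
func runPriority(nodes []string, mapFn priorityMapFunc, reduceFn priorityReduceFunc) ([]hostPriority, error) {
	result := make([]hostPriority, 0, len(nodes))
	for _, n := range nodes {
		hp, err := mapFn(n)
		if err != nil {
			return nil, err
		}
		result = append(result, hp)
	}
	if reduceFn != nil {
		if err := reduceFn(result); err != nil {
			return nil, err
		}
	}
	return result, nil
}

func main() {
	// Toy "matching pod" counts per node; the real map function counts pods
	// matching the incoming pod's Service/RC/ReplicaSet/StatefulSet selectors.
	counts := map[string]int{"node-a": 3, "node-b": 0, "node-c": 1}

	mapFn := func(nodeName string) (hostPriority, error) {
		return hostPriority{Host: nodeName, Score: counts[nodeName]}, nil
	}

	// Reduce inverts raw counts into a 0..10 score: fewer matching pods => higher score.
	const maxPriority = 10
	reduceFn := func(result []hostPriority) error {
		maxCount := 0
		for _, hp := range result {
			if hp.Score > maxCount {
				maxCount = hp.Score
			}
		}
		for i := range result {
			score := maxPriority
			if maxCount > 0 {
				score = maxPriority * (maxCount - result[i].Score) / maxCount
			}
			result[i].Score = score
		}
		return nil
	}

	result, _ := runPriority([]string{"node-a", "node-b", "node-c"}, mapFn, reduceFn)
	fmt.Println(result) // [{node-a 0} {node-b 10} {node-c 6}]
}

The payoff of the split, visible in the diff below, is that the map step is trivially parallelizable per node and needs no shared state, while cross-node work (max counts, zone aggregation) is confined to the reduce step; the old implementation needed a mutex and workqueue.Parallelize to get the same effect inside one function.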
@@ -17,12 +17,10 @@ limitations under the License.
 package priorities
 
 import (
-	"sync"
+	"fmt"
 
 	"k8s.io/api/core/v1"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
-	"k8s.io/client-go/util/workqueue"
 	utilnode "k8s.io/kubernetes/pkg/util/node"
 	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
 	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
@@ -46,70 +44,44 @@ func NewSelectorSpreadPriority(
 	serviceLister algorithm.ServiceLister,
 	controllerLister algorithm.ControllerLister,
 	replicaSetLister algorithm.ReplicaSetLister,
-	statefulSetLister algorithm.StatefulSetLister) algorithm.PriorityFunction {
+	statefulSetLister algorithm.StatefulSetLister) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
 	selectorSpread := &SelectorSpread{
 		serviceLister:     serviceLister,
 		controllerLister:  controllerLister,
 		replicaSetLister:  replicaSetLister,
 		statefulSetLister: statefulSetLister,
 	}
-	return selectorSpread.CalculateSpreadPriority
+	return selectorSpread.CalculateSpreadPriorityMap, selectorSpread.CalculateSpreadPriorityReduce
 }
 
-// Returns selectors of services, RCs and RSs matching the given pod.
-func getSelectors(pod *v1.Pod, sl algorithm.ServiceLister, cl algorithm.ControllerLister, rsl algorithm.ReplicaSetLister, ssl algorithm.StatefulSetLister) []labels.Selector {
-	var selectors []labels.Selector
-	if services, err := sl.GetPodServices(pod); err == nil {
-		for _, service := range services {
-			selectors = append(selectors, labels.SelectorFromSet(service.Spec.Selector))
-		}
-	}
-	if rcs, err := cl.GetPodControllers(pod); err == nil {
-		for _, rc := range rcs {
-			selectors = append(selectors, labels.SelectorFromSet(rc.Spec.Selector))
-		}
-	}
-	if rss, err := rsl.GetPodReplicaSets(pod); err == nil {
-		for _, rs := range rss {
-			if selector, err := metav1.LabelSelectorAsSelector(rs.Spec.Selector); err == nil {
-				selectors = append(selectors, selector)
-			}
-		}
-	}
-	if sss, err := ssl.GetPodStatefulSets(pod); err == nil {
-		for _, ss := range sss {
-			if selector, err := metav1.LabelSelectorAsSelector(ss.Spec.Selector); err == nil {
-				selectors = append(selectors, selector)
-			}
-		}
-	}
-	return selectors
-}
-
-func (s *SelectorSpread) getSelectors(pod *v1.Pod) []labels.Selector {
-	return getSelectors(pod, s.serviceLister, s.controllerLister, s.replicaSetLister, s.statefulSetLister)
-}
-
-// CalculateSpreadPriority spreads pods across hosts and zones, considering pods belonging to the same service or replication controller.
+// CalculateSpreadPriorityMap spreads pods across hosts, considering pods belonging to the same service or replication controller.
 // When a pod is scheduled, it looks for services, RCs or RSs that match the pod, then finds existing pods that match those selectors.
 // It favors nodes that have fewer existing matching pods.
 // i.e. it pushes the scheduler towards a node where there's the smallest number of
 // pods which match the same service, RC or RS selectors as the pod being scheduled.
-// Where zone information is included on the nodes, it favors nodes in zones with fewer existing matching pods.
-func (s *SelectorSpread) CalculateSpreadPriority(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
-	selectors := s.getSelectors(pod)
-
-	// Count similar pods by node
-	countsByNodeName := make(map[string]float64, len(nodes))
-	countsByZone := make(map[string]float64, 10)
-	maxCountByNodeName := float64(0)
-	countsByNodeNameLock := sync.Mutex{}
-
-	if len(selectors) > 0 {
-		processNodeFunc := func(i int) {
-			nodeName := nodes[i].Name
+func (s *SelectorSpread) CalculateSpreadPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
+	var selectors []labels.Selector
+	node := nodeInfo.Node()
+	if node == nil {
+		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
+	}
+
+	priorityMeta, ok := meta.(*priorityMetadata)
+	if ok {
+		selectors = priorityMeta.podSelectors
+	} else {
+		selectors = getSelectors(pod, s.serviceLister, s.controllerLister, s.replicaSetLister, s.statefulSetLister)
+	}
+
+	if len(selectors) == 0 {
+		return schedulerapi.HostPriority{
+			Host:  node.Name,
+			Score: int(0),
+		}, nil
+	}
+
 	count := float64(0)
-			for _, nodePod := range nodeNameToInfo[nodeName].Pods() {
+	for _, nodePod := range nodeInfo.Pods() {
 		if pod.Namespace != nodePod.Namespace {
 			continue
 		}
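The CalculateSpreadPriorityMap added above counts, for a single node, the existing pods in the same namespace whose labels are matched by any selector collected from the pod's Services, RCs, ReplicaSets or StatefulSets. As an illustration of that matching step, here is a small standalone example that uses only k8s.io/apimachinery/pkg/labels; the selectors and label sets are invented for the example.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/labels"
)

func main() {
	// Selectors that getSelectors would have collected from the pod's
	// Service / ReplicationController / ReplicaSet / StatefulSet owners.
	selectors := []labels.Selector{
		labels.SelectorFromSet(labels.Set{"app": "web"}),               // e.g. a Service selector
		labels.SelectorFromSet(labels.Set{"app": "web", "tier": "fe"}), // e.g. a ReplicaSet selector
	}

	// Labels of a pod already running on the node being scored.
	existingPodLabels := labels.Set{"app": "web", "tier": "fe", "pod-template-hash": "12345"}

	// The map step counts an existing pod if ANY collected selector matches it.
	matches := false
	for _, selector := range selectors {
		if selector.Matches(existingPodLabels) {
			matches = true
			break
		}
	}
	fmt.Println("counts toward spreading:", matches) // true
}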
@@ -132,63 +104,75 @@ func (s *SelectorSpread) CalculateSpreadPriority(pod *v1.Pod, nodeNameToInfo map
 			count++
 		}
 	}
-			zoneId := utilnode.GetZoneKey(nodes[i])
-			countsByNodeNameLock.Lock()
-			defer countsByNodeNameLock.Unlock()
-			countsByNodeName[nodeName] = count
-			if count > maxCountByNodeName {
-				maxCountByNodeName = count
-			}
-			if zoneId != "" {
-				countsByZone[zoneId] += count
-			}
-		}
-		workqueue.Parallelize(16, len(nodes), processNodeFunc)
+	return schedulerapi.HostPriority{
+		Host:  node.Name,
+		Score: int(count),
+	}, nil
+}
+
+// CalculateSpreadPriorityReduce calculates the source of each node based on the number of existing matching pods on the node
+// where zone information is included on the nodes, it favors nodes in zones with fewer existing matching pods.
+func (s *SelectorSpread) CalculateSpreadPriorityReduce(pod *v1.Pod, meta interface{}, nodeNameToInfo map[string]*schedulercache.NodeInfo, result schedulerapi.HostPriorityList) error {
+	var selectors []labels.Selector
+	countsByZone := make(map[string]int, 10)
+	maxCountByZone := int(0)
+	maxCountByNodeName := int(0)
+
+	priorityMeta, ok := meta.(*priorityMetadata)
+	if ok {
+		selectors = priorityMeta.podSelectors
+	} else {
+		selectors = getSelectors(pod, s.serviceLister, s.controllerLister, s.replicaSetLister, s.statefulSetLister)
+	}
+
+	if len(selectors) > 0 {
+		for i := range result {
+			if result[i].Score > maxCountByNodeName {
+				maxCountByNodeName = result[i].Score
+			}
+			zoneId := utilnode.GetZoneKey(nodeNameToInfo[result[i].Host].Node())
+			if zoneId == "" {
+				continue
+			}
+			countsByZone[zoneId] += result[i].Score
+		}
+	}
+
+	for zoneId := range countsByZone {
+		if countsByZone[zoneId] > maxCountByZone {
+			maxCountByZone = countsByZone[zoneId]
+		}
 	}
 
-	// Aggregate by-zone information
-	// Compute the maximum number of pods hosted in any zone
 	haveZones := len(countsByZone) != 0
-	maxCountByZone := float64(0)
-	for _, count := range countsByZone {
-		if count > maxCountByZone {
-			maxCountByZone = count
-		}
-	}
 
-	result := make(schedulerapi.HostPriorityList, 0, len(nodes))
-	//score int - scale of 0-maxPriority
-	// 0 being the lowest priority and maxPriority being the highest
-	for _, node := range nodes {
+	for i := range result {
 		// initializing to the default/max node score of maxPriority
 		fScore := float64(schedulerapi.MaxPriority)
 		if maxCountByNodeName > 0 {
-			fScore = float64(schedulerapi.MaxPriority) * ((maxCountByNodeName - countsByNodeName[node.Name]) / maxCountByNodeName)
+			fScore = float64(schedulerapi.MaxPriority) * (float64(maxCountByNodeName-result[i].Score) / float64(maxCountByNodeName))
 		}
 
 		// If there is zone information present, incorporate it
 		if haveZones {
-			zoneId := utilnode.GetZoneKey(node)
+			zoneId := utilnode.GetZoneKey(nodeNameToInfo[result[i].Host].Node())
 			if zoneId != "" {
 				zoneScore := float64(schedulerapi.MaxPriority)
 				if maxCountByZone > 0 {
-					zoneScore = float64(schedulerapi.MaxPriority) * ((maxCountByZone - countsByZone[zoneId]) / maxCountByZone)
+					zoneScore = float64(schedulerapi.MaxPriority) * (float64(maxCountByZone-countsByZone[zoneId]) / float64(maxCountByZone))
 				}
 				fScore = (fScore * (1.0 - zoneWeighting)) + (zoneWeighting * zoneScore)
 			}
 		}
-		result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: int(fScore)})
+		result[i].Score = int(fScore)
 		if glog.V(10) {
 			// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
 			// not logged. There is visible performance gain from it.
 			glog.V(10).Infof(
-				"%v -> %v: SelectorSpreadPriority, Score: (%d)", pod.Name, node.Name, int(fScore),
+				"%v -> %v: SelectorSpreadPriority, Score: (%d)", pod.Name, result[i].Host, int(fScore),
 			)
 		}
 	}
-	return result, nil
+	return nil
 }
 
 type ServiceAntiAffinity struct {
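The reduce step's scoring arithmetic in the hunk above is worth spelling out: each node's raw matching-pod count is inverted onto a 0..MaxPriority scale, a zone-level score is computed the same way from zone totals, and the two are blended. The following worked example assumes MaxPriority is 10 and zoneWeighting is 2.0/3.0 (the constant defined elsewhere in this file); the counts are invented.

package main

import "fmt"

func main() {
	const (
		maxPriority   = 10
		zoneWeighting = 2.0 / 3.0 // assumed value; defined outside this hunk
	)

	// Raw outputs of the map step (matching-pod counts), plus zone aggregates.
	nodeCount, maxCountByNodeName := 2, 4 // this node has 2 matching pods; the busiest node has 4
	zoneCount, maxCountByZone := 6, 8     // this node's zone has 6 matching pods; the busiest zone has 8

	// Node-level score: fewer matching pods => closer to maxPriority.
	fScore := float64(maxPriority) * (float64(maxCountByNodeName-nodeCount) / float64(maxCountByNodeName)) // 10 * 2/4 = 5.0

	// Zone-level score, computed the same way over zone totals.
	zoneScore := float64(maxPriority) * (float64(maxCountByZone-zoneCount) / float64(maxCountByZone)) // 10 * 2/8 = 2.5

	// Blend: zone spreading gets zoneWeighting of the weight, node spreading the rest.
	fScore = fScore*(1.0-zoneWeighting) + zoneWeighting*zoneScore // 5.0*(1/3) + (2/3)*2.5 ≈ 3.33

	fmt.Printf("final score: %d\n", int(fScore)) // final score: 3
}

With zoneWeighting at 2/3, zone spreading carries more weight than per-node spreading whenever zone labels are present, which is what the CalculateSpreadPriorityReduce comment above describes.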