mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-27 21:47:07 +00:00
Merge pull request #29109 from wojtek-t/pod_affinity_predicate
Automatic merge from submit-queue More pod-affinity code cleanup and prepare for parallelization Ref #26144
This commit is contained in:
commit
ab8f77263b
@ -802,41 +802,84 @@ func (checker *PodAffinityChecker) InterPodAffinityMatches(pod *api.Pod, meta in
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return false, err
|
return false, err
|
||||||
}
|
}
|
||||||
if checker.NodeMatchPodAffinityAntiAffinity(pod, allPods, node) {
|
affinity, err := api.GetAffinityFromPodAnnotations(pod.Annotations)
|
||||||
return true, nil
|
|
||||||
}
|
|
||||||
return false, ErrPodAffinityNotMatch
|
|
||||||
}
|
|
||||||
|
|
||||||
// AnyPodMatchesPodAffinityTerm checks if any of given pods can match the specific podAffinityTerm.
|
|
||||||
func (checker *PodAffinityChecker) AnyPodMatchesPodAffinityTerm(pod *api.Pod, allPods []*api.Pod, node *api.Node, podAffinityTerm api.PodAffinityTerm) (bool, error) {
|
|
||||||
for _, ep := range allPods {
|
|
||||||
epNode, err := checker.info.GetNodeInfo(ep.Spec.NodeName)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, err
|
return false, err
|
||||||
}
|
}
|
||||||
match, err := checker.failureDomains.CheckIfPodMatchPodAffinityTerm(ep, epNode, node, pod, podAffinityTerm)
|
|
||||||
if err != nil || match {
|
// Check if the current node match the inter-pod affinity scheduling constraints.
|
||||||
return match, err
|
// Hard inter-pod affinity is not symmetric, check only when affinity.PodAffinity exists.
|
||||||
|
if affinity.PodAffinity != nil {
|
||||||
|
if !checker.NodeMatchesHardPodAffinity(pod, allPods, node, affinity.PodAffinity) {
|
||||||
|
return false, ErrPodAffinityNotMatch
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false, nil
|
|
||||||
|
// Hard inter-pod anti-affinity is symmetric, we should always check it.
|
||||||
|
if !checker.NodeMatchesHardPodAntiAffinity(pod, allPods, node, affinity.PodAntiAffinity) {
|
||||||
|
return false, ErrPodAffinityNotMatch
|
||||||
|
}
|
||||||
|
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// AnyPodMatchesPodAffinityTerm checks if any of given pods can match the specific podAffinityTerm.
|
||||||
|
// First return value indicates whether a matching pod exists on a node that matches the topology key,
|
||||||
|
// while the second return value indicates whether a matching pod exists anywhere.
|
||||||
|
// TODO: Do we really need any pod matching, or all pods matching? I think the latter.
|
||||||
|
func (checker *PodAffinityChecker) AnyPodMatchesPodAffinityTerm(pod *api.Pod, allPods []*api.Pod, node *api.Node, podAffinityTerm api.PodAffinityTerm) (bool, bool, error) {
|
||||||
|
matchingPodExists := false
|
||||||
|
for _, ep := range allPods {
|
||||||
|
epNode, err := checker.info.GetNodeInfo(ep.Spec.NodeName)
|
||||||
|
if err != nil {
|
||||||
|
return false, matchingPodExists, err
|
||||||
|
}
|
||||||
|
match, err := priorityutil.PodMatchesTermsNamespaceAndSelector(ep, pod, &podAffinityTerm)
|
||||||
|
if err != nil {
|
||||||
|
return false, matchingPodExists, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if match {
|
||||||
|
matchingPodExists = true
|
||||||
|
if checker.failureDomains.NodesHaveSameTopologyKey(node, epNode, podAffinityTerm.TopologyKey) {
|
||||||
|
return true, matchingPodExists, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false, matchingPodExists, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getPodAffinityTerms(podAffinity *api.PodAffinity) (terms []api.PodAffinityTerm) {
|
||||||
|
if podAffinity != nil {
|
||||||
|
if len(podAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 {
|
||||||
|
terms = podAffinity.RequiredDuringSchedulingIgnoredDuringExecution
|
||||||
|
}
|
||||||
|
// TODO: Uncomment this block when implement RequiredDuringSchedulingRequiredDuringExecution.
|
||||||
|
//if len(podAffinity.RequiredDuringSchedulingRequiredDuringExecution) != 0 {
|
||||||
|
// terms = append(terms, podAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
|
||||||
|
//}
|
||||||
|
}
|
||||||
|
return terms
|
||||||
|
}
|
||||||
|
|
||||||
|
func getPodAntiAffinityTerms(podAntiAffinity *api.PodAntiAffinity) (terms []api.PodAffinityTerm) {
|
||||||
|
if podAntiAffinity != nil {
|
||||||
|
if len(podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 {
|
||||||
|
terms = podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution
|
||||||
|
}
|
||||||
|
// TODO: Uncomment this block when implement RequiredDuringSchedulingRequiredDuringExecution.
|
||||||
|
//if len(podAntiAffinity.RequiredDuringSchedulingRequiredDuringExecution) != 0 {
|
||||||
|
// terms = append(terms, podAntiAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
|
||||||
|
//}
|
||||||
|
}
|
||||||
|
return terms
|
||||||
}
|
}
|
||||||
|
|
||||||
// Checks whether the given node has pods which satisfy all the required pod affinity scheduling rules.
|
// Checks whether the given node has pods which satisfy all the required pod affinity scheduling rules.
|
||||||
// If node has pods which satisfy all the required pod affinity scheduling rules then return true.
|
// If node has pods which satisfy all the required pod affinity scheduling rules then return true.
|
||||||
func (checker *PodAffinityChecker) NodeMatchesHardPodAffinity(pod *api.Pod, allPods []*api.Pod, node *api.Node, podAffinity *api.PodAffinity) bool {
|
func (checker *PodAffinityChecker) NodeMatchesHardPodAffinity(pod *api.Pod, allPods []*api.Pod, node *api.Node, podAffinity *api.PodAffinity) bool {
|
||||||
var podAffinityTerms []api.PodAffinityTerm
|
for _, podAffinityTerm := range getPodAffinityTerms(podAffinity) {
|
||||||
if len(podAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 {
|
podAffinityTermMatches, matchingPodExists, err := checker.AnyPodMatchesPodAffinityTerm(pod, allPods, node, podAffinityTerm)
|
||||||
podAffinityTerms = podAffinity.RequiredDuringSchedulingIgnoredDuringExecution
|
|
||||||
}
|
|
||||||
// TODO: Uncomment this block when implement RequiredDuringSchedulingRequiredDuringExecution.
|
|
||||||
//if len(podAffinity.RequiredDuringSchedulingRequiredDuringExecution) != 0 {
|
|
||||||
// podAffinityTerms = append(podAffinityTerms, podAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
|
|
||||||
//}
|
|
||||||
|
|
||||||
for _, podAffinityTerm := range podAffinityTerms {
|
|
||||||
podAffinityTermMatches, err := checker.AnyPodMatchesPodAffinityTerm(pod, allPods, node, podAffinityTerm)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, an error ocurred when checking existing pods on the node for PodAffinityTerm %v err: %v",
|
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, an error ocurred when checking existing pods on the node for PodAffinityTerm %v err: %v",
|
||||||
podName(pod), node.Name, podAffinityTerm, err)
|
podName(pod), node.Name, podAffinityTerm, err)
|
||||||
@ -844,31 +887,16 @@ func (checker *PodAffinityChecker) NodeMatchesHardPodAffinity(pod *api.Pod, allP
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !podAffinityTermMatches {
|
if !podAffinityTermMatches {
|
||||||
// TODO: Think about whether this can be simplified once we have controllerRef
|
|
||||||
// Check if it is in special case that the requiredDuringScheduling affinity requirement can be disregarded.
|
|
||||||
// If the requiredDuringScheduling affinity requirement matches a pod's own labels and namespace, and there are no other such pods
|
// If the requiredDuringScheduling affinity requirement matches a pod's own labels and namespace, and there are no other such pods
|
||||||
// anywhere, then disregard the requirement.
|
// anywhere, then disregard the requirement.
|
||||||
// This allows rules like "schedule all of the pods of this collection to the same zone" to not block forever
|
// This allows rules like "schedule all of the pods of this collection to the same zone" to not block forever
|
||||||
// because the first pod of the collection can't be scheduled.
|
// because the first pod of the collection can't be scheduled.
|
||||||
names := priorityutil.GetNamespacesFromPodAffinityTerm(pod, podAffinityTerm)
|
match, err := priorityutil.PodMatchesTermsNamespaceAndSelector(pod, pod, &podAffinityTerm)
|
||||||
labelSelector, err := unversioned.LabelSelectorAsSelector(podAffinityTerm.LabelSelector)
|
if err != nil || !match || matchingPodExists {
|
||||||
if err != nil || !names.Has(pod.Namespace) || !labelSelector.Matches(labels.Set(pod.Labels)) {
|
|
||||||
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because none of the existing pods on this node satisfy the PodAffinityTerm %v, err: %+v",
|
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because none of the existing pods on this node satisfy the PodAffinityTerm %v, err: %+v",
|
||||||
podName(pod), node.Name, podAffinityTerm, err)
|
podName(pod), node.Name, podAffinityTerm, err)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// the affinity is to put the pod together with other pods from its same service or controller
|
|
||||||
filteredPods := priorityutil.FilterPodsByNameSpaces(names, allPods)
|
|
||||||
for _, filteredPod := range filteredPods {
|
|
||||||
// if found an existing pod from same service or RC,
|
|
||||||
// the affinity scheduling rules cannot be disregarded.
|
|
||||||
if labelSelector.Matches(labels.Set(filteredPod.Labels)) {
|
|
||||||
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because none of the existing pods on this node satisfy the PodAffinityTerm %v",
|
|
||||||
podName(pod), node.Name, podAffinityTerm)
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// all the required pod affinity scheduling rules satisfied
|
// all the required pod affinity scheduling rules satisfied
|
||||||
@ -884,21 +912,12 @@ func (checker *PodAffinityChecker) NodeMatchesHardPodAffinity(pod *api.Pod, allP
|
|||||||
// scheduling rules and scheduling the pod onto the node won't
|
// scheduling rules and scheduling the pod onto the node won't
|
||||||
// break any existing pods' anti-affinity rules, then return true.
|
// break any existing pods' anti-affinity rules, then return true.
|
||||||
func (checker *PodAffinityChecker) NodeMatchesHardPodAntiAffinity(pod *api.Pod, allPods []*api.Pod, node *api.Node, podAntiAffinity *api.PodAntiAffinity) bool {
|
func (checker *PodAffinityChecker) NodeMatchesHardPodAntiAffinity(pod *api.Pod, allPods []*api.Pod, node *api.Node, podAntiAffinity *api.PodAntiAffinity) bool {
|
||||||
var podAntiAffinityTerms []api.PodAffinityTerm
|
|
||||||
if podAntiAffinity != nil && len(podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 {
|
|
||||||
podAntiAffinityTerms = podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution
|
|
||||||
}
|
|
||||||
// TODO: Uncomment this block when implement RequiredDuringSchedulingRequiredDuringExecution.
|
|
||||||
//if len(podAntiAffinity.RequiredDuringSchedulingRequiredDuringExecution) != 0 {
|
|
||||||
// podAntiAffinityTerms = append(podAntiAffinityTerms, podAntiAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
|
|
||||||
//}
|
|
||||||
|
|
||||||
// foreach element podAntiAffinityTerm of podAntiAffinityTerms
|
// foreach element podAntiAffinityTerm of podAntiAffinityTerms
|
||||||
// if the pod matches the term (breaks the anti-affinity),
|
// if the pod matches the term (breaks the anti-affinity),
|
||||||
// don't schedule the pod onto this node.
|
// don't schedule the pod onto this node.
|
||||||
for _, podAntiAffinityTerm := range podAntiAffinityTerms {
|
for _, podAntiAffinityTerm := range getPodAntiAffinityTerms(podAntiAffinity) {
|
||||||
podAntiAffinityTermMatches, err := checker.AnyPodMatchesPodAffinityTerm(pod, allPods, node, podAntiAffinityTerm)
|
podAntiAffinityTermMatches, _, err := checker.AnyPodMatchesPodAffinityTerm(pod, allPods, node, podAntiAffinityTerm)
|
||||||
if err != nil || podAntiAffinityTermMatches == true {
|
if err != nil || podAntiAffinityTermMatches {
|
||||||
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because not all the existing pods on this node satisfy the PodAntiAffinityTerm %v, err: %v",
|
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because not all the existing pods on this node satisfy the PodAntiAffinityTerm %v, err: %v",
|
||||||
podName(pod), node.Name, podAntiAffinityTerm, err)
|
podName(pod), node.Name, podAntiAffinityTerm, err)
|
||||||
return false
|
return false
|
||||||
@ -914,62 +933,29 @@ func (checker *PodAffinityChecker) NodeMatchesHardPodAntiAffinity(pod *api.Pod,
|
|||||||
glog.V(10).Infof("Failed to get Affinity from Pod %+v, err: %+v", podName(pod), err)
|
glog.V(10).Infof("Failed to get Affinity from Pod %+v, err: %+v", podName(pod), err)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
if epAffinity.PodAntiAffinity != nil {
|
epNode, err := checker.info.GetNodeInfo(ep.Spec.NodeName)
|
||||||
var epAntiAffinityTerms []api.PodAffinityTerm
|
if err != nil {
|
||||||
if len(epAffinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 {
|
glog.V(10).Infof("Failed to get node from Pod %+v, err: %+v", podName(ep), err)
|
||||||
epAntiAffinityTerms = epAffinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution
|
return false
|
||||||
}
|
}
|
||||||
// TODO: Uncomment this block when implement RequiredDuringSchedulingRequiredDuringExecution.
|
for _, epAntiAffinityTerm := range getPodAntiAffinityTerms(epAffinity.PodAntiAffinity) {
|
||||||
//if len(epAffinity.PodAntiAffinity.RequiredDuringSchedulingRequiredDuringExecution) != 0 {
|
match, err := priorityutil.PodMatchesTermsNamespaceAndSelector(pod, ep, &epAntiAffinityTerm)
|
||||||
// epAntiAffinityTerms = append(epAntiAffinityTerms, epAffinity.PodAntiAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
|
|
||||||
//}
|
|
||||||
|
|
||||||
for _, epAntiAffinityTerm := range epAntiAffinityTerms {
|
|
||||||
labelSelector, err := unversioned.LabelSelectorAsSelector(epAntiAffinityTerm.LabelSelector)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.V(10).Infof("Failed to get label selector from anti-affinityterm %+v of existing pod %+v, err: %+v", epAntiAffinityTerm, podName(pod), err)
|
glog.V(10).Infof("Failed to get label selector from anti-affinityterm %+v of existing pod %+v, err: %+v", epAntiAffinityTerm, podName(pod), err)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
if match && checker.failureDomains.NodesHaveSameTopologyKey(node, epNode, epAntiAffinityTerm.TopologyKey) {
|
||||||
names := priorityutil.GetNamespacesFromPodAffinityTerm(ep, epAntiAffinityTerm)
|
|
||||||
if (len(names) == 0 || names.Has(pod.Namespace)) && labelSelector.Matches(labels.Set(pod.Labels)) {
|
|
||||||
epNode, err := checker.info.GetNodeInfo(ep.Spec.NodeName)
|
|
||||||
if err != nil || checker.failureDomains.NodesHaveSameTopologyKey(node, epNode, epAntiAffinityTerm.TopologyKey) {
|
|
||||||
glog.V(10).Infof("Cannot schedule Pod %+v, onto node %v because the pod would break the PodAntiAffinityTerm %+v, of existing pod %+v, err: %v",
|
glog.V(10).Infof("Cannot schedule Pod %+v, onto node %v because the pod would break the PodAntiAffinityTerm %+v, of existing pod %+v, err: %v",
|
||||||
podName(pod), node.Name, epAntiAffinityTerm, podName(ep), err)
|
podName(pod), node.Name, epAntiAffinityTerm, podName(ep), err)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
// all the required pod anti-affinity scheduling rules are satisfied
|
// all the required pod anti-affinity scheduling rules are satisfied
|
||||||
glog.V(10).Infof("Can schedule Pod %+v, on node %v because all the required pod anti-affinity scheduling rules are satisfied", podName(pod), node.Name)
|
glog.V(10).Infof("Can schedule Pod %+v, on node %v because all the required pod anti-affinity scheduling rules are satisfied", podName(pod), node.Name)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// NodeMatchPodAffinityAntiAffinity checks if the node matches
|
|
||||||
// the requiredDuringScheduling affinity/anti-affinity rules indicated by the pod.
|
|
||||||
func (checker *PodAffinityChecker) NodeMatchPodAffinityAntiAffinity(pod *api.Pod, allPods []*api.Pod, node *api.Node) bool {
|
|
||||||
// Parse required affinity scheduling rules.
|
|
||||||
affinity, err := api.GetAffinityFromPodAnnotations(pod.Annotations)
|
|
||||||
if err != nil {
|
|
||||||
glog.V(10).Infof("Failed to get Affinity from Pod %+v, err: %+v", podName(pod), err)
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// check if the current node match the inter-pod affinity scheduling rules.
|
|
||||||
// hard inter-pod affinity is not symmetric, check only when affinity.PodAffinity is not nil.
|
|
||||||
if affinity.PodAffinity != nil {
|
|
||||||
if !checker.NodeMatchesHardPodAffinity(pod, allPods, node, affinity.PodAffinity) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// hard inter-pod anti-affinity is symmetric, check both when affinity.PodAntiAffinity is nil and not nil.
|
|
||||||
return checker.NodeMatchesHardPodAntiAffinity(pod, allPods, node, affinity.PodAntiAffinity)
|
|
||||||
}
|
|
||||||
|
|
||||||
func PodToleratesNodeTaints(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) {
|
func PodToleratesNodeTaints(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, error) {
|
||||||
node := nodeInfo.Node()
|
node := nodeInfo.Node()
|
||||||
if node == nil {
|
if node == nil {
|
||||||
|
@ -19,7 +19,6 @@ package priorities
|
|||||||
import (
|
import (
|
||||||
"github.com/golang/glog"
|
"github.com/golang/glog"
|
||||||
"k8s.io/kubernetes/pkg/api"
|
"k8s.io/kubernetes/pkg/api"
|
||||||
"k8s.io/kubernetes/pkg/api/unversioned"
|
|
||||||
"k8s.io/kubernetes/pkg/labels"
|
"k8s.io/kubernetes/pkg/labels"
|
||||||
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
|
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
|
||||||
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
|
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
|
||||||
@ -52,19 +51,48 @@ func NewInterPodAffinityPriority(
|
|||||||
return interPodAffinity.CalculateInterPodAffinityPriority
|
return interPodAffinity.CalculateInterPodAffinityPriority
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Share it with predicates by moving to better location.
|
type podAffinityPriorityMap struct {
|
||||||
// TODO: Can we avoid error handling here - this is only a matter of non-parsable selector?
|
// nodes contain all nodes that should be considered
|
||||||
func podMatchesNamespaceAndSelector(pod *api.Pod, affinityPod *api.Pod, term *api.PodAffinityTerm) (bool, error) {
|
nodes []*api.Node
|
||||||
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(affinityPod, *term)
|
// counts store the mapping from node name to so-far computed score of
|
||||||
if len(namespaces) != 0 && !namespaces.Has(pod.Namespace) {
|
// the node.
|
||||||
return false, nil
|
counts map[string]float64
|
||||||
|
// failureDomains contain default failure domains keys
|
||||||
|
failureDomains priorityutil.Topologies
|
||||||
|
// The first error that we faced.
|
||||||
|
firstError error
|
||||||
}
|
}
|
||||||
|
|
||||||
selector, err := unversioned.LabelSelectorAsSelector(term.LabelSelector)
|
func newPodAffinityPriorityMap(nodes []*api.Node, failureDomains priorityutil.Topologies) *podAffinityPriorityMap {
|
||||||
if err != nil || !selector.Matches(labels.Set(pod.Labels)) {
|
return &podAffinityPriorityMap{
|
||||||
return false, err
|
nodes: nodes,
|
||||||
|
counts: make(map[string]float64, len(nodes)),
|
||||||
|
failureDomains: failureDomains,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *podAffinityPriorityMap) processTerm(term *api.PodAffinityTerm, podDefiningAffinityTerm, podToCheck *api.Pod, fixedNode *api.Node, weight float64) {
|
||||||
|
match, err := priorityutil.PodMatchesTermsNamespaceAndSelector(podToCheck, podDefiningAffinityTerm, term)
|
||||||
|
if err != nil {
|
||||||
|
if p.firstError == nil {
|
||||||
|
p.firstError = err
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if match {
|
||||||
|
for _, node := range p.nodes {
|
||||||
|
if p.failureDomains.NodesHaveSameTopologyKey(node, fixedNode, term.TopologyKey) {
|
||||||
|
p.counts[node.Name] += weight
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *podAffinityPriorityMap) processTerms(terms []api.WeightedPodAffinityTerm, podDefiningAffinityTerm, podToCheck *api.Pod, fixedNode *api.Node, multiplier int) {
|
||||||
|
for i := range terms {
|
||||||
|
term := &terms[i]
|
||||||
|
p.processTerm(&term.PodAffinityTerm, podDefiningAffinityTerm, podToCheck, fixedNode, float64(term.Weight*multiplier))
|
||||||
}
|
}
|
||||||
return true, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// compute a sum by iterating through the elements of weightedPodAffinityTerm and adding
|
// compute a sum by iterating through the elements of weightedPodAffinityTerm and adding
|
||||||
@ -85,32 +113,9 @@ func (ipa *InterPodAffinity) CalculateInterPodAffinityPriority(pod *api.Pod, nod
|
|||||||
// convert the topology key based weights to the node name based weights
|
// convert the topology key based weights to the node name based weights
|
||||||
var maxCount float64
|
var maxCount float64
|
||||||
var minCount float64
|
var minCount float64
|
||||||
// counts store the mapping from node name to so-far computed score of
|
// priorityMap stores the mapping from node name to so-far computed score of
|
||||||
// the node.
|
// the node.
|
||||||
counts := make(map[string]float64, len(nodes))
|
pm := newPodAffinityPriorityMap(nodes, ipa.failureDomains)
|
||||||
|
|
||||||
processTerm := func(term *api.PodAffinityTerm, affinityPod, podToCheck *api.Pod, fixedNode *api.Node, weight float64) error {
|
|
||||||
match, err := podMatchesNamespaceAndSelector(podToCheck, affinityPod, term)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if match {
|
|
||||||
for _, node := range nodes {
|
|
||||||
if ipa.failureDomains.NodesHaveSameTopologyKey(node, fixedNode, term.TopologyKey) {
|
|
||||||
counts[node.Name] += weight
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
processTerms := func(terms []api.WeightedPodAffinityTerm, affinityPod, podToCheck *api.Pod, fixedNode *api.Node, multiplier int) error {
|
|
||||||
for _, weightedTerm := range terms {
|
|
||||||
if err := processTerm(&weightedTerm.PodAffinityTerm, affinityPod, podToCheck, fixedNode, float64(weightedTerm.Weight*multiplier)); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, existingPod := range allPods {
|
for _, existingPod := range allPods {
|
||||||
existingPodNode, err := ipa.info.GetNodeInfo(existingPod.Spec.NodeName)
|
existingPodNode, err := ipa.info.GetNodeInfo(existingPod.Spec.NodeName)
|
||||||
@ -124,26 +129,22 @@ func (ipa *InterPodAffinity) CalculateInterPodAffinityPriority(pod *api.Pod, nod
|
|||||||
|
|
||||||
if affinity.PodAffinity != nil {
|
if affinity.PodAffinity != nil {
|
||||||
// For every soft pod affinity term of <pod>, if <existingPod> matches the term,
|
// For every soft pod affinity term of <pod>, if <existingPod> matches the term,
|
||||||
// increment <counts> for every node in the cluster with the same <term.TopologyKey>
|
// increment <pm.counts> for every node in the cluster with the same <term.TopologyKey>
|
||||||
// value as that of <existingPods>`s node by the term`s weight.
|
// value as that of <existingPods>`s node by the term`s weight.
|
||||||
terms := affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution
|
terms := affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution
|
||||||
if err := processTerms(terms, pod, existingPod, existingPodNode, 1); err != nil {
|
pm.processTerms(terms, pod, existingPod, existingPodNode, 1)
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if affinity.PodAntiAffinity != nil {
|
if affinity.PodAntiAffinity != nil {
|
||||||
// For every soft pod anti-affinity term of <pod>, if <existingPod> matches the term,
|
// For every soft pod anti-affinity term of <pod>, if <existingPod> matches the term,
|
||||||
// decrement <counts> for every node in the cluster with the same <term.TopologyKey>
|
// decrement <pm.counts> for every node in the cluster with the same <term.TopologyKey>
|
||||||
// value as that of <existingPod>`s node by the term`s weight.
|
// value as that of <existingPod>`s node by the term`s weight.
|
||||||
terms := affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution
|
terms := affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution
|
||||||
if err := processTerms(terms, pod, existingPod, existingPodNode, -1); err != nil {
|
pm.processTerms(terms, pod, existingPod, existingPodNode, -1)
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if existingPodAffinity.PodAffinity != nil {
|
if existingPodAffinity.PodAffinity != nil {
|
||||||
// For every hard pod affinity term of <existingPod>, if <pod> matches the term,
|
// For every hard pod affinity term of <existingPod>, if <pod> matches the term,
|
||||||
// increment <counts> for every node in the cluster with the same <term.TopologyKey>
|
// increment <pm.counts> for every node in the cluster with the same <term.TopologyKey>
|
||||||
// value as that of <existingPod>'s node by the constant <ipa.hardPodAffinityWeight>
|
// value as that of <existingPod>'s node by the constant <ipa.hardPodAffinityWeight>
|
||||||
if ipa.hardPodAffinityWeight > 0 {
|
if ipa.hardPodAffinityWeight > 0 {
|
||||||
terms := existingPodAffinity.PodAffinity.RequiredDuringSchedulingIgnoredDuringExecution
|
terms := existingPodAffinity.PodAffinity.RequiredDuringSchedulingIgnoredDuringExecution
|
||||||
@ -152,36 +153,33 @@ func (ipa *InterPodAffinity) CalculateInterPodAffinityPriority(pod *api.Pod, nod
|
|||||||
// terms = append(terms, existingPodAffinity.PodAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
|
// terms = append(terms, existingPodAffinity.PodAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
|
||||||
//}
|
//}
|
||||||
for _, term := range terms {
|
for _, term := range terms {
|
||||||
if err := processTerm(&term, existingPod, pod, existingPodNode, float64(ipa.hardPodAffinityWeight)); err != nil {
|
pm.processTerm(&term, existingPod, pod, existingPodNode, float64(ipa.hardPodAffinityWeight))
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// For every soft pod affinity term of <existingPod>, if <pod> matches the term,
|
// For every soft pod affinity term of <existingPod>, if <pod> matches the term,
|
||||||
// increment <counts> for every node in the cluster with the same <term.TopologyKey>
|
// increment <pm.counts> for every node in the cluster with the same <term.TopologyKey>
|
||||||
// value as that of <existingPod>'s node by the term's weight.
|
// value as that of <existingPod>'s node by the term's weight.
|
||||||
terms := existingPodAffinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution
|
terms := existingPodAffinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution
|
||||||
if err := processTerms(terms, existingPod, pod, existingPodNode, 1); err != nil {
|
pm.processTerms(terms, existingPod, pod, existingPodNode, 1)
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if existingPodAffinity.PodAntiAffinity != nil {
|
if existingPodAffinity.PodAntiAffinity != nil {
|
||||||
// For every soft pod anti-affinity term of <existingPod>, if <pod> matches the term,
|
// For every soft pod anti-affinity term of <existingPod>, if <pod> matches the term,
|
||||||
// decrement <counts> for every node in the cluster with the same <term.TopologyKey>
|
// decrement <pm.counts> for every node in the cluster with the same <term.TopologyKey>
|
||||||
// value as that of <existingPod>'s node by the term's weight.
|
// value as that of <existingPod>'s node by the term's weight.
|
||||||
terms := existingPodAffinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution
|
terms := existingPodAffinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution
|
||||||
if err := processTerms(terms, existingPod, pod, existingPodNode, -1); err != nil {
|
pm.processTerms(terms, existingPod, pod, existingPodNode, -1)
|
||||||
return nil, err
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if pm.firstError != nil {
|
||||||
|
return nil, pm.firstError
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, node := range nodes {
|
for _, node := range nodes {
|
||||||
if counts[node.Name] > maxCount {
|
if pm.counts[node.Name] > maxCount {
|
||||||
maxCount = counts[node.Name]
|
maxCount = pm.counts[node.Name]
|
||||||
}
|
}
|
||||||
if counts[node.Name] < minCount {
|
if pm.counts[node.Name] < minCount {
|
||||||
minCount = counts[node.Name]
|
minCount = pm.counts[node.Name]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -190,7 +188,7 @@ func (ipa *InterPodAffinity) CalculateInterPodAffinityPriority(pod *api.Pod, nod
|
|||||||
for _, node := range nodes {
|
for _, node := range nodes {
|
||||||
fScore := float64(0)
|
fScore := float64(0)
|
||||||
if (maxCount - minCount) > 0 {
|
if (maxCount - minCount) > 0 {
|
||||||
fScore = 10 * ((counts[node.Name] - minCount) / (maxCount - minCount))
|
fScore = 10 * ((pm.counts[node.Name] - minCount) / (maxCount - minCount))
|
||||||
}
|
}
|
||||||
result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: int(fScore)})
|
result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: int(fScore)})
|
||||||
if glog.V(10) {
|
if glog.V(10) {
|
||||||
|
@ -23,26 +23,11 @@ import (
|
|||||||
"k8s.io/kubernetes/pkg/util/sets"
|
"k8s.io/kubernetes/pkg/util/sets"
|
||||||
)
|
)
|
||||||
|
|
||||||
// FilterPodsByNameSpaces filters the pods based the given list of namespaces,
|
|
||||||
// empty set of namespaces means all namespaces.
|
|
||||||
func FilterPodsByNameSpaces(names sets.String, pods []*api.Pod) []*api.Pod {
|
|
||||||
if len(pods) == 0 || len(names) == 0 {
|
|
||||||
return pods
|
|
||||||
}
|
|
||||||
result := []*api.Pod{}
|
|
||||||
for _, pod := range pods {
|
|
||||||
if names.Has(pod.Namespace) {
|
|
||||||
result = append(result, pod)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetNamespacesFromPodAffinityTerm returns a set of names
|
// GetNamespacesFromPodAffinityTerm returns a set of names
|
||||||
// according to the namespaces indicated in podAffinityTerm.
|
// according to the namespaces indicated in podAffinityTerm.
|
||||||
// if the NameSpaces is nil considers the given pod's namespace
|
// 1. If the namespaces is nil considers the given pod's namespace
|
||||||
// if the Namespaces is empty list then considers all the namespaces
|
// 2. If the namespaces is empty list then considers all the namespaces
|
||||||
func GetNamespacesFromPodAffinityTerm(pod *api.Pod, podAffinityTerm api.PodAffinityTerm) sets.String {
|
func getNamespacesFromPodAffinityTerm(pod *api.Pod, podAffinityTerm api.PodAffinityTerm) sets.String {
|
||||||
names := sets.String{}
|
names := sets.String{}
|
||||||
if podAffinityTerm.Namespaces == nil {
|
if podAffinityTerm.Namespaces == nil {
|
||||||
names.Insert(pod.Namespace)
|
names.Insert(pod.Namespace)
|
||||||
@ -52,6 +37,21 @@ func GetNamespacesFromPodAffinityTerm(pod *api.Pod, podAffinityTerm api.PodAffin
|
|||||||
return names
|
return names
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PodMatchesTermsNamespaceAndSelector returns true if the given <pod>
|
||||||
|
// matches the namespace and selector defined by <affinityPod>`s <term>.
|
||||||
|
func PodMatchesTermsNamespaceAndSelector(pod *api.Pod, affinityPod *api.Pod, term *api.PodAffinityTerm) (bool, error) {
|
||||||
|
namespaces := getNamespacesFromPodAffinityTerm(affinityPod, *term)
|
||||||
|
if len(namespaces) != 0 && !namespaces.Has(pod.Namespace) {
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
selector, err := unversioned.LabelSelectorAsSelector(term.LabelSelector)
|
||||||
|
if err != nil || !selector.Matches(labels.Set(pod.Labels)) {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
|
||||||
// nodesHaveSameTopologyKeyInternal checks if nodeA and nodeB have same label value with given topologyKey as label key.
|
// nodesHaveSameTopologyKeyInternal checks if nodeA and nodeB have same label value with given topologyKey as label key.
|
||||||
func nodesHaveSameTopologyKeyInternal(nodeA, nodeB *api.Node, topologyKey string) bool {
|
func nodesHaveSameTopologyKeyInternal(nodeA, nodeB *api.Node, topologyKey string) bool {
|
||||||
return nodeA.Labels != nil && nodeB.Labels != nil && len(nodeA.Labels[topologyKey]) > 0 && nodeA.Labels[topologyKey] == nodeB.Labels[topologyKey]
|
return nodeA.Labels != nil && nodeB.Labels != nil && len(nodeA.Labels[topologyKey]) > 0 && nodeA.Labels[topologyKey] == nodeB.Labels[topologyKey]
|
||||||
@ -76,20 +76,3 @@ func (tps *Topologies) NodesHaveSameTopologyKey(nodeA, nodeB *api.Node, topology
|
|||||||
return nodesHaveSameTopologyKeyInternal(nodeA, nodeB, topologyKey)
|
return nodesHaveSameTopologyKeyInternal(nodeA, nodeB, topologyKey)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// CheckIfPodMatchPodAffinityTerm checks if podB's affinity request is compatible with podA
|
|
||||||
// TODO: Get rid this method. We should avoid computing Namespaces and selectors multiple times
|
|
||||||
// and check them on higher levels and then use NodesHaveSameTopologyKey method.
|
|
||||||
func (tps *Topologies) CheckIfPodMatchPodAffinityTerm(podA *api.Pod, nodeA, nodeB *api.Node, podB *api.Pod, podBAffinityTerm api.PodAffinityTerm) (bool, error) {
|
|
||||||
names := GetNamespacesFromPodAffinityTerm(podB, podBAffinityTerm)
|
|
||||||
if len(names) != 0 && !names.Has(podA.Namespace) {
|
|
||||||
return false, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
labelSelector, err := unversioned.LabelSelectorAsSelector(podBAffinityTerm.LabelSelector)
|
|
||||||
if err != nil || !labelSelector.Matches(labels.Set(podA.Labels)) {
|
|
||||||
return false, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return tps.NodesHaveSameTopologyKey(nodeA, nodeB, podBAffinityTerm.TopologyKey), nil
|
|
||||||
}
|
|
||||||
|
Loading…
Reference in New Issue
Block a user