Merge pull request #29539 from wojtek-t/optimize_pod_affinity_3

Automatic merge from submit-queue

Optimize PodAffinity priority function

Commit 27af240d4f
@@ -436,15 +436,16 @@ const (
 // GetAffinityFromPod gets the json serialized affinity data from Pod.Annotations
 // and converts it to the Affinity type in api.
-func GetAffinityFromPodAnnotations(annotations map[string]string) (Affinity, error) {
-	var affinity Affinity
+func GetAffinityFromPodAnnotations(annotations map[string]string) (*Affinity, error) {
 	if len(annotations) > 0 && annotations[AffinityAnnotationKey] != "" {
+		var affinity Affinity
 		err := json.Unmarshal([]byte(annotations[AffinityAnnotationKey]), &affinity)
 		if err != nil {
-			return affinity, err
+			return nil, err
 		}
+		return &affinity, nil
 	}
-	return affinity, nil
+	return nil, nil
 }
 
 // GetTolerationsFromPodAnnotations gets the json serialized tolerations data from Pod.Annotations
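The hunk above changes GetAffinityFromPodAnnotations to return a *Affinity pointer, with (nil, nil) when the annotation is absent, so callers must nil-check before touching PodAffinity or PodAntiAffinity. Below is a minimal, self-contained sketch of that contract; the local types and the annotation key string are simplified stand-ins for the real pkg/api definitions, not the actual Kubernetes code.

package main

import (
	"encoding/json"
	"fmt"
)

// Simplified stand-ins for the api types touched by this diff.
type PodAffinity struct{}
type PodAntiAffinity struct{}

type Affinity struct {
	PodAffinity     *PodAffinity     `json:"podAffinity,omitempty"`
	PodAntiAffinity *PodAntiAffinity `json:"podAntiAffinity,omitempty"`
}

// affinityAnnotationKey is illustrative; the diff uses the api.AffinityAnnotationKey constant.
const affinityAnnotationKey = "scheduler.alpha.kubernetes.io/affinity"

// getAffinityFromPodAnnotations mirrors the new behavior: a nil pointer (and
// nil error) is returned when the annotation is missing or empty.
func getAffinityFromPodAnnotations(annotations map[string]string) (*Affinity, error) {
	if len(annotations) > 0 && annotations[affinityAnnotationKey] != "" {
		var affinity Affinity
		if err := json.Unmarshal([]byte(annotations[affinityAnnotationKey]), &affinity); err != nil {
			return nil, err
		}
		return &affinity, nil
	}
	return nil, nil
}

func main() {
	// No annotation set: the result is nil, so every caller has to nil-check,
	// which is exactly the guard the later hunks in this diff add.
	affinity, err := getAffinityFromPodAnnotations(nil)
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	if affinity != nil && affinity.PodAffinity != nil {
		fmt.Println("pod has affinity constraints")
	} else {
		fmt.Println("no affinity constraints")
	}
}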
@@ -1863,6 +1863,9 @@ func ValidateAffinityInPodAnnotations(annotations map[string]string, fldPath *fi
 		allErrs = append(allErrs, field.Invalid(fldPath, api.AffinityAnnotationKey, err.Error()))
 		return allErrs
 	}
+	if affinity == nil {
+		return allErrs
+	}
 
 	affinityFldPath := fldPath.Child(api.AffinityAnnotationKey)
 	if affinity.NodeAffinity != nil {

@@ -63,7 +63,7 @@ func (p *plugin) Admit(attributes admission.Attributes) (err error) {
 		glog.V(5).Infof("Invalid Affinity detected, but we will leave handling of this to validation phase")
 		return nil
 	}
-	if affinity.PodAntiAffinity != nil {
+	if affinity != nil && affinity.PodAntiAffinity != nil {
 		var podAntiAffinityTerms []api.PodAffinityTerm
 		if len(affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 {
 			podAntiAffinityTerms = affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution

@@ -512,7 +512,7 @@ func podMatchesNodeLabels(pod *api.Pod, node *api.Node) bool {
 	// 5. zero-length non-nil []NodeSelectorRequirement matches no nodes also, just for simplicity
 	// 6. non-nil empty NodeSelectorRequirement is not allowed
 	nodeAffinityMatches := true
-	if affinity.NodeAffinity != nil {
+	if affinity != nil && affinity.NodeAffinity != nil {
 		nodeAffinity := affinity.NodeAffinity
 		// if no required NodeAffinity requirements, will do no-op, means select all nodes.
 		// TODO: Replace next line with subsequent commented-out line when implement RequiredDuringSchedulingRequiredDuringExecution.
@@ -809,14 +809,19 @@ func (checker *PodAffinityChecker) InterPodAffinityMatches(pod *api.Pod, meta in
 
 	// Check if the current node match the inter-pod affinity scheduling constraints.
 	// Hard inter-pod affinity is not symmetric, check only when affinity.PodAffinity exists.
-	if affinity.PodAffinity != nil {
+	if affinity != nil && affinity.PodAffinity != nil {
 		if !checker.NodeMatchesHardPodAffinity(pod, allPods, node, affinity.PodAffinity) {
 			return false, ErrPodAffinityNotMatch
 		}
 	}
 
-	// Hard inter-pod anti-affinity is symmetric, we should always check it.
-	if !checker.NodeMatchesHardPodAntiAffinity(pod, allPods, node, affinity.PodAntiAffinity) {
+	// Hard inter-pod anti-affinity is symmetric, we should always check it
+	// (also when affinity or affinity.PodAntiAffinity is nil).
+	var antiAffinity *api.PodAntiAffinity
+	if affinity != nil {
+		antiAffinity = affinity.PodAntiAffinity
+	}
+	if !checker.NodeMatchesHardPodAntiAffinity(pod, allPods, node, antiAffinity) {
 		return false, ErrPodAffinityNotMatch
 	}
 

@@ -933,6 +938,9 @@ func (checker *PodAffinityChecker) NodeMatchesHardPodAntiAffinity(pod *api.Pod,
 			glog.V(10).Infof("Failed to get Affinity from Pod %+v, err: %+v", podName(pod), err)
 			return false
 		}
+		if epAffinity == nil {
+			continue
+		}
 		epNode, err := checker.info.GetNodeInfo(ep.Spec.NodeName)
 		if err != nil {
 			glog.V(10).Infof("Failed to get node from Pod %+v, err: %+v", podName(ep), err)
@@ -2500,7 +2500,7 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
 		if err != nil {
 			t.Errorf("unexpected error: %v", err)
 		}
-		if affinity.NodeAffinity != nil {
+		if affinity != nil && affinity.NodeAffinity != nil {
 			nodeInfo := schedulercache.NewNodeInfo()
 			nodeInfo.SetNode(&node)
 			fits2, err := PodSelectorMatches(test.pod, PredicateMetadata(test.pod), nodeInfo)
@@ -17,9 +17,11 @@ limitations under the License.
 package priorities
 
 import (
+	"sync"
+
 	"github.com/golang/glog"
 	"k8s.io/kubernetes/pkg/api"
-	"k8s.io/kubernetes/pkg/labels"
+	"k8s.io/kubernetes/pkg/util/workqueue"
 	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
 	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
 	priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"

@@ -52,6 +54,8 @@ func NewInterPodAffinityPriority(
 	}
 }
 
 type podAffinityPriorityMap struct {
+	sync.Mutex
+
 	// nodes contain all nodes that should be considered
 	nodes []*api.Node
 	// counts store the mapping from node name to so-far computed score of
@@ -71,20 +75,30 @@ func newPodAffinityPriorityMap(nodes []*api.Node, failureDomains priorityutil.To
 	}
 }
 
+func (p *podAffinityPriorityMap) setError(err error) {
+	p.Lock()
+	defer p.Unlock()
+	if p.firstError == nil {
+		p.firstError = err
+	}
+}
+
 func (p *podAffinityPriorityMap) processTerm(term *api.PodAffinityTerm, podDefiningAffinityTerm, podToCheck *api.Pod, fixedNode *api.Node, weight float64) {
 	match, err := priorityutil.PodMatchesTermsNamespaceAndSelector(podToCheck, podDefiningAffinityTerm, term)
 	if err != nil {
-		if p.firstError == nil {
-			p.firstError = err
-		}
+		p.setError(err)
 		return
 	}
 	if match {
-		for _, node := range p.nodes {
-			if p.failureDomains.NodesHaveSameTopologyKey(node, fixedNode, term.TopologyKey) {
-				p.counts[node.Name] += weight
+		func() {
+			p.Lock()
+			defer p.Unlock()
+			for _, node := range p.nodes {
+				if p.failureDomains.NodesHaveSameTopologyKey(node, fixedNode, term.TopologyKey) {
+					p.counts[node.Name] += weight
+				}
 			}
-		}
+		}()
 	}
 }
 
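The setError helper and the Lock/Unlock wrapping of the counts update exist because processTerm is about to be called from many goroutines at once (see the workqueue.Parallelize hunk further down): both the per-node scores and the first recorded error now live behind the embedded sync.Mutex. Here is a minimal sketch of the same pattern; the names and the toy score map are assumptions for illustration, not the scheduler's real types.

package main

import (
	"fmt"
	"sync"
)

// scoreMap mirrors the podAffinityPriorityMap pattern from the diff: an
// embedded mutex guards both the per-node counts and the first recorded error.
type scoreMap struct {
	sync.Mutex
	counts     map[string]float64
	firstError error
}

// setError records only the first error, the same way the diff's setError does.
func (s *scoreMap) setError(err error) {
	s.Lock()
	defer s.Unlock()
	if s.firstError == nil {
		s.firstError = err
	}
}

// addWeight has the same shape as the anonymous func() { p.Lock(); defer p.Unlock(); ... }()
// block in processTerm: the lock is held only around the map update.
func (s *scoreMap) addWeight(node string, weight float64) {
	s.Lock()
	defer s.Unlock()
	s.counts[node] += weight
}

func main() {
	s := &scoreMap{counts: map[string]float64{}}
	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			s.addWeight("node-a", 1)
		}()
	}
	wg.Wait()
	fmt.Println(s.counts["node-a"], s.firstError) // 8 <nil>
}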
@@ -101,14 +115,17 @@ func (p *podAffinityPriorityMap) processTerms(terms []api.WeightedPodAffinityTer
 // Symmetry need to be considered for preferredDuringSchedulingIgnoredDuringExecution from podAffinity & podAntiAffinity,
 // symmetry need to be considered for hard requirements from podAffinity
 func (ipa *InterPodAffinity) CalculateInterPodAffinityPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) {
-	allPods, err := ipa.podLister.List(labels.Everything())
-	if err != nil {
-		return nil, err
-	}
 	affinity, err := api.GetAffinityFromPodAnnotations(pod.Annotations)
 	if err != nil {
 		return nil, err
 	}
+	hasAffinityConstraints := affinity != nil && affinity.PodAffinity != nil
+	hasAntiAffinityConstraints := affinity != nil && affinity.PodAntiAffinity != nil
+
+	allNodeNames := make([]string, 0, len(nodeNameToInfo))
+	for name := range nodeNameToInfo {
+		allNodeNames = append(allNodeNames, name)
+	}
+
 	// convert the topology key based weights to the node name based weights
 	var maxCount float64

@@ -117,24 +134,26 @@ func (ipa *InterPodAffinity) CalculateInterPodAffinityPriority(pod *api.Pod, nod
 	// the node.
 	pm := newPodAffinityPriorityMap(nodes, ipa.failureDomains)
 
-	for _, existingPod := range allPods {
+	processPod := func(existingPod *api.Pod) error {
 		existingPodNode, err := ipa.info.GetNodeInfo(existingPod.Spec.NodeName)
 		if err != nil {
-			return nil, err
+			return err
 		}
 		existingPodAffinity, err := api.GetAffinityFromPodAnnotations(existingPod.Annotations)
 		if err != nil {
-			return nil, err
+			return err
 		}
+		existingHasAffinityConstraints := existingPodAffinity != nil && existingPodAffinity.PodAffinity != nil
+		existingHasAntiAffinityConstraints := existingPodAffinity != nil && existingPodAffinity.PodAntiAffinity != nil
+
-		if affinity.PodAffinity != nil {
+		if hasAffinityConstraints {
 			// For every soft pod affinity term of <pod>, if <existingPod> matches the term,
 			// increment <pm.counts> for every node in the cluster with the same <term.TopologyKey>
 			// value as that of <existingPods>`s node by the term`s weight.
 			terms := affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution
 			pm.processTerms(terms, pod, existingPod, existingPodNode, 1)
 		}
-		if affinity.PodAntiAffinity != nil {
+		if hasAntiAffinityConstraints {
 			// For every soft pod anti-affinity term of <pod>, if <existingPod> matches the term,
 			// decrement <pm.counts> for every node in the cluster with the same <term.TopologyKey>
 			// value as that of <existingPod>`s node by the term`s weight.
@@ -142,7 +161,7 @@ func (ipa *InterPodAffinity) CalculateInterPodAffinityPriority(pod *api.Pod, nod
 			pm.processTerms(terms, pod, existingPod, existingPodNode, -1)
 		}
 
-		if existingPodAffinity.PodAffinity != nil {
+		if existingHasAffinityConstraints {
 			// For every hard pod affinity term of <existingPod>, if <pod> matches the term,
 			// increment <pm.counts> for every node in the cluster with the same <term.TopologyKey>
 			// value as that of <existingPod>'s node by the constant <ipa.hardPodAffinityWeight>

@@ -162,14 +181,35 @@ func (ipa *InterPodAffinity) CalculateInterPodAffinityPriority(pod *api.Pod, nod
 			terms := existingPodAffinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution
 			pm.processTerms(terms, existingPod, pod, existingPodNode, 1)
 		}
-		if existingPodAffinity.PodAntiAffinity != nil {
+		if existingHasAntiAffinityConstraints {
 			// For every soft pod anti-affinity term of <existingPod>, if <pod> matches the term,
 			// decrement <pm.counts> for every node in the cluster with the same <term.TopologyKey>
 			// value as that of <existingPod>'s node by the term's weight.
 			terms := existingPodAffinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution
 			pm.processTerms(terms, existingPod, pod, existingPodNode, -1)
 		}
+		return nil
 	}
+	processNode := func(i int) {
+		nodeInfo := nodeNameToInfo[allNodeNames[i]]
+		if hasAffinityConstraints || hasAntiAffinityConstraints {
+			// We need to process all the nodes.
+			for _, existingPod := range nodeInfo.Pods() {
+				if err := processPod(existingPod); err != nil {
+					pm.setError(err)
+				}
+			}
+		} else {
+			// The pod doesn't have any constraints - we need to check only existing
+			// ones that have some.
+			for _, existingPod := range nodeInfo.PodsWithAffinity() {
+				if err := processPod(existingPod); err != nil {
+					pm.setError(err)
+				}
+			}
+		}
+	}
+	workqueue.Parallelize(16, len(allNodeNames), processNode)
 	if pm.firstError != nil {
 		return nil, pm.firstError
 	}
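The core of the optimization is visible in the hunk above: instead of listing every pod through the pod lister and scanning them serially, the priority now walks per-node pod lists and fans the work out with workqueue.Parallelize(16, len(allNodeNames), processNode), falling back to nodeInfo.PodsWithAffinity() when the incoming pod has no constraints of its own. The sketch below reproduces only the fan-out shape using the standard library; parallelize here is a stand-in with the same (workers, pieces, func(int)) shape, not the Kubernetes workqueue implementation.

package main

import (
	"fmt"
	"sync"
)

// parallelize approximates the shape of workqueue.Parallelize as used in the
// diff: run fn(i) for every i in [0, pieces) across a fixed number of workers.
func parallelize(workers, pieces int, fn func(i int)) {
	indices := make(chan int, pieces)
	for i := 0; i < pieces; i++ {
		indices <- i
	}
	close(indices)

	var wg sync.WaitGroup
	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for i := range indices {
				fn(i)
			}
		}()
	}
	wg.Wait()
}

func main() {
	nodeNames := []string{"node-a", "node-b", "node-c", "node-d"}

	var mu sync.Mutex
	counts := map[string]int{}

	// processNode mirrors the closure in the diff: each invocation handles one
	// node index and funnels its results through a mutex-guarded map.
	processNode := func(i int) {
		mu.Lock()
		defer mu.Unlock()
		counts[nodeNames[i]]++
	}

	parallelize(16, len(nodeNames), processNode)
	fmt.Println(counts)
}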
@@ -496,7 +496,7 @@ func TestInterPodAffinityPriority(t *testing.T) {
 		},
 	}
 	for _, test := range tests {
-		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods)
+		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
 		interPodAffinity := InterPodAffinity{
 			info: FakeNodeListInfo(test.nodes),
 			nodeLister: algorithm.FakeNodeLister(test.nodes),

@@ -585,7 +585,7 @@ func TestHardPodAffinitySymmetricWeight(t *testing.T) {
 		},
 	}
 	for _, test := range tests {
-		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods)
+		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
 		ipa := InterPodAffinity{
 			info: FakeNodeListInfo(test.nodes),
 			nodeLister: algorithm.FakeNodeLister(test.nodes),

@@ -669,7 +669,7 @@ func TestSoftPodAntiAffinityWithFailureDomains(t *testing.T) {
 		},
 	}
 	for _, test := range tests {
-		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods)
+		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
 		ipa := InterPodAffinity{
 			info: FakeNodeListInfo(test.nodes),
 			nodeLister: algorithm.FakeNodeLister(test.nodes),
@@ -41,7 +41,7 @@ func CalculateNodeAffinityPriority(pod *api.Pod, nodeNameToInfo map[string]*sche
 	// A nil element of PreferredDuringSchedulingIgnoredDuringExecution matches no objects.
 	// An element of PreferredDuringSchedulingIgnoredDuringExecution that refers to an
 	// empty PreferredSchedulingTerm matches all objects.
-	if affinity.NodeAffinity != nil && affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution != nil {
+	if affinity != nil && affinity.NodeAffinity != nil && affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution != nil {
 		// Match PreferredDuringSchedulingIgnoredDuringExecution term by term.
 		for _, preferredSchedulingTerm := range affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution {
 			if preferredSchedulingTerm.Weight == 0 {

@@ -155,7 +155,7 @@ func TestNodeAffinityPriority(t *testing.T) {
 	}
 
 	for _, test := range tests {
-		list, err := CalculateNodeAffinityPriority(test.pod, schedulercache.CreateNodeNameToInfoMap(nil), test.nodes)
+		list, err := CalculateNodeAffinityPriority(test.pod, schedulercache.CreateNodeNameToInfoMap(nil, test.nodes), test.nodes)
 		if err != nil {
 			t.Errorf("unexpected error: %v", err)
 		}
@@ -138,13 +138,7 @@ func TestZeroRequest(t *testing.T) {
 
 	const expectedPriority int = 25
 	for _, test := range tests {
-		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods)
+		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
-		for _, node := range test.nodes {
-			if _, ok := nodeNameToInfo[node.Name]; !ok {
-				nodeNameToInfo[node.Name] = schedulercache.NewNodeInfo()
-			}
-			nodeNameToInfo[node.Name].SetNode(node)
-		}
 		list, err := scheduler.PrioritizeNodes(
 			test.pod,
 			nodeNameToInfo,

@@ -395,13 +389,7 @@ func TestLeastRequested(t *testing.T) {
 	}
 
 	for _, test := range tests {
-		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods)
+		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
-		for _, node := range test.nodes {
-			if _, ok := nodeNameToInfo[node.Name]; !ok {
-				nodeNameToInfo[node.Name] = schedulercache.NewNodeInfo()
-			}
-			nodeNameToInfo[node.Name].SetNode(node)
-		}
 		list, err := LeastRequestedPriority(test.pod, nodeNameToInfo, test.nodes)
 		if err != nil {
 			t.Errorf("unexpected error: %v", err)

@@ -734,13 +722,7 @@ func TestBalancedResourceAllocation(t *testing.T) {
 	}
 
 	for _, test := range tests {
-		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods)
+		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
-		for _, node := range test.nodes {
-			if _, ok := nodeNameToInfo[node.Name]; !ok {
-				nodeNameToInfo[node.Name] = schedulercache.NewNodeInfo()
-			}
-			nodeNameToInfo[node.Name].SetNode(node)
-		}
 		list, err := BalancedResourceAllocation(test.pod, nodeNameToInfo, test.nodes)
 		if err != nil {
 			t.Errorf("unexpected error: %v", err)

@@ -884,7 +866,7 @@ func TestImageLocalityPriority(t *testing.T) {
 	}
 
 	for _, test := range tests {
-		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods)
+		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
 		list, err := ImageLocalityPriority(test.pod, nodeNameToInfo, test.nodes)
 		if err != nil {
 			t.Errorf("unexpected error: %v", err)
@@ -276,7 +276,7 @@ func TestSelectorSpreadPriority(t *testing.T) {
 	}
 
 	for _, test := range tests {
-		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods)
+		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, nil)
 		selectorSpread := SelectorSpread{podLister: algorithm.FakePodLister(test.pods), serviceLister: algorithm.FakeServiceLister(test.services), controllerLister: algorithm.FakeControllerLister(test.rcs), replicaSetLister: algorithm.FakeReplicaSetLister(test.rss)}
 		list, err := selectorSpread.CalculateSpreadPriority(test.pod, nodeNameToInfo, makeNodeList(test.nodes))
 		if err != nil {

@@ -477,7 +477,7 @@ func TestZoneSelectorSpreadPriority(t *testing.T) {
 	}
 
 	for _, test := range tests {
-		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods)
+		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, nil)
 		selectorSpread := SelectorSpread{podLister: algorithm.FakePodLister(test.pods), serviceLister: algorithm.FakeServiceLister(test.services), controllerLister: algorithm.FakeControllerLister(test.rcs), replicaSetLister: algorithm.FakeReplicaSetLister(test.rss)}
 		list, err := selectorSpread.CalculateSpreadPriority(test.pod, nodeNameToInfo, makeLabeledNodeList(labeledNodes))
 		if err != nil {

@@ -649,7 +649,7 @@ func TestZoneSpreadPriority(t *testing.T) {
 	}
 
 	for _, test := range tests {
-		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods)
+		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, nil)
 		zoneSpread := ServiceAntiAffinity{podLister: algorithm.FakePodLister(test.pods), serviceLister: algorithm.FakeServiceLister(test.services), label: "zone"}
 		list, err := zoneSpread.CalculateAntiAffinityPriority(test.pod, nodeNameToInfo, makeLabeledNodeList(test.nodes))
 		if err != nil {

@@ -210,7 +210,7 @@ func TestTaintAndToleration(t *testing.T) {
 		},
 	}
 	for _, test := range tests {
-		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap([]*api.Pod{{}})
+		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, nil)
 		list, err := ComputeTaintTolerationPriority(test.pod, nodeNameToInfo, test.nodes)
 		if err != nil {
 			t.Errorf("%s, unexpected error: %v", test.test, err)
@@ -33,11 +33,13 @@ type NodeInfo struct {
 	// Overall node information.
 	node *api.Node
 
+	pods             []*api.Pod
+	podsWithAffinity []*api.Pod
+
 	// Total requested resource of all pods on this node.
 	// It includes assumed pods which scheduler sends binding to apiserver but
 	// didn't get it as scheduled yet.
 	requestedResource *Resource
-	pods []*api.Pod
 	nonzeroRequest *Resource
 	// We store allocatedResources (which is Node.Status.Allocatable.*) explicitly
 	// as int64, to avoid conversions and accessing map.

@@ -91,6 +93,14 @@ func (n *NodeInfo) Pods() []*api.Pod {
 	return n.pods
 }
 
+// PodsWithAffinity return all pods with (anti)affinity constraints on this node.
+func (n *NodeInfo) PodsWithAffinity() []*api.Pod {
+	if n == nil {
+		return nil
+	}
+	return n.podsWithAffinity
+}
+
 func (n *NodeInfo) AllowedPodNumber() int {
 	if n == nil {
 		return 0
@@ -126,13 +136,19 @@ func (n *NodeInfo) Clone() *NodeInfo {
 	pods := append([]*api.Pod(nil), n.pods...)
 	clone := &NodeInfo{
 		node: n.node,
+		pods: pods,
 		requestedResource: &(*n.requestedResource),
 		nonzeroRequest: &(*n.nonzeroRequest),
 		allocatableResource: &(*n.allocatableResource),
 		allowedPodNumber: n.allowedPodNumber,
-		pods: pods,
 		generation: n.generation,
 	}
+	if len(n.pods) > 0 {
+		clone.pods = append([]*api.Pod(nil), n.pods...)
+	}
+	if len(n.podsWithAffinity) > 0 {
+		clone.podsWithAffinity = append([]*api.Pod(nil), n.podsWithAffinity...)
+	}
 	return clone
 }
 
@@ -145,6 +161,14 @@ func (n *NodeInfo) String() string {
 	return fmt.Sprintf("&NodeInfo{Pods:%v, RequestedResource:%#v, NonZeroRequest: %#v}", podKeys, n.requestedResource, n.nonzeroRequest)
 }
 
+func hasPodAffinityConstraints(pod *api.Pod) bool {
+	affinity, err := api.GetAffinityFromPodAnnotations(pod.Annotations)
+	if err != nil || affinity == nil {
+		return false
+	}
+	return affinity.PodAffinity != nil || affinity.PodAntiAffinity != nil
+}
+
 // addPod adds pod information to this NodeInfo.
 func (n *NodeInfo) addPod(pod *api.Pod) {
 	cpu, mem, nvidia_gpu, non0_cpu, non0_mem := calculateResource(pod)

@@ -154,6 +178,9 @@ func (n *NodeInfo) addPod(pod *api.Pod) {
 	n.nonzeroRequest.MilliCPU += non0_cpu
 	n.nonzeroRequest.Memory += non0_mem
 	n.pods = append(n.pods, pod)
+	if hasPodAffinityConstraints(pod) {
+		n.podsWithAffinity = append(n.podsWithAffinity, pod)
+	}
 	n.generation++
 }
 

@@ -164,6 +191,19 @@ func (n *NodeInfo) removePod(pod *api.Pod) error {
 		return err
 	}
 
+	for i := range n.podsWithAffinity {
+		k2, err := getPodKey(n.podsWithAffinity[i])
+		if err != nil {
+			glog.Errorf("Cannot get pod key, err: %v", err)
+			continue
+		}
+		if k1 == k2 {
+			// delete the element
+			n.podsWithAffinity[i] = n.podsWithAffinity[len(n.podsWithAffinity)-1]
+			n.podsWithAffinity = n.podsWithAffinity[:len(n.podsWithAffinity)-1]
+			break
+		}
+	}
 	for i := range n.pods {
 		k2, err := getPodKey(n.pods[i])
 		if err != nil {
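Together, the addPod and removePod hunks maintain podsWithAffinity as a secondary index alongside pods: a pod is appended to it only when hasPodAffinityConstraints reports affinity or anti-affinity annotations, and it is removed by swapping with the last element. A toy version of that bookkeeping is sketched below with simplified types; it is not the real NodeInfo.

package main

import "fmt"

// pod is a simplified stand-in; hasAffinity plays the role of
// hasPodAffinityConstraints in the diff.
type pod struct {
	name        string
	hasAffinity bool
}

// nodePods keeps all pods plus a secondary slice of only those with
// (anti)affinity constraints, mirroring NodeInfo.pods / NodeInfo.podsWithAffinity.
type nodePods struct {
	pods             []*pod
	podsWithAffinity []*pod
}

func (n *nodePods) add(p *pod) {
	n.pods = append(n.pods, p)
	if p.hasAffinity {
		n.podsWithAffinity = append(n.podsWithAffinity, p)
	}
}

func (n *nodePods) remove(name string) {
	// Delete from the affinity index by swapping with the last element,
	// the same trick the diff uses in removePod.
	for i := range n.podsWithAffinity {
		if n.podsWithAffinity[i].name == name {
			last := len(n.podsWithAffinity) - 1
			n.podsWithAffinity[i] = n.podsWithAffinity[last]
			n.podsWithAffinity = n.podsWithAffinity[:last]
			break
		}
	}
	for i := range n.pods {
		if n.pods[i].name == name {
			n.pods = append(n.pods[:i], n.pods[i+1:]...)
			break
		}
	}
}

func main() {
	n := &nodePods{}
	n.add(&pod{name: "web", hasAffinity: true})
	n.add(&pod{name: "batch"})
	n.remove("web")
	fmt.Println(len(n.pods), len(n.podsWithAffinity)) // 1 0
}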
@@ -20,16 +20,20 @@ import "k8s.io/kubernetes/pkg/api"
 
 // CreateNodeNameToInfoMap obtains a list of pods and pivots that list into a map where the keys are node names
 // and the values are the aggregated information for that node.
-func CreateNodeNameToInfoMap(pods []*api.Pod) map[string]*NodeInfo {
+func CreateNodeNameToInfoMap(pods []*api.Pod, nodes []*api.Node) map[string]*NodeInfo {
 	nodeNameToInfo := make(map[string]*NodeInfo)
 	for _, pod := range pods {
 		nodeName := pod.Spec.NodeName
-		nodeInfo, ok := nodeNameToInfo[nodeName]
-		if !ok {
-			nodeInfo = NewNodeInfo()
-			nodeNameToInfo[nodeName] = nodeInfo
+		if _, ok := nodeNameToInfo[nodeName]; !ok {
+			nodeNameToInfo[nodeName] = NewNodeInfo()
 		}
-		nodeInfo.addPod(pod)
+		nodeNameToInfo[nodeName].addPod(pod)
+	}
+	for _, node := range nodes {
+		if _, ok := nodeNameToInfo[node.Name]; !ok {
+			nodeNameToInfo[node.Name] = NewNodeInfo()
+		}
+		nodeNameToInfo[node.Name].SetNode(node)
 	}
 	return nodeNameToInfo
 }
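CreateNodeNameToInfoMap now takes the node list as a second argument and guarantees an entry for every node, even one with no pods, which is what lets the test hunks above drop their manual NewNodeInfo/SetNode loops. A simplified stand-alone version of that behavior, with illustrative types rather than the real api.Pod, api.Node, and NodeInfo, is sketched below.

package main

import "fmt"

// Simplified stand-ins for the scheduler cache types.
type pod struct{ nodeName string }
type node struct{ name string }
type nodeInfo struct {
	node *node
	pods []*pod
}

// createNodeNameToInfoMap mirrors the updated helper: pods are grouped by
// their node name, and every node from the node list gets an entry even if
// no pod is scheduled on it.
func createNodeNameToInfoMap(pods []*pod, nodes []*node) map[string]*nodeInfo {
	m := map[string]*nodeInfo{}
	for _, p := range pods {
		if _, ok := m[p.nodeName]; !ok {
			m[p.nodeName] = &nodeInfo{}
		}
		m[p.nodeName].pods = append(m[p.nodeName].pods, p)
	}
	for _, n := range nodes {
		if _, ok := m[n.name]; !ok {
			m[n.name] = &nodeInfo{}
		}
		m[n.name].node = n
	}
	return m
}

func main() {
	pods := []*pod{{nodeName: "node-a"}}
	nodes := []*node{{name: "node-a"}, {name: "node-b"}}
	m := createNodeNameToInfoMap(pods, nodes)
	// node-b appears even though it has no pods, so callers can index the map
	// for every node without a separate SetNode loop.
	fmt.Println(len(m), len(m["node-b"].pods)) // 2 0
}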
@@ -42,7 +42,7 @@ func (p PodsToCache) UpdateNode(oldNode, newNode *api.Node) error { return nil }
 func (p PodsToCache) RemoveNode(node *api.Node) error { return nil }
 
 func (p PodsToCache) UpdateNodeNameToInfoMap(infoMap map[string]*schedulercache.NodeInfo) error {
-	infoMap = schedulercache.CreateNodeNameToInfoMap(p)
+	infoMap = schedulercache.CreateNodeNameToInfoMap(p, nil)
 	return nil
 }
 