From 4d7fff1257ae4594dc5db28f0b086726c8dc1b40 Mon Sep 17 00:00:00 2001 From: "Bobby (Babak) Salamat" Date: Sat, 14 Apr 2018 19:32:58 -0700 Subject: [PATCH 1/2] Add test to ensure anti-affinity matches against all terms --- .../algorithm/predicates/predicates_test.go | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/pkg/scheduler/algorithm/predicates/predicates_test.go b/pkg/scheduler/algorithm/predicates/predicates_test.go index eb952428721..1234b7cc667 100644 --- a/pkg/scheduler/algorithm/predicates/predicates_test.go +++ b/pkg/scheduler/algorithm/predicates/predicates_test.go @@ -2939,6 +2939,55 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) { }, test: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that match the inter pod affinity rule. The pod can not be scheduled onto nodeA and nodeB.", }, + { + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + PodAntiAffinity: &v1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "foo", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"abc"}, + }, + }, + }, + TopologyKey: "region", + }, + { + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "service", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"securityscan"}, + }, + }, + }, + TopologyKey: "zone", + }, + }, + }, + }, + }, + }, + pods: []*v1.Pod{ + {Spec: v1.PodSpec{NodeName: "nodeA"}, ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"foo": "abc", "service": "securityscan"}}}, + }, + nodes: []v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"region": "r1", "zone": "z1", "hostname": "nodeA"}}}, + {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"region": "r1", "zone": "z2", "hostname": "nodeB"}}}, + }, + nodesExpectAffinityFailureReasons: [][]algorithm.PredicateFailureReason{{ErrPodAffinityNotMatch, ErrPodAntiAffinityRulesNotMatch}}, + fits: map[string]bool{ + "nodeA": false, + "nodeB": true, + }, + test: "This test ensures that anti-affinity matches a pod when all terms of the anti-affinity rule matches a pod.", + }, { pod: &v1.Pod{ Spec: v1.PodSpec{ From eb213e68c23a04fe240c354eafda09d143014db0 Mon Sep 17 00:00:00 2001 From: "Bobby (Babak) Salamat" Date: Mon, 16 Apr 2018 18:01:17 -0700 Subject: [PATCH 2/2] Fix anti-affinity issue that caused a pod to be considered a match if any of the terms matched (as opposed to all terms matched) --- .../algorithm/predicates/predicates.go | 65 +++++++++---------- 1 file changed, 29 insertions(+), 36 deletions(-) diff --git a/pkg/scheduler/algorithm/predicates/predicates.go b/pkg/scheduler/algorithm/predicates/predicates.go index f2ca8a407ec..3c5338e538a 100644 --- a/pkg/scheduler/algorithm/predicates/predicates.go +++ b/pkg/scheduler/algorithm/predicates/predicates.go @@ -1380,34 +1380,27 @@ func (c *PodAffinityChecker) satisfiesExistingPodsAntiAffinity(pod *v1.Pod, meta return nil, nil } -// anyMatchingPodInTopology checks that any of the given Pods are in the -// topology specified by the affinity term. -func (c *PodAffinityChecker) anyMatchingPodInTopology(pod *v1.Pod, matchingPods map[string][]*v1.Pod, nodeInfo *schedulercache.NodeInfo, term *v1.PodAffinityTerm) (bool, error) { - if len(term.TopologyKey) == 0 { - return false, fmt.Errorf("empty topologyKey is not allowed except for PreferredDuringScheduling pod anti-affinity") - } - if len(matchingPods) == 0 { - return false, nil - } - // Special case: When the topological domain is node, we can limit our - // search to pods on that node without searching the entire cluster. - if term.TopologyKey == kubeletapis.LabelHostname { - if pods, ok := matchingPods[nodeInfo.Node().Name]; ok { - // It may seem odd that we are comparing a node with itself to see if it - // has the same topology key, but it is necessary to check extra conditions - // that the function performs, such as checking that node labels are not nil. - return len(pods) > 0 && priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), nodeInfo.Node(), term.TopologyKey), nil - } - return false, nil - } - // Topology key is not "Hostname". Checking all matching pods. - for nodeName, pods := range matchingPods { - matchingPodNodeInfo, err := c.info.GetNodeInfo(nodeName) +// anyPodsMatchingTopologyTerms checks whether any of the nodes given via +// "targetPods" matches topology of all the "terms" for the give "pod" and "nodeInfo". +func (c *PodAffinityChecker) anyPodsMatchingTopologyTerms(pod *v1.Pod, targetPods map[string][]*v1.Pod, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) (bool, error) { + for nodeName, targetPods := range targetPods { + targetPodNodeInfo, err := c.info.GetNodeInfo(nodeName) if err != nil { return false, err } - if len(pods) > 0 && priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), matchingPodNodeInfo, term.TopologyKey) { - return true, nil + if len(targetPods) > 0 { + allTermsMatched := true + for _, term := range terms { + if !priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), targetPodNodeInfo, term.TopologyKey) { + allTermsMatched = false + break + } + } + if allTermsMatched { + // We have 1 or more pods on the target node that have already matched namespace and selector + // and all of the terms topologies matched the target node. So, there is at least 1 matching pod on the node. + return true, nil + } } } return false, nil @@ -1424,21 +1417,21 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod, if predicateMeta, ok := meta.(*predicateMetadata); ok { // Check all affinity terms. matchingPods := predicateMeta.nodeNameToMatchingAffinityPods - for _, term := range GetPodAffinityTerms(affinity.PodAffinity) { - termMatches, err := c.anyMatchingPodInTopology(pod, matchingPods, nodeInfo, &term) + if affinityTerms := GetPodAffinityTerms(affinity.PodAffinity); len(affinityTerms) > 0 { + matchExists, err := c.anyPodsMatchingTopologyTerms(pod, matchingPods, nodeInfo, affinityTerms) if err != nil { - errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v, err: %v", podName(pod), node.Name, term, err) + errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinity, err: %v", podName(pod), node.Name, err) glog.Errorf(errMessage) return ErrPodAffinityRulesNotMatch, errors.New(errMessage) } - if !termMatches { + if !matchExists { // This pod may the first pod in a series that have affinity to themselves. In order // to not leave such pods in pending state forever, we check that if no other pod // in the cluster matches the namespace and selector of this pod and the pod matches // its own terms, then we allow the pod to pass the affinity check. if !(len(matchingPods) == 0 && targetPodMatchesAffinityOfPod(pod, pod)) { - glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v", - podName(pod), node.Name, term) + glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity", + podName(pod), node.Name) return ErrPodAffinityRulesNotMatch, nil } } @@ -1446,11 +1439,11 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod, // Check all anti-affinity terms. matchingPods = predicateMeta.nodeNameToMatchingAntiAffinityPods - for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) { - termMatches, err := c.anyMatchingPodInTopology(pod, matchingPods, nodeInfo, &term) - if err != nil || termMatches { - glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinityTerm %v, err: %v", - podName(pod), node.Name, term, err) + if antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity); len(antiAffinityTerms) > 0 { + matchExists, err := c.anyPodsMatchingTopologyTerms(pod, matchingPods, nodeInfo, antiAffinityTerms) + if err != nil || matchExists { + glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinity, err: %v", + podName(pod), node.Name, err) return ErrPodAntiAffinityRulesNotMatch, nil } }