Merge pull request #62591 from bsalamat/fix_self_affinity

Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Fix an issue in the inter-pod affinity predicate that caused affinity to self to be processed incorrectly

**What this PR does / why we need it**:
Fixes the anti-affinity issue explained in #62232.
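
For context, the problematic scenario is a pod that expresses *required* pod affinity to its own labels. A minimal sketch (the pod name is hypothetical; the affinity term mirrors the one added to the test below):

```go
package main

import (
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// selfAffinityPod carries the label "service": "securityscan" and also
// requires affinity, within the "zone" topology, to pods with that same
// label -- i.e., affinity to itself. The first such pod in the cluster has
// nothing to match but itself, which is the case the predicate must handle.
var selfAffinityPod = &v1.Pod{
	ObjectMeta: metav1.ObjectMeta{
		Name:   "securityscan-1",
		Labels: map[string]string{"service": "securityscan"},
	},
	Spec: v1.PodSpec{
		Affinity: &v1.Affinity{
			PodAffinity: &v1.PodAffinity{
				RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{{
					LabelSelector: &metav1.LabelSelector{
						MatchExpressions: []metav1.LabelSelectorRequirement{{
							Key:      "service",
							Operator: metav1.LabelSelectorOpIn,
							Values:   []string{"securityscan"},
						}},
					},
					TopologyKey: "zone",
				}},
			},
		},
	},
}
```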

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:
Fixes #62232

**Special notes for your reviewer**:

**Release note**:

```release-note
Fix an issue in the inter-pod affinity predicate that caused affinity to self to be processed incorrectly
```

/sig scheduling
Merged by Kubernetes Submit Queue on 2018-04-17 02:11:36 -07:00, committed via GitHub (commit 7966265a51).
3 changed files with 81 additions and 63 deletions


```diff
@@ -119,7 +119,7 @@ func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInf
 	}
 	affinityPods, antiAffinityPods, err := getPodsMatchingAffinity(pod, nodeNameToInfoMap)
 	if err != nil {
-		glog.Errorf("[predicate meta data generation] error finding pods that match affinity terms")
+		glog.Errorf("[predicate meta data generation] error finding pods that match affinity terms: %v", err)
 		return nil
 	}
 	predicateMetadata := &predicateMetadata{
```


```diff
@@ -1164,39 +1164,36 @@ func (c *PodAffinityChecker) InterPodAffinityMatches(pod *v1.Pod, meta algorithm
 	return true, nil, nil
 }
-// anyPodMatchesPodAffinityTerm checks if any of given pods can match the specific podAffinityTerm.
-// First return value indicates whether a matching pod exists on a node that matches the topology key,
-// while the second return value indicates whether a matching pod exists anywhere.
-// TODO: Do we really need any pod matching, or all pods matching? I think the latter.
-func (c *PodAffinityChecker) anyPodMatchesPodAffinityTerm(pod *v1.Pod, pods []*v1.Pod, nodeInfo *schedulercache.NodeInfo, term *v1.PodAffinityTerm) (bool, bool, error) {
-	if len(term.TopologyKey) == 0 {
-		return false, false, fmt.Errorf("empty topologyKey is not allowed except for PreferredDuringScheduling pod anti-affinity")
+// podMatchesPodAffinityTerms checks if the "targetPod" matches the given "terms"
+// of the "pod" on the given "nodeInfo".Node(). It returns three values: 1) whether
+// targetPod matches all the terms and their topologies, 2) whether targetPod
+// matches all the terms label selector and namespaces (AKA term properties),
+// 3) any error.
+func (c *PodAffinityChecker) podMatchesPodAffinityTerms(pod *v1.Pod, targetPod *v1.Pod, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) (bool, bool, error) {
+	if len(terms) == 0 {
+		return false, false, fmt.Errorf("terms array is empty")
 	}
-	matchingPodExists := false
-	namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, term)
-	selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
+	props, err := getAffinityTermProperties(pod, terms)
 	if err != nil {
 		return false, false, err
 	}
-	// Special case: When the topological domain is node, we can limit our
-	// search to pods on that node without searching the entire cluster.
-	if term.TopologyKey == kubeletapis.LabelHostname {
-		pods = nodeInfo.Pods()
+	if !podMatchesAffinityTermProperties(targetPod, props) {
+		return false, false, nil
 	}
-	for _, existingPod := range pods {
-		match := priorityutil.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector)
-		if match {
-			matchingPodExists = true
-			existingPodNode, err := c.info.GetNodeInfo(existingPod.Spec.NodeName)
-			if err != nil {
-				return false, matchingPodExists, err
-			}
-			if priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), existingPodNode, term.TopologyKey) {
-				return true, matchingPodExists, nil
-			}
+	// Namespace and selector of the terms have matched. Now we check topology of the terms.
+	targetPodNode, err := c.info.GetNodeInfo(targetPod.Spec.NodeName)
+	if err != nil {
+		return false, false, err
+	}
+	for _, term := range terms {
+		if len(term.TopologyKey) == 0 {
+			return false, false, fmt.Errorf("empty topologyKey is not allowed except for PreferredDuringScheduling pod anti-affinity")
+		}
+		if !priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), targetPodNode, term.TopologyKey) {
+			return false, true, nil
 		}
 	}
-	return false, matchingPodExists, nil
+	return true, true, nil
 }
 // GetPodAffinityTerms gets pod affinity terms by a pod affinity object.
```
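
The new helper delegates selector-and-namespace matching to `getAffinityTermProperties` and `podMatchesAffinityTermProperties`, which are referenced in the hunk above but defined elsewhere in the PR. A rough, self-contained sketch of their semantics (simplified; the names `termProperty`, `buildTermProperties`, and `matchesAllProperties` are mine, not the PR's):

```go
package main

import (
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
)

// termProperty is a simplified stand-in for what each affinity term reduces
// to: the set of namespaces the term applies to, plus its parsed selector.
type termProperty struct {
	namespaces map[string]bool
	selector   labels.Selector
}

// buildTermProperties mirrors the spirit of getAffinityTermProperties.
func buildTermProperties(pod *v1.Pod, terms []v1.PodAffinityTerm) ([]termProperty, error) {
	props := make([]termProperty, 0, len(terms))
	for _, term := range terms {
		selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
		if err != nil {
			return nil, err
		}
		namespaces := map[string]bool{}
		if len(term.Namespaces) == 0 {
			// An empty namespace list in a term means the pod's own namespace.
			namespaces[pod.Namespace] = true
		} else {
			for _, ns := range term.Namespaces {
				namespaces[ns] = true
			}
		}
		props = append(props, termProperty{namespaces: namespaces, selector: selector})
	}
	return props, nil
}

// matchesAllProperties mirrors podMatchesAffinityTermProperties: true only if
// targetPod is in every term's namespace set and matches every term's selector.
// This is the "second return value" semantics of podMatchesPodAffinityTerms,
// before topology is considered.
func matchesAllProperties(targetPod *v1.Pod, props []termProperty) bool {
	for _, p := range props {
		if !p.namespaces[targetPod.Namespace] || !p.selector.Matches(labels.Set(targetPod.Labels)) {
			return false
		}
	}
	return true
}
```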
```diff
@@ -1460,49 +1457,57 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod,
 		return ErrPodAffinityRulesNotMatch, err
 	}
-	// Check all affinity terms.
-	for _, term := range GetPodAffinityTerms(affinity.PodAffinity) {
-		termMatches, matchingPodExists, err := c.anyPodMatchesPodAffinityTerm(pod, filteredPods, nodeInfo, &term)
-		if err != nil {
-			errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v, err: %v", podName(pod), node.Name, term, err)
-			glog.Error(errMessage)
-			return ErrPodAffinityRulesNotMatch, errors.New(errMessage)
-		}
-		if !termMatches {
-			// If the requirement matches a pod's own labels are namespace, and there are
-			// no other such pods, then disregard the requirement. This is necessary to
-			// not block forever because the first pod of the collection can't be scheduled.
-			if matchingPodExists {
-				glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v",
-					podName(pod), node.Name, term)
-				return ErrPodAffinityRulesNotMatch, nil
-			}
-			namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, &term)
-			selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
+	affinityTerms := GetPodAffinityTerms(affinity.PodAffinity)
+	antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity)
+	matchFound, termsSelectorMatchFound := false, false
+	for _, targetPod := range filteredPods {
+		// Check all affinity terms.
+		if !matchFound && len(affinityTerms) > 0 {
+			affTermsMatch, termsSelectorMatch, err := c.podMatchesPodAffinityTerms(pod, targetPod, nodeInfo, affinityTerms)
 			if err != nil {
-				errMessage := fmt.Sprintf("Cannot parse selector on term %v for pod %v. Details %v", term, podName(pod), err)
+				errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinity, err: %v", podName(pod), node.Name, err)
 				glog.Error(errMessage)
 				return ErrPodAffinityRulesNotMatch, errors.New(errMessage)
 			}
-			match := priorityutil.PodMatchesTermsNamespaceAndSelector(pod, namespaces, selector)
-			if !match {
-				glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v",
-					podName(pod), node.Name, term)
-				return ErrPodAffinityRulesNotMatch, nil
+			if termsSelectorMatch {
+				termsSelectorMatchFound = true
+			}
+			if affTermsMatch {
+				matchFound = true
+			}
+		}
+		// Check all anti-affinity terms.
+		if len(antiAffinityTerms) > 0 {
+			antiAffTermsMatch, _, err := c.podMatchesPodAffinityTerms(pod, targetPod, nodeInfo, antiAffinityTerms)
+			if err != nil || antiAffTermsMatch {
+				glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinityTerm, err: %v",
+					podName(pod), node.Name, err)
+				return ErrPodAntiAffinityRulesNotMatch, nil
 			}
 		}
 	}
-	// Check all anti-affinity terms.
-	for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) {
-		termMatches, _, err := c.anyPodMatchesPodAffinityTerm(pod, filteredPods, nodeInfo, &term)
-		if err != nil || termMatches {
-			glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinityTerm %v, err: %v",
-				podName(pod), node.Name, term, err)
-			return ErrPodAntiAffinityRulesNotMatch, nil
+	if !matchFound && len(affinityTerms) > 0 {
+		// We have not been able to find any matches for the pod's affinity rules.
+		// This pod may be the first pod in a series that have affinity to themselves. In order
+		// to not leave such pods in pending state forever, we check that if no other pod
+		// in the cluster matches the namespace and selector of this pod and the pod matches
+		// its own terms, then we allow the pod to pass the affinity check.
+		if termsSelectorMatchFound {
+			glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity",
+				podName(pod), node.Name)
+			return ErrPodAffinityRulesNotMatch, nil
+		}
+		// Check if pod matches its own affinity properties (namespace and label selector).
+		if !targetPodMatchesAffinityOfPod(pod, pod) {
+			glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity",
+				podName(pod), node.Name)
+			return ErrPodAffinityRulesNotMatch, nil
 		}
 	}
 	if glog.V(10) {
 		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
 		// not logged. There is visible performance gain from it.
```
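
Putting the new flow together, a pod passes the affinity half of this predicate under exactly the conditions distilled below (`affinityAllowed` and its parameter names are illustrative, not code from this PR; it assumes the pod has at least one required affinity term):

```go
// affinityAllowed distills the decision made above.
//   - matchFound: some existing pod matched all terms and their topologies.
//   - selectorMatchFound: some existing pod matched the terms' selectors and
//     namespaces, but on the wrong topology.
//   - podMatchesItself: the pod matches its own terms' selectors/namespaces.
func affinityAllowed(matchFound, selectorMatchFound, podMatchesItself bool) bool {
	if matchFound {
		return true // a real match exists on a suitable topology
	}
	if selectorMatchFound {
		return false // matching pods exist, just not where affinity requires
	}
	// No pod in the cluster matches the terms' selectors at all; let the pod
	// through only if it has affinity to itself, so that the first pod of a
	// self-affinity group is not stuck Pending forever.
	return podMatchesItself
}
```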


```diff
@@ -2865,7 +2865,8 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
 			pod: &v1.Pod{
 				ObjectMeta: metav1.ObjectMeta{
 					Labels: map[string]string{
-						"foo": "bar",
+						"foo":     "bar",
+						"service": "securityscan",
 					},
 				},
 				Spec: v1.PodSpec{
@@ -2884,12 +2885,24 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
 								},
 								TopologyKey: "zone",
 							},
+							{
+								LabelSelector: &metav1.LabelSelector{
+									MatchExpressions: []metav1.LabelSelectorRequirement{
+										{
+											Key:      "service",
+											Operator: metav1.LabelSelectorOpIn,
+											Values:   []string{"securityscan"},
+										},
+									},
+								},
+								TopologyKey: "zone",
+							},
 						},
 					},
 				},
 			},
-			pods: []*v1.Pod{},
+			pods: []*v1.Pod{{Spec: v1.PodSpec{NodeName: "nodeA"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: map[string]string{"foo": "bar"}}}},
 			nodes: []v1.Node{
 				{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"zone": "az1", "hostname": "h1"}}},
 				{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"zone": "az2", "hostname": "h2"}}},
@@ -2976,7 +2989,7 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
 				"nodeB": false,
 				"nodeC": true,
 			},
-			test: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that match the inter pod affinity rule. The pod can not be scheduled onto nodeA and nodeB but can be schedulerd onto nodeC",
+			test: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that match the inter pod affinity rule. The pod can not be scheduled onto nodeA and nodeB but can be scheduled onto nodeC",
 		},
 		{
 			pod: &v1.Pod{
@@ -3124,7 +3137,7 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
 				"nodeB": false,
 				"nodeC": true,
 			},
-			test: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that match the inter pod affinity rule. The pod can not be scheduled onto nodeA, nodeB, but can be schedulerd onto nodeC (NodeC has an existing pod that match the inter pod affinity rule but in different namespace)",
+			test: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that match the inter pod affinity rule. The pod can not be scheduled onto nodeA, nodeB, but can be scheduled onto nodeC (NodeC has an existing pod that match the inter pod affinity rule but in different namespace)",
 		},
 	}
```
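
The updated test gives the incoming pod a second required affinity term on its own `service: securityscan` label and seeds the cluster with a single existing pod (`p1`, labeled `foo: bar`, on nodeA), so the table now exercises the case where one affinity term can only ever be satisfied by the pod itself.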