Fix an issue in inter-pod affinity predicate that cause affinity to self being processed incorrectly

This commit is contained in:
Bobby (Babak) Salamat 2018-04-06 17:43:51 -07:00
parent cb5f1ad9f7
commit c590ec7ae9
3 changed files with 81 additions and 63 deletions

View File

@ -119,7 +119,7 @@ func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInf
} }
affinityPods, antiAffinityPods, err := getPodsMatchingAffinity(pod, nodeNameToInfoMap) affinityPods, antiAffinityPods, err := getPodsMatchingAffinity(pod, nodeNameToInfoMap)
if err != nil { if err != nil {
glog.Errorf("[predicate meta data generation] error finding pods that match affinity terms") glog.Errorf("[predicate meta data generation] error finding pods that match affinity terms: %v", err)
return nil return nil
} }
predicateMetadata := &predicateMetadata{ predicateMetadata := &predicateMetadata{

View File

@ -1164,39 +1164,36 @@ func (c *PodAffinityChecker) InterPodAffinityMatches(pod *v1.Pod, meta algorithm
return true, nil, nil return true, nil, nil
} }
// anyPodMatchesPodAffinityTerm checks if any of given pods can match the specific podAffinityTerm. // podMatchesPodAffinityTerms checks if the "targetPod" matches the given "terms"
// First return value indicates whether a matching pod exists on a node that matches the topology key, // of the "pod" on the given "nodeInfo".Node(). It returns three values: 1) whether
// while the second return value indicates whether a matching pod exists anywhere. // targetPod matches all the terms and their topologies, 2) whether targetPod
// TODO: Do we really need any pod matching, or all pods matching? I think the latter. // matches all the terms label selector and namespaces (AKA term properties),
func (c *PodAffinityChecker) anyPodMatchesPodAffinityTerm(pod *v1.Pod, pods []*v1.Pod, nodeInfo *schedulercache.NodeInfo, term *v1.PodAffinityTerm) (bool, bool, error) { // 3) any error.
if len(term.TopologyKey) == 0 { func (c *PodAffinityChecker) podMatchesPodAffinityTerms(pod *v1.Pod, targetPod *v1.Pod, nodeInfo *schedulercache.NodeInfo, terms []v1.PodAffinityTerm) (bool, bool, error) {
return false, false, fmt.Errorf("empty topologyKey is not allowed except for PreferredDuringScheduling pod anti-affinity") if len(terms) == 0 {
return false, false, fmt.Errorf("terms array is empty")
} }
matchingPodExists := false props, err := getAffinityTermProperties(pod, terms)
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil { if err != nil {
return false, false, err return false, false, err
} }
// Special case: When the topological domain is node, we can limit our affinityTermPropertiesMatch := podMatchesAffinityTermProperties(targetPod, props)
// search to pods on that node without searching the entire cluster. if !affinityTermPropertiesMatch {
if term.TopologyKey == kubeletapis.LabelHostname { return false, false, nil
pods = nodeInfo.Pods()
} }
for _, existingPod := range pods { targetPodNode, err := c.info.GetNodeInfo(targetPod.Spec.NodeName)
match := priorityutil.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector)
if match {
matchingPodExists = true
existingPodNode, err := c.info.GetNodeInfo(existingPod.Spec.NodeName)
if err != nil { if err != nil {
return false, matchingPodExists, err return false, false, err
} }
if priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), existingPodNode, term.TopologyKey) { for _, term := range terms {
return true, matchingPodExists, nil if len(term.TopologyKey) == 0 {
return false, false, fmt.Errorf("empty topologyKey is not allowed except for PreferredDuringScheduling pod anti-affinity")
}
if !priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), targetPodNode, term.TopologyKey) {
return false, affinityTermPropertiesMatch, nil
} }
} }
} return true, affinityTermPropertiesMatch, nil
return false, matchingPodExists, nil
} }
// GetPodAffinityTerms gets pod affinity terms by a pod affinity object. // GetPodAffinityTerms gets pod affinity terms by a pod affinity object.
@ -1460,49 +1457,57 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod,
return ErrPodAffinityRulesNotMatch, err return ErrPodAffinityRulesNotMatch, err
} }
affinityTerms := GetPodAffinityTerms(affinity.PodAffinity)
antiAffinityTerms := GetPodAntiAffinityTerms(affinity.PodAntiAffinity)
matchFound, termsSelectorMatchFound := false, false
for _, targetPod := range filteredPods {
// Check all affinity terms. // Check all affinity terms.
for _, term := range GetPodAffinityTerms(affinity.PodAffinity) { if !matchFound && len(affinityTerms) > 0 {
termMatches, matchingPodExists, err := c.anyPodMatchesPodAffinityTerm(pod, filteredPods, nodeInfo, &term) affTermsMatch, termsSelectorMatch, err := c.podMatchesPodAffinityTerms(pod, targetPod, nodeInfo, affinityTerms)
if err != nil { if err != nil {
errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v, err: %v", podName(pod), node.Name, term, err) errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinity, err: %v", podName(pod), node.Name, err)
glog.Error(errMessage) glog.Error(errMessage)
return ErrPodAffinityRulesNotMatch, errors.New(errMessage) return ErrPodAffinityRulesNotMatch, errors.New(errMessage)
} }
if !termMatches { if termsSelectorMatch {
// If the requirement matches a pod's own labels are namespace, and there are termsSelectorMatchFound = true
// no other such pods, then disregard the requirement. This is necessary to }
// not block forever because the first pod of the collection can't be scheduled. if affTermsMatch {
if matchingPodExists { matchFound = true
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v", }
podName(pod), node.Name, term) }
// Check all anti-affinity terms.
if len(antiAffinityTerms) > 0 {
antiAffTermsMatch, _, err := c.podMatchesPodAffinityTerms(pod, targetPod, nodeInfo, antiAffinityTerms)
if err != nil || antiAffTermsMatch {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinityTerm, err: %v",
podName(pod), node.Name, err)
return ErrPodAntiAffinityRulesNotMatch, nil
}
}
}
if !matchFound && len(affinityTerms) > 0 {
// We have not been able to find any matches for the pod's affinity rules.
// This pod may the first pod in a series that have affinity to themselves. In order
// to not leave such pods in pending state forever, we check that if no other pod
// in the cluster matches the namespace and selector of this pod and the pod matches
// its own terms, then we allow the pod to pass the affinity check.
if termsSelectorMatchFound {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity",
podName(pod), node.Name)
return ErrPodAffinityRulesNotMatch, nil return ErrPodAffinityRulesNotMatch, nil
} }
namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, &term) // Check if pod matches its own affinity properties (namespace and label selector).
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector) if !targetPodMatchesAffinityOfPod(pod, pod) {
if err != nil { glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinity",
errMessage := fmt.Sprintf("Cannot parse selector on term %v for pod %v. Details %v", term, podName(pod), err) podName(pod), node.Name)
glog.Error(errMessage)
return ErrPodAffinityRulesNotMatch, errors.New(errMessage)
}
match := priorityutil.PodMatchesTermsNamespaceAndSelector(pod, namespaces, selector)
if !match {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v",
podName(pod), node.Name, term)
return ErrPodAffinityRulesNotMatch, nil return ErrPodAffinityRulesNotMatch, nil
} }
} }
} }
// Check all anti-affinity terms.
for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) {
termMatches, _, err := c.anyPodMatchesPodAffinityTerm(pod, filteredPods, nodeInfo, &term)
if err != nil || termMatches {
glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinityTerm %v, err: %v",
podName(pod), node.Name, term, err)
return ErrPodAntiAffinityRulesNotMatch, nil
}
}
}
if glog.V(10) { if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is // We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it. // not logged. There is visible performance gain from it.

View File

@ -2866,6 +2866,7 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{ Labels: map[string]string{
"foo": "bar", "foo": "bar",
"service": "securityscan",
}, },
}, },
Spec: v1.PodSpec{ Spec: v1.PodSpec{
@ -2884,12 +2885,24 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
}, },
TopologyKey: "zone", TopologyKey: "zone",
}, },
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "service",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"securityscan"},
},
},
},
TopologyKey: "zone",
}, },
}, },
}, },
}, },
}, },
pods: []*v1.Pod{}, },
pods: []*v1.Pod{{Spec: v1.PodSpec{NodeName: "nodeA"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: map[string]string{"foo": "bar"}}}},
nodes: []v1.Node{ nodes: []v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"zone": "az1", "hostname": "h1"}}}, {ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"zone": "az1", "hostname": "h1"}}},
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"zone": "az2", "hostname": "h2"}}}, {ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"zone": "az2", "hostname": "h2"}}},
@ -2976,7 +2989,7 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
"nodeB": false, "nodeB": false,
"nodeC": true, "nodeC": true,
}, },
test: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that match the inter pod affinity rule. The pod can not be scheduled onto nodeA and nodeB but can be schedulerd onto nodeC", test: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that match the inter pod affinity rule. The pod can not be scheduled onto nodeA and nodeB but can be scheduled onto nodeC",
}, },
{ {
pod: &v1.Pod{ pod: &v1.Pod{
@ -3124,7 +3137,7 @@ func TestInterPodAffinityWithMultipleNodes(t *testing.T) {
"nodeB": false, "nodeB": false,
"nodeC": true, "nodeC": true,
}, },
test: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that match the inter pod affinity rule. The pod can not be scheduled onto nodeA, nodeB, but can be schedulerd onto nodeC (NodeC has an existing pod that match the inter pod affinity rule but in different namespace)", test: "NodeA and nodeB have same topologyKey and label value. NodeA has an existing pod that match the inter pod affinity rule. The pod can not be scheduled onto nodeA, nodeB, but can be scheduled onto nodeC (NodeC has an existing pod that match the inter pod affinity rule but in different namespace)",
}, },
} }