Merge pull request #57476 from misterikkit/podAffinityNode
Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Restrict pod affinity search when TopologyKey=kubernetes.io/hostname

**What this PR does / why we need it**:
When a PodAffinityTerm uses TopologyKey=kubernetes.io/hostname, we can avoid searching the entire cluster for a match by only listing pods on the given node.

This is a set of 3 PRs targeting affinity predicate performance (#57476, #57477, #57478). The key takeaway is an approximately 2x speedup in the large anti-affinity benchmark (BenchmarkSchedulingAntiAffinity/500Nodes/5000Pods). The unexpected increase in BenchmarkScheduling/1000Nodes/1000Pods appears to be an outlier and did not recur on subsequent runs.

| test | b.N | master | #57476 | #57477 | #57478 | combined |
| ---- | --- | ------ | ------ | ------ | ------ | -------- |
| BenchmarkScheduling/100Nodes/0Pods | 100 | 39629010 ns/op | 36898566 ns/op (-6.89%) | 38461530 ns/op (-2.95%) | 36214136 ns/op (-8.62%) | 43090781 ns/op (+8.74%) |
| BenchmarkScheduling/100Nodes/1000Pods | 100 | 85489577 ns/op | 69538016 ns/op (-18.66%) | 70104254 ns/op (-18.00%) | 75015585 ns/op (-12.25%) | 80986960 ns/op (-5.27%) |
| BenchmarkScheduling/1000Nodes/0Pods | 100 | 219356660 ns/op | 200149051 ns/op (-8.76%) | 192867469 ns/op (-12.08%) | 196896770 ns/op (-10.24%) | 212563662 ns/op (-3.10%) |
| BenchmarkScheduling/1000Nodes/1000Pods | 100 | 380368238 ns/op | 381786369 ns/op (+0.37%) | 387224973 ns/op (+1.80%) | 417974358 ns/op (+9.89%) | 411140230 ns/op (+8.09%) |
| BenchmarkSchedulingAntiAffinity/500Nodes/250Pods | 250 | 124399176 ns/op | 97568988 ns/op (-21.57%) | 112027363 ns/op (-9.95%) | 129134326 ns/op (+3.81%) | 98607941 ns/op (-20.73%) |
| BenchmarkSchedulingAntiAffinity/500Nodes/5000Pods | 250 | 491677096 ns/op | 441562422 ns/op (-10.19%) | 278127757 ns/op (-43.43%) | 447355609 ns/op (-9.01%) | 226310721 ns/op (-53.97%) |

The "combined" column contains all three patches. Percentages are relative to master.

Methodology: I ran the tests on each branch with this command.

```
make test-integration WHAT="./test/integration/scheduler_perf" KUBE_TEST_ARGS="-run=xxxx -bench=."
```

The benchmarks have a fair amount of variance to them, and I did not run them enough times to measure mean and standard deviation.

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:
Fixes #
The three PRs in this set should collectively fix #54189.

**Special notes for your reviewer**:

**Release note**:

```release-note
Improve scheduler performance of MatchInterPodAffinity predicate.
```
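To make the fast path concrete, here is a minimal illustrative sketch (not code from this PR); the `app=web` selector and the surrounding `main` are hypothetical:

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	// Hypothetical term: "avoid nodes already running a pod labeled
	// app=web". Because TopologyKey is the per-node hostname label, the
	// predicate only needs to scan pods on the candidate node
	// (nodeInfo.Pods()) instead of every pod in the cluster.
	term := v1.PodAffinityTerm{
		LabelSelector: &metav1.LabelSelector{
			MatchLabels: map[string]string{"app": "web"},
		},
		TopologyKey: "kubernetes.io/hostname",
	}
	fmt.Printf("fast-path eligible: %v\n", term.TopologyKey == "kubernetes.io/hostname")
}
```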
```diff
@@ -1116,7 +1116,7 @@ func (c *PodAffinityChecker) InterPodAffinityMatches(pod *v1.Pod, meta algorithm
 // First return value indicates whether a matching pod exists on a node that matches the topology key,
 // while the second return value indicates whether a matching pod exists anywhere.
 // TODO: Do we really need any pod matching, or all pods matching? I think the latter.
-func (c *PodAffinityChecker) anyPodMatchesPodAffinityTerm(pod *v1.Pod, allPods []*v1.Pod, node *v1.Node, term *v1.PodAffinityTerm) (bool, bool, error) {
+func (c *PodAffinityChecker) anyPodMatchesPodAffinityTerm(pod *v1.Pod, pods []*v1.Pod, nodeInfo *schedulercache.NodeInfo, term *v1.PodAffinityTerm) (bool, bool, error) {
 	if len(term.TopologyKey) == 0 {
 		return false, false, fmt.Errorf("empty topologyKey is not allowed except for PreferredDuringScheduling pod anti-affinity")
 	}
@@ -1126,7 +1126,12 @@ func (c *PodAffinityChecker) anyPodMatchesPodAffinityTerm(pod *v1.Pod, allPods [
 	if err != nil {
 		return false, false, err
 	}
-	for _, existingPod := range allPods {
+	// Special case: When the topological domain is node, we can limit our
+	// search to pods on that node without searching the entire cluster.
+	if term.TopologyKey == kubeletapis.LabelHostname {
+		pods = nodeInfo.Pods()
+	}
+	for _, existingPod := range pods {
 		match := priorityutil.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector)
 		if match {
 			matchingPodExists = true
@@ -1134,7 +1139,7 @@ func (c *PodAffinityChecker) anyPodMatchesPodAffinityTerm(pod *v1.Pod, allPods [
 			if err != nil {
 				return false, matchingPodExists, err
 			}
-			if priorityutil.NodesHaveSameTopologyKey(node, existingPodNode, term.TopologyKey) {
+			if priorityutil.NodesHaveSameTopologyKey(nodeInfo.Node(), existingPodNode, term.TopologyKey) {
 				return true, matchingPodExists, nil
 			}
 		}
@@ -1334,7 +1339,7 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod, node
 
 	// Check all affinity terms.
 	for _, term := range GetPodAffinityTerms(affinity.PodAffinity) {
-		termMatches, matchingPodExists, err := c.anyPodMatchesPodAffinityTerm(pod, filteredPods, node, &term)
+		termMatches, matchingPodExists, err := c.anyPodMatchesPodAffinityTerm(pod, filteredPods, nodeInfo, &term)
 		if err != nil {
 			errMessage := fmt.Sprintf("Cannot schedule pod %+v onto node %v, because of PodAffinityTerm %v, err: %v", podName(pod), node.Name, term, err)
 			glog.Error(errMessage)
@@ -1367,7 +1372,7 @@ func (c *PodAffinityChecker) satisfiesPodsAffinityAntiAffinity(pod *v1.Pod, node
 
 	// Check all anti-affinity terms.
 	for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) {
-		termMatches, _, err := c.anyPodMatchesPodAffinityTerm(pod, filteredPods, node, &term)
+		termMatches, _, err := c.anyPodMatchesPodAffinityTerm(pod, filteredPods, nodeInfo, &term)
 		if err != nil || termMatches {
 			glog.V(10).Infof("Cannot schedule pod %+v onto node %v, because of PodAntiAffinityTerm %v, err: %v",
 				podName(pod), node.Name, term, err)
```
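For readers unfamiliar with the helper the diff relies on, `priorityutil.NodesHaveSameTopologyKey` amounts to checking that both nodes carry the topology key label with equal values. A minimal sketch under that assumption (illustrative only, not the real implementation; `nodesHaveSameTopologyKey` and the `mk` constructor are hypothetical):

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// nodesHaveSameTopologyKey is an illustrative stand-in for
// priorityutil.NodesHaveSameTopologyKey: two nodes share a topological
// domain iff both carry the topology key label with the same value.
func nodesHaveSameTopologyKey(nodeA, nodeB *v1.Node, topologyKey string) bool {
	if nodeA == nil || nodeB == nil || len(topologyKey) == 0 {
		return false
	}
	valueA, okA := nodeA.Labels[topologyKey]
	valueB, okB := nodeB.Labels[topologyKey]
	return okA && okB && valueA == valueB
}

func main() {
	mk := func(name string) *v1.Node {
		return &v1.Node{ObjectMeta: metav1.ObjectMeta{
			Name:   name,
			Labels: map[string]string{"kubernetes.io/hostname": name},
		}}
	}
	a, b := mk("node-a"), mk("node-b")
	// The hostname label is unique per node, so the only pods that can
	// share that domain are the ones already on the candidate node --
	// which is why the search can be limited to nodeInfo.Pods().
	fmt.Println(nodesHaveSameTopologyKey(a, a, "kubernetes.io/hostname")) // true
	fmt.Println(nodesHaveSameTopologyKey(a, b, "kubernetes.io/hostname")) // false
}
```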