diff --git a/pkg/scheduler/algorithm/predicates/metadata.go b/pkg/scheduler/algorithm/predicates/metadata.go index c9d36003dde..f6180eb6f96 100644 --- a/pkg/scheduler/algorithm/predicates/metadata.go +++ b/pkg/scheduler/algorithm/predicates/metadata.go @@ -229,12 +229,12 @@ func getTPMapMatchingSpreadConstraints(pod *v1.Pod, nodeInfoMap map[string]*sche } // In accordance to design, if NodeAffinity or NodeSelector is defined, // spreading is applied to nodes that pass those filters. - if !podMatchesNodeSelectorAndAffinityTerms(pod, node) { + if !PodMatchesNodeSelectorAndAffinityTerms(pod, node) { return } // Ensure current node's labels contains all topologyKeys in 'constraints'. - if !nodeLabelsMatchSpreadConstraints(node.Labels, constraints) { + if !NodeLabelsMatchSpreadConstraints(node.Labels, constraints) { return } nodeTopologyMaps := newTopologyPairsMaps() @@ -245,7 +245,7 @@ func getTPMapMatchingSpreadConstraints(pod *v1.Pod, nodeInfoMap map[string]*sche if existingPod.Namespace != pod.Namespace { continue } - ok, err := podMatchesSpreadConstraint(existingPod.Labels, constraint) + ok, err := PodMatchesSpreadConstraint(existingPod.Labels, constraint) if err != nil { errCh.SendErrorWithCancel(err, cancel) return @@ -304,10 +304,11 @@ func getHardTopologySpreadConstraints(pod *v1.Pod) (constraints []v1.TopologySpr return } -// some corner cases: +// PodMatchesSpreadConstraint verifies if matches . +// Some corner cases: // 1. podLabelSet = nil => returns (false, nil) // 2. constraint.LabelSelector = nil => returns (false, nil) -func podMatchesSpreadConstraint(podLabelSet labels.Set, constraint v1.TopologySpreadConstraint) (bool, error) { +func PodMatchesSpreadConstraint(podLabelSet labels.Set, constraint v1.TopologySpreadConstraint) (bool, error) { selector, err := metav1.LabelSelectorAsSelector(constraint.LabelSelector) if err != nil { return false, err @@ -318,8 +319,8 @@ func podMatchesSpreadConstraint(podLabelSet labels.Set, constraint v1.TopologySp return true, nil } -// check if ALL topology keys in spread constraints are present in node labels -func nodeLabelsMatchSpreadConstraints(nodeLabels map[string]string, constraints []v1.TopologySpreadConstraint) bool { +// NodeLabelsMatchSpreadConstraints checks if ALL topology keys in spread constraints are present in node labels. +func NodeLabelsMatchSpreadConstraints(nodeLabels map[string]string, constraints []v1.TopologySpreadConstraint) bool { for _, constraint := range constraints { if _, ok := nodeLabels[constraint.TopologyKey]; !ok { return false @@ -388,7 +389,7 @@ func (m *topologyPairsPodSpreadMap) addPod(addedPod, preemptorPod *v1.Pod, node return nil } constraints := getHardTopologySpreadConstraints(preemptorPod) - if !nodeLabelsMatchSpreadConstraints(node.Labels, constraints) { + if !NodeLabelsMatchSpreadConstraints(node.Labels, constraints) { return nil } @@ -396,7 +397,7 @@ func (m *topologyPairsPodSpreadMap) addPod(addedPod, preemptorPod *v1.Pod, node minMatchNeedingUpdate := make(map[string]struct{}) podLabelSet := labels.Set(addedPod.Labels) for _, constraint := range constraints { - if match, err := podMatchesSpreadConstraint(podLabelSet, constraint); err != nil { + if match, err := PodMatchesSpreadConstraint(podLabelSet, constraint); err != nil { return err } else if !match { continue diff --git a/pkg/scheduler/algorithm/predicates/metadata_test.go b/pkg/scheduler/algorithm/predicates/metadata_test.go index 911c05922bb..70d80f9fc69 100644 --- a/pkg/scheduler/algorithm/predicates/metadata_test.go +++ b/pkg/scheduler/algorithm/predicates/metadata_test.go @@ -904,12 +904,12 @@ func TestPodMatchesSpreadConstraint(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { podLabelSet := labels.Set(tt.podLabels) - got, err := podMatchesSpreadConstraint(podLabelSet, tt.constraint) + got, err := PodMatchesSpreadConstraint(podLabelSet, tt.constraint) if (err != nil) != tt.wantErr { - t.Errorf("podMatchesSpreadConstraint() error = %v, wantErr %v", err, tt.wantErr) + t.Errorf("PodMatchesSpreadConstraint() error = %v, wantErr %v", err, tt.wantErr) } if got != tt.want { - t.Errorf("podMatchesSpreadConstraint() = %v, want %v", got, tt.want) + t.Errorf("PodMatchesSpreadConstraint() = %v, want %v", got, tt.want) } }) } diff --git a/pkg/scheduler/algorithm/predicates/predicates.go b/pkg/scheduler/algorithm/predicates/predicates.go index 059c11220a6..2f6cbe3e76f 100644 --- a/pkg/scheduler/algorithm/predicates/predicates.go +++ b/pkg/scheduler/algorithm/predicates/predicates.go @@ -853,9 +853,9 @@ func nodeMatchesNodeSelectorTerms(node *v1.Node, nodeSelectorTerms []v1.NodeSele return v1helper.MatchNodeSelectorTerms(nodeSelectorTerms, labels.Set(node.Labels), fields.Set(nodeFields)) } -// podMatchesNodeSelectorAndAffinityTerms checks whether the pod is schedulable onto nodes according to +// PodMatchesNodeSelectorAndAffinityTerms checks whether the pod is schedulable onto nodes according to // the requirements in both NodeAffinity and nodeSelector. -func podMatchesNodeSelectorAndAffinityTerms(pod *v1.Pod, node *v1.Node) bool { +func PodMatchesNodeSelectorAndAffinityTerms(pod *v1.Pod, node *v1.Node) bool { // Check if node.Labels match pod.Spec.NodeSelector. if len(pod.Spec.NodeSelector) > 0 { selector := labels.SelectorFromSet(pod.Spec.NodeSelector) @@ -906,7 +906,7 @@ func PodMatchNodeSelector(pod *v1.Pod, meta PredicateMetadata, nodeInfo *schedul if node == nil { return false, nil, fmt.Errorf("node not found") } - if podMatchesNodeSelectorAndAffinityTerms(pod, node) { + if PodMatchesNodeSelectorAndAffinityTerms(pod, node) { return true, nil, nil } return false, []PredicateFailureReason{ErrNodeSelectorNotMatch}, nil @@ -1748,7 +1748,7 @@ func EvenPodsSpreadPredicate(pod *v1.Pod, meta PredicateMetadata, nodeInfo *sche return false, []PredicateFailureReason{ErrTopologySpreadConstraintsNotMatch}, nil } - selfMatch, err := podMatchesSpreadConstraint(podLabelSet, constraint) + selfMatch, err := PodMatchesSpreadConstraint(podLabelSet, constraint) if err != nil { return false, nil, err } diff --git a/pkg/scheduler/algorithm/priorities/BUILD b/pkg/scheduler/algorithm/priorities/BUILD index 42945a31811..f57986635c4 100644 --- a/pkg/scheduler/algorithm/priorities/BUILD +++ b/pkg/scheduler/algorithm/priorities/BUILD @@ -10,6 +10,7 @@ go_library( name = "go_default_library", srcs = [ "balanced_resource_allocation.go", + "even_pods_spread.go", "image_locality.go", "interpod_affinity.go", "least_requested.go", @@ -37,6 +38,7 @@ go_library( "//pkg/scheduler/algorithm/priorities/util:go_default_library", "//pkg/scheduler/api:go_default_library", "//pkg/scheduler/nodeinfo:go_default_library", + "//pkg/scheduler/util:go_default_library", "//pkg/util/node:go_default_library", "//pkg/util/parsers:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", @@ -54,6 +56,7 @@ go_test( name = "go_default_test", srcs = [ "balanced_resource_allocation_test.go", + "even_pods_spread_test.go", "image_locality_test.go", "interpod_affinity_test.go", "least_requested_test.go", diff --git a/pkg/scheduler/algorithm/priorities/even_pods_spread.go b/pkg/scheduler/algorithm/priorities/even_pods_spread.go new file mode 100644 index 00000000000..121cc032df3 --- /dev/null +++ b/pkg/scheduler/algorithm/priorities/even_pods_spread.go @@ -0,0 +1,209 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package priorities + +import ( + "context" + "math" + "sync/atomic" + + "k8s.io/api/core/v1" + "k8s.io/client-go/util/workqueue" + "k8s.io/kubernetes/pkg/scheduler/algorithm/predicates" + schedulerapi "k8s.io/kubernetes/pkg/scheduler/api" + schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo" + schedutil "k8s.io/kubernetes/pkg/scheduler/util" + + "k8s.io/klog" +) + +type topologyPair struct { + key string + value string +} + +type topologySpreadConstraintsMap struct { + // nodeNameToPodCounts is keyed with node name, and valued with the number of matching pods. + nodeNameToPodCounts map[string]int64 + // topologyPairToPodCounts is keyed with topologyPair, and valued with the number of matching pods. + topologyPairToPodCounts map[topologyPair]*int64 +} + +func newTopologySpreadConstraintsMap() *topologySpreadConstraintsMap { + return &topologySpreadConstraintsMap{ + nodeNameToPodCounts: make(map[string]int64), + topologyPairToPodCounts: make(map[topologyPair]*int64), + } +} + +// Note: the passed in are the "filtered" nodes which have passed Predicates. +// This function iterates to filter out the nodes which don't have required topologyKey(s), +// and initialize two maps: +// 1) t.topologyPairToPodCounts: keyed with both eligible topology pair and node names. +// 2) t.nodeNameToPodCounts: keyed with node name, and valued with a *int64 pointer for eligible node only. +func (t *topologySpreadConstraintsMap) initialize(pod *v1.Pod, nodes []*v1.Node) { + constraints := getSoftTopologySpreadConstraints(pod) + for _, node := range nodes { + if !predicates.NodeLabelsMatchSpreadConstraints(node.Labels, constraints) { + continue + } + for _, constraint := range constraints { + pair := topologyPair{key: constraint.TopologyKey, value: node.Labels[constraint.TopologyKey]} + if t.topologyPairToPodCounts[pair] == nil { + t.topologyPairToPodCounts[pair] = new(int64) + } + } + t.nodeNameToPodCounts[node.Name] = 0 + // For those nodes which don't have all required topologyKeys present, it's intentional to keep + // those entries absent in nodeNameToPodCounts, so that we're able to score them to 0 afterwards. + } +} + +// CalculateEvenPodsSpreadPriority computes a score by checking through the topologySpreadConstraints +// that are with WhenUnsatisfiable=ScheduleAnyway (a.k.a soft constraint). +// The function works as below: +// 1) In all nodes, calculate the number of pods which match 's soft topology spread constraints. +// 2) Group the number calculated in 1) by topologyPair, and sum up to corresponding candidate nodes. +// 3) Finally normalize the number to 0~10. The node with the highest score is the most preferred. +// Note: Symmetry is not applicable. We only weigh how incomingPod matches existingPod. +// Whether existingPod matches incomingPod doesn't contribute to the final score. +// This is different from the Affinity API. +func CalculateEvenPodsSpreadPriority(pod *v1.Pod, nodeNameToInfo map[string]*schedulernodeinfo.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) { + result := make(schedulerapi.HostPriorityList, len(nodes)) + // return if incoming pod doesn't have soft topology spread constraints. + constraints := getSoftTopologySpreadConstraints(pod) + if len(constraints) == 0 { + return result, nil + } + + t := newTopologySpreadConstraintsMap() + t.initialize(pod, nodes) + + allNodeNames := make([]string, 0, len(nodeNameToInfo)) + for name := range nodeNameToInfo { + allNodeNames = append(allNodeNames, name) + } + + errCh := schedutil.NewErrorChannel() + ctx, cancel := context.WithCancel(context.Background()) + processAllNode := func(i int) { + nodeInfo := nodeNameToInfo[allNodeNames[i]] + node := nodeInfo.Node() + if node == nil { + return + } + // (1) `node` should satisfy incoming pod's NodeSelector/NodeAffinity + // (2) All topologyKeys need to be present in `node` + if !predicates.PodMatchesNodeSelectorAndAffinityTerms(pod, node) || + !predicates.NodeLabelsMatchSpreadConstraints(node.Labels, constraints) { + return + } + + for _, constraint := range constraints { + pair := topologyPair{key: constraint.TopologyKey, value: node.Labels[constraint.TopologyKey]} + // If current topology pair is not associated with any candidate node, + // continue to avoid unnecessary calculation. + if t.topologyPairToPodCounts[pair] == nil { + continue + } + + // indicates how many pods (on current node) match the . + matchSum := int64(0) + for _, existingPod := range nodeInfo.Pods() { + match, err := predicates.PodMatchesSpreadConstraint(existingPod.Labels, constraint) + if err != nil { + errCh.SendErrorWithCancel(err, cancel) + return + } + if match { + matchSum++ + } + } + atomic.AddInt64(t.topologyPairToPodCounts[pair], matchSum) + } + } + workqueue.ParallelizeUntil(ctx, 16, len(allNodeNames), processAllNode) + if err := errCh.ReceiveError(); err != nil { + return nil, err + } + + var minCount int64 = math.MaxInt64 + // sums up the number of matching pods on each qualified topology pair + var total int64 + for _, node := range nodes { + if _, ok := t.nodeNameToPodCounts[node.Name]; !ok { + continue + } + + // For each present , current node gets a credit of . + // And we add to to reverse the final score later. + for _, constraint := range constraints { + if tpVal, ok := node.Labels[constraint.TopologyKey]; ok { + pair := topologyPair{key: constraint.TopologyKey, value: tpVal} + matchSum := *t.topologyPairToPodCounts[pair] + t.nodeNameToPodCounts[node.Name] += matchSum + total += matchSum + } + } + if t.nodeNameToPodCounts[node.Name] < minCount { + minCount = t.nodeNameToPodCounts[node.Name] + } + } + + // calculate final priority score for each node + // TODO(Huang-Wei): in alpha version, we keep the formula as simple as possible. + // current version ranks the nodes properly, but it doesn't take MaxSkew into + // consideration, we may come up with a better formula in the future. + maxMinDiff := total - minCount + for i := range nodes { + node := nodes[i] + result[i].Host = node.Name + + // debugging purpose: print the value for each node + // score must be pointer here, otherwise it's always 0 + if klog.V(10) { + defer func(score *int, nodeName string) { + klog.Infof("%v -> %v: EvenPodsSpreadPriority, Score: (%d)", pod.Name, nodeName, *score) + }(&result[i].Score, node.Name) + } + + if _, ok := t.nodeNameToPodCounts[node.Name]; !ok { + result[i].Score = 0 + continue + } + if maxMinDiff == 0 { + result[i].Score = schedulerapi.MaxPriority + continue + } + fScore := float64(schedulerapi.MaxPriority) * (float64(total-t.nodeNameToPodCounts[node.Name]) / float64(maxMinDiff)) + result[i].Score = int(fScore) + } + + return result, nil +} + +// TODO(Huang-Wei): combine this with getHardTopologySpreadConstraints() in predicates package +func getSoftTopologySpreadConstraints(pod *v1.Pod) (constraints []v1.TopologySpreadConstraint) { + if pod != nil { + for _, constraint := range pod.Spec.TopologySpreadConstraints { + if constraint.WhenUnsatisfiable == v1.ScheduleAnyway { + constraints = append(constraints, constraint) + } + } + } + return +} diff --git a/pkg/scheduler/algorithm/priorities/even_pods_spread_test.go b/pkg/scheduler/algorithm/priorities/even_pods_spread_test.go new file mode 100644 index 00000000000..b7da0dbe69d --- /dev/null +++ b/pkg/scheduler/algorithm/priorities/even_pods_spread_test.go @@ -0,0 +1,537 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package priorities + +import ( + "fmt" + "reflect" + "testing" + + "k8s.io/api/core/v1" + schedulerapi "k8s.io/kubernetes/pkg/scheduler/api" + schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo" + st "k8s.io/kubernetes/pkg/scheduler/testing" +) + +func Test_topologySpreadConstraintsMap_initialize(t *testing.T) { + tests := []struct { + name string + pod *v1.Pod + nodes []*v1.Node + wantNodeNameMap map[string]int64 + wantTopologyPairMap map[topologyPair]*int64 + }{ + { + name: "normal case", + pod: st.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", softSpread, st.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + nodes: []*v1.Node{ + st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + st.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + }, + wantNodeNameMap: map[string]int64{ + "node-a": 0, + "node-b": 0, + "node-x": 0, + }, + wantTopologyPairMap: map[topologyPair]*int64{ + {key: "zone", value: "zone1"}: new(int64), + {key: "zone", value: "zone2"}: new(int64), + {key: "node", value: "node-a"}: new(int64), + {key: "node", value: "node-b"}: new(int64), + {key: "node", value: "node-x"}: new(int64), + }, + }, + { + name: "node-x doesn't have label zone", + pod: st.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", softSpread, st.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("bar").Obj()). + Obj(), + nodes: []*v1.Node{ + st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + st.MakeNode().Name("node-x").Label("node", "node-x").Obj(), + }, + wantNodeNameMap: map[string]int64{ + "node-a": 0, + "node-b": 0, + }, + wantTopologyPairMap: map[topologyPair]*int64{ + {key: "zone", value: "zone1"}: new(int64), + {key: "node", value: "node-a"}: new(int64), + {key: "node", value: "node-b"}: new(int64), + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tMap := newTopologySpreadConstraintsMap() + tMap.initialize(tt.pod, tt.nodes) + if !reflect.DeepEqual(tMap.nodeNameToPodCounts, tt.wantNodeNameMap) { + t.Errorf("initilize().nodeNameToPodCounts = %#v, want %#v", tMap.nodeNameToPodCounts, tt.wantNodeNameMap) + } + if !reflect.DeepEqual(tMap.topologyPairToPodCounts, tt.wantTopologyPairMap) { + t.Errorf("initilize().topologyPairToPodCounts = %#v, want %#v", tMap.topologyPairToPodCounts, tt.wantTopologyPairMap) + } + }) + } +} + +func TestCalculateEvenPodsSpreadPriority(t *testing.T) { + tests := []struct { + name string + pod *v1.Pod + existingPods []*v1.Pod + nodes []*v1.Node + failedNodes []*v1.Node // nodes + failedNodes = all nodes + want schedulerapi.HostPriorityList + }{ + // Explanation on the Legend: + // a) X/Y means there are X matching pods on node1 and Y on node2, both nodes are candidates + // (i.e. they have passed all predicates) + // b) X/~Y~ means there are X matching pods on node1 and Y on node2, but node Y is NOT a candidate + // c) X/?Y? means there are X matching pods on node1 and Y on node2, both nodes are candidates + // but node2 either i) doesn't have all required topologyKeys present, or ii) doesn't match + // incoming pod's nodeSelector/nodeAffinity + { + // if there is only one candidate node, it should be scored to 10 + name: "one constraint on node, no existing pods", + pod: st.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + nodes: []*v1.Node{ + st.MakeNode().Name("node-a").Label("node", "node-a").Obj(), + st.MakeNode().Name("node-b").Label("node", "node-b").Obj(), + }, + want: []schedulerapi.HostPriority{ + {Host: "node-a", Score: 10}, + {Host: "node-b", Score: 10}, + }, + }, + { + // if there is only one candidate node, it should be scored to 10 + name: "one constraint on node, only one node is candidate", + pod: st.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + existingPods: []*v1.Pod{ + st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + }, + nodes: []*v1.Node{ + st.MakeNode().Name("node-a").Label("node", "node-a").Obj(), + }, + failedNodes: []*v1.Node{ + st.MakeNode().Name("node-b").Label("node", "node-b").Obj(), + }, + want: []schedulerapi.HostPriority{ + {Host: "node-a", Score: 10}, + }, + }, + { + name: "one constraint on node, all nodes have the same number of matching pods", + pod: st.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + existingPods: []*v1.Pod{ + st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + }, + nodes: []*v1.Node{ + st.MakeNode().Name("node-a").Label("node", "node-a").Obj(), + st.MakeNode().Name("node-b").Label("node", "node-b").Obj(), + }, + want: []schedulerapi.HostPriority{ + {Host: "node-a", Score: 10}, + {Host: "node-b", Score: 10}, + }, + }, + { + // matching pods spread as 2/1/0/3, total = 6 + // after reversing, it's 4/5/6/3 + // so scores = 40/6, 50/6, 60/6, 30/6 + name: "one constraint on node, all 4 nodes are candidates", + pod: st.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + existingPods: []*v1.Pod{ + st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + st.MakePod().Name("p-d1").Node("node-d").Label("foo", "").Obj(), + st.MakePod().Name("p-d2").Node("node-d").Label("foo", "").Obj(), + st.MakePod().Name("p-d3").Node("node-d").Label("foo", "").Obj(), + }, + nodes: []*v1.Node{ + st.MakeNode().Name("node-a").Label("node", "node-a").Obj(), + st.MakeNode().Name("node-b").Label("node", "node-b").Obj(), + st.MakeNode().Name("node-c").Label("node", "node-c").Obj(), + st.MakeNode().Name("node-d").Label("node", "node-d").Obj(), + }, + failedNodes: []*v1.Node{}, + want: []schedulerapi.HostPriority{ + {Host: "node-a", Score: 6}, + {Host: "node-b", Score: 8}, + {Host: "node-c", Score: 10}, + {Host: "node-d", Score: 5}, + }, + }, + { + // matching pods spread as 4/2/1/~3~, total = 4+2+1 = 7 (as node4 is not a candidate) + // after reversing, it's 3/5/6 + // so scores = 30/6, 50/6, 60/6 + name: "one constraint on node, 3 out of 4 nodes are candidates", + pod: st.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + existingPods: []*v1.Pod{ + st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-a3").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-a4").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + st.MakePod().Name("p-b2").Node("node-b").Label("foo", "").Obj(), + st.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(), + st.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + st.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + st.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + }, + nodes: []*v1.Node{ + st.MakeNode().Name("node-a").Label("node", "node-a").Obj(), + st.MakeNode().Name("node-b").Label("node", "node-b").Obj(), + st.MakeNode().Name("node-x").Label("node", "node-x").Obj(), + }, + failedNodes: []*v1.Node{ + st.MakeNode().Name("node-y").Label("node", "node-y").Obj(), + }, + want: []schedulerapi.HostPriority{ + {Host: "node-a", Score: 5}, + {Host: "node-b", Score: 8}, + {Host: "node-x", Score: 10}, + }, + }, + { + // matching pods spread as 4/?2?/1/~3~, total = 4+?+1 = 5 (as node2 is problematic) + // after reversing, it's 1/?/4 + // so scores = 10/4, 0, 40/4 + name: "one constraint on node, 3 out of 4 nodes are candidates", + pod: st.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + existingPods: []*v1.Pod{ + st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-a3").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-a4").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + st.MakePod().Name("p-b2").Node("node-b").Label("foo", "").Obj(), + st.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(), + st.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + st.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + st.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + }, + nodes: []*v1.Node{ + st.MakeNode().Name("node-a").Label("node", "node-a").Obj(), + st.MakeNode().Name("node-b").Label("n", "node-b").Obj(), // label `n` doesn't match topologyKey + st.MakeNode().Name("node-x").Label("node", "node-x").Obj(), + }, + failedNodes: []*v1.Node{ + st.MakeNode().Name("node-y").Label("node", "node-y").Obj(), + }, + want: []schedulerapi.HostPriority{ + {Host: "node-a", Score: 2}, + {Host: "node-b", Score: 0}, + {Host: "node-x", Score: 10}, + }, + }, + { + // matching pods spread as 4/2/1/~3~, total = 6+6+4 = 16 (as topologyKey is zone instead of node) + // after reversing, it's 10/10/12 + // so scores = 100/12, 100/12, 120/12 + name: "one constraint on zone, 3 out of 4 nodes are candidates", + pod: st.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", softSpread, st.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + existingPods: []*v1.Pod{ + st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-a3").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-a4").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + st.MakePod().Name("p-b2").Node("node-b").Label("foo", "").Obj(), + st.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(), + st.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + st.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + st.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + }, + nodes: []*v1.Node{ + st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + st.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + }, + failedNodes: []*v1.Node{ + st.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + want: []schedulerapi.HostPriority{ + {Host: "node-a", Score: 8}, + {Host: "node-b", Score: 8}, + {Host: "node-x", Score: 10}, + }, + }, + { + // matching pods spread as 2/~1~/2/~4~, total = 2+3 + 2+6 = 13 (zone and node should be both summed up) + // after reversing, it's 8/5 + // so scores = 80/8, 50/8 + name: "two constraints on zone and node, 2 out of 4 nodes are candidates", + pod: st.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", softSpread, st.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + existingPods: []*v1.Pod{ + st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(), + st.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(), + st.MakePod().Name("p-x2").Node("node-x").Label("foo", "").Obj(), + st.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + st.MakePod().Name("p-y2").Node("node-y").Label("foo", "").Obj(), + st.MakePod().Name("p-y3").Node("node-y").Label("foo", "").Obj(), + st.MakePod().Name("p-y4").Node("node-y").Label("foo", "").Obj(), + }, + nodes: []*v1.Node{ + st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + st.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + }, + failedNodes: []*v1.Node{ + st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + st.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + want: []schedulerapi.HostPriority{ + {Host: "node-a", Score: 10}, + {Host: "node-x", Score: 6}, + }, + }, + { + // If constraints hold different labelSelectors, it's a little complex. + // +----------------------+------------------------+ + // | zone1 | zone2 | + // +----------------------+------------------------+ + // | node-a | node-b | node-x | node-y | + // +--------+-------------+--------+---------------+ + // | P{foo} | P{foo, bar} | | P{foo} P{bar} | + // +--------+-------------+--------+---------------+ + // For the first constraint (zone): the matching pods spread as 2/2/1/1 + // For the second constraint (node): the matching pods spread as 0/1/0/1 + // sum them up gets: 2/3/1/2, and total number is 8. + // after reversing, it's 6/5/7/6 + // so scores = 60/7, 50/7, 70/7, 60/7 + name: "two constraints on zone and node, with different labelSelectors", + pod: st.MakePod().Name("p").Label("foo", "").Label("bar", ""). + SpreadConstraint(1, "zone", softSpread, st.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("bar").Obj()). + Obj(), + existingPods: []*v1.Pod{ + st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Label("bar", "").Obj(), + st.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + st.MakePod().Name("p-y2").Node("node-y").Label("bar", "").Obj(), + }, + nodes: []*v1.Node{ + st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + st.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + st.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + failedNodes: []*v1.Node{}, + want: []schedulerapi.HostPriority{ + {Host: "node-a", Score: 8}, + {Host: "node-b", Score: 7}, + {Host: "node-x", Score: 10}, + {Host: "node-y", Score: 8}, + }, + }, + { + // For the first constraint (zone): the matching pods spread as 0/0/2/2 + // For the second constraint (node): the matching pods spread as 0/1/0/1 + // sum them up gets: 0/1/2/3, and total number is 6. + // after reversing, it's 6/5/4/3. + // so scores = 60/6, 50/6, 40/6, 30/6 + name: "two constraints on zone and node, with different labelSelectors, some nodes have 0 pods", + pod: st.MakePod().Name("p").Label("foo", "").Label("bar", ""). + SpreadConstraint(1, "zone", softSpread, st.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("bar").Obj()). + Obj(), + existingPods: []*v1.Pod{ + st.MakePod().Name("p-b1").Node("node-b").Label("bar", "").Obj(), + st.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(), + st.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Label("bar", "").Obj(), + }, + nodes: []*v1.Node{ + st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + st.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + st.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + failedNodes: []*v1.Node{}, + want: []schedulerapi.HostPriority{ + {Host: "node-a", Score: 10}, + {Host: "node-b", Score: 8}, + {Host: "node-x", Score: 6}, + {Host: "node-y", Score: 5}, + }, + }, + { + // For the first constraint (zone): the matching pods spread as 2/2/1/~1~ + // For the second constraint (node): the matching pods spread as 0/1/0/~1~ + // sum them up gets: 2/3/1, and total number is 6. + // after reversing, it's 4/3/5 + // so scores = 40/5, 30/5, 50/5 + name: "two constraints on zone and node, with different labelSelectors, 3 out of 4 nodes are candidates", + pod: st.MakePod().Name("p").Label("foo", "").Label("bar", ""). + SpreadConstraint(1, "zone", softSpread, st.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("bar").Obj()). + Obj(), + existingPods: []*v1.Pod{ + st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(), + st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Label("bar", "").Obj(), + st.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(), + st.MakePod().Name("p-y2").Node("node-y").Label("bar", "").Obj(), + }, + nodes: []*v1.Node{ + st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(), + st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(), + st.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(), + }, + failedNodes: []*v1.Node{ + st.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(), + }, + want: []schedulerapi.HostPriority{ + {Host: "node-a", Score: 8}, + {Host: "node-b", Score: 6}, + {Host: "node-x", Score: 10}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + allNodes := append([]*v1.Node{}, tt.nodes...) + allNodes = append(allNodes, tt.failedNodes...) + nodeNameToInfo := schedulernodeinfo.CreateNodeNameToInfoMap(tt.existingPods, allNodes) + + got, _ := CalculateEvenPodsSpreadPriority(tt.pod, nodeNameToInfo, tt.nodes) + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("CalculateEvenPodsSpreadPriority() = %#v, want %#v", got, tt.want) + } + }) + } +} + +func makeNodesAndPods(pod *v1.Pod, existingPodsNum, allNodesNum, filteredNodesNum int) (existingPods []*v1.Pod, allNodes []*v1.Node, filteredNodes []*v1.Node) { + var topologyKeys []string + var labels []string + // regions := 3 + zones := 10 + for _, c := range pod.Spec.TopologySpreadConstraints { + topologyKeys = append(topologyKeys, c.TopologyKey) + labels = append(labels, c.LabelSelector.MatchExpressions[0].Key) + } + // build nodes + for i := 0; i < allNodesNum; i++ { + nodeWrapper := st.MakeNode().Name(fmt.Sprintf("node%d", i)) + for _, tpKey := range topologyKeys { + if tpKey == "zone" { + nodeWrapper = nodeWrapper.Label("zone", fmt.Sprintf("zone%d", i%zones)) + } else if tpKey == "node" { + nodeWrapper = nodeWrapper.Label("node", fmt.Sprintf("node%d", i)) + } + } + node := nodeWrapper.Obj() + allNodes = append(allNodes, node) + if len(filteredNodes) < filteredNodesNum { + filteredNodes = append(filteredNodes, node) + } + } + // build pods + for i := 0; i < existingPodsNum; i++ { + podWrapper := st.MakePod().Name(fmt.Sprintf("pod%d", i)).Node(fmt.Sprintf("node%d", i%allNodesNum)) + // apply labels[0], labels[0,1], ..., labels[all] to each pod in turn + for _, label := range labels[:i%len(labels)+1] { + podWrapper = podWrapper.Label(label, "") + } + existingPods = append(existingPods, podWrapper.Obj()) + } + return +} + +func BenchmarkTestCalculateEvenPodsSpreadPriority(b *testing.B) { + tests := []struct { + name string + pod *v1.Pod + existingPodsNum int + allNodesNum int + filteredNodesNum int + }{ + { + name: "1000nodes/single-constraint-zone", + pod: st.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "zone", softSpread, st.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + existingPodsNum: 10000, + allNodesNum: 1000, + filteredNodesNum: 500, + }, + { + name: "1000nodes/single-constraint-node", + pod: st.MakePod().Name("p").Label("foo", ""). + SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("foo").Obj()). + Obj(), + existingPodsNum: 10000, + allNodesNum: 1000, + filteredNodesNum: 500, + }, + { + name: "1000nodes/two-constraints-zone-node", + pod: st.MakePod().Name("p").Label("foo", "").Label("bar", ""). + SpreadConstraint(1, "zone", softSpread, st.MakeLabelSelector().Exists("foo").Obj()). + SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("bar").Obj()). + Obj(), + existingPodsNum: 10000, + allNodesNum: 1000, + filteredNodesNum: 500, + }, + } + for _, tt := range tests { + b.Run(tt.name, func(b *testing.B) { + existingPods, allNodes, filteredNodes := makeNodesAndPods(tt.pod, tt.existingPodsNum, tt.allNodesNum, tt.filteredNodesNum) + nodeNameToInfo := schedulernodeinfo.CreateNodeNameToInfoMap(existingPods, allNodes) + b.ResetTimer() + for i := 0; i < b.N; i++ { + CalculateEvenPodsSpreadPriority(tt.pod, nodeNameToInfo, filteredNodes) + } + }) + } +} + +var ( + hardSpread = v1.DoNotSchedule + softSpread = v1.ScheduleAnyway +) diff --git a/pkg/scheduler/algorithm/priorities/priorities.go b/pkg/scheduler/algorithm/priorities/priorities.go index 605fdd143e3..5c2d76b94de 100644 --- a/pkg/scheduler/algorithm/priorities/priorities.go +++ b/pkg/scheduler/algorithm/priorities/priorities.go @@ -51,4 +51,7 @@ const ( ImageLocalityPriority = "ImageLocalityPriority" // ResourceLimitsPriority defines the nodes of prioritizer function ResourceLimitsPriority. ResourceLimitsPriority = "ResourceLimitsPriority" + // EvenPodsSpreadPriority defines the name of prioritizer function that prioritizes nodes + // which have pods and labels matching the incoming pod's topologySpreadConstraints. + EvenPodsSpreadPriority = "EvenPodsSpreadPriority" ) diff --git a/pkg/scheduler/algorithmprovider/defaults/defaults.go b/pkg/scheduler/algorithmprovider/defaults/defaults.go index a77ee69e069..6fd14d4513f 100644 --- a/pkg/scheduler/algorithmprovider/defaults/defaults.go +++ b/pkg/scheduler/algorithmprovider/defaults/defaults.go @@ -87,10 +87,15 @@ func ApplyFeatureGates() { klog.Infof("TaintNodesByCondition is enabled, PodToleratesNodeTaints predicate is mandatory") } - // Only register EvenPodsSpreadPredicate if the feature is enabled + // Only register EvenPodsSpread predicate & priority if the feature is enabled if utilfeature.DefaultFeatureGate.Enabled(features.EvenPodsSpread) { + klog.Infof("Registering EvenPodsSpread predicate and priority function") + // register predicate factory.InsertPredicateKeyToAlgorithmProviderMap(predicates.EvenPodsSpreadPred) factory.RegisterFitPredicate(predicates.EvenPodsSpreadPred, predicates.EvenPodsSpreadPredicate) + // register priority + factory.InsertPriorityKeyToAlgorithmProviderMap(priorities.EvenPodsSpreadPriority) + factory.RegisterPriorityFunction(priorities.EvenPodsSpreadPriority, priorities.CalculateEvenPodsSpreadPriority, 1) } // Prioritizes nodes that satisfy pod's resource limits