EvenPodsSpread: weigh constraints individually

- update logic to weigh each constraint individually
- address comments and misc fixes
This commit is contained in:
Wei Huang 2019-07-24 23:39:21 -07:00
parent 3638fd5353
commit 0bff4c27d6
No known key found for this signature in database
GPG Key ID: BE5E9752F8B6E005
5 changed files with 166 additions and 84 deletions

View File

@ -245,7 +245,7 @@ func getTPMapMatchingSpreadConstraints(pod *v1.Pod, nodeInfoMap map[string]*sche
if existingPod.Namespace != pod.Namespace { if existingPod.Namespace != pod.Namespace {
continue continue
} }
ok, err := podMatchesSpreadConstraint(existingPod.Labels, constraint) ok, err := PodMatchesSpreadConstraint(existingPod.Labels, constraint)
if err != nil { if err != nil {
errCh.SendErrorWithCancel(err, cancel) errCh.SendErrorWithCancel(err, cancel)
return return
@ -304,10 +304,11 @@ func getHardTopologySpreadConstraints(pod *v1.Pod) (constraints []v1.TopologySpr
return return
} }
// some corner cases: // PodMatchesSpreadConstraint verifies if <constraint.LabelSelector> matches <podLabelSet>.
// Some corner cases:
// 1. podLabelSet = nil => returns (false, nil) // 1. podLabelSet = nil => returns (false, nil)
// 2. constraint.LabelSelector = nil => returns (false, nil) // 2. constraint.LabelSelector = nil => returns (false, nil)
func podMatchesSpreadConstraint(podLabelSet labels.Set, constraint v1.TopologySpreadConstraint) (bool, error) { func PodMatchesSpreadConstraint(podLabelSet labels.Set, constraint v1.TopologySpreadConstraint) (bool, error) {
selector, err := metav1.LabelSelectorAsSelector(constraint.LabelSelector) selector, err := metav1.LabelSelectorAsSelector(constraint.LabelSelector)
if err != nil { if err != nil {
return false, err return false, err
@ -318,7 +319,7 @@ func podMatchesSpreadConstraint(podLabelSet labels.Set, constraint v1.TopologySp
return true, nil return true, nil
} }
// check if ALL topology keys in spread constraints are present in node labels // NodeLabelsMatchSpreadConstraints checks if ALL topology keys in spread constraints are present in node labels.
func NodeLabelsMatchSpreadConstraints(nodeLabels map[string]string, constraints []v1.TopologySpreadConstraint) bool { func NodeLabelsMatchSpreadConstraints(nodeLabels map[string]string, constraints []v1.TopologySpreadConstraint) bool {
for _, constraint := range constraints { for _, constraint := range constraints {
if _, ok := nodeLabels[constraint.TopologyKey]; !ok { if _, ok := nodeLabels[constraint.TopologyKey]; !ok {
@ -396,7 +397,7 @@ func (m *topologyPairsPodSpreadMap) addPod(addedPod, preemptorPod *v1.Pod, node
minMatchNeedingUpdate := make(map[string]struct{}) minMatchNeedingUpdate := make(map[string]struct{})
podLabelSet := labels.Set(addedPod.Labels) podLabelSet := labels.Set(addedPod.Labels)
for _, constraint := range constraints { for _, constraint := range constraints {
if match, err := podMatchesSpreadConstraint(podLabelSet, constraint); err != nil { if match, err := PodMatchesSpreadConstraint(podLabelSet, constraint); err != nil {
return err return err
} else if !match { } else if !match {
continue continue

View File

@ -904,12 +904,12 @@ func TestPodMatchesSpreadConstraint(t *testing.T) {
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
podLabelSet := labels.Set(tt.podLabels) podLabelSet := labels.Set(tt.podLabels)
got, err := podMatchesSpreadConstraint(podLabelSet, tt.constraint) got, err := PodMatchesSpreadConstraint(podLabelSet, tt.constraint)
if (err != nil) != tt.wantErr { if (err != nil) != tt.wantErr {
t.Errorf("podMatchesSpreadConstraint() error = %v, wantErr %v", err, tt.wantErr) t.Errorf("PodMatchesSpreadConstraint() error = %v, wantErr %v", err, tt.wantErr)
} }
if got != tt.want { if got != tt.want {
t.Errorf("podMatchesSpreadConstraint() = %v, want %v", got, tt.want) t.Errorf("PodMatchesSpreadConstraint() = %v, want %v", got, tt.want)
} }
}) })
} }

View File

@ -1748,7 +1748,7 @@ func EvenPodsSpreadPredicate(pod *v1.Pod, meta PredicateMetadata, nodeInfo *sche
return false, []PredicateFailureReason{ErrTopologySpreadConstraintsNotMatch}, nil return false, []PredicateFailureReason{ErrTopologySpreadConstraintsNotMatch}, nil
} }
selfMatch, err := podMatchesSpreadConstraint(podLabelSet, constraint) selfMatch, err := PodMatchesSpreadConstraint(podLabelSet, constraint)
if err != nil { if err != nil {
return false, nil, err return false, nil, err
} }

View File

@ -18,7 +18,6 @@ package priorities
import ( import (
"context" "context"
"sync"
"sync/atomic" "sync/atomic"
"k8s.io/api/core/v1" "k8s.io/api/core/v1"
@ -27,6 +26,7 @@ import (
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates" "k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api" schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo" schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
"k8s.io/klog" "k8s.io/klog"
) )
@ -36,14 +36,9 @@ type topologyPair struct {
value string value string
} }
type topologySpreadConstrantsMap struct { type topologySpreadConstraintsMap struct {
// The first error that we faced. // podCounts is keyed with node name, and valued with the number of matching pods.
firstError error podCounts map[string]*int64
sync.Mutex
// counts store the mapping from node name to so-far computed score of
// the node.
counts map[string]*int64
// total number of matching pods on each qualified <topologyKey:value> pair // total number of matching pods on each qualified <topologyKey:value> pair
total int64 total int64
// topologyPairToNodeNames store the mapping from potential <topologyKey:value> // topologyPairToNodeNames store the mapping from potential <topologyKey:value>
@ -51,64 +46,57 @@ type topologySpreadConstrantsMap struct {
topologyPairToNodeNames map[topologyPair][]string topologyPairToNodeNames map[topologyPair][]string
} }
func newTopologySpreadConstrantsMap(len int) *topologySpreadConstrantsMap { func newTopologySpreadConstraintsMap(len int) *topologySpreadConstraintsMap {
return &topologySpreadConstrantsMap{ return &topologySpreadConstraintsMap{
counts: make(map[string]*int64, len), podCounts: make(map[string]*int64, len),
topologyPairToNodeNames: make(map[topologyPair][]string), topologyPairToNodeNames: make(map[topologyPair][]string),
} }
} }
func (t *topologySpreadConstrantsMap) setError(err error) { func (t *topologySpreadConstraintsMap) initialize(pod *v1.Pod, nodes []*v1.Node) {
t.Lock()
if t.firstError == nil {
t.firstError = err
}
t.Unlock()
}
func (t *topologySpreadConstrantsMap) initialize(pod *v1.Pod, nodes []*v1.Node) {
constraints := getSoftTopologySpreadConstraints(pod) constraints := getSoftTopologySpreadConstraints(pod)
for _, node := range nodes { for _, node := range nodes {
labelSet := labels.Set(node.Labels) match := true
allMatch := true
var pairs []topologyPair var pairs []topologyPair
for _, constraint := range constraints { for _, constraint := range constraints {
tpKey := constraint.TopologyKey tpKey := constraint.TopologyKey
if !labelSet.Has(tpKey) { if _, ok := node.Labels[tpKey]; !ok {
allMatch = false // Current node isn't qualified for the soft constraints,
// so break here and the node will hold default value (nil).
match = false
break break
} }
pairs = append(pairs, topologyPair{key: tpKey, value: node.Labels[tpKey]}) pairs = append(pairs, topologyPair{key: tpKey, value: node.Labels[tpKey]})
} }
if allMatch { if match {
for _, pair := range pairs { for _, pair := range pairs {
t.topologyPairToNodeNames[pair] = append(t.topologyPairToNodeNames[pair], node.Name) t.topologyPairToNodeNames[pair] = append(t.topologyPairToNodeNames[pair], node.Name)
} }
t.counts[node.Name] = new(int64) t.podCounts[node.Name] = new(int64)
} }
// for those nodes which don't have all required topologyKeys present, it's intentional to // For those nodes which don't have all required topologyKeys present, it's intentional to
// leave counts[nodeName] as nil, so that we're able to score these nodes to 0 afterwards // leave podCounts[nodeName] as nil, so that we're able to score these nodes to 0 afterwards.
} }
} }
// CalculateEvenPodsSpreadPriority computes a score by checking through the topologySpreadConstraints // CalculateEvenPodsSpreadPriority computes a score by checking through the topologySpreadConstraints
// that are with WhenUnsatifiable=ScheduleAnyway (a.k.a soft constraint). // that are with WhenUnsatisfiable=ScheduleAnyway (a.k.a soft constraint).
// For each node (not only "filtered" nodes by Predicates), it adds the number of matching pods // The function works as below:
// (all topologySpreadConstraints must be satified) as a "weight" to any "filtered" node // 1) In all nodes, calculate the number of pods which match <pod>'s soft topology spread constraints.
// which has the <topologyKey:value> pair present. // 2) Sum up the number to each node in <nodes> which has corresponding topologyPair present.
// Then the sumed "weight" are normalized to 0~10, and the node(s) with the highest score are // 3) Finally normalize the number to 0~10. The node with the highest score is the most preferred.
// the most preferred. // Note: Symmetry is not applicable. We only weigh how incomingPod matches existingPod.
// Symmetry is not considered. // Whether existingPod matches incomingPod doesn't contribute to the final score.
// This is different with the Affinity API.
func CalculateEvenPodsSpreadPriority(pod *v1.Pod, nodeNameToInfo map[string]*schedulernodeinfo.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) { func CalculateEvenPodsSpreadPriority(pod *v1.Pod, nodeNameToInfo map[string]*schedulernodeinfo.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
nodesLen := len(nodes) result := make(schedulerapi.HostPriorityList, len(nodes))
result := make(schedulerapi.HostPriorityList, nodesLen) // return if incoming pod doesn't have soft topology spread constraints.
// if incoming pod doesn't have soft topology spread constraints, return
constraints := getSoftTopologySpreadConstraints(pod) constraints := getSoftTopologySpreadConstraints(pod)
if len(constraints) == 0 { if len(constraints) == 0 {
return result, nil return result, nil
} }
t := newTopologySpreadConstrantsMap(len(nodes)) t := newTopologySpreadConstraintsMap(len(nodes))
t.initialize(pod, nodes) t.initialize(pod, nodes)
allNodeNames := make([]string, 0, len(nodeNameToInfo)) allNodeNames := make([]string, 0, len(nodeNameToInfo))
@ -116,58 +104,66 @@ func CalculateEvenPodsSpreadPriority(pod *v1.Pod, nodeNameToInfo map[string]*sch
allNodeNames = append(allNodeNames, name) allNodeNames = append(allNodeNames, name)
} }
errCh := schedutil.NewErrorChannel()
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
processNode := func(i int) { processNode := func(i int) {
nodeInfo := nodeNameToInfo[allNodeNames[i]] nodeInfo := nodeNameToInfo[allNodeNames[i]]
if node := nodeInfo.Node(); node != nil { node := nodeInfo.Node()
// (1) `node` should satisfy incoming pod's NodeSelector/NodeAffinity if node == nil {
// (2) All topologyKeys need to be present in `node` return
if !predicates.PodMatchesNodeSelectorAndAffinityTerms(pod, node) || }
!predicates.NodeLabelsMatchSpreadConstraints(node.Labels, constraints) { // (1) `node` should satisfy incoming pod's NodeSelector/NodeAffinity
return // (2) All topologyKeys need to be present in `node`
} if !predicates.PodMatchesNodeSelectorAndAffinityTerms(pod, node) ||
matchCount := 0 !predicates.NodeLabelsMatchSpreadConstraints(node.Labels, constraints) {
for _, existingPod := range nodeInfo.Pods() { return
match, err := predicates.PodMatchesAllSpreadConstraints(existingPod, pod.Namespace, constraints) }
// It's enough to use topologyKey as the "key" of the map.
matchCount := make(map[string]int64)
for _, existingPod := range nodeInfo.Pods() {
podLabelSet := labels.Set(existingPod.Labels)
// Matching on constraints is calculated independently.
for _, constraint := range constraints {
match, err := predicates.PodMatchesSpreadConstraint(podLabelSet, constraint)
if err != nil { if err != nil {
t.setError(err) errCh.SendErrorWithCancel(err, cancel)
cancel()
return return
} }
if match { if match {
matchCount++ matchCount[constraint.TopologyKey]++
} }
} }
// add matchCount up to EACH node which is at least in one topology domain }
// with current node // Keys in t.podCounts have been ensured to contain "filtered" nodes only.
for _, constraint := range constraints { for _, constraint := range constraints {
tpKey := constraint.TopologyKey tpKey := constraint.TopologyKey
pair := topologyPair{key: tpKey, value: node.Labels[tpKey]} pair := topologyPair{key: tpKey, value: node.Labels[tpKey]}
for _, nodeName := range t.topologyPairToNodeNames[pair] { // For each <pair>, all matched nodes get the credit of summed matchCount.
atomic.AddInt64(t.counts[nodeName], int64(matchCount)) // And we add matchCount to <t.total> to reverse the final score later.
atomic.AddInt64(&t.total, int64(matchCount)) for _, nodeName := range t.topologyPairToNodeNames[pair] {
} atomic.AddInt64(t.podCounts[nodeName], matchCount[tpKey])
atomic.AddInt64(&t.total, matchCount[tpKey])
} }
} }
} }
workqueue.ParallelizeUntil(ctx, 16, len(allNodeNames), processNode) workqueue.ParallelizeUntil(ctx, 16, len(allNodeNames), processNode)
if t.firstError != nil { if err := errCh.ReceiveError(); err != nil {
return nil, t.firstError return nil, err
} }
var maxCount, minCount int64 var maxCount, minCount int64
for _, node := range nodes { for _, node := range nodes {
if t.counts[node.Name] == nil { if t.podCounts[node.Name] == nil {
continue continue
} }
// reverse // reverse
count := t.total - *t.counts[node.Name] count := t.total - *t.podCounts[node.Name]
if count > maxCount { if count > maxCount {
maxCount = count maxCount = count
} else if count < minCount { } else if count < minCount {
minCount = count minCount = count
} }
t.counts[node.Name] = &count t.podCounts[node.Name] = &count
} }
// calculate final priority score for each node // calculate final priority score for each node
// TODO(Huang-Wei): in alpha version, we keep the formula as simple as possible. // TODO(Huang-Wei): in alpha version, we keep the formula as simple as possible.
@ -186,7 +182,7 @@ func CalculateEvenPodsSpreadPriority(pod *v1.Pod, nodeNameToInfo map[string]*sch
}(&result[i].Score, node.Name) }(&result[i].Score, node.Name)
} }
if t.counts[node.Name] == nil { if t.podCounts[node.Name] == nil {
result[i].Score = 0 result[i].Score = 0
continue continue
} }
@ -194,9 +190,7 @@ func CalculateEvenPodsSpreadPriority(pod *v1.Pod, nodeNameToInfo map[string]*sch
result[i].Score = schedulerapi.MaxPriority result[i].Score = schedulerapi.MaxPriority
continue continue
} }
fScore := float64(schedulerapi.MaxPriority) * (float64(*t.counts[node.Name]-minCount) / float64(maxMinDiff)) fScore := float64(schedulerapi.MaxPriority) * (float64(*t.podCounts[node.Name]-minCount) / float64(maxMinDiff))
// need to reverse b/c the more matching pods it has, the less qualified it is
// result[i].Score = schedulerapi.MaxPriority - int(fScore)
result[i].Score = int(fScore) result[i].Score = int(fScore)
} }

View File

@ -26,7 +26,7 @@ import (
st "k8s.io/kubernetes/pkg/scheduler/testing" st "k8s.io/kubernetes/pkg/scheduler/testing"
) )
func Test_topologySpreadConstrantsMap_initialize(t *testing.T) { func Test_topologySpreadConstraintsMap_initialize(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
pod *v1.Pod pod *v1.Pod
@ -52,10 +52,27 @@ func Test_topologySpreadConstrantsMap_initialize(t *testing.T) {
{key: "node", value: "node-x"}: {"node-x"}, {key: "node", value: "node-x"}: {"node-x"},
}, },
}, },
{
name: "node-x doesn't have label zone",
pod: st.MakePod().Name("p").Label("foo", "").
SpreadConstraint(1, "zone", softSpread, st.MakeLabelSelector().Exists("foo").Obj()).
SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("bar").Obj()).
Obj(),
nodes: []*v1.Node{
st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(),
st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(),
st.MakeNode().Name("node-x").Label("node", "node-x").Obj(),
},
want: map[topologyPair][]string{
{key: "zone", value: "zone1"}: {"node-a", "node-b"},
{key: "node", value: "node-a"}: {"node-a"},
{key: "node", value: "node-b"}: {"node-b"},
},
},
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
tMap := newTopologySpreadConstrantsMap(len(tt.nodes)) tMap := newTopologySpreadConstraintsMap(len(tt.nodes))
tMap.initialize(tt.pod, tt.nodes) tMap.initialize(tt.pod, tt.nodes)
if !reflect.DeepEqual(tMap.topologyPairToNodeNames, tt.want) { if !reflect.DeepEqual(tMap.topologyPairToNodeNames, tt.want) {
t.Errorf("initilize().topologyPairToNodeNames = %#v, want %#v", tMap.topologyPairToNodeNames, tt.want) t.Errorf("initilize().topologyPairToNodeNames = %#v, want %#v", tMap.topologyPairToNodeNames, tt.want)
@ -249,10 +266,10 @@ func TestCalculateEvenPodsSpreadPriority(t *testing.T) {
}, },
}, },
{ {
// matching pods spread as 2/~1~/2/~4~, total = 2+3 + 2+6 = 13 (zone and node should be both sumed up) // matching pods spread as 2/~1~/2/~4~, total = 2+3 + 2+6 = 13 (zone and node should be both summed up)
// after reversing, it's 8/5 // after reversing, it's 8/5
// so scores = 80/8, 50/8 // so scores = 80/8, 50/8
name: "two constraint on zone and node, 2 out of 4 nodes are candidates", name: "two constraints on zone and node, 2 out of 4 nodes are candidates",
pod: st.MakePod().Name("p").Label("foo", ""). pod: st.MakePod().Name("p").Label("foo", "").
SpreadConstraint(1, "zone", softSpread, st.MakeLabelSelector().Exists("foo").Obj()). SpreadConstraint(1, "zone", softSpread, st.MakeLabelSelector().Exists("foo").Obj()).
SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("foo").Obj()). SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("foo").Obj()).
@ -281,6 +298,76 @@ func TestCalculateEvenPodsSpreadPriority(t *testing.T) {
{Host: "node-x", Score: 6}, {Host: "node-x", Score: 6},
}, },
}, },
{
// If constraints hold different labelSelectors, it's a little complex.
// +----------------------+------------------------+
// | zone1 | zone2 |
// +----------------------+------------------------+
// | node-a | node-b | node-x | node-y |
// +--------+-------------+--------+---------------+
// | P{foo} | P{foo, bar} | | P{foo} P{bar} |
// +--------+-------------+--------+---------------+
// For the first constraint (zone): the matching pods spread as 2/2/1/1
// For the second constraint (node): the matching pods spread as 0/1/0/1
// sum them up gets: 2/3/1/2, and total number is 8.
// after reversing, it's 6/5/7/6
// so scores = 60/7, 50/7, 70/7, 60/7
name: "two constraints on zone and node, with different labelSelectors",
pod: st.MakePod().Name("p").Label("foo", "").Label("bar", "").
SpreadConstraint(1, "zone", softSpread, st.MakeLabelSelector().Exists("foo").Obj()).
SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("bar").Obj()).
Obj(),
existingPods: []*v1.Pod{
st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(),
st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Label("bar", "").Obj(),
st.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(),
st.MakePod().Name("p-y2").Node("node-y").Label("bar", "").Obj(),
},
nodes: []*v1.Node{
st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(),
st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(),
st.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(),
st.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(),
},
failedNodes: []*v1.Node{},
want: []schedulerapi.HostPriority{
{Host: "node-a", Score: 8},
{Host: "node-b", Score: 7},
{Host: "node-x", Score: 10},
{Host: "node-y", Score: 8},
},
},
{
// For the first constraint (zone): the matching pods spread as 2/2/1/~1~
// For the second constraint (node): the matching pods spread as 0/1/0/~1~
// sum them up gets: 2/3/1, and total number is 6.
// after reversing, it's 4/3/5
// so scores = 40/5, 30/5, 50/5
name: "two constraints on zone and node, with different labelSelectors, 3 out of 4 nodes are candidates",
pod: st.MakePod().Name("p").Label("foo", "").Label("bar", "").
SpreadConstraint(1, "zone", softSpread, st.MakeLabelSelector().Exists("foo").Obj()).
SpreadConstraint(1, "node", softSpread, st.MakeLabelSelector().Exists("bar").Obj()).
Obj(),
existingPods: []*v1.Pod{
st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(),
st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Label("bar", "").Obj(),
st.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(),
st.MakePod().Name("p-y2").Node("node-y").Label("bar", "").Obj(),
},
nodes: []*v1.Node{
st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(),
st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(),
st.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(),
},
failedNodes: []*v1.Node{
st.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(),
},
want: []schedulerapi.HostPriority{
{Host: "node-a", Score: 8},
{Host: "node-b", Score: 6},
{Host: "node-x", Score: 10},
},
},
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {