Merge pull request #79062 from Huang-Wei/eps-preemption

Even Pods Spread - 4. Preemption Support
This commit is contained in:
Kubernetes Prow Robot 2019-07-25 14:33:45 -07:00 committed by GitHub
commit 7fd8537564
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 858 additions and 22 deletions

View File

@ -74,7 +74,9 @@ type topologyPairsMaps struct {
type topologyPairsPodSpreadMap struct {
// This map is keyed with a topology key, and valued with minimum number
// of pods matched on that topology domain.
// TODO(Huang-Wei): refactor to {tpKey->tpValSet(or tpValSlice)}
topologyKeyToMinPodsMap map[string]int32
// TODO(Huang-Wei): refactor to {tpPair->count, podName->tpPairSet(optional)}
*topologyPairsMaps
}
@ -231,10 +233,8 @@ func getTPMapMatchingSpreadConstraints(pod *v1.Pod, nodeInfoMap map[string]*sche
return
}
// Ensure current node's labels contains all topologyKeys in 'constraints'.
for _, constraint := range constraints {
if _, ok := node.Labels[constraint.TopologyKey]; !ok {
return
}
if !nodeLabelsMatchSpreadConstraints(node.Labels, constraints) {
return
}
nodeTopologyMaps := newTopologyPairsMaps()
@ -319,6 +319,16 @@ func podMatchesSpreadConstraint(podLabelSet labels.Set, constraint v1.TopologySp
return true, nil
}
// check if ALL topology keys in spread constraints are present in node labels
func nodeLabelsMatchSpreadConstraints(nodeLabels map[string]string, constraints []v1.TopologySpreadConstraint) bool {
for _, constraint := range constraints {
if _, ok := nodeLabels[constraint.TopologyKey]; !ok {
return false
}
}
return true
}
// returns a pointer to a new topologyPairsMaps
func newTopologyPairsMaps() *topologyPairsMaps {
return &topologyPairsMaps{topologyPairToPods: make(map[topologyPair]podSet),
@ -374,6 +384,89 @@ func (m *topologyPairsMaps) clone() *topologyPairsMaps {
return copy
}
func (m *topologyPairsPodSpreadMap) addPod(addedPod, preemptorPod *v1.Pod, node *v1.Node) error {
if addedPod.Namespace != preemptorPod.Namespace {
return nil
}
constraints := getHardTopologySpreadConstraints(preemptorPod)
if !nodeLabelsMatchSpreadConstraints(node.Labels, constraints) {
return nil
}
// records which topology key(s) needs to be updated
minMatchNeedingUpdate := make(map[string]struct{})
podLabelSet := labels.Set(addedPod.Labels)
for _, constraint := range constraints {
if match, err := podMatchesSpreadConstraint(podLabelSet, constraint); err != nil {
return err
} else if !match {
continue
}
pair := topologyPair{
key: constraint.TopologyKey,
value: node.Labels[constraint.TopologyKey],
}
// it means current node is one of the critical paths of topologyKeyToMinPodsMap[TopologyKey]
if int32(len(m.topologyPairToPods[pair])) == m.topologyKeyToMinPodsMap[pair.key] {
minMatchNeedingUpdate[pair.key] = struct{}{}
}
m.addTopologyPair(pair, addedPod)
}
// no need to addTopologyPairWithoutPods b/c if a pair without pods must be present,
// it should have already been created earlier in removePod() phase
// In most cases, min match map doesn't need to be updated.
// But it's required to be updated when current node is the ONLY critical path which impacts
// the min match. With that said, in this case min match needs to be updated to min match + 1
if len(minMatchNeedingUpdate) != 0 {
// TODO(Huang-Wei): performance can be optimized.
// A possible solution is to record number of critical paths which co-impact the min match.
tempMinMatchMap := make(map[string]int32, len(minMatchNeedingUpdate))
for key := range minMatchNeedingUpdate {
tempMinMatchMap[key] = math.MaxInt32
}
for pair, podSet := range m.topologyPairToPods {
if _, ok := minMatchNeedingUpdate[pair.key]; !ok {
continue
}
if l := int32(len(podSet)); l < tempMinMatchMap[pair.key] {
tempMinMatchMap[pair.key] = l
}
}
for key, tempMin := range tempMinMatchMap {
if tempMin == m.topologyKeyToMinPodsMap[key]+1 {
m.topologyKeyToMinPodsMap[key] = tempMin
}
}
}
return nil
}
func (m *topologyPairsPodSpreadMap) removePod(deletedPod *v1.Pod) {
if m == nil || deletedPod == nil {
return
}
deletedPodFullName := schedutil.GetPodFullName(deletedPod)
pairSet, ok := m.podToTopologyPairs[deletedPodFullName]
if !ok {
return
}
topologyPairToPods := m.topologyPairToPods
for pair := range pairSet {
delete(topologyPairToPods[pair], deletedPod)
// if topologyPairToPods[pair] is empty after deletion
// don't clean it up as that topology counts as a match now
// removal of the deletedPod would probably genereate a smaller matching number
// so re-calculate minMatch to a smaller value if possible
if l := int32(len(topologyPairToPods[pair])); l < m.topologyKeyToMinPodsMap[pair.key] {
m.topologyKeyToMinPodsMap[pair.key] = l
}
}
delete(m.podToTopologyPairs, deletedPodFullName)
}
func (m *topologyPairsPodSpreadMap) clone() *topologyPairsPodSpreadMap {
// m could be nil when EvenPodsSpread feature is disabled
if m == nil {
@ -400,6 +493,8 @@ func (meta *predicateMetadata) RemovePod(deletedPod *v1.Pod) error {
// Delete pod from the matching affinity or anti-affinity topology pairs maps.
meta.topologyPairsPotentialAffinityPods.removePod(deletedPod)
meta.topologyPairsPotentialAntiAffinityPods.removePod(deletedPod)
// Delete pod from the pod spread topology maps.
meta.topologyPairsPodSpreadMap.removePod(deletedPod)
// All pods in the serviceAffinityMatchingPodList are in the same namespace.
// So, if the namespace of the first one is not the same as the namespace of the
// deletedPod, we don't need to check the list, as deletedPod isn't in the list.
@ -461,6 +556,12 @@ func (meta *predicateMetadata) AddPod(addedPod *v1.Pod, nodeInfo *schedulernodei
}
}
}
// Update meta.topologyPairsPodSpreadMap if meta.pod has hard spread constraints
// and addedPod matches that
if err := meta.topologyPairsPodSpreadMap.addPod(addedPod, meta.pod, nodeInfo.Node()); err != nil {
return err
}
// If addedPod is in the same namespace as the meta.pod, update the list
// of matching pods if applicable.
if meta.serviceAffinityInUse && addedPod.Namespace == meta.pod.Namespace {

View File

@ -1243,6 +1243,547 @@ func TestGetTPMapMatchingSpreadConstraints(t *testing.T) {
}
}
func TestPodSpreadMap_addPod(t *testing.T) {
tests := []struct {
name string
preemptorPod *v1.Pod
addedPod *v1.Pod
existingPods []*v1.Pod
nodeIdx int // denotes which node 'addedPod' belongs to
nodes []*v1.Node
injectPodPointers map[topologyPair][]int // non-negative index refers to existingPods[i], negative index refers to addedPod
want *topologyPairsPodSpreadMap
}{
{
name: "node a and b both impact current min match",
preemptorPod: st.MakePod().Name("p").Label("foo", "").
SpreadConstraint(1, "node", hardSpread, st.MakeLabelSelector().Exists("foo").Obj()).
Obj(),
addedPod: st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(),
existingPods: nil, // it's an empty cluster
nodeIdx: 0,
nodes: []*v1.Node{
st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(),
st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(),
},
injectPodPointers: map[topologyPair][]int{
{key: "node", value: "node-a"}: {-1},
{key: "node", value: "node-b"}: {},
},
want: &topologyPairsPodSpreadMap{
// min match map shouldn't be changed b/c node-b is still on the critical path
// determining min match
topologyKeyToMinPodsMap: map[string]int32{"node": 0},
topologyPairsMaps: &topologyPairsMaps{
podToTopologyPairs: map[string]topologyPairSet{
"p-a1_": newPairSet("node", "node-a"),
},
},
},
},
{
name: "only node a impacts current min match",
preemptorPod: st.MakePod().Name("p").Label("foo", "").
SpreadConstraint(1, "node", hardSpread, st.MakeLabelSelector().Exists("foo").Obj()).
Obj(),
addedPod: st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(),
existingPods: []*v1.Pod{
st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(),
},
nodeIdx: 0,
nodes: []*v1.Node{
st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(),
st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(),
},
injectPodPointers: map[topologyPair][]int{
{key: "node", value: "node-a"}: {-1},
{key: "node", value: "node-b"}: {0},
},
want: &topologyPairsPodSpreadMap{
// min match should be changed from 0 to 1
topologyKeyToMinPodsMap: map[string]int32{"node": 1},
topologyPairsMaps: &topologyPairsMaps{
podToTopologyPairs: map[string]topologyPairSet{
"p-a1_": newPairSet("node", "node-a"),
"p-b1_": newPairSet("node", "node-b"),
},
},
},
},
{
name: "add a pod with mis-matched namespace doesn't change topologyKeyToMinPodsMap",
preemptorPod: st.MakePod().Name("p").Label("foo", "").
SpreadConstraint(1, "node", hardSpread, st.MakeLabelSelector().Exists("foo").Obj()).
Obj(),
addedPod: st.MakePod().Name("p-a1").Namespace("ns1").Node("node-a").Label("foo", "").Obj(),
existingPods: []*v1.Pod{
st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(),
},
nodeIdx: 0,
nodes: []*v1.Node{
st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(),
st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(),
},
injectPodPointers: map[topologyPair][]int{
{key: "node", value: "node-a"}: {},
{key: "node", value: "node-b"}: {0},
},
want: &topologyPairsPodSpreadMap{
// min match remains the same
topologyKeyToMinPodsMap: map[string]int32{"node": 0},
topologyPairsMaps: &topologyPairsMaps{
podToTopologyPairs: map[string]topologyPairSet{
// "p-a1_": newPairSet("node", "node-a") shouldn't exist
"p-b1_": newPairSet("node", "node-b"),
},
},
},
},
{
name: "add pod on non-critical node won't trigger re-calculation",
preemptorPod: st.MakePod().Name("p").Label("foo", "").
SpreadConstraint(1, "node", hardSpread, st.MakeLabelSelector().Exists("foo").Obj()).
Obj(),
addedPod: st.MakePod().Name("p-b2").Node("node-b").Label("foo", "").Obj(),
existingPods: []*v1.Pod{
st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(),
},
nodeIdx: 1,
nodes: []*v1.Node{
st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(),
st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(),
},
injectPodPointers: map[topologyPair][]int{
{key: "node", value: "node-a"}: {},
{key: "node", value: "node-b"}: {-1, 0},
},
want: &topologyPairsPodSpreadMap{
topologyKeyToMinPodsMap: map[string]int32{"node": 0},
topologyPairsMaps: &topologyPairsMaps{
podToTopologyPairs: map[string]topologyPairSet{
"p-b1_": newPairSet("node", "node-b"),
"p-b2_": newPairSet("node", "node-b"),
},
},
},
},
{
name: "node a and x both impact topologyKeyToMinPodsMap on zone and node",
preemptorPod: st.MakePod().Name("p").Label("foo", "").
SpreadConstraint(1, "zone", hardSpread, st.MakeLabelSelector().Exists("foo").Obj()).
SpreadConstraint(1, "node", hardSpread, st.MakeLabelSelector().Exists("foo").Obj()).
Obj(),
addedPod: st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(),
existingPods: nil, // it's an empty cluster
nodeIdx: 0,
nodes: []*v1.Node{
st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(),
st.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(),
},
injectPodPointers: map[topologyPair][]int{
{key: "zone", value: "zone1"}: {-1},
{key: "zone", value: "zone2"}: {},
{key: "node", value: "node-a"}: {-1},
{key: "node", value: "node-x"}: {},
},
want: &topologyPairsPodSpreadMap{
topologyKeyToMinPodsMap: map[string]int32{"zone": 0, "node": 0},
topologyPairsMaps: &topologyPairsMaps{
podToTopologyPairs: map[string]topologyPairSet{
"p-a1_": newPairSet("zone", "zone1", "node", "node-a"),
},
},
},
},
{
name: "only node a impacts topologyKeyToMinPodsMap on zone and node",
preemptorPod: st.MakePod().Name("p").Label("foo", "").
SpreadConstraint(1, "zone", hardSpread, st.MakeLabelSelector().Exists("foo").Obj()).
SpreadConstraint(1, "node", hardSpread, st.MakeLabelSelector().Exists("foo").Obj()).
Obj(),
addedPod: st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(),
existingPods: []*v1.Pod{
st.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(),
},
nodeIdx: 0,
nodes: []*v1.Node{
st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(),
st.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(),
},
injectPodPointers: map[topologyPair][]int{
{key: "zone", value: "zone1"}: {-1},
{key: "zone", value: "zone2"}: {0},
{key: "node", value: "node-a"}: {-1},
{key: "node", value: "node-x"}: {0},
},
want: &topologyPairsPodSpreadMap{
topologyKeyToMinPodsMap: map[string]int32{"zone": 1, "node": 1},
topologyPairsMaps: &topologyPairsMaps{
podToTopologyPairs: map[string]topologyPairSet{
"p-a1_": newPairSet("zone", "zone1", "node", "node-a"),
"p-x1_": newPairSet("zone", "zone2", "node", "node-x"),
},
},
},
},
{
name: "node a impacts topologyKeyToMinPodsMap on node, node x impacts topologyKeyToMinPodsMap on zone",
preemptorPod: st.MakePod().Name("p").Label("foo", "").
SpreadConstraint(1, "zone", hardSpread, st.MakeLabelSelector().Exists("foo").Obj()).
SpreadConstraint(1, "node", hardSpread, st.MakeLabelSelector().Exists("foo").Obj()).
Obj(),
addedPod: st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(),
existingPods: []*v1.Pod{
st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(),
st.MakePod().Name("p-b2").Node("node-b").Label("foo", "").Obj(),
st.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(),
},
nodeIdx: 0,
nodes: []*v1.Node{
st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(),
st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(),
st.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(),
},
injectPodPointers: map[topologyPair][]int{
{key: "zone", value: "zone1"}: {-1, 0, 1},
{key: "zone", value: "zone2"}: {2},
{key: "node", value: "node-a"}: {-1},
{key: "node", value: "node-b"}: {0, 1},
{key: "node", value: "node-x"}: {2},
},
want: &topologyPairsPodSpreadMap{
topologyKeyToMinPodsMap: map[string]int32{"zone": 1, "node": 1},
topologyPairsMaps: &topologyPairsMaps{
podToTopologyPairs: map[string]topologyPairSet{
"p-a1_": newPairSet("zone", "zone1", "node", "node-a"),
"p-b1_": newPairSet("zone", "zone1", "node", "node-b"),
"p-b2_": newPairSet("zone", "zone1", "node", "node-b"),
"p-x1_": newPairSet("zone", "zone2", "node", "node-x"),
},
},
},
},
{
name: "constraints hold different labelSelectors, node a impacts topologyKeyToMinPodsMap on node",
preemptorPod: st.MakePod().Name("p").Label("foo", "").Label("bar", "").
SpreadConstraint(1, "zone", hardSpread, st.MakeLabelSelector().Exists("foo").Obj()).
SpreadConstraint(1, "node", hardSpread, st.MakeLabelSelector().Exists("bar").Obj()).
Obj(),
addedPod: st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(),
existingPods: []*v1.Pod{
st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Label("bar", "").Obj(),
st.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(),
st.MakePod().Name("p-x2").Node("node-x").Label("bar", "").Obj(),
},
nodeIdx: 0,
nodes: []*v1.Node{
st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(),
st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(),
st.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(),
},
injectPodPointers: map[topologyPair][]int{
{key: "zone", value: "zone1"}: {-1, 0},
{key: "zone", value: "zone2"}: {1},
{key: "node", value: "node-a"}: {},
{key: "node", value: "node-b"}: {0},
{key: "node", value: "node-x"}: {2},
},
want: &topologyPairsPodSpreadMap{
topologyKeyToMinPodsMap: map[string]int32{"zone": 1, "node": 0},
topologyPairsMaps: &topologyPairsMaps{
podToTopologyPairs: map[string]topologyPairSet{
"p-a1_": newPairSet("zone", "zone1"),
"p-b1_": newPairSet("zone", "zone1", "node", "node-b"),
"p-x1_": newPairSet("zone", "zone2"),
"p-x2_": newPairSet("node", "node-x"),
},
},
},
},
{
name: "constraints hold different labelSelectors, node a impacts topologyKeyToMinPodsMap on both zone and node",
preemptorPod: st.MakePod().Name("p").Label("foo", "").Label("bar", "").
SpreadConstraint(1, "zone", hardSpread, st.MakeLabelSelector().Exists("foo").Obj()).
SpreadConstraint(1, "node", hardSpread, st.MakeLabelSelector().Exists("bar").Obj()).
Obj(),
addedPod: st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Label("bar", "").Obj(),
existingPods: []*v1.Pod{
st.MakePod().Name("p-b1").Node("node-b").Label("bar", "").Obj(),
st.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(),
st.MakePod().Name("p-x2").Node("node-x").Label("bar", "").Obj(),
},
nodeIdx: 0,
nodes: []*v1.Node{
st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(),
st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(),
st.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(),
},
injectPodPointers: map[topologyPair][]int{
{key: "zone", value: "zone1"}: {-1},
{key: "zone", value: "zone2"}: {1},
{key: "node", value: "node-a"}: {-1},
{key: "node", value: "node-b"}: {0},
{key: "node", value: "node-x"}: {2},
},
want: &topologyPairsPodSpreadMap{
topologyKeyToMinPodsMap: map[string]int32{"zone": 1, "node": 1},
topologyPairsMaps: &topologyPairsMaps{
podToTopologyPairs: map[string]topologyPairSet{
"p-a1_": newPairSet("zone", "zone1", "node", "node-a"),
"p-b1_": newPairSet("node", "node-b"),
"p-x1_": newPairSet("zone", "zone2"),
"p-x2_": newPairSet("node", "node-x"),
},
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
tt.want.topologyPairToPods = make(map[topologyPair]podSet)
for pair, indexes := range tt.injectPodPointers {
pSet := make(podSet)
for _, i := range indexes {
if i >= 0 {
pSet[tt.existingPods[i]] = struct{}{}
} else {
pSet[tt.addedPod] = struct{}{}
}
}
tt.want.topologyPairToPods[pair] = pSet
}
nodeInfoMap := schedulernodeinfo.CreateNodeNameToInfoMap(tt.existingPods, tt.nodes)
podSpreadMap, _ := getTPMapMatchingSpreadConstraints(tt.preemptorPod, nodeInfoMap)
podSpreadMap.addPod(tt.addedPod, tt.preemptorPod, tt.nodes[tt.nodeIdx])
if !reflect.DeepEqual(podSpreadMap, tt.want) {
t.Errorf("podSpreadMap#addPod() = %v, want %v", podSpreadMap, tt.want)
}
})
}
}
func TestPodSpreadMap_removePod(t *testing.T) {
tests := []struct {
name string
preemptor *v1.Pod // preemptor pod
nodes []*v1.Node
existingPods []*v1.Pod
deletedPodIdx int // need to reuse *Pod of existingPods[i]
deletedPod *v1.Pod // if deletedPodIdx is invalid, this field is bypassed
injectPodPointers map[topologyPair][]int
want *topologyPairsPodSpreadMap
}{
{
// A high priority pod may not be scheduled due to node taints or resource shortage.
// So preemption is triggered.
name: "one spreadConstraint on zone, topologyKeyToMinPodsMap unchanged",
preemptor: st.MakePod().Name("p").Label("foo", "").
SpreadConstraint(1, "zone", hardSpread, st.MakeLabelSelector().Exists("foo").Obj()).
Obj(),
nodes: []*v1.Node{
st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(),
st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(),
st.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(),
},
existingPods: []*v1.Pod{
st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(),
st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(),
st.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(),
},
deletedPodIdx: 0, // remove pod "p-a1"
injectPodPointers: map[topologyPair][]int{
{key: "zone", value: "zone1"}: {1},
{key: "zone", value: "zone2"}: {2},
},
want: &topologyPairsPodSpreadMap{
// topologyKeyToMinPodsMap actually doesn't change
topologyKeyToMinPodsMap: map[string]int32{"zone": 1},
topologyPairsMaps: &topologyPairsMaps{
podToTopologyPairs: map[string]topologyPairSet{
"p-b1_": newPairSet("zone", "zone1"),
"p-x1_": newPairSet("zone", "zone2"),
},
},
},
},
{
name: "one spreadConstraint on node, topologyKeyToMinPodsMap changed",
preemptor: st.MakePod().Name("p").Label("foo", "").
SpreadConstraint(1, "zone", hardSpread, st.MakeLabelSelector().Exists("foo").Obj()).
Obj(),
nodes: []*v1.Node{
st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(),
st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(),
st.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(),
st.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(),
},
existingPods: []*v1.Pod{
st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(),
st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(),
st.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(),
st.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(),
},
deletedPodIdx: 0, // remove pod "p-a1"
injectPodPointers: map[topologyPair][]int{
{key: "zone", value: "zone1"}: {1},
{key: "zone", value: "zone2"}: {2, 3},
},
want: &topologyPairsPodSpreadMap{
// topologyKeyToMinPodsMap is expected to be re-calculated from {"zone": 2}
// to {"zone": 1}
topologyKeyToMinPodsMap: map[string]int32{"zone": 1},
topologyPairsMaps: &topologyPairsMaps{
podToTopologyPairs: map[string]topologyPairSet{
"p-b1_": newPairSet("zone", "zone1"),
"p-x1_": newPairSet("zone", "zone2"),
"p-y1_": newPairSet("zone", "zone2"),
},
},
},
},
{
name: "delete an irrelevant pod won't help",
preemptor: st.MakePod().Name("p").Label("foo", "").
SpreadConstraint(1, "zone", hardSpread, st.MakeLabelSelector().Exists("foo").Obj()).
Obj(),
nodes: []*v1.Node{
st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(),
st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(),
st.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(),
st.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(),
},
existingPods: []*v1.Pod{
st.MakePod().Name("p-a0").Node("node-a").Label("bar", "").Obj(),
st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(),
st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(),
st.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(),
st.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(),
},
deletedPodIdx: 0, // remove pod "p-a0"
injectPodPointers: map[topologyPair][]int{
{key: "zone", value: "zone1"}: {1, 2},
{key: "zone", value: "zone2"}: {3, 4},
},
want: &topologyPairsPodSpreadMap{
// topologyKeyToMinPodsMap is unchanged
topologyKeyToMinPodsMap: map[string]int32{"zone": 2},
topologyPairsMaps: &topologyPairsMaps{
podToTopologyPairs: map[string]topologyPairSet{
"p-a1_": newPairSet("zone", "zone1"),
"p-b1_": newPairSet("zone", "zone1"),
"p-x1_": newPairSet("zone", "zone2"),
"p-y1_": newPairSet("zone", "zone2"),
},
},
},
},
{
name: "delete a non-existing pod won't help",
preemptor: st.MakePod().Name("p").Label("foo", "").
SpreadConstraint(1, "zone", hardSpread, st.MakeLabelSelector().Exists("foo").Obj()).
Obj(),
nodes: []*v1.Node{
st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(),
st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(),
st.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(),
st.MakeNode().Name("node-y").Label("zone", "zone2").Label("node", "node-y").Obj(),
},
existingPods: []*v1.Pod{
st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(),
st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(),
st.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(),
st.MakePod().Name("p-y1").Node("node-y").Label("foo", "").Obj(),
},
deletedPodIdx: -1,
deletedPod: st.MakePod().Name("p-a0").Node("node-a").Label("bar", "").Obj(),
injectPodPointers: map[topologyPair][]int{
{key: "zone", value: "zone1"}: {0, 1},
{key: "zone", value: "zone2"}: {2, 3},
},
want: &topologyPairsPodSpreadMap{
// topologyKeyToMinPodsMap is unchanged
topologyKeyToMinPodsMap: map[string]int32{"zone": 2},
topologyPairsMaps: &topologyPairsMaps{
podToTopologyPairs: map[string]topologyPairSet{
"p-a1_": newPairSet("zone", "zone1"),
"p-b1_": newPairSet("zone", "zone1"),
"p-x1_": newPairSet("zone", "zone2"),
"p-y1_": newPairSet("zone", "zone2"),
},
},
},
},
{
name: "two spreadConstraints",
preemptor: st.MakePod().Name("p").Label("foo", "").
SpreadConstraint(1, "zone", hardSpread, st.MakeLabelSelector().Exists("foo").Obj()).
SpreadConstraint(1, "node", hardSpread, st.MakeLabelSelector().Exists("foo").Obj()).
Obj(),
nodes: []*v1.Node{
st.MakeNode().Name("node-a").Label("zone", "zone1").Label("node", "node-a").Obj(),
st.MakeNode().Name("node-b").Label("zone", "zone1").Label("node", "node-b").Obj(),
st.MakeNode().Name("node-x").Label("zone", "zone2").Label("node", "node-x").Obj(),
},
existingPods: []*v1.Pod{
st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(),
st.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(),
st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(),
st.MakePod().Name("p-x1").Node("node-x").Label("foo", "").Obj(),
st.MakePod().Name("p-x2").Node("node-x").Label("foo", "").Obj(),
},
deletedPodIdx: 3, // remove pod "p-x1"
injectPodPointers: map[topologyPair][]int{
{key: "zone", value: "zone1"}: {0, 1, 2},
{key: "zone", value: "zone2"}: {4},
{key: "node", value: "node-a"}: {0, 1},
{key: "node", value: "node-b"}: {2},
{key: "node", value: "node-x"}: {4},
},
want: &topologyPairsPodSpreadMap{
// topologyKeyToMinPodsMap is expected to be re-calculated from {"zone": 2, "node": 1}
// to {"zone": 1, "node": 1}
topologyKeyToMinPodsMap: map[string]int32{"zone": 1, "node": 1},
topologyPairsMaps: &topologyPairsMaps{
podToTopologyPairs: map[string]topologyPairSet{
"p-a1_": newPairSet("zone", "zone1", "node", "node-a"),
"p-a2_": newPairSet("zone", "zone1", "node", "node-a"),
"p-b1_": newPairSet("zone", "zone1", "node", "node-b"),
"p-x2_": newPairSet("zone", "zone2", "node", "node-x"),
},
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
tt.want.topologyPairToPods = make(map[topologyPair]podSet)
for pair, indexes := range tt.injectPodPointers {
pSet := make(podSet)
for _, i := range indexes {
pSet[tt.existingPods[i]] = struct{}{}
}
tt.want.topologyPairToPods[pair] = pSet
}
nodeInfoMap := schedulernodeinfo.CreateNodeNameToInfoMap(tt.existingPods, tt.nodes)
podSpreadMap, _ := getTPMapMatchingSpreadConstraints(tt.preemptor, nodeInfoMap)
var deletedPod *v1.Pod
if tt.deletedPodIdx < len(tt.existingPods) && tt.deletedPodIdx >= 0 {
deletedPod = tt.existingPods[tt.deletedPodIdx]
} else {
deletedPod = tt.deletedPod
}
podSpreadMap.removePod(deletedPod)
if !reflect.DeepEqual(podSpreadMap, tt.want) {
t.Errorf("podSpreadMap#removePod() = %v, want %v", podSpreadMap, tt.want)
}
})
}
}
var (
hardSpread = v1.DoNotSchedule
softSpread = v1.ScheduleAnyway

View File

@ -1722,7 +1722,6 @@ func EvenPodsSpreadPredicate(pod *v1.Pod, meta PredicateMetadata, nodeInfo *sche
if node == nil {
return false, nil, fmt.Errorf("node not found")
}
constraints := getHardTopologySpreadConstraints(pod)
if len(constraints) == 0 {
return true, nil, nil

View File

@ -553,11 +553,9 @@ func TestGenericScheduler(t *testing.T) {
pvcLister := schedulertesting.FakePersistentVolumeClaimLister(pvcs)
var predMetaProducer algorithmpredicates.PredicateMetadataProducer
predMetaProducer := algorithmpredicates.EmptyPredicateMetadataProducer
if test.buildPredMeta {
predMetaProducer = algorithmpredicates.NewPredicateMetadataFactory(schedulertesting.FakePodLister(test.pods))
} else {
predMetaProducer = algorithmpredicates.EmptyPredicateMetadataProducer
}
scheduler := NewGenericScheduler(
cache,
@ -1083,7 +1081,83 @@ func TestSelectNodesForPreemption(t *testing.T) {
expected: map[string]map[string]bool{"machine1": {"a": true}, "machine2": {}},
addAffinityPredicate: true,
},
{
name: "preemption to resolve even pods spread FitError",
predicates: map[string]algorithmpredicates.FitPredicate{
"matches": algorithmpredicates.EvenPodsSpreadPredicate,
},
nodes: []string{"node-a/zone1", "node-b/zone1", "node-x/zone2"},
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "p",
Labels: map[string]string{"foo": ""},
},
Spec: v1.PodSpec{
Priority: &highPriority,
TopologySpreadConstraints: []v1.TopologySpreadConstraint{
{
MaxSkew: 1,
TopologyKey: "zone",
WhenUnsatisfiable: v1.DoNotSchedule,
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "foo",
Operator: metav1.LabelSelectorOpExists,
},
},
},
},
{
MaxSkew: 1,
TopologyKey: "hostname",
WhenUnsatisfiable: v1.DoNotSchedule,
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "foo",
Operator: metav1.LabelSelectorOpExists,
},
},
},
},
},
},
},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{Name: "pod-a1", UID: types.UID("pod-a1"), Labels: map[string]string{"foo": ""}},
Spec: v1.PodSpec{NodeName: "node-a", Priority: &midPriority},
Status: v1.PodStatus{Phase: v1.PodRunning},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pod-a2", UID: types.UID("pod-a2"), Labels: map[string]string{"foo": ""}},
Spec: v1.PodSpec{NodeName: "node-a", Priority: &lowPriority},
Status: v1.PodStatus{Phase: v1.PodRunning},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pod-b1", UID: types.UID("pod-b1"), Labels: map[string]string{"foo": ""}},
Spec: v1.PodSpec{NodeName: "node-b", Priority: &lowPriority},
Status: v1.PodStatus{Phase: v1.PodRunning},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pod-x1", UID: types.UID("pod-x1"), Labels: map[string]string{"foo": ""}},
Spec: v1.PodSpec{NodeName: "node-x", Priority: &highPriority},
Status: v1.PodStatus{Phase: v1.PodRunning},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pod-x2", UID: types.UID("pod-x2"), Labels: map[string]string{"foo": ""}},
Spec: v1.PodSpec{NodeName: "node-x", Priority: &highPriority},
Status: v1.PodStatus{Phase: v1.PodRunning},
},
},
expected: map[string]map[string]bool{
"node-a": {"pod-a2": true},
"node-b": {"pod-b1": true},
},
},
}
labelKeys := []string{"hostname", "zone", "region"}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
assignDefaultStartTime(test.pods)
@ -1091,7 +1165,13 @@ func TestSelectNodesForPreemption(t *testing.T) {
nodes := []*v1.Node{}
for _, n := range test.nodes {
node := makeNode(n, 1000*5, priorityutil.DefaultMemoryRequest*5)
node.ObjectMeta.Labels = map[string]string{"hostname": node.Name}
// if possible, split node name by '/' to form labels in a format of
// {"hostname": node.Name[0], "zone": node.Name[1], "region": node.Name[2]}
node.ObjectMeta.Labels = make(map[string]string)
for i, label := range strings.Split(node.Name, "/") {
node.ObjectMeta.Labels[labelKeys[i]] = label
}
node.Name = node.ObjectMeta.Labels["hostname"]
nodes = append(nodes, node)
}
if test.addAffinityPredicate {
@ -1416,6 +1496,15 @@ func TestNodesWherePreemptionMightHelp(t *testing.T) {
},
expected: map[string]bool{"machine4": true},
},
{
name: "ErrTopologySpreadConstraintsNotMatch should be tried as it indicates that the pod is unschedulable due to topology spread constraints",
failedPredMap: FailedPredicateMap{
"machine1": []algorithmpredicates.PredicateFailureReason{algorithmpredicates.ErrTopologySpreadConstraintsNotMatch},
"machine2": []algorithmpredicates.PredicateFailureReason{algorithmpredicates.ErrPodNotMatchHostName},
"machine3": []algorithmpredicates.PredicateFailureReason{algorithmpredicates.ErrTopologySpreadConstraintsNotMatch},
},
expected: map[string]bool{"machine1": true, "machine3": true, "machine4": true},
},
}
for _, test := range tests {
@ -1435,27 +1524,31 @@ func TestNodesWherePreemptionMightHelp(t *testing.T) {
func TestPreempt(t *testing.T) {
defer algorithmpredicates.SetPredicatesOrderingDuringTest(order)()
failedPredMap := FailedPredicateMap{
defaultFailedPredMap := FailedPredicateMap{
"machine1": []algorithmpredicates.PredicateFailureReason{algorithmpredicates.NewInsufficientResourceError(v1.ResourceMemory, 1000, 500, 300)},
"machine2": []algorithmpredicates.PredicateFailureReason{algorithmpredicates.ErrDiskConflict},
"machine3": []algorithmpredicates.PredicateFailureReason{algorithmpredicates.NewInsufficientResourceError(v1.ResourceMemory, 1000, 600, 400)},
}
// Prepare 3 node names.
nodeNames := []string{}
defaultNodeNames := []string{}
for i := 1; i < 4; i++ {
nodeNames = append(nodeNames, fmt.Sprintf("machine%d", i))
defaultNodeNames = append(defaultNodeNames, fmt.Sprintf("machine%d", i))
}
var (
preemptLowerPriority = v1.PreemptLowerPriority
preemptNever = v1.PreemptNever
)
tests := []struct {
name string
pod *v1.Pod
pods []*v1.Pod
extenders []*FakeExtender
expectedNode string
expectedPods []string // list of preempted pods
name string
pod *v1.Pod
pods []*v1.Pod
extenders []*FakeExtender
failedPredMap FailedPredicateMap
nodeNames []string
predicate algorithmpredicates.FitPredicate
buildPredMeta bool
expectedNode string
expectedPods []string // list of preempted pods
}{
{
name: "basic preemption logic",
@ -1489,6 +1582,83 @@ func TestPreempt(t *testing.T) {
expectedNode: "machine3",
expectedPods: []string{},
},
{
name: "preemption for topology spread constraints",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "p",
Labels: map[string]string{"foo": ""},
},
Spec: v1.PodSpec{
Priority: &highPriority,
TopologySpreadConstraints: []v1.TopologySpreadConstraint{
{
MaxSkew: 1,
TopologyKey: "zone",
WhenUnsatisfiable: v1.DoNotSchedule,
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "foo",
Operator: metav1.LabelSelectorOpExists,
},
},
},
},
{
MaxSkew: 1,
TopologyKey: "hostname",
WhenUnsatisfiable: v1.DoNotSchedule,
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "foo",
Operator: metav1.LabelSelectorOpExists,
},
},
},
},
},
},
},
pods: []*v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{Name: "pod-a1", UID: types.UID("pod-a1"), Labels: map[string]string{"foo": ""}},
Spec: v1.PodSpec{NodeName: "node-a", Priority: &highPriority},
Status: v1.PodStatus{Phase: v1.PodRunning},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pod-a2", UID: types.UID("pod-a2"), Labels: map[string]string{"foo": ""}},
Spec: v1.PodSpec{NodeName: "node-a", Priority: &highPriority},
Status: v1.PodStatus{Phase: v1.PodRunning},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pod-b1", UID: types.UID("pod-b1"), Labels: map[string]string{"foo": ""}},
Spec: v1.PodSpec{NodeName: "node-b", Priority: &lowPriority},
Status: v1.PodStatus{Phase: v1.PodRunning},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pod-x1", UID: types.UID("pod-x1"), Labels: map[string]string{"foo": ""}},
Spec: v1.PodSpec{NodeName: "node-x", Priority: &highPriority},
Status: v1.PodStatus{Phase: v1.PodRunning},
},
{
ObjectMeta: metav1.ObjectMeta{Name: "pod-x2", UID: types.UID("pod-x2"), Labels: map[string]string{"foo": ""}},
Spec: v1.PodSpec{NodeName: "node-x", Priority: &highPriority},
Status: v1.PodStatus{Phase: v1.PodRunning},
},
},
failedPredMap: FailedPredicateMap{
"node-a": []algorithmpredicates.PredicateFailureReason{algorithmpredicates.ErrTopologySpreadConstraintsNotMatch},
"node-b": []algorithmpredicates.PredicateFailureReason{algorithmpredicates.ErrTopologySpreadConstraintsNotMatch},
"node-x": []algorithmpredicates.PredicateFailureReason{algorithmpredicates.ErrTopologySpreadConstraintsNotMatch},
},
predicate: algorithmpredicates.EvenPodsSpreadPredicate,
buildPredMeta: true,
nodeNames: []string{"node-a/zone1", "node-b/zone1", "node-x/zone2"},
expectedNode: "node-b",
expectedPods: []string{"pod-b1"},
},
{
name: "Scheduler extenders allow only machine1, otherwise machine3 would have been chosen",
pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod1", UID: types.UID("pod1")}, Spec: v1.PodSpec{
@ -1618,6 +1788,7 @@ func TestPreempt(t *testing.T) {
},
}
labelKeys := []string{"hostname", "zone", "region"}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
t.Logf("===== Running test %v", t.Name())
@ -1627,14 +1798,26 @@ func TestPreempt(t *testing.T) {
cache.AddPod(pod)
}
cachedNodeInfoMap := map[string]*schedulernodeinfo.NodeInfo{}
for _, name := range nodeNames {
nodeNames := defaultNodeNames
if len(test.nodeNames) != 0 {
nodeNames = test.nodeNames
}
for i, name := range nodeNames {
node := makeNode(name, 1000*5, priorityutil.DefaultMemoryRequest*5)
// if possible, split node name by '/' to form labels in a format of
// {"hostname": node.Name[0], "zone": node.Name[1], "region": node.Name[2]}
node.ObjectMeta.Labels = make(map[string]string)
for i, label := range strings.Split(node.Name, "/") {
node.ObjectMeta.Labels[labelKeys[i]] = label
}
node.Name = node.ObjectMeta.Labels["hostname"]
cache.AddNode(node)
nodeNames[i] = node.Name
// Set nodeInfo to extenders to mock extenders' cache for preemption.
cachedNodeInfo := schedulernodeinfo.NewNodeInfo()
cachedNodeInfo.SetNode(node)
cachedNodeInfoMap[name] = cachedNodeInfo
cachedNodeInfoMap[node.Name] = cachedNodeInfo
}
extenders := []algorithm.SchedulerExtender{}
for _, extender := range test.extenders {
@ -1642,11 +1825,19 @@ func TestPreempt(t *testing.T) {
extender.cachedNodeNameToInfo = cachedNodeInfoMap
extenders = append(extenders, extender)
}
predicate := algorithmpredicates.PodFitsResources
if test.predicate != nil {
predicate = test.predicate
}
predMetaProducer := algorithmpredicates.EmptyPredicateMetadataProducer
if test.buildPredMeta {
predMetaProducer = algorithmpredicates.NewPredicateMetadataFactory(schedulertesting.FakePodLister(test.pods))
}
scheduler := NewGenericScheduler(
cache,
internalqueue.NewSchedulingQueue(nil, nil),
map[string]algorithmpredicates.FitPredicate{"matches": algorithmpredicates.PodFitsResources},
algorithmpredicates.EmptyPredicateMetadataProducer,
map[string]algorithmpredicates.FitPredicate{"matches": predicate},
predMetaProducer,
[]priorities.PriorityConfig{{Function: numericPriority, Weight: 1}},
priorities.EmptyPriorityMetadataProducer,
emptyFramework,
@ -1660,6 +1851,10 @@ func TestPreempt(t *testing.T) {
true)
scheduler.(*genericScheduler).snapshot()
// Call Preempt and check the expected results.
failedPredMap := defaultFailedPredMap
if test.failedPredMap != nil {
failedPredMap = test.failedPredMap
}
node, victims, _, err := scheduler.Preempt(test.pod, schedulertesting.FakeNodeLister(makeNodeList(nodeNames)), error(&FitError{Pod: test.pod, FailedPredicates: failedPredMap}))
if err != nil {
t.Errorf("unexpected error in preemption: %v", err)