Merge pull request #82841 from Huang-Wei/sched-panic

Fixed a scheduler panic when using PodAffinity (k8s version >= 1.15)
This commit is contained in:
Kubernetes Prow Robot 2019-09-19 14:21:11 -07:00 committed by GitHub
commit 33adc7fafa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 266 additions and 32 deletions

View File

@ -1682,7 +1682,7 @@ func BenchmarkTestGetTPMapMatchingSpreadConstraints(b *testing.B) {
}
for _, tt := range tests {
b.Run(tt.name, func(b *testing.B) {
existingPods, allNodes, _ := st.MakeNodesAndPods(tt.pod, tt.existingPodsNum, tt.allNodesNum, tt.filteredNodesNum)
existingPods, allNodes, _ := st.MakeNodesAndPodsForEvenPodsSpread(tt.pod, tt.existingPodsNum, tt.allNodesNum, tt.filteredNodesNum)
nodeNameToInfo := schedulernodeinfo.CreateNodeNameToInfoMap(existingPods, allNodes)
b.ResetTimer()
for i := 0; i < b.N; i++ {

View File

@ -483,7 +483,7 @@ func BenchmarkTestCalculateEvenPodsSpreadPriority(b *testing.B) {
}
for _, tt := range tests {
b.Run(tt.name, func(b *testing.B) {
existingPods, allNodes, filteredNodes := st.MakeNodesAndPods(tt.pod, tt.existingPodsNum, tt.allNodesNum, tt.filteredNodesNum)
existingPods, allNodes, filteredNodes := st.MakeNodesAndPodsForEvenPodsSpread(tt.pod, tt.existingPodsNum, tt.allNodesNum, tt.filteredNodesNum)
nodeNameToInfo := schedulernodeinfo.CreateNodeNameToInfoMap(existingPods, allNodes)
b.ResetTimer()
for i := 0; i < b.N; i++ {

View File

@ -51,17 +51,16 @@ func NewInterPodAffinityPriority(
}
type podAffinityPriorityMap struct {
// nodes contain all nodes that should be considered
// nodes contain all nodes that should be considered.
nodes []*v1.Node
// counts store the mapping from node name to so-far computed score of
// the node.
counts map[string]*int64
// counts store the so-far computed score for each node.
counts []int64
}
func newPodAffinityPriorityMap(nodes []*v1.Node) *podAffinityPriorityMap {
return &podAffinityPriorityMap{
nodes: nodes,
counts: make(map[string]*int64, len(nodes)),
counts: make([]int64, len(nodes)),
}
}
@ -73,9 +72,9 @@ func (p *podAffinityPriorityMap) processTerm(term *v1.PodAffinityTerm, podDefini
}
match := priorityutil.PodMatchesTermsNamespaceAndSelector(podToCheck, namespaces, selector)
if match {
for _, node := range p.nodes {
for i, node := range p.nodes {
if priorityutil.NodesHaveSameTopologyKey(node, fixedNode, term.TopologyKey) {
atomic.AddInt64(p.counts[node.Name], weight)
atomic.AddInt64(&p.counts[i], weight)
}
}
}
@ -102,17 +101,11 @@ func (ipa *InterPodAffinity) CalculateInterPodAffinityPriority(pod *v1.Pod, node
hasAffinityConstraints := affinity != nil && affinity.PodAffinity != nil
hasAntiAffinityConstraints := affinity != nil && affinity.PodAntiAffinity != nil
// priorityMap stores the mapping from node name to so-far computed score of
// the node.
// pm stores (1) all nodes that should be considered and (2) the so-far computed score for each node.
pm := newPodAffinityPriorityMap(nodes)
allNodeNames := make([]string, 0, len(nodeNameToInfo))
lazyInit := hasAffinityConstraints || hasAntiAffinityConstraints
for name := range nodeNameToInfo {
allNodeNames = append(allNodeNames, name)
// if pod has affinity defined, or target node has affinityPods
if lazyInit || len(nodeNameToInfo[name].PodsWithAffinity()) != 0 {
pm.counts[name] = new(int64)
}
}
// convert the topology key based weights to the node name based weights
@ -216,25 +209,22 @@ func (ipa *InterPodAffinity) CalculateInterPodAffinityPriority(pod *v1.Pod, node
return nil, err
}
for _, node := range nodes {
if pm.counts[node.Name] == nil {
continue
for i := range nodes {
if pm.counts[i] > maxCount {
maxCount = pm.counts[i]
}
if *pm.counts[node.Name] > maxCount {
maxCount = *pm.counts[node.Name]
}
if *pm.counts[node.Name] < minCount {
minCount = *pm.counts[node.Name]
if pm.counts[i] < minCount {
minCount = pm.counts[i]
}
}
// calculate final priority score for each node
result := make(schedulerapi.HostPriorityList, 0, len(nodes))
maxMinDiff := maxCount - minCount
for _, node := range nodes {
for i, node := range nodes {
fScore := float64(0)
if maxMinDiff > 0 && pm.counts[node.Name] != nil {
fScore = float64(schedulerapi.MaxPriority) * (float64(*pm.counts[node.Name]-minCount) / float64(maxCount-minCount))
if maxMinDiff > 0 {
fScore = float64(schedulerapi.MaxPriority) * (float64(pm.counts[i]-minCount) / float64(maxCount-minCount))
}
result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: int64(fScore)})
if klog.V(10) {

View File

@ -25,6 +25,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
st "k8s.io/kubernetes/pkg/scheduler/testing"
)
type FakeNodeListInfo []*v1.Node
@ -506,6 +507,22 @@ func TestInterPodAffinityPriority(t *testing.T) {
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: schedulerapi.MaxPriority}, {Host: "machine4", Score: 0}},
name: "Affinity and Anti Affinity and symmetry: considered only preferredDuringSchedulingIgnoredDuringExecution in both pod affinity & anti affinity & symmetry",
},
// Cover https://github.com/kubernetes/kubernetes/issues/82796 which panics upon:
// 1. Some nodes in a topology don't have pods with affinity, but other nodes in the same topology have.
// 2. The incoming pod doesn't have affinity.
{
pod: &v1.Pod{Spec: v1.PodSpec{NodeName: ""}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
pods: []*v1.Pod{
{Spec: v1.PodSpec{NodeName: "machine1"}, ObjectMeta: metav1.ObjectMeta{Labels: podLabelSecurityS1}},
{Spec: v1.PodSpec{NodeName: "machine2", Affinity: stayWithS1InRegionAwayFromS2InAz}},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: labelRgChina}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: labelRgChina}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: schedulerapi.MaxPriority}, {Host: "machine2", Score: schedulerapi.MaxPriority}},
name: "Avoid panic when partial nodes in a topology don't have pods with affinity",
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
@ -612,3 +629,57 @@ func TestHardPodAffinitySymmetricWeight(t *testing.T) {
})
}
}
// BenchmarkInterPodAffinityPriority measures CalculateInterPodAffinityPriority
// on a fake 1000-node / 10000-pod cluster, varying whether the incoming pod
// and/or the existing pods carry PodAffinity terms. The prepFunc builds the
// cluster fixture outside the timed region.
func BenchmarkInterPodAffinityPriority(b *testing.B) {
	type benchCase struct {
		name            string
		pod             *v1.Pod
		existingPodsNum int
		allNodesNum     int
		prepFunc        func(existingPodsNum, allNodesNum int) (existingPods []*v1.Pod, allNodes []*v1.Node)
	}
	benchCases := []benchCase{
		{
			name:            "1000nodes/incoming pod without PodAffinity and existing pods without PodAffinity",
			pod:             st.MakePod().Name("p").Label("foo", "").Obj(),
			existingPodsNum: 10000,
			allNodesNum:     1000,
			prepFunc:        st.MakeNodesAndPods,
		},
		{
			name:            "1000nodes/incoming pod with PodAffinity and existing pods without PodAffinity",
			pod:             st.MakePod().Name("p").Label("foo", "").PodAffinityExists("foo", "zone", st.PodAffinityWithPreferredReq).Obj(),
			existingPodsNum: 10000,
			allNodesNum:     1000,
			prepFunc:        st.MakeNodesAndPods,
		},
		{
			name:            "1000nodes/incoming pod without PodAffinity and existing pods with PodAffinity",
			pod:             st.MakePod().Name("p").Label("foo", "").Obj(),
			existingPodsNum: 10000,
			allNodesNum:     1000,
			prepFunc:        st.MakeNodesAndPodsForPodAffinity,
		},
		{
			name:            "1000nodes/incoming pod with PodAffinity and existing pods with PodAffinity",
			pod:             st.MakePod().Name("p").Label("foo", "").PodAffinityExists("foo", "zone", st.PodAffinityWithPreferredReq).Obj(),
			existingPodsNum: 10000,
			allNodesNum:     1000,
			prepFunc:        st.MakeNodesAndPodsForPodAffinity,
		},
	}
	for _, bc := range benchCases {
		b.Run(bc.name, func(b *testing.B) {
			// Fixture construction is excluded from the measurement via ResetTimer.
			existingPods, allNodes := bc.prepFunc(bc.existingPodsNum, bc.allNodesNum)
			nodeNameToInfo := schedulernodeinfo.CreateNodeNameToInfoMap(existingPods, allNodes)
			ipa := InterPodAffinity{
				info:                  FakeNodeListInfo(allNodes),
				hardPodAffinityWeight: v1.DefaultHardPodAffinitySymmetricWeight,
			}
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				ipa.CalculateInterPodAffinityPriority(bc.pod, nodeNameToInfo, allNodes)
			}
		})
	}
}

View File

@ -22,15 +22,15 @@ import (
"k8s.io/api/core/v1"
)
// MakeNodesAndPods serves as a testing helper for EvenPodsSpread feature.
// MakeNodesAndPodsForEvenPodsSpread serves as a testing helper for EvenPodsSpread feature.
// It builds a fake cluster containing running Pods and Nodes.
// The size of Pods and Nodes are determined by input arguments.
// The specs of Pods and Nodes are generated with the following rules:
// - If `pod` has "node" as a topologyKey, each generated node is applied with a unique label: "node: node<i>".
// - If `pod` has "zone" as a topologyKey, each generated node is applied with a rotating label: "zone: zone[0-9]".
// - Depending on "lableSelector.MatchExpressions[0].Key" the `pod` has in each topologySpreadConstraint,
// - Depending on "labelSelector.MatchExpressions[0].Key" the `pod` has in each topologySpreadConstraint,
// each generated pod will be applied with label "key1", "key1,key2", ..., "key1,key2,...,keyN" in a rotating manner.
func MakeNodesAndPods(pod *v1.Pod, existingPodsNum, allNodesNum, filteredNodesNum int) (existingPods []*v1.Pod, allNodes []*v1.Node, filteredNodes []*v1.Node) {
func MakeNodesAndPodsForEvenPodsSpread(pod *v1.Pod, existingPodsNum, allNodesNum, filteredNodesNum int) (existingPods []*v1.Pod, allNodes []*v1.Node, filteredNodes []*v1.Node) {
var topologyKeys []string
var labels []string
zones := 10
@ -65,3 +65,78 @@ func MakeNodesAndPods(pod *v1.Pod, existingPodsNum, allNodesNum, filteredNodesNu
}
return
}
// MakeNodesAndPodsForPodAffinity serves as a testing helper for the Pod(Anti)Affinity feature.
// It builds a fake cluster containing running Pods and Nodes.
// For simplicity, Nodes[i] is labelled with three topology keys:
//   - "region": "region" + i%3
//   - "zone":   "zone" + i%10
//   - "node":   "node" + i
// The Pods are applied with various combinations of PodAffinity and PodAntiAffinity terms.
func MakeNodesAndPodsForPodAffinity(existingPodsNum, allNodesNum int) (existingPods []*v1.Pod, allNodes []*v1.Node) {
	topologySizes := map[string]int{
		"region": 3,
		"zone":   10,
		"node":   allNodesNum,
	}
	// Build nodes spread across all topology domains.
	for i := 0; i < allNodesNum; i++ {
		node := MakeNode().Name(fmt.Sprintf("node%d", i))
		for key, size := range topologySizes {
			node = node.Label(key, fmt.Sprintf("%s%d", key, i%size))
		}
		allNodes = append(allNodes, node.Obj())
	}

	podLabels := []string{"foo", "bar", "baz"}
	topologyKeys := []string{"region", "zone", "node"}
	// Build pods. Each pod carries one affinity term and one anti-affinity term,
	// cycling through every (affinity kind, anti-affinity kind) pair below:
	// pod 0 gets {NilPodAffinity, NilPodAffinity}, pod 1 gets
	// {NilPodAffinity, PodAntiAffinityWithRequiredReq}, and so on.
	affinityKinds := []PodAffinityKind{
		NilPodAffinity,
		PodAffinityWithRequiredReq,
		PodAffinityWithPreferredReq,
		PodAffinityWithRequiredPreferredReq,
	}
	antiAffinityKinds := []PodAffinityKind{
		NilPodAffinity,
		PodAntiAffinityWithRequiredReq,
		PodAntiAffinityWithPreferredReq,
		PodAntiAffinityWithRequiredPreferredReq,
	}
	comboCount := len(affinityKinds) * len(antiAffinityKinds)
	for i := 0; i < existingPodsNum; i++ {
		label := podLabels[i%len(podLabels)]
		tpKey := topologyKeys[i%len(topologyKeys)]
		combo := i % comboCount
		// len(affinityKinds) equals len(antiAffinityKinds), so a single divisor
		// decodes the pair index.
		affIdx, antiIdx := combo/len(affinityKinds), combo%len(affinityKinds)
		pod := MakePod().Name(fmt.Sprintf("pod%d", i)).Node(fmt.Sprintf("node%d", i%allNodesNum)).
			PodAffinityExists(label, tpKey, affinityKinds[affIdx]).
			PodAntiAffinityExists(label, tpKey, antiAffinityKinds[antiIdx])
		existingPods = append(existingPods, pod.Obj())
	}
	return
}
// MakeNodesAndPods serves as a testing helper to generate regular Nodes and Pods
// that don't use any advanced scheduling features.
// Pods[i] is bound to node "node"+(i%allNodesNum) so pods are spread round-robin
// over the nodes.
func MakeNodesAndPods(existingPodsNum, allNodesNum int) (existingPods []*v1.Pod, allNodes []*v1.Node) {
	// Build plain nodes with no labels.
	allNodes = make([]*v1.Node, 0, allNodesNum)
	for i := 0; i < allNodesNum; i++ {
		allNodes = append(allNodes, MakeNode().Name(fmt.Sprintf("node%d", i)).Obj())
	}
	// Build plain pods, each pinned to a node round-robin.
	existingPods = make([]*v1.Pod, 0, existingPodsNum)
	for i := 0; i < existingPodsNum; i++ {
		p := MakePod().Name(fmt.Sprintf("pod%d", i)).Node(fmt.Sprintf("node%d", i%allNodesNum))
		existingPods = append(existingPods, p.Obj())
	}
	return
}

View File

@ -190,7 +190,7 @@ func (p *PodWrapper) NodeSelector(m map[string]string) *PodWrapper {
}
// NodeAffinityIn creates a HARD node affinity (with the operator In)
// and injects into the innner pod.
// and injects into the inner pod.
func (p *PodWrapper) NodeAffinityIn(key string, vals []string) *PodWrapper {
if p.Spec.Affinity == nil {
p.Spec.Affinity = &v1.Affinity{}
@ -204,7 +204,7 @@ func (p *PodWrapper) NodeAffinityIn(key string, vals []string) *PodWrapper {
}
// NodeAffinityNotIn creates a HARD node affinity (with the operator NotIn)
// and injects into the innner pod.
// and injects into the inner pod.
func (p *PodWrapper) NodeAffinityNotIn(key string, vals []string) *PodWrapper {
if p.Spec.Affinity == nil {
p.Spec.Affinity = &v1.Affinity{}
@ -217,6 +217,104 @@ func (p *PodWrapper) NodeAffinityNotIn(key string, vals []string) *PodWrapper {
return p
}
// PodAffinityKind represents different kinds of PodAffinity.
type PodAffinityKind int
// NOTE: NilPodAffinity is deliberately first so that the zero value of
// PodAffinityKind is the no-op kind.
const (
// NilPodAffinity is a no-op which doesn't apply any PodAffinity.
NilPodAffinity PodAffinityKind = iota
// PodAffinityWithRequiredReq applies a HARD requirement to pod.spec.affinity.PodAffinity.
PodAffinityWithRequiredReq
// PodAffinityWithPreferredReq applies a SOFT requirement to pod.spec.affinity.PodAffinity.
PodAffinityWithPreferredReq
// PodAffinityWithRequiredPreferredReq applies HARD and SOFT requirements to pod.spec.affinity.PodAffinity.
PodAffinityWithRequiredPreferredReq
// PodAntiAffinityWithRequiredReq applies a HARD requirement to pod.spec.affinity.PodAntiAffinity.
PodAntiAffinityWithRequiredReq
// PodAntiAffinityWithPreferredReq applies a SOFT requirement to pod.spec.affinity.PodAntiAffinity.
PodAntiAffinityWithPreferredReq
// PodAntiAffinityWithRequiredPreferredReq applies HARD and SOFT requirements to pod.spec.affinity.PodAntiAffinity.
PodAntiAffinityWithRequiredPreferredReq
)
// PodAffinityExists creates a PodAffinity term with the operator "Exists" on
// labelKey/topologyKey and injects it into the inner pod. The kind selects
// whether the term is added as a required rule, a preferred rule (weight 1),
// or both; NilPodAffinity (and any anti-affinity kind) leaves the pod untouched.
func (p *PodWrapper) PodAffinityExists(labelKey, topologyKey string, kind PodAffinityKind) *PodWrapper {
	if kind == NilPodAffinity {
		return p
	}
	if p.Spec.Affinity == nil {
		p.Spec.Affinity = &v1.Affinity{}
	}
	if p.Spec.Affinity.PodAffinity == nil {
		p.Spec.Affinity.PodAffinity = &v1.PodAffinity{}
	}
	affinity := p.Spec.Affinity.PodAffinity
	term := v1.PodAffinityTerm{
		LabelSelector: MakeLabelSelector().Exists(labelKey).Obj(),
		TopologyKey:   topologyKey,
	}
	if kind == PodAffinityWithRequiredReq || kind == PodAffinityWithRequiredPreferredReq {
		affinity.RequiredDuringSchedulingIgnoredDuringExecution = append(
			affinity.RequiredDuringSchedulingIgnoredDuringExecution,
			term,
		)
	}
	if kind == PodAffinityWithPreferredReq || kind == PodAffinityWithRequiredPreferredReq {
		affinity.PreferredDuringSchedulingIgnoredDuringExecution = append(
			affinity.PreferredDuringSchedulingIgnoredDuringExecution,
			v1.WeightedPodAffinityTerm{Weight: 1, PodAffinityTerm: term},
		)
	}
	return p
}
// PodAntiAffinityExists creates a PodAntiAffinity term with the operator
// "Exists" on labelKey/topologyKey and injects it into the inner pod. The kind
// selects whether the term is added as a required rule, a preferred rule
// (weight 1), or both; NilPodAffinity (and any affinity kind) leaves the pod
// untouched.
func (p *PodWrapper) PodAntiAffinityExists(labelKey, topologyKey string, kind PodAffinityKind) *PodWrapper {
	if kind == NilPodAffinity {
		return p
	}
	if p.Spec.Affinity == nil {
		p.Spec.Affinity = &v1.Affinity{}
	}
	if p.Spec.Affinity.PodAntiAffinity == nil {
		p.Spec.Affinity.PodAntiAffinity = &v1.PodAntiAffinity{}
	}
	antiAffinity := p.Spec.Affinity.PodAntiAffinity
	term := v1.PodAffinityTerm{
		LabelSelector: MakeLabelSelector().Exists(labelKey).Obj(),
		TopologyKey:   topologyKey,
	}
	if kind == PodAntiAffinityWithRequiredReq || kind == PodAntiAffinityWithRequiredPreferredReq {
		antiAffinity.RequiredDuringSchedulingIgnoredDuringExecution = append(
			antiAffinity.RequiredDuringSchedulingIgnoredDuringExecution,
			term,
		)
	}
	if kind == PodAntiAffinityWithPreferredReq || kind == PodAntiAffinityWithRequiredPreferredReq {
		antiAffinity.PreferredDuringSchedulingIgnoredDuringExecution = append(
			antiAffinity.PreferredDuringSchedulingIgnoredDuringExecution,
			v1.WeightedPodAffinityTerm{Weight: 1, PodAffinityTerm: term},
		)
	}
	return p
}
// SpreadConstraint constructs a TopologySpreadConstraint object and injects
// into the inner pod.
func (p *PodWrapper) SpreadConstraint(maxSkew int, tpKey string, mode v1.UnsatisfiableConstraintAction, selector *metav1.LabelSelector) *PodWrapper {