scheduler: parse Pod's Node affinity once in PreScore phase

Signed-off-by: SataQiu <1527062125@qq.com>
This commit is contained in:
SataQiu 2020-11-26 11:19:52 +08:00
parent b3fc888863
commit 2b38078de1
10 changed files with 134 additions and 19 deletions

View File

@ -186,6 +186,7 @@ profiles:
{Name: "InterPodAffinity"},
{Name: "PodTopologySpread"},
{Name: "TaintToleration"},
{Name: "NodeAffinity"},
},
"ScorePlugin": {
{Name: "NodeResourcesBalancedAllocation", Weight: 1},
@ -314,6 +315,7 @@ profiles:
{Name: "InterPodAffinity"},
{Name: "PodTopologySpread"},
{Name: "TaintToleration"},
{Name: "NodeAffinity"},
},
"ScorePlugin": {
{Name: "NodeResourcesBalancedAllocation", Weight: 1},

View File

@ -113,6 +113,7 @@ func getDefaultConfig() *schedulerapi.Plugins {
{Name: interpodaffinity.Name},
{Name: podtopologyspread.Name},
{Name: tainttoleration.Name},
{Name: nodeaffinity.Name},
},
},
Score: &schedulerapi.PluginSet{

View File

@ -90,6 +90,7 @@ func TestClusterAutoscalerProvider(t *testing.T) {
{Name: interpodaffinity.Name},
{Name: podtopologyspread.Name},
{Name: tainttoleration.Name},
{Name: nodeaffinity.Name},
},
},
Score: &schedulerapi.PluginSet{
@ -180,6 +181,7 @@ func TestApplyFeatureGates(t *testing.T) {
{Name: interpodaffinity.Name},
{Name: podtopologyspread.Name},
{Name: tainttoleration.Name},
{Name: nodeaffinity.Name},
},
},
Score: &schedulerapi.PluginSet{
@ -260,6 +262,7 @@ func TestApplyFeatureGates(t *testing.T) {
{Name: interpodaffinity.Name},
{Name: podtopologyspread.Name},
{Name: tainttoleration.Name},
{Name: nodeaffinity.Name},
{Name: selectorspread.Name},
},
},

View File

@ -259,7 +259,10 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
{Name: "VolumeZone"},
},
"PostFilterPlugin": {{Name: "DefaultPreemption"}},
"PreScorePlugin": {{Name: "PodTopologySpread"}},
"PreScorePlugin": {
{Name: "NodeAffinity"},
{Name: "PodTopologySpread"},
},
"ScorePlugin": {
{Name: "NodeResourcesBalancedAllocation", Weight: 2},
{Name: "ImageLocality", Weight: 2},
@ -332,6 +335,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
"PostFilterPlugin": {{Name: "DefaultPreemption"}},
"PreScorePlugin": {
{Name: "InterPodAffinity"},
{Name: "NodeAffinity"},
{Name: "PodTopologySpread"},
{Name: "TaintToleration"},
},
@ -409,6 +413,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
"PostFilterPlugin": {{Name: "DefaultPreemption"}},
"PreScorePlugin": {
{Name: "InterPodAffinity"},
{Name: "NodeAffinity"},
{Name: "PodTopologySpread"},
{Name: "TaintToleration"},
},
@ -497,6 +502,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
"PostFilterPlugin": {{Name: "DefaultPreemption"}},
"PreScorePlugin": {
{Name: "InterPodAffinity"},
{Name: "NodeAffinity"},
{Name: "PodTopologySpread"},
{Name: "TaintToleration"},
},
@ -596,6 +602,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
"PostFilterPlugin": {{Name: "DefaultPreemption"}},
"PreScorePlugin": {
{Name: "InterPodAffinity"},
{Name: "NodeAffinity"},
{Name: "PodTopologySpread"},
{Name: "TaintToleration"},
},
@ -698,6 +705,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
"PostFilterPlugin": {{Name: "DefaultPreemption"}},
"PreScorePlugin": {
{Name: "InterPodAffinity"},
{Name: "NodeAffinity"},
{Name: "PodTopologySpread"},
{Name: "TaintToleration"},
},
@ -805,6 +813,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
"PostFilterPlugin": {{Name: "DefaultPreemption"}},
"PreScorePlugin": {
{Name: "InterPodAffinity"},
{Name: "NodeAffinity"},
{Name: "PodTopologySpread"},
{Name: "TaintToleration"},
},
@ -924,6 +933,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
"PostFilterPlugin": {{Name: "DefaultPreemption"}},
"PreScorePlugin": {
{Name: "InterPodAffinity"},
{Name: "NodeAffinity"},
{Name: "PodTopologySpread"},
{Name: "TaintToleration"},
},
@ -1046,6 +1056,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
"PostFilterPlugin": {{Name: "DefaultPreemption"}},
"PreScorePlugin": {
{Name: "InterPodAffinity"},
{Name: "NodeAffinity"},
{Name: "PodTopologySpread"},
{Name: "TaintToleration"},
},
@ -1168,6 +1179,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
"PostFilterPlugin": {{Name: "DefaultPreemption"}},
"PreScorePlugin": {
{Name: "InterPodAffinity"},
{Name: "NodeAffinity"},
{Name: "PodTopologySpread"},
{Name: "TaintToleration"},
},
@ -1294,6 +1306,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
"PostFilterPlugin": {{Name: "DefaultPreemption"}},
"PreScorePlugin": {
{Name: "InterPodAffinity"},
{Name: "NodeAffinity"},
{Name: "PodTopologySpread"},
{Name: "TaintToleration"},
},
@ -1423,6 +1436,7 @@ func TestAlgorithmProviderCompatibility(t *testing.T) {
{Name: "InterPodAffinity"},
{Name: "PodTopologySpread"},
{Name: "TaintToleration"},
{Name: "NodeAffinity"},
},
"ScorePlugin": {
{Name: "NodeResourcesBalancedAllocation", Weight: 1},
@ -1491,6 +1505,7 @@ func TestAlgorithmProviderCompatibility(t *testing.T) {
{Name: "InterPodAffinity"},
{Name: "PodTopologySpread"},
{Name: "TaintToleration"},
{Name: "NodeAffinity"},
},
"ScorePlugin": {
{Name: "NodeResourcesBalancedAllocation", Weight: 1},
@ -1578,6 +1593,7 @@ func TestPluginsConfigurationCompatibility(t *testing.T) {
{Name: "InterPodAffinity"},
{Name: "PodTopologySpread"},
{Name: "TaintToleration"},
{Name: "NodeAffinity"},
},
"ScorePlugin": {
{Name: "NodeResourcesBalancedAllocation", Weight: 1},
@ -1700,6 +1716,7 @@ func TestPluginsConfigurationCompatibility(t *testing.T) {
{Name: "InterPodAffinity"},
{Name: "PodTopologySpread"},
{Name: "TaintToleration"},
{Name: "NodeAffinity"},
},
"ScorePlugin": {
{Name: "NodeResourcesBalancedAllocation", Weight: 1},
@ -1906,6 +1923,7 @@ func TestPluginsConfigurationCompatibility(t *testing.T) {
PreScore: &config.PluginSet{
Disabled: []config.Plugin{
{Name: "InterPodAffinity"},
{Name: "NodeAffinity"},
{Name: "SelectorSpread"},
{Name: "TaintToleration"},
{Name: "PodTopologySpread"},

View File

@ -23,7 +23,7 @@ import (
"time"
"github.com/google/go-cmp/cmp"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/clock"
@ -184,6 +184,7 @@ func TestCreateFromConfig(t *testing.T) {
Enabled: []schedulerapi.Plugin{
{Name: "PodTopologySpread"},
{Name: "InterPodAffinity"},
{Name: "NodeAffinity"},
{Name: "TaintToleration"},
},
},
@ -295,7 +296,12 @@ func TestCreateFromConfig(t *testing.T) {
},
},
PostFilter: &schedulerapi.PluginSet{Enabled: []schedulerapi.Plugin{{Name: "DefaultPreemption"}}},
PreScore: &schedulerapi.PluginSet{Enabled: []schedulerapi.Plugin{{Name: "InterPodAffinity"}}},
PreScore: &schedulerapi.PluginSet{
Enabled: []schedulerapi.Plugin{
{Name: "InterPodAffinity"},
{Name: "NodeAffinity"},
},
},
Score: &schedulerapi.PluginSet{
Enabled: []schedulerapi.Plugin{
{Name: "InterPodAffinity", Weight: 1},
@ -360,7 +366,11 @@ func TestCreateFromConfig(t *testing.T) {
},
},
PostFilter: &schedulerapi.PluginSet{Enabled: []schedulerapi.Plugin{{Name: "DefaultPreemption"}}},
PreScore: &schedulerapi.PluginSet{Enabled: []schedulerapi.Plugin{{Name: "InterPodAffinity"}}},
PreScore: &schedulerapi.PluginSet{
Enabled: []schedulerapi.Plugin{
{Name: "InterPodAffinity"},
},
},
Score: &schedulerapi.PluginSet{
Enabled: []schedulerapi.Plugin{
{Name: "InterPodAffinity", Weight: 1},

View File

@ -360,6 +360,7 @@ func NewLegacyRegistry() *LegacyRegistry {
})
registry.registerPriorityConfigProducer(NodeAffinityPriority,
func(args ConfigProducerArgs, plugins *config.Plugins, _ *[]config.PluginConfig) {
plugins.PreScore = appendToPluginSet(plugins.PreScore, nodeaffinity.Name, nil)
plugins.Score = appendToPluginSet(plugins.Score, nodeaffinity.Name, &args.Weight)
})
registry.registerPriorityConfigProducer(ImageLocalityPriority,

View File

@ -121,6 +121,7 @@ func TestAppendPriorityConfigs(t *testing.T) {
Enabled: []config.Plugin{
{Name: podtopologyspread.Name},
{Name: interpodaffinity.Name},
{Name: nodeaffinity.Name},
{Name: tainttoleration.Name},
},
},

View File

@ -43,6 +43,9 @@ const (
// Name is the name of the plugin used in the plugin registry and configurations.
Name = "NodeAffinity"
// preScoreStateKey is the key in CycleState to NodeAffinity pre-computed data for Scoring.
preScoreStateKey = "PreScore" + Name
// ErrReasonPod is the reason for Pod's node affinity/selector not matching.
ErrReasonPod = "node(s) didn't match Pod's node affinity"
@ -71,6 +74,33 @@ func (pl *NodeAffinity) Filter(ctx context.Context, state *framework.CycleState,
return nil
}
// preScoreState computed at PreScore and used at Score.
type preScoreState struct {
preferredNodeAffinity *nodeaffinity.PreferredSchedulingTerms
}
// Clone implements the mandatory Clone interface. We don't really copy the data since
// there is no need for that.
func (s *preScoreState) Clone() framework.StateData {
return s
}
// PreScore builds and writes cycle state used by Score and NormalizeScore.
func (pl *NodeAffinity) PreScore(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodes []*v1.Node) *framework.Status {
if len(nodes) == 0 {
return nil
}
preferredNodeAffinity, err := getPodPreferredNodeAffinity(pod)
if err != nil {
return framework.AsStatus(err)
}
state := &preScoreState{
preferredNodeAffinity: preferredNodeAffinity,
}
cycleState.Write(preScoreStateKey, state)
return nil
}
// Score returns the sum of the weights of the terms that match the Node.
// Terms came from the Pod .spec.affinity.nodeAffinity and from the plugin's
// default affinity.
@ -85,22 +115,25 @@ func (pl *NodeAffinity) Score(ctx context.Context, state *framework.CycleState,
return 0, framework.AsStatus(fmt.Errorf("getting node %q from Snapshot: %w", nodeName, err))
}
affinity := pod.Spec.Affinity
var count int64
if pl.addedPrefSchedTerms != nil {
count += pl.addedPrefSchedTerms.Score(node)
}
// A nil element of PreferredDuringSchedulingIgnoredDuringExecution matches no objects.
// An element of PreferredDuringSchedulingIgnoredDuringExecution that refers to an
// empty PreferredSchedulingTerm matches all objects.
if affinity != nil && affinity.NodeAffinity != nil && affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution != nil {
// TODO(#96164): Do this in PreScore to avoid computing it for all nodes.
preferredNodeAffinity, err := nodeaffinity.NewPreferredSchedulingTerms(affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution)
s, err := getPreScoreState(state)
if err != nil {
// fallback to calculate preferredNodeAffinity here when PreScore is disabled
preferredNodeAffinity, err := getPodPreferredNodeAffinity(pod)
if err != nil {
return 0, framework.AsStatus(err)
}
count += preferredNodeAffinity.Score(node)
s = &preScoreState{
preferredNodeAffinity: preferredNodeAffinity,
}
}
if s.preferredNodeAffinity != nil {
count += s.preferredNodeAffinity.Score(node)
}
return count, nil
@ -150,3 +183,24 @@ func getArgs(obj runtime.Object) (config.NodeAffinityArgs, error) {
}
return *ptr, validation.ValidateNodeAffinityArgs(ptr)
}
func getPodPreferredNodeAffinity(pod *v1.Pod) (*nodeaffinity.PreferredSchedulingTerms, error) {
affinity := pod.Spec.Affinity
if affinity != nil && affinity.NodeAffinity != nil && affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution != nil {
return nodeaffinity.NewPreferredSchedulingTerms(affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution)
}
return nil, nil
}
func getPreScoreState(cycleState *framework.CycleState) (*preScoreState, error) {
c, err := cycleState.Read(preScoreStateKey)
if err != nil {
return nil, fmt.Errorf("reading %q from cycleState: %v", preScoreStateKey, err)
}
s, ok := c.(*preScoreState)
if !ok {
return nil, fmt.Errorf("invalid PreScore state, got type %T", c)
}
return s, nil
}

View File

@ -889,11 +889,12 @@ func TestNodeAffinityPriority(t *testing.T) {
}
tests := []struct {
pod *v1.Pod
nodes []*v1.Node
expectedList framework.NodeScoreList
name string
args config.NodeAffinityArgs
pod *v1.Pod
nodes []*v1.Node
expectedList framework.NodeScoreList
name string
args config.NodeAffinityArgs
disablePreScore bool
}{
{
pod: &v1.Pod{
@ -995,6 +996,21 @@ func TestNodeAffinityPriority(t *testing.T) {
},
},
},
{
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: affinity2,
},
},
nodes: []*v1.Node{
{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine5", Labels: label5}},
{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
},
expectedList: []framework.NodeScore{{Name: "machine1", Score: 18}, {Name: "machine5", Score: framework.MaxNodeScore}, {Name: "machine2", Score: 36}},
name: "calculate the priorities correctly even if PreScore is not called",
disablePreScore: true,
},
}
for _, test := range tests {
@ -1006,6 +1022,13 @@ func TestNodeAffinityPriority(t *testing.T) {
if err != nil {
t.Fatalf("Creating plugin: %v", err)
}
var status *framework.Status
if !test.disablePreScore {
status = p.(framework.PreScorePlugin).PreScore(context.Background(), state, test.pod, test.nodes)
if !status.IsSuccess() {
t.Errorf("unexpected error: %v", status)
}
}
var gotList framework.NodeScoreList
for _, n := range test.nodes {
nodeName := n.ObjectMeta.Name
@ -1016,7 +1039,7 @@ func TestNodeAffinityPriority(t *testing.T) {
gotList = append(gotList, framework.NodeScore{Name: nodeName, Score: score})
}
status := p.(framework.ScorePlugin).ScoreExtensions().NormalizeScore(context.Background(), state, test.pod, gotList)
status = p.(framework.ScorePlugin).ScoreExtensions().NormalizeScore(context.Background(), state, test.pod, gotList)
if !status.IsSuccess() {
t.Errorf("unexpected error: %v", status)
}

View File

@ -132,6 +132,7 @@ func TestSchedulerCreationFromConfigMap(t *testing.T) {
"PreScorePlugin": {
{Name: "PodTopologySpread"},
{Name: "InterPodAffinity"},
{Name: "NodeAffinity"},
{Name: "TaintToleration"},
},
"ScorePlugin": {
@ -226,6 +227,7 @@ kind: Policy
"PreScorePlugin": {
{Name: "PodTopologySpread"},
{Name: "InterPodAffinity"},
{Name: "NodeAffinity"},
{Name: "TaintToleration"},
},
"ScorePlugin": {