From 2b38078de1c655221b16dfd8f953b628ea7abe93 Mon Sep 17 00:00:00 2001 From: SataQiu <1527062125@qq.com> Date: Thu, 26 Nov 2020 11:19:52 +0800 Subject: [PATCH] scheduler: parse Pod's Node affinity once in PreScore phase Signed-off-by: SataQiu <1527062125@qq.com> --- cmd/kube-scheduler/app/server_test.go | 2 + pkg/scheduler/algorithmprovider/registry.go | 1 + .../algorithmprovider/registry_test.go | 3 + .../apis/config/testing/compatibility_test.go | 20 +++++- pkg/scheduler/factory_test.go | 16 ++++- .../framework/plugins/legacy_registry.go | 1 + .../framework/plugins/legacy_registry_test.go | 1 + .../plugins/nodeaffinity/node_affinity.go | 72 ++++++++++++++++--- .../nodeaffinity/node_affinity_test.go | 35 +++++++-- test/integration/scheduler/scheduler_test.go | 2 + 10 files changed, 134 insertions(+), 19 deletions(-) diff --git a/cmd/kube-scheduler/app/server_test.go b/cmd/kube-scheduler/app/server_test.go index 909bdb5ab9f..82e7a23585b 100644 --- a/cmd/kube-scheduler/app/server_test.go +++ b/cmd/kube-scheduler/app/server_test.go @@ -186,6 +186,7 @@ profiles: {Name: "InterPodAffinity"}, {Name: "PodTopologySpread"}, {Name: "TaintToleration"}, + {Name: "NodeAffinity"}, }, "ScorePlugin": { {Name: "NodeResourcesBalancedAllocation", Weight: 1}, @@ -314,6 +315,7 @@ profiles: {Name: "InterPodAffinity"}, {Name: "PodTopologySpread"}, {Name: "TaintToleration"}, + {Name: "NodeAffinity"}, }, "ScorePlugin": { {Name: "NodeResourcesBalancedAllocation", Weight: 1}, diff --git a/pkg/scheduler/algorithmprovider/registry.go b/pkg/scheduler/algorithmprovider/registry.go index 0ad2e778834..93ee9af3fea 100644 --- a/pkg/scheduler/algorithmprovider/registry.go +++ b/pkg/scheduler/algorithmprovider/registry.go @@ -113,6 +113,7 @@ func getDefaultConfig() *schedulerapi.Plugins { {Name: interpodaffinity.Name}, {Name: podtopologyspread.Name}, {Name: tainttoleration.Name}, + {Name: nodeaffinity.Name}, }, }, Score: &schedulerapi.PluginSet{ diff --git a/pkg/scheduler/algorithmprovider/registry_test.go b/pkg/scheduler/algorithmprovider/registry_test.go index 12eee457a94..ecb935d9a2f 100644 --- a/pkg/scheduler/algorithmprovider/registry_test.go +++ b/pkg/scheduler/algorithmprovider/registry_test.go @@ -90,6 +90,7 @@ func TestClusterAutoscalerProvider(t *testing.T) { {Name: interpodaffinity.Name}, {Name: podtopologyspread.Name}, {Name: tainttoleration.Name}, + {Name: nodeaffinity.Name}, }, }, Score: &schedulerapi.PluginSet{ @@ -180,6 +181,7 @@ func TestApplyFeatureGates(t *testing.T) { {Name: interpodaffinity.Name}, {Name: podtopologyspread.Name}, {Name: tainttoleration.Name}, + {Name: nodeaffinity.Name}, }, }, Score: &schedulerapi.PluginSet{ @@ -260,6 +262,7 @@ func TestApplyFeatureGates(t *testing.T) { {Name: interpodaffinity.Name}, {Name: podtopologyspread.Name}, {Name: tainttoleration.Name}, + {Name: nodeaffinity.Name}, {Name: selectorspread.Name}, }, }, diff --git a/pkg/scheduler/apis/config/testing/compatibility_test.go b/pkg/scheduler/apis/config/testing/compatibility_test.go index 4e76b626a8f..7c3ced9465c 100644 --- a/pkg/scheduler/apis/config/testing/compatibility_test.go +++ b/pkg/scheduler/apis/config/testing/compatibility_test.go @@ -259,7 +259,10 @@ func TestCompatibility_v1_Scheduler(t *testing.T) { {Name: "VolumeZone"}, }, "PostFilterPlugin": {{Name: "DefaultPreemption"}}, - "PreScorePlugin": {{Name: "PodTopologySpread"}}, + "PreScorePlugin": { + {Name: "NodeAffinity"}, + {Name: "PodTopologySpread"}, + }, "ScorePlugin": { {Name: "NodeResourcesBalancedAllocation", Weight: 2}, {Name: "ImageLocality", Weight: 2}, @@ -332,6 +335,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) { "PostFilterPlugin": {{Name: "DefaultPreemption"}}, "PreScorePlugin": { {Name: "InterPodAffinity"}, + {Name: "NodeAffinity"}, {Name: "PodTopologySpread"}, {Name: "TaintToleration"}, }, @@ -409,6 +413,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) { "PostFilterPlugin": {{Name: "DefaultPreemption"}}, "PreScorePlugin": { {Name: "InterPodAffinity"}, + {Name: "NodeAffinity"}, {Name: "PodTopologySpread"}, {Name: "TaintToleration"}, }, @@ -497,6 +502,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) { "PostFilterPlugin": {{Name: "DefaultPreemption"}}, "PreScorePlugin": { {Name: "InterPodAffinity"}, + {Name: "NodeAffinity"}, {Name: "PodTopologySpread"}, {Name: "TaintToleration"}, }, @@ -596,6 +602,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) { "PostFilterPlugin": {{Name: "DefaultPreemption"}}, "PreScorePlugin": { {Name: "InterPodAffinity"}, + {Name: "NodeAffinity"}, {Name: "PodTopologySpread"}, {Name: "TaintToleration"}, }, @@ -698,6 +705,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) { "PostFilterPlugin": {{Name: "DefaultPreemption"}}, "PreScorePlugin": { {Name: "InterPodAffinity"}, + {Name: "NodeAffinity"}, {Name: "PodTopologySpread"}, {Name: "TaintToleration"}, }, @@ -805,6 +813,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) { "PostFilterPlugin": {{Name: "DefaultPreemption"}}, "PreScorePlugin": { {Name: "InterPodAffinity"}, + {Name: "NodeAffinity"}, {Name: "PodTopologySpread"}, {Name: "TaintToleration"}, }, @@ -924,6 +933,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) { "PostFilterPlugin": {{Name: "DefaultPreemption"}}, "PreScorePlugin": { {Name: "InterPodAffinity"}, + {Name: "NodeAffinity"}, {Name: "PodTopologySpread"}, {Name: "TaintToleration"}, }, @@ -1046,6 +1056,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) { "PostFilterPlugin": {{Name: "DefaultPreemption"}}, "PreScorePlugin": { {Name: "InterPodAffinity"}, + {Name: "NodeAffinity"}, {Name: "PodTopologySpread"}, {Name: "TaintToleration"}, }, @@ -1168,6 +1179,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) { "PostFilterPlugin": {{Name: "DefaultPreemption"}}, "PreScorePlugin": { {Name: "InterPodAffinity"}, + {Name: "NodeAffinity"}, {Name: "PodTopologySpread"}, {Name: "TaintToleration"}, }, @@ -1294,6 +1306,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) { "PostFilterPlugin": {{Name: "DefaultPreemption"}}, "PreScorePlugin": { {Name: "InterPodAffinity"}, + {Name: "NodeAffinity"}, {Name: "PodTopologySpread"}, {Name: "TaintToleration"}, }, @@ -1423,6 +1436,7 @@ func TestAlgorithmProviderCompatibility(t *testing.T) { {Name: "InterPodAffinity"}, {Name: "PodTopologySpread"}, {Name: "TaintToleration"}, + {Name: "NodeAffinity"}, }, "ScorePlugin": { {Name: "NodeResourcesBalancedAllocation", Weight: 1}, @@ -1491,6 +1505,7 @@ func TestAlgorithmProviderCompatibility(t *testing.T) { {Name: "InterPodAffinity"}, {Name: "PodTopologySpread"}, {Name: "TaintToleration"}, + {Name: "NodeAffinity"}, }, "ScorePlugin": { {Name: "NodeResourcesBalancedAllocation", Weight: 1}, @@ -1578,6 +1593,7 @@ func TestPluginsConfigurationCompatibility(t *testing.T) { {Name: "InterPodAffinity"}, {Name: "PodTopologySpread"}, {Name: "TaintToleration"}, + {Name: "NodeAffinity"}, }, "ScorePlugin": { {Name: "NodeResourcesBalancedAllocation", Weight: 1}, @@ -1700,6 +1716,7 @@ func TestPluginsConfigurationCompatibility(t *testing.T) { {Name: "InterPodAffinity"}, {Name: "PodTopologySpread"}, {Name: "TaintToleration"}, + {Name: "NodeAffinity"}, }, "ScorePlugin": { {Name: "NodeResourcesBalancedAllocation", Weight: 1}, @@ -1906,6 +1923,7 @@ func TestPluginsConfigurationCompatibility(t *testing.T) { PreScore: &config.PluginSet{ Disabled: []config.Plugin{ {Name: "InterPodAffinity"}, + {Name: "NodeAffinity"}, {Name: "SelectorSpread"}, {Name: "TaintToleration"}, {Name: "PodTopologySpread"}, diff --git a/pkg/scheduler/factory_test.go b/pkg/scheduler/factory_test.go index 9336c456fa2..bd2fd29428c 100644 --- a/pkg/scheduler/factory_test.go +++ b/pkg/scheduler/factory_test.go @@ -23,7 +23,7 @@ import ( "time" "github.com/google/go-cmp/cmp" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/clock" @@ -184,6 +184,7 @@ func TestCreateFromConfig(t *testing.T) { Enabled: []schedulerapi.Plugin{ {Name: "PodTopologySpread"}, {Name: "InterPodAffinity"}, + {Name: "NodeAffinity"}, {Name: "TaintToleration"}, }, }, @@ -295,7 +296,12 @@ func TestCreateFromConfig(t *testing.T) { }, }, PostFilter: &schedulerapi.PluginSet{Enabled: []schedulerapi.Plugin{{Name: "DefaultPreemption"}}}, - PreScore: &schedulerapi.PluginSet{Enabled: []schedulerapi.Plugin{{Name: "InterPodAffinity"}}}, + PreScore: &schedulerapi.PluginSet{ + Enabled: []schedulerapi.Plugin{ + {Name: "InterPodAffinity"}, + {Name: "NodeAffinity"}, + }, + }, Score: &schedulerapi.PluginSet{ Enabled: []schedulerapi.Plugin{ {Name: "InterPodAffinity", Weight: 1}, @@ -360,7 +366,11 @@ func TestCreateFromConfig(t *testing.T) { }, }, PostFilter: &schedulerapi.PluginSet{Enabled: []schedulerapi.Plugin{{Name: "DefaultPreemption"}}}, - PreScore: &schedulerapi.PluginSet{Enabled: []schedulerapi.Plugin{{Name: "InterPodAffinity"}}}, + PreScore: &schedulerapi.PluginSet{ + Enabled: []schedulerapi.Plugin{ + {Name: "InterPodAffinity"}, + }, + }, Score: &schedulerapi.PluginSet{ Enabled: []schedulerapi.Plugin{ {Name: "InterPodAffinity", Weight: 1}, diff --git a/pkg/scheduler/framework/plugins/legacy_registry.go b/pkg/scheduler/framework/plugins/legacy_registry.go index 8105b656989..a524e0242d5 100644 --- a/pkg/scheduler/framework/plugins/legacy_registry.go +++ b/pkg/scheduler/framework/plugins/legacy_registry.go @@ -360,6 +360,7 @@ func NewLegacyRegistry() *LegacyRegistry { }) registry.registerPriorityConfigProducer(NodeAffinityPriority, func(args ConfigProducerArgs, plugins *config.Plugins, _ *[]config.PluginConfig) { + plugins.PreScore = appendToPluginSet(plugins.PreScore, nodeaffinity.Name, nil) plugins.Score = appendToPluginSet(plugins.Score, nodeaffinity.Name, &args.Weight) }) registry.registerPriorityConfigProducer(ImageLocalityPriority, diff --git a/pkg/scheduler/framework/plugins/legacy_registry_test.go b/pkg/scheduler/framework/plugins/legacy_registry_test.go index 6bd76943b69..c92d66717a0 100644 --- a/pkg/scheduler/framework/plugins/legacy_registry_test.go +++ b/pkg/scheduler/framework/plugins/legacy_registry_test.go @@ -121,6 +121,7 @@ func TestAppendPriorityConfigs(t *testing.T) { Enabled: []config.Plugin{ {Name: podtopologyspread.Name}, {Name: interpodaffinity.Name}, + {Name: nodeaffinity.Name}, {Name: tainttoleration.Name}, }, }, diff --git a/pkg/scheduler/framework/plugins/nodeaffinity/node_affinity.go b/pkg/scheduler/framework/plugins/nodeaffinity/node_affinity.go index 15107ef76d3..e86ed71dbff 100644 --- a/pkg/scheduler/framework/plugins/nodeaffinity/node_affinity.go +++ b/pkg/scheduler/framework/plugins/nodeaffinity/node_affinity.go @@ -43,6 +43,9 @@ const ( // Name is the name of the plugin used in the plugin registry and configurations. Name = "NodeAffinity" + // preScoreStateKey is the key in CycleState to NodeAffinity pre-computed data for Scoring. + preScoreStateKey = "PreScore" + Name + // ErrReasonPod is the reason for Pod's node affinity/selector not matching. ErrReasonPod = "node(s) didn't match Pod's node affinity" @@ -71,6 +74,33 @@ func (pl *NodeAffinity) Filter(ctx context.Context, state *framework.CycleState, return nil } +// preScoreState computed at PreScore and used at Score. +type preScoreState struct { + preferredNodeAffinity *nodeaffinity.PreferredSchedulingTerms +} + +// Clone implements the mandatory Clone interface. We don't really copy the data since +// there is no need for that. +func (s *preScoreState) Clone() framework.StateData { + return s +} + +// PreScore builds and writes cycle state used by Score and NormalizeScore. +func (pl *NodeAffinity) PreScore(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodes []*v1.Node) *framework.Status { + if len(nodes) == 0 { + return nil + } + preferredNodeAffinity, err := getPodPreferredNodeAffinity(pod) + if err != nil { + return framework.AsStatus(err) + } + state := &preScoreState{ + preferredNodeAffinity: preferredNodeAffinity, + } + cycleState.Write(preScoreStateKey, state) + return nil +} + // Score returns the sum of the weights of the terms that match the Node. // Terms came from the Pod .spec.affinity.nodeAffinity and from the plugin's // default affinity. @@ -85,22 +115,25 @@ func (pl *NodeAffinity) Score(ctx context.Context, state *framework.CycleState, return 0, framework.AsStatus(fmt.Errorf("getting node %q from Snapshot: %w", nodeName, err)) } - affinity := pod.Spec.Affinity - var count int64 if pl.addedPrefSchedTerms != nil { count += pl.addedPrefSchedTerms.Score(node) } - // A nil element of PreferredDuringSchedulingIgnoredDuringExecution matches no objects. - // An element of PreferredDuringSchedulingIgnoredDuringExecution that refers to an - // empty PreferredSchedulingTerm matches all objects. - if affinity != nil && affinity.NodeAffinity != nil && affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution != nil { - // TODO(#96164): Do this in PreScore to avoid computing it for all nodes. - preferredNodeAffinity, err := nodeaffinity.NewPreferredSchedulingTerms(affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution) + + s, err := getPreScoreState(state) + if err != nil { + // fallback to calculate preferredNodeAffinity here when PreScore is disabled + preferredNodeAffinity, err := getPodPreferredNodeAffinity(pod) if err != nil { return 0, framework.AsStatus(err) } - count += preferredNodeAffinity.Score(node) + s = &preScoreState{ + preferredNodeAffinity: preferredNodeAffinity, + } + } + + if s.preferredNodeAffinity != nil { + count += s.preferredNodeAffinity.Score(node) } return count, nil @@ -150,3 +183,24 @@ func getArgs(obj runtime.Object) (config.NodeAffinityArgs, error) { } return *ptr, validation.ValidateNodeAffinityArgs(ptr) } + +func getPodPreferredNodeAffinity(pod *v1.Pod) (*nodeaffinity.PreferredSchedulingTerms, error) { + affinity := pod.Spec.Affinity + if affinity != nil && affinity.NodeAffinity != nil && affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution != nil { + return nodeaffinity.NewPreferredSchedulingTerms(affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution) + } + return nil, nil +} + +func getPreScoreState(cycleState *framework.CycleState) (*preScoreState, error) { + c, err := cycleState.Read(preScoreStateKey) + if err != nil { + return nil, fmt.Errorf("reading %q from cycleState: %v", preScoreStateKey, err) + } + + s, ok := c.(*preScoreState) + if !ok { + return nil, fmt.Errorf("invalid PreScore state, got type %T", c) + } + return s, nil +} diff --git a/pkg/scheduler/framework/plugins/nodeaffinity/node_affinity_test.go b/pkg/scheduler/framework/plugins/nodeaffinity/node_affinity_test.go index 29d134ba245..f830569c36e 100644 --- a/pkg/scheduler/framework/plugins/nodeaffinity/node_affinity_test.go +++ b/pkg/scheduler/framework/plugins/nodeaffinity/node_affinity_test.go @@ -889,11 +889,12 @@ func TestNodeAffinityPriority(t *testing.T) { } tests := []struct { - pod *v1.Pod - nodes []*v1.Node - expectedList framework.NodeScoreList - name string - args config.NodeAffinityArgs + pod *v1.Pod + nodes []*v1.Node + expectedList framework.NodeScoreList + name string + args config.NodeAffinityArgs + disablePreScore bool }{ { pod: &v1.Pod{ @@ -995,6 +996,21 @@ func TestNodeAffinityPriority(t *testing.T) { }, }, }, + { + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: affinity2, + }, + }, + nodes: []*v1.Node{ + {ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label1}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine5", Labels: label5}}, + {ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}}, + }, + expectedList: []framework.NodeScore{{Name: "machine1", Score: 18}, {Name: "machine5", Score: framework.MaxNodeScore}, {Name: "machine2", Score: 36}}, + name: "calculate the priorities correctly even if PreScore is not called", + disablePreScore: true, + }, } for _, test := range tests { @@ -1006,6 +1022,13 @@ func TestNodeAffinityPriority(t *testing.T) { if err != nil { t.Fatalf("Creating plugin: %v", err) } + var status *framework.Status + if !test.disablePreScore { + status = p.(framework.PreScorePlugin).PreScore(context.Background(), state, test.pod, test.nodes) + if !status.IsSuccess() { + t.Errorf("unexpected error: %v", status) + } + } var gotList framework.NodeScoreList for _, n := range test.nodes { nodeName := n.ObjectMeta.Name @@ -1016,7 +1039,7 @@ func TestNodeAffinityPriority(t *testing.T) { gotList = append(gotList, framework.NodeScore{Name: nodeName, Score: score}) } - status := p.(framework.ScorePlugin).ScoreExtensions().NormalizeScore(context.Background(), state, test.pod, gotList) + status = p.(framework.ScorePlugin).ScoreExtensions().NormalizeScore(context.Background(), state, test.pod, gotList) if !status.IsSuccess() { t.Errorf("unexpected error: %v", status) } diff --git a/test/integration/scheduler/scheduler_test.go b/test/integration/scheduler/scheduler_test.go index 4dceec28d95..1b955d065ea 100644 --- a/test/integration/scheduler/scheduler_test.go +++ b/test/integration/scheduler/scheduler_test.go @@ -132,6 +132,7 @@ func TestSchedulerCreationFromConfigMap(t *testing.T) { "PreScorePlugin": { {Name: "PodTopologySpread"}, {Name: "InterPodAffinity"}, + {Name: "NodeAffinity"}, {Name: "TaintToleration"}, }, "ScorePlugin": { @@ -226,6 +227,7 @@ kind: Policy "PreScorePlugin": { {Name: "PodTopologySpread"}, {Name: "InterPodAffinity"}, + {Name: "NodeAffinity"}, {Name: "TaintToleration"}, }, "ScorePlugin": {