diff --git a/cmd/kube-scheduler/app/server_test.go b/cmd/kube-scheduler/app/server_test.go index cc82fc7f510..30b07905d1b 100644 --- a/cmd/kube-scheduler/app/server_test.go +++ b/cmd/kube-scheduler/app/server_test.go @@ -124,6 +124,9 @@ profiles: filter: disabled: - name: "*" + postFilter: + disabled: + - name: "*" preScore: disabled: - name: "*" @@ -175,6 +178,9 @@ profiles: {Name: "PodTopologySpread"}, {Name: "InterPodAffinity"}, }, + "PostFilterPlugin": { + {Name: "DefaultPreemption"}, + }, "PreScorePlugin": { {Name: "InterPodAffinity"}, {Name: "PodTopologySpread"}, @@ -221,16 +227,17 @@ profiles: }, wantPlugins: map[string]map[string][]kubeschedulerconfig.Plugin{ "default-scheduler": { - "BindPlugin": {{Name: "DefaultBinder"}}, - "FilterPlugin": {{Name: "NodeResourcesFit"}, {Name: "NodePorts"}}, - "PreFilterPlugin": {{Name: "NodeResourcesFit"}, {Name: "NodePorts"}}, - "PreScorePlugin": {{Name: "InterPodAffinity"}, {Name: "TaintToleration"}}, - "QueueSortPlugin": {{Name: "PrioritySort"}}, - "ScorePlugin": {{Name: "InterPodAffinity", Weight: 1}, {Name: "TaintToleration", Weight: 1}}, - "ReservePlugin": {{Name: "VolumeBinding"}}, - "UnreservePlugin": {{Name: "VolumeBinding"}}, - "PreBindPlugin": {{Name: "VolumeBinding"}}, - "PostBindPlugin": {{Name: "VolumeBinding"}}, + "BindPlugin": {{Name: "DefaultBinder"}}, + "FilterPlugin": {{Name: "NodeResourcesFit"}, {Name: "NodePorts"}}, + "PreFilterPlugin": {{Name: "NodeResourcesFit"}, {Name: "NodePorts"}}, + "PostFilterPlugin": {{Name: "DefaultPreemption"}}, + "PreScorePlugin": {{Name: "InterPodAffinity"}, {Name: "TaintToleration"}}, + "QueueSortPlugin": {{Name: "PrioritySort"}}, + "ScorePlugin": {{Name: "InterPodAffinity", Weight: 1}, {Name: "TaintToleration", Weight: 1}}, + "ReservePlugin": {{Name: "VolumeBinding"}}, + "UnreservePlugin": {{Name: "VolumeBinding"}}, + "PreBindPlugin": {{Name: "VolumeBinding"}}, + "PostBindPlugin": {{Name: "VolumeBinding"}}, }, }, }, @@ -307,6 +314,9 @@ profiles: {Name: "PodTopologySpread"}, {Name: "InterPodAffinity"}, }, + "PostFilterPlugin": { + {Name: "DefaultPreemption"}, + }, "PreScorePlugin": { {Name: "InterPodAffinity"}, {Name: "PodTopologySpread"}, diff --git a/pkg/scheduler/algorithmprovider/BUILD b/pkg/scheduler/algorithmprovider/BUILD index df9970a7e33..72807c7f144 100644 --- a/pkg/scheduler/algorithmprovider/BUILD +++ b/pkg/scheduler/algorithmprovider/BUILD @@ -15,6 +15,7 @@ go_library( "//pkg/scheduler/apis/config:go_default_library", "//pkg/scheduler/framework/plugins/defaultbinder:go_default_library", "//pkg/scheduler/framework/plugins/defaultpodtopologyspread:go_default_library", + "//pkg/scheduler/framework/plugins/defaultpreemption:go_default_library", "//pkg/scheduler/framework/plugins/imagelocality:go_default_library", "//pkg/scheduler/framework/plugins/interpodaffinity:go_default_library", "//pkg/scheduler/framework/plugins/nodeaffinity:go_default_library", @@ -44,6 +45,7 @@ go_test( "//pkg/scheduler/apis/config:go_default_library", "//pkg/scheduler/framework/plugins/defaultbinder:go_default_library", "//pkg/scheduler/framework/plugins/defaultpodtopologyspread:go_default_library", + "//pkg/scheduler/framework/plugins/defaultpreemption:go_default_library", "//pkg/scheduler/framework/plugins/imagelocality:go_default_library", "//pkg/scheduler/framework/plugins/interpodaffinity:go_default_library", "//pkg/scheduler/framework/plugins/nodeaffinity:go_default_library", diff --git a/pkg/scheduler/algorithmprovider/registry.go b/pkg/scheduler/algorithmprovider/registry.go index a7f831c3e3f..02ded546cad 100644 --- a/pkg/scheduler/algorithmprovider/registry.go +++ b/pkg/scheduler/algorithmprovider/registry.go @@ -26,6 +26,7 @@ import ( schedulerapi "k8s.io/kubernetes/pkg/scheduler/apis/config" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpodtopologyspread" + "k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpreemption" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/imagelocality" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeaffinity" @@ -109,6 +110,11 @@ func getDefaultConfig() *schedulerapi.Plugins { {Name: interpodaffinity.Name}, }, }, + PostFilter: &schedulerapi.PluginSet{ + Enabled: []schedulerapi.Plugin{ + {Name: defaultpreemption.Name}, + }, + }, PreScore: &schedulerapi.PluginSet{ Enabled: []schedulerapi.Plugin{ {Name: interpodaffinity.Name}, diff --git a/pkg/scheduler/algorithmprovider/registry_test.go b/pkg/scheduler/algorithmprovider/registry_test.go index 7d9a1ad69e5..f8bc917ecb7 100644 --- a/pkg/scheduler/algorithmprovider/registry_test.go +++ b/pkg/scheduler/algorithmprovider/registry_test.go @@ -21,6 +21,7 @@ import ( "github.com/google/go-cmp/cmp" "k8s.io/component-base/featuregate" + "k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpreemption" utilfeature "k8s.io/apiserver/pkg/util/feature" featuregatetesting "k8s.io/component-base/featuregate/testing" @@ -80,6 +81,11 @@ func TestClusterAutoscalerProvider(t *testing.T) { {Name: interpodaffinity.Name}, }, }, + PostFilter: &schedulerapi.PluginSet{ + Enabled: []schedulerapi.Plugin{ + {Name: defaultpreemption.Name}, + }, + }, PreScore: &schedulerapi.PluginSet{ Enabled: []schedulerapi.Plugin{ {Name: interpodaffinity.Name}, @@ -177,6 +183,11 @@ func TestApplyFeatureGates(t *testing.T) { {Name: interpodaffinity.Name}, }, }, + PostFilter: &schedulerapi.PluginSet{ + Enabled: []schedulerapi.Plugin{ + {Name: defaultpreemption.Name}, + }, + }, PreScore: &schedulerapi.PluginSet{ Enabled: []schedulerapi.Plugin{ {Name: interpodaffinity.Name}, @@ -262,6 +273,11 @@ func TestApplyFeatureGates(t *testing.T) { {Name: interpodaffinity.Name}, }, }, + PostFilter: &schedulerapi.PluginSet{ + Enabled: []schedulerapi.Plugin{ + {Name: defaultpreemption.Name}, + }, + }, PreScore: &schedulerapi.PluginSet{ Enabled: []schedulerapi.Plugin{ {Name: interpodaffinity.Name}, diff --git a/pkg/scheduler/apis/config/testing/compatibility_test.go b/pkg/scheduler/apis/config/testing/compatibility_test.go index 559562c2718..4e42332a522 100644 --- a/pkg/scheduler/apis/config/testing/compatibility_test.go +++ b/pkg/scheduler/apis/config/testing/compatibility_test.go @@ -1414,6 +1414,9 @@ func TestAlgorithmProviderCompatibility(t *testing.T) { {Name: "PodTopologySpread"}, {Name: "InterPodAffinity"}, }, + "PostFilterPlugin": { + {Name: "DefaultPreemption"}, + }, "PreScorePlugin": { {Name: "InterPodAffinity"}, {Name: "PodTopologySpread"}, @@ -1483,6 +1486,9 @@ func TestAlgorithmProviderCompatibility(t *testing.T) { {Name: "PodTopologySpread"}, {Name: "InterPodAffinity"}, }, + "PostFilterPlugin": { + {Name: "DefaultPreemption"}, + }, "PreScorePlugin": { {Name: "InterPodAffinity"}, {Name: "PodTopologySpread"}, @@ -1572,6 +1578,9 @@ func TestPluginsConfigurationCompatibility(t *testing.T) { {Name: "PodTopologySpread"}, {Name: "InterPodAffinity"}, }, + "PostFilterPlugin": { + {Name: "DefaultPreemption"}, + }, "PreScorePlugin": { {Name: "InterPodAffinity"}, {Name: "PodTopologySpread"}, @@ -1771,6 +1780,11 @@ func TestPluginsConfigurationCompatibility(t *testing.T) { {Name: "PodTopologySpread"}, }, }, + PostFilter: &config.PluginSet{ + Disabled: []config.Plugin{ + {Name: "DefaultPreemption"}, + }, + }, PreScore: &config.PluginSet{ Disabled: []config.Plugin{ {Name: "InterPodAffinity"}, @@ -1917,6 +1931,9 @@ func TestPluginsConfigurationCompatibility(t *testing.T) { {Name: "NodeResourcesFit"}, {Name: "NodeUnschedulable"}, }, + "PostFilterPlugin": { + {Name: "DefaultPreemption"}, + }, "PreScorePlugin": { {Name: "TaintToleration"}, {Name: "DefaultPodTopologySpread"}, diff --git a/pkg/scheduler/eventhandlers.go b/pkg/scheduler/eventhandlers.go index 2085ffc3b5c..1809fc84645 100644 --- a/pkg/scheduler/eventhandlers.go +++ b/pkg/scheduler/eventhandlers.go @@ -30,7 +30,6 @@ import ( coreinformers "k8s.io/client-go/informers/core/v1" "k8s.io/client-go/tools/cache" "k8s.io/kubernetes/pkg/features" - kubefeatures "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/scheduler/internal/queue" "k8s.io/kubernetes/pkg/scheduler/profile" ) @@ -467,11 +466,6 @@ func addAllEventHandlers( AddFunc: sched.onStorageClassAdd, }, ) - - // TODO(Huang-Wei): remove this hack when defaultpreemption plugin is enabled. - if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.PodDisruptionBudget) { - informerFactory.Policy().V1beta1().PodDisruptionBudgets().Lister() - } } func nodeSchedulingPropertiesChange(newNode *v1.Node, oldNode *v1.Node) string { diff --git a/pkg/scheduler/framework/plugins/BUILD b/pkg/scheduler/framework/plugins/BUILD index 2587603af10..a081b290d36 100644 --- a/pkg/scheduler/framework/plugins/BUILD +++ b/pkg/scheduler/framework/plugins/BUILD @@ -12,6 +12,7 @@ go_library( "//pkg/scheduler/apis/config:go_default_library", "//pkg/scheduler/framework/plugins/defaultbinder:go_default_library", "//pkg/scheduler/framework/plugins/defaultpodtopologyspread:go_default_library", + "//pkg/scheduler/framework/plugins/defaultpreemption:go_default_library", "//pkg/scheduler/framework/plugins/imagelocality:go_default_library", "//pkg/scheduler/framework/plugins/interpodaffinity:go_default_library", "//pkg/scheduler/framework/plugins/nodeaffinity:go_default_library", diff --git a/pkg/scheduler/framework/plugins/defaultpreemption/BUILD b/pkg/scheduler/framework/plugins/defaultpreemption/BUILD index 80e8552dcf7..37b8e17c490 100644 --- a/pkg/scheduler/framework/plugins/defaultpreemption/BUILD +++ b/pkg/scheduler/framework/plugins/defaultpreemption/BUILD @@ -6,10 +6,13 @@ go_library( importpath = "k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpreemption", visibility = ["//visibility:public"], deps = [ + "//pkg/features:go_default_library", "//pkg/scheduler/core:go_default_library", "//pkg/scheduler/framework/v1alpha1:go_default_library", + "//pkg/scheduler/metrics:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library", + "//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library", ], ) diff --git a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go index 670978cff17..cdfbaeafcf7 100644 --- a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go +++ b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go @@ -18,11 +18,15 @@ package defaultpreemption import ( "context" + "time" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" + utilfeature "k8s.io/apiserver/pkg/util/feature" + kubefeatures "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/scheduler/core" framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1" + "k8s.io/kubernetes/pkg/scheduler/metrics" ) const ( @@ -45,11 +49,22 @@ func (pl *DefaultPreemption) Name() string { // New initializes a new plugin and returns it. func New(_ runtime.Object, fh framework.FrameworkHandle) (framework.Plugin, error) { pl := DefaultPreemption{fh} + if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.PodDisruptionBudget) { + // A hack to initialize pdbLister in sharedInformerFactory. + fh.SharedInformerFactory().Policy().V1beta1().PodDisruptionBudgets().Lister() + } return &pl, nil } // PostFilter invoked at the postFilter extension point. func (pl *DefaultPreemption) PostFilter(ctx context.Context, state *framework.CycleState, pod *v1.Pod, m framework.NodeToStatusMap) (*framework.PostFilterResult, *framework.Status) { + preemptionStartTime := time.Now() + defer func() { + metrics.PreemptionAttempts.Inc() + metrics.SchedulingAlgorithmPreemptionEvaluationDuration.Observe(metrics.SinceInSeconds(preemptionStartTime)) + metrics.DeprecatedSchedulingDuration.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime)) + }() + nnn, err := core.Preempt(ctx, pl.fh, state, pod, m) if err != nil { return nil, framework.NewStatus(framework.Error, err.Error()) diff --git a/pkg/scheduler/framework/plugins/registry.go b/pkg/scheduler/framework/plugins/registry.go index 2cb4b8d7d93..e009ab8184f 100644 --- a/pkg/scheduler/framework/plugins/registry.go +++ b/pkg/scheduler/framework/plugins/registry.go @@ -19,6 +19,7 @@ package plugins import ( "k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultbinder" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpodtopologyspread" + "k8s.io/kubernetes/pkg/scheduler/framework/plugins/defaultpreemption" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/imagelocality" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity" "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeaffinity" @@ -71,5 +72,6 @@ func NewInTreeRegistry() runtime.Registry { serviceaffinity.Name: serviceaffinity.New, queuesort.Name: queuesort.New, defaultbinder.Name: defaultbinder.New, + defaultpreemption.Name: defaultpreemption.New, } } diff --git a/pkg/scheduler/framework/runtime/framework.go b/pkg/scheduler/framework/runtime/framework.go index 480168d3011..917455edfca 100644 --- a/pkg/scheduler/framework/runtime/framework.go +++ b/pkg/scheduler/framework/runtime/framework.go @@ -525,7 +525,12 @@ func (f *frameworkImpl) runFilterPlugin(ctx context.Context, pl framework.Filter // RunPostFilterPlugins runs the set of configured PostFilter plugins until the first // Success or Error is met, otherwise continues to execute all plugins. -func (f *frameworkImpl) RunPostFilterPlugins(ctx context.Context, state *framework.CycleState, pod *v1.Pod, filteredNodeStatusMap framework.NodeToStatusMap) (*framework.PostFilterResult, *framework.Status) { +func (f *frameworkImpl) RunPostFilterPlugins(ctx context.Context, state *framework.CycleState, pod *v1.Pod, filteredNodeStatusMap framework.NodeToStatusMap) (_ *framework.PostFilterResult, status *framework.Status) { + startTime := time.Now() + defer func() { + metrics.FrameworkExtensionPointDuration.WithLabelValues(postFilter, status.Code().String()).Observe(metrics.SinceInSeconds(startTime)) + }() + statuses := make(framework.PluginToStatus) for _, pl := range f.postFilterPlugins { r, s := f.runPostFilterPlugin(ctx, pl, state, pod, filteredNodeStatusMap) @@ -537,6 +542,7 @@ func (f *frameworkImpl) RunPostFilterPlugins(ctx context.Context, state *framewo } statuses[pl.Name()] = s } + return nil, statuses.Merge() } @@ -936,6 +942,11 @@ func (f *frameworkImpl) HasFilterPlugins() bool { return len(f.filterPlugins) > 0 } +// HasPostFilterPlugins returns true if at least one postFilter plugin is defined. +func (f *frameworkImpl) HasPostFilterPlugins() bool { + return len(f.postFilterPlugins) > 0 +} + // HasScorePlugins returns true if at least one score plugin is defined. func (f *frameworkImpl) HasScorePlugins() bool { return len(f.scorePlugins) > 0 diff --git a/pkg/scheduler/framework/v1alpha1/interface.go b/pkg/scheduler/framework/v1alpha1/interface.go index 7089d25e48b..48ba44ff811 100644 --- a/pkg/scheduler/framework/v1alpha1/interface.go +++ b/pkg/scheduler/framework/v1alpha1/interface.go @@ -481,6 +481,9 @@ type Framework interface { // HasFilterPlugins returns true if at least one filter plugin is defined. HasFilterPlugins() bool + // HasPostFilterPlugins returns true if at least one postFilter plugin is defined. + HasPostFilterPlugins() bool + // HasScorePlugins returns true if at least one score plugin is defined. HasScorePlugins() bool diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index 2cf3872329e..629a409cb44 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -481,17 +481,10 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) { // into the resources that were preempted, but this is harmless. nominatedNode := "" if fitError, ok := err.(*core.FitError); ok { - if sched.DisablePreemption { + if sched.DisablePreemption || !prof.HasPostFilterPlugins() { klog.V(3).Infof("Pod priority feature is not enabled or preemption is disabled by scheduler configuration." + " No preemption is performed.") } else { - preemptionStartTime := time.Now() - // TODO(Huang-Wei): implement the preemption logic as a PostFilter plugin. - nominatedNode, _ = core.Preempt(schedulingCycleCtx, prof, state, pod, fitError.FilteredNodesStatuses) - metrics.PreemptionAttempts.Inc() - metrics.SchedulingAlgorithmPreemptionEvaluationDuration.Observe(metrics.SinceInSeconds(preemptionStartTime)) - metrics.DeprecatedSchedulingDuration.WithLabelValues(metrics.PreemptionEvaluation).Observe(metrics.SinceInSeconds(preemptionStartTime)) - // Run PostFilter plugins to try to make the pod schedulable in a future scheduling cycle. result, status := prof.RunPostFilterPlugins(ctx, state, pod, fitError.FilteredNodesStatuses) if status.Code() == framework.Error { diff --git a/test/integration/scheduler/framework_test.go b/test/integration/scheduler/framework_test.go index 2f9719c720c..a93a4e30de4 100644 --- a/test/integration/scheduler/framework_test.go +++ b/test/integration/scheduler/framework_test.go @@ -601,19 +601,17 @@ func TestPostFilterPlugin(t *testing.T) { expectPostFilterNumCalled: 0, }, { - name: "Filter failed and PostFilter passed", - rejectFilter: true, - rejectPostFilter: false, - // TODO: change to when the hard-coded preemption logic is removed. - expectFilterNumCalled: numNodes * 3, + name: "Filter failed and PostFilter passed", + rejectFilter: true, + rejectPostFilter: false, + expectFilterNumCalled: numNodes * 2, expectPostFilterNumCalled: 1, }, { - name: "Filter failed and PostFilter failed", - rejectFilter: true, - rejectPostFilter: true, - // TODO: change to when the hard-coded preemption logic is removed. - expectFilterNumCalled: numNodes * 3, + name: "Filter failed and PostFilter failed", + rejectFilter: true, + rejectPostFilter: true, + expectFilterNumCalled: numNodes * 2, expectPostFilterNumCalled: 1, }, } @@ -645,6 +643,11 @@ func TestPostFilterPlugin(t *testing.T) { Enabled: []schedulerconfig.Plugin{ {Name: postfilterPluginName}, }, + // Need to disable default in-tree PostFilter plugins, as they will + // call RunFilterPlugins and hence impact the "numFilterCalled". + Disabled: []schedulerconfig.Plugin{ + {Name: "*"}, + }, }, }, }