From 4f0bd4121e7908ea3c4c0799eeb6cf16211c2644 Mon Sep 17 00:00:00 2001 From: Harry Zhang Date: Wed, 11 Apr 2018 17:40:07 -0700 Subject: [PATCH] Disable pod preemption by config --- cmd/kube-scheduler/app/server.go | 6 +++++ pkg/apis/componentconfig/types.go | 3 +++ pkg/apis/componentconfig/v1alpha1/types.go | 3 +++ .../v1alpha1/zz_generated.conversion.go | 2 ++ .../defaults/compatibility_test.go | 1 + pkg/scheduler/core/extender_test.go | 13 +++++++++- pkg/scheduler/core/generic_scheduler.go | 6 ++++- pkg/scheduler/core/generic_scheduler_test.go | 26 +++++++++++++++++-- pkg/scheduler/factory/factory.go | 20 +++++++++++++- pkg/scheduler/factory/factory_test.go | 6 ++++- pkg/scheduler/scheduler.go | 8 ++++-- pkg/scheduler/scheduler_test.go | 2 ++ test/integration/scheduler/util.go | 1 + test/integration/util/util.go | 1 + 14 files changed, 90 insertions(+), 8 deletions(-) diff --git a/cmd/kube-scheduler/app/server.go b/cmd/kube-scheduler/app/server.go index c557ce93c85..2802487a276 100644 --- a/cmd/kube-scheduler/app/server.go +++ b/cmd/kube-scheduler/app/server.go @@ -381,6 +381,8 @@ type SchedulerServer struct { HealthzServer *http.Server // MetricsServer is optional. MetricsServer *http.Server + // Disable pod preemption or not. + DisablePreemption bool } // NewSchedulerServer creates a runnable SchedulerServer from configuration. @@ -445,6 +447,7 @@ func NewSchedulerServer(config *componentconfig.KubeSchedulerConfiguration, mast LeaderElection: leaderElectionConfig, HealthzServer: healthzServer, MetricsServer: metricsServer, + DisablePreemption: config.DisablePreemption, }, nil } @@ -659,6 +662,7 @@ func (s *SchedulerServer) SchedulerConfig() (*scheduler.Config, error) { storageClassInformer, s.HardPodAffinitySymmetricWeight, utilfeature.DefaultFeatureGate.Enabled(features.EnableEquivalenceClassCache), + s.DisablePreemption, ) source := s.AlgorithmSource @@ -716,5 +720,7 @@ func (s *SchedulerServer) SchedulerConfig() (*scheduler.Config, error) { } // Additional tweaks to the config produced by the configurator. config.Recorder = s.Recorder + + config.DisablePreemption = s.DisablePreemption return config, nil } diff --git a/pkg/apis/componentconfig/types.go b/pkg/apis/componentconfig/types.go index 8a3c952f0b9..78e6f829eaa 100644 --- a/pkg/apis/componentconfig/types.go +++ b/pkg/apis/componentconfig/types.go @@ -111,6 +111,9 @@ type KubeSchedulerConfiguration struct { // Indicate the "all topologies" set for empty topologyKey when it's used for PreferredDuringScheduling pod anti-affinity. // DEPRECATED: This is no longer used. FailureDomains string + + // DisablePreemption disables the pod preemption feature. + DisablePreemption bool } // KubeSchedulerLeaderElectionConfiguration expands LeaderElectionConfiguration diff --git a/pkg/apis/componentconfig/v1alpha1/types.go b/pkg/apis/componentconfig/v1alpha1/types.go index e694f193b2f..fefa9d2f49b 100644 --- a/pkg/apis/componentconfig/v1alpha1/types.go +++ b/pkg/apis/componentconfig/v1alpha1/types.go @@ -106,6 +106,9 @@ type KubeSchedulerConfiguration struct { // Indicate the "all topologies" set for empty topologyKey when it's used for PreferredDuringScheduling pod anti-affinity. FailureDomains string `json:"failureDomains"` + + // DisablePreemption disables the pod preemption feature. + DisablePreemption bool `json:"disablePreemption"` } // LeaderElectionConfiguration defines the configuration of leader election diff --git a/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go b/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go index 741cbaaad73..d6d856eae5f 100644 --- a/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go +++ b/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go @@ -309,6 +309,7 @@ func autoConvert_v1alpha1_KubeSchedulerConfiguration_To_componentconfig_KubeSche out.EnableProfiling = in.EnableProfiling out.EnableContentionProfiling = in.EnableContentionProfiling out.FailureDomains = in.FailureDomains + out.DisablePreemption = in.DisablePreemption return nil } @@ -334,6 +335,7 @@ func autoConvert_componentconfig_KubeSchedulerConfiguration_To_v1alpha1_KubeSche out.EnableProfiling = in.EnableProfiling out.EnableContentionProfiling = in.EnableContentionProfiling out.FailureDomains = in.FailureDomains + out.DisablePreemption = in.DisablePreemption return nil } diff --git a/pkg/scheduler/algorithmprovider/defaults/compatibility_test.go b/pkg/scheduler/algorithmprovider/defaults/compatibility_test.go index 77c421bc850..63e09eea400 100644 --- a/pkg/scheduler/algorithmprovider/defaults/compatibility_test.go +++ b/pkg/scheduler/algorithmprovider/defaults/compatibility_test.go @@ -579,6 +579,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) { informerFactory.Storage().V1().StorageClasses(), v1.DefaultHardPodAffinitySymmetricWeight, enableEquivalenceCache, + false, ).CreateFromConfig(policy); err != nil { t.Errorf("%s: Error constructing: %v", v, err) continue diff --git a/pkg/scheduler/core/extender_test.go b/pkg/scheduler/core/extender_test.go index f4ad3e00c0e..80ed8cc31f0 100644 --- a/pkg/scheduler/core/extender_test.go +++ b/pkg/scheduler/core/extender_test.go @@ -506,7 +506,18 @@ func TestGenericSchedulerWithExtenders(t *testing.T) { } queue := NewSchedulingQueue() scheduler := NewGenericScheduler( - cache, nil, queue, test.predicates, algorithm.EmptyPredicateMetadataProducer, test.prioritizers, algorithm.EmptyPriorityMetadataProducer, extenders, nil, schedulertesting.FakePersistentVolumeClaimLister{}, false) + cache, + nil, + queue, + test.predicates, + algorithm.EmptyPredicateMetadataProducer, + test.prioritizers, + algorithm.EmptyPriorityMetadataProducer, + extenders, + nil, + schedulertesting.FakePersistentVolumeClaimLister{}, + false, + false) podIgnored := &v1.Pod{} machine, err := scheduler.Schedule(podIgnored, schedulertesting.FakeNodeLister(makeNodeList(test.nodes))) if test.expectsErr { diff --git a/pkg/scheduler/core/generic_scheduler.go b/pkg/scheduler/core/generic_scheduler.go index 8f072d76480..3f7567f92b1 100644 --- a/pkg/scheduler/core/generic_scheduler.go +++ b/pkg/scheduler/core/generic_scheduler.go @@ -98,6 +98,7 @@ type genericScheduler struct { cachedNodeInfoMap map[string]*schedulercache.NodeInfo volumeBinder *volumebinder.VolumeBinder pvcLister corelisters.PersistentVolumeClaimLister + disablePreemption bool } // Schedule tries to schedule the given pod to one of the nodes in the node list. @@ -1107,7 +1108,9 @@ func NewGenericScheduler( extenders []algorithm.SchedulerExtender, volumeBinder *volumebinder.VolumeBinder, pvcLister corelisters.PersistentVolumeClaimLister, - alwaysCheckAllPredicates bool) algorithm.ScheduleAlgorithm { + alwaysCheckAllPredicates bool, + disablePreemption bool, +) algorithm.ScheduleAlgorithm { return &genericScheduler{ cache: cache, equivalenceCache: eCache, @@ -1121,5 +1124,6 @@ func NewGenericScheduler( volumeBinder: volumeBinder, pvcLister: pvcLister, alwaysCheckAllPredicates: alwaysCheckAllPredicates, + disablePreemption: disablePreemption, } } diff --git a/pkg/scheduler/core/generic_scheduler_test.go b/pkg/scheduler/core/generic_scheduler_test.go index 696cdd096ff..83efdad37db 100644 --- a/pkg/scheduler/core/generic_scheduler_test.go +++ b/pkg/scheduler/core/generic_scheduler_test.go @@ -409,7 +409,18 @@ func TestGenericScheduler(t *testing.T) { pvcLister := schedulertesting.FakePersistentVolumeClaimLister(pvcs) scheduler := NewGenericScheduler( - cache, nil, NewSchedulingQueue(), test.predicates, algorithm.EmptyPredicateMetadataProducer, test.prioritizers, algorithm.EmptyPriorityMetadataProducer, []algorithm.SchedulerExtender{}, nil, pvcLister, test.alwaysCheckAllPredicates) + cache, + nil, + NewSchedulingQueue(), + test.predicates, + algorithm.EmptyPredicateMetadataProducer, + test.prioritizers, + algorithm.EmptyPriorityMetadataProducer, + []algorithm.SchedulerExtender{}, + nil, + pvcLister, + test.alwaysCheckAllPredicates, + false) machine, err := scheduler.Schedule(test.pod, schedulertesting.FakeNodeLister(makeNodeList(test.nodes))) if !reflect.DeepEqual(err, test.wErr) { @@ -1323,7 +1334,18 @@ func TestPreempt(t *testing.T) { extenders = append(extenders, extender) } scheduler := NewGenericScheduler( - cache, nil, NewSchedulingQueue(), map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources}, algorithm.EmptyPredicateMetadataProducer, []algorithm.PriorityConfig{{Function: numericPriority, Weight: 1}}, algorithm.EmptyPriorityMetadataProducer, extenders, nil, schedulertesting.FakePersistentVolumeClaimLister{}, false) + cache, + nil, + NewSchedulingQueue(), + map[string]algorithm.FitPredicate{"matches": algorithmpredicates.PodFitsResources}, + algorithm.EmptyPredicateMetadataProducer, + []algorithm.PriorityConfig{{Function: numericPriority, Weight: 1}}, + algorithm.EmptyPriorityMetadataProducer, + extenders, + nil, + schedulertesting.FakePersistentVolumeClaimLister{}, + false, + false) // Call Preempt and check the expected results. node, victims, _, err := scheduler.Preempt(test.pod, schedulertesting.FakeNodeLister(makeNodeList(nodeNames)), error(&FitError{Pod: test.pod, FailedPredicates: failedPredMap})) if err != nil { diff --git a/pkg/scheduler/factory/factory.go b/pkg/scheduler/factory/factory.go index c252cbad5ee..efd0bb83249 100644 --- a/pkg/scheduler/factory/factory.go +++ b/pkg/scheduler/factory/factory.go @@ -133,6 +133,9 @@ type configFactory struct { // always check all predicates even if the middle of one predicate fails. alwaysCheckAllPredicates bool + + // Disable pod preemption or not. + disablePreemption bool } // NewConfigFactory initializes the default implementation of a Configurator To encourage eventual privatization of the struct type, we only @@ -152,6 +155,7 @@ func NewConfigFactory( storageClassInformer storageinformers.StorageClassInformer, hardPodAffinitySymmetricWeight int32, enableEquivalenceClassCache bool, + disablePreemption bool, ) scheduler.Configurator { stopEverything := make(chan struct{}) schedulerCache := schedulercache.New(30*time.Second, stopEverything) @@ -179,6 +183,7 @@ func NewConfigFactory( schedulerName: schedulerName, hardPodAffinitySymmetricWeight: hardPodAffinitySymmetricWeight, enableEquivalenceClassCache: enableEquivalenceClassCache, + disablePreemption: disablePreemption, } c.scheduledPodsHasSynced = podInformer.Informer().HasSynced @@ -1064,7 +1069,20 @@ func (c *configFactory) CreateFromKeys(predicateKeys, priorityKeys sets.String, glog.Info("Created equivalence class cache") } - algo := core.NewGenericScheduler(c.schedulerCache, c.equivalencePodCache, c.podQueue, predicateFuncs, predicateMetaProducer, priorityConfigs, priorityMetaProducer, extenders, c.volumeBinder, c.pVCLister, c.alwaysCheckAllPredicates) + algo := core.NewGenericScheduler( + c.schedulerCache, + c.equivalencePodCache, + c.podQueue, + predicateFuncs, + predicateMetaProducer, + priorityConfigs, + priorityMetaProducer, + extenders, + c.volumeBinder, + c.pVCLister, + c.alwaysCheckAllPredicates, + c.disablePreemption, + ) podBackoff := util.CreateDefaultPodBackoff() return &scheduler.Config{ diff --git a/pkg/scheduler/factory/factory_test.go b/pkg/scheduler/factory/factory_test.go index a49e2e0198d..c7afc6c6dc7 100644 --- a/pkg/scheduler/factory/factory_test.go +++ b/pkg/scheduler/factory/factory_test.go @@ -46,7 +46,10 @@ import ( "k8s.io/kubernetes/pkg/scheduler/util" ) -const enableEquivalenceCache = true +const ( + enableEquivalenceCache = true + disablePodPreemption = false +) func TestCreate(t *testing.T) { handler := utiltesting.FakeHandler{ @@ -533,6 +536,7 @@ func newConfigFactory(client *clientset.Clientset, hardPodAffinitySymmetricWeigh informerFactory.Storage().V1().StorageClasses(), hardPodAffinitySymmetricWeight, enableEquivalenceCache, + disablePodPreemption, ) } diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index f6875b31ff3..190b5f10bec 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -137,6 +137,9 @@ type Config struct { // VolumeBinder handles PVC/PV binding for the pod. VolumeBinder *volumebinder.VolumeBinder + + // Disable pod preemption or not. + DisablePreemption bool } // NewFromConfigurator returns a new scheduler that is created entirely by the Configurator. Assumes Create() is implemented. @@ -207,8 +210,9 @@ func (sched *Scheduler) schedule(pod *v1.Pod) (string, error) { // If it succeeds, it adds the name of the node where preemption has happened to the pod annotations. // It returns the node name and an error if any. func (sched *Scheduler) preempt(preemptor *v1.Pod, scheduleErr error) (string, error) { - if !util.PodPriorityEnabled() { - glog.V(3).Infof("Pod priority feature is not enabled. No preemption is performed.") + if !util.PodPriorityEnabled() || sched.config.DisablePreemption { + glog.V(3).Infof("Pod priority feature is not enabled or preemption is disabled by scheduler configuration." + + " No preemption is performed.") return "", nil } preemptor, err := sched.config.PodPreemptor.GetUpdatedPod(preemptor) diff --git a/pkg/scheduler/scheduler_test.go b/pkg/scheduler/scheduler_test.go index 4debe5217c6..e4f722178d0 100644 --- a/pkg/scheduler/scheduler_test.go +++ b/pkg/scheduler/scheduler_test.go @@ -548,6 +548,7 @@ func setupTestScheduler(queuedPodStore *clientcache.FIFO, scache schedulercache. []algorithm.SchedulerExtender{}, nil, schedulertesting.FakePersistentVolumeClaimLister{}, + false, false) bindingChan := make(chan *v1.Binding, 1) errChan := make(chan error, 1) @@ -596,6 +597,7 @@ func setupTestSchedulerLongBindingWithRetry(queuedPodStore *clientcache.FIFO, sc []algorithm.SchedulerExtender{}, nil, schedulertesting.FakePersistentVolumeClaimLister{}, + false, false) bindingChan := make(chan *v1.Binding, 2) configurator := &FakeConfigurator{ diff --git a/test/integration/scheduler/util.go b/test/integration/scheduler/util.go index 92de7e0c779..91ca65cde1e 100644 --- a/test/integration/scheduler/util.go +++ b/test/integration/scheduler/util.go @@ -85,6 +85,7 @@ func createConfiguratorWithPodInformer( informerFactory.Storage().V1().StorageClasses(), v1.DefaultHardPodAffinitySymmetricWeight, utilfeature.DefaultFeatureGate.Enabled(features.EnableEquivalenceClassCache), + false, ) } diff --git a/test/integration/util/util.go b/test/integration/util/util.go index 03bb4e2cebd..9c061091677 100644 --- a/test/integration/util/util.go +++ b/test/integration/util/util.go @@ -112,5 +112,6 @@ func createSchedulerConfigurator( informerFactory.Storage().V1().StorageClasses(), v1.DefaultHardPodAffinitySymmetricWeight, utilfeature.DefaultFeatureGate.Enabled(features.EnableEquivalenceClassCache), + false, ) }