diff --git a/plugin/cmd/kube-scheduler/app/BUILD b/plugin/cmd/kube-scheduler/app/BUILD index 72a3c6b35b0..2b95dc68e51 100644 --- a/plugin/cmd/kube-scheduler/app/BUILD +++ b/plugin/cmd/kube-scheduler/app/BUILD @@ -24,7 +24,6 @@ go_library( "//pkg/client/informers/informers_generated/externalversions/extensions/v1beta1:go_default_library", "//pkg/client/leaderelection:go_default_library", "//pkg/client/leaderelection/resourcelock:go_default_library", - "//pkg/controller:go_default_library", "//pkg/util/configz:go_default_library", "//plugin/cmd/kube-scheduler/app/options:go_default_library", "//plugin/pkg/scheduler:go_default_library", diff --git a/plugin/cmd/kube-scheduler/app/configurator.go b/plugin/cmd/kube-scheduler/app/configurator.go index a40dbbb7976..f5f23c3f280 100644 --- a/plugin/cmd/kube-scheduler/app/configurator.go +++ b/plugin/cmd/kube-scheduler/app/configurator.go @@ -77,7 +77,6 @@ func CreateScheduler( s *options.SchedulerServer, kubecli *clientset.Clientset, nodeInformer coreinformers.NodeInformer, - podInformer coreinformers.PodInformer, pvInformer coreinformers.PersistentVolumeInformer, pvcInformer coreinformers.PersistentVolumeClaimInformer, replicationControllerInformer coreinformers.ReplicationControllerInformer, @@ -90,7 +89,6 @@ func CreateScheduler( s.SchedulerName, kubecli, nodeInformer, - podInformer, pvInformer, pvcInformer, replicationControllerInformer, diff --git a/plugin/cmd/kube-scheduler/app/server.go b/plugin/cmd/kube-scheduler/app/server.go index 653e3398f49..c4ddae780c3 100644 --- a/plugin/cmd/kube-scheduler/app/server.go +++ b/plugin/cmd/kube-scheduler/app/server.go @@ -31,11 +31,9 @@ import ( informers "k8s.io/kubernetes/pkg/client/informers/informers_generated/externalversions" "k8s.io/kubernetes/pkg/client/leaderelection" "k8s.io/kubernetes/pkg/client/leaderelection/resourcelock" - "k8s.io/kubernetes/pkg/controller" "k8s.io/kubernetes/pkg/util/configz" "k8s.io/kubernetes/plugin/cmd/kube-scheduler/app/options" _ "k8s.io/kubernetes/plugin/pkg/scheduler/algorithmprovider" - "k8s.io/kubernetes/plugin/pkg/scheduler/factory" "github.com/golang/glog" "github.com/prometheus/client_golang/prometheus" @@ -73,14 +71,11 @@ func Run(s *options.SchedulerServer) error { recorder := createRecorder(kubecli, s) informerFactory := informers.NewSharedInformerFactory(kubecli, 0) - // cache only non-terminal pods - podInformer := factory.NewPodInformer(kubecli, 0) sched, err := CreateScheduler( s, kubecli, informerFactory.Core().V1().Nodes(), - podInformer, informerFactory.Core().V1().PersistentVolumes(), informerFactory.Core().V1().PersistentVolumeClaims(), informerFactory.Core().V1().ReplicationControllers(), @@ -97,11 +92,9 @@ func Run(s *options.SchedulerServer) error { stop := make(chan struct{}) defer close(stop) - go podInformer.Informer().Run(stop) informerFactory.Start(stop) // Waiting for all cache to sync before scheduling. informerFactory.WaitForCacheSync(stop) - controller.WaitForCacheSync("scheduler", stop, podInformer.Informer().HasSynced) run := func(_ <-chan struct{}) { sched.Run() diff --git a/plugin/pkg/scheduler/BUILD b/plugin/pkg/scheduler/BUILD index 86ebb604695..76af6a8b03b 100644 --- a/plugin/pkg/scheduler/BUILD +++ b/plugin/pkg/scheduler/BUILD @@ -41,7 +41,6 @@ go_library( ], tags = ["automanaged"], deps = [ - "//pkg/api:go_default_library", "//pkg/api/v1:go_default_library", "//pkg/client/clientset_generated/clientset:go_default_library", "//pkg/client/listers/core/v1:go_default_library", @@ -53,7 +52,6 @@ go_library( "//plugin/pkg/scheduler/util:go_default_library", "//vendor/github.com/golang/glog:go_default_library", "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", - "//vendor/k8s.io/apimachinery/pkg/util/runtime:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library", "//vendor/k8s.io/client-go/tools/cache:go_default_library", diff --git a/plugin/pkg/scheduler/algorithmprovider/defaults/compatibility_test.go b/plugin/pkg/scheduler/algorithmprovider/defaults/compatibility_test.go index ccc57efc50b..267ba65cd4c 100644 --- a/plugin/pkg/scheduler/algorithmprovider/defaults/compatibility_test.go +++ b/plugin/pkg/scheduler/algorithmprovider/defaults/compatibility_test.go @@ -352,7 +352,6 @@ func TestCompatibility_v1_Scheduler(t *testing.T) { "some-scheduler-name", client, informerFactory.Core().V1().Nodes(), - informerFactory.Core().V1().Pods(), informerFactory.Core().V1().PersistentVolumes(), informerFactory.Core().V1().PersistentVolumeClaims(), informerFactory.Core().V1().ReplicationControllers(), diff --git a/plugin/pkg/scheduler/factory/factory.go b/plugin/pkg/scheduler/factory/factory.go index 0409e2b0e2d..312276f122d 100644 --- a/plugin/pkg/scheduler/factory/factory.go +++ b/plugin/pkg/scheduler/factory/factory.go @@ -82,7 +82,7 @@ type ConfigFactory struct { // Close this to stop all reflectors StopEverything chan struct{} - scheduledPodsHasSynced cache.InformerSynced + scheduledPodPopulator cache.Controller schedulerCache schedulercache.Cache @@ -105,7 +105,6 @@ func NewConfigFactory( schedulerName string, client clientset.Interface, nodeInformer coreinformers.NodeInformer, - podInformer coreinformers.PodInformer, pvInformer coreinformers.PersistentVolumeInformer, pvcInformer coreinformers.PersistentVolumeClaimInformer, replicationControllerInformer coreinformers.ReplicationControllerInformer, @@ -133,60 +132,23 @@ func NewConfigFactory( hardPodAffinitySymmetricWeight: hardPodAffinitySymmetricWeight, } - c.scheduledPodsHasSynced = podInformer.Informer().HasSynced - // scheduled pod cache - podInformer.Informer().AddEventHandler( - cache.FilteringResourceEventHandler{ - FilterFunc: func(obj interface{}) bool { - switch t := obj.(type) { - case *v1.Pod: - return assignedNonTerminatedPod(t) - default: - runtime.HandleError(fmt.Errorf("unable to handle object in %T: %T", c, obj)) - return false - } - }, - Handler: cache.ResourceEventHandlerFuncs{ - AddFunc: c.addPodToCache, - UpdateFunc: c.updatePodInCache, - DeleteFunc: c.deletePodFromCache, - }, - }, - ) - // unscheduled pod queue - podInformer.Informer().AddEventHandler( - cache.FilteringResourceEventHandler{ - FilterFunc: func(obj interface{}) bool { - switch t := obj.(type) { - case *v1.Pod: - return unassignedNonTerminatedPod(t) - default: - runtime.HandleError(fmt.Errorf("unable to handle object in %T: %T", c, obj)) - return false - } - }, - Handler: cache.ResourceEventHandlerFuncs{ - AddFunc: func(obj interface{}) { - if err := c.podQueue.Add(obj); err != nil { - runtime.HandleError(fmt.Errorf("unable to queue %T: %v", obj, err)) - } - }, - UpdateFunc: func(oldObj, newObj interface{}) { - if err := c.podQueue.Update(newObj); err != nil { - runtime.HandleError(fmt.Errorf("unable to update %T: %v", newObj, err)) - } - }, - DeleteFunc: func(obj interface{}) { - if err := c.podQueue.Delete(obj); err != nil { - runtime.HandleError(fmt.Errorf("unable to dequeue %T: %v", obj, err)) - } - }, - }, - }, - ) - // ScheduledPodLister is something we provide to plug-in functions that + // On add/delete to the scheduled pods, remove from the assumed pods. + // We construct this here instead of in CreateFromKeys because + // ScheduledPodLister is something we provide to plug in functions that // they may need to call. - c.scheduledPodLister = podInformer.Lister() + var scheduledPodIndexer cache.Indexer + scheduledPodIndexer, c.scheduledPodPopulator = cache.NewIndexerInformer( + c.createAssignedNonTerminatedPodLW(), + &v1.Pod{}, + 0, + cache.ResourceEventHandlerFuncs{ + AddFunc: c.addPodToCache, + UpdateFunc: c.updatePodInCache, + DeleteFunc: c.deletePodFromCache, + }, + cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, + ) + c.scheduledPodLister = corelisters.NewPodLister(scheduledPodIndexer) // Only nodes in the "Ready" condition with status == "True" are schedulable nodeInformer.Informer().AddEventHandlerWithResyncPeriod( @@ -407,6 +369,7 @@ func (f *ConfigFactory) CreateFromKeys(predicateKeys, priorityKeys sets.String, return nil, err } + f.Run() // TODO(resouer) use equivalence cache instead of nil here when #36238 get merged algo := core.NewGenericScheduler(f.schedulerCache, nil, predicateFuncs, predicateMetaProducer, priorityConfigs, priorityMetaProducer, extenders) podBackoff := util.CreateDefaultPodBackoff() @@ -418,7 +381,7 @@ func (f *ConfigFactory) CreateFromKeys(predicateKeys, priorityKeys sets.String, Binder: &binder{f.client}, PodConditionUpdater: &podConditionUpdater{f.client}, WaitForCacheSync: func() bool { - return cache.WaitForCacheSync(f.StopEverything, f.scheduledPodsHasSynced) + return cache.WaitForCacheSync(f.StopEverything, f.scheduledPodPopulator.HasSynced) }, NextPod: func() *v1.Pod { return f.getNextPod() @@ -487,6 +450,14 @@ func (f *ConfigFactory) getPluginArgs() (*PluginFactoryArgs, error) { }, nil } +func (f *ConfigFactory) Run() { + // Watch and queue pods that need scheduling. + cache.NewReflector(f.createUnassignedNonTerminatedPodLW(), &v1.Pod{}, f.podQueue, 0).RunUntil(f.StopEverything) + + // Begin populating scheduled pods. + go f.scheduledPodPopulator.Run(f.StopEverything) +} + func (f *ConfigFactory) getNextPod() *v1.Pod { for { pod := cache.Pop(f.podQueue).(*v1.Pod) @@ -529,47 +500,19 @@ func getNodeConditionPredicate() corelisters.NodeConditionPredicate { } } -// unassignedNonTerminatedPod selects pods that are unassigned and non-terminal. -func unassignedNonTerminatedPod(pod *v1.Pod) bool { - if len(pod.Spec.NodeName) != 0 { - return false - } - if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed { - return false - } - return true +// Returns a cache.ListWatch that finds all pods that need to be +// scheduled. +func (factory *ConfigFactory) createUnassignedNonTerminatedPodLW() *cache.ListWatch { + selector := fields.ParseSelectorOrDie("spec.nodeName==" + "" + ",status.phase!=" + string(v1.PodSucceeded) + ",status.phase!=" + string(v1.PodFailed)) + return cache.NewListWatchFromClient(factory.client.Core().RESTClient(), "pods", metav1.NamespaceAll, selector) } -// assignedNonTerminatedPod selects pods that are assigned and non-terminal (scheduled and running). -func assignedNonTerminatedPod(pod *v1.Pod) bool { - if len(pod.Spec.NodeName) == 0 { - return false - } - if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed { - return false - } - return true -} - -type podInformer struct { - informer cache.SharedIndexInformer -} - -func (i *podInformer) Informer() cache.SharedIndexInformer { - return i.informer -} - -func (i *podInformer) Lister() corelisters.PodLister { - return corelisters.NewPodLister(i.informer.GetIndexer()) -} - -// NewPodInformer creates a shared index informer that returns only non-terminal pods. -func NewPodInformer(client clientset.Interface, resyncPeriod time.Duration) coreinformers.PodInformer { - selector := fields.ParseSelectorOrDie("status.phase!=" + string(v1.PodSucceeded) + ",status.phase!=" + string(v1.PodFailed)) - lw := cache.NewListWatchFromClient(client.Core().RESTClient(), "pods", metav1.NamespaceAll, selector) - return &podInformer{ - informer: cache.NewSharedIndexInformer(lw, &v1.Pod{}, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}), - } +// Returns a cache.ListWatch that finds all pods that are +// already scheduled. +// TODO: return a ListerWatcher interface instead? +func (factory *ConfigFactory) createAssignedNonTerminatedPodLW() *cache.ListWatch { + selector := fields.ParseSelectorOrDie("spec.nodeName!=" + "" + ",status.phase!=" + string(v1.PodSucceeded) + ",status.phase!=" + string(v1.PodFailed)) + return cache.NewListWatchFromClient(factory.client.Core().RESTClient(), "pods", metav1.NamespaceAll, selector) } func (factory *ConfigFactory) MakeDefaultErrorFunc(backoff *util.PodBackoff, podQueue *cache.FIFO) func(pod *v1.Pod, err error) { diff --git a/plugin/pkg/scheduler/factory/factory_test.go b/plugin/pkg/scheduler/factory/factory_test.go index d72e873a3c5..572703ec726 100644 --- a/plugin/pkg/scheduler/factory/factory_test.go +++ b/plugin/pkg/scheduler/factory/factory_test.go @@ -55,7 +55,6 @@ func TestCreate(t *testing.T) { v1.DefaultSchedulerName, client, informerFactory.Core().V1().Nodes(), - informerFactory.Core().V1().Pods(), informerFactory.Core().V1().PersistentVolumes(), informerFactory.Core().V1().PersistentVolumeClaims(), informerFactory.Core().V1().ReplicationControllers(), @@ -86,7 +85,6 @@ func TestCreateFromConfig(t *testing.T) { v1.DefaultSchedulerName, client, informerFactory.Core().V1().Nodes(), - informerFactory.Core().V1().Pods(), informerFactory.Core().V1().PersistentVolumes(), informerFactory.Core().V1().PersistentVolumeClaims(), informerFactory.Core().V1().ReplicationControllers(), @@ -140,7 +138,6 @@ func TestCreateFromEmptyConfig(t *testing.T) { v1.DefaultSchedulerName, client, informerFactory.Core().V1().Nodes(), - informerFactory.Core().V1().Pods(), informerFactory.Core().V1().PersistentVolumes(), informerFactory.Core().V1().PersistentVolumeClaims(), informerFactory.Core().V1().ReplicationControllers(), @@ -196,7 +193,6 @@ func TestDefaultErrorFunc(t *testing.T) { v1.DefaultSchedulerName, client, informerFactory.Core().V1().Nodes(), - informerFactory.Core().V1().Pods(), informerFactory.Core().V1().PersistentVolumes(), informerFactory.Core().V1().PersistentVolumeClaims(), informerFactory.Core().V1().ReplicationControllers(), @@ -308,7 +304,6 @@ func TestResponsibleForPod(t *testing.T) { v1.DefaultSchedulerName, client, informerFactory.Core().V1().Nodes(), - informerFactory.Core().V1().Pods(), informerFactory.Core().V1().PersistentVolumes(), informerFactory.Core().V1().PersistentVolumeClaims(), informerFactory.Core().V1().ReplicationControllers(), @@ -322,7 +317,6 @@ func TestResponsibleForPod(t *testing.T) { "foo-scheduler", client, informerFactory.Core().V1().Nodes(), - informerFactory.Core().V1().Pods(), informerFactory.Core().V1().PersistentVolumes(), informerFactory.Core().V1().PersistentVolumeClaims(), informerFactory.Core().V1().ReplicationControllers(), @@ -391,7 +385,6 @@ func TestInvalidHardPodAffinitySymmetricWeight(t *testing.T) { v1.DefaultSchedulerName, client, informerFactory.Core().V1().Nodes(), - informerFactory.Core().V1().Pods(), informerFactory.Core().V1().PersistentVolumes(), informerFactory.Core().V1().PersistentVolumeClaims(), informerFactory.Core().V1().ReplicationControllers(), @@ -436,7 +429,6 @@ func TestInvalidFactoryArgs(t *testing.T) { v1.DefaultSchedulerName, client, informerFactory.Core().V1().Nodes(), - informerFactory.Core().V1().Pods(), informerFactory.Core().V1().PersistentVolumes(), informerFactory.Core().V1().PersistentVolumeClaims(), informerFactory.Core().V1().ReplicationControllers(), diff --git a/plugin/pkg/scheduler/scheduler.go b/plugin/pkg/scheduler/scheduler.go index 8570427dc1a..bebcc454fb9 100644 --- a/plugin/pkg/scheduler/scheduler.go +++ b/plugin/pkg/scheduler/scheduler.go @@ -20,12 +20,10 @@ import ( "time" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" - "k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/api/v1" "k8s.io/kubernetes/pkg/client/clientset_generated/clientset" corelisters "k8s.io/kubernetes/pkg/client/listers/core/v1" @@ -81,6 +79,7 @@ type Configurator interface { GetNodeLister() corelisters.NodeLister GetClient() clientset.Interface GetScheduledPodLister() corelisters.PodLister + Run() Create() (*Config, error) CreateFromProvider(providerName string) (*Config, error) @@ -172,12 +171,6 @@ func (sched *Scheduler) scheduleOne() { dest, err := sched.config.Algorithm.Schedule(pod, sched.config.NodeLister) if err != nil { glog.V(1).Infof("Failed to schedule pod: %v/%v", pod.Namespace, pod.Name) - copied, cerr := api.Scheme.Copy(pod) - if cerr != nil { - runtime.HandleError(err) - return - } - pod = copied.(*v1.Pod) sched.config.Error(pod, err) sched.config.Recorder.Eventf(pod, v1.EventTypeWarning, "FailedScheduling", "%v", err) sched.config.PodConditionUpdater.Update(pod, &v1.PodCondition{ @@ -239,12 +232,6 @@ func (sched *Scheduler) scheduleOne() { if err := sched.config.SchedulerCache.ForgetPod(&assumed); err != nil { glog.Errorf("scheduler cache ForgetPod failed: %v", err) } - copied, cerr := api.Scheme.Copy(pod) - if cerr != nil { - runtime.HandleError(err) - return - } - pod = copied.(*v1.Pod) sched.config.Error(pod, err) sched.config.Recorder.Eventf(pod, v1.EventTypeWarning, "FailedScheduling", "Binding rejected: %v", err) sched.config.PodConditionUpdater.Update(pod, &v1.PodCondition{ diff --git a/test/integration/scheduler/extender_test.go b/test/integration/scheduler/extender_test.go index 1c758a9bc57..4a8a2b732fd 100644 --- a/test/integration/scheduler/extender_test.go +++ b/test/integration/scheduler/extender_test.go @@ -326,7 +326,6 @@ func TestSchedulerExtender(t *testing.T) { v1.DefaultSchedulerName, clientSet, informerFactory.Core().V1().Nodes(), - informerFactory.Core().V1().Pods(), informerFactory.Core().V1().PersistentVolumes(), informerFactory.Core().V1().PersistentVolumeClaims(), informerFactory.Core().V1().ReplicationControllers(), diff --git a/test/integration/scheduler/scheduler_test.go b/test/integration/scheduler/scheduler_test.go index 0d02e8a9e92..ae41c990dfd 100644 --- a/test/integration/scheduler/scheduler_test.go +++ b/test/integration/scheduler/scheduler_test.go @@ -122,7 +122,6 @@ func TestSchedulerCreationFromConfigMap(t *testing.T) { ss.PolicyConfigMapName = configPolicyName sched, err := app.CreateScheduler(ss, clientSet, informerFactory.Core().V1().Nodes(), - informerFactory.Core().V1().Pods(), informerFactory.Core().V1().PersistentVolumes(), informerFactory.Core().V1().PersistentVolumeClaims(), informerFactory.Core().V1().ReplicationControllers(), @@ -175,7 +174,6 @@ func TestSchedulerCreationFromNonExistentConfigMap(t *testing.T) { _, err := app.CreateScheduler(ss, clientSet, informerFactory.Core().V1().Nodes(), - informerFactory.Core().V1().Pods(), informerFactory.Core().V1().PersistentVolumes(), informerFactory.Core().V1().PersistentVolumeClaims(), informerFactory.Core().V1().ReplicationControllers(), @@ -213,7 +211,6 @@ func TestSchedulerCreationInLegacyMode(t *testing.T) { sched, err := app.CreateScheduler(ss, clientSet, informerFactory.Core().V1().Nodes(), - informerFactory.Core().V1().Pods(), informerFactory.Core().V1().PersistentVolumes(), informerFactory.Core().V1().PersistentVolumeClaims(), informerFactory.Core().V1().ReplicationControllers(), @@ -248,7 +245,6 @@ func TestUnschedulableNodes(t *testing.T) { v1.DefaultSchedulerName, clientSet, informerFactory.Core().V1().Nodes(), - informerFactory.Core().V1().Pods(), informerFactory.Core().V1().PersistentVolumes(), informerFactory.Core().V1().PersistentVolumeClaims(), informerFactory.Core().V1().ReplicationControllers(), @@ -531,7 +527,6 @@ func TestMultiScheduler(t *testing.T) { v1.DefaultSchedulerName, clientSet, informerFactory.Core().V1().Nodes(), - informerFactory.Core().V1().Pods(), informerFactory.Core().V1().PersistentVolumes(), informerFactory.Core().V1().PersistentVolumeClaims(), informerFactory.Core().V1().ReplicationControllers(), @@ -617,7 +612,6 @@ func TestMultiScheduler(t *testing.T) { "foo-scheduler", clientSet2, informerFactory.Core().V1().Nodes(), - informerFactory.Core().V1().Pods(), informerFactory.Core().V1().PersistentVolumes(), informerFactory.Core().V1().PersistentVolumeClaims(), informerFactory.Core().V1().ReplicationControllers(), @@ -727,7 +721,6 @@ func TestAllocatable(t *testing.T) { v1.DefaultSchedulerName, clientSet, informerFactory.Core().V1().Nodes(), - informerFactory.Core().V1().Pods(), informerFactory.Core().V1().PersistentVolumes(), informerFactory.Core().V1().PersistentVolumeClaims(), informerFactory.Core().V1().ReplicationControllers(), diff --git a/test/integration/scheduler_perf/util.go b/test/integration/scheduler_perf/util.go index 2bf5c547a57..66e03925aea 100644 --- a/test/integration/scheduler_perf/util.go +++ b/test/integration/scheduler_perf/util.go @@ -65,7 +65,6 @@ func mustSetupScheduler() (schedulerConfigurator scheduler.Configurator, destroy v1.DefaultSchedulerName, clientSet, informerFactory.Core().V1().Nodes(), - informerFactory.Core().V1().Pods(), informerFactory.Core().V1().PersistentVolumes(), informerFactory.Core().V1().PersistentVolumeClaims(), informerFactory.Core().V1().ReplicationControllers(),