Merge pull request #46199 from wojtek-t/revert_scheduler_commit

Automatic merge from submit-queue

Revert "Scheduler should use shared informer for pods"

This reverts commit 479f01d340.

@smarterclayton @deads2k - I don't really have time to debug this today, so for now I'm reverting this commit. We should try to understand what does it change with respect to #46196 (assuming that this will fix the problem).
This commit is contained in:
Kubernetes Submit Queue 2017-05-22 03:06:49 -07:00 committed by GitHub
commit 5dd56c9b1e
11 changed files with 39 additions and 139 deletions

View File

@ -24,7 +24,6 @@ go_library(
"//pkg/client/informers/informers_generated/externalversions/extensions/v1beta1:go_default_library",
"//pkg/client/leaderelection:go_default_library",
"//pkg/client/leaderelection/resourcelock:go_default_library",
"//pkg/controller:go_default_library",
"//pkg/util/configz:go_default_library",
"//plugin/cmd/kube-scheduler/app/options:go_default_library",
"//plugin/pkg/scheduler:go_default_library",

View File

@ -77,7 +77,6 @@ func CreateScheduler(
s *options.SchedulerServer,
kubecli *clientset.Clientset,
nodeInformer coreinformers.NodeInformer,
podInformer coreinformers.PodInformer,
pvInformer coreinformers.PersistentVolumeInformer,
pvcInformer coreinformers.PersistentVolumeClaimInformer,
replicationControllerInformer coreinformers.ReplicationControllerInformer,
@ -90,7 +89,6 @@ func CreateScheduler(
s.SchedulerName,
kubecli,
nodeInformer,
podInformer,
pvInformer,
pvcInformer,
replicationControllerInformer,

View File

@ -31,11 +31,9 @@ import (
informers "k8s.io/kubernetes/pkg/client/informers/informers_generated/externalversions"
"k8s.io/kubernetes/pkg/client/leaderelection"
"k8s.io/kubernetes/pkg/client/leaderelection/resourcelock"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/util/configz"
"k8s.io/kubernetes/plugin/cmd/kube-scheduler/app/options"
_ "k8s.io/kubernetes/plugin/pkg/scheduler/algorithmprovider"
"k8s.io/kubernetes/plugin/pkg/scheduler/factory"
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
@ -73,14 +71,11 @@ func Run(s *options.SchedulerServer) error {
recorder := createRecorder(kubecli, s)
informerFactory := informers.NewSharedInformerFactory(kubecli, 0)
// cache only non-terminal pods
podInformer := factory.NewPodInformer(kubecli, 0)
sched, err := CreateScheduler(
s,
kubecli,
informerFactory.Core().V1().Nodes(),
podInformer,
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),
@ -97,11 +92,9 @@ func Run(s *options.SchedulerServer) error {
stop := make(chan struct{})
defer close(stop)
go podInformer.Informer().Run(stop)
informerFactory.Start(stop)
// Waiting for all cache to sync before scheduling.
informerFactory.WaitForCacheSync(stop)
controller.WaitForCacheSync("scheduler", stop, podInformer.Informer().HasSynced)
run := func(_ <-chan struct{}) {
sched.Run()

View File

@ -41,7 +41,6 @@ go_library(
],
tags = ["automanaged"],
deps = [
"//pkg/api:go_default_library",
"//pkg/api/v1:go_default_library",
"//pkg/client/clientset_generated/clientset:go_default_library",
"//pkg/client/listers/core/v1:go_default_library",
@ -53,7 +52,6 @@ go_library(
"//plugin/pkg/scheduler/util:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/client-go/tools/cache:go_default_library",

View File

@ -352,7 +352,6 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
"some-scheduler-name",
client,
informerFactory.Core().V1().Nodes(),
informerFactory.Core().V1().Pods(),
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),

View File

@ -82,7 +82,7 @@ type ConfigFactory struct {
// Close this to stop all reflectors
StopEverything chan struct{}
scheduledPodsHasSynced cache.InformerSynced
scheduledPodPopulator cache.Controller
schedulerCache schedulercache.Cache
@ -105,7 +105,6 @@ func NewConfigFactory(
schedulerName string,
client clientset.Interface,
nodeInformer coreinformers.NodeInformer,
podInformer coreinformers.PodInformer,
pvInformer coreinformers.PersistentVolumeInformer,
pvcInformer coreinformers.PersistentVolumeClaimInformer,
replicationControllerInformer coreinformers.ReplicationControllerInformer,
@ -133,60 +132,23 @@ func NewConfigFactory(
hardPodAffinitySymmetricWeight: hardPodAffinitySymmetricWeight,
}
c.scheduledPodsHasSynced = podInformer.Informer().HasSynced
// scheduled pod cache
podInformer.Informer().AddEventHandler(
cache.FilteringResourceEventHandler{
FilterFunc: func(obj interface{}) bool {
switch t := obj.(type) {
case *v1.Pod:
return assignedNonTerminatedPod(t)
default:
runtime.HandleError(fmt.Errorf("unable to handle object in %T: %T", c, obj))
return false
}
},
Handler: cache.ResourceEventHandlerFuncs{
AddFunc: c.addPodToCache,
UpdateFunc: c.updatePodInCache,
DeleteFunc: c.deletePodFromCache,
},
},
)
// unscheduled pod queue
podInformer.Informer().AddEventHandler(
cache.FilteringResourceEventHandler{
FilterFunc: func(obj interface{}) bool {
switch t := obj.(type) {
case *v1.Pod:
return unassignedNonTerminatedPod(t)
default:
runtime.HandleError(fmt.Errorf("unable to handle object in %T: %T", c, obj))
return false
}
},
Handler: cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
if err := c.podQueue.Add(obj); err != nil {
runtime.HandleError(fmt.Errorf("unable to queue %T: %v", obj, err))
}
},
UpdateFunc: func(oldObj, newObj interface{}) {
if err := c.podQueue.Update(newObj); err != nil {
runtime.HandleError(fmt.Errorf("unable to update %T: %v", newObj, err))
}
},
DeleteFunc: func(obj interface{}) {
if err := c.podQueue.Delete(obj); err != nil {
runtime.HandleError(fmt.Errorf("unable to dequeue %T: %v", obj, err))
}
},
},
},
)
// ScheduledPodLister is something we provide to plug-in functions that
// On add/delete to the scheduled pods, remove from the assumed pods.
// We construct this here instead of in CreateFromKeys because
// ScheduledPodLister is something we provide to plug in functions that
// they may need to call.
c.scheduledPodLister = podInformer.Lister()
var scheduledPodIndexer cache.Indexer
scheduledPodIndexer, c.scheduledPodPopulator = cache.NewIndexerInformer(
c.createAssignedNonTerminatedPodLW(),
&v1.Pod{},
0,
cache.ResourceEventHandlerFuncs{
AddFunc: c.addPodToCache,
UpdateFunc: c.updatePodInCache,
DeleteFunc: c.deletePodFromCache,
},
cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc},
)
c.scheduledPodLister = corelisters.NewPodLister(scheduledPodIndexer)
// Only nodes in the "Ready" condition with status == "True" are schedulable
nodeInformer.Informer().AddEventHandlerWithResyncPeriod(
@ -407,6 +369,7 @@ func (f *ConfigFactory) CreateFromKeys(predicateKeys, priorityKeys sets.String,
return nil, err
}
f.Run()
// TODO(resouer) use equivalence cache instead of nil here when #36238 get merged
algo := core.NewGenericScheduler(f.schedulerCache, nil, predicateFuncs, predicateMetaProducer, priorityConfigs, priorityMetaProducer, extenders)
podBackoff := util.CreateDefaultPodBackoff()
@ -418,7 +381,7 @@ func (f *ConfigFactory) CreateFromKeys(predicateKeys, priorityKeys sets.String,
Binder: &binder{f.client},
PodConditionUpdater: &podConditionUpdater{f.client},
WaitForCacheSync: func() bool {
return cache.WaitForCacheSync(f.StopEverything, f.scheduledPodsHasSynced)
return cache.WaitForCacheSync(f.StopEverything, f.scheduledPodPopulator.HasSynced)
},
NextPod: func() *v1.Pod {
return f.getNextPod()
@ -487,6 +450,14 @@ func (f *ConfigFactory) getPluginArgs() (*PluginFactoryArgs, error) {
}, nil
}
func (f *ConfigFactory) Run() {
// Watch and queue pods that need scheduling.
cache.NewReflector(f.createUnassignedNonTerminatedPodLW(), &v1.Pod{}, f.podQueue, 0).RunUntil(f.StopEverything)
// Begin populating scheduled pods.
go f.scheduledPodPopulator.Run(f.StopEverything)
}
func (f *ConfigFactory) getNextPod() *v1.Pod {
for {
pod := cache.Pop(f.podQueue).(*v1.Pod)
@ -529,47 +500,19 @@ func getNodeConditionPredicate() corelisters.NodeConditionPredicate {
}
}
// unassignedNonTerminatedPod selects pods that are unassigned and non-terminal.
func unassignedNonTerminatedPod(pod *v1.Pod) bool {
if len(pod.Spec.NodeName) != 0 {
return false
}
if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed {
return false
}
return true
// Returns a cache.ListWatch that finds all pods that need to be
// scheduled.
func (factory *ConfigFactory) createUnassignedNonTerminatedPodLW() *cache.ListWatch {
selector := fields.ParseSelectorOrDie("spec.nodeName==" + "" + ",status.phase!=" + string(v1.PodSucceeded) + ",status.phase!=" + string(v1.PodFailed))
return cache.NewListWatchFromClient(factory.client.Core().RESTClient(), "pods", metav1.NamespaceAll, selector)
}
// assignedNonTerminatedPod selects pods that are assigned and non-terminal (scheduled and running).
func assignedNonTerminatedPod(pod *v1.Pod) bool {
if len(pod.Spec.NodeName) == 0 {
return false
}
if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed {
return false
}
return true
}
type podInformer struct {
informer cache.SharedIndexInformer
}
func (i *podInformer) Informer() cache.SharedIndexInformer {
return i.informer
}
func (i *podInformer) Lister() corelisters.PodLister {
return corelisters.NewPodLister(i.informer.GetIndexer())
}
// NewPodInformer creates a shared index informer that returns only non-terminal pods.
func NewPodInformer(client clientset.Interface, resyncPeriod time.Duration) coreinformers.PodInformer {
selector := fields.ParseSelectorOrDie("status.phase!=" + string(v1.PodSucceeded) + ",status.phase!=" + string(v1.PodFailed))
lw := cache.NewListWatchFromClient(client.Core().RESTClient(), "pods", metav1.NamespaceAll, selector)
return &podInformer{
informer: cache.NewSharedIndexInformer(lw, &v1.Pod{}, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}),
}
// Returns a cache.ListWatch that finds all pods that are
// already scheduled.
// TODO: return a ListerWatcher interface instead?
func (factory *ConfigFactory) createAssignedNonTerminatedPodLW() *cache.ListWatch {
selector := fields.ParseSelectorOrDie("spec.nodeName!=" + "" + ",status.phase!=" + string(v1.PodSucceeded) + ",status.phase!=" + string(v1.PodFailed))
return cache.NewListWatchFromClient(factory.client.Core().RESTClient(), "pods", metav1.NamespaceAll, selector)
}
func (factory *ConfigFactory) MakeDefaultErrorFunc(backoff *util.PodBackoff, podQueue *cache.FIFO) func(pod *v1.Pod, err error) {

View File

@ -55,7 +55,6 @@ func TestCreate(t *testing.T) {
v1.DefaultSchedulerName,
client,
informerFactory.Core().V1().Nodes(),
informerFactory.Core().V1().Pods(),
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),
@ -86,7 +85,6 @@ func TestCreateFromConfig(t *testing.T) {
v1.DefaultSchedulerName,
client,
informerFactory.Core().V1().Nodes(),
informerFactory.Core().V1().Pods(),
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),
@ -140,7 +138,6 @@ func TestCreateFromEmptyConfig(t *testing.T) {
v1.DefaultSchedulerName,
client,
informerFactory.Core().V1().Nodes(),
informerFactory.Core().V1().Pods(),
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),
@ -196,7 +193,6 @@ func TestDefaultErrorFunc(t *testing.T) {
v1.DefaultSchedulerName,
client,
informerFactory.Core().V1().Nodes(),
informerFactory.Core().V1().Pods(),
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),
@ -308,7 +304,6 @@ func TestResponsibleForPod(t *testing.T) {
v1.DefaultSchedulerName,
client,
informerFactory.Core().V1().Nodes(),
informerFactory.Core().V1().Pods(),
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),
@ -322,7 +317,6 @@ func TestResponsibleForPod(t *testing.T) {
"foo-scheduler",
client,
informerFactory.Core().V1().Nodes(),
informerFactory.Core().V1().Pods(),
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),
@ -391,7 +385,6 @@ func TestInvalidHardPodAffinitySymmetricWeight(t *testing.T) {
v1.DefaultSchedulerName,
client,
informerFactory.Core().V1().Nodes(),
informerFactory.Core().V1().Pods(),
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),
@ -436,7 +429,6 @@ func TestInvalidFactoryArgs(t *testing.T) {
v1.DefaultSchedulerName,
client,
informerFactory.Core().V1().Nodes(),
informerFactory.Core().V1().Pods(),
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),

View File

@ -20,12 +20,10 @@ import (
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
corelisters "k8s.io/kubernetes/pkg/client/listers/core/v1"
@ -81,6 +79,7 @@ type Configurator interface {
GetNodeLister() corelisters.NodeLister
GetClient() clientset.Interface
GetScheduledPodLister() corelisters.PodLister
Run()
Create() (*Config, error)
CreateFromProvider(providerName string) (*Config, error)
@ -172,12 +171,6 @@ func (sched *Scheduler) scheduleOne() {
dest, err := sched.config.Algorithm.Schedule(pod, sched.config.NodeLister)
if err != nil {
glog.V(1).Infof("Failed to schedule pod: %v/%v", pod.Namespace, pod.Name)
copied, cerr := api.Scheme.Copy(pod)
if cerr != nil {
runtime.HandleError(err)
return
}
pod = copied.(*v1.Pod)
sched.config.Error(pod, err)
sched.config.Recorder.Eventf(pod, v1.EventTypeWarning, "FailedScheduling", "%v", err)
sched.config.PodConditionUpdater.Update(pod, &v1.PodCondition{
@ -239,12 +232,6 @@ func (sched *Scheduler) scheduleOne() {
if err := sched.config.SchedulerCache.ForgetPod(&assumed); err != nil {
glog.Errorf("scheduler cache ForgetPod failed: %v", err)
}
copied, cerr := api.Scheme.Copy(pod)
if cerr != nil {
runtime.HandleError(err)
return
}
pod = copied.(*v1.Pod)
sched.config.Error(pod, err)
sched.config.Recorder.Eventf(pod, v1.EventTypeWarning, "FailedScheduling", "Binding rejected: %v", err)
sched.config.PodConditionUpdater.Update(pod, &v1.PodCondition{

View File

@ -326,7 +326,6 @@ func TestSchedulerExtender(t *testing.T) {
v1.DefaultSchedulerName,
clientSet,
informerFactory.Core().V1().Nodes(),
informerFactory.Core().V1().Pods(),
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),

View File

@ -122,7 +122,6 @@ func TestSchedulerCreationFromConfigMap(t *testing.T) {
ss.PolicyConfigMapName = configPolicyName
sched, err := app.CreateScheduler(ss, clientSet,
informerFactory.Core().V1().Nodes(),
informerFactory.Core().V1().Pods(),
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),
@ -175,7 +174,6 @@ func TestSchedulerCreationFromNonExistentConfigMap(t *testing.T) {
_, err := app.CreateScheduler(ss, clientSet,
informerFactory.Core().V1().Nodes(),
informerFactory.Core().V1().Pods(),
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),
@ -213,7 +211,6 @@ func TestSchedulerCreationInLegacyMode(t *testing.T) {
sched, err := app.CreateScheduler(ss, clientSet,
informerFactory.Core().V1().Nodes(),
informerFactory.Core().V1().Pods(),
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),
@ -248,7 +245,6 @@ func TestUnschedulableNodes(t *testing.T) {
v1.DefaultSchedulerName,
clientSet,
informerFactory.Core().V1().Nodes(),
informerFactory.Core().V1().Pods(),
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),
@ -531,7 +527,6 @@ func TestMultiScheduler(t *testing.T) {
v1.DefaultSchedulerName,
clientSet,
informerFactory.Core().V1().Nodes(),
informerFactory.Core().V1().Pods(),
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),
@ -617,7 +612,6 @@ func TestMultiScheduler(t *testing.T) {
"foo-scheduler",
clientSet2,
informerFactory.Core().V1().Nodes(),
informerFactory.Core().V1().Pods(),
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),
@ -727,7 +721,6 @@ func TestAllocatable(t *testing.T) {
v1.DefaultSchedulerName,
clientSet,
informerFactory.Core().V1().Nodes(),
informerFactory.Core().V1().Pods(),
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),

View File

@ -65,7 +65,6 @@ func mustSetupScheduler() (schedulerConfigurator scheduler.Configurator, destroy
v1.DefaultSchedulerName,
clientSet,
informerFactory.Core().V1().Nodes(),
informerFactory.Core().V1().Pods(),
informerFactory.Core().V1().PersistentVolumes(),
informerFactory.Core().V1().PersistentVolumeClaims(),
informerFactory.Core().V1().ReplicationControllers(),