Split binder, deleter, podScheduler initialization from NewSchedulerLoop

This commit is contained in:
Dr. Stefan Schimanski 2015-10-28 12:54:21 -05:00
parent 5f070c11a7
commit ae4673e86e
4 changed files with 35 additions and 37 deletions

View File

@ -427,6 +427,7 @@ type lifecycleTest struct {
driver *mmock.JoinableDriver driver *mmock.JoinableDriver
eventObs *EventObserver eventObs *EventObserver
loop operations.SchedulerLoopInterface loop operations.SchedulerLoopInterface
podReconciler *operations.PodReconciler
podsListWatch *MockPodsListWatch podsListWatch *MockPodsListWatch
scheduler *MesosScheduler scheduler *MesosScheduler
schedulerProc *ha.SchedulerProcess schedulerProc *ha.SchedulerProcess
@ -485,7 +486,7 @@ func newLifecycleTest(t *testing.T) lifecycleTest {
// create scheduler loop // create scheduler loop
fw := &MesosFramework{MesosScheduler: mesosScheduler} fw := &MesosFramework{MesosScheduler: mesosScheduler}
eventObs := NewEventObserver() eventObs := NewEventObserver()
loop := operations.NewSchedulerLoop(&c, fw, client, eventObs, schedulerProc.Terminal(), http.DefaultServeMux, &podsListWatch.ListWatch) loop, _ := operations.NewScheduler(&c, fw, client, eventObs, schedulerProc.Terminal(), http.DefaultServeMux, &podsListWatch.ListWatch)
assert.NotNil(loop) assert.NotNil(loop)
// create mock mesos scheduler driver // create mock mesos scheduler driver
@ -510,7 +511,7 @@ func (lt lifecycleTest) Start() <-chan LaunchedTask {
// init scheduler // init scheduler
err := lt.scheduler.Init( err := lt.scheduler.Init(
lt.schedulerProc.Master(), lt.schedulerProc.Master(),
lt.loop, lt.podReconciler,
http.DefaultServeMux, http.DefaultServeMux,
) )
assert.NoError(err) assert.NoError(err)

View File

@ -42,9 +42,6 @@ const (
) )
type SchedulerLoopInterface interface { type SchedulerLoopInterface interface {
ReconcilePodTask(t *podtask.T)
// execute the Scheduling plugin, should start a go routine and return immediately
Run(<-chan struct{}) Run(<-chan struct{})
} }
@ -55,12 +52,11 @@ type SchedulerLoop struct {
error func(*api.Pod, error) error func(*api.Pod, error)
recorder record.EventRecorder recorder record.EventRecorder
client *client.Client client *client.Client
pr *PodReconciler started chan<- struct{} // startup latch
starting chan struct{} // startup latch
} }
func NewSchedulerLoop(c *config.Config, fw types.Framework, client *client.Client, recorder record.EventRecorder, func NewScheduler(c *config.Config, fw types.Framework, client *client.Client, recorder record.EventRecorder,
terminate <-chan struct{}, mux *http.ServeMux, podsWatcher *cache.ListWatch) *SchedulerLoop { terminate <-chan struct{}, mux *http.ServeMux, podsWatcher *cache.ListWatch) (SchedulerLoopInterface, *PodReconciler) {
// Watch and queue pods that need scheduling. // Watch and queue pods that need scheduling.
updates := make(chan queue.Entry, c.UpdatesBacklog) updates := make(chan queue.Entry, c.UpdatesBacklog)
@ -74,10 +70,10 @@ func NewSchedulerLoop(c *config.Config, fw types.Framework, client *client.Clien
q := queuer.New(podUpdates) q := queuer.New(podUpdates)
podDeleter := NewDeleter(fw, q) podDeleter := NewDeleter(fw, q)
podReconciler := NewPodReconciler(fw, client, q, podDeleter) podReconciler := NewPodReconciler(fw, client, q, podDeleter)
bo := backoff.New(c.InitialPodBackoff.Duration, c.MaxPodBackoff.Duration)
eh := NewErrorHandler(fw, bo, q)
startLatch := make(chan struct{}) startLatch := make(chan struct{})
eventBroadcaster := record.NewBroadcaster() eventBroadcaster := record.NewBroadcaster()
runtime.On(startLatch, func() { runtime.On(startLatch, func() {
eventBroadcaster.StartRecordingToSink(client.Events("")) eventBroadcaster.StartRecordingToSink(client.Events(""))
reflector.Run() // TODO(jdef) should listen for termination reflector.Run() // TODO(jdef) should listen for termination
@ -87,20 +83,27 @@ func NewSchedulerLoop(c *config.Config, fw types.Framework, client *client.Clien
q.InstallDebugHandlers(mux) q.InstallDebugHandlers(mux)
podtask.InstallDebugHandlers(fw.Tasks(), mux) podtask.InstallDebugHandlers(fw.Tasks(), mux)
}) })
return NewSchedulerLoop(c, fw, client, recorder, podUpdates, q, startLatch), podReconciler
}
func NewSchedulerLoop(c *config.Config, fw types.Framework, client *client.Client,
recorder record.EventRecorder, podUpdates queue.FIFO, q *queuer.Queuer,
started chan<- struct{}) *SchedulerLoop {
bo := backoff.New(c.InitialPodBackoff.Duration, c.MaxPodBackoff.Duration)
return &SchedulerLoop{ return &SchedulerLoop{
algorithm: NewSchedulerAlgorithm(fw, podUpdates), algorithm: NewSchedulerAlgorithm(fw, podUpdates),
binder: NewBinder(fw), binder: NewBinder(fw),
nextPod: q.Yield, nextPod: q.Yield,
error: eh.Error, error: NewErrorHandler(fw, bo, q).Error,
recorder: recorder, recorder: recorder,
client: client, client: client,
pr: podReconciler, started: started,
starting: startLatch,
} }
} }
func (s *SchedulerLoop) Run(done <-chan struct{}) { func (s *SchedulerLoop) Run(done <-chan struct{}) {
defer close(s.starting) defer close(s.started)
go runtime.Until(s.scheduleOne, recoveryDelay, done) go runtime.Until(s.scheduleOne, recoveryDelay, done)
} }
@ -141,7 +144,3 @@ func (s *SchedulerLoop) scheduleOne() {
} }
s.recorder.Eventf(pod, Scheduled, "Successfully assigned %v to %v", pod.Name, dest) s.recorder.Eventf(pod, Scheduled, "Successfully assigned %v to %v", pod.Name, dest)
} }
func (s *SchedulerLoop) ReconcilePodTask(t *podtask.T) {
s.pr.Reconcile(t)
}

View File

@ -56,8 +56,7 @@ import (
// KubernetesScheduler implements: // KubernetesScheduler implements:
// 1: A mesos scheduler. // 1: A mesos scheduler.
// 2: A kubernetes scheduler plugin. // 2: A kubernetes pod.Registry.
// 3: A kubernetes pod.Registry.
type MesosScheduler struct { type MesosScheduler struct {
// We use a lock here to avoid races // We use a lock here to avoid races
// between invoking the mesos callback // between invoking the mesos callback
@ -93,9 +92,8 @@ type MesosScheduler struct {
taskRegistry podtask.Registry taskRegistry podtask.Registry
// via deferred init // via deferred init
podReconciler *operations.PodReconciler
loop operations.SchedulerLoopInterface tasksReconciler *operations.TasksReconciler
reconciler *operations.TasksReconciler
reconcileCooldown time.Duration reconcileCooldown time.Duration
asRegisteredMaster proc.Doer asRegisteredMaster proc.Doer
terminate <-chan struct{} // signal chan, closes when we should kill background tasks terminate <-chan struct{} // signal chan, closes when we should kill background tasks
@ -172,7 +170,7 @@ func New(config Config) *MesosScheduler {
return k return k
} }
func (k *MesosScheduler) Init(electedMaster proc.Process, sl operations.SchedulerLoopInterface, mux *http.ServeMux) error { func (k *MesosScheduler) Init(electedMaster proc.Process, pr *operations.PodReconciler, mux *http.ServeMux) error {
log.V(1).Infoln("initializing kubernetes mesos scheduler") log.V(1).Infoln("initializing kubernetes mesos scheduler")
k.asRegisteredMaster = proc.DoerFunc(func(a proc.Action) <-chan error { k.asRegisteredMaster = proc.DoerFunc(func(a proc.Action) <-chan error {
@ -182,7 +180,7 @@ func (k *MesosScheduler) Init(electedMaster proc.Process, sl operations.Schedule
return electedMaster.Do(a) return electedMaster.Do(a)
}) })
k.terminate = electedMaster.Done() k.terminate = electedMaster.Done()
k.loop = sl k.podReconciler = pr
k.offers.Init(k.terminate) k.offers.Init(k.terminate)
k.InstallDebugHandlers(mux) k.InstallDebugHandlers(mux)
k.nodeRegistrator.Run(k.terminate) k.nodeRegistrator.Run(k.terminate)
@ -223,8 +221,8 @@ func (k *MesosScheduler) InstallDebugHandlers(mux *http.ServeMux) {
w.WriteHeader(http.StatusNoContent) w.WriteHeader(http.StatusNoContent)
})) }))
} }
requestReconciliation("/debug/actions/requestExplicit", k.reconciler.RequestExplicit) requestReconciliation("/debug/actions/requestExplicit", k.tasksReconciler.RequestExplicit)
requestReconciliation("/debug/actions/requestImplicit", k.reconciler.RequestImplicit) requestReconciliation("/debug/actions/requestImplicit", k.tasksReconciler.RequestImplicit)
wrappedHandler("/debug/actions/kamikaze", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { wrappedHandler("/debug/actions/kamikaze", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
slaves := k.slaveHostNames.SlaveIDs() slaves := k.slaveHostNames.SlaveIDs()
@ -257,7 +255,7 @@ func (k *MesosScheduler) Registered(drv bindings.SchedulerDriver, fid *mesos.Fra
k.registered = true k.registered = true
k.onRegistration.Do(func() { k.onInitialRegistration(drv) }) k.onRegistration.Do(func() { k.onInitialRegistration(drv) })
k.reconciler.RequestExplicit() k.tasksReconciler.RequestExplicit()
} }
// Reregistered is called when the scheduler re-registered with the master successfully. // Reregistered is called when the scheduler re-registered with the master successfully.
@ -270,7 +268,7 @@ func (k *MesosScheduler) Reregistered(drv bindings.SchedulerDriver, mi *mesos.Ma
k.registered = true k.registered = true
k.onRegistration.Do(func() { k.onInitialRegistration(drv) }) k.onRegistration.Do(func() { k.onInitialRegistration(drv) })
k.reconciler.RequestExplicit() k.tasksReconciler.RequestExplicit()
} }
// perform one-time initialization actions upon the first registration event received from Mesos. // perform one-time initialization actions upon the first registration event received from Mesos.
@ -290,13 +288,13 @@ func (k *MesosScheduler) onInitialRegistration(driver bindings.SchedulerDriver)
r1 := k.makeTaskRegistryReconciler() r1 := k.makeTaskRegistryReconciler()
r2 := k.makePodRegistryReconciler() r2 := k.makePodRegistryReconciler()
k.reconciler = operations.NewTasksReconciler(k.asRegisteredMaster, k.makeCompositeReconciler(r1, r2), k.tasksReconciler = operations.NewTasksReconciler(k.asRegisteredMaster, k.makeCompositeReconciler(r1, r2),
k.reconcileCooldown, k.schedulerConfig.ExplicitReconciliationAbortTimeout.Duration, k.terminate) k.reconcileCooldown, k.schedulerConfig.ExplicitReconciliationAbortTimeout.Duration, k.terminate)
go k.reconciler.Run(driver) go k.tasksReconciler.Run(driver)
if k.reconcileInterval > 0 { if k.reconcileInterval > 0 {
ri := time.Duration(k.reconcileInterval) * time.Second ri := time.Duration(k.reconcileInterval) * time.Second
time.AfterFunc(k.schedulerConfig.InitialImplicitReconciliationDelay.Duration, func() { runtime.Until(k.reconciler.RequestImplicit, ri, k.terminate) }) time.AfterFunc(k.schedulerConfig.InitialImplicitReconciliationDelay.Duration, func() { runtime.Until(k.tasksReconciler.RequestImplicit, ri, k.terminate) })
log.Infof("will perform implicit task reconciliation at interval: %v after %v", ri, k.schedulerConfig.InitialImplicitReconciliationDelay.Duration) log.Infof("will perform implicit task reconciliation at interval: %v after %v", ri, k.schedulerConfig.InitialImplicitReconciliationDelay.Duration)
} }
} }
@ -392,7 +390,7 @@ func (k *MesosScheduler) StatusUpdate(driver bindings.SchedulerDriver, taskStatu
case mesos.TaskState_TASK_FAILED, mesos.TaskState_TASK_ERROR: case mesos.TaskState_TASK_FAILED, mesos.TaskState_TASK_ERROR:
if task, _ := k.taskRegistry.UpdateStatus(taskStatus); task != nil { if task, _ := k.taskRegistry.UpdateStatus(taskStatus); task != nil {
if task.Has(podtask.Launched) && !task.Has(podtask.Bound) { if task.Has(podtask.Launched) && !task.Has(podtask.Bound) {
go k.loop.ReconcilePodTask(task) go k.podReconciler.Reconcile(task)
return return
} }
} else { } else {
@ -440,7 +438,7 @@ func (k *MesosScheduler) reconcileTerminalTask(driver bindings.SchedulerDriver,
} else if taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED || taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED { } else if taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED || taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED {
// attempt to prevent dangling pods in the pod and task registries // attempt to prevent dangling pods in the pod and task registries
log.V(1).Infof("request explicit reconciliation to clean up for task %v after executor reported (terminated/unregistered)", taskStatus.TaskId.GetValue()) log.V(1).Infof("request explicit reconciliation to clean up for task %v after executor reported (terminated/unregistered)", taskStatus.TaskId.GetValue())
k.reconciler.RequestExplicit() k.tasksReconciler.RequestExplicit()
} else if taskStatus.GetState() == mesos.TaskState_TASK_LOST && state == podtask.StateRunning && taskStatus.ExecutorId != nil && taskStatus.SlaveId != nil { } else if taskStatus.GetState() == mesos.TaskState_TASK_LOST && state == podtask.StateRunning && taskStatus.ExecutorId != nil && taskStatus.SlaveId != nil {
//TODO(jdef) this may not be meaningful once we have proper checkpointing and master detection //TODO(jdef) this may not be meaningful once we have proper checkpointing and master detection
//If we're reconciling and receive this then the executor may be //If we're reconciling and receive this then the executor may be

View File

@ -765,14 +765,14 @@ func (s *SchedulerServer) bootstrap(hks hyperkube.Interface, sc *schedcfg.Config
eventBroadcaster := record.NewBroadcaster() eventBroadcaster := record.NewBroadcaster()
recorder := eventBroadcaster.NewRecorder(api.EventSource{Component: "scheduler"}) recorder := eventBroadcaster.NewRecorder(api.EventSource{Component: "scheduler"})
lw := cache.NewListWatchFromClient(client, "pods", api.NamespaceAll, fields.Everything()) lw := cache.NewListWatchFromClient(client, "pods", api.NamespaceAll, fields.Everything())
loop := operations.NewSchedulerLoop(sc, fw, client, recorder, schedulerProcess.Terminal(), s.mux, lw) loop, pr := operations.NewScheduler(sc, fw, client, recorder, schedulerProcess.Terminal(), s.mux, lw)
runtime.On(mesosScheduler.Registration(), func() { loop.Run(schedulerProcess.Terminal()) }) runtime.On(mesosScheduler.Registration(), func() { loop.Run(schedulerProcess.Terminal()) })
runtime.On(mesosScheduler.Registration(), s.newServiceWriter(schedulerProcess.Terminal())) runtime.On(mesosScheduler.Registration(), s.newServiceWriter(schedulerProcess.Terminal()))
driverFactory := ha.DriverFactory(func() (drv bindings.SchedulerDriver, err error) { driverFactory := ha.DriverFactory(func() (drv bindings.SchedulerDriver, err error) {
log.V(1).Infoln("performing deferred initialization") log.V(1).Infoln("performing deferred initialization")
if err = mesosScheduler.Init(schedulerProcess.Master(), loop, s.mux); err != nil { if err = mesosScheduler.Init(schedulerProcess.Master(), pr, s.mux); err != nil {
return nil, fmt.Errorf("failed to initialize pod scheduler: %v", err) return nil, fmt.Errorf("failed to initialize pod scheduler: %v", err)
} }
log.V(1).Infoln("deferred init complete") log.V(1).Infoln("deferred init complete")