fix k8sm-787 scheduler failover regression

This commit is contained in:
James DeFelice 2016-02-23 17:43:00 +00:00
parent d0ce85a6d1
commit 250a511a78
2 changed files with 53 additions and 32 deletions

View File

@ -17,8 +17,6 @@ limitations under the License.
package election package election
import ( import (
"sync"
"k8s.io/kubernetes/contrib/mesos/pkg/runtime" "k8s.io/kubernetes/contrib/mesos/pkg/runtime"
"k8s.io/kubernetes/pkg/watch" "k8s.io/kubernetes/pkg/watch"
@ -46,14 +44,7 @@ type Service interface {
} }
type notifier struct { type notifier struct {
changed chan struct{} // to notify the service loop about changed state masters chan Master // elected masters arrive here, should be buffered to better deal with rapidly flapping masters
// desired is updated with every change, current is updated after
// Start()/Stop() finishes. 'cond' is used to signal that a change
// might be needed. This handles the case where mastership flops
// around without calling Start()/Stop() excessively.
desired, current Master
lock sync.Mutex // to protect the desired variable
// for comparison, to see if we are master. // for comparison, to see if we are master.
id Master id Master
@ -64,8 +55,7 @@ type notifier struct {
// Notify runs Elect() on m, and calls Start()/Stop() on s when the // Notify runs Elect() on m, and calls Start()/Stop() on s when the
// elected master starts/stops matching 'id'. Never returns. // elected master starts/stops matching 'id'. Never returns.
func Notify(m MasterElector, path, id string, s Service, abort <-chan struct{}) { func Notify(m MasterElector, path, id string, s Service, abort <-chan struct{}) {
n := &notifier{id: Master(id), service: s} n := &notifier{id: Master(id), service: s, masters: make(chan Master, 1)}
n.changed = make(chan struct{})
finished := runtime.After(func() { finished := runtime.After(func() {
runtime.Until(func() { runtime.Until(func() {
for { for {
@ -87,14 +77,21 @@ func Notify(m MasterElector, path, id string, s Service, abort <-chan struct{})
break break
} }
n.lock.Lock() sendElected:
n.desired = electedMaster for {
n.lock.Unlock()
// notify serviceLoop, but don't block. If a change
// is queued already it will see the new n.desired.
select { select {
case n.changed <- struct{}{}: case <-abort:
return
case n.masters <- electedMaster:
break sendElected
default: // ring full, discard old value and add the new
select {
case <-abort:
return
case <-n.masters:
default: // ring was cleared for us?!
}
}
} }
} }
} }
@ -106,22 +103,19 @@ func Notify(m MasterElector, path, id string, s Service, abort <-chan struct{})
// serviceLoop waits for changes, and calls Start()/Stop() as needed. // serviceLoop waits for changes, and calls Start()/Stop() as needed.
func (n *notifier) serviceLoop(abort <-chan struct{}) { func (n *notifier) serviceLoop(abort <-chan struct{}) {
var current Master
for { for {
select { select {
case <-abort: case <-abort:
return return
case <-n.changed: case desired := <-n.masters:
n.lock.Lock() if current != n.id && desired == n.id {
newDesired := n.desired // copy value to avoid race below n.service.Validate(desired, current)
n.lock.Unlock()
if n.current != n.id && newDesired == n.id {
n.service.Validate(newDesired, n.current)
n.service.Start() n.service.Start()
} else if n.current == n.id && newDesired != n.id { } else if current == n.id && desired != n.id {
n.service.Stop() n.service.Stop()
} }
n.current = newDesired current = desired
} }
} }
} }

View File

@ -43,6 +43,7 @@ import (
mesos "github.com/mesos/mesos-go/mesosproto" mesos "github.com/mesos/mesos-go/mesosproto"
mutil "github.com/mesos/mesos-go/mesosutil" mutil "github.com/mesos/mesos-go/mesosutil"
bindings "github.com/mesos/mesos-go/scheduler" bindings "github.com/mesos/mesos-go/scheduler"
"github.com/pborman/uuid"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"github.com/spf13/pflag" "github.com/spf13/pflag"
"golang.org/x/net/context" "golang.org/x/net/context"
@ -594,8 +595,14 @@ func (s *SchedulerServer) Run(hks hyperkube.Interface, _ []string) error {
validation := ha.ValidationFunc(validateLeadershipTransition) validation := ha.ValidationFunc(validateLeadershipTransition)
srv := ha.NewCandidate(schedulerProcess, driverFactory, validation) srv := ha.NewCandidate(schedulerProcess, driverFactory, validation)
path := meta.ElectionPath(s.frameworkName) path := meta.ElectionPath(s.frameworkName)
log.Infof("registering for election at %v with id %v", path, eid.GetValue()) uuid := eid.GetValue() + ":" + uuid.New() // unique for each scheduler instance
go election.Notify(election.NewEtcdMasterElector(etcdClient), path, eid.GetValue(), srv, nil) log.Infof("registering for election at %v with id %v", path, uuid)
go election.Notify(
election.NewEtcdMasterElector(etcdClient),
path,
uuid,
srv,
nil)
} else { } else {
log.Infoln("self-electing in non-HA mode") log.Infoln("self-electing in non-HA mode")
schedulerProcess.Elect(driverFactory) schedulerProcess.Elect(driverFactory)
@ -650,8 +657,28 @@ func (s *SchedulerServer) awaitFailover(schedulerProcess schedulerProcessInterfa
func validateLeadershipTransition(desired, current string) { func validateLeadershipTransition(desired, current string) {
log.Infof("validating leadership transition") log.Infof("validating leadership transition")
// desired, current are of the format <executor-id>:<scheduler-uuid> (see Run()).
// parse them and ensure that executor ID's match, otherwise the cluster can get into
// a bad state after scheduler failover: executor ID is a config hash that must remain
// consistent across failover events.
var (
i = strings.LastIndex(desired, ":")
j = strings.LastIndex(current, ":")
)
if i > -1 {
desired = desired[0:i]
} else {
log.Fatalf("desired id %q is invalid", desired)
}
if j > -1 {
current = current[0:j]
} else if current != "" {
log.Fatalf("current id %q is invalid", current)
}
if desired != current && current != "" { if desired != current && current != "" {
log.Fatalf("desired executor id != current executor id", desired, current) log.Fatalf("desired executor id %q != current executor id %q", desired, current)
} }
} }