fix k8sm-787 scheduler failover regression
commit 250a511a78
parent d0ce85a6d1
@@ -17,8 +17,6 @@ limitations under the License.
 package election
 
 import (
-    "sync"
-
     "k8s.io/kubernetes/contrib/mesos/pkg/runtime"
     "k8s.io/kubernetes/pkg/watch"
 )
@@ -46,14 +44,7 @@ type Service interface {
 }
 
 type notifier struct {
-    changed chan struct{} // to notify the service loop about changed state
+    masters chan Master // elected masters arrive here, should be buffered to better deal with rapidly flapping masters
 
-    // desired is updated with every change, current is updated after
-    // Start()/Stop() finishes. 'cond' is used to signal that a change
-    // might be needed. This handles the case where mastership flops
-    // around without calling Start()/Stop() excessively.
-    desired, current Master
-    lock             sync.Mutex // to protect the desired variable
-
     // for comparison, to see if we are master.
     id Master
@@ -64,8 +55,7 @@ type notifier struct {
 // Notify runs Elect() on m, and calls Start()/Stop() on s when the
 // elected master starts/stops matching 'id'. Never returns.
 func Notify(m MasterElector, path, id string, s Service, abort <-chan struct{}) {
-    n := &notifier{id: Master(id), service: s}
-    n.changed = make(chan struct{})
+    n := &notifier{id: Master(id), service: s, masters: make(chan Master, 1)}
     finished := runtime.After(func() {
         runtime.Until(func() {
             for {
@@ -87,14 +77,21 @@ func Notify(m MasterElector, path, id string, s Service, abort <-chan struct{})
                     break
                 }
 
-                n.lock.Lock()
-                n.desired = electedMaster
-                n.lock.Unlock()
-
-                // notify serviceLoop, but don't block. If a change
-                // is queued already it will see the new n.desired.
-                select {
-                case n.changed <- struct{}{}:
+            sendElected:
+                for {
+                    select {
+                    case <-abort:
+                        return
+                    case n.masters <- electedMaster:
+                        break sendElected
+                    default: // ring full, discard old value and add the new
+                        select {
+                        case <-abort:
+                            return
+                        case <-n.masters:
+                        default: // ring was cleared for us?!
+                        }
+                    }
                 }
             }
         }
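The hunk above replaces the lock-protected desired/current fields with a buffered channel of size one used as a single-slot ring: when a new election result arrives and the slot is still full, the stale value is discarded so the service loop only ever observes the latest master. A minimal standalone sketch of that pattern (not part of the commit; sendLatest and the sample values are illustrative):

package main

import "fmt"

type Master string

// sendLatest delivers m into a 1-buffered channel, dropping any stale value
// that is still queued so the receiver always gets the most recent master.
func sendLatest(masters chan Master, m Master) {
    for {
        select {
        case masters <- m:
            return
        default: // slot full: discard the old value, then retry with the new one
            select {
            case <-masters:
            default:
            }
        }
    }
}

func main() {
    masters := make(chan Master, 1)
    // Rapidly flapping masters: only the last value should survive.
    for _, m := range []Master{"a", "b", "c"} {
        sendLatest(masters, m)
    }
    fmt.Println(<-masters) // prints "c"; intermediate masters were coalesced away
}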
@@ -106,22 +103,19 @@ func Notify(m MasterElector, path, id string, s Service, abort <-chan struct{})
 
 // serviceLoop waits for changes, and calls Start()/Stop() as needed.
 func (n *notifier) serviceLoop(abort <-chan struct{}) {
+    var current Master
     for {
         select {
         case <-abort:
             return
-        case <-n.changed:
-            n.lock.Lock()
-            newDesired := n.desired // copy value to avoid race below
-            n.lock.Unlock()
-
-            if n.current != n.id && newDesired == n.id {
-                n.service.Validate(newDesired, n.current)
+        case desired := <-n.masters:
+            if current != n.id && desired == n.id {
+                n.service.Validate(desired, current)
                 n.service.Start()
-            } else if n.current == n.id && newDesired != n.id {
+            } else if current == n.id && desired != n.id {
                 n.service.Stop()
             }
-            n.current = newDesired
+            current = desired
         }
     }
 }
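For reference, a hedged sketch of the transition rule serviceLoop now applies to each value read from n.masters: start only when mastership is gained, stop only when it is lost. The decide helper and its string results are illustrative only, not part of the commit:

package main

import "fmt"

type Master string

// decide mirrors the check in serviceLoop: Start() on a not-master -> master
// transition, Stop() on the reverse, otherwise do nothing.
func decide(id, current, desired Master) string {
    switch {
    case current != id && desired == id:
        return "start"
    case current == id && desired != id:
        return "stop"
    default:
        return "no-op"
    }
}

func main() {
    fmt.Println(decide("me", "", "me"))     // start: we just won the election
    fmt.Println(decide("me", "me", "you"))  // stop: mastership moved elsewhere
    fmt.Println(decide("me", "you", "you")) // no-op: we were never the master
}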

@@ -43,6 +43,7 @@ import (
     mesos "github.com/mesos/mesos-go/mesosproto"
     mutil "github.com/mesos/mesos-go/mesosutil"
     bindings "github.com/mesos/mesos-go/scheduler"
+    "github.com/pborman/uuid"
     "github.com/prometheus/client_golang/prometheus"
     "github.com/spf13/pflag"
     "golang.org/x/net/context"
@@ -594,8 +595,14 @@ func (s *SchedulerServer) Run(hks hyperkube.Interface, _ []string) error {
         validation := ha.ValidationFunc(validateLeadershipTransition)
         srv := ha.NewCandidate(schedulerProcess, driverFactory, validation)
         path := meta.ElectionPath(s.frameworkName)
-        log.Infof("registering for election at %v with id %v", path, eid.GetValue())
-        go election.Notify(election.NewEtcdMasterElector(etcdClient), path, eid.GetValue(), srv, nil)
+        uuid := eid.GetValue() + ":" + uuid.New() // unique for each scheduler instance
+        log.Infof("registering for election at %v with id %v", path, uuid)
+        go election.Notify(
+            election.NewEtcdMasterElector(etcdClient),
+            path,
+            uuid,
+            srv,
+            nil)
     } else {
         log.Infoln("self-electing in non-HA mode")
         schedulerProcess.Elect(driverFactory)
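The election id registered above now has the form <executor-id>:<scheduler-uuid>, so every scheduler instance registers uniquely while the executor-id prefix stays comparable across failovers. A small sketch under assumed values (the executorID string is hypothetical; uuid.New() is the same github.com/pborman/uuid call added to the imports above):

package main

import (
    "fmt"

    "github.com/pborman/uuid"
)

func main() {
    executorID := "executor-0123abcd" // hypothetical executor id (a config hash)
    electionID := executorID + ":" + uuid.New()
    fmt.Println(electionID) // e.g. "executor-0123abcd:9b1deb4d-..."
}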
@@ -650,8 +657,28 @@ func (s *SchedulerServer) awaitFailover(schedulerProcess schedulerProcessInterfa
 
 func validateLeadershipTransition(desired, current string) {
     log.Infof("validating leadership transition")
+    // desired, current are of the format <executor-id>:<scheduler-uuid> (see Run()).
+    // parse them and ensure that executor ID's match, otherwise the cluster can get into
+    // a bad state after scheduler failover: executor ID is a config hash that must remain
+    // consistent across failover events.
+    var (
+        i = strings.LastIndex(desired, ":")
+        j = strings.LastIndex(current, ":")
+    )
+
+    if i > -1 {
+        desired = desired[0:i]
+    } else {
+        log.Fatalf("desired id %q is invalid", desired)
+    }
+    if j > -1 {
+        current = current[0:j]
+    } else if current != "" {
+        log.Fatalf("current id %q is invalid", current)
+    }
+
     if desired != current && current != "" {
-        log.Fatalf("desired executor id != current executor id", desired, current)
+        log.Fatalf("desired executor id %q != current executor id %q", desired, current)
     }
 }
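A standalone sketch of the comparison validateLeadershipTransition now performs, using hypothetical ids; the executorID helper is illustrative and not part of the commit:

package main

import (
    "fmt"
    "strings"
)

// executorID strips the ":<scheduler-uuid>" suffix from an "<executor-id>:<uuid>"
// election id, returning "" when no ":" separator is present.
func executorID(id string) string {
    if i := strings.LastIndex(id, ":"); i > -1 {
        return id[:i]
    }
    return ""
}

func main() {
    desired := "executor-0123abcd:aaaa-bbbb" // hypothetical ids
    current := "executor-0123abcd:cccc-dddd"
    // Same executor config hash on both sides, so the transition is allowed.
    fmt.Println(executorID(desired) == executorID(current)) // true
}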