diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json index 5f30ed59f74..2030531dc3a 100644 --- a/Godeps/Godeps.json +++ b/Godeps/Godeps.json @@ -358,6 +358,10 @@ "Comment": "v0.8.8", "Rev": "afde71eb1740fd763ab9450e1f700ba0e53c36d0" }, + { + "ImportPath": "github.com/kardianos/osext", + "Rev": "8fef92e41e22a70e700a96b29f066cda30ea24ef" + }, { "ImportPath": "github.com/kr/pty", "Comment": "release.r56-25-g05017fc", @@ -367,10 +371,18 @@ "ImportPath": "github.com/matttproud/golang_protobuf_extensions/pbutil", "Rev": "fc2b8d3a73c4867e51861bbdd5ae3c1f0869dd6a" }, + { + "ImportPath": "github.com/mesos/mesos-go/auth", + "Rev": "4b1767c0dfc51020e01f35da5b38472f40ce572a" + }, { "ImportPath": "github.com/mesos/mesos-go/detector", "Rev": "4b1767c0dfc51020e01f35da5b38472f40ce572a" }, + { + "ImportPath": "github.com/mesos/mesos-go/executor", + "Rev": "4b1767c0dfc51020e01f35da5b38472f40ce572a" + }, { "ImportPath": "github.com/mesos/mesos-go/mesosproto", "Rev": "4b1767c0dfc51020e01f35da5b38472f40ce572a" @@ -379,6 +391,14 @@ "ImportPath": "github.com/mesos/mesos-go/mesosutil", "Rev": "4b1767c0dfc51020e01f35da5b38472f40ce572a" }, + { + "ImportPath": "github.com/mesos/mesos-go/messenger", + "Rev": "4b1767c0dfc51020e01f35da5b38472f40ce572a" + }, + { + "ImportPath": "github.com/mesos/mesos-go/scheduler", + "Rev": "4b1767c0dfc51020e01f35da5b38472f40ce572a" + }, { "ImportPath": "github.com/mesos/mesos-go/upid", "Rev": "4b1767c0dfc51020e01f35da5b38472f40ce572a" diff --git a/Godeps/_workspace/src/github.com/kardianos/osext/LICENSE b/Godeps/_workspace/src/github.com/kardianos/osext/LICENSE new file mode 100644 index 00000000000..74487567632 --- /dev/null +++ b/Godeps/_workspace/src/github.com/kardianos/osext/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2012 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/Godeps/_workspace/src/github.com/kardianos/osext/README.md b/Godeps/_workspace/src/github.com/kardianos/osext/README.md new file mode 100644 index 00000000000..820e1ecb544 --- /dev/null +++ b/Godeps/_workspace/src/github.com/kardianos/osext/README.md @@ -0,0 +1,14 @@ +### Extensions to the "os" package. + +## Find the current Executable and ExecutableFolder. + +There is sometimes utility in finding the current executable file +that is running. This can be used for upgrading the current executable +or finding resources located relative to the executable file. + +Multi-platform and supports: + * Linux + * OS X + * Windows + * Plan 9 + * BSDs. diff --git a/Godeps/_workspace/src/github.com/kardianos/osext/osext.go b/Godeps/_workspace/src/github.com/kardianos/osext/osext.go new file mode 100644 index 00000000000..4ed4b9aa334 --- /dev/null +++ b/Godeps/_workspace/src/github.com/kardianos/osext/osext.go @@ -0,0 +1,27 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Extensions to the standard "os" package. +package osext + +import "path/filepath" + +// Executable returns an absolute path that can be used to +// re-invoke the current program. +// It may not be valid after the current program exits. +func Executable() (string, error) { + p, err := executable() + return filepath.Clean(p), err +} + +// Returns same path as Executable, returns just the folder +// path. Excludes the executable name. +func ExecutableFolder() (string, error) { + p, err := Executable() + if err != nil { + return "", err + } + folder, _ := filepath.Split(p) + return folder, nil +} diff --git a/Godeps/_workspace/src/github.com/kardianos/osext/osext_plan9.go b/Godeps/_workspace/src/github.com/kardianos/osext/osext_plan9.go new file mode 100644 index 00000000000..655750c5426 --- /dev/null +++ b/Godeps/_workspace/src/github.com/kardianos/osext/osext_plan9.go @@ -0,0 +1,20 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package osext + +import ( + "os" + "strconv" + "syscall" +) + +func executable() (string, error) { + f, err := os.Open("/proc/" + strconv.Itoa(os.Getpid()) + "/text") + if err != nil { + return "", err + } + defer f.Close() + return syscall.Fd2path(int(f.Fd())) +} diff --git a/Godeps/_workspace/src/github.com/kardianos/osext/osext_procfs.go b/Godeps/_workspace/src/github.com/kardianos/osext/osext_procfs.go new file mode 100644 index 00000000000..b2598bc77a4 --- /dev/null +++ b/Godeps/_workspace/src/github.com/kardianos/osext/osext_procfs.go @@ -0,0 +1,36 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
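// An illustrative sketch (not part of the vendored files): typical client use
// of the Executable/ExecutableFolder API defined in osext.go above, locating a
// resource shipped next to the running binary. The "config.json" name and the
// log-based error handling are assumptions for the example.
//
//	folder, err := osext.ExecutableFolder()
//	if err != nil {
//		log.Fatalf("cannot locate executable folder: %v", err)
//	}
//	configPath := filepath.Join(folder, "config.json")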
+ +// +build linux netbsd openbsd solaris dragonfly + +package osext + +import ( + "errors" + "fmt" + "os" + "runtime" + "strings" +) + +func executable() (string, error) { + switch runtime.GOOS { + case "linux": + const deletedTag = " (deleted)" + execpath, err := os.Readlink("/proc/self/exe") + if err != nil { + return execpath, err + } + execpath = strings.TrimSuffix(execpath, deletedTag) + execpath = strings.TrimPrefix(execpath, deletedTag) + return execpath, nil + case "netbsd": + return os.Readlink("/proc/curproc/exe") + case "openbsd", "dragonfly": + return os.Readlink("/proc/curproc/file") + case "solaris": + return os.Readlink(fmt.Sprintf("/proc/%d/path/a.out", os.Getpid())) + } + return "", errors.New("ExecPath not implemented for " + runtime.GOOS) +} diff --git a/Godeps/_workspace/src/github.com/kardianos/osext/osext_sysctl.go b/Godeps/_workspace/src/github.com/kardianos/osext/osext_sysctl.go new file mode 100644 index 00000000000..b66cac878c4 --- /dev/null +++ b/Godeps/_workspace/src/github.com/kardianos/osext/osext_sysctl.go @@ -0,0 +1,79 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build darwin freebsd + +package osext + +import ( + "os" + "path/filepath" + "runtime" + "syscall" + "unsafe" +) + +var initCwd, initCwdErr = os.Getwd() + +func executable() (string, error) { + var mib [4]int32 + switch runtime.GOOS { + case "freebsd": + mib = [4]int32{1 /* CTL_KERN */, 14 /* KERN_PROC */, 12 /* KERN_PROC_PATHNAME */, -1} + case "darwin": + mib = [4]int32{1 /* CTL_KERN */, 38 /* KERN_PROCARGS */, int32(os.Getpid()), -1} + } + + n := uintptr(0) + // Get length. + _, _, errNum := syscall.Syscall6(syscall.SYS___SYSCTL, uintptr(unsafe.Pointer(&mib[0])), 4, 0, uintptr(unsafe.Pointer(&n)), 0, 0) + if errNum != 0 { + return "", errNum + } + if n == 0 { // This shouldn't happen. + return "", nil + } + buf := make([]byte, n) + _, _, errNum = syscall.Syscall6(syscall.SYS___SYSCTL, uintptr(unsafe.Pointer(&mib[0])), 4, uintptr(unsafe.Pointer(&buf[0])), uintptr(unsafe.Pointer(&n)), 0, 0) + if errNum != 0 { + return "", errNum + } + if n == 0 { // This shouldn't happen. + return "", nil + } + for i, v := range buf { + if v == 0 { + buf = buf[:i] + break + } + } + var err error + execPath := string(buf) + // execPath will not be empty due to above checks. + // Try to get the absolute path if the execPath is not rooted. + if execPath[0] != '/' { + execPath, err = getAbs(execPath) + if err != nil { + return execPath, err + } + } + // For darwin KERN_PROCARGS may return the path to a symlink rather than the + // actual executable. + if runtime.GOOS == "darwin" { + if execPath, err = filepath.EvalSymlinks(execPath); err != nil { + return execPath, err + } + } + return execPath, nil +} + +func getAbs(execPath string) (string, error) { + if initCwdErr != nil { + return execPath, initCwdErr + } + // The execPath may begin with a "../" or a "./" so clean it first. + // Join the two paths, trailing and starting slashes undetermined, so use + // the generic Join function. + return filepath.Join(initCwd, filepath.Clean(execPath)), nil +} diff --git a/Godeps/_workspace/src/github.com/kardianos/osext/osext_test.go b/Godeps/_workspace/src/github.com/kardianos/osext/osext_test.go new file mode 100644 index 00000000000..5aafa3af2d2 --- /dev/null +++ b/Godeps/_workspace/src/github.com/kardianos/osext/osext_test.go @@ -0,0 +1,180 @@ +// Copyright 2012 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build darwin linux freebsd netbsd windows + +package osext + +import ( + "bytes" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "runtime" + "testing" +) + +const ( + executableEnvVar = "OSTEST_OUTPUT_EXECUTABLE" + + executableEnvValueMatch = "match" + executableEnvValueDelete = "delete" +) + +func TestExecutableMatch(t *testing.T) { + ep, err := Executable() + if err != nil { + t.Fatalf("Executable failed: %v", err) + } + + // fullpath to be of the form "dir/prog". + dir := filepath.Dir(filepath.Dir(ep)) + fullpath, err := filepath.Rel(dir, ep) + if err != nil { + t.Fatalf("filepath.Rel: %v", err) + } + // Make child start with a relative program path. + // Alter argv[0] for child to verify getting real path without argv[0]. + cmd := &exec.Cmd{ + Dir: dir, + Path: fullpath, + Env: []string{fmt.Sprintf("%s=%s", executableEnvVar, executableEnvValueMatch)}, + } + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("exec(self) failed: %v", err) + } + outs := string(out) + if !filepath.IsAbs(outs) { + t.Fatalf("Child returned %q, want an absolute path", out) + } + if !sameFile(outs, ep) { + t.Fatalf("Child returned %q, not the same file as %q", out, ep) + } +} + +func TestExecutableDelete(t *testing.T) { + if runtime.GOOS != "linux" { + t.Skip() + } + fpath, err := Executable() + if err != nil { + t.Fatalf("Executable failed: %v", err) + } + + r, w := io.Pipe() + stderrBuff := &bytes.Buffer{} + stdoutBuff := &bytes.Buffer{} + cmd := &exec.Cmd{ + Path: fpath, + Env: []string{fmt.Sprintf("%s=%s", executableEnvVar, executableEnvValueDelete)}, + Stdin: r, + Stderr: stderrBuff, + Stdout: stdoutBuff, + } + err = cmd.Start() + if err != nil { + t.Fatalf("exec(self) start failed: %v", err) + } + + tempPath := fpath + "_copy" + _ = os.Remove(tempPath) + + err = copyFile(tempPath, fpath) + if err != nil { + t.Fatalf("copy file failed: %v", err) + } + err = os.Remove(fpath) + if err != nil { + t.Fatalf("remove running test file failed: %v", err) + } + err = os.Rename(tempPath, fpath) + if err != nil { + t.Fatalf("rename copy to previous name failed: %v", err) + } + + w.Write([]byte{0}) + w.Close() + + err = cmd.Wait() + if err != nil { + t.Fatalf("exec wait failed: %v", err) + } + + childPath := stderrBuff.String() + if !filepath.IsAbs(childPath) { + t.Fatalf("Child returned %q, want an absolute path", childPath) + } + if !sameFile(childPath, fpath) { + t.Fatalf("Child returned %q, not the same file as %q", childPath, fpath) + } +} + +func sameFile(fn1, fn2 string) bool { + fi1, err := os.Stat(fn1) + if err != nil { + return false + } + fi2, err := os.Stat(fn2) + if err != nil { + return false + } + return os.SameFile(fi1, fi2) +} +func copyFile(dest, src string) error { + df, err := os.Create(dest) + if err != nil { + return err + } + defer df.Close() + + sf, err := os.Open(src) + if err != nil { + return err + } + defer sf.Close() + + _, err = io.Copy(df, sf) + return err +} + +func TestMain(m *testing.M) { + env := os.Getenv(executableEnvVar) + switch env { + case "": + os.Exit(m.Run()) + case executableEnvValueMatch: + // First chdir to another path. 
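// (Changing the working directory first matters here: the parent launched this
// child with a relative Path, so a naive join of argv[0] with the current
// working directory would now yield the wrong answer; Executable() must still
// report the correct absolute path.)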
+ dir := "/" + if runtime.GOOS == "windows" { + dir = filepath.VolumeName(".") + } + os.Chdir(dir) + if ep, err := Executable(); err != nil { + fmt.Fprint(os.Stderr, "ERROR: ", err) + } else { + fmt.Fprint(os.Stderr, ep) + } + case executableEnvValueDelete: + bb := make([]byte, 1) + var err error + n, err := os.Stdin.Read(bb) + if err != nil { + fmt.Fprint(os.Stderr, "ERROR: ", err) + os.Exit(2) + } + if n != 1 { + fmt.Fprint(os.Stderr, "ERROR: n != 1, n == ", n) + os.Exit(2) + } + if ep, err := Executable(); err != nil { + fmt.Fprint(os.Stderr, "ERROR: ", err) + } else { + fmt.Fprint(os.Stderr, ep) + } + } + os.Exit(0) +} diff --git a/Godeps/_workspace/src/github.com/kardianos/osext/osext_windows.go b/Godeps/_workspace/src/github.com/kardianos/osext/osext_windows.go new file mode 100644 index 00000000000..72d282cf8c0 --- /dev/null +++ b/Godeps/_workspace/src/github.com/kardianos/osext/osext_windows.go @@ -0,0 +1,34 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package osext + +import ( + "syscall" + "unicode/utf16" + "unsafe" +) + +var ( + kernel = syscall.MustLoadDLL("kernel32.dll") + getModuleFileNameProc = kernel.MustFindProc("GetModuleFileNameW") +) + +// GetModuleFileName() with hModule = NULL +func executable() (exePath string, err error) { + return getModuleFileName() +} + +func getModuleFileName() (string, error) { + var n uint32 + b := make([]uint16, syscall.MAX_PATH) + size := uint32(len(b)) + + r0, _, e1 := getModuleFileNameProc.Call(0, uintptr(unsafe.Pointer(&b[0])), uintptr(size)) + n = uint32(r0) + if n == 0 { + return "", e1 + } + return string(utf16.Decode(b[0:n])), nil +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/callback/interface.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/callback/interface.go new file mode 100644 index 00000000000..d870fd3c729 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/callback/interface.go @@ -0,0 +1,28 @@ +package callback + +import ( + "fmt" +) + +type Unsupported struct { + Callback Interface +} + +func (uc *Unsupported) Error() string { + return fmt.Sprintf("Unsupported callback <%T>: %v", uc.Callback, uc.Callback) +} + +type Interface interface { + // marker interface +} + +type Handler interface { + // may return an Unsupported error on failure + Handle(callbacks ...Interface) error +} + +type HandlerFunc func(callbacks ...Interface) error + +func (f HandlerFunc) Handle(callbacks ...Interface) error { + return f(callbacks...) 
+} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/callback/interprocess.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/callback/interprocess.go new file mode 100644 index 00000000000..d9b389cafc0 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/callback/interprocess.go @@ -0,0 +1,27 @@ +package callback + +import ( + "github.com/mesos/mesos-go/upid" +) + +type Interprocess struct { + client upid.UPID + server upid.UPID +} + +func NewInterprocess() *Interprocess { + return &Interprocess{} +} + +func (cb *Interprocess) Client() upid.UPID { + return cb.client +} + +func (cb *Interprocess) Server() upid.UPID { + return cb.server +} + +func (cb *Interprocess) Set(server, client upid.UPID) { + cb.server = server + cb.client = client +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/callback/name.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/callback/name.go new file mode 100644 index 00000000000..246020a9f2a --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/callback/name.go @@ -0,0 +1,17 @@ +package callback + +type Name struct { + name string +} + +func NewName() *Name { + return &Name{} +} + +func (cb *Name) Get() string { + return cb.name +} + +func (cb *Name) Set(name string) { + cb.name = name +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/callback/password.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/callback/password.go new file mode 100644 index 00000000000..6beadd07b42 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/callback/password.go @@ -0,0 +1,20 @@ +package callback + +type Password struct { + password []byte +} + +func NewPassword() *Password { + return &Password{} +} + +func (cb *Password) Get() []byte { + clone := make([]byte, len(cb.password)) + copy(clone, cb.password) + return clone +} + +func (cb *Password) Set(password []byte) { + cb.password = make([]byte, len(password)) + copy(cb.password, password) +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/interface.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/interface.go new file mode 100644 index 00000000000..94420f5af26 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/interface.go @@ -0,0 +1,63 @@ +package auth + +import ( + "errors" + "fmt" + "sync" + + log "github.com/golang/glog" + "github.com/mesos/mesos-go/auth/callback" + "golang.org/x/net/context" +) + +// SPI interface: login provider implementations support this interface, clients +// do not authenticate against this directly, instead they should use Login() +type Authenticatee interface { + // Returns no errors if successfully authenticated, otherwise a single + // error. + Authenticate(ctx context.Context, handler callback.Handler) error +} + +// Func adapter for interface: allow func's to implement the Authenticatee interface +// as long as the func signature matches +type AuthenticateeFunc func(ctx context.Context, handler callback.Handler) error + +func (f AuthenticateeFunc) Authenticate(ctx context.Context, handler callback.Handler) error { + return f(ctx, handler) +} + +var ( + // Authentication was attempted and failed (likely due to incorrect credentials, too + // many retries within a time window, etc). Distinctly different from authentication + // errors (e.g. network errors, configuration errors, etc). 
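// Callers can compare an error returned from Login() against this sentinel to
// distinguish rejected credentials from transport or configuration problems.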
+ AuthenticationFailed = errors.New("authentication failed") + + authenticateeProviders = make(map[string]Authenticatee) // authentication providers dict + providerLock sync.Mutex +) + +// Register an authentication provider (aka "login provider"). packages that +// provide Authenticatee implementations should invoke this func in their +// init() to register. +func RegisterAuthenticateeProvider(name string, auth Authenticatee) (err error) { + providerLock.Lock() + defer providerLock.Unlock() + + if _, found := authenticateeProviders[name]; found { + err = fmt.Errorf("authentication provider already registered: %v", name) + } else { + authenticateeProviders[name] = auth + log.V(1).Infof("registered authentication provider: %v", name) + } + return +} + +// Look up an authentication provider by name, returns non-nil and true if such +// a provider is found. +func getAuthenticateeProvider(name string) (provider Authenticatee, ok bool) { + providerLock.Lock() + defer providerLock.Unlock() + + provider, ok = authenticateeProviders[name] + return +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/login.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/login.go new file mode 100644 index 00000000000..416c2d61274 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/login.go @@ -0,0 +1,80 @@ +package auth + +import ( + "errors" + "fmt" + + "github.com/mesos/mesos-go/auth/callback" + "github.com/mesos/mesos-go/upid" + "golang.org/x/net/context" +) + +var ( + // No login provider name has been specified in a context.Context + NoLoginProviderName = errors.New("missing login provider name in context") +) + +// Main client entrypoint into the authentication APIs: clients are expected to +// invoke this func with a context containing a login provider name value. +// This may be written as: +// providerName := ... // the user has probably configured this via some flag +// handler := ... // handlers provide data like usernames and passwords +// ctx := ... // obtain some initial or timed context +// err := auth.Login(auth.WithLoginProvider(ctx, providerName), handler) +func Login(ctx context.Context, handler callback.Handler) error { + name, ok := LoginProviderFrom(ctx) + if !ok { + return NoLoginProviderName + } + provider, ok := getAuthenticateeProvider(name) + if !ok { + return fmt.Errorf("unrecognized login provider name in context: %s", name) + } + return provider.Authenticate(ctx, handler) +} + +// Unexported key type, avoids conflicts with other context-using packages. All +// context items registered from this package should use keys of this type. +type loginKeyType int + +const ( + loginProviderNameKey loginKeyType = iota // name of login provider to use + parentUpidKey // upid.UPID of some parent process +) + +// Return a context that inherits all values from the parent ctx and specifies +// the login provider name given here. Intended to be invoked before calls to +// Login(). +func WithLoginProvider(ctx context.Context, providerName string) context.Context { + return context.WithValue(ctx, loginProviderNameKey, providerName) +} + +// Return the name of the login provider specified in this context. +func LoginProviderFrom(ctx context.Context) (name string, ok bool) { + name, ok = ctx.Value(loginProviderNameKey).(string) + return +} + +// Return the name of the login provider specified in this context, or empty +// string if none. 
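// A slightly fuller sketch of the flow described above (illustrative only: the
// timeout, the parentPid value, and the error handling are assumptions; "SASL"
// is the ProviderName registered by the sasl package):
//
//	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
//	defer cancel()
//	ctx = auth.WithLoginProvider(ctx, "SASL")
//	ctx = auth.WithParentUPID(ctx, *parentPid) // required by the SASL provider
//	if err := auth.Login(ctx, handler); err != nil {
//		log.Fatalf("failed to authenticate: %v", err)
//	}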
+func LoginProvider(ctx context.Context) string { + name, _ := LoginProviderFrom(ctx) + return name +} + +func WithParentUPID(ctx context.Context, pid upid.UPID) context.Context { + return context.WithValue(ctx, parentUpidKey, pid) +} + +func ParentUPIDFrom(ctx context.Context) (pid upid.UPID, ok bool) { + pid, ok = ctx.Value(parentUpidKey).(upid.UPID) + return +} + +func ParentUPID(ctx context.Context) (upid *upid.UPID) { + if upid, ok := ParentUPIDFrom(ctx); ok { + return &upid + } else { + return nil + } +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/sasl/authenticatee.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/sasl/authenticatee.go new file mode 100644 index 00000000000..3d60bdb814a --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/sasl/authenticatee.go @@ -0,0 +1,358 @@ +package sasl + +import ( + "errors" + "fmt" + "sync/atomic" + + "github.com/gogo/protobuf/proto" + log "github.com/golang/glog" + "github.com/mesos/mesos-go/auth" + "github.com/mesos/mesos-go/auth/callback" + "github.com/mesos/mesos-go/auth/sasl/mech" + mesos "github.com/mesos/mesos-go/mesosproto" + "github.com/mesos/mesos-go/mesosutil/process" + "github.com/mesos/mesos-go/messenger" + "github.com/mesos/mesos-go/upid" + "golang.org/x/net/context" +) + +var ( + UnexpectedAuthenticationMechanisms = errors.New("Unexpected authentication 'mechanisms' received") + UnexpectedAuthenticationStep = errors.New("Unexpected authentication 'step' received") + UnexpectedAuthenticationCompleted = errors.New("Unexpected authentication 'completed' received") + UnexpectedAuthenticatorPid = errors.New("Unexpected authenticator pid") // authenticator pid changed mid-process + UnsupportedMechanism = errors.New("failed to identify a compatible mechanism") +) + +type statusType int32 + +const ( + statusReady statusType = iota + statusStarting + statusStepping + _statusTerminal // meta status, should never be assigned: all status types following are "terminal" + statusCompleted + statusFailed + statusError + statusDiscarded + + // this login provider name is automatically registered with the auth package; see init() + ProviderName = "SASL" +) + +type authenticateeProcess struct { + transport messenger.Messenger + client upid.UPID + status statusType + done chan struct{} + err error + mech mech.Interface + stepFn mech.StepFunc + from *upid.UPID + handler callback.Handler +} + +type authenticateeConfig struct { + client upid.UPID // pid of the client we're attempting to authenticate + handler callback.Handler + transport messenger.Messenger // mesos communications transport +} + +type transportFactory interface { + makeTransport() messenger.Messenger +} + +type transportFactoryFunc func() messenger.Messenger + +func (f transportFactoryFunc) makeTransport() messenger.Messenger { + return f() +} + +func init() { + factory := func(ctx context.Context) transportFactoryFunc { + return transportFactoryFunc(func() messenger.Messenger { + parent := auth.ParentUPID(ctx) + if parent == nil { + log.Fatal("expected to have a parent UPID in context") + } + process := process.New("sasl_authenticatee") + tpid := &upid.UPID{ + ID: process.Label(), + Host: parent.Host, + } + return messenger.NewHttpWithBindingAddress(tpid, BindingAddressFrom(ctx)) + }) + } + delegate := auth.AuthenticateeFunc(func(ctx context.Context, handler callback.Handler) error { + if impl, err := makeAuthenticatee(handler, factory(ctx)); err != nil { + return err + } else { + return impl.Authenticate(ctx, handler) + } + }) + if
err := auth.RegisterAuthenticateeProvider(ProviderName, delegate); err != nil { + log.Error(err) + } +} + +func (s *statusType) get() statusType { + return statusType(atomic.LoadInt32((*int32)(s))) +} + +func (s *statusType) swap(old, new statusType) bool { + return old != new && atomic.CompareAndSwapInt32((*int32)(s), int32(old), int32(new)) +} + +// build a new authenticatee implementation using the given callbacks and a new transport instance +func makeAuthenticatee(handler callback.Handler, factory transportFactory) (auth.Authenticatee, error) { + + ip := callback.NewInterprocess() + if err := handler.Handle(ip); err != nil { + return nil, err + } + config := &authenticateeConfig{ + client: ip.Client(), + handler: handler, + transport: factory.makeTransport(), + } + return auth.AuthenticateeFunc(func(ctx context.Context, handler callback.Handler) error { + ctx, auth := newAuthenticatee(ctx, config) + auth.authenticate(ctx, ip.Server()) + + select { + case <-ctx.Done(): + return auth.discard(ctx) + case <-auth.done: + return auth.err + } + }), nil +} + +// Terminate the authentication process upon context cancellation; +// only to be called if/when ctx.Done() has been signalled. +func (self *authenticateeProcess) discard(ctx context.Context) error { + err := ctx.Err() + status := statusFrom(ctx) + for ; status < _statusTerminal; status = (&self.status).get() { + if self.terminate(status, statusDiscarded, err) { + break + } + } + return err +} + +func newAuthenticatee(ctx context.Context, config *authenticateeConfig) (context.Context, *authenticateeProcess) { + initialStatus := statusReady + proc := &authenticateeProcess{ + transport: config.transport, + client: config.client, + handler: config.handler, + status: initialStatus, + done: make(chan struct{}), + } + ctx = withStatus(ctx, initialStatus) + err := proc.installHandlers(ctx) + if err == nil { + err = proc.startTransport() + } + if err != nil { + proc.terminate(initialStatus, statusError, err) + } + return ctx, proc +} + +func (self *authenticateeProcess) startTransport() error { + if err := self.transport.Start(); err != nil { + return err + } else { + go func() { + // stop the authentication transport upon termination of the + // authenticator process + select { + case <-self.done: + log.V(2).Infof("stopping authenticator transport: %v", self.transport.UPID()) + self.transport.Stop() + } + }() + } + return nil +} + +// returns true when handlers are installed without error, otherwise terminates the +// authentication process. 
+func (self *authenticateeProcess) installHandlers(ctx context.Context) error { + + type handlerFn func(ctx context.Context, from *upid.UPID, pbMsg proto.Message) + + withContext := func(f handlerFn) messenger.MessageHandler { + return func(from *upid.UPID, m proto.Message) { + status := (&self.status).get() + if self.from != nil && !self.from.Equal(from) { + self.terminate(status, statusError, UnexpectedAuthenticatorPid) + } else { + f(withStatus(ctx, status), from, m) + } + } + } + + // Anticipate mechanisms and steps from the server + handlers := []struct { + f handlerFn + m proto.Message + }{ + {self.mechanisms, &mesos.AuthenticationMechanismsMessage{}}, + {self.step, &mesos.AuthenticationStepMessage{}}, + {self.completed, &mesos.AuthenticationCompletedMessage{}}, + {self.failed, &mesos.AuthenticationFailedMessage{}}, + {self.errored, &mesos.AuthenticationErrorMessage{}}, + } + for _, h := range handlers { + if err := self.transport.Install(withContext(h.f), h.m); err != nil { + return err + } + } + return nil +} + +// return true if the authentication status was updated (if true, self.done will have been closed) +func (self *authenticateeProcess) terminate(old, new statusType, err error) bool { + if (&self.status).swap(old, new) { + self.err = err + if self.mech != nil { + self.mech.Discard() + } + close(self.done) + return true + } + return false +} + +func (self *authenticateeProcess) authenticate(ctx context.Context, pid upid.UPID) { + status := statusFrom(ctx) + if status != statusReady { + return + } + message := &mesos.AuthenticateMessage{ + Pid: proto.String(self.client.String()), + } + if err := self.transport.Send(ctx, &pid, message); err != nil { + self.terminate(status, statusError, err) + } else { + (&self.status).swap(status, statusStarting) + } +} + +func (self *authenticateeProcess) mechanisms(ctx context.Context, from *upid.UPID, pbMsg proto.Message) { + status := statusFrom(ctx) + if status != statusStarting { + self.terminate(status, statusError, UnexpectedAuthenticationMechanisms) + return + } + + msg, ok := pbMsg.(*mesos.AuthenticationMechanismsMessage) + if !ok { + self.terminate(status, statusError, fmt.Errorf("Expected AuthenticationMechanismsMessage, not %T", pbMsg)) + return + } + + mechanisms := msg.GetMechanisms() + log.Infof("Received SASL authentication mechanisms: %v", mechanisms) + + selectedMech, factory := mech.SelectSupported(mechanisms) + if selectedMech == "" { + self.terminate(status, statusError, UnsupportedMechanism) + return + } + + if m, f, err := factory(self.handler); err != nil { + self.terminate(status, statusError, err) + return + } else { + self.mech = m + self.stepFn = f + self.from = from + } + + // execute initialization step... 
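// (stepFn was seeded by the mechanism factory; invoking it with nil data runs
// the mechanism's initialization step and returns both the function that will
// handle the server's first challenge and any initial client data.)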
+ nextf, data, err := self.stepFn(self.mech, nil) + if err != nil { + self.terminate(status, statusError, err) + return + } else { + self.stepFn = nextf + } + + message := &mesos.AuthenticationStartMessage{ + Mechanism: proto.String(selectedMech), + Data: proto.String(string(data)), // may be nil, depends on init step + } + + if err := self.transport.Send(ctx, from, message); err != nil { + self.terminate(status, statusError, err) + } else { + (&self.status).swap(status, statusStepping) + } +} + +func (self *authenticateeProcess) step(ctx context.Context, from *upid.UPID, pbMsg proto.Message) { + status := statusFrom(ctx) + if status != statusStepping { + self.terminate(status, statusError, UnexpectedAuthenticationStep) + return + } + + log.Info("Received SASL authentication step") + + msg, ok := pbMsg.(*mesos.AuthenticationStepMessage) + if !ok { + self.terminate(status, statusError, fmt.Errorf("Expected AuthenticationStepMessage, not %T", pbMsg)) + return + } + + input := msg.GetData() + fn, output, err := self.stepFn(self.mech, input) + + if err != nil { + self.terminate(status, statusError, fmt.Errorf("failed to perform authentication step: %v", err)) + return + } + self.stepFn = fn + + // We don't start the client with SASL_SUCCESS_DATA so we may + // need to send one more "empty" message to the server. + message := &mesos.AuthenticationStepMessage{} + if len(output) > 0 { + message.Data = output + } + if err := self.transport.Send(ctx, from, message); err != nil { + self.terminate(status, statusError, err) + } +} + +func (self *authenticateeProcess) completed(ctx context.Context, from *upid.UPID, pbMsg proto.Message) { + status := statusFrom(ctx) + if status != statusStepping { + self.terminate(status, statusError, UnexpectedAuthenticationCompleted) + return + } + + log.Info("Authentication success") + self.terminate(status, statusCompleted, nil) +} + +func (self *authenticateeProcess) failed(ctx context.Context, from *upid.UPID, pbMsg proto.Message) { + status := statusFrom(ctx) + self.terminate(status, statusFailed, auth.AuthenticationFailed) +} + +func (self *authenticateeProcess) errored(ctx context.Context, from *upid.UPID, pbMsg proto.Message) { + var err error + if msg, ok := pbMsg.(*mesos.AuthenticationErrorMessage); !ok { + err = fmt.Errorf("Expected AuthenticationErrorMessage, not %T", pbMsg) + } else { + err = fmt.Errorf("Authentication error: %s", msg.GetError()) + } + status := statusFrom(ctx) + self.terminate(status, statusError, err) +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/sasl/authenticatee_test.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/sasl/authenticatee_test.go new file mode 100644 index 00000000000..9fd37b6fb96 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/sasl/authenticatee_test.go @@ -0,0 +1,98 @@ +package sasl + +import ( + "testing" + "time" + + "github.com/gogo/protobuf/proto" + "github.com/mesos/mesos-go/auth/callback" + "github.com/mesos/mesos-go/auth/sasl/mech/crammd5" + mesos "github.com/mesos/mesos-go/mesosproto" + "github.com/mesos/mesos-go/messenger" + "github.com/mesos/mesos-go/upid" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "golang.org/x/net/context" +) + +type MockTransport struct { + *messenger.MockedMessenger +} + +func (m *MockTransport) Send(ctx context.Context, upid *upid.UPID, msg proto.Message) error { + return m.Called(mock.Anything, upid, msg).Error(0) +} + +func TestAuthticatee_validLogin(t *testing.T) { + assert := assert.New(t) + 
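	// The mocked transport below scripts both sides of a CRAM-MD5 handshake:
	// the authenticatee is expected to send AuthenticateMessage, then
	// AuthenticationStartMessage, then one AuthenticationStepMessage, while
	// the goroutine replays the server's mechanisms, challenge, and
	// completion messages back to it.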
ctx := context.TODO() + client := upid.UPID{ + ID: "someFramework", + Host: "b.net", + Port: "789", + } + server := upid.UPID{ + ID: "serv", + Host: "a.com", + Port: "123", + } + tpid := upid.UPID{ + ID: "sasl_transport", + Host: "g.org", + Port: "456", + } + handler := callback.HandlerFunc(func(cb ...callback.Interface) error { + for _, c := range cb { + switch c := c.(type) { + case *callback.Name: + c.Set("foo") + case *callback.Password: + c.Set([]byte("bar")) + case *callback.Interprocess: + c.Set(server, client) + default: + return &callback.Unsupported{Callback: c} + } + } + return nil + }) + var transport *MockTransport + factory := transportFactoryFunc(func() messenger.Messenger { + transport = &MockTransport{messenger.NewMockedMessenger()} + transport.On("Install").Return(nil) + transport.On("UPID").Return(&tpid) + transport.On("Start").Return(nil) + transport.On("Stop").Return(nil) + transport.On("Send", mock.Anything, &server, &mesos.AuthenticateMessage{ + Pid: proto.String(client.String()), + }).Return(nil).Once() + + transport.On("Send", mock.Anything, &server, &mesos.AuthenticationStartMessage{ + Mechanism: proto.String(crammd5.Name), + Data: proto.String(""), // may be nil, depends on init step + }).Return(nil).Once() + + transport.On("Send", mock.Anything, &server, &mesos.AuthenticationStepMessage{ + Data: []byte(`foo cc7fd96cd80123ea844a7dba29a594ed`), + }).Return(nil).Once() + + go func() { + transport.Recv(&server, &mesos.AuthenticationMechanismsMessage{ + Mechanisms: []string{crammd5.Name}, + }) + transport.Recv(&server, &mesos.AuthenticationStepMessage{ + Data: []byte(`lsd;lfkgjs;dlfkgjs;dfklg`), + }) + transport.Recv(&server, &mesos.AuthenticationCompletedMessage{}) + }() + return transport + }) + login, err := makeAuthenticatee(handler, factory) + assert.Nil(err) + + err = login.Authenticate(ctx, handler) + assert.Nil(err) + assert.NotNil(transport) + time.Sleep(1 * time.Second) // wait for the authenticator to shut down + transport.AssertExpectations(t) +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/sasl/context.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/sasl/context.go new file mode 100644 index 00000000000..8058ac34e6c --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/sasl/context.go @@ -0,0 +1,43 @@ +package sasl + +import ( + "net" + + "golang.org/x/net/context" +) + +// unexported to prevent collisions with context keys defined in +// other packages. +type _key int + +// If this package defined other context keys, they would have +// different integer values. 
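// (This is the standard context-key idiom: because _key is unexported, no
// other package can construct a colliding key, so values stored here cannot
// be overwritten from outside this package.)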
+const ( + statusKey _key = iota + bindingAddressKey // bind address for login-related network ops +) + +func withStatus(ctx context.Context, s statusType) context.Context { + return context.WithValue(ctx, statusKey, s) +} + +func statusFrom(ctx context.Context) statusType { + s, ok := ctx.Value(statusKey).(statusType) + if !ok { + panic("missing status in context") + } + return s +} + +func WithBindingAddress(ctx context.Context, address net.IP) context.Context { + return context.WithValue(ctx, bindingAddressKey, address) +} + +func BindingAddressFrom(ctx context.Context) net.IP { + obj := ctx.Value(bindingAddressKey) + if addr, ok := obj.(net.IP); ok { + return addr + } else { + return nil + } +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/sasl/mech/crammd5/mechanism.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/sasl/mech/crammd5/mechanism.go new file mode 100644 index 00000000000..d6b4dafa155 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/sasl/mech/crammd5/mechanism.go @@ -0,0 +1,72 @@ +package crammd5 + +import ( + "crypto/hmac" + "crypto/md5" + "encoding/hex" + "errors" + "io" + + log "github.com/golang/glog" + "github.com/mesos/mesos-go/auth/callback" + "github.com/mesos/mesos-go/auth/sasl/mech" +) + +var ( + Name = "CRAM-MD5" // name this mechanism is registered with + + //TODO(jdef) is this a generic SASL error? if so, move it up to mech + challengeDataRequired = errors.New("challenge data may not be empty") +) + +func init() { + mech.Register(Name, newInstance) +} + +type mechanism struct { + handler callback.Handler +} + +func (m *mechanism) Handler() callback.Handler { + return m.handler +} + +func (m *mechanism) Discard() { + // noop +} + +func newInstance(h callback.Handler) (mech.Interface, mech.StepFunc, error) { + m := &mechanism{ + handler: h, + } + fn := func(m mech.Interface, data []byte) (mech.StepFunc, []byte, error) { + // noop: no initialization needed + return challengeResponse, nil, nil + } + return m, fn, nil +} + +// algorithm lifted from wikipedia: http://en.wikipedia.org/wiki/CRAM-MD5 +// except that the SASL mechanism used by Mesos doesn't leverage base64 encoding +func challengeResponse(m mech.Interface, data []byte) (mech.StepFunc, []byte, error) { + if len(data) == 0 { + return mech.IllegalState, nil, challengeDataRequired + } + decoded := string(data) + log.V(4).Infof("challenge(decoded): %s", decoded) // for deep debugging only + + username := callback.NewName() + secret := callback.NewPassword() + + if err := m.Handler().Handle(username, secret); err != nil { + return mech.IllegalState, nil, err + } + hash := hmac.New(md5.New, secret.Get()) + if _, err := io.WriteString(hash, decoded); err != nil { + return mech.IllegalState, nil, err + } + + codes := hex.EncodeToString(hash.Sum(nil)) + msg := username.Get() + " " + codes + return nil, []byte(msg), nil +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/sasl/mech/interface.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/sasl/mech/interface.go new file mode 100644 index 00000000000..56b53bf56a5 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/sasl/mech/interface.go @@ -0,0 +1,33 @@ +package mech + +import ( + "errors" + + "github.com/mesos/mesos-go/auth/callback" +) + +var ( + IllegalStateErr = errors.New("illegal mechanism state") +) + +type Interface interface { + Handler() callback.Handler + Discard() // clean up resources or sensitive information; idempotent +} + +// return a mechanism and 
its initialization step (may be a noop that returns +// a nil data blob and handle to the first "real" challenge step). +type Factory func(h callback.Handler) (Interface, StepFunc, error) + +// StepFunc implementations should never return a nil StepFunc result. This +// helps keep the logic in the SASL authenticatee simpler: step functions are +// never nil. Mechanisms that end up in an error state (for example, some decoding +// logic fails...) should return a StepFunc that represents an error state. +// Some mechanisms may be able to recover from such a state. +type StepFunc func(m Interface, data []byte) (StepFunc, []byte, error) + +// reflects an unrecoverable, illegal mechanism state; always returns IllegalState +// as the next step along with an IllegalStateErr +func IllegalState(m Interface, data []byte) (StepFunc, []byte, error) { + return IllegalState, nil, IllegalStateErr +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/sasl/mech/plugins.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/sasl/mech/plugins.go new file mode 100644 index 00000000000..3642fccbeed --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/auth/sasl/mech/plugins.go @@ -0,0 +1,49 @@ +package mech + +import ( + "fmt" + "sync" + + log "github.com/golang/glog" +) + +var ( + mechLock sync.Mutex + supportedMechs = make(map[string]Factory) +) + +func Register(name string, f Factory) error { + mechLock.Lock() + defer mechLock.Unlock() + + if _, found := supportedMechs[name]; found { + return fmt.Errorf("Mechanism registered twice: %s", name) + } + supportedMechs[name] = f + log.V(1).Infof("Registered mechanism %s", name) + return nil +} + +func ListSupported() (list []string) { + mechLock.Lock() + defer mechLock.Unlock() + + for mechname := range supportedMechs { + list = append(list, mechname) + } + return list +} + +func SelectSupported(mechanisms []string) (selectedMech string, factory Factory) { + mechLock.Lock() + defer mechLock.Unlock() + + for _, m := range mechanisms { + if f, ok := supportedMechs[m]; ok { + selectedMech = m + factory = f + break + } + } + return +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/executor/doc.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/executor/doc.go new file mode 100644 index 00000000000..0f37d2c2237 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/executor/doc.go @@ -0,0 +1,5 @@ +/* +Package executor includes the interfaces of the mesos executor and +the mesos executor driver, as well as an implementation of the driver. +*/ +package executor diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/executor/exectype.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/executor/exectype.go new file mode 100644 index 00000000000..1c70b4450af --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/executor/exectype.go @@ -0,0 +1,142 @@ +package executor + +import ( + "github.com/mesos/mesos-go/mesosproto" +) + +/** + * Executor callback interface to be implemented by frameworks' executors. Note + * that only one callback will be invoked at a time, so it is not + * recommended that you block within a callback because it may cause a + * deadlock. + * + * Each callback includes a reference to the executor driver that was + * used to run this executor. The driver will not change for the + * duration of an executor (i.e., from the point you do + * ExecutorDriver.Start() to the point that ExecutorDriver.Join() + * returns).
This is intended for convenience so that an executor + * doesn't need to store a pointer to the driver itself. + */ +type Executor interface { + /** + * Invoked once the executor driver has been able to successfully + * connect with Mesos. In particular, a scheduler can pass some + * data to its executors through the FrameworkInfo.ExecutorInfo's + * data field. + */ + Registered(ExecutorDriver, *mesosproto.ExecutorInfo, *mesosproto.FrameworkInfo, *mesosproto.SlaveInfo) + + /** + * Invoked when the executor re-registers with a restarted slave. + */ + Reregistered(ExecutorDriver, *mesosproto.SlaveInfo) + + /** + * Invoked when the executor becomes "disconnected" from the slave + * (e.g., the slave is being restarted due to an upgrade). + */ + Disconnected(ExecutorDriver) + + /** + * Invoked when a task has been launched on this executor (initiated + * via SchedulerDriver.LaunchTasks). Note that this task can be realized + * with a goroutine, an external process, or some simple computation; however, + * no other callbacks will be invoked on this executor until this + * callback has returned. + */ + LaunchTask(ExecutorDriver, *mesosproto.TaskInfo) + + /** + * Invoked when a task running within this executor has been killed + * (via SchedulerDriver.KillTask). Note that no status update will + * be sent on behalf of the executor; the executor is responsible + * for creating a new TaskStatus (i.e., with TASK_KILLED) and + * invoking ExecutorDriver.SendStatusUpdate. + */ + KillTask(ExecutorDriver, *mesosproto.TaskID) + + /** + * Invoked when a framework message has arrived for this + * executor. These messages are best effort; do not expect a + * framework message to be retransmitted in any reliable fashion. + */ + FrameworkMessage(ExecutorDriver, string) + + /** + * Invoked when the executor should terminate all of its currently + * running tasks. Note that after Mesos has determined that an + * executor has terminated, a TASK_LOST status update will be created + * for any tasks for which the executor did not send terminal status + * updates (e.g., TASK_KILLED, TASK_FINISHED, TASK_FAILED, etc.). + */ + Shutdown(ExecutorDriver) + + /** + * Invoked when a fatal error has occurred with the executor and/or + * executor driver. The driver will be aborted BEFORE invoking this + * callback. + */ + Error(ExecutorDriver, string) +} + +/** + * ExecutorDriver interface for connecting an executor to Mesos. This + * interface is used both to manage the executor's lifecycle (start + * it, stop it, or wait for it to finish) and to interact with Mesos + * (e.g., send status updates, send framework messages, etc.). + * A driver method is expected to fail-fast and return an error when possible. + * Other internal errors (or remote errors) that occur asynchronously are handled + * using the Executor.Error() callback. + */ +type ExecutorDriver interface { + /** + * Starts the executor driver. This needs to be called before any + * other driver calls are made. + */ + Start() (mesosproto.Status, error) + + /** + * Stops the executor driver. + */ + Stop() (mesosproto.Status, error) + + /** + * Aborts the driver so that no more callbacks can be made to the + * executor. The semantics of abort and stop have deliberately been + * separated so that code can detect an aborted driver (i.e., via + * the return status of ExecutorDriver.Join, see below), and + * instantiate and start another driver if desired (from within the + * same process ... although this functionality is currently not + * supported for executors).
+ */ + Abort() (mesosproto.Status, error) + + /** + * Waits for the driver to be stopped or aborted, possibly + * blocking the calling goroutine indefinitely. The return status of + * this function can be used to determine if the driver was aborted + * (see package mesosproto for a description of Status). + */ + Join() (mesosproto.Status, error) + + /** + * Starts and immediately joins (i.e., blocks on) the driver. + */ + Run() (mesosproto.Status, error) + + /** + * Sends a status update to the framework scheduler, retrying as + * necessary until an acknowledgement has been received or the + * executor is terminated (in which case, a TASK_LOST status update + * will be sent). See Scheduler.StatusUpdate for more information + * about status update acknowledgements. + */ + SendStatusUpdate(*mesosproto.TaskStatus) (mesosproto.Status, error) + + /** + * Sends a message to the framework scheduler. These messages are + * best effort; do not expect a framework message to be + * retransmitted in any reliable fashion. + */ + SendFrameworkMessage(string) (mesosproto.Status, error) +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/executor/executor.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/executor/executor.go new file mode 100644 index 00000000000..05ed98581ee --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/executor/executor.go @@ -0,0 +1,583 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package executor + +import ( + "fmt" + "net" + "os" + "sync" + "time" + + "code.google.com/p/go-uuid/uuid" + "github.com/gogo/protobuf/proto" + log "github.com/golang/glog" + "github.com/mesos/mesos-go/mesosproto" + "github.com/mesos/mesos-go/mesosutil" + "github.com/mesos/mesos-go/mesosutil/process" + "github.com/mesos/mesos-go/messenger" + "github.com/mesos/mesos-go/upid" + "golang.org/x/net/context" +) + +type DriverConfig struct { + Executor Executor + HostnameOverride string // optional + BindingAddress net.IP // optional + BindingPort uint16 // optional + NewMessenger func() (messenger.Messenger, error) // optional +} + +// MesosExecutorDriver is a implementation of the ExecutorDriver. +type MesosExecutorDriver struct { + lock sync.RWMutex + self *upid.UPID + exec Executor + stopCh chan struct{} + destroyCh chan struct{} + stopped bool + status mesosproto.Status + messenger messenger.Messenger + slaveUPID *upid.UPID + slaveID *mesosproto.SlaveID + frameworkID *mesosproto.FrameworkID + executorID *mesosproto.ExecutorID + workDir string + connected bool + connection uuid.UUID + local bool // TODO(yifan): Not used yet. + directory string // TODO(yifan): Not used yet. + checkpoint bool + recoveryTimeout time.Duration + updates map[string]*mesosproto.StatusUpdate // Key is a UUID string. 
TODO(yifan): Not used yet. + tasks map[string]*mesosproto.TaskInfo // Key is a UUID string. TODO(yifan): Not used yet. +} + +// NewMesosExecutorDriver creates a new mesos executor driver. +func NewMesosExecutorDriver(config DriverConfig) (*MesosExecutorDriver, error) { + if config.Executor == nil { + msg := "Executor callback interface cannot be nil." + log.Errorln(msg) + return nil, fmt.Errorf(msg) + } + + hostname := mesosutil.GetHostname(config.HostnameOverride) + newMessenger := config.NewMessenger + if newMessenger == nil { + newMessenger = func() (messenger.Messenger, error) { + process := process.New("executor") + return messenger.ForHostname(process, hostname, config.BindingAddress, config.BindingPort) + } + } + + driver := &MesosExecutorDriver{ + exec: config.Executor, + status: mesosproto.Status_DRIVER_NOT_STARTED, + stopCh: make(chan struct{}), + destroyCh: make(chan struct{}), + stopped: true, + updates: make(map[string]*mesosproto.StatusUpdate), + tasks: make(map[string]*mesosproto.TaskInfo), + workDir: ".", + } + var err error + if driver.messenger, err = newMessenger(); err != nil { + return nil, err + } + if err = driver.init(); err != nil { + log.Errorf("failed to initialize the driver: %v", err) + return nil, err + } + return driver, nil +} + +// init initializes the driver. +func (driver *MesosExecutorDriver) init() error { + log.Infof("Init mesos executor driver\n") + log.Infof("Version: %v\n", mesosutil.MesosVersion) + + // Parse environments. + if err := driver.parseEnviroments(); err != nil { + log.Errorf("Failed to parse environments: %v\n", err) + return err + } + + // Install handlers. + driver.messenger.Install(driver.registered, &mesosproto.ExecutorRegisteredMessage{}) + driver.messenger.Install(driver.reregistered, &mesosproto.ExecutorReregisteredMessage{}) + driver.messenger.Install(driver.reconnect, &mesosproto.ReconnectExecutorMessage{}) + driver.messenger.Install(driver.runTask, &mesosproto.RunTaskMessage{}) + driver.messenger.Install(driver.killTask, &mesosproto.KillTaskMessage{}) + driver.messenger.Install(driver.statusUpdateAcknowledgement, &mesosproto.StatusUpdateAcknowledgementMessage{}) + driver.messenger.Install(driver.frameworkMessage, &mesosproto.FrameworkToExecutorMessage{}) + driver.messenger.Install(driver.shutdown, &mesosproto.ShutdownExecutorMessage{}) + driver.messenger.Install(driver.frameworkError, &mesosproto.FrameworkErrorMessage{}) + return nil +} + +func (driver *MesosExecutorDriver) parseEnviroments() error { + var value string + + value = os.Getenv("MESOS_LOCAL") + if len(value) > 0 { + driver.local = true + } + + value = os.Getenv("MESOS_SLAVE_PID") + if len(value) == 0 { + return fmt.Errorf("Cannot find MESOS_SLAVE_PID in the environment") + } + upid, err := upid.Parse(value) + if err != nil { + log.Errorf("Cannot parse UPID %v\n", err) + return err + } + driver.slaveUPID = upid + + value = os.Getenv("MESOS_SLAVE_ID") + driver.slaveID = &mesosproto.SlaveID{Value: proto.String(value)} + + value = os.Getenv("MESOS_FRAMEWORK_ID") + driver.frameworkID = &mesosproto.FrameworkID{Value: proto.String(value)} + + value = os.Getenv("MESOS_EXECUTOR_ID") + driver.executorID = &mesosproto.ExecutorID{Value: proto.String(value)} + + value = os.Getenv("MESOS_DIRECTORY") + if len(value) > 0 { + driver.workDir = value + } + + value = os.Getenv("MESOS_CHECKPOINT") + if value == "1" { + driver.checkpoint = true + } + // TODO(yifan): Parse the duration. For now just use default. 
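	// A sketch of what that parsing might look like (hypothetical: the
	// MESOS_RECOVERY_TIMEOUT variable name and the Go duration syntax are
	// assumptions, not part of this vendored revision):
	//
	//	if value = os.Getenv("MESOS_RECOVERY_TIMEOUT"); len(value) > 0 {
	//		d, err := time.ParseDuration(value)
	//		if err != nil {
	//			return err
	//		}
	//		driver.recoveryTimeout = d
	//	}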
+ return nil +} + +// ------------------------- Accessors ----------------------- // +func (driver *MesosExecutorDriver) Status() mesosproto.Status { + driver.lock.RLock() + defer driver.lock.RUnlock() + return driver.status +} +func (driver *MesosExecutorDriver) setStatus(stat mesosproto.Status) { + driver.lock.Lock() + driver.status = stat + driver.lock.Unlock() +} + +func (driver *MesosExecutorDriver) Stopped() bool { + return driver.stopped +} + +func (driver *MesosExecutorDriver) setStopped(val bool) { + driver.lock.Lock() + driver.stopped = val + driver.lock.Unlock() +} + +func (driver *MesosExecutorDriver) Connected() bool { + return driver.connected +} + +func (driver *MesosExecutorDriver) setConnected(val bool) { + driver.lock.Lock() + driver.connected = val + driver.lock.Unlock() +} + +// --------------------- Message Handlers --------------------- // + +func (driver *MesosExecutorDriver) registered(from *upid.UPID, pbMsg proto.Message) { + log.Infoln("Executor driver registered") + + msg := pbMsg.(*mesosproto.ExecutorRegisteredMessage) + slaveID := msg.GetSlaveId() + executorInfo := msg.GetExecutorInfo() + frameworkInfo := msg.GetFrameworkInfo() + slaveInfo := msg.GetSlaveInfo() + + if driver.stopped { + log.Infof("Ignoring registered message from slave %v, because the driver is stopped!\n", slaveID) + return + } + + log.Infof("Registered on slave %v\n", slaveID) + driver.setConnected(true) + driver.connection = uuid.NewUUID() + driver.exec.Registered(driver, executorInfo, frameworkInfo, slaveInfo) +} + +func (driver *MesosExecutorDriver) reregistered(from *upid.UPID, pbMsg proto.Message) { + log.Infoln("Executor driver reregistered") + + msg := pbMsg.(*mesosproto.ExecutorReregisteredMessage) + slaveID := msg.GetSlaveId() + slaveInfo := msg.GetSlaveInfo() + + if driver.stopped { + log.Infof("Ignoring re-registered message from slave %v, because the driver is stopped!\n", slaveID) + return + } + + log.Infof("Re-registered on slave %v\n", slaveID) + driver.setConnected(true) + driver.connection = uuid.NewUUID() + driver.exec.Reregistered(driver, slaveInfo) +} + +func (driver *MesosExecutorDriver) send(upid *upid.UPID, msg proto.Message) error { + //TODO(jdef) should implement timeout here + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + + c := make(chan error, 1) + go func() { c <- driver.messenger.Send(ctx, upid, msg) }() + + select { + case <-ctx.Done(): + <-c // wait for Send(...) + return ctx.Err() + case err := <-c: + return err + } +} + +func (driver *MesosExecutorDriver) reconnect(from *upid.UPID, pbMsg proto.Message) { + log.Infoln("Executor driver reconnect") + + msg := pbMsg.(*mesosproto.ReconnectExecutorMessage) + slaveID := msg.GetSlaveId() + + if driver.stopped { + log.Infof("Ignoring reconnect message from slave %v, because the driver is stopped!\n", slaveID) + return + } + + log.Infof("Received reconnect request from slave %v\n", slaveID) + driver.slaveUPID = from + + message := &mesosproto.ReregisterExecutorMessage{ + ExecutorId: driver.executorID, + FrameworkId: driver.frameworkID, + } + // Send all unacknowledged updates. + for _, u := range driver.updates { + message.Updates = append(message.Updates, u) + } + // Send all unacknowledged tasks. + for _, t := range driver.tasks { + message.Tasks = append(message.Tasks, t) + } + // Send the message. 
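	// (Replaying unacknowledged updates and tasks lets a restarted slave
	// rebuild its view of this executor's in-flight work before normal
	// status-update traffic resumes.)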
+	if err := driver.send(driver.slaveUPID, message); err != nil {
+		log.Errorf("Failed to send %v: %v\n", message, err)
+	}
+}
+
+func (driver *MesosExecutorDriver) runTask(from *upid.UPID, pbMsg proto.Message) {
+	log.Infoln("Executor driver runTask")
+
+	msg := pbMsg.(*mesosproto.RunTaskMessage)
+	task := msg.GetTask()
+	taskID := task.GetTaskId()
+
+	if driver.stopped {
+		log.Infof("Ignoring run task message for task %v because the driver is stopped!\n", taskID)
+		return
+	}
+	if _, ok := driver.tasks[taskID.String()]; ok {
+		log.Fatalf("Unexpected duplicate task %v\n", taskID)
+	}
+
+	log.Infof("Executor asked to run task '%v'\n", taskID)
+	driver.tasks[taskID.String()] = task
+	driver.exec.LaunchTask(driver, task)
+}
+
+func (driver *MesosExecutorDriver) killTask(from *upid.UPID, pbMsg proto.Message) {
+	log.Infoln("Executor driver killTask")
+
+	msg := pbMsg.(*mesosproto.KillTaskMessage)
+	taskID := msg.GetTaskId()
+
+	if driver.stopped {
+		log.Infof("Ignoring kill task message for task %v, because the driver is stopped!\n", taskID)
+		return
+	}
+
+	log.Infof("Executor driver is asked to kill task '%v'\n", taskID)
+	driver.exec.KillTask(driver, taskID)
+}
+
+func (driver *MesosExecutorDriver) statusUpdateAcknowledgement(from *upid.UPID, pbMsg proto.Message) {
+	log.Infoln("Executor statusUpdateAcknowledgement")
+
+	msg := pbMsg.(*mesosproto.StatusUpdateAcknowledgementMessage)
+	log.Infof("Received status update acknowledgement %v", msg)
+
+	frameworkID := msg.GetFrameworkId()
+	taskID := msg.GetTaskId()
+	uuid := uuid.UUID(msg.GetUuid())
+
+	if driver.stopped {
+		log.Infof("Ignoring status update acknowledgement %v for task %v of framework %v because the driver is stopped!\n",
+			uuid, taskID, frameworkID)
+		return
+	}
+
+	// Remove the corresponding update.
+	delete(driver.updates, uuid.String())
+	// Remove the corresponding task.
+	delete(driver.tasks, taskID.String())
+}
+
+func (driver *MesosExecutorDriver) frameworkMessage(from *upid.UPID, pbMsg proto.Message) {
+	log.Infoln("Executor driver received frameworkMessage")
+
+	msg := pbMsg.(*mesosproto.FrameworkToExecutorMessage)
+	data := msg.GetData()
+
+	if driver.stopped {
+		log.Infof("Ignoring framework message because the driver is stopped!\n")
+		return
+	}
+
+	log.Infof("Executor driver received framework message\n")
+	driver.exec.FrameworkMessage(driver, string(data))
+}
+
+func (driver *MesosExecutorDriver) shutdown(from *upid.UPID, pbMsg proto.Message) {
+	log.Infoln("Executor driver received shutdown")
+
+	_, ok := pbMsg.(*mesosproto.ShutdownExecutorMessage)
+	if !ok {
+		panic("Not a ShutdownExecutorMessage! This should not happen")
+	}
+
+	if driver.stopped {
+		log.Infof("Ignoring shutdown message because the driver is stopped!\n")
+		return
+	}
+
+	log.Infof("Executor driver is asked to shutdown\n")
+
+	driver.exec.Shutdown(driver)
+	// driver.Stop() will cause the process to eventually stop.
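+	// Note: Stop() closes stopCh, which unblocks any goroutine waiting in
+	// Join() or Run(), so a typical executor main exits shortly after this
+	// handler returns.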
+	driver.Stop()
+}
+
+func (driver *MesosExecutorDriver) frameworkError(from *upid.UPID, pbMsg proto.Message) {
+	log.Infoln("Executor driver received error")
+
+	msg := pbMsg.(*mesosproto.FrameworkErrorMessage)
+	driver.exec.Error(driver, msg.GetMessage())
+}
+
+// ------------------------ Driver Implementation ----------------- //
+
+// Start starts the executor driver.
+func (driver *MesosExecutorDriver) Start() (mesosproto.Status, error) {
+	log.Infoln("Starting the executor driver")
+
+	if stat := driver.Status(); stat != mesosproto.Status_DRIVER_NOT_STARTED {
+		return stat, fmt.Errorf("Unable to Start, expecting status %s, but got %s", mesosproto.Status_DRIVER_NOT_STARTED, stat)
+	}
+
+	driver.setStatus(mesosproto.Status_DRIVER_NOT_STARTED)
+	driver.setStopped(true)
+
+	// Start the messenger.
+	if err := driver.messenger.Start(); err != nil {
+		log.Errorf("Failed to start the messenger: %v\n", err)
+		return driver.Status(), err
+	}
+
+	driver.self = driver.messenger.UPID()
+
+	// Register with the slave.
+	log.V(3).Infoln("Sending Executor registration")
+	message := &mesosproto.RegisterExecutorMessage{
+		FrameworkId: driver.frameworkID,
+		ExecutorId:  driver.executorID,
+	}
+
+	if err := driver.send(driver.slaveUPID, message); err != nil {
+		stat := driver.Status()
+		log.Errorf("Stopping the executor, failed to send %v: %v\n", message, err)
+		err0 := driver.stop(stat)
+		if err0 != nil {
+			log.Errorf("Failed to stop executor: %v\n", err0)
+			return stat, err0
+		}
+		return stat, err
+	}
+	driver.setStopped(false)
+	driver.setStatus(mesosproto.Status_DRIVER_RUNNING)
+
+	log.Infoln("Mesos executor is started with PID=", driver.self.String())
+
+	return driver.Status(), nil
+}
+
+// Stop stops the driver by shutting down the messenger and closing the stop
+// channel, which unblocks any callers of Join().
+func (driver *MesosExecutorDriver) Stop() (mesosproto.Status, error) {
+	log.Infoln("Stopping the executor driver")
+	if stat := driver.Status(); stat != mesosproto.Status_DRIVER_RUNNING {
+		return stat, fmt.Errorf("Unable to Stop, expecting status %s, but got %s", mesosproto.Status_DRIVER_RUNNING, stat)
+	}
+	stopStat := mesosproto.Status_DRIVER_STOPPED
+	return stopStat, driver.stop(stopStat)
+}
+
+// stop is the internal function that stops the driver and records the reason
+// for stopping. Note that messages in flight or queued will not be processed.
+func (driver *MesosExecutorDriver) stop(stopStatus mesosproto.Status) error {
+	err := driver.messenger.Stop()
+	defer close(driver.destroyCh)
+	defer close(driver.stopCh)
+
+	driver.setStatus(stopStatus)
+	driver.setStopped(true)
+
+	return err
+}
+
+// Abort aborts the driver. It behaves like Stop, except that the driver is
+// left in the DRIVER_ABORTED state and cannot be restarted.
+func (driver *MesosExecutorDriver) Abort() (mesosproto.Status, error) {
+	if stat := driver.Status(); stat != mesosproto.Status_DRIVER_RUNNING {
+		return stat, fmt.Errorf("Unable to Abort, expecting status %s, but got %s", mesosproto.Status_DRIVER_RUNNING, stat)
+	}
+
+	log.Infoln("Aborting the executor driver")
+	abortStat := mesosproto.Status_DRIVER_ABORTED
+	return abortStat, driver.stop(abortStat)
+}
+
+// Join blocks until the driver has stopped, then returns the driver status.
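+//
+// A minimal executor main, sketched under the assumption that myExecutor
+// implements the Executor callback interface (the name is hypothetical):
+//
+//	driver, err := NewMesosExecutorDriver(DriverConfig{Executor: myExecutor})
+//	if err != nil {
+//		log.Fatal(err)
+//	}
+//	if stat, err := driver.Run(); err != nil { // Run() = Start() + Join()
+//		log.Fatalf("driver terminated in state %s: %v", stat, err)
+//	}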
+func (driver *MesosExecutorDriver) Join() (mesosproto.Status, error) { + log.Infoln("Waiting for the executor driver to stop") + if stat := driver.Status(); stat != mesosproto.Status_DRIVER_RUNNING { + return stat, fmt.Errorf("Unable to Join, expecting status %s, but got %s", mesosproto.Status_DRIVER_RUNNING, stat) + } + <-driver.stopCh // wait for stop signal + return driver.Status(), nil +} + +// Run starts the driver and calls Join() to wait for stop request. +func (driver *MesosExecutorDriver) Run() (mesosproto.Status, error) { + stat, err := driver.Start() + + if err != nil { + return driver.Stop() + } + + if stat != mesosproto.Status_DRIVER_RUNNING { + return stat, fmt.Errorf("Unable to continue to Run, expecting status %s, but got %s", mesosproto.Status_DRIVER_RUNNING, driver.status) + } + + return driver.Join() +} + +// SendStatusUpdate sends status updates to the slave. +func (driver *MesosExecutorDriver) SendStatusUpdate(taskStatus *mesosproto.TaskStatus) (mesosproto.Status, error) { + log.V(3).Infoln("Sending task status update: ", taskStatus.String()) + + if stat := driver.Status(); stat != mesosproto.Status_DRIVER_RUNNING { + return stat, fmt.Errorf("Unable to SendStatusUpdate, expecting driver.status %s, but got %s", mesosproto.Status_DRIVER_RUNNING, stat) + } + + if taskStatus.GetState() == mesosproto.TaskState_TASK_STAGING { + err := fmt.Errorf("Executor is not allowed to send TASK_STAGING status update. Aborting!") + log.Errorln(err) + if err0 := driver.stop(mesosproto.Status_DRIVER_ABORTED); err0 != nil { + log.Errorln("Error while stopping the driver", err0) + } + + return driver.Status(), err + } + + // Set up status update. + update := driver.makeStatusUpdate(taskStatus) + log.Infof("Executor sending status update %v\n", update.String()) + + // Capture the status update. + driver.updates[uuid.UUID(update.GetUuid()).String()] = update + + // Put the status update in the message. + message := &mesosproto.StatusUpdateMessage{ + Update: update, + Pid: proto.String(driver.self.String()), + } + // Send the message. + if err := driver.send(driver.slaveUPID, message); err != nil { + log.Errorf("Failed to send %v: %v\n", message, err) + return driver.status, err + } + + return driver.Status(), nil +} + +func (driver *MesosExecutorDriver) makeStatusUpdate(taskStatus *mesosproto.TaskStatus) *mesosproto.StatusUpdate { + now := float64(time.Now().Unix()) + // Fill in all the fields. + taskStatus.Timestamp = proto.Float64(now) + taskStatus.SlaveId = driver.slaveID + update := &mesosproto.StatusUpdate{ + FrameworkId: driver.frameworkID, + ExecutorId: driver.executorID, + SlaveId: driver.slaveID, + Status: taskStatus, + Timestamp: proto.Float64(now), + Uuid: uuid.NewUUID(), + } + return update +} + +// SendFrameworkMessage sends the framework message by sending a 'sendFrameworkMessageEvent' +// to the event loop, and receives the result from the response channel. +func (driver *MesosExecutorDriver) SendFrameworkMessage(data string) (mesosproto.Status, error) { + log.V(3).Infoln("Sending framework message", string(data)) + + if stat := driver.Status(); stat != mesosproto.Status_DRIVER_RUNNING { + return stat, fmt.Errorf("Unable to SendFrameworkMessage, expecting status %s, but got %s", mesosproto.Status_DRIVER_RUNNING, stat) + } + + message := &mesosproto.ExecutorToFrameworkMessage{ + SlaveId: driver.slaveID, + FrameworkId: driver.frameworkID, + ExecutorId: driver.executorID, + Data: []byte(data), + } + + // Send the message. 
+	if err := driver.send(driver.slaveUPID, message); err != nil {
+		log.Errorf("Failed to send message %v: %v\n", message, err)
+		return driver.Status(), err
+	}
+	return driver.Status(), nil
+}
diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/executor/executor_intgr_test.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/executor/executor_intgr_test.go
new file mode 100644
index 00000000000..38b72731872
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/executor/executor_intgr_test.go
@@ -0,0 +1,531 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package executor
+
+import (
+	"io/ioutil"
+	"net/http"
+	"net/url"
+	"os"
+	"strings"
+	"sync"
+	"testing"
+	"time"
+
+	"code.google.com/p/go-uuid/uuid"
+	"github.com/gogo/protobuf/proto"
+	log "github.com/golang/glog"
+	mesos "github.com/mesos/mesos-go/mesosproto"
+	util "github.com/mesos/mesos-go/mesosutil"
+	"github.com/mesos/mesos-go/testutil"
+	"github.com/stretchr/testify/assert"
+)
+
+// testExecutor is used for testing Executor callbacks.
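+// Each callback asserts on its arguments and then signals completion on the
+// ch channel, so tests can block on ch (with a timeout) to confirm that the
+// expected callback actually fired.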
+type testExecutor struct { + ch chan bool + wg *sync.WaitGroup + t *testing.T +} + +func newTestExecutor(t *testing.T) *testExecutor { + return &testExecutor{ch: make(chan bool), t: t} +} + +func (exec *testExecutor) Registered(driver ExecutorDriver, execinfo *mesos.ExecutorInfo, fwinfo *mesos.FrameworkInfo, slaveinfo *mesos.SlaveInfo) { + log.Infoln("Exec.Registered() called.") + assert.NotNil(exec.t, execinfo) + assert.NotNil(exec.t, fwinfo) + assert.NotNil(exec.t, slaveinfo) + exec.ch <- true +} + +func (exec *testExecutor) Reregistered(driver ExecutorDriver, slaveinfo *mesos.SlaveInfo) { + log.Infoln("Exec.Re-registered() called.") + assert.NotNil(exec.t, slaveinfo) + exec.ch <- true +} + +func (e *testExecutor) Disconnected(ExecutorDriver) {} + +func (exec *testExecutor) LaunchTask(driver ExecutorDriver, taskinfo *mesos.TaskInfo) { + log.Infoln("Exec.LaunchTask() called.") + assert.NotNil(exec.t, taskinfo) + assert.True(exec.t, util.NewTaskID("test-task-001").Equal(taskinfo.TaskId)) + exec.ch <- true +} + +func (exec *testExecutor) KillTask(driver ExecutorDriver, taskid *mesos.TaskID) { + log.Infoln("Exec.KillTask() called.") + assert.NotNil(exec.t, taskid) + assert.True(exec.t, util.NewTaskID("test-task-001").Equal(taskid)) + exec.ch <- true +} + +func (exec *testExecutor) FrameworkMessage(driver ExecutorDriver, message string) { + log.Infoln("Exec.FrameworkMessage() called.") + assert.NotNil(exec.t, message) + assert.Equal(exec.t, "Hello-Test", message) + exec.ch <- true +} + +func (exec *testExecutor) Shutdown(ExecutorDriver) { + log.Infoln("Exec.Shutdown() called.") + exec.ch <- true +} + +func (exec *testExecutor) Error(driver ExecutorDriver, err string) { + log.Infoln("Exec.Error() called.") + log.Infoln("Got error ", err) + driver.Stop() + exec.ch <- true +} + +// ------------------------ Test Functions -------------------- // + +func setTestEnv(t *testing.T) { + assert.NoError(t, os.Setenv("MESOS_FRAMEWORK_ID", frameworkID)) + assert.NoError(t, os.Setenv("MESOS_EXECUTOR_ID", executorID)) +} + +func newIntegrationTestDriver(t *testing.T, exec Executor) *MesosExecutorDriver { + dconfig := DriverConfig{ + Executor: exec, + } + driver, err := NewMesosExecutorDriver(dconfig) + if err != nil { + t.Fatal(err) + } + return driver +} + +func TestExecutorDriverRegisterExecutorMessage(t *testing.T) { + setTestEnv(t) + ch := make(chan bool) + server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) { + reqPath, err := url.QueryUnescape(req.URL.String()) + assert.NoError(t, err) + log.Infoln("RCVD request", reqPath) + + data, err := ioutil.ReadAll(req.Body) + if err != nil { + t.Fatalf("Missing RegisteredExecutor data from scheduler.") + } + defer req.Body.Close() + + message := new(mesos.RegisterExecutorMessage) + err = proto.Unmarshal(data, message) + assert.NoError(t, err) + assert.Equal(t, frameworkID, message.GetFrameworkId().GetValue()) + assert.Equal(t, executorID, message.GetExecutorId().GetValue()) + + ch <- true + + rsp.WriteHeader(http.StatusAccepted) + }) + + defer server.Close() + + exec := newTestExecutor(t) + exec.ch = ch + + driver := newIntegrationTestDriver(t, exec) + assert.True(t, driver.stopped) + + stat, err := driver.Start() + assert.NoError(t, err) + assert.False(t, driver.stopped) + assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat) + + select { + case <-ch: + case <-time.After(time.Millisecond * 2): + log.Errorf("Tired of waiting...") + } +} + +func TestExecutorDriverExecutorRegisteredEvent(t *testing.T) { + setTestEnv(t) + ch 
:= make(chan bool) + // Mock Slave process to respond to registration event. + server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) { + reqPath, err := url.QueryUnescape(req.URL.String()) + assert.NoError(t, err) + log.Infoln("RCVD request", reqPath) + rsp.WriteHeader(http.StatusAccepted) + }) + + defer server.Close() + + exec := newTestExecutor(t) + exec.ch = ch + exec.t = t + + // start + driver := newIntegrationTestDriver(t, exec) + stat, err := driver.Start() + assert.NoError(t, err) + assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat) + + //simulate sending ExecutorRegisteredMessage from server to exec pid. + pbMsg := &mesos.ExecutorRegisteredMessage{ + ExecutorInfo: util.NewExecutorInfo(util.NewExecutorID(executorID), nil), + FrameworkId: util.NewFrameworkID(frameworkID), + FrameworkInfo: util.NewFrameworkInfo("test", "test-framework", util.NewFrameworkID(frameworkID)), + SlaveId: util.NewSlaveID(slaveID), + SlaveInfo: &mesos.SlaveInfo{Hostname: proto.String("localhost")}, + } + c := testutil.NewMockMesosClient(t, server.PID) + c.SendMessage(driver.self, pbMsg) + assert.True(t, driver.connected) + select { + case <-ch: + case <-time.After(time.Millisecond * 2): + log.Errorf("Tired of waiting...") + } +} + +func TestExecutorDriverExecutorReregisteredEvent(t *testing.T) { + setTestEnv(t) + ch := make(chan bool) + // Mock Slave process to respond to registration event. + server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) { + reqPath, err := url.QueryUnescape(req.URL.String()) + assert.NoError(t, err) + log.Infoln("RCVD request", reqPath) + rsp.WriteHeader(http.StatusAccepted) + }) + + defer server.Close() + + exec := newTestExecutor(t) + exec.ch = ch + exec.t = t + + // start + driver := newIntegrationTestDriver(t, exec) + stat, err := driver.Start() + assert.NoError(t, err) + assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat) + + //simulate sending ExecutorRegisteredMessage from server to exec pid. + pbMsg := &mesos.ExecutorReregisteredMessage{ + SlaveId: util.NewSlaveID(slaveID), + SlaveInfo: &mesos.SlaveInfo{Hostname: proto.String("localhost")}, + } + c := testutil.NewMockMesosClient(t, server.PID) + c.SendMessage(driver.self, pbMsg) + assert.True(t, driver.connected) + select { + case <-ch: + case <-time.After(time.Millisecond * 2): + log.Errorf("Tired of waiting...") + } +} + +func TestExecutorDriverReconnectEvent(t *testing.T) { + setTestEnv(t) + ch := make(chan bool) + // Mock Slave process to respond to registration event. 
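+	// The mock slave is just an HTTP handler: the driver registers against
+	// server.PID, and the test pushes libprocess messages to driver.self via
+	// testutil.NewMockMesosClient, standing in for a real slave process.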
+ server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) { + reqPath, err := url.QueryUnescape(req.URL.String()) + assert.NoError(t, err) + log.Infoln("RCVD request", reqPath) + + // exec registration request + if strings.Contains(reqPath, "RegisterExecutorMessage") { + log.Infoln("Got Executor registration request") + } + + if strings.Contains(reqPath, "ReregisterExecutorMessage") { + log.Infoln("Got Executor Re-registration request") + ch <- true + } + + rsp.WriteHeader(http.StatusAccepted) + }) + + defer server.Close() + + exec := newTestExecutor(t) + exec.t = t + + // start + driver := newIntegrationTestDriver(t, exec) + stat, err := driver.Start() + assert.NoError(t, err) + assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat) + driver.connected = true + + // send "reconnect" event to driver + pbMsg := &mesos.ReconnectExecutorMessage{ + SlaveId: util.NewSlaveID(slaveID), + } + c := testutil.NewMockMesosClient(t, server.PID) + c.SendMessage(driver.self, pbMsg) + + select { + case <-ch: + case <-time.After(time.Millisecond * 2): + log.Errorf("Tired of waiting...") + } + +} + +func TestExecutorDriverRunTaskEvent(t *testing.T) { + setTestEnv(t) + ch := make(chan bool) + // Mock Slave process to respond to registration event. + server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) { + reqPath, err := url.QueryUnescape(req.URL.String()) + assert.NoError(t, err) + log.Infoln("RCVD request", reqPath) + rsp.WriteHeader(http.StatusAccepted) + }) + + defer server.Close() + + exec := newTestExecutor(t) + exec.ch = ch + exec.t = t + + // start + driver := newIntegrationTestDriver(t, exec) + stat, err := driver.Start() + assert.NoError(t, err) + assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat) + driver.connected = true + + // send runtask event to driver + pbMsg := &mesos.RunTaskMessage{ + FrameworkId: util.NewFrameworkID(frameworkID), + Framework: util.NewFrameworkInfo( + "test", "test-framework-001", util.NewFrameworkID(frameworkID), + ), + Pid: proto.String(server.PID.String()), + Task: util.NewTaskInfo( + "test-task", + util.NewTaskID("test-task-001"), + util.NewSlaveID(slaveID), + []*mesos.Resource{ + util.NewScalarResource("mem", 112), + util.NewScalarResource("cpus", 2), + }, + ), + } + + c := testutil.NewMockMesosClient(t, server.PID) + c.SendMessage(driver.self, pbMsg) + + select { + case <-ch: + case <-time.After(time.Millisecond * 2): + log.Errorf("Tired of waiting...") + } + +} + +func TestExecutorDriverKillTaskEvent(t *testing.T) { + setTestEnv(t) + ch := make(chan bool) + // Mock Slave process to respond to registration event. 
+ server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) { + reqPath, err := url.QueryUnescape(req.URL.String()) + assert.NoError(t, err) + log.Infoln("RCVD request", reqPath) + rsp.WriteHeader(http.StatusAccepted) + }) + + defer server.Close() + + exec := newTestExecutor(t) + exec.ch = ch + exec.t = t + + // start + driver := newIntegrationTestDriver(t, exec) + stat, err := driver.Start() + assert.NoError(t, err) + assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat) + driver.connected = true + + // send runtask event to driver + pbMsg := &mesos.KillTaskMessage{ + FrameworkId: util.NewFrameworkID(frameworkID), + TaskId: util.NewTaskID("test-task-001"), + } + + c := testutil.NewMockMesosClient(t, server.PID) + c.SendMessage(driver.self, pbMsg) + + select { + case <-ch: + case <-time.After(time.Millisecond * 2): + log.Errorf("Tired of waiting...") + } +} + +func TestExecutorDriverStatusUpdateAcknowledgement(t *testing.T) { + setTestEnv(t) + ch := make(chan bool) + // Mock Slave process to respond to registration event. + server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) { + reqPath, err := url.QueryUnescape(req.URL.String()) + assert.NoError(t, err) + log.Infoln("RCVD request", reqPath) + rsp.WriteHeader(http.StatusAccepted) + }) + + defer server.Close() + + exec := newTestExecutor(t) + exec.ch = ch + exec.t = t + + // start + driver := newIntegrationTestDriver(t, exec) + stat, err := driver.Start() + assert.NoError(t, err) + assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat) + driver.connected = true + + // send ACK from server + pbMsg := &mesos.StatusUpdateAcknowledgementMessage{ + SlaveId: util.NewSlaveID(slaveID), + FrameworkId: util.NewFrameworkID(frameworkID), + TaskId: util.NewTaskID("test-task-001"), + Uuid: []byte(uuid.NewRandom().String()), + } + + c := testutil.NewMockMesosClient(t, server.PID) + c.SendMessage(driver.self, pbMsg) + <-time.After(time.Millisecond * 2) +} + +func TestExecutorDriverFrameworkToExecutorMessageEvent(t *testing.T) { + setTestEnv(t) + ch := make(chan bool) + // Mock Slave process to respond to registration event. + server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) { + reqPath, err := url.QueryUnescape(req.URL.String()) + assert.NoError(t, err) + log.Infoln("RCVD request", reqPath) + rsp.WriteHeader(http.StatusAccepted) + }) + + defer server.Close() + + exec := newTestExecutor(t) + exec.ch = ch + exec.t = t + + // start + driver := newIntegrationTestDriver(t, exec) + stat, err := driver.Start() + assert.NoError(t, err) + assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat) + driver.connected = true + + // send runtask event to driver + pbMsg := &mesos.FrameworkToExecutorMessage{ + SlaveId: util.NewSlaveID(slaveID), + ExecutorId: util.NewExecutorID(executorID), + FrameworkId: util.NewFrameworkID(frameworkID), + Data: []byte("Hello-Test"), + } + + c := testutil.NewMockMesosClient(t, server.PID) + c.SendMessage(driver.self, pbMsg) + + select { + case <-ch: + case <-time.After(time.Millisecond * 2): + log.Errorf("Tired of waiting...") + } +} + +func TestExecutorDriverShutdownEvent(t *testing.T) { + setTestEnv(t) + ch := make(chan bool) + // Mock Slave process to respond to registration event. 
+ server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) { + reqPath, err := url.QueryUnescape(req.URL.String()) + assert.NoError(t, err) + log.Infoln("RCVD request", reqPath) + rsp.WriteHeader(http.StatusAccepted) + }) + + defer server.Close() + + exec := newTestExecutor(t) + exec.ch = ch + exec.t = t + + // start + driver := newIntegrationTestDriver(t, exec) + stat, err := driver.Start() + assert.NoError(t, err) + assert.Equal(t, mesos.Status_DRIVER_RUNNING, stat) + driver.connected = true + + // send runtask event to driver + pbMsg := &mesos.ShutdownExecutorMessage{} + + c := testutil.NewMockMesosClient(t, server.PID) + c.SendMessage(driver.self, pbMsg) + + select { + case <-ch: + case <-time.After(time.Millisecond * 5): + log.Errorf("Tired of waiting...") + } + + <-time.After(time.Millisecond * 5) // wait for shutdown to finish. + assert.Equal(t, mesos.Status_DRIVER_STOPPED, driver.Status()) +} + +func TestExecutorDriverError(t *testing.T) { + setTestEnv(t) + // Mock Slave process to respond to registration event. + server := testutil.NewMockSlaveHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) { + reqPath, err := url.QueryUnescape(req.URL.String()) + assert.NoError(t, err) + log.Infoln("RCVD request", reqPath) + rsp.WriteHeader(http.StatusAccepted) + }) + + ch := make(chan bool) + exec := newTestExecutor(t) + exec.ch = ch + exec.t = t + + driver := newIntegrationTestDriver(t, exec) + server.Close() // will cause error + // Run() cause async message processing to start + // Therefore, error-handling will be done via Executor.Error callaback. + stat, err := driver.Run() + assert.NoError(t, err) + assert.Equal(t, mesos.Status_DRIVER_STOPPED, stat) + + select { + case <-ch: + case <-time.After(time.Millisecond * 5): + log.Errorf("Tired of waiting...") + } +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/executor/executor_test.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/executor/executor_test.go new file mode 100644 index 00000000000..a2894b2c299 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/executor/executor_test.go @@ -0,0 +1,396 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package executor + +import ( + "fmt" + "os" + "testing" + "time" + + "github.com/mesos/mesos-go/healthchecker" + "github.com/mesos/mesos-go/mesosproto" + util "github.com/mesos/mesos-go/mesosutil" + "github.com/mesos/mesos-go/messenger" + "github.com/mesos/mesos-go/upid" + "github.com/stretchr/testify/assert" +) + +var ( + slavePID = "slave(1)@127.0.0.1:8080" + slaveID = "some-slave-id-uuid" + frameworkID = "some-framework-id-uuid" + executorID = "some-executor-id-uuid" +) + +func setEnvironments(t *testing.T, workDir string, checkpoint bool) { + assert.NoError(t, os.Setenv("MESOS_SLAVE_PID", slavePID)) + assert.NoError(t, os.Setenv("MESOS_SLAVE_ID", slaveID)) + assert.NoError(t, os.Setenv("MESOS_FRAMEWORK_ID", frameworkID)) + assert.NoError(t, os.Setenv("MESOS_EXECUTOR_ID", executorID)) + if len(workDir) > 0 { + assert.NoError(t, os.Setenv("MESOS_DIRECTORY", workDir)) + } + if checkpoint { + assert.NoError(t, os.Setenv("MESOS_CHECKPOINT", "1")) + } +} + +func clearEnvironments(t *testing.T) { + assert.NoError(t, os.Setenv("MESOS_SLAVE_PID", "")) + assert.NoError(t, os.Setenv("MESOS_SLAVE_ID", "")) + assert.NoError(t, os.Setenv("MESOS_FRAMEWORK_ID", "")) + assert.NoError(t, os.Setenv("MESOS_EXECUTOR_ID", "")) +} + +func newTestExecutorDriver(t *testing.T, exec Executor) *MesosExecutorDriver { + dconfig := DriverConfig{ + Executor: exec, + } + driver, err := NewMesosExecutorDriver(dconfig) + if err != nil { + t.Fatal(err) + } + return driver +} + +func createTestExecutorDriver(t *testing.T) ( + *MesosExecutorDriver, + *messenger.MockedMessenger, + *healthchecker.MockedHealthChecker) { + + exec := NewMockedExecutor() + + setEnvironments(t, "", false) + driver := newTestExecutorDriver(t, exec) + + messenger := messenger.NewMockedMessenger() + messenger.On("Start").Return(nil) + messenger.On("UPID").Return(&upid.UPID{}) + messenger.On("Send").Return(nil) + messenger.On("Stop").Return(nil) + + checker := healthchecker.NewMockedHealthChecker() + checker.On("Start").Return() + checker.On("Stop").Return() + + driver.messenger = messenger + return driver, messenger, checker +} + +func TestExecutorDriverStartFailedToParseEnvironment(t *testing.T) { + clearEnvironments(t) + exec := NewMockedExecutor() + exec.On("Error").Return(nil) + driver := newTestExecutorDriver(t, exec) + assert.Nil(t, driver) +} + +func TestExecutorDriverStartFailedToStartMessenger(t *testing.T) { + exec := NewMockedExecutor() + + setEnvironments(t, "", false) + driver := newTestExecutorDriver(t, exec) + assert.NotNil(t, driver) + messenger := messenger.NewMockedMessenger() + driver.messenger = messenger + + // Set expections and return values. + messenger.On("Start").Return(fmt.Errorf("messenger failed to start")) + messenger.On("Stop").Return(nil) + + status, err := driver.Start() + assert.Error(t, err) + assert.Equal(t, mesosproto.Status_DRIVER_NOT_STARTED, status) + + messenger.Stop() + + messenger.AssertNumberOfCalls(t, "Start", 1) + messenger.AssertNumberOfCalls(t, "Stop", 1) +} + +func TestExecutorDriverStartFailedToSendRegisterMessage(t *testing.T) { + exec := NewMockedExecutor() + + setEnvironments(t, "", false) + driver := newTestExecutorDriver(t, exec) + messenger := messenger.NewMockedMessenger() + driver.messenger = messenger + + // Set expections and return values. 
+ messenger.On("Start").Return(nil) + messenger.On("UPID").Return(&upid.UPID{}) + messenger.On("Send").Return(fmt.Errorf("messenger failed to send")) + messenger.On("Stop").Return(nil) + + status, err := driver.Start() + assert.Error(t, err) + assert.Equal(t, mesosproto.Status_DRIVER_NOT_STARTED, status) + + messenger.AssertNumberOfCalls(t, "Start", 1) + messenger.AssertNumberOfCalls(t, "UPID", 1) + messenger.AssertNumberOfCalls(t, "Send", 1) + messenger.AssertNumberOfCalls(t, "Stop", 1) +} + +func TestExecutorDriverStartSucceed(t *testing.T) { + setEnvironments(t, "", false) + + exec := NewMockedExecutor() + exec.On("Error").Return(nil) + + driver := newTestExecutorDriver(t, exec) + + messenger := messenger.NewMockedMessenger() + driver.messenger = messenger + messenger.On("Start").Return(nil) + messenger.On("UPID").Return(&upid.UPID{}) + messenger.On("Send").Return(nil) + messenger.On("Stop").Return(nil) + + checker := healthchecker.NewMockedHealthChecker() + checker.On("Start").Return() + checker.On("Stop").Return() + + assert.True(t, driver.stopped) + status, err := driver.Start() + assert.False(t, driver.stopped) + assert.NoError(t, err) + assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, status) + + messenger.AssertNumberOfCalls(t, "Start", 1) + messenger.AssertNumberOfCalls(t, "UPID", 1) + messenger.AssertNumberOfCalls(t, "Send", 1) +} + +func TestExecutorDriverRun(t *testing.T) { + setEnvironments(t, "", false) + + // Set expections and return values. + messenger := messenger.NewMockedMessenger() + messenger.On("Start").Return(nil) + messenger.On("UPID").Return(&upid.UPID{}) + messenger.On("Send").Return(nil) + messenger.On("Stop").Return(nil) + + exec := NewMockedExecutor() + exec.On("Error").Return(nil) + + driver := newTestExecutorDriver(t, exec) + driver.messenger = messenger + assert.True(t, driver.stopped) + + checker := healthchecker.NewMockedHealthChecker() + checker.On("Start").Return() + checker.On("Stop").Return() + + go func() { + stat, err := driver.Run() + assert.NoError(t, err) + assert.Equal(t, mesosproto.Status_DRIVER_STOPPED, stat) + }() + time.Sleep(time.Millisecond * 1) // allow for things to settle + assert.False(t, driver.stopped) + assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, driver.Status()) + + // mannually close it all + driver.setStatus(mesosproto.Status_DRIVER_STOPPED) + close(driver.stopCh) + time.Sleep(time.Millisecond * 1) +} + +func TestExecutorDriverJoin(t *testing.T) { + setEnvironments(t, "", false) + + // Set expections and return values. + messenger := messenger.NewMockedMessenger() + messenger.On("Start").Return(nil) + messenger.On("UPID").Return(&upid.UPID{}) + messenger.On("Send").Return(nil) + messenger.On("Stop").Return(nil) + + exec := NewMockedExecutor() + exec.On("Error").Return(nil) + + driver := newTestExecutorDriver(t, exec) + driver.messenger = messenger + assert.True(t, driver.stopped) + + checker := healthchecker.NewMockedHealthChecker() + checker.On("Start").Return() + checker.On("Stop").Return() + + stat, err := driver.Start() + assert.NoError(t, err) + assert.False(t, driver.stopped) + assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, stat) + + testCh := make(chan mesosproto.Status) + go func() { + stat, _ := driver.Join() + testCh <- stat + }() + + close(driver.stopCh) // manually stopping + stat = <-testCh // when Stop() is called, stat will be DRIVER_STOPPED. 
+ +} + +func TestExecutorDriverAbort(t *testing.T) { + statusChan := make(chan mesosproto.Status) + driver, messenger, _ := createTestExecutorDriver(t) + + assert.True(t, driver.stopped) + stat, err := driver.Start() + assert.False(t, driver.stopped) + assert.NoError(t, err) + assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, stat) + go func() { + st, _ := driver.Join() + statusChan <- st + }() + + stat, err = driver.Abort() + assert.NoError(t, err) + assert.Equal(t, mesosproto.Status_DRIVER_ABORTED, stat) + assert.Equal(t, mesosproto.Status_DRIVER_ABORTED, <-statusChan) + assert.True(t, driver.stopped) + + // Abort for the second time, should return directly. + stat, err = driver.Abort() + assert.Error(t, err) + assert.Equal(t, mesosproto.Status_DRIVER_ABORTED, stat) + stat, err = driver.Stop() + assert.Error(t, err) + assert.Equal(t, mesosproto.Status_DRIVER_ABORTED, stat) + assert.True(t, driver.stopped) + + // Restart should not start. + stat, err = driver.Start() + assert.True(t, driver.stopped) + assert.Error(t, err) + assert.Equal(t, mesosproto.Status_DRIVER_ABORTED, stat) + + messenger.AssertNumberOfCalls(t, "Start", 1) + messenger.AssertNumberOfCalls(t, "UPID", 1) + messenger.AssertNumberOfCalls(t, "Send", 1) + messenger.AssertNumberOfCalls(t, "Stop", 1) +} + +func TestExecutorDriverStop(t *testing.T) { + statusChan := make(chan mesosproto.Status) + driver, messenger, _ := createTestExecutorDriver(t) + + assert.True(t, driver.stopped) + stat, err := driver.Start() + assert.False(t, driver.stopped) + assert.NoError(t, err) + assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, stat) + go func() { + stat, _ := driver.Join() + statusChan <- stat + }() + stat, err = driver.Stop() + assert.NoError(t, err) + assert.Equal(t, mesosproto.Status_DRIVER_STOPPED, stat) + assert.Equal(t, mesosproto.Status_DRIVER_STOPPED, <-statusChan) + assert.True(t, driver.stopped) + + // Stop for the second time, should return directly. + stat, err = driver.Stop() + assert.Error(t, err) + assert.Equal(t, mesosproto.Status_DRIVER_STOPPED, stat) + stat, err = driver.Abort() + assert.Error(t, err) + assert.Equal(t, mesosproto.Status_DRIVER_STOPPED, stat) + assert.True(t, driver.stopped) + + // Restart should not start. 
+ stat, err = driver.Start() + assert.True(t, driver.stopped) + assert.Error(t, err) + assert.Equal(t, mesosproto.Status_DRIVER_STOPPED, stat) + + messenger.AssertNumberOfCalls(t, "Start", 1) + messenger.AssertNumberOfCalls(t, "UPID", 1) + messenger.AssertNumberOfCalls(t, "Send", 1) + messenger.AssertNumberOfCalls(t, "Stop", 1) +} + +func TestExecutorDriverSendStatusUpdate(t *testing.T) { + + driver, _, _ := createTestExecutorDriver(t) + + stat, err := driver.Start() + assert.NoError(t, err) + assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, stat) + driver.connected = true + driver.stopped = false + + taskStatus := util.NewTaskStatus( + util.NewTaskID("test-task-001"), + mesosproto.TaskState_TASK_RUNNING, + ) + + stat, err = driver.SendStatusUpdate(taskStatus) + assert.NoError(t, err) + assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, stat) +} + +func TestExecutorDriverSendStatusUpdateStaging(t *testing.T) { + + driver, _, _ := createTestExecutorDriver(t) + + exec := NewMockedExecutor() + exec.On("Error").Return(nil) + driver.exec = exec + + stat, err := driver.Start() + assert.NoError(t, err) + assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, stat) + driver.connected = true + driver.stopped = false + + taskStatus := util.NewTaskStatus( + util.NewTaskID("test-task-001"), + mesosproto.TaskState_TASK_STAGING, + ) + + stat, err = driver.SendStatusUpdate(taskStatus) + assert.Error(t, err) + assert.Equal(t, mesosproto.Status_DRIVER_ABORTED, stat) +} + +func TestExecutorDriverSendFrameworkMessage(t *testing.T) { + + driver, _, _ := createTestExecutorDriver(t) + + stat, err := driver.SendFrameworkMessage("failed") + assert.Error(t, err) + + stat, err = driver.Start() + assert.NoError(t, err) + assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, stat) + driver.connected = true + driver.stopped = false + + stat, err = driver.SendFrameworkMessage("Testing Mesos") + assert.NoError(t, err) + assert.Equal(t, mesosproto.Status_DRIVER_RUNNING, stat) +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/executor/mocked_executor.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/executor/mocked_executor.go new file mode 100644 index 00000000000..2b4853f3d55 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/executor/mocked_executor.go @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package executor + +import ( + "github.com/mesos/mesos-go/mesosproto" + "github.com/stretchr/testify/mock" +) + +// MockedExecutor is used for testing the executor driver. +type MockedExecutor struct { + mock.Mock +} + +// NewMockedExecutor returns a mocked executor. +func NewMockedExecutor() *MockedExecutor { + return &MockedExecutor{} +} + +// Registered implements the Registered handler. 
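+// Tests arm the mock with testify expectations before use, for example:
+//
+//	exec := NewMockedExecutor()
+//	exec.On("Registered").Return(nil)
+//
+// so that the e.Called() bookkeeping below can record and verify the call.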
+func (e *MockedExecutor) Registered(ExecutorDriver, *mesosproto.ExecutorInfo, *mesosproto.FrameworkInfo, *mesosproto.SlaveInfo) { + e.Called() +} + +// Reregistered implements the Reregistered handler. +func (e *MockedExecutor) Reregistered(ExecutorDriver, *mesosproto.SlaveInfo) { + e.Called() +} + +// Disconnected implements the Disconnected handler. +func (e *MockedExecutor) Disconnected(ExecutorDriver) { + e.Called() +} + +// LaunchTask implements the LaunchTask handler. +func (e *MockedExecutor) LaunchTask(ExecutorDriver, *mesosproto.TaskInfo) { + e.Called() +} + +// KillTask implements the KillTask handler. +func (e *MockedExecutor) KillTask(ExecutorDriver, *mesosproto.TaskID) { + e.Called() +} + +// FrameworkMessage implements the FrameworkMessage handler. +func (e *MockedExecutor) FrameworkMessage(ExecutorDriver, string) { + e.Called() +} + +// Shutdown implements the Shutdown handler. +func (e *MockedExecutor) Shutdown(ExecutorDriver) { + e.Called() +} + +// Error implements the Error handler. +func (e *MockedExecutor) Error(ExecutorDriver, string) { + e.Called() +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/README.md b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/README.md new file mode 100644 index 00000000000..da0673e78a0 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/README.md @@ -0,0 +1,39 @@ +####Benchmark of the messenger. + +```shell +$ go test -v -run=Benckmark* -bench=. +PASS +BenchmarkMessengerSendSmallMessage 50000 70568 ns/op +BenchmarkMessengerSendMediumMessage 50000 70265 ns/op +BenchmarkMessengerSendBigMessage 50000 72693 ns/op +BenchmarkMessengerSendLargeMessage 50000 72896 ns/op +BenchmarkMessengerSendMixedMessage 50000 72631 ns/op +BenchmarkMessengerSendRecvSmallMessage 20000 78409 ns/op +BenchmarkMessengerSendRecvMediumMessage 20000 80471 ns/op +BenchmarkMessengerSendRecvBigMessage 20000 82629 ns/op +BenchmarkMessengerSendRecvLargeMessage 20000 85987 ns/op +BenchmarkMessengerSendRecvMixedMessage 20000 83678 ns/op +ok github.com/mesos/mesos-go/messenger 115.135s + +$ go test -v -run=Benckmark* -bench=. -cpu=4 -send-routines=4 2>/dev/null +PASS +BenchmarkMessengerSendSmallMessage-4 50000 35529 ns/op +BenchmarkMessengerSendMediumMessage-4 50000 35997 ns/op +BenchmarkMessengerSendBigMessage-4 50000 36871 ns/op +BenchmarkMessengerSendLargeMessage-4 50000 37310 ns/op +BenchmarkMessengerSendMixedMessage-4 50000 37419 ns/op +BenchmarkMessengerSendRecvSmallMessage-4 50000 39320 ns/op +BenchmarkMessengerSendRecvMediumMessage-4 50000 41990 ns/op +BenchmarkMessengerSendRecvBigMessage-4 50000 42157 ns/op +BenchmarkMessengerSendRecvLargeMessage-4 50000 45472 ns/op +BenchmarkMessengerSendRecvMixedMessage-4 50000 47393 ns/op +ok github.com/mesos/mesos-go/messenger 105.173s +``` + +####environment: + +``` +OS: Linux yifan-laptop 3.13.0-32-generic #57-Ubuntu SMP Tue Jul 15 03:51:08 UTC 2014 x86_64 x86_64 x86_64 GNU/Linux +CPU: Intel(R) Core(TM) i5-3210M CPU @ 2.50GHz +MEM: 4G DDR3 1600MHz +``` diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/doc.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/doc.go new file mode 100644 index 00000000000..3b7bd8147c8 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/doc.go @@ -0,0 +1,7 @@ +/* +Package messenger includes a messenger and a transporter. +The messenger provides interfaces to send a protobuf message +through the underlying transporter. It also dispatches messages +to installed handlers. 
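+
+A rough usage sketch (proc, hostname, bindingAddress, bindingPort, slavePID
+and handleRegistered are placeholders, not names defined by this package):
+
+	proc := process.New("example")
+	m, err := messenger.ForHostname(proc, hostname, bindingAddress, bindingPort)
+	if err != nil {
+		// handle error...
+	}
+	m.Install(handleRegistered, &mesosproto.ExecutorRegisteredMessage{})
+	if err := m.Start(); err != nil {
+		// handle error...
+	}
+	defer m.Stop()
+	m.Send(context.TODO(), slavePID, &mesosproto.RegisterExecutorMessage{})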
+*/ +package messenger diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/http_transporter.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/http_transporter.go new file mode 100644 index 00000000000..30370b04835 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/http_transporter.go @@ -0,0 +1,371 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package messenger + +import ( + "bytes" + "fmt" + "github.com/mesos/mesos-go/upid" + "io/ioutil" + "net" + "net/http" + "net/url" + "strings" + "sync" + "sync/atomic" + "syscall" + "time" + + log "github.com/golang/glog" + "golang.org/x/net/context" +) + +var ( + discardOnStopError = fmt.Errorf("discarding message because transport is shutting down") +) + +// HTTPTransporter implements the interfaces of the Transporter. +type HTTPTransporter struct { + // If the host is empty("") then it will listen on localhost. + // If the port is empty("") then it will listen on random port. + upid *upid.UPID + listener net.Listener // TODO(yifan): Change to TCPListener. + mux *http.ServeMux + tr *http.Transport + client *http.Client // TODO(yifan): Set read/write deadline. + messageQueue chan *Message + address net.IP // optional binding address + started chan struct{} + stopped chan struct{} + stopping int32 + lifeLock sync.Mutex // protect lifecycle (start/stop) funcs +} + +// NewHTTPTransporter creates a new http transporter with an optional binding address. +func NewHTTPTransporter(upid *upid.UPID, address net.IP) *HTTPTransporter { + tr := &http.Transport{} + result := &HTTPTransporter{ + upid: upid, + messageQueue: make(chan *Message, defaultQueueSize), + mux: http.NewServeMux(), + client: &http.Client{Transport: tr}, + tr: tr, + address: address, + started: make(chan struct{}), + stopped: make(chan struct{}), + } + close(result.stopped) + return result +} + +// some network errors are probably recoverable, attempt to determine that here. 
+func isRecoverableError(err error) bool { + if urlErr, ok := err.(*url.Error); ok { + log.V(2).Infof("checking url.Error for recoverability") + return urlErr.Op == "Post" && isRecoverableError(urlErr.Err) + } else if netErr, ok := err.(*net.OpError); ok && netErr.Err != nil { + log.V(2).Infof("checking net.OpError for recoverability: %#v", err) + if netErr.Temporary() { + return true + } + //TODO(jdef) this is pretty hackish, there's probably a better way + return (netErr.Op == "dial" && netErr.Net == "tcp" && netErr.Err == syscall.ECONNREFUSED) + } + log.V(2).Infof("unrecoverable error: %#v", err) + return false +} + +type recoverableError struct { + Err error +} + +func (e *recoverableError) Error() string { + if e == nil { + return "" + } + return e.Err.Error() +} + +// Send sends the message to its specified upid. +func (t *HTTPTransporter) Send(ctx context.Context, msg *Message) (sendError error) { + log.V(2).Infof("Sending message to %v via http\n", msg.UPID) + req, err := t.makeLibprocessRequest(msg) + if err != nil { + log.Errorf("Failed to make libprocess request: %v\n", err) + return err + } + duration := 1 * time.Second + for attempt := 0; attempt < 5; attempt++ { //TODO(jdef) extract/parameterize constant + if sendError != nil { + duration *= 2 + log.Warningf("attempting to recover from error '%v', waiting before retry: %v", sendError, duration) + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(duration): + // ..retry request, continue + case <-t.stopped: + return discardOnStopError + } + } + sendError = t.httpDo(ctx, req, func(resp *http.Response, err error) error { + if err != nil { + if isRecoverableError(err) { + return &recoverableError{Err: err} + } + log.Infof("Failed to POST: %v\n", err) + return err + } + defer resp.Body.Close() + + // ensure master acknowledgement. + if (resp.StatusCode != http.StatusOK) && + (resp.StatusCode != http.StatusAccepted) { + msg := fmt.Sprintf("Master %s rejected %s. Returned status %s.", + msg.UPID, msg.RequestURI(), resp.Status) + log.Warning(msg) + return fmt.Errorf(msg) + } + return nil + }) + if sendError == nil { + // success + return + } else if _, ok := sendError.(*recoverableError); ok { + // recoverable, attempt backoff? + continue + } + // unrecoverable + break + } + if recoverable, ok := sendError.(*recoverableError); ok { + sendError = recoverable.Err + } + return +} + +func (t *HTTPTransporter) httpDo(ctx context.Context, req *http.Request, f func(*http.Response, error) error) error { + select { + case <-ctx.Done(): + return ctx.Err() + case <-t.stopped: + return discardOnStopError + default: // continue + } + + c := make(chan error, 1) + go func() { c <- f(t.client.Do(req)) }() + select { + case <-ctx.Done(): + t.tr.CancelRequest(req) + <-c // Wait for f to return. + return ctx.Err() + case err := <-c: + return err + case <-t.stopped: + t.tr.CancelRequest(req) + <-c // Wait for f to return. + return discardOnStopError + } +} + +// Recv returns the message, one at a time. +func (t *HTTPTransporter) Recv() (*Message, error) { + select { + default: + select { + case msg := <-t.messageQueue: + return msg, nil + case <-t.stopped: + } + case <-t.stopped: + } + return nil, discardOnStopError +} + +//Inject places a message into the incoming message queue. 
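+// It is mostly useful in tests, where a message can be handed to a
+// transporter directly instead of arriving over HTTP, e.g. (pid, name and pb
+// being placeholders):
+//
+//	receiver.Inject(context.TODO(), &Message{UPID: pid, Name: name, ProtoMessage: pb})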
+func (t *HTTPTransporter) Inject(ctx context.Context, msg *Message) error { + select { + case <-ctx.Done(): + return ctx.Err() + case <-t.stopped: + return discardOnStopError + default: // continue + } + + select { + case t.messageQueue <- msg: + return nil + case <-ctx.Done(): + return ctx.Err() + case <-t.stopped: + return discardOnStopError + } +} + +// Install the request URI according to the message's name. +func (t *HTTPTransporter) Install(msgName string) { + requestURI := fmt.Sprintf("/%s/%s", t.upid.ID, msgName) + t.mux.HandleFunc(requestURI, t.messageHandler) +} + +// Listen starts listen on UPID. If UPID is empty, the transporter +// will listen on a random port, and then fill the UPID with the +// host:port it is listening. +func (t *HTTPTransporter) listen() error { + var host string + if t.address != nil { + host = t.address.String() + } else { + host = t.upid.Host + } + port := t.upid.Port + // NOTE: Explicitly specifies IPv4 because Libprocess + // only supports IPv4 for now. + ln, err := net.Listen("tcp4", net.JoinHostPort(host, port)) + if err != nil { + log.Errorf("HTTPTransporter failed to listen: %v\n", err) + return err + } + // Save the host:port in case they are not specified in upid. + host, port, _ = net.SplitHostPort(ln.Addr().String()) + t.upid.Host, t.upid.Port = host, port + t.listener = ln + return nil +} + +// Start starts the http transporter +func (t *HTTPTransporter) Start() <-chan error { + t.lifeLock.Lock() + defer t.lifeLock.Unlock() + + select { + case <-t.started: + // already started + return nil + case <-t.stopped: + defer close(t.started) + t.stopped = make(chan struct{}) + atomic.StoreInt32(&t.stopping, 0) + default: + panic("not started, not stopped, what am i? how can i start?") + } + + ch := make(chan error, 1) + if err := t.listen(); err != nil { + ch <- err + } else { + // TODO(yifan): Set read/write deadline. + log.Infof("http transport listening on %v", t.listener.Addr()) + go func() { + err := http.Serve(t.listener, t.mux) + if atomic.CompareAndSwapInt32(&t.stopping, 1, 0) { + ch <- nil + } else { + ch <- err + } + }() + } + return ch +} + +// Stop stops the http transporter by closing the listener. +func (t *HTTPTransporter) Stop(graceful bool) error { + t.lifeLock.Lock() + defer t.lifeLock.Unlock() + + select { + case <-t.stopped: + // already stopped + return nil + case <-t.started: + defer close(t.stopped) + t.started = make(chan struct{}) + default: + panic("not started, not stopped, what am i? how can i stop?") + } + //TODO(jdef) if graceful, wait for pending requests to terminate + atomic.StoreInt32(&t.stopping, 1) + err := t.listener.Close() + return err +} + +// UPID returns the upid of the transporter. +func (t *HTTPTransporter) UPID() *upid.UPID { + return t.upid +} + +func (t *HTTPTransporter) messageHandler(w http.ResponseWriter, r *http.Request) { + // Verify it's a libprocess request. 
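+	// (In practice a libprocess request is a POST carrying the sender's PID,
+	// either in the "Libprocess-From" header or in a "libprocess/..."
+	// User-Agent; getLibprocessFrom below rejects anything else.)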
+ from, err := getLibprocessFrom(r) + if err != nil { + log.Errorf("Ignoring the request, because it's not a libprocess request: %v\n", err) + w.WriteHeader(http.StatusBadRequest) + return + } + data, err := ioutil.ReadAll(r.Body) + if err != nil { + log.Errorf("Failed to read HTTP body: %v\n", err) + w.WriteHeader(http.StatusBadRequest) + return + } + log.V(2).Infof("Receiving message from %v, length %v\n", from, len(data)) + w.WriteHeader(http.StatusAccepted) + t.messageQueue <- &Message{ + UPID: from, + Name: extractNameFromRequestURI(r.RequestURI), + Bytes: data, + } +} + +func (t *HTTPTransporter) makeLibprocessRequest(msg *Message) (*http.Request, error) { + if msg.UPID == nil { + panic(fmt.Sprintf("message is missing UPID: %+v", msg)) + } + hostport := net.JoinHostPort(msg.UPID.Host, msg.UPID.Port) + targetURL := fmt.Sprintf("http://%s%s", hostport, msg.RequestURI()) + log.V(2).Infof("libproc target URL %s", targetURL) + req, err := http.NewRequest("POST", targetURL, bytes.NewReader(msg.Bytes)) + if err != nil { + log.Errorf("Failed to create request: %v\n", err) + return nil, err + } + req.Header.Add("Libprocess-From", t.upid.String()) + req.Header.Add("Content-Type", "application/x-protobuf") + req.Header.Add("Connection", "Keep-Alive") + + return req, nil +} + +func getLibprocessFrom(r *http.Request) (*upid.UPID, error) { + if r.Method != "POST" { + return nil, fmt.Errorf("Not a POST request") + } + ua, ok := r.Header["User-Agent"] + if ok && strings.HasPrefix(ua[0], "libprocess/") { + // TODO(yifan): Just take the first field for now. + return upid.Parse(ua[0][len("libprocess/"):]) + } + lf, ok := r.Header["Libprocess-From"] + if ok { + // TODO(yifan): Just take the first field for now. + return upid.Parse(lf[0]) + } + return nil, fmt.Errorf("Cannot find 'User-Agent' or 'Libprocess-From'") +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/http_transporter_test.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/http_transporter_test.go new file mode 100644 index 00000000000..e1d14096526 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/http_transporter_test.go @@ -0,0 +1,273 @@ +package messenger + +import ( + "fmt" + "net/http" + "net/http/httptest" + "regexp" + "strconv" + "testing" + "time" + + "github.com/mesos/mesos-go/messenger/testmessage" + "github.com/mesos/mesos-go/upid" + "github.com/stretchr/testify/assert" + "golang.org/x/net/context" +) + +func TestTransporterNew(t *testing.T) { + id, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort())) + assert.NoError(t, err) + trans := NewHTTPTransporter(id, nil) + assert.NotNil(t, trans) + assert.NotNil(t, trans.upid) + assert.NotNil(t, trans.messageQueue) + assert.NotNil(t, trans.client) +} + +func TestTransporterSend(t *testing.T) { + idreg := regexp.MustCompile(`[A-Za-z0-9_\-]+@[A-Za-z0-9_\-\.]+:[0-9]+`) + serverId := "testserver" + + // setup mesos client-side + fromUpid, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort())) + assert.NoError(t, err) + + protoMsg := testmessage.GenerateSmallMessage() + msgName := getMessageName(protoMsg) + msg := &Message{ + Name: msgName, + ProtoMessage: protoMsg, + } + requestURI := fmt.Sprintf("/%s/%s", serverId, msgName) + + // setup server-side + msgReceived := make(chan struct{}) + srv := makeMockServer(requestURI, func(rsp http.ResponseWriter, req *http.Request) { + defer close(msgReceived) + from := req.Header.Get("Libprocess-From") + assert.NotEmpty(t, from) + assert.True(t, 
idreg.MatchString(from), fmt.Sprintf("regexp failed for '%v'", from)) + }) + defer srv.Close() + toUpid, err := upid.Parse(fmt.Sprintf("%s@%s", serverId, srv.Listener.Addr().String())) + assert.NoError(t, err) + + // make transport call. + transport := NewHTTPTransporter(fromUpid, nil) + errch := transport.Start() + defer transport.Stop(false) + + msg.UPID = toUpid + err = transport.Send(context.TODO(), msg) + assert.NoError(t, err) + + select { + case <-time.After(2 * time.Second): + t.Fatalf("timed out waiting for message receipt") + case <-msgReceived: + case err := <-errch: + if err != nil { + t.Fatalf(err.Error()) + } + } +} + +func TestTransporter_DiscardedSend(t *testing.T) { + serverId := "testserver" + + // setup mesos client-side + fromUpid, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort())) + assert.NoError(t, err) + + protoMsg := testmessage.GenerateSmallMessage() + msgName := getMessageName(protoMsg) + msg := &Message{ + Name: msgName, + ProtoMessage: protoMsg, + } + requestURI := fmt.Sprintf("/%s/%s", serverId, msgName) + + // setup server-side + msgReceived := make(chan struct{}) + srv := makeMockServer(requestURI, func(rsp http.ResponseWriter, req *http.Request) { + close(msgReceived) + time.Sleep(2 * time.Second) // long enough that we should be able to stop it + }) + defer srv.Close() + toUpid, err := upid.Parse(fmt.Sprintf("%s@%s", serverId, srv.Listener.Addr().String())) + assert.NoError(t, err) + + // make transport call. + transport := NewHTTPTransporter(fromUpid, nil) + errch := transport.Start() + defer transport.Stop(false) + + msg.UPID = toUpid + senderr := make(chan struct{}) + go func() { + defer close(senderr) + err = transport.Send(context.TODO(), msg) + assert.NotNil(t, err) + assert.Equal(t, discardOnStopError, err) + }() + + // wait for message to be received + select { + case <-time.After(2 * time.Second): + t.Fatalf("timed out waiting for message receipt") + return + case <-msgReceived: + transport.Stop(false) + case err := <-errch: + if err != nil { + t.Fatalf(err.Error()) + return + } + } + + // wait for send() to process discarded-error + select { + case <-time.After(5 * time.Second): + t.Fatalf("timed out waiting for aborted send") + return + case <-senderr: // continue + } +} + +func TestTransporterStartAndRcvd(t *testing.T) { + serverId := "testserver" + serverPort := getNewPort() + serverAddr := "127.0.0.1:" + strconv.Itoa(serverPort) + protoMsg := testmessage.GenerateSmallMessage() + msgName := getMessageName(protoMsg) + ctrl := make(chan struct{}) + + // setup receiver (server) process + rcvPid, err := upid.Parse(fmt.Sprintf("%s@%s", serverId, serverAddr)) + assert.NoError(t, err) + receiver := NewHTTPTransporter(rcvPid, nil) + receiver.Install(msgName) + + go func() { + defer close(ctrl) + msg, err := receiver.Recv() + assert.Nil(t, err) + assert.NotNil(t, msg) + if msg != nil { + assert.Equal(t, msgName, msg.Name) + } + }() + + errch := receiver.Start() + defer receiver.Stop(false) + assert.NotNil(t, errch) + + time.Sleep(time.Millisecond * 7) // time to catchup + + // setup sender (client) process + sndUpid, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort())) + assert.NoError(t, err) + + sender := NewHTTPTransporter(sndUpid, nil) + msg := &Message{ + UPID: rcvPid, + Name: msgName, + ProtoMessage: protoMsg, + } + errch2 := sender.Start() + defer sender.Stop(false) + + sender.Send(context.TODO(), msg) + + select { + case <-time.After(time.Second * 5): + t.Fatalf("Timeout") + case <-ctrl: + case err := <-errch: + 
if err != nil { + t.Fatalf(err.Error()) + } + case err := <-errch2: + if err != nil { + t.Fatalf(err.Error()) + } + } +} + +func TestTransporterStartAndInject(t *testing.T) { + serverId := "testserver" + serverPort := getNewPort() + serverAddr := "127.0.0.1:" + strconv.Itoa(serverPort) + protoMsg := testmessage.GenerateSmallMessage() + msgName := getMessageName(protoMsg) + ctrl := make(chan struct{}) + + // setup receiver (server) process + rcvPid, err := upid.Parse(fmt.Sprintf("%s@%s", serverId, serverAddr)) + assert.NoError(t, err) + receiver := NewHTTPTransporter(rcvPid, nil) + receiver.Install(msgName) + errch := receiver.Start() + defer receiver.Stop(false) + + msg := &Message{ + UPID: rcvPid, + Name: msgName, + ProtoMessage: protoMsg, + } + + receiver.Inject(context.TODO(), msg) + + go func() { + defer close(ctrl) + msg, err := receiver.Recv() + assert.Nil(t, err) + assert.NotNil(t, msg) + if msg != nil { + assert.Equal(t, msgName, msg.Name) + } + }() + + select { + case <-time.After(time.Second * 1): + t.Fatalf("Timeout") + case <-ctrl: + case err := <-errch: + if err != nil { + t.Fatalf(err.Error()) + } + } +} + +func TestTransporterStartAndStop(t *testing.T) { + serverId := "testserver" + serverPort := getNewPort() + serverAddr := "127.0.0.1:" + strconv.Itoa(serverPort) + + // setup receiver (server) process + rcvPid, err := upid.Parse(fmt.Sprintf("%s@%s", serverId, serverAddr)) + assert.NoError(t, err) + receiver := NewHTTPTransporter(rcvPid, nil) + + errch := receiver.Start() + assert.NotNil(t, errch) + + time.Sleep(1 * time.Second) + receiver.Stop(false) + + select { + case <-time.After(2 * time.Second): + t.Fatalf("timed out waiting for transport to stop") + case err := <-errch: + if err != nil { + t.Fatalf(err.Error()) + } + } +} + +func makeMockServer(path string, handler func(rsp http.ResponseWriter, req *http.Request)) *httptest.Server { + mux := http.NewServeMux() + mux.HandleFunc(path, handler) + return httptest.NewServer(mux) +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/message.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/message.go new file mode 100644 index 00000000000..331317f45cd --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/message.go @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package messenger + +import ( + "fmt" + "strings" + + "github.com/gogo/protobuf/proto" + "github.com/mesos/mesos-go/upid" +) + +// Message defines the type that passes in the Messenger. +type Message struct { + UPID *upid.UPID + Name string + ProtoMessage proto.Message + Bytes []byte +} + +// RequestURI returns the request URI of the message. 
+func (m *Message) RequestURI() string { + return fmt.Sprintf("/%s/%s", m.UPID.ID, m.Name) +} + +// NOTE: This should not fail or panic. +func extractNameFromRequestURI(requestURI string) string { + return strings.Split(requestURI, "/")[2] +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/messenger.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/messenger.go new file mode 100644 index 00000000000..5b242e5bce3 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/messenger.go @@ -0,0 +1,357 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package messenger + +import ( + "flag" + "fmt" + "net" + "reflect" + "strconv" + "time" + + "github.com/gogo/protobuf/proto" + log "github.com/golang/glog" + mesos "github.com/mesos/mesos-go/mesosproto" + "github.com/mesos/mesos-go/mesosutil/process" + "github.com/mesos/mesos-go/upid" + "golang.org/x/net/context" +) + +const ( + defaultQueueSize = 1024 + preparePeriod = time.Second * 1 +) + +var ( + sendRoutines int + encodeRoutines int + decodeRoutines int +) + +func init() { + flag.IntVar(&sendRoutines, "send-routines", 1, "Number of network sending routines") + flag.IntVar(&encodeRoutines, "encode-routines", 1, "Number of encoding routines") + flag.IntVar(&decodeRoutines, "decode-routines", 1, "Number of decoding routines") +} + +// MessageHandler is the callback for a message. When the callback +// is invoked, the sender's upid and the message are passed to the callback. +type MessageHandler func(from *upid.UPID, pbMsg proto.Message) + +// Messenger defines the interfaces that should be implemented. +type Messenger interface { + Install(handler MessageHandler, msg proto.Message) error + Send(ctx context.Context, upid *upid.UPID, msg proto.Message) error + Route(ctx context.Context, from *upid.UPID, msg proto.Message) error + Start() error + Stop() error + UPID() *upid.UPID +} + +// MesosMessenger is an implementation of the Messenger interface. +type MesosMessenger struct { + upid *upid.UPID + encodingQueue chan *Message + sendingQueue chan *Message + installedMessages map[string]reflect.Type + installedHandlers map[string]MessageHandler + stop chan struct{} + tr Transporter +}
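To make the naming scheme concrete: the messenger addresses peers by libprocess-style UPIDs, and every message name maps onto a fixed HTTP request path. A small sketch (the PID and message name are placeholders; getMessageName, defined at the end of this file, produces the "mesos.internal."-qualified name):

	pid, _ := upid.Parse("scheduler(1)@127.0.0.1:8080")
	msg := &Message{
		UPID: pid,
		Name: "mesos.internal.SmallMessage", // what getMessageName yields for *testmessage.SmallMessage
	}
	// msg.RequestURI() == "/scheduler(1)/mesos.internal.SmallMessage"
	// extractNameFromRequestURI splits on "/" and returns index 2,
	// recovering "mesos.internal.SmallMessage" on the receiving side.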
+ +// ForHostname creates a new default messenger (HTTP). If a non-nil, non-wildcard bindingAddress is +// specified then it will be used for both the UPID and Transport binding address. Otherwise +// hostname is resolved to an IP address, the UPID.Host is set to that address, and the +// bindingAddress is passed through to the Transport. +func ForHostname(proc *process.Process, hostname string, bindingAddress net.IP, port uint16) (Messenger, error) { + upid := &upid.UPID{ + ID: proc.Label(), + Port: strconv.Itoa(int(port)), + } + if bindingAddress != nil && "0.0.0.0" != bindingAddress.String() { + upid.Host = bindingAddress.String() + } else { + ips, err := net.LookupIP(hostname) + if err != nil { + return nil, err + } + // try to find an ipv4 and use that + ip := net.IP(nil) + for _, addr := range ips { + if ip = addr.To4(); ip != nil { + break + } + } + if ip == nil { + // no ipv4? best guess, just take the first addr + if len(ips) > 0 { + ip = ips[0] + log.Warningf("failed to find an IPv4 address for '%v', best guess is '%v'", hostname, ip) + } else { + return nil, fmt.Errorf("failed to determine IP address for host '%v'", hostname) + } + } + upid.Host = ip.String() + } + return NewHttpWithBindingAddress(upid, bindingAddress), nil +} + +// NewHttp creates a new mesos messenger that communicates over HTTP. +func NewHttp(upid *upid.UPID) *MesosMessenger { + return NewHttpWithBindingAddress(upid, nil) +} + +func NewHttpWithBindingAddress(upid *upid.UPID, address net.IP) *MesosMessenger { + return New(upid, NewHTTPTransporter(upid, address)) +} + +func New(upid *upid.UPID, t Transporter) *MesosMessenger { + return &MesosMessenger{ + upid: upid, + encodingQueue: make(chan *Message, defaultQueueSize), + sendingQueue: make(chan *Message, defaultQueueSize), + installedMessages: make(map[string]reflect.Type), + installedHandlers: make(map[string]MessageHandler), + tr: t, + } +} + +// Install installs the handler for the given message type. +func (m *MesosMessenger) Install(handler MessageHandler, msg proto.Message) error { + // Check if the message is a pointer. + mtype := reflect.TypeOf(msg) + if mtype.Kind() != reflect.Ptr { + return fmt.Errorf("Message %v is not a Ptr type", msg) + } + + // Check if the message is already installed. + name := getMessageName(msg) + if _, ok := m.installedMessages[name]; ok { + return fmt.Errorf("Message %v is already installed", name) + } + m.installedMessages[name] = mtype.Elem() + m.installedHandlers[name] = handler + m.tr.Install(name) + return nil +} + +// Send puts a message into the outgoing queue, waiting to be sent. +// With buffered channels, this will not block under moderate throughput. +// When an error is generated, the error can be communicated by placing +// a message on the incoming queue to be handled upstream. +func (m *MesosMessenger) Send(ctx context.Context, upid *upid.UPID, msg proto.Message) error { + if upid == nil { + panic("cannot send a message to a nil pid") + } else if upid.Equal(m.upid) { + return fmt.Errorf("cannot send a message to self") + } + name := getMessageName(msg) + log.V(2).Infof("Sending message %v to %v\n", name, upid) + select { + case <-ctx.Done(): + return ctx.Err() + case m.encodingQueue <- &Message{upid, name, msg, nil}: + return nil + } +} + +// Route puts a message either in the incoming or outgoing queue. +// This method is useful for: +// 1) routing internal errors to callback handlers +// 2) testing components without starting remote servers. +func (m *MesosMessenger) Route(ctx context.Context, upid *upid.UPID, msg proto.Message) error { + // if destination is not self, send to outbound. + if !upid.Equal(m.upid) { + return m.Send(ctx, upid, msg) + } + + data, err := proto.Marshal(msg) + if err != nil { + return err + } + name := getMessageName(msg) + return m.tr.Inject(ctx, &Message{upid, name, msg, data}) +}
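Putting the pieces together, a minimal client of the Messenger interface might look like this (a sketch only: the PIDs, the port, and the reuse of the testmessage types are placeholders, and error handling is elided):

	pid, _ := upid.Parse("mesos1@127.0.0.1:8080")
	m := NewHttp(pid) // or ForHostname(...) to resolve a hostname first

	// Register handlers before Start so incoming messages can be dispatched.
	_ = m.Install(func(from *upid.UPID, pbMsg proto.Message) {
		log.V(1).Infof("received %T from %v", pbMsg, from)
	}, &testmessage.SmallMessage{})

	// Internal send/encode failures are routed back to this process as a
	// FrameworkErrorMessage, so installing a handler for it surfaces them.
	_ = m.Install(func(from *upid.UPID, pbMsg proto.Message) { /* inspect the error */ }, &mesos.FrameworkErrorMessage{})

	if err := m.Start(); err == nil {
		defer m.Stop()
		peer, _ := upid.Parse("master@10.0.0.1:5050")
		_ = m.Send(context.TODO(), peer, &testmessage.SmallMessage{})
	}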
+ +// Start starts the messenger. +func (m *MesosMessenger) Start() error { + + m.stop = make(chan struct{}) + errChan := m.tr.Start() + + select { + case err := <-errChan: + log.Errorf("failed to start messenger: %v", err) + return err + case <-time.After(preparePeriod): // continue + } + + m.upid = m.tr.UPID() + + for i := 0; i < sendRoutines; i++ { + go m.sendLoop() + } + for i := 0; i < encodeRoutines; i++ { + go m.encodeLoop() + } + for i := 0; i < decodeRoutines; i++ { + go m.decodeLoop() + } + go func() { + select { + case err := <-errChan: + if err != nil { + //TODO(jdef) should the driver abort in this case? probably + //since this messenger will never attempt to re-establish the + //transport + log.Error(err) + } + case <-m.stop: + } + }() + return nil +} + +// Stop stops the messenger and cleans up all the goroutines. +func (m *MesosMessenger) Stop() error { + //TODO(jdef) don't hardcode the graceful flag here + if err := m.tr.Stop(true); err != nil { + log.Errorf("Failed to stop the transporter: %v\n", err) + return err + } + close(m.stop) + return nil +} + +// UPID returns the upid of the messenger. +func (m *MesosMessenger) UPID() *upid.UPID { + return m.upid +} + +func (m *MesosMessenger) encodeLoop() { + for { + select { + case <-m.stop: + return + case msg := <-m.encodingQueue: + e := func() error { + //TODO(jdef) implement timeout for context + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + + b, err := proto.Marshal(msg.ProtoMessage) + if err != nil { + return err + } + msg.Bytes = b + select { + case <-ctx.Done(): + return ctx.Err() + case m.sendingQueue <- msg: + return nil + } + }() + if e != nil { + m.reportError(fmt.Errorf("Failed to enqueue message %v: %v", msg, e)) + } + } + } +} + +func (m *MesosMessenger) reportError(err error) { + log.V(2).Info(err) + //TODO(jdef) implement timeout for context + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + + c := make(chan error, 1) + go func() { c <- m.Route(ctx, m.UPID(), &mesos.FrameworkErrorMessage{Message: proto.String(err.Error())}) }() + select { + case <-ctx.Done(): + <-c // wait for Route to return + case e := <-c: + if e != nil { + log.Errorf("failed to report error %v due to: %v", err, e) + } + } +} + +func (m *MesosMessenger) sendLoop() { + for { + select { + case <-m.stop: + return + case msg := <-m.sendingQueue: + e := func() error { + //TODO(jdef) implement timeout for context + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + + c := make(chan error, 1) + go func() { c <- m.tr.Send(ctx, msg) }() + + select { + case <-ctx.Done(): + // Transport layer must use the context to detect cancelled requests. + <-c // wait for Send to return + return ctx.Err() + case err := <-c: + return err + } + }() + if e != nil { + m.reportError(fmt.Errorf("Failed to send message %v: %v", msg.Name, e)) + } + } + } +} + +// Since HTTPTransporter.Recv() is already buffered, we don't need a 'recvLoop' here.
+func (m *MesosMessenger) decodeLoop() { + for { + select { + case <-m.stop: + return + default: + } + msg, err := m.tr.Recv() + if err != nil { + if err == discardOnStopError { + log.V(1).Info("exiting decodeLoop, transport shutting down") + return + } else { + panic(fmt.Sprintf("unexpected transport error: %v", err)) + } + } + log.V(2).Infof("Receiving message %v from %v\n", msg.Name, msg.UPID) + msg.ProtoMessage = reflect.New(m.installedMessages[msg.Name]).Interface().(proto.Message) + if err := proto.Unmarshal(msg.Bytes, msg.ProtoMessage); err != nil { + log.Errorf("Failed to unmarshal message %v: %v\n", msg, err) + continue + } + // TODO(yifan): Catch panic. + m.installedHandlers[msg.Name](msg.UPID, msg.ProtoMessage) + } +} + +// getMessageName returns the name of the message in the mesos manner. +func getMessageName(msg proto.Message) string { + return fmt.Sprintf("%v.%v", "mesos.internal", reflect.TypeOf(msg).Elem().Name()) +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/messenger_test.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/messenger_test.go new file mode 100644 index 00000000000..096f201116c --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/messenger_test.go @@ -0,0 +1,433 @@ +package messenger + +import ( + "fmt" + "math/rand" + "net/http" + "net/http/httptest" + "strconv" + "sync" + "testing" + "time" + + "github.com/gogo/protobuf/proto" + "github.com/mesos/mesos-go/messenger/testmessage" + "github.com/mesos/mesos-go/upid" + "github.com/stretchr/testify/assert" + "golang.org/x/net/context" +) + +var ( + startPort = 10000 + rand.Intn(30000) + globalWG = new(sync.WaitGroup) +) + +func noopHandler(*upid.UPID, proto.Message) { + globalWG.Done() +} + +func getNewPort() int { + startPort++ + return startPort +} + +func shuffleMessages(queue *[]proto.Message) { + for i := range *queue { + index := rand.Intn(i + 1) + (*queue)[i], (*queue)[index] = (*queue)[index], (*queue)[i] + } +} + +func generateSmallMessages(n int) []proto.Message { + queue := make([]proto.Message, n) + for i := range queue { + queue[i] = testmessage.GenerateSmallMessage() + } + return queue +} + +func generateMediumMessages(n int) []proto.Message { + queue := make([]proto.Message, n) + for i := range queue { + queue[i] = testmessage.GenerateMediumMessage() + } + return queue +} + +func generateBigMessages(n int) []proto.Message { + queue := make([]proto.Message, n) + for i := range queue { + queue[i] = testmessage.GenerateBigMessage() + } + return queue +} + +func generateLargeMessages(n int) []proto.Message { + queue := make([]proto.Message, n) + for i := range queue { + queue[i] = testmessage.GenerateLargeMessage() + } + return queue +} + +func generateMixedMessages(n int) []proto.Message { + queue := make([]proto.Message, n*4) + for i := 0; i < n*4; i = i + 4 { + queue[i] = testmessage.GenerateSmallMessage() + queue[i+1] = testmessage.GenerateMediumMessage() + queue[i+2] = testmessage.GenerateBigMessage() + queue[i+3] = testmessage.GenerateLargeMessage() + } + shuffleMessages(&queue) + return queue +} + +func installMessages(t *testing.T, m Messenger, queue *[]proto.Message, counts *[]int, done chan struct{}) { + testCounts := func(counts []int, done chan struct{}) { + for i := range counts { + if counts[i] != cap(*queue)/4 { + return + } + } + close(done) + } + hander1 := func(from *upid.UPID, pbMsg proto.Message) { + (*queue) = append(*queue, pbMsg) + (*counts)[0]++ + testCounts(*counts, done) + } + hander2 := func(from *upid.UPID, 
pbMsg proto.Message) { + (*queue) = append(*queue, pbMsg) + (*counts)[1]++ + testCounts(*counts, done) + } + hander3 := func(from *upid.UPID, pbMsg proto.Message) { + (*queue) = append(*queue, pbMsg) + (*counts)[2]++ + testCounts(*counts, done) + } + hander4 := func(from *upid.UPID, pbMsg proto.Message) { + (*queue) = append(*queue, pbMsg) + (*counts)[3]++ + testCounts(*counts, done) + } + assert.NoError(t, m.Install(hander1, &testmessage.SmallMessage{})) + assert.NoError(t, m.Install(hander2, &testmessage.MediumMessage{})) + assert.NoError(t, m.Install(hander3, &testmessage.BigMessage{})) + assert.NoError(t, m.Install(hander4, &testmessage.LargeMessage{})) +} + +func runTestServer(b *testing.B, wg *sync.WaitGroup) *httptest.Server { + mux := http.NewServeMux() + mux.HandleFunc("/testserver/mesos.internal.SmallMessage", func(http.ResponseWriter, *http.Request) { + wg.Done() + }) + mux.HandleFunc("/testserver/mesos.internal.MediumMessage", func(http.ResponseWriter, *http.Request) { + wg.Done() + }) + mux.HandleFunc("/testserver/mesos.internal.BigMessage", func(http.ResponseWriter, *http.Request) { + wg.Done() + }) + mux.HandleFunc("/testserver/mesos.internal.LargeMessage", func(http.ResponseWriter, *http.Request) { + wg.Done() + }) + return httptest.NewServer(mux) +} + +func TestMessengerFailToInstall(t *testing.T) { + m := NewHttp(&upid.UPID{ID: "mesos"}) + handler := func(from *upid.UPID, pbMsg proto.Message) {} + assert.NotNil(t, m) + assert.NoError(t, m.Install(handler, &testmessage.SmallMessage{})) + assert.Error(t, m.Install(handler, &testmessage.SmallMessage{})) +} + +func TestMessengerFailToStart(t *testing.T) { + port := strconv.Itoa(getNewPort()) + m1 := NewHttp(&upid.UPID{ID: "mesos", Host: "localhost", Port: port}) + m2 := NewHttp(&upid.UPID{ID: "mesos", Host: "localhost", Port: port}) + assert.NoError(t, m1.Start()) + assert.Error(t, m2.Start()) +} + +func TestMessengerFailToSend(t *testing.T) { + upid, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort())) + assert.NoError(t, err) + m := NewHttp(upid) + assert.NoError(t, m.Start()) + assert.Error(t, m.Send(context.TODO(), upid, &testmessage.SmallMessage{})) +} + +func TestMessenger(t *testing.T) { + messages := generateMixedMessages(1000) + + upid1, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort())) + assert.NoError(t, err) + upid2, err := upid.Parse(fmt.Sprintf("mesos2@localhost:%d", getNewPort())) + assert.NoError(t, err) + + m1 := NewHttp(upid1) + m2 := NewHttp(upid2) + + done := make(chan struct{}) + counts := make([]int, 4) + msgQueue := make([]proto.Message, 0, len(messages)) + installMessages(t, m2, &msgQueue, &counts, done) + + assert.NoError(t, m1.Start()) + assert.NoError(t, m2.Start()) + + go func() { + for _, msg := range messages { + assert.NoError(t, m1.Send(context.TODO(), upid2, msg)) + } + }() + + select { + case <-time.After(time.Second * 10): + t.Fatalf("Timeout") + case <-done: + } + + for i := range counts { + assert.Equal(t, 1000, counts[i]) + } + assert.Equal(t, messages, msgQueue) +} + +func BenchmarkMessengerSendSmallMessage(b *testing.B) { + messages := generateSmallMessages(1000) + + wg := new(sync.WaitGroup) + wg.Add(b.N) + srv := runTestServer(b, wg) + defer srv.Close() + + upid1, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort())) + assert.NoError(b, err) + upid2, err := upid.Parse(fmt.Sprintf("testserver@%s", srv.Listener.Addr().String())) + + assert.NoError(b, err) + + m1 := NewHttp(upid1) + assert.NoError(b, m1.Start()) + + b.ResetTimer() + for i := 
0; i < b.N; i++ { + m1.Send(context.TODO(), upid2, messages[i%1000]) + } + wg.Wait() +} + +func BenchmarkMessengerSendMediumMessage(b *testing.B) { + messages := generateMediumMessages(1000) + + wg := new(sync.WaitGroup) + wg.Add(b.N) + srv := runTestServer(b, wg) + defer srv.Close() + + upid1, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort())) + assert.NoError(b, err) + upid2, err := upid.Parse(fmt.Sprintf("testserver@%s", srv.Listener.Addr().String())) + assert.NoError(b, err) + + m1 := NewHttp(upid1) + assert.NoError(b, m1.Start()) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + m1.Send(context.TODO(), upid2, messages[i%1000]) + } + wg.Wait() +} + +func BenchmarkMessengerSendBigMessage(b *testing.B) { + messages := generateBigMessages(1000) + + wg := new(sync.WaitGroup) + wg.Add(b.N) + srv := runTestServer(b, wg) + defer srv.Close() + + upid1, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort())) + assert.NoError(b, err) + upid2, err := upid.Parse(fmt.Sprintf("testserver@%s", srv.Listener.Addr().String())) + assert.NoError(b, err) + + m1 := NewHttp(upid1) + assert.NoError(b, m1.Start()) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + m1.Send(context.TODO(), upid2, messages[i%1000]) + } + wg.Wait() +} + +func BenchmarkMessengerSendLargeMessage(b *testing.B) { + messages := generateLargeMessages(1000) + + wg := new(sync.WaitGroup) + wg.Add(b.N) + srv := runTestServer(b, wg) + defer srv.Close() + + upid1, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort())) + assert.NoError(b, err) + upid2, err := upid.Parse(fmt.Sprintf("testserver@%s", srv.Listener.Addr().String())) + assert.NoError(b, err) + + m1 := NewHttp(upid1) + assert.NoError(b, m1.Start()) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + m1.Send(context.TODO(), upid2, messages[i%1000]) + } + wg.Wait() +} + +func BenchmarkMessengerSendMixedMessage(b *testing.B) { + messages := generateMixedMessages(1000) + + wg := new(sync.WaitGroup) + wg.Add(b.N) + srv := runTestServer(b, wg) + defer srv.Close() + + upid1, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort())) + assert.NoError(b, err) + upid2, err := upid.Parse(fmt.Sprintf("testserver@%s", srv.Listener.Addr().String())) + assert.NoError(b, err) + + m1 := NewHttp(upid1) + assert.NoError(b, m1.Start()) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + m1.Send(context.TODO(), upid2, messages[i%1000]) + } + wg.Wait() +} + +func BenchmarkMessengerSendRecvSmallMessage(b *testing.B) { + globalWG.Add(b.N) + + messages := generateSmallMessages(1000) + + upid1, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort())) + assert.NoError(b, err) + upid2, err := upid.Parse(fmt.Sprintf("mesos2@localhost:%d", getNewPort())) + assert.NoError(b, err) + + m1 := NewHttp(upid1) + m2 := NewHttp(upid2) + assert.NoError(b, m1.Start()) + assert.NoError(b, m2.Start()) + assert.NoError(b, m2.Install(noopHandler, &testmessage.SmallMessage{})) + + time.Sleep(time.Second) // Avoid race on upid. 
+ b.ResetTimer() + for i := 0; i < b.N; i++ { + m1.Send(context.TODO(), upid2, messages[i%1000]) + } + globalWG.Wait() +} + +func BenchmarkMessengerSendRecvMediumMessage(b *testing.B) { + globalWG.Add(b.N) + + messages := generateMediumMessages(1000) + + upid1, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort())) + assert.NoError(b, err) + upid2, err := upid.Parse(fmt.Sprintf("mesos2@localhost:%d", getNewPort())) + assert.NoError(b, err) + + m1 := NewHttp(upid1) + m2 := NewHttp(upid2) + assert.NoError(b, m1.Start()) + assert.NoError(b, m2.Start()) + assert.NoError(b, m2.Install(noopHandler, &testmessage.MediumMessage{})) + + time.Sleep(time.Second) // Avoid race on upid. + b.ResetTimer() + for i := 0; i < b.N; i++ { + m1.Send(context.TODO(), upid2, messages[i%1000]) + } + globalWG.Wait() +} + +func BenchmarkMessengerSendRecvBigMessage(b *testing.B) { + globalWG.Add(b.N) + + messages := generateBigMessages(1000) + + upid1, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort())) + assert.NoError(b, err) + upid2, err := upid.Parse(fmt.Sprintf("mesos2@localhost:%d", getNewPort())) + assert.NoError(b, err) + + m1 := NewHttp(upid1) + m2 := NewHttp(upid2) + assert.NoError(b, m1.Start()) + assert.NoError(b, m2.Start()) + assert.NoError(b, m2.Install(noopHandler, &testmessage.BigMessage{})) + + time.Sleep(time.Second) // Avoid race on upid. + b.ResetTimer() + for i := 0; i < b.N; i++ { + m1.Send(context.TODO(), upid2, messages[i%1000]) + } + globalWG.Wait() +} + +func BenchmarkMessengerSendRecvLargeMessage(b *testing.B) { + globalWG.Add(b.N) + messages := generateLargeMessages(1000) + + upid1, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort())) + assert.NoError(b, err) + upid2, err := upid.Parse(fmt.Sprintf("mesos2@localhost:%d", getNewPort())) + assert.NoError(b, err) + + m1 := NewHttp(upid1) + m2 := NewHttp(upid2) + assert.NoError(b, m1.Start()) + assert.NoError(b, m2.Start()) + assert.NoError(b, m2.Install(noopHandler, &testmessage.LargeMessage{})) + + time.Sleep(time.Second) // Avoid race on upid. + b.ResetTimer() + for i := 0; i < b.N; i++ { + m1.Send(context.TODO(), upid2, messages[i%1000]) + } + globalWG.Wait() +} + +func BenchmarkMessengerSendRecvMixedMessage(b *testing.B) { + globalWG.Add(b.N) + messages := generateMixedMessages(1000) + + upid1, err := upid.Parse(fmt.Sprintf("mesos1@localhost:%d", getNewPort())) + assert.NoError(b, err) + upid2, err := upid.Parse(fmt.Sprintf("mesos2@localhost:%d", getNewPort())) + assert.NoError(b, err) + + m1 := NewHttp(upid1) + m2 := NewHttp(upid2) + assert.NoError(b, m1.Start()) + assert.NoError(b, m2.Start()) + assert.NoError(b, m2.Install(noopHandler, &testmessage.SmallMessage{})) + assert.NoError(b, m2.Install(noopHandler, &testmessage.MediumMessage{})) + assert.NoError(b, m2.Install(noopHandler, &testmessage.BigMessage{})) + assert.NoError(b, m2.Install(noopHandler, &testmessage.LargeMessage{})) + + time.Sleep(time.Second) // Avoid race on upid. + b.ResetTimer() + for i := 0; i < b.N; i++ { + m1.Send(context.TODO(), upid2, messages[i%1000]) + } + globalWG.Wait() +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/mocked_messenger.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/mocked_messenger.go new file mode 100644 index 00000000000..34d53d0868f --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/mocked_messenger.go @@ -0,0 +1,106 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package messenger + +import ( + "reflect" + + "github.com/gogo/protobuf/proto" + "github.com/mesos/mesos-go/upid" + "github.com/stretchr/testify/mock" + "golang.org/x/net/context" +) + +type message struct { + from *upid.UPID + msg proto.Message +} + +// MockedMessenger is a messenger that returns error on every operation. +type MockedMessenger struct { + mock.Mock + messageQueue chan *message + handlers map[string]MessageHandler + stop chan struct{} +} + +// NewMockedMessenger returns a mocked messenger used for testing. +func NewMockedMessenger() *MockedMessenger { + return &MockedMessenger{ + messageQueue: make(chan *message, 1), + handlers: make(map[string]MessageHandler), + stop: make(chan struct{}), + } +} + +// Install is a mocked implementation. +func (m *MockedMessenger) Install(handler MessageHandler, msg proto.Message) error { + m.handlers[reflect.TypeOf(msg).Elem().Name()] = handler + return m.Called().Error(0) +} + +// Send is a mocked implementation. +func (m *MockedMessenger) Send(ctx context.Context, upid *upid.UPID, msg proto.Message) error { + return m.Called().Error(0) +} + +func (m *MockedMessenger) Route(ctx context.Context, upid *upid.UPID, msg proto.Message) error { + return m.Called().Error(0) +} + +// Start is a mocked implementation. +func (m *MockedMessenger) Start() error { + go m.recvLoop() + return m.Called().Error(0) +} + +// Stop is a mocked implementation. +func (m *MockedMessenger) Stop() error { + // don't close an already-closed channel + select { + case <-m.stop: + // noop + default: + close(m.stop) + } + return m.Called().Error(0) +} + +// UPID is a mocked implementation. +func (m *MockedMessenger) UPID() *upid.UPID { + return m.Called().Get(0).(*upid.UPID) +} + +func (m *MockedMessenger) recvLoop() { + for { + select { + case <-m.stop: + return + case msg := <-m.messageQueue: + name := reflect.TypeOf(msg.msg).Elem().Name() + m.handlers[name](msg.from, msg.msg) + } + } +} + +// Recv receives a upid and a message, it will dispatch the message to its handler +// with the upid. This is for testing. +func (m *MockedMessenger) Recv(from *upid.UPID, msg proto.Message) { + m.messageQueue <- &message{from, msg} +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/testmessage/Makefile b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/testmessage/Makefile new file mode 100644 index 00000000000..9bf30108452 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/testmessage/Makefile @@ -0,0 +1,2 @@ +all: testmessage.proto + protoc --proto_path=${GOPATH}/src:${GOPATH}/src/github.com/gogo/protobuf/protobuf:. --gogo_out=. 
testmessage.proto diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/testmessage/generator.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/testmessage/generator.go new file mode 100644 index 00000000000..56cbe13b8e0 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/testmessage/generator.go @@ -0,0 +1,49 @@ +package testmessage + +import ( + "math/rand" +) + +func generateRandomString(length int) string { + b := make([]byte, length) + for i := range b { + b[i] = byte(rand.Int()) + } + return string(b) +} + +// GenerateSmallMessage generates a small message. +func GenerateSmallMessage() *SmallMessage { + v := make([]string, 3) + for i := range v { + v[i] = generateRandomString(5) + } + return &SmallMessage{Values: v} +} + +// GenerateMediumMessage generates a medium message. +func GenerateMediumMessage() *MediumMessage { + v := make([]string, 10) + for i := range v { + v[i] = generateRandomString(10) + } + return &MediumMessage{Values: v} +} + +// GenerateBigMessage generates a big message. +func GenerateBigMessage() *BigMessage { + v := make([]string, 20) + for i := range v { + v[i] = generateRandomString(20) + } + return &BigMessage{Values: v} +} + +// GenerateLargeMessage generates a large message. +func GenerateLargeMessage() *LargeMessage { + v := make([]string, 30) + for i := range v { + v[i] = generateRandomString(30) + } + return &LargeMessage{Values: v} +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/testmessage/testmessage.pb.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/testmessage/testmessage.pb.go new file mode 100644 index 00000000000..11035be133b --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/testmessage/testmessage.pb.go @@ -0,0 +1,1114 @@ +// Code generated by protoc-gen-gogo. +// source: testmessage.proto +// DO NOT EDIT! + +/* +Package testmessage is a generated protocol buffer package. + +It is generated from these files: + testmessage.proto + +It has these top-level messages: + SmallMessage + MediumMessage + BigMessage + LargeMessage +*/ +package testmessage + +import proto "github.com/gogo/protobuf/proto" +import math "math" + +// discarding unused import gogoproto "github.com/gogo/protobuf/gogoproto/gogo.pb" + +import io "io" +import fmt "fmt" +import github_com_gogo_protobuf_proto "github.com/gogo/protobuf/proto" + +import fmt1 "fmt" +import strings "strings" +import reflect "reflect" + +import fmt2 "fmt" +import strings1 "strings" +import github_com_gogo_protobuf_proto1 "github.com/gogo/protobuf/proto" +import sort "sort" +import strconv "strconv" +import reflect1 "reflect" + +import fmt3 "fmt" +import bytes "bytes"
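Since every generated message type below exposes Size, Marshal, Unmarshal, and the (Verbose)Equal helpers, a quick round trip shows how they fit together (an illustrative sketch only; error handling elided):

	orig := GenerateSmallMessage()
	data, _ := orig.Marshal() // len(data) == orig.Size()

	decoded := &SmallMessage{}
	_ = decoded.Unmarshal(data)
	// decoded.Equal(orig) == true; VerboseEqual reports any mismatch in detail.

+// Reference imports to suppress errors if they are not otherwise used.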
+var _ = proto.Marshal +var _ = math.Inf + +type SmallMessage struct { + Values []string `protobuf:"bytes,1,rep" json:"Values,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *SmallMessage) Reset() { *m = SmallMessage{} } +func (*SmallMessage) ProtoMessage() {} + +func (m *SmallMessage) GetValues() []string { + if m != nil { + return m.Values + } + return nil +} + +type MediumMessage struct { + Values []string `protobuf:"bytes,1,rep" json:"Values,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *MediumMessage) Reset() { *m = MediumMessage{} } +func (*MediumMessage) ProtoMessage() {} + +func (m *MediumMessage) GetValues() []string { + if m != nil { + return m.Values + } + return nil +} + +type BigMessage struct { + Values []string `protobuf:"bytes,1,rep" json:"Values,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *BigMessage) Reset() { *m = BigMessage{} } +func (*BigMessage) ProtoMessage() {} + +func (m *BigMessage) GetValues() []string { + if m != nil { + return m.Values + } + return nil +} + +type LargeMessage struct { + Values []string `protobuf:"bytes,1,rep" json:"Values,omitempty"` + XXX_unrecognized []byte `json:"-"` +} + +func (m *LargeMessage) Reset() { *m = LargeMessage{} } +func (*LargeMessage) ProtoMessage() {} + +func (m *LargeMessage) GetValues() []string { + if m != nil { + return m.Values + } + return nil +} + +func init() { +} +func (m *SmallMessage) Unmarshal(data []byte) error { + l := len(data) + index := 0 + for index < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if index >= l { + return io.ErrUnexpectedEOF + } + b := data[index] + index++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Values", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if index >= l { + return io.ErrUnexpectedEOF + } + b := data[index] + index++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + postIndex := index + int(stringLen) + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Values = append(m.Values, string(data[index:postIndex])) + index = postIndex + default: + var sizeOfWire int + for { + sizeOfWire++ + wire >>= 7 + if wire == 0 { + break + } + } + index -= sizeOfWire + skippy, err := github_com_gogo_protobuf_proto.Skip(data[index:]) + if err != nil { + return err + } + if (index + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, data[index:index+skippy]...) 
+ index += skippy + } + } + return nil +} +func (m *MediumMessage) Unmarshal(data []byte) error { + l := len(data) + index := 0 + for index < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if index >= l { + return io.ErrUnexpectedEOF + } + b := data[index] + index++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Values", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if index >= l { + return io.ErrUnexpectedEOF + } + b := data[index] + index++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + postIndex := index + int(stringLen) + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Values = append(m.Values, string(data[index:postIndex])) + index = postIndex + default: + var sizeOfWire int + for { + sizeOfWire++ + wire >>= 7 + if wire == 0 { + break + } + } + index -= sizeOfWire + skippy, err := github_com_gogo_protobuf_proto.Skip(data[index:]) + if err != nil { + return err + } + if (index + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, data[index:index+skippy]...) + index += skippy + } + } + return nil +} +func (m *BigMessage) Unmarshal(data []byte) error { + l := len(data) + index := 0 + for index < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if index >= l { + return io.ErrUnexpectedEOF + } + b := data[index] + index++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Values", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if index >= l { + return io.ErrUnexpectedEOF + } + b := data[index] + index++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + postIndex := index + int(stringLen) + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Values = append(m.Values, string(data[index:postIndex])) + index = postIndex + default: + var sizeOfWire int + for { + sizeOfWire++ + wire >>= 7 + if wire == 0 { + break + } + } + index -= sizeOfWire + skippy, err := github_com_gogo_protobuf_proto.Skip(data[index:]) + if err != nil { + return err + } + if (index + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, data[index:index+skippy]...) 
+ index += skippy + } + } + return nil +} +func (m *LargeMessage) Unmarshal(data []byte) error { + l := len(data) + index := 0 + for index < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if index >= l { + return io.ErrUnexpectedEOF + } + b := data[index] + index++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Values", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if index >= l { + return io.ErrUnexpectedEOF + } + b := data[index] + index++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + postIndex := index + int(stringLen) + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Values = append(m.Values, string(data[index:postIndex])) + index = postIndex + default: + var sizeOfWire int + for { + sizeOfWire++ + wire >>= 7 + if wire == 0 { + break + } + } + index -= sizeOfWire + skippy, err := github_com_gogo_protobuf_proto.Skip(data[index:]) + if err != nil { + return err + } + if (index + skippy) > l { + return io.ErrUnexpectedEOF + } + m.XXX_unrecognized = append(m.XXX_unrecognized, data[index:index+skippy]...) + index += skippy + } + } + return nil +} +func (this *SmallMessage) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&SmallMessage{`, + `Values:` + fmt1.Sprintf("%v", this.Values) + `,`, + `XXX_unrecognized:` + fmt1.Sprintf("%v", this.XXX_unrecognized) + `,`, + `}`, + }, "") + return s +} +func (this *MediumMessage) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&MediumMessage{`, + `Values:` + fmt1.Sprintf("%v", this.Values) + `,`, + `XXX_unrecognized:` + fmt1.Sprintf("%v", this.XXX_unrecognized) + `,`, + `}`, + }, "") + return s +} +func (this *BigMessage) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&BigMessage{`, + `Values:` + fmt1.Sprintf("%v", this.Values) + `,`, + `XXX_unrecognized:` + fmt1.Sprintf("%v", this.XXX_unrecognized) + `,`, + `}`, + }, "") + return s +} +func (this *LargeMessage) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&LargeMessage{`, + `Values:` + fmt1.Sprintf("%v", this.Values) + `,`, + `XXX_unrecognized:` + fmt1.Sprintf("%v", this.XXX_unrecognized) + `,`, + `}`, + }, "") + return s +} +func valueToStringTestmessage(v interface{}) string { + rv := reflect.ValueOf(v) + if rv.IsNil() { + return "nil" + } + pv := reflect.Indirect(rv).Interface() + return fmt1.Sprintf("*%v", pv) +} +func (m *SmallMessage) Size() (n int) { + var l int + _ = l + if len(m.Values) > 0 { + for _, s := range m.Values { + l = len(s) + n += 1 + l + sovTestmessage(uint64(l)) + } + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *MediumMessage) Size() (n int) { + var l int + _ = l + if len(m.Values) > 0 { + for _, s := range m.Values { + l = len(s) + n += 1 + l + sovTestmessage(uint64(l)) + } + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *BigMessage) Size() (n int) { + var l int + _ = l + if len(m.Values) > 0 { + for _, s := range m.Values { + l = len(s) + n += 1 + l + sovTestmessage(uint64(l)) + } + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func (m *LargeMessage) Size() (n int) { + var l int + _ = l + if len(m.Values) > 0 { 
+ for _, s := range m.Values { + l = len(s) + n += 1 + l + sovTestmessage(uint64(l)) + } + } + if m.XXX_unrecognized != nil { + n += len(m.XXX_unrecognized) + } + return n +} + +func sovTestmessage(x uint64) (n int) { + for { + n++ + x >>= 7 + if x == 0 { + break + } + } + return n +} +func sozTestmessage(x uint64) (n int) { + return sovTestmessage(uint64((x << 1) ^ uint64((int64(x) >> 63)))) +} +func NewPopulatedSmallMessage(r randyTestmessage, easy bool) *SmallMessage { + this := &SmallMessage{} + if r.Intn(10) != 0 { + v1 := r.Intn(10) + this.Values = make([]string, v1) + for i := 0; i < v1; i++ { + this.Values[i] = randStringTestmessage(r) + } + } + if !easy && r.Intn(10) != 0 { + this.XXX_unrecognized = randUnrecognizedTestmessage(r, 2) + } + return this +} + +func NewPopulatedMediumMessage(r randyTestmessage, easy bool) *MediumMessage { + this := &MediumMessage{} + if r.Intn(10) != 0 { + v2 := r.Intn(10) + this.Values = make([]string, v2) + for i := 0; i < v2; i++ { + this.Values[i] = randStringTestmessage(r) + } + } + if !easy && r.Intn(10) != 0 { + this.XXX_unrecognized = randUnrecognizedTestmessage(r, 2) + } + return this +} + +func NewPopulatedBigMessage(r randyTestmessage, easy bool) *BigMessage { + this := &BigMessage{} + if r.Intn(10) != 0 { + v3 := r.Intn(10) + this.Values = make([]string, v3) + for i := 0; i < v3; i++ { + this.Values[i] = randStringTestmessage(r) + } + } + if !easy && r.Intn(10) != 0 { + this.XXX_unrecognized = randUnrecognizedTestmessage(r, 2) + } + return this +} + +func NewPopulatedLargeMessage(r randyTestmessage, easy bool) *LargeMessage { + this := &LargeMessage{} + if r.Intn(10) != 0 { + v4 := r.Intn(10) + this.Values = make([]string, v4) + for i := 0; i < v4; i++ { + this.Values[i] = randStringTestmessage(r) + } + } + if !easy && r.Intn(10) != 0 { + this.XXX_unrecognized = randUnrecognizedTestmessage(r, 2) + } + return this +} + +type randyTestmessage interface { + Float32() float32 + Float64() float64 + Int63() int64 + Int31() int32 + Uint32() uint32 + Intn(n int) int +} + +func randUTF8RuneTestmessage(r randyTestmessage) rune { + res := rune(r.Uint32() % 1112064) + if 55296 <= res { + res += 2047 + } + return res +} +func randStringTestmessage(r randyTestmessage) string { + v5 := r.Intn(100) + tmps := make([]rune, v5) + for i := 0; i < v5; i++ { + tmps[i] = randUTF8RuneTestmessage(r) + } + return string(tmps) +} +func randUnrecognizedTestmessage(r randyTestmessage, maxFieldNumber int) (data []byte) { + l := r.Intn(5) + for i := 0; i < l; i++ { + wire := r.Intn(4) + if wire == 3 { + wire = 5 + } + fieldNumber := maxFieldNumber + r.Intn(100) + data = randFieldTestmessage(data, r, fieldNumber, wire) + } + return data +} +func randFieldTestmessage(data []byte, r randyTestmessage, fieldNumber int, wire int) []byte { + key := uint32(fieldNumber)<<3 | uint32(wire) + switch wire { + case 0: + data = encodeVarintPopulateTestmessage(data, uint64(key)) + v6 := r.Int63() + if r.Intn(2) == 0 { + v6 *= -1 + } + data = encodeVarintPopulateTestmessage(data, uint64(v6)) + case 1: + data = encodeVarintPopulateTestmessage(data, uint64(key)) + data = append(data, byte(r.Intn(256)), byte(r.Intn(256)), byte(r.Intn(256)), byte(r.Intn(256)), byte(r.Intn(256)), byte(r.Intn(256)), byte(r.Intn(256)), byte(r.Intn(256))) + case 2: + data = encodeVarintPopulateTestmessage(data, uint64(key)) + ll := r.Intn(100) + data = encodeVarintPopulateTestmessage(data, uint64(ll)) + for j := 0; j < ll; j++ { + data = append(data, byte(r.Intn(256))) + } + default: + data = 
encodeVarintPopulateTestmessage(data, uint64(key)) + data = append(data, byte(r.Intn(256)), byte(r.Intn(256)), byte(r.Intn(256)), byte(r.Intn(256))) + } + return data +} +func encodeVarintPopulateTestmessage(data []byte, v uint64) []byte { + for v >= 1<<7 { + data = append(data, uint8(uint64(v)&0x7f|0x80)) + v >>= 7 + } + data = append(data, uint8(v)) + return data +} +func (m *SmallMessage) Marshal() (data []byte, err error) { + size := m.Size() + data = make([]byte, size) + n, err := m.MarshalTo(data) + if err != nil { + return nil, err + } + return data[:n], nil +} + +func (m *SmallMessage) MarshalTo(data []byte) (n int, err error) { + var i int + _ = i + var l int + _ = l + if len(m.Values) > 0 { + for _, s := range m.Values { + data[i] = 0xa + i++ + l = len(s) + for l >= 1<<7 { + data[i] = uint8(uint64(l)&0x7f | 0x80) + l >>= 7 + i++ + } + data[i] = uint8(l) + i++ + i += copy(data[i:], s) + } + } + if m.XXX_unrecognized != nil { + i += copy(data[i:], m.XXX_unrecognized) + } + return i, nil +} + +func (m *MediumMessage) Marshal() (data []byte, err error) { + size := m.Size() + data = make([]byte, size) + n, err := m.MarshalTo(data) + if err != nil { + return nil, err + } + return data[:n], nil +} + +func (m *MediumMessage) MarshalTo(data []byte) (n int, err error) { + var i int + _ = i + var l int + _ = l + if len(m.Values) > 0 { + for _, s := range m.Values { + data[i] = 0xa + i++ + l = len(s) + for l >= 1<<7 { + data[i] = uint8(uint64(l)&0x7f | 0x80) + l >>= 7 + i++ + } + data[i] = uint8(l) + i++ + i += copy(data[i:], s) + } + } + if m.XXX_unrecognized != nil { + i += copy(data[i:], m.XXX_unrecognized) + } + return i, nil +} + +func (m *BigMessage) Marshal() (data []byte, err error) { + size := m.Size() + data = make([]byte, size) + n, err := m.MarshalTo(data) + if err != nil { + return nil, err + } + return data[:n], nil +} + +func (m *BigMessage) MarshalTo(data []byte) (n int, err error) { + var i int + _ = i + var l int + _ = l + if len(m.Values) > 0 { + for _, s := range m.Values { + data[i] = 0xa + i++ + l = len(s) + for l >= 1<<7 { + data[i] = uint8(uint64(l)&0x7f | 0x80) + l >>= 7 + i++ + } + data[i] = uint8(l) + i++ + i += copy(data[i:], s) + } + } + if m.XXX_unrecognized != nil { + i += copy(data[i:], m.XXX_unrecognized) + } + return i, nil +} + +func (m *LargeMessage) Marshal() (data []byte, err error) { + size := m.Size() + data = make([]byte, size) + n, err := m.MarshalTo(data) + if err != nil { + return nil, err + } + return data[:n], nil +} + +func (m *LargeMessage) MarshalTo(data []byte) (n int, err error) { + var i int + _ = i + var l int + _ = l + if len(m.Values) > 0 { + for _, s := range m.Values { + data[i] = 0xa + i++ + l = len(s) + for l >= 1<<7 { + data[i] = uint8(uint64(l)&0x7f | 0x80) + l >>= 7 + i++ + } + data[i] = uint8(l) + i++ + i += copy(data[i:], s) + } + } + if m.XXX_unrecognized != nil { + i += copy(data[i:], m.XXX_unrecognized) + } + return i, nil +} + +func encodeFixed64Testmessage(data []byte, offset int, v uint64) int { + data[offset] = uint8(v) + data[offset+1] = uint8(v >> 8) + data[offset+2] = uint8(v >> 16) + data[offset+3] = uint8(v >> 24) + data[offset+4] = uint8(v >> 32) + data[offset+5] = uint8(v >> 40) + data[offset+6] = uint8(v >> 48) + data[offset+7] = uint8(v >> 56) + return offset + 8 +} +func encodeFixed32Testmessage(data []byte, offset int, v uint32) int { + data[offset] = uint8(v) + data[offset+1] = uint8(v >> 8) + data[offset+2] = uint8(v >> 16) + data[offset+3] = uint8(v >> 24) + return offset + 4 +} +func 
encodeVarintTestmessage(data []byte, offset int, v uint64) int { + for v >= 1<<7 { + data[offset] = uint8(v&0x7f | 0x80) + v >>= 7 + offset++ + } + data[offset] = uint8(v) + return offset + 1 +} +func (this *SmallMessage) GoString() string { + if this == nil { + return "nil" + } + s := strings1.Join([]string{`&testmessage.SmallMessage{` + + `Values:` + fmt2.Sprintf("%#v", this.Values), + `XXX_unrecognized:` + fmt2.Sprintf("%#v", this.XXX_unrecognized) + `}`}, ", ") + return s +} +func (this *MediumMessage) GoString() string { + if this == nil { + return "nil" + } + s := strings1.Join([]string{`&testmessage.MediumMessage{` + + `Values:` + fmt2.Sprintf("%#v", this.Values), + `XXX_unrecognized:` + fmt2.Sprintf("%#v", this.XXX_unrecognized) + `}`}, ", ") + return s +} +func (this *BigMessage) GoString() string { + if this == nil { + return "nil" + } + s := strings1.Join([]string{`&testmessage.BigMessage{` + + `Values:` + fmt2.Sprintf("%#v", this.Values), + `XXX_unrecognized:` + fmt2.Sprintf("%#v", this.XXX_unrecognized) + `}`}, ", ") + return s +} +func (this *LargeMessage) GoString() string { + if this == nil { + return "nil" + } + s := strings1.Join([]string{`&testmessage.LargeMessage{` + + `Values:` + fmt2.Sprintf("%#v", this.Values), + `XXX_unrecognized:` + fmt2.Sprintf("%#v", this.XXX_unrecognized) + `}`}, ", ") + return s +} +func valueToGoStringTestmessage(v interface{}, typ string) string { + rv := reflect1.ValueOf(v) + if rv.IsNil() { + return "nil" + } + pv := reflect1.Indirect(rv).Interface() + return fmt2.Sprintf("func(v %v) *%v { return &v } ( %#v )", typ, typ, pv) +} +func extensionToGoStringTestmessage(e map[int32]github_com_gogo_protobuf_proto1.Extension) string { + if e == nil { + return "nil" + } + s := "map[int32]proto.Extension{" + keys := make([]int, 0, len(e)) + for k := range e { + keys = append(keys, int(k)) + } + sort.Ints(keys) + ss := []string{} + for _, k := range keys { + ss = append(ss, strconv.Itoa(k)+": "+e[int32(k)].GoString()) + } + s += strings1.Join(ss, ",") + "}" + return s +} +func (this *SmallMessage) VerboseEqual(that interface{}) error { + if that == nil { + if this == nil { + return nil + } + return fmt3.Errorf("that == nil && this != nil") + } + + that1, ok := that.(*SmallMessage) + if !ok { + return fmt3.Errorf("that is not of type *SmallMessage") + } + if that1 == nil { + if this == nil { + return nil + } + return fmt3.Errorf("that is type *SmallMessage but is nil && this != nil") + } else if this == nil { + return fmt3.Errorf("that is type *SmallMessagebut is not nil && this == nil") + } + if len(this.Values) != len(that1.Values) { + return fmt3.Errorf("Values this(%v) Not Equal that(%v)", len(this.Values), len(that1.Values)) + } + for i := range this.Values { + if this.Values[i] != that1.Values[i] { + return fmt3.Errorf("Values this[%v](%v) Not Equal that[%v](%v)", i, this.Values[i], i, that1.Values[i]) + } + } + if !bytes.Equal(this.XXX_unrecognized, that1.XXX_unrecognized) { + return fmt3.Errorf("XXX_unrecognized this(%v) Not Equal that(%v)", this.XXX_unrecognized, that1.XXX_unrecognized) + } + return nil +} +func (this *SmallMessage) Equal(that interface{}) bool { + if that == nil { + if this == nil { + return true + } + return false + } + + that1, ok := that.(*SmallMessage) + if !ok { + return false + } + if that1 == nil { + if this == nil { + return true + } + return false + } else if this == nil { + return false + } + if len(this.Values) != len(that1.Values) { + return false + } + for i := range this.Values { + if this.Values[i] != 
that1.Values[i] { + return false + } + } + if !bytes.Equal(this.XXX_unrecognized, that1.XXX_unrecognized) { + return false + } + return true +} +func (this *MediumMessage) VerboseEqual(that interface{}) error { + if that == nil { + if this == nil { + return nil + } + return fmt3.Errorf("that == nil && this != nil") + } + + that1, ok := that.(*MediumMessage) + if !ok { + return fmt3.Errorf("that is not of type *MediumMessage") + } + if that1 == nil { + if this == nil { + return nil + } + return fmt3.Errorf("that is type *MediumMessage but is nil && this != nil") + } else if this == nil { + return fmt3.Errorf("that is type *MediumMessagebut is not nil && this == nil") + } + if len(this.Values) != len(that1.Values) { + return fmt3.Errorf("Values this(%v) Not Equal that(%v)", len(this.Values), len(that1.Values)) + } + for i := range this.Values { + if this.Values[i] != that1.Values[i] { + return fmt3.Errorf("Values this[%v](%v) Not Equal that[%v](%v)", i, this.Values[i], i, that1.Values[i]) + } + } + if !bytes.Equal(this.XXX_unrecognized, that1.XXX_unrecognized) { + return fmt3.Errorf("XXX_unrecognized this(%v) Not Equal that(%v)", this.XXX_unrecognized, that1.XXX_unrecognized) + } + return nil +} +func (this *MediumMessage) Equal(that interface{}) bool { + if that == nil { + if this == nil { + return true + } + return false + } + + that1, ok := that.(*MediumMessage) + if !ok { + return false + } + if that1 == nil { + if this == nil { + return true + } + return false + } else if this == nil { + return false + } + if len(this.Values) != len(that1.Values) { + return false + } + for i := range this.Values { + if this.Values[i] != that1.Values[i] { + return false + } + } + if !bytes.Equal(this.XXX_unrecognized, that1.XXX_unrecognized) { + return false + } + return true +} +func (this *BigMessage) VerboseEqual(that interface{}) error { + if that == nil { + if this == nil { + return nil + } + return fmt3.Errorf("that == nil && this != nil") + } + + that1, ok := that.(*BigMessage) + if !ok { + return fmt3.Errorf("that is not of type *BigMessage") + } + if that1 == nil { + if this == nil { + return nil + } + return fmt3.Errorf("that is type *BigMessage but is nil && this != nil") + } else if this == nil { + return fmt3.Errorf("that is type *BigMessagebut is not nil && this == nil") + } + if len(this.Values) != len(that1.Values) { + return fmt3.Errorf("Values this(%v) Not Equal that(%v)", len(this.Values), len(that1.Values)) + } + for i := range this.Values { + if this.Values[i] != that1.Values[i] { + return fmt3.Errorf("Values this[%v](%v) Not Equal that[%v](%v)", i, this.Values[i], i, that1.Values[i]) + } + } + if !bytes.Equal(this.XXX_unrecognized, that1.XXX_unrecognized) { + return fmt3.Errorf("XXX_unrecognized this(%v) Not Equal that(%v)", this.XXX_unrecognized, that1.XXX_unrecognized) + } + return nil +} +func (this *BigMessage) Equal(that interface{}) bool { + if that == nil { + if this == nil { + return true + } + return false + } + + that1, ok := that.(*BigMessage) + if !ok { + return false + } + if that1 == nil { + if this == nil { + return true + } + return false + } else if this == nil { + return false + } + if len(this.Values) != len(that1.Values) { + return false + } + for i := range this.Values { + if this.Values[i] != that1.Values[i] { + return false + } + } + if !bytes.Equal(this.XXX_unrecognized, that1.XXX_unrecognized) { + return false + } + return true +} +func (this *LargeMessage) VerboseEqual(that interface{}) error { + if that == nil { + if this == nil { + return nil + } + return 
fmt3.Errorf("that == nil && this != nil") + } + + that1, ok := that.(*LargeMessage) + if !ok { + return fmt3.Errorf("that is not of type *LargeMessage") + } + if that1 == nil { + if this == nil { + return nil + } + return fmt3.Errorf("that is type *LargeMessage but is nil && this != nil") + } else if this == nil { + return fmt3.Errorf("that is type *LargeMessagebut is not nil && this == nil") + } + if len(this.Values) != len(that1.Values) { + return fmt3.Errorf("Values this(%v) Not Equal that(%v)", len(this.Values), len(that1.Values)) + } + for i := range this.Values { + if this.Values[i] != that1.Values[i] { + return fmt3.Errorf("Values this[%v](%v) Not Equal that[%v](%v)", i, this.Values[i], i, that1.Values[i]) + } + } + if !bytes.Equal(this.XXX_unrecognized, that1.XXX_unrecognized) { + return fmt3.Errorf("XXX_unrecognized this(%v) Not Equal that(%v)", this.XXX_unrecognized, that1.XXX_unrecognized) + } + return nil +} +func (this *LargeMessage) Equal(that interface{}) bool { + if that == nil { + if this == nil { + return true + } + return false + } + + that1, ok := that.(*LargeMessage) + if !ok { + return false + } + if that1 == nil { + if this == nil { + return true + } + return false + } else if this == nil { + return false + } + if len(this.Values) != len(that1.Values) { + return false + } + for i := range this.Values { + if this.Values[i] != that1.Values[i] { + return false + } + } + if !bytes.Equal(this.XXX_unrecognized, that1.XXX_unrecognized) { + return false + } + return true +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/testmessage/testmessage.proto b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/testmessage/testmessage.proto new file mode 100644 index 00000000000..b1fa57fbdec --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/testmessage/testmessage.proto @@ -0,0 +1,31 @@ +package testmessage; + +import "github.com/gogo/protobuf/gogoproto/gogo.proto"; + +option (gogoproto.gostring_all) = true; +option (gogoproto.equal_all) = true; +option (gogoproto.verbose_equal_all) = true; +option (gogoproto.goproto_stringer_all) = false; +option (gogoproto.stringer_all) = true; +option (gogoproto.populate_all) = true; +option (gogoproto.testgen_all) = false; +option (gogoproto.benchgen_all) = false; +option (gogoproto.marshaler_all) = true; +option (gogoproto.sizer_all) = true; +option (gogoproto.unmarshaler_all) = true; + +message SmallMessage { + repeated string Values = 1; +} + +message MediumMessage { + repeated string Values = 1; +} + +message BigMessage { + repeated string Values = 1; +} + +message LargeMessage { + repeated string Values = 1; +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/transporter.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/transporter.go new file mode 100644 index 00000000000..7d920c08b02 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/messenger/transporter.go @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package messenger + +import ( + "github.com/mesos/mesos-go/upid" + "golang.org/x/net/context" +) + +// Transporter defines methods for communicating with remote processes. +type Transporter interface { + //Send sends a message to the remote process. Must use context to determine + //cancelled requests. Will stop sending when the transport is stopped. + Send(ctx context.Context, msg *Message) error + + //Recv receives a message and delegates handling to the installed handlers. + //Will stop receiving when the transport is stopped. + Recv() (*Message, error) + + //Inject injects a message into the incoming queue. Must use context to + //determine cancelled requests. Injection is aborted if the transport + //is stopped. + Inject(ctx context.Context, msg *Message) error + + //Install mounts a handler based on the incoming message name. + Install(messageName string) + + //Start starts the transporter and returns immediately. The error chan + //is never nil. + Start() <-chan error + + //Stop kills the transporter. + Stop(graceful bool) error + + //UPID returns the PID for the transporter. + UPID() *upid.UPID +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/doc.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/doc.go new file mode 100644 index 00000000000..94cfbacd6af --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/doc.go @@ -0,0 +1,6 @@ +/* +Package scheduler includes the interfaces for the mesos scheduler and +the mesos executor driver. It also contains an implementation +of the driver that you can use in your code.
+*/ +package scheduler diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/handler.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/handler.go new file mode 100644 index 00000000000..fc7fe6ab8ad --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/handler.go @@ -0,0 +1,29 @@ +package scheduler + +import ( + "github.com/mesos/mesos-go/auth/callback" + mesos "github.com/mesos/mesos-go/mesosproto" + "github.com/mesos/mesos-go/upid" +) + +type CredentialHandler struct { + pid *upid.UPID // the process to authenticate against (master) + client *upid.UPID // the process to be authenticated (slave / framework) + credential *mesos.Credential +} + +func (h *CredentialHandler) Handle(callbacks ...callback.Interface) error { + for _, cb := range callbacks { + switch cb := cb.(type) { + case *callback.Name: + cb.Set(h.credential.GetPrincipal()) + case *callback.Password: + cb.Set(h.credential.GetSecret()) + case *callback.Interprocess: + cb.Set(*(h.pid), *(h.client)) + default: + return &callback.Unsupported{Callback: cb} + } + } + return nil +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/mock_scheduler.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/mock_scheduler.go new file mode 100644 index 00000000000..9cfe54d343f --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/mock_scheduler.go @@ -0,0 +1,56 @@ +package scheduler + +import ( + log "github.com/golang/glog" + mesos "github.com/mesos/mesos-go/mesosproto" + "github.com/stretchr/testify/mock" +) + +type MockScheduler struct { + mock.Mock +} + +func NewMockScheduler() *MockScheduler { + return &MockScheduler{} +} + +func (sched *MockScheduler) Registered(SchedulerDriver, *mesos.FrameworkID, *mesos.MasterInfo) { + sched.Called() +} + +func (sched *MockScheduler) Reregistered(SchedulerDriver, *mesos.MasterInfo) { + sched.Called() +} + +func (sched *MockScheduler) Disconnected(SchedulerDriver) { + sched.Called() +} + +func (sched *MockScheduler) ResourceOffers(SchedulerDriver, []*mesos.Offer) { + sched.Called() +} + +func (sched *MockScheduler) OfferRescinded(SchedulerDriver, *mesos.OfferID) { + sched.Called() +} + +func (sched *MockScheduler) StatusUpdate(SchedulerDriver, *mesos.TaskStatus) { + sched.Called() +} + +func (sched *MockScheduler) FrameworkMessage(SchedulerDriver, *mesos.ExecutorID, *mesos.SlaveID, string) { + sched.Called() +} + +func (sched *MockScheduler) SlaveLost(SchedulerDriver, *mesos.SlaveID) { + sched.Called() +} + +func (sched *MockScheduler) ExecutorLost(SchedulerDriver, *mesos.ExecutorID, *mesos.SlaveID, int) { + sched.Called() +} + +func (sched *MockScheduler) Error(d SchedulerDriver, msg string) { + log.Error(msg) + sched.Called() +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/plugins.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/plugins.go new file mode 100644 index 00000000000..0054bbdd977 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/plugins.go @@ -0,0 +1,7 @@ +package scheduler + +import ( + _ "github.com/mesos/mesos-go/auth/sasl" + _ "github.com/mesos/mesos-go/auth/sasl/mech/crammd5" + _ "github.com/mesos/mesos-go/detector/zoo" +) diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/schedcache.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/schedcache.go new file mode 100644 index 00000000000..5644623223b --- /dev/null +++ 
b/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/schedcache.go @@ -0,0 +1,98 @@ +package scheduler + +import ( + log "github.com/golang/glog" + mesos "github.com/mesos/mesos-go/mesosproto" + "github.com/mesos/mesos-go/upid" + "sync" +) + +type cachedOffer struct { + offer *mesos.Offer + slavePid *upid.UPID +} + +func newCachedOffer(offer *mesos.Offer, slavePid *upid.UPID) *cachedOffer { + return &cachedOffer{offer: offer, slavePid: slavePid} +} + +// schedCache is a managed cache with backing maps to store offers +// and tasked slaves. +type schedCache struct { + lock sync.RWMutex + savedOffers map[string]*cachedOffer // current offers key:OfferID + savedSlavePids map[string]*upid.UPID // Current saved slaves, key:slaveId +} + +func newSchedCache() *schedCache { + return &schedCache{ + savedOffers: make(map[string]*cachedOffer), + savedSlavePids: make(map[string]*upid.UPID), + } +} + +// putOffer stores an offer and the slavePID associated with the offer. +func (cache *schedCache) putOffer(offer *mesos.Offer, pid *upid.UPID) { + if offer == nil || pid == nil { + log.V(3).Infoln("WARN: Offer not cached. The offer or pid cannot be nil") + return + } + log.V(3).Infoln("Caching offer ", offer.Id.GetValue(), " with slavePID ", pid.String()) + cache.lock.Lock() + cache.savedOffers[offer.Id.GetValue()] = &cachedOffer{offer: offer, slavePid: pid} + cache.lock.Unlock() +} + +// getOffer returns the cached offer, or nil if absent +func (cache *schedCache) getOffer(offerId *mesos.OfferID) *cachedOffer { + if offerId == nil { + log.V(3).Infoln("WARN: OfferId == nil, returning nil") + return nil + } + cache.lock.RLock() + defer cache.lock.RUnlock() + return cache.savedOffers[offerId.GetValue()] +} + +// containsOffer tests the cache for offer(offerId) +func (cache *schedCache) containsOffer(offerId *mesos.OfferID) bool { + cache.lock.RLock() + defer cache.lock.RUnlock() + _, ok := cache.savedOffers[offerId.GetValue()] + return ok +} + +func (cache *schedCache) removeOffer(offerId *mesos.OfferID) { + cache.lock.Lock() + delete(cache.savedOffers, offerId.GetValue()) + cache.lock.Unlock() +} + +func (cache *schedCache) putSlavePid(slaveId *mesos.SlaveID, pid *upid.UPID) { + cache.lock.Lock() + cache.savedSlavePids[slaveId.GetValue()] = pid + cache.lock.Unlock() +} + +func (cache *schedCache) getSlavePid(slaveId *mesos.SlaveID) *upid.UPID { + if slaveId == nil { + log.V(3).Infoln("SlaveId == nil, returning nil") + return nil + } + cache.lock.RLock() + defer cache.lock.RUnlock() + return cache.savedSlavePids[slaveId.GetValue()] +} + +func (cache *schedCache) containsSlavePid(slaveId *mesos.SlaveID) bool { + cache.lock.RLock() + defer cache.lock.RUnlock() + _, ok := cache.savedSlavePids[slaveId.GetValue()] + return ok +} + +func (cache *schedCache) removeSlavePid(slaveId *mesos.SlaveID) { + cache.lock.Lock() + delete(cache.savedSlavePids, slaveId.GetValue()) + cache.lock.Unlock() +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/schedcache_test.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/schedcache_test.go new file mode 100644 index 00000000000..4a3a46e5c4b --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/schedcache_test.go @@ -0,0 +1,215 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package scheduler + +import ( + mesos "github.com/mesos/mesos-go/mesosproto" + util "github.com/mesos/mesos-go/mesosutil" + "github.com/stretchr/testify/assert" + "testing" + + "github.com/mesos/mesos-go/upid" +) + +func TestSchedCacheNew(t *testing.T) { + cache := newSchedCache() + assert.NotNil(t, cache) + assert.NotNil(t, cache.savedOffers) + assert.NotNil(t, cache.savedSlavePids) +} + +func TestSchedCachePutOffer(t *testing.T) { + cache := newSchedCache() + + offer01 := createTestOffer("01") + pid01, err := upid.Parse("slave01@127.0.0.1:5050") + assert.NoError(t, err) + cache.putOffer(offer01, pid01) + + offer02 := createTestOffer("02") + pid02, err := upid.Parse("slave02@127.0.0.1:5050") + assert.NoError(t, err) + cache.putOffer(offer02, pid02) + + assert.Equal(t, len(cache.savedOffers), 2) + cachedOffer1, ok := cache.savedOffers["test-offer-01"] + assert.True(t, ok) + cachedOffer2, ok := cache.savedOffers["test-offer-02"] + assert.True(t, ok) + + assert.NotNil(t, cachedOffer1.offer) + assert.Equal(t, "test-offer-01", cachedOffer1.offer.Id.GetValue()) + assert.NotNil(t, cachedOffer2.offer) + assert.Equal(t, "test-offer-02", cachedOffer2.offer.Id.GetValue()) + + assert.NotNil(t, cachedOffer1.slavePid) + assert.Equal(t, "slave01@127.0.0.1:5050", cachedOffer1.slavePid.String()) + assert.NotNil(t, cachedOffer2.slavePid) + assert.Equal(t, "slave02@127.0.0.1:5050", cachedOffer2.slavePid.String()) + +} + +func TestSchedCacheGetOffer(t *testing.T) { + cache := newSchedCache() + offer01 := createTestOffer("01") + pid01, err := upid.Parse("slave01@127.0.0.1:5050") + assert.NoError(t, err) + offer02 := createTestOffer("02") + pid02, err := upid.Parse("slave02@127.0.0.1:5050") + assert.NoError(t, err) + + cache.putOffer(offer01, pid01) + cache.putOffer(offer02, pid02) + + cachedOffer01 := cache.getOffer(util.NewOfferID("test-offer-01")).offer + cachedOffer02 := cache.getOffer(util.NewOfferID("test-offer-02")).offer + assert.NotEqual(t, offer01, cachedOffer02) + assert.Equal(t, offer01, cachedOffer01) + assert.Equal(t, offer02, cachedOffer02) + +} + +func TestSchedCacheContainsOffer(t *testing.T) { + cache := newSchedCache() + offer01 := createTestOffer("01") + pid01, err := upid.Parse("slave01@127.0.0.1:5050") + assert.NoError(t, err) + offer02 := createTestOffer("02") + pid02, err := upid.Parse("slave02@127.0.0.1:5050") + assert.NoError(t, err) + + cache.putOffer(offer01, pid01) + cache.putOffer(offer02, pid02) + + assert.True(t, cache.containsOffer(util.NewOfferID("test-offer-01"))) + assert.True(t, cache.containsOffer(util.NewOfferID("test-offer-02"))) + assert.False(t, cache.containsOffer(util.NewOfferID("test-offer-05"))) +} + +func TestSchedCacheRemoveOffer(t *testing.T) { + cache := newSchedCache() + offer01 := createTestOffer("01") + pid01, err := upid.Parse("slave01@127.0.0.1:5050") + assert.NoError(t, err) + offer02 := createTestOffer("02") + pid02, err := upid.Parse("slave02@127.0.0.1:5050") + 
assert.NoError(t, err) + + cache.putOffer(offer01, pid01) + cache.putOffer(offer02, pid02) + cache.removeOffer(util.NewOfferID("test-offer-01")) + + assert.Equal(t, 1, len(cache.savedOffers)) + assert.True(t, cache.containsOffer(util.NewOfferID("test-offer-02"))) + assert.False(t, cache.containsOffer(util.NewOfferID("test-offer-01"))) +} + +func TestSchedCachePutSlavePid(t *testing.T) { + cache := newSchedCache() + + pid01, err := upid.Parse("slave01@127.0.0.1:5050") + assert.NoError(t, err) + pid02, err := upid.Parse("slave02@127.0.0.1:5050") + assert.NoError(t, err) + pid03, err := upid.Parse("slave03@127.0.0.1:5050") + assert.NoError(t, err) + + cache.putSlavePid(util.NewSlaveID("slave01"), pid01) + cache.putSlavePid(util.NewSlaveID("slave02"), pid02) + cache.putSlavePid(util.NewSlaveID("slave03"), pid03) + + assert.Equal(t, len(cache.savedSlavePids), 3) + cachedSlavePid1, ok := cache.savedSlavePids["slave01"] + assert.True(t, ok) + cachedSlavePid2, ok := cache.savedSlavePids["slave02"] + assert.True(t, ok) + cachedSlavePid3, ok := cache.savedSlavePids["slave03"] + assert.True(t, ok) + + assert.True(t, cachedSlavePid1.Equal(pid01)) + assert.True(t, cachedSlavePid2.Equal(pid02)) + assert.True(t, cachedSlavePid3.Equal(pid03)) +} + +func TestSchedCacheGetSlavePid(t *testing.T) { + cache := newSchedCache() + + pid01, err := upid.Parse("slave01@127.0.0.1:5050") + assert.NoError(t, err) + pid02, err := upid.Parse("slave02@127.0.0.1:5050") + assert.NoError(t, err) + + cache.putSlavePid(util.NewSlaveID("slave01"), pid01) + cache.putSlavePid(util.NewSlaveID("slave02"), pid02) + + cachedSlavePid1 := cache.getSlavePid(util.NewSlaveID("slave01")) + cachedSlavePid2 := cache.getSlavePid(util.NewSlaveID("slave02")) + + assert.NotNil(t, cachedSlavePid1) + assert.NotNil(t, cachedSlavePid2) + assert.True(t, pid01.Equal(cachedSlavePid1)) + assert.True(t, pid02.Equal(cachedSlavePid2)) + assert.False(t, pid01.Equal(cachedSlavePid2)) +} + +func TestSchedCacheContainsSlavePid(t *testing.T) { + cache := newSchedCache() + + pid01, err := upid.Parse("slave01@127.0.0.1:5050") + assert.NoError(t, err) + pid02, err := upid.Parse("slave02@127.0.0.1:5050") + assert.NoError(t, err) + + cache.putSlavePid(util.NewSlaveID("slave01"), pid01) + cache.putSlavePid(util.NewSlaveID("slave02"), pid02) + + assert.True(t, cache.containsSlavePid(util.NewSlaveID("slave01"))) + assert.True(t, cache.containsSlavePid(util.NewSlaveID("slave02"))) + assert.False(t, cache.containsSlavePid(util.NewSlaveID("slave05"))) +} + +func TestSchedCacheRemoveSlavePid(t *testing.T) { + cache := newSchedCache() + + pid01, err := upid.Parse("slave01@127.0.0.1:5050") + assert.NoError(t, err) + pid02, err := upid.Parse("slave02@127.0.0.1:5050") + assert.NoError(t, err) + + cache.putSlavePid(util.NewSlaveID("slave01"), pid01) + cache.putSlavePid(util.NewSlaveID("slave02"), pid02) + + assert.True(t, cache.containsSlavePid(util.NewSlaveID("slave01"))) + assert.True(t, cache.containsSlavePid(util.NewSlaveID("slave02"))) + assert.False(t, cache.containsSlavePid(util.NewSlaveID("slave05"))) + + cache.removeSlavePid(util.NewSlaveID("slave01")) + assert.Equal(t, 1, len(cache.savedSlavePids)) + assert.False(t, cache.containsSlavePid(util.NewSlaveID("slave01"))) + +} + +func createTestOffer(idSuffix string) *mesos.Offer { + return util.NewOffer( + util.NewOfferID("test-offer-"+idSuffix), + util.NewFrameworkID("test-framework-"+idSuffix), + util.NewSlaveID("test-slave-"+idSuffix), + "localhost."+idSuffix, + ) +} diff --git 
a/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/schedtype.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/schedtype.go new file mode 100644 index 00000000000..b7634efa9e1 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/schedtype.go @@ -0,0 +1,191 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package scheduler + +import ( + mesos "github.com/mesos/mesos-go/mesosproto" +) + +// Interface for connecting a scheduler to Mesos. This +// interface is used both to manage the scheduler's lifecycle (start +// it, stop it, or wait for it to finish) and to interact with Mesos +// (e.g., launch tasks, kill tasks, etc.). +// See the MesosSchedulerDriver type for a concrete +// impl of a SchedulerDriver. +type SchedulerDriver interface { + // Starts the scheduler driver. This needs to be called before any + // other driver calls are made. + Start() (mesos.Status, error) + + // Stops the scheduler driver. If the 'failover' flag is set to + // false then it is expected that this framework will never + // reconnect to Mesos and all of its executors and tasks can be + // terminated. Otherwise, all executors and tasks will remain + // running (for some framework specific failover timeout) allowing the + // scheduler to reconnect (possibly in the same process, or from a + // different process, for example, on a different machine). + Stop(failover bool) (mesos.Status, error) + + // Aborts the driver so that no more callbacks can be made to the + // scheduler. The semantics of abort and stop have deliberately been + // separated so that code can detect an aborted driver (i.e., via + // the return status of SchedulerDriver::join, see below), and + // instantiate and start another driver if desired (from within the + // same process). Note that 'Stop()' is not automatically called + // inside 'Abort()'. + Abort() (mesos.Status, error) + + // Waits for the driver to be stopped or aborted, possibly + // _blocking_ the current thread indefinitely. The return status of + // this function can be used to determine if the driver was aborted + // (see mesos.proto for a description of Status). + Join() (mesos.Status, error) + + // Starts and immediately joins (i.e., blocks on) the driver. + Run() (mesos.Status, error) + + // Requests resources from Mesos (see mesos.proto for a description + // of Request and how, for example, to request resources + // from specific slaves). Any resources available are offered to the + // framework via Scheduler.ResourceOffers callback, asynchronously. + RequestResources(requests []*mesos.Request) (mesos.Status, error) + + // Launches the given set of tasks. Any resources remaining (i.e., + // not used by the tasks or their executors) will be considered + // declined. 
The specified filters are applied on all unused + // resources (see mesos.proto for a description of Filters). + // Available resources are aggregated when multiple offers are + // provided. Note that all offers must belong to the same slave. + // Invoking this function with an empty collection of tasks declines + // offers in their entirety (see Scheduler::declineOffer). + LaunchTasks(offerIDs []*mesos.OfferID, tasks []*mesos.TaskInfo, filters *mesos.Filters) (mesos.Status, error) + + // Kills the specified task. Note that attempting to kill a task is + // currently not reliable. If, for example, a scheduler fails over + // while it was attempting to kill a task it will need to retry in + // the future. Likewise, if unregistered / disconnected, the request + // will be dropped (these semantics may be changed in the future). + KillTask(taskID *mesos.TaskID) (mesos.Status, error) + + // Declines an offer in its entirety and applies the specified + // filters on the resources (see mesos.proto for a description of + // Filters). Note that this can be done at any time, it is not + // necessary to do this within the Scheduler::resourceOffers + // callback. + DeclineOffer(offerID *mesos.OfferID, filters *mesos.Filters) (mesos.Status, error) + + // Removes all filters previously set by the framework (via + // LaunchTasks()). This enables the framework to receive offers from + // those filtered slaves. + ReviveOffers() (mesos.Status, error) + + // Sends a message from the framework to one of its executors. These + // messages are best effort; do not expect a framework message to be + // retransmitted in any reliable fashion. + SendFrameworkMessage(executorID *mesos.ExecutorID, slaveID *mesos.SlaveID, data string) (mesos.Status, error) + + // Allows the framework to query the status for non-terminal tasks. + // This causes the master to send back the latest task status for + // each task in 'statuses', if possible. Tasks that are no longer + // known will result in a TASK_LOST update. If statuses is empty, + // then the master will send the latest status for each task + // currently known. + ReconcileTasks(statuses []*mesos.TaskStatus) (mesos.Status, error) +} + +// Scheduler is a type with callbacks to be provided by framework +// schedulers. +// +// Each callback includes a reference to the scheduler driver that was +// used to run this scheduler. The pointer will not change for the +// duration of a scheduler (i.e., from the point you do +// SchedulerDriver.Start() to the point that SchedulerDriver.Stop() +// returns). This is intended for convenience so that a scheduler +// doesn't need to store a reference to the driver itself. +type Scheduler interface { + + // Invoked when the scheduler successfully registers with a Mesos + // master. A unique ID (generated by the master) used for + // distinguishing this framework from others and MasterInfo + // with the IP and port of the current master are provided as arguments. + Registered(SchedulerDriver, *mesos.FrameworkID, *mesos.MasterInfo) + + // Invoked when the scheduler re-registers with a newly elected Mesos master. + // This is only called when the scheduler has previously been registered. + // MasterInfo containing the updated information about the elected master + // is provided as an argument. + Reregistered(SchedulerDriver, *mesos.MasterInfo) + + // Invoked when the scheduler becomes "disconnected" from the master + // (e.g., the master fails and another is taking over). 
+ Disconnected(SchedulerDriver) + + // Invoked when resources have been offered to this framework. A + // single offer will only contain resources from a single slave. + // Resources associated with an offer will not be re-offered to + // _this_ framework until either (a) this framework has rejected + // those resources (see SchedulerDriver::launchTasks) or (b) those + // resources have been rescinded (see Scheduler::offerRescinded). + // Note that resources may be concurrently offered to more than one + // framework at a time (depending on the allocator being used). In + // that case, the first framework to launch tasks using those + // resources will be able to use them while the other frameworks + // will have those resources rescinded (or if a framework has + // already launched tasks with those resources then those tasks will + // fail with a TASK_LOST status and a message saying as much). + ResourceOffers(SchedulerDriver, []*mesos.Offer) + + // Invoked when an offer is no longer valid (e.g., the slave was + // lost or another framework used resources in the offer). If for + // whatever reason an offer is never rescinded (e.g., dropped + // message, failing over framework, etc.), a framework that attempts + // to launch tasks using an invalid offer will receive TASK_LOST + // status updates for those tasks (see Scheduler::resourceOffers). + OfferRescinded(SchedulerDriver, *mesos.OfferID) + + // Invoked when the status of a task has changed (e.g., a slave is + // lost and so the task is lost, a task finishes and an executor + // sends a status update saying so, etc). Note that returning from + // this callback _acknowledges_ receipt of this status update! If + // for whatever reason the scheduler aborts during this callback (or + // the process exits) another status update will be delivered (note, + // however, that this is currently not true if the slave sending the + // status update is lost/fails during that time). + StatusUpdate(SchedulerDriver, *mesos.TaskStatus) + + // Invoked when an executor sends a message. These messages are best + // effort; do not expect a framework message to be retransmitted in + // any reliable fashion. + FrameworkMessage(SchedulerDriver, *mesos.ExecutorID, *mesos.SlaveID, string) + + // Invoked when a slave has been determined unreachable (e.g., + // machine failure, network partition). Most frameworks will need to + // reschedule any tasks launched on this slave on a new slave. + SlaveLost(SchedulerDriver, *mesos.SlaveID) + + // Invoked when an executor has exited/terminated. Note that any + // tasks running will have TASK_LOST status updates automagically + // generated. + ExecutorLost(SchedulerDriver, *mesos.ExecutorID, *mesos.SlaveID, int) + + // Invoked when there is an unrecoverable error in the scheduler or + // scheduler driver. The driver will be aborted BEFORE invoking this + // callback. + Error(SchedulerDriver, string) +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/scheduler.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/scheduler.go new file mode 100644 index 00000000000..439e9977f87 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/scheduler.go @@ -0,0 +1,1105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package scheduler + +import ( + "errors" + "fmt" + "math" + "math/rand" + "net" + "os/user" + "sync" + "time" + + "code.google.com/p/go-uuid/uuid" + "github.com/gogo/protobuf/proto" + log "github.com/golang/glog" + "github.com/mesos/mesos-go/auth" + "github.com/mesos/mesos-go/detector" + mesos "github.com/mesos/mesos-go/mesosproto" + util "github.com/mesos/mesos-go/mesosutil" + "github.com/mesos/mesos-go/mesosutil/process" + "github.com/mesos/mesos-go/messenger" + "github.com/mesos/mesos-go/upid" + "golang.org/x/net/context" +) + +const ( + authTimeout = 5 * time.Second // timeout interval for an authentication attempt + registrationRetryIntervalMax = float64(1 * time.Minute) + registrationBackoffFactor = 2 * time.Second +) + +var ( + authenticationCanceledError = errors.New("authentication canceled") +) + +// helper to track authentication progress and to prevent multiple close() ops +// against a signalling chan. it's safe to invoke the funcs of this struct +// even if the receiver pointer is nil. +type authenticationAttempt struct { + done chan struct{} + doneOnce sync.Once +} + +func (a *authenticationAttempt) cancel() { + if a != nil { + a.doneOnce.Do(func() { close(a.done) }) + } +} + +func (a *authenticationAttempt) inProgress() bool { + if a != nil { + select { + case <-a.done: + return false + default: + return true + } + } + return false +} + +type DriverConfig struct { + Scheduler Scheduler + Framework *mesos.FrameworkInfo + Master string + Credential *mesos.Credential // optional + WithAuthContext func(context.Context) context.Context // required when Credential != nil + HostnameOverride string // optional + BindingAddress net.IP // optional + BindingPort uint16 // optional + NewMessenger func() (messenger.Messenger, error) // optional +} + +// Concrete implementation of a SchedulerDriver that connects a +// Scheduler with a Mesos master. The MesosSchedulerDriver is +// thread-safe. +// +// Note that scheduler failover is supported in Mesos. After a +// scheduler is registered with Mesos it may failover (to a new +// process on the same machine or across multiple machines) by +// creating a new driver with the ID given to it in +// Scheduler.Registered(). +// +// The driver is responsible for invoking the Scheduler callbacks as +// it communicates with the Mesos master. +// +// Note that blocking on the MesosSchedulerDriver (e.g., via +// MesosSchedulerDriver.Join) doesn't affect the scheduler callbacks +// in any way because they are handled by a different thread. +// +// TODO(yifan): examples. +// See src/examples/test_framework.cpp for an example of using the +// MesosSchedulerDriver. 
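The doc comment above still carries a TODO for examples; a minimal sketch of wiring up and running the driver, assuming a hypothetical myScheduler type that implements the Scheduler interface and a master assumed to listen at 127.0.0.1:5050, might look like this:

package main

import (
	"github.com/gogo/protobuf/proto"
	mesos "github.com/mesos/mesos-go/mesosproto"
	sched "github.com/mesos/mesos-go/scheduler"
)

func main() {
	config := sched.DriverConfig{
		Scheduler: &myScheduler{}, // hypothetical Scheduler implementation
		Framework: &mesos.FrameworkInfo{
			User: proto.String(""), // left empty; the driver fills in the current OS user
			Name: proto.String("example-framework"),
		},
		Master: "127.0.0.1:5050", // assumed master address
	}
	driver, err := sched.NewMesosSchedulerDriver(config)
	if err != nil {
		panic(err) // error handling abbreviated for the sketch
	}
	// Run starts the driver and blocks until it is stopped or aborted.
	if _, err := driver.Run(); err != nil {
		panic(err)
	}
}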
+type MesosSchedulerDriver struct { + Scheduler Scheduler + MasterPid *upid.UPID + FrameworkInfo *mesos.FrameworkInfo + + lock sync.RWMutex + self *upid.UPID + stopCh chan struct{} + stopped bool + status mesos.Status + messenger messenger.Messenger + masterDetector detector.Master + connected bool + connection uuid.UUID + failoverTimeout float64 + failover bool + cache *schedCache + updates map[string]*mesos.StatusUpdate // Key is a UUID string. + tasks map[string]*mesos.TaskInfo // Key is a UUID string. + credential *mesos.Credential + authenticated bool + authenticating *authenticationAttempt + reauthenticate bool + withAuthContext func(context.Context) context.Context +} + +// Create a new mesos scheduler driver with the given +// scheduler, framework info, +// master address, and credential (optional). +func NewMesosSchedulerDriver(config DriverConfig) (initializedDriver *MesosSchedulerDriver, err error) { + if config.Scheduler == nil { + err = fmt.Errorf("Scheduler callbacks required.") + } else if config.Master == "" { + err = fmt.Errorf("Missing master location URL.") + } else if config.Framework == nil { + err = fmt.Errorf("FrameworkInfo must be provided.") + } else if config.Credential != nil && config.WithAuthContext == nil { + err = fmt.Errorf("WithAuthContext must be provided when Credential != nil") + } + if err != nil { + return + } + + framework := cloneFrameworkInfo(config.Framework) + + // set default userid + if framework.GetUser() == "" { + user, err := user.Current() + if err != nil || user == nil { + if err != nil { + log.Warningf("Failed to obtain username: %v\n", err) + } else { + log.Warningln("Failed to obtain username.") + } + framework.User = proto.String("") + } else { + framework.User = proto.String(user.Username) + } + } + + // default hostname + hostname := util.GetHostname(config.HostnameOverride) + if framework.GetHostname() == "" { + framework.Hostname = proto.String(hostname) + } + + driver := &MesosSchedulerDriver{ + Scheduler: config.Scheduler, + FrameworkInfo: framework, + stopCh: make(chan struct{}), + status: mesos.Status_DRIVER_NOT_STARTED, + stopped: true, + cache: newSchedCache(), + credential: config.Credential, + failover: framework.Id != nil && len(framework.Id.GetValue()) > 0, + withAuthContext: config.WithAuthContext, + } + + if framework.FailoverTimeout != nil && *framework.FailoverTimeout > 0 { + driver.failoverTimeout = *framework.FailoverTimeout * float64(time.Second) + log.V(1).Infof("found failover_timeout = %v", time.Duration(driver.failoverTimeout)) + } + + newMessenger := config.NewMessenger + if newMessenger == nil { + newMessenger = func() (messenger.Messenger, error) { + process := process.New("scheduler") + return messenger.ForHostname(process, hostname, config.BindingAddress, config.BindingPort) + } + } + + // initialize new detector. 
+ if driver.masterDetector, err = detector.New(config.Master); err != nil { + return + } else if driver.messenger, err = newMessenger(); err != nil { + return + } else if err = driver.init(); err != nil { + return + } else { + initializedDriver = driver + } + return +} + +func cloneFrameworkInfo(framework *mesos.FrameworkInfo) *mesos.FrameworkInfo { + if framework == nil { + return nil + } + + clonedInfo := *framework + if clonedInfo.Id != nil { + clonedId := *clonedInfo.Id + clonedInfo.Id = &clonedId + } + if framework.FailoverTimeout != nil { + clonedInfo.FailoverTimeout = proto.Float64(*framework.FailoverTimeout) + } + if framework.Checkpoint != nil { + clonedInfo.Checkpoint = proto.Bool(*framework.Checkpoint) + } + return &clonedInfo +} + +// init initializes the driver. +func (driver *MesosSchedulerDriver) init() error { + log.Infof("Initializing mesos scheduler driver\n") + + // Install handlers. + driver.messenger.Install(driver.frameworkRegistered, &mesos.FrameworkRegisteredMessage{}) + driver.messenger.Install(driver.frameworkReregistered, &mesos.FrameworkReregisteredMessage{}) + driver.messenger.Install(driver.resourcesOffered, &mesos.ResourceOffersMessage{}) + driver.messenger.Install(driver.resourceOfferRescinded, &mesos.RescindResourceOfferMessage{}) + driver.messenger.Install(driver.statusUpdated, &mesos.StatusUpdateMessage{}) + driver.messenger.Install(driver.slaveLost, &mesos.LostSlaveMessage{}) + driver.messenger.Install(driver.frameworkMessageRcvd, &mesos.ExecutorToFrameworkMessage{}) + driver.messenger.Install(driver.frameworkErrorRcvd, &mesos.FrameworkErrorMessage{}) + driver.messenger.Install(driver.handleMasterChanged, &mesos.InternalMasterChangeDetected{}) + driver.messenger.Install(driver.handleAuthenticationResult, &mesos.InternalAuthenticationResult{}) + return nil +} + +// leading master detection callback. +func (driver *MesosSchedulerDriver) handleMasterChanged(from *upid.UPID, pbMsg proto.Message) { + if driver.Status() == mesos.Status_DRIVER_ABORTED { + log.Info("Ignoring master change because the driver is aborted.") + return + } else if !from.Equal(driver.self) { + log.Errorf("ignoring master changed message received from upid '%v'", from) + return + } + + // Reconnect every time a master is detected. + if driver.Connected() { + log.V(3).Info("Disconnecting scheduler.") + driver.MasterPid = nil + driver.Scheduler.Disconnected(driver) + } + + msg := pbMsg.(*mesos.InternalMasterChangeDetected) + master := msg.Master + + driver.setConnected(false) + driver.authenticated = false + + if master != nil { + log.Infof("New master %s detected\n", master.GetPid()) + + pid, err := upid.Parse(master.GetPid()) + if err != nil { + panic("Unable to parse Master's PID value.") // this should not happen. + } + + driver.MasterPid = pid // save for downstream ops. 
+ driver.tryAuthentication() + } else { + log.Infoln("No master detected.") + } +} + +func (driver *MesosSchedulerDriver) tryAuthentication() { + if driver.authenticated { + // programming error + panic("already authenticated") + } + + masterPid := driver.MasterPid // save for referencing later in goroutine + if masterPid == nil { + log.Info("skipping authentication attempt because we lost the master") + return + } + + if driver.authenticating.inProgress() { + // authentication is in progress, try to cancel it (we may already be too late) + driver.authenticating.cancel() + driver.reauthenticate = true + return + } + + if driver.credential != nil { + // authentication can block and we don't want to hold up the messenger loop + authenticating := &authenticationAttempt{done: make(chan struct{})} + go func() { + defer authenticating.cancel() + result := &mesos.InternalAuthenticationResult{ + //TODO(jdef): is this really needed? + Success: proto.Bool(false), + Completed: proto.Bool(false), + Pid: proto.String(masterPid.String()), + } + // don't reference driver.authenticating here since it may have changed + if err := driver.authenticate(masterPid, authenticating); err != nil { + log.Errorf("Scheduler failed to authenticate: %v\n", err) + if err == auth.AuthenticationFailed { + result.Completed = proto.Bool(true) + } + } else { + result.Completed = proto.Bool(true) + result.Success = proto.Bool(true) + } + driver.messenger.Route(context.TODO(), driver.messenger.UPID(), result) + }() + driver.authenticating = authenticating + } else { + log.Infoln("No credentials were provided. " + + "Attempting to register scheduler without authentication.") + driver.authenticated = true + go driver.doReliableRegistration(float64(registrationBackoffFactor)) + } +} + +func (driver *MesosSchedulerDriver) handleAuthenticationResult(from *upid.UPID, pbMsg proto.Message) { + if driver.Status() != mesos.Status_DRIVER_RUNNING { + log.V(1).Info("ignoring authentication result because driver is not running") + return + } + if !from.Equal(driver.self) { + log.Errorf("ignoring authentication result message received from upid '%v'", from) + return + } + if driver.authenticated { + // programming error + panic("already authenticated") + } + if driver.MasterPid == nil { + log.Infoln("ignoring authentication result because master is lost") + driver.authenticating.cancel() // cancel any in-progress background attempt + + // disable future retries until we get a new master + driver.reauthenticate = false + return + } + msg := pbMsg.(*mesos.InternalAuthenticationResult) + if driver.reauthenticate || !msg.GetCompleted() || driver.MasterPid.String() != msg.GetPid() { + log.Infof("failed to authenticate with master %v: master changed", driver.MasterPid) + driver.authenticating.cancel() // cancel any in-progress background authentication + driver.reauthenticate = false + driver.tryAuthentication() + return + } + if !msg.GetSuccess() { + log.Errorf("master %v refused authentication", driver.MasterPid) + return + } + driver.authenticated = true + go driver.doReliableRegistration(float64(registrationBackoffFactor)) +} + +// ------------------------- Accessors ----------------------- // +func (driver *MesosSchedulerDriver) Status() mesos.Status { + driver.lock.RLock() + defer driver.lock.RUnlock() + return driver.status +} +func (driver *MesosSchedulerDriver) setStatus(stat mesos.Status) { + driver.lock.Lock() + driver.status = stat + driver.lock.Unlock() +} + +func (driver *MesosSchedulerDriver) Stopped() bool { + driver.lock.RLock() + defer 
driver.lock.RUnlock() + return driver.stopped +} + +func (driver *MesosSchedulerDriver) setStopped(val bool) { + driver.lock.Lock() + driver.stopped = val + driver.lock.Unlock() +} + +func (driver *MesosSchedulerDriver) Connected() bool { + driver.lock.RLock() + defer driver.lock.RUnlock() + return driver.connected +} + +func (driver *MesosSchedulerDriver) setConnected(val bool) { + driver.lock.Lock() + driver.connected = val + if val { + driver.failover = false + } + driver.lock.Unlock() +} + +// ---------------------- Handlers for Events from Master --------------- // +func (driver *MesosSchedulerDriver) frameworkRegistered(from *upid.UPID, pbMsg proto.Message) { + log.V(2).Infoln("Handling scheduler driver framework registered event.") + + msg := pbMsg.(*mesos.FrameworkRegisteredMessage) + masterInfo := msg.GetMasterInfo() + masterPid := masterInfo.GetPid() + frameworkId := msg.GetFrameworkId() + + if driver.Status() == mesos.Status_DRIVER_ABORTED { + log.Infof("ignoring FrameworkRegisteredMessage from master %s, driver is aborted", masterPid) + return + } + + if driver.connected { + log.Infoln("ignoring FrameworkRegisteredMessage from master, driver is already connected", masterPid) + return + } + + if driver.stopped { + log.Infof("ignoring FrameworkRegisteredMessage from master %s, driver is stopped", masterPid) + return + } + if !driver.MasterPid.Equal(from) { + log.Warningf("ignoring framework registered message because it was sent from '%v' instead of leading master '%v'", from, driver.MasterPid) + return + } + + log.Infof("Framework registered with ID=%s\n", frameworkId.GetValue()) + driver.FrameworkInfo.Id = frameworkId // generated by master. + + driver.setConnected(true) + driver.connection = uuid.NewUUID() + driver.Scheduler.Registered(driver, frameworkId, masterInfo) +} + +func (driver *MesosSchedulerDriver) frameworkReregistered(from *upid.UPID, pbMsg proto.Message) { + log.V(1).Infoln("Handling Scheduler re-registered event.") + msg := pbMsg.(*mesos.FrameworkReregisteredMessage) + + if driver.Status() == mesos.Status_DRIVER_ABORTED { + log.Infoln("Ignoring FrameworkReregisteredMessage from master, driver is aborted!") + return + } + if driver.connected { + log.Infoln("Ignoring FrameworkReregisteredMessage from master, driver is already connected!") + return + } + if !driver.MasterPid.Equal(from) { + log.Warningf("ignoring framework re-registered message because it was sent from '%v' instead of leading master '%v'", from, driver.MasterPid) + return + } + + // TODO(vv) detect if message was from leading-master (sched.cpp) + log.Infof("Framework re-registered with ID [%s] ", msg.GetFrameworkId().GetValue()) + driver.setConnected(true) + driver.connection = uuid.NewUUID() + + driver.Scheduler.Reregistered(driver, msg.GetMasterInfo()) + +} + +func (driver *MesosSchedulerDriver) resourcesOffered(from *upid.UPID, pbMsg proto.Message) { + log.V(1).Infoln("Handling resource offers.") + + msg := pbMsg.(*mesos.ResourceOffersMessage) + if driver.Status() == mesos.Status_DRIVER_ABORTED { + log.Infoln("Ignoring ResourceOffersMessage, the driver is aborted!") + return + } + + if !driver.connected { + log.Infoln("Ignoring ResourceOffersMessage, the driver is not connected!") + return + } + + pidStrings := msg.GetPids() + if len(pidStrings) != len(msg.Offers) { + log.Errorln("Ignoring offers, Offer count does not match Slave PID count.") + return + } + + for i, offer := range msg.Offers { + if pid, err := upid.Parse(pidStrings[i]); err == nil { + driver.cache.putOffer(offer, pid) + 
log.V(1).Infof("Cached offer %s from SlavePID %s", offer.Id.GetValue(), pid) + } else { + log.Warningf("Failed to parse offer PID '%v': %v", pid, err) + } + } + + driver.Scheduler.ResourceOffers(driver, msg.Offers) +} + +func (driver *MesosSchedulerDriver) resourceOfferRescinded(from *upid.UPID, pbMsg proto.Message) { + log.V(1).Infoln("Handling resource offer rescinded.") + + msg := pbMsg.(*mesos.RescindResourceOfferMessage) + + if driver.Status() == mesos.Status_DRIVER_ABORTED { + log.Infoln("Ignoring RescindResourceOfferMessage, the driver is aborted!") + return + } + + if !driver.connected { + log.Infoln("Ignoring ResourceOffersMessage, the driver is not connected!") + return + } + + // TODO(vv) check for leading master (see sched.cpp) + + log.V(1).Infoln("Rescinding offer ", msg.OfferId.GetValue()) + driver.cache.removeOffer(msg.OfferId) + driver.Scheduler.OfferRescinded(driver, msg.OfferId) +} + +func (driver *MesosSchedulerDriver) send(upid *upid.UPID, msg proto.Message) error { + //TODO(jdef) should implement timeout here + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + + c := make(chan error, 1) + go func() { c <- driver.messenger.Send(ctx, upid, msg) }() + + select { + case <-ctx.Done(): + <-c // wait for Send(...) + return ctx.Err() + case err := <-c: + return err + } +} + +func (driver *MesosSchedulerDriver) statusUpdated(from *upid.UPID, pbMsg proto.Message) { + msg := pbMsg.(*mesos.StatusUpdateMessage) + + if driver.Status() == mesos.Status_DRIVER_ABORTED { + log.V(1).Infoln("Ignoring StatusUpdate message, the driver is aborted!") + return + } + if !driver.connected { + log.V(1).Infoln("Ignoring StatusUpdate message, the driver is not connected!") + return + } + if !driver.MasterPid.Equal(from) { + log.Warningf("ignoring status message because it was sent from '%v' instead of leading master '%v'", from, driver.MasterPid) + return + } + + log.V(2).Infoln("Received status update from ", from.String(), " status source:", msg.GetPid()) + + driver.Scheduler.StatusUpdate(driver, msg.Update.GetStatus()) + + if driver.Status() == mesos.Status_DRIVER_ABORTED { + log.V(1).Infoln("Not sending StatusUpdate ACK, the driver is aborted!") + return + } + + // Send StatusUpdate Acknowledgement + // Only send ACK if udpate was not from this driver + if !from.Equal(driver.self) && msg.GetPid() != from.String() { + ackMsg := &mesos.StatusUpdateAcknowledgementMessage{ + SlaveId: msg.Update.SlaveId, + FrameworkId: driver.FrameworkInfo.Id, + TaskId: msg.Update.Status.TaskId, + Uuid: msg.Update.Uuid, + } + + log.V(2).Infoln("Sending status update ACK to ", from.String()) + if err := driver.send(driver.MasterPid, ackMsg); err != nil { + log.Errorf("Failed to send StatusUpdate ACK message: %v\n", err) + return + } + } else { + log.V(1).Infoln("Not sending ACK, update is not from slave:", from.String()) + } +} + +func (driver *MesosSchedulerDriver) slaveLost(from *upid.UPID, pbMsg proto.Message) { + log.V(1).Infoln("Handling LostSlave event.") + + msg := pbMsg.(*mesos.LostSlaveMessage) + + if driver.Status() == mesos.Status_DRIVER_ABORTED { + log.V(1).Infoln("Ignoring LostSlave message, the driver is aborted!") + return + } + + if !driver.connected { + log.V(1).Infoln("Ignoring LostSlave message, the driver is not connected!") + return + } + + // TODO(VV) - detect leading master (see sched.cpp) + + log.V(2).Infoln("Lost slave ", msg.SlaveId.GetValue()) + driver.cache.removeSlavePid(msg.SlaveId) + + driver.Scheduler.SlaveLost(driver, msg.SlaveId) +} + +func (driver 
*MesosSchedulerDriver) frameworkMessageRcvd(from *upid.UPID, pbMsg proto.Message) { + log.V(1).Infoln("Handling framework message event.") + + msg := pbMsg.(*mesos.ExecutorToFrameworkMessage) + + if driver.Status() == mesos.Status_DRIVER_ABORTED { + log.V(1).Infoln("Ignoring framework message, the driver is aborted!") + return + } + + log.V(1).Infoln("Received framework message ", msg.String()) + + driver.Scheduler.FrameworkMessage(driver, msg.ExecutorId, msg.SlaveId, string(msg.Data)) +} + +func (driver *MesosSchedulerDriver) frameworkErrorRcvd(from *upid.UPID, pbMsg proto.Message) { + log.V(1).Infoln("Handling framework error event.") + msg := pbMsg.(*mesos.FrameworkErrorMessage) + driver.error(msg.GetMessage(), true) +} + +// ---------------------- Interface Methods ---------------------- // + +// Starts the scheduler driver. +// Returns immediately if an error occurs within the start sequence. +func (driver *MesosSchedulerDriver) Start() (mesos.Status, error) { + log.Infoln("Starting the scheduler driver...") + + if stat := driver.Status(); stat != mesos.Status_DRIVER_NOT_STARTED { + return stat, fmt.Errorf("Unable to Start, expecting driver status %s, but is %s:", mesos.Status_DRIVER_NOT_STARTED, stat) + } + + driver.setStopped(true) + driver.setStatus(mesos.Status_DRIVER_NOT_STARTED) + + // Start the messenger. + if err := driver.messenger.Start(); err != nil { + log.Errorf("Scheduler failed to start the messenger: %v\n", err) + return driver.Status(), err + } + + driver.self = driver.messenger.UPID() + driver.setStatus(mesos.Status_DRIVER_RUNNING) + driver.setStopped(false) + + log.Infof("Mesos scheduler driver started with PID=%v", driver.self) + + listener := detector.OnMasterChanged(func(m *mesos.MasterInfo) { + driver.messenger.Route(context.TODO(), driver.self, &mesos.InternalMasterChangeDetected{ + Master: m, + }) + }) + + // register with Detect() AFTER we have a self pid from the messenger, otherwise things get ugly + // because our internal messaging depends on it. detector callbacks are routed over the messenger + // bus, maintaining serial (concurrency-safe) callback execution. + log.V(1).Infof("starting master detector %T: %+v", driver.masterDetector, driver.masterDetector) + driver.masterDetector.Detect(listener) + + log.V(2).Infoln("master detector started") + return driver.Status(), nil +} + +// authenticate against the spec'd master pid using the configured authenticationProvider. +// the authentication process is canceled upon either cancellation of authenticating, or +// else because it timed out (authTimeout). +// +// TODO(jdef) perhaps at some point in the future this will get pushed down into +// the messenger layer (e.g. to use HTTP-based authentication). We'd probably still +// specify the callback.Handler here, along with the user-selected authentication +// provider. Perhaps in the form of some messenger.AuthenticationConfig. 
+// +func (driver *MesosSchedulerDriver) authenticate(pid *upid.UPID, authenticating *authenticationAttempt) error { + log.Infof("authenticating with master %v", pid) + ctx, cancel := context.WithTimeout(context.Background(), authTimeout) + handler := &CredentialHandler{ + pid: pid, + client: driver.self, + credential: driver.credential, + } + ctx = driver.withAuthContext(ctx) + ctx = auth.WithParentUPID(ctx, *driver.self) + + ch := make(chan error, 1) + go func() { ch <- auth.Login(ctx, handler) }() + select { + case <-ctx.Done(): + <-ch + return ctx.Err() + case <-authenticating.done: + cancel() + <-ch + return authenticationCanceledError + case e := <-ch: + cancel() + return e + } +} + +func (driver *MesosSchedulerDriver) doReliableRegistration(maxBackoff float64) { + for { + if !driver.registerOnce() { + return + } + maxBackoff = math.Min(maxBackoff, registrationRetryIntervalMax) + + // If failover timeout is present, bound the maximum backoff + // by 1/10th of the failover timeout. + if driver.failoverTimeout > 0 { + maxBackoff = math.Min(maxBackoff, driver.failoverTimeout/10.0) + } + + // Determine the delay for next attempt by picking a random + // duration between 0 and 'maxBackoff'. + delay := time.Duration(maxBackoff * rand.Float64()) + + log.V(1).Infof("will retry registration in %v if necessary", delay) + + select { + case <-driver.stopCh: + return + case <-time.After(delay): + maxBackoff *= 2 + } + } +} + +// return true if we should attempt another registration later +func (driver *MesosSchedulerDriver) registerOnce() bool { + + var ( + failover bool + pid *upid.UPID + ) + if func() bool { + driver.lock.RLock() + defer driver.lock.RUnlock() + + if driver.stopped || driver.connected || driver.MasterPid == nil || (driver.credential != nil && !driver.authenticated) { + log.V(1).Infof("skipping registration request: stopped=%v, connected=%v, authenticated=%v", + driver.stopped, driver.connected, driver.authenticated) + return false + } + failover = driver.failover + pid = driver.MasterPid + return true + }() { + // register framework + var message proto.Message + if driver.FrameworkInfo.Id != nil && len(driver.FrameworkInfo.Id.GetValue()) > 0 { + // not the first time, or failing over + log.V(1).Infof("Reregistering with master: %v", pid) + message = &mesos.ReregisterFrameworkMessage{ + Framework: driver.FrameworkInfo, + Failover: proto.Bool(failover), + } + } else { + log.V(1).Infof("Registering with master: %v", pid) + message = &mesos.RegisterFrameworkMessage{ + Framework: driver.FrameworkInfo, + } + } + if err := driver.send(pid, message); err != nil { + log.Errorf("failed to send RegisterFramework message: %v", err) + if _, err = driver.Stop(failover); err != nil { + log.Errorf("failed to stop scheduler driver: %v", err) + } + } + return true + } + return false +} + +//Join blocks until the driver is stopped. +//Should follow a call to Start() +func (driver *MesosSchedulerDriver) Join() (mesos.Status, error) { + if stat := driver.Status(); stat != mesos.Status_DRIVER_RUNNING { + return stat, fmt.Errorf("Unable to Join, expecting driver status %s, but is %s", mesos.Status_DRIVER_RUNNING, stat) + } + <-driver.stopCh // wait for stop signal + return driver.Status(), nil +} + +//Run starts and joins driver process and waits to be stopped or aborted. 
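doReliableRegistration above retries registration with a randomized, capped exponential backoff. A self-contained sketch of that delay schedule, with constants mirroring registrationBackoffFactor and registrationRetryIntervalMax above and ignoring the failover-timeout bound, could be:

package main

import (
	"fmt"
	"math"
	"math/rand"
	"time"
)

func main() {
	maxInterval := float64(1 * time.Minute) // mirrors registrationRetryIntervalMax
	backoff := float64(2 * time.Second)     // mirrors registrationBackoffFactor
	for attempt := 1; attempt <= 6; attempt++ {
		bound := math.Min(backoff, maxInterval)
		delay := time.Duration(bound * rand.Float64()) // random duration in [0, bound)
		fmt.Printf("attempt %d: retry in %v (bound %v)\n", attempt, delay, time.Duration(bound))
		backoff *= 2 // the driver doubles the bound after each sleep
	}
}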
+func (driver *MesosSchedulerDriver) Run() (mesos.Status, error) { + stat, err := driver.Start() + + if err != nil { + return driver.Stop(false) + } + + if stat != mesos.Status_DRIVER_RUNNING { + return stat, fmt.Errorf("Unable to Run, expecting driver status %s, but is %s:", mesos.Status_DRIVER_RUNNING, driver.status) + } + + log.Infoln("Scheduler driver running. Waiting to be stopped.") + return driver.Join() +} + +//Stop stops the driver. +func (driver *MesosSchedulerDriver) Stop(failover bool) (mesos.Status, error) { + log.Infoln("Stopping the scheduler driver") + if stat := driver.Status(); stat != mesos.Status_DRIVER_RUNNING { + return stat, fmt.Errorf("Unable to Stop, expected driver status %s, but is %s", mesos.Status_DRIVER_RUNNING, stat) + } + + if driver.connected && failover { + // unregister the framework + message := &mesos.UnregisterFrameworkMessage{ + FrameworkId: driver.FrameworkInfo.Id, + } + if err := driver.send(driver.MasterPid, message); err != nil { + log.Errorf("Failed to send UnregisterFramework message while stopping driver: %v\n", err) + return driver.stop(mesos.Status_DRIVER_ABORTED) + } + } + + // stop messenger + return driver.stop(mesos.Status_DRIVER_STOPPED) +} + +func (driver *MesosSchedulerDriver) stop(stopStatus mesos.Status) (mesos.Status, error) { + // stop messenger + err := driver.messenger.Stop() + defer func() { + select { + case <-driver.stopCh: + // already closed + default: + close(driver.stopCh) + } + }() + + driver.setStatus(stopStatus) + driver.setStopped(true) + driver.connected = false + + if err != nil { + return stopStatus, err + } + + return stopStatus, nil +} + +func (driver *MesosSchedulerDriver) Abort() (stat mesos.Status, err error) { + defer driver.masterDetector.Cancel() + log.Infof("Aborting framework [%+v]", driver.FrameworkInfo.Id) + if driver.connected { + _, err = driver.Stop(true) + } else { + driver.messenger.Stop() + } + stat = mesos.Status_DRIVER_ABORTED + driver.setStatus(stat) + return +} + +func (driver *MesosSchedulerDriver) LaunchTasks(offerIds []*mesos.OfferID, tasks []*mesos.TaskInfo, filters *mesos.Filters) (mesos.Status, error) { + if stat := driver.Status(); stat != mesos.Status_DRIVER_RUNNING { + return stat, fmt.Errorf("Unable to LaunchTasks, expected driver status %s, but got %s", mesos.Status_DRIVER_RUNNING, stat) + } + + // Launch tasks + if !driver.connected { + log.Infoln("Ignoring LaunchTasks message, disconnected from master.") + // Send statusUpdate with status=TASK_LOST for each task. + // See sched.cpp L#823 + for _, task := range tasks { + driver.pushLostTask(task, "Master is disconnected") + } + return driver.Status(), fmt.Errorf("Not connected to master. Tasks marked as lost.") + } + + okTasks := make([]*mesos.TaskInfo, 0, len(tasks)) + + // Set TaskInfo.executor.framework_id, if it's missing. + for _, task := range tasks { + if task.Executor != nil && task.Executor.FrameworkId == nil { + task.Executor.FrameworkId = driver.FrameworkInfo.Id + } + okTasks = append(okTasks, task) + } + + for _, offerId := range offerIds { + for _, task := range okTasks { + // Keep only the slave PIDs where we run tasks so we can send + // framework messages directly. 
+ if driver.cache.containsOffer(offerId) { + if driver.cache.getOffer(offerId).offer.SlaveId.Equal(task.SlaveId) { + // cache the tasked slave, for future communication + pid := driver.cache.getOffer(offerId).slavePid + driver.cache.putSlavePid(task.SlaveId, pid) + } else { + log.Warningf("Attempting to launch task %s with the wrong slaveId %s\n", task.TaskId.GetValue(), task.SlaveId.GetValue()) + } + } else { + log.Warningf("Attempting to launch task %s with unknown offer %s\n", task.TaskId.GetValue(), offerId.GetValue()) + } + } + + driver.cache.removeOffer(offerId) // offer is consumed either way + } + + // launch tasks + message := &mesos.LaunchTasksMessage{ + FrameworkId: driver.FrameworkInfo.Id, + OfferIds: offerIds, + Tasks: okTasks, + Filters: filters, + } + + if err := driver.send(driver.MasterPid, message); err != nil { + for _, task := range tasks { + driver.pushLostTask(task, "Unable to launch tasks: "+err.Error()) + } + log.Errorf("Failed to send LaunchTask message: %v\n", err) + return driver.Status(), err + } + + return driver.Status(), nil +} + +func (driver *MesosSchedulerDriver) pushLostTask(taskInfo *mesos.TaskInfo, why string) { + msg := &mesos.StatusUpdateMessage{ + Update: &mesos.StatusUpdate{ + FrameworkId: driver.FrameworkInfo.Id, + Status: &mesos.TaskStatus{ + TaskId: taskInfo.TaskId, + State: mesos.TaskState_TASK_LOST.Enum(), + Message: proto.String(why), + }, + SlaveId: taskInfo.SlaveId, + ExecutorId: taskInfo.Executor.ExecutorId, + Timestamp: proto.Float64(float64(time.Now().Unix())), + Uuid: []byte(uuid.NewUUID()), + }, + } + + // put it on the internal channel + // which will cause the handler to push it to the attached Scheduler + driver.statusUpdated(driver.self, msg) +} + +func (driver *MesosSchedulerDriver) KillTask(taskId *mesos.TaskID) (mesos.Status, error) { + if stat := driver.Status(); stat != mesos.Status_DRIVER_RUNNING { + return stat, fmt.Errorf("Unable to KillTask, expecting driver status %s, but got %s", mesos.Status_DRIVER_RUNNING, stat) + } + + if !driver.connected { + log.Infoln("Ignoring kill task message, disconnected from master.") + return driver.Status(), fmt.Errorf("Not connected to master") + } + + message := &mesos.KillTaskMessage{ + FrameworkId: driver.FrameworkInfo.Id, + TaskId: taskId, + } + + if err := driver.send(driver.MasterPid, message); err != nil { + log.Errorf("Failed to send KillTask message: %v\n", err) + return driver.Status(), err + } + + return driver.Status(), nil +} + +func (driver *MesosSchedulerDriver) RequestResources(requests []*mesos.Request) (mesos.Status, error) { + if stat := driver.Status(); stat != mesos.Status_DRIVER_RUNNING { + return stat, fmt.Errorf("Unable to RequestResources, expecting driver status %s, but got %s", mesos.Status_DRIVER_RUNNING, stat) + } + + if !driver.connected { + log.Infoln("Ignoring request resource message, disconnected from master.") + return driver.status, fmt.Errorf("Not connected to master") + } + + message := &mesos.ResourceRequestMessage{ + FrameworkId: driver.FrameworkInfo.Id, + Requests: requests, + } + + if err := driver.send(driver.MasterPid, message); err != nil { + log.Errorf("Failed to send ResourceRequest message: %v\n", err) + return driver.status, err + } + + return driver.status, nil +} + +func (driver *MesosSchedulerDriver) DeclineOffer(offerId *mesos.OfferID, filters *mesos.Filters) (mesos.Status, error) { + // launching an empty task list will decline the offer + return driver.LaunchTasks([]*mesos.OfferID{offerId}, []*mesos.TaskInfo{}, filters) +} + +func (driver *MesosSchedulerDriver) 
ReviveOffers() (mesos.Status, error) { + if stat := driver.Status(); stat != mesos.Status_DRIVER_RUNNING { + return stat, fmt.Errorf("Unable to ReviveOffers, expecting driver status %s, but got %s", mesos.Status_DRIVER_RUNNING, stat) + } + if !driver.connected { + log.Infoln("Ignoring revive offers message, disconnected from master.") + return driver.Status(), fmt.Errorf("Not connected to master.") + } + + message := &mesos.ReviveOffersMessage{ + FrameworkId: driver.FrameworkInfo.Id, + } + if err := driver.send(driver.MasterPid, message); err != nil { + log.Errorf("Failed to send ReviveOffers message: %v\n", err) + return driver.Status(), err + } + + return driver.Status(), nil +} + +func (driver *MesosSchedulerDriver) SendFrameworkMessage(executorId *mesos.ExecutorID, slaveId *mesos.SlaveID, data string) (mesos.Status, error) { + if stat := driver.Status(); stat != mesos.Status_DRIVER_RUNNING { + return stat, fmt.Errorf("Unable to SendFrameworkMessage, expecting driver status %s, but got %s", mesos.Status_DRIVER_RUNNING, stat) + } + if !driver.connected { + log.Infoln("Ignoring send framework message, disconnected from master.") + return driver.Status(), fmt.Errorf("Not connected to master") + } + + message := &mesos.FrameworkToExecutorMessage{ + SlaveId: slaveId, + FrameworkId: driver.FrameworkInfo.Id, + ExecutorId: executorId, + Data: []byte(data), + } + // Use list of cached slaveIds from previous offers. + // Send frameworkMessage directly to cached slave, otherwise to master. + if driver.cache.containsSlavePid(slaveId) { + slavePid := driver.cache.getSlavePid(slaveId) + if slavePid.Equal(driver.self) { + return driver.Status(), nil + } + if err := driver.send(slavePid, message); err != nil { + log.Errorf("Failed to send framework to executor message: %v\n", err) + return driver.Status(), err + } + } else { + // slavePid not cached, send to master. 
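+ // the master is expected to relay the FrameworkToExecutorMessage
+ // to the executor's slave on the framework's behalf.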
+ if err := driver.send(driver.MasterPid, message); err != nil {
+ log.Errorf("Failed to send framework to executor message: %v\n", err)
+ return driver.Status(), err
+ }
+ }
+
+ return driver.Status(), nil
+}
+
+func (driver *MesosSchedulerDriver) ReconcileTasks(statuses []*mesos.TaskStatus) (mesos.Status, error) {
+ if stat := driver.Status(); stat != mesos.Status_DRIVER_RUNNING {
+ return stat, fmt.Errorf("Unable to ReconcileTasks, expecting driver status %s, but got %s", mesos.Status_DRIVER_RUNNING, stat)
+ }
+ if !driver.connected {
+ log.Infoln("Ignoring ReconcileTasks message, disconnected from master.")
+ return driver.Status(), fmt.Errorf("Not connected to master.")
+ }
+
+ message := &mesos.ReconcileTasksMessage{
+ FrameworkId: driver.FrameworkInfo.Id,
+ Statuses: statuses,
+ }
+ if err := driver.send(driver.MasterPid, message); err != nil {
+ log.Errorf("Failed to send reconcile tasks message: %v\n", err)
+ return driver.Status(), err
+ }
+
+ return driver.Status(), nil
+}
+
+func (driver *MesosSchedulerDriver) error(err string, abortDriver bool) {
+ if abortDriver {
+ if driver.Status() == mesos.Status_DRIVER_ABORTED {
+ log.V(3).Infoln("Ignoring error message, the driver is aborted!")
+ return
+ }
+
+ log.Infoln("Aborting driver, got error '", err, "'")
+
+ driver.Abort()
+ }
+
+ log.V(3).Infof("Sending error '%v'", err)
+ driver.Scheduler.Error(driver, err)
+}
diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/scheduler_intgr_test.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/scheduler_intgr_test.go
new file mode 100644
index 00000000000..fc4137c2b9e
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/scheduler_intgr_test.go
@@ -0,0 +1,442 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package scheduler
+
+import (
+ "io/ioutil"
+ "net/http"
+ "reflect"
+ "sync"
+ "testing"
+ "time"
+
+ "github.com/gogo/protobuf/proto"
+ log "github.com/golang/glog"
+ mesos "github.com/mesos/mesos-go/mesosproto"
+ util "github.com/mesos/mesos-go/mesosutil"
+ "github.com/mesos/mesos-go/testutil"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/suite"
+)
+
+// testScheduler is used for testing Scheduler callbacks.
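+// Each callback asserts the payload it receives and then reports completion:
+// most callbacks signal on ch, while StatusUpdate calls wg.Done() so tests
+// can wait for a fixed number of status updates via a WaitGroup.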
+type testScheduler struct {
+ ch chan bool
+ wg *sync.WaitGroup
+ s *SchedulerIntegrationTestSuite
+}
+
+// convenience
+func (sched *testScheduler) T() *testing.T {
+ return sched.s.T()
+}
+
+func (sched *testScheduler) Registered(dr SchedulerDriver, fw *mesos.FrameworkID, mi *mesos.MasterInfo) {
+ log.Infoln("Sched.Registered() called.")
+ sched.s.Equal(fw.GetValue(), sched.s.registeredFrameworkId.GetValue(), "driver did not register the expected framework ID")
+ sched.s.Equal(mi.GetIp(), uint32(123456))
+ sched.ch <- true
+}
+
+func (sched *testScheduler) Reregistered(dr SchedulerDriver, mi *mesos.MasterInfo) {
+ log.Infoln("Sched.Reregistered() called")
+ sched.s.Equal(mi.GetIp(), uint32(123456))
+ sched.ch <- true
+}
+
+func (sched *testScheduler) Disconnected(dr SchedulerDriver) {
+ log.Infoln("Sched.Disconnected() called")
+}
+
+func (sched *testScheduler) ResourceOffers(dr SchedulerDriver, offers []*mesos.Offer) {
+ log.Infoln("Sched.ResourceOffers called.")
+ sched.s.NotNil(offers)
+ sched.s.Equal(len(offers), 1)
+ sched.ch <- true
+}
+
+func (sched *testScheduler) OfferRescinded(dr SchedulerDriver, oid *mesos.OfferID) {
+ log.Infoln("Sched.OfferRescinded() called.")
+ sched.s.NotNil(oid)
+ sched.s.Equal("test-offer-001", oid.GetValue())
+ sched.ch <- true
+}
+
+func (sched *testScheduler) StatusUpdate(dr SchedulerDriver, stat *mesos.TaskStatus) {
+ log.Infoln("Sched.StatusUpdate() called.")
+ sched.s.NotNil(stat)
+ sched.s.Equal("test-task-001", stat.GetTaskId().GetValue())
+ sched.wg.Done()
+ log.Infof("Status update done with waitGroup %v \n", sched.wg)
+}
+
+func (sched *testScheduler) SlaveLost(dr SchedulerDriver, slaveId *mesos.SlaveID) {
+ log.Infoln("Sched.SlaveLost() called.")
+ sched.s.NotNil(slaveId)
+ sched.s.Equal(slaveId.GetValue(), "test-slave-001")
+ sched.ch <- true
+}
+
+func (sched *testScheduler) FrameworkMessage(dr SchedulerDriver, execId *mesos.ExecutorID, slaveId *mesos.SlaveID, data string) {
+ log.Infoln("Sched.FrameworkMessage() called.")
+ sched.s.NotNil(slaveId)
+ sched.s.Equal(slaveId.GetValue(), "test-slave-001")
+ sched.s.NotNil(execId)
+ sched.s.NotNil(data)
+ sched.s.Equal("test-data-999", string(data))
+ sched.ch <- true
+}
+
+func (sched *testScheduler) ExecutorLost(SchedulerDriver, *mesos.ExecutorID, *mesos.SlaveID, int) {
+ log.Infoln("Sched.ExecutorLost called")
+}
+
+func (sched *testScheduler) Error(dr SchedulerDriver, err string) {
+ log.Infoln("Sched.Error() called.")
+ sched.s.Equal("test-error-999", err)
+ sched.ch <- true
+}
+
+func (sched *testScheduler) waitForCallback(timeout time.Duration) bool {
+ if timeout == 0 {
+ timeout = 2 * time.Second
+ }
+ select {
+ case <-sched.ch:
+ //callback complete
+ return true
+ case <-time.After(timeout):
+ sched.T().Fatalf("timed out after waiting %v for callback", timeout)
+ }
+ return false
+}
+
+func newTestScheduler(s *SchedulerIntegrationTestSuite) *testScheduler {
+ return &testScheduler{ch: make(chan bool), s: s}
+}
+
+type mockServerConfigurator func(frameworkId *mesos.FrameworkID, suite *SchedulerIntegrationTestSuite)
+
+type SchedulerIntegrationTestSuiteCore struct {
+ SchedulerTestSuiteCore
+ server *testutil.MockMesosHttpServer
+ driver *MesosSchedulerDriver
+ sched *testScheduler
+ config mockServerConfigurator
+ validator http.HandlerFunc
+ registeredFrameworkId *mesos.FrameworkID
+}
+
+type SchedulerIntegrationTestSuite struct {
+ suite.Suite
+ SchedulerIntegrationTestSuiteCore
+}
+
+// sets up a mock Mesos HTTP master listener, scheduler, and scheduler driver for testing.
+// attempts to wait for a registered or re-registered callback on the suite.sched. +func (suite *SchedulerIntegrationTestSuite) configure(frameworkId *mesos.FrameworkID) bool { + t := suite.T() + // start mock master server to handle connection + suite.server = testutil.NewMockMasterHttpServer(t, func(rsp http.ResponseWriter, req *http.Request) { + log.Infoln("MockMaster - rcvd ", req.RequestURI) + if suite.validator != nil { + suite.validator(rsp, req) + } else { + ioutil.ReadAll(req.Body) + defer req.Body.Close() + rsp.WriteHeader(http.StatusAccepted) + } + }) + + t.Logf("test HTTP server listening on %v", suite.server.Addr) + suite.sched = newTestScheduler(suite) + suite.sched.ch = make(chan bool, 10) // big enough that it doesn't block callback processing + + suite.driver = newTestSchedulerDriver(suite.T(), suite.sched, suite.framework, suite.server.Addr, nil) + + suite.config(frameworkId, suite) + + stat, err := suite.driver.Start() + suite.NoError(err) + suite.Equal(mesos.Status_DRIVER_RUNNING, stat) + + ok := waitForConnected(t, suite.driver, 2*time.Second) + if ok { + ok = suite.sched.waitForCallback(0) // registered or re-registered callback + } + return ok +} + +func (suite *SchedulerIntegrationTestSuite) configureServerWithRegisteredFramework() bool { + // suite.framework is used to initialize the FrameworkInfo of + // the driver, so if we clear the Id then we'll expect a registration message + id := suite.framework.Id + suite.framework.Id = nil + suite.registeredFrameworkId = id + return suite.configure(id) +} + +var defaultMockServerConfigurator = mockServerConfigurator(func(frameworkId *mesos.FrameworkID, suite *SchedulerIntegrationTestSuite) { + t := suite.T() + masterInfo := util.NewMasterInfo("master", 123456, 1234) + suite.server.On("/master/mesos.internal.RegisterFrameworkMessage").Do(func(rsp http.ResponseWriter, req *http.Request) { + if suite.validator != nil { + t.Logf("validating registration request") + suite.validator(rsp, req) + } else { + ioutil.ReadAll(req.Body) + defer req.Body.Close() + rsp.WriteHeader(http.StatusAccepted) + } + // this is what the mocked scheduler is expecting to receive + suite.driver.frameworkRegistered(suite.driver.MasterPid, &mesos.FrameworkRegisteredMessage{ + FrameworkId: frameworkId, + MasterInfo: masterInfo, + }) + }) + suite.server.On("/master/mesos.internal.ReregisterFrameworkMessage").Do(func(rsp http.ResponseWriter, req *http.Request) { + if suite.validator != nil { + suite.validator(rsp, req) + } else { + ioutil.ReadAll(req.Body) + defer req.Body.Close() + rsp.WriteHeader(http.StatusAccepted) + } + // this is what the mocked scheduler is expecting to receive + suite.driver.frameworkReregistered(suite.driver.MasterPid, &mesos.FrameworkReregisteredMessage{ + FrameworkId: frameworkId, + MasterInfo: masterInfo, + }) + }) +}) + +func (s *SchedulerIntegrationTestSuite) newMockClient() *testutil.MockMesosClient { + return testutil.NewMockMesosClient(s.T(), s.server.PID) +} + +func (s *SchedulerIntegrationTestSuite) SetupTest() { + s.SchedulerTestSuiteCore.SetupTest() + s.config = defaultMockServerConfigurator +} + +func (s *SchedulerIntegrationTestSuite) TearDownTest() { + if s.server != nil { + s.server.Close() + } + if s.driver != nil && s.driver.Status() == mesos.Status_DRIVER_RUNNING { + s.driver.Abort() + } +} + +// ---------------------------------- Tests ---------------------------------- // + +func TestSchedulerIntegrationSuite(t *testing.T) { + suite.Run(t, new(SchedulerIntegrationTestSuite)) +} + +func (suite 
*SchedulerIntegrationTestSuite) TestSchedulerDriverRegisterFrameworkMessage() {
+ t := suite.T()
+
+ id := suite.framework.Id
+ suite.framework.Id = nil
+ validated := make(chan struct{})
+ var closeOnce sync.Once
+ suite.validator = http.HandlerFunc(func(rsp http.ResponseWriter, req *http.Request) {
+ t.Logf("RCVD request %s", req.URL)
+
+ data, err := ioutil.ReadAll(req.Body)
+ if err != nil {
+ t.Fatalf("Missing message data from request")
+ }
+ defer req.Body.Close()
+
+ if "/master/mesos.internal.RegisterFrameworkMessage" != req.RequestURI {
+ rsp.WriteHeader(http.StatusAccepted)
+ return
+ }
+
+ defer closeOnce.Do(func() { close(validated) })
+
+ message := new(mesos.RegisterFrameworkMessage)
+ err = proto.Unmarshal(data, message)
+ if err != nil {
+ t.Fatal("Problem unmarshaling expected RegisterFrameworkMessage")
+ }
+
+ suite.NotNil(message)
+ info := message.GetFramework()
+ suite.NotNil(info)
+ suite.Equal(suite.framework.GetName(), info.GetName())
+ suite.True(reflect.DeepEqual(suite.framework.GetId(), info.GetId()))
+ rsp.WriteHeader(http.StatusOK)
+ })
+ ok := suite.configure(id)
+ suite.True(ok, "failed to establish running test server and driver")
+ select {
+ case <-time.After(1 * time.Second):
+ t.Fatalf("failed to complete validation of framework registration message")
+ case <-validated:
+ // noop
+ }
+}
+
+func (suite *SchedulerIntegrationTestSuite) TestSchedulerDriverFrameworkRegisteredEvent() {
+ ok := suite.configureServerWithRegisteredFramework()
+ suite.True(ok, "failed to establish running test server and driver")
+}
+
+func (suite *SchedulerIntegrationTestSuite) TestSchedulerDriverFrameworkReregisteredEvent() {
+ ok := suite.configure(suite.framework.Id)
+ suite.True(ok, "failed to establish running test server and driver")
+}
+
+func (suite *SchedulerIntegrationTestSuite) TestSchedulerDriverResourceOffersEvent() {
+ ok := suite.configureServerWithRegisteredFramework()
+ suite.True(ok, "failed to establish running test server and driver")
+
+ // Send an event to this SchedulerDriver (via http) to test handlers.
+ offer := util.NewOffer(
+ util.NewOfferID("test-offer-001"),
+ suite.registeredFrameworkId,
+ util.NewSlaveID("test-slave-001"),
+ "test-localhost",
+ )
+ pbMsg := &mesos.ResourceOffersMessage{
+ Offers: []*mesos.Offer{offer},
+ Pids: []string{"test-offer-001@test-slave-001:5051"},
+ }
+
+ c := suite.newMockClient()
+ c.SendMessage(suite.driver.self, pbMsg)
+ suite.sched.waitForCallback(0)
+}
+
+func (suite *SchedulerIntegrationTestSuite) TestSchedulerDriverRescindOfferEvent() {
+ ok := suite.configureServerWithRegisteredFramework()
+ suite.True(ok, "failed to establish running test server and driver")
+
+ // Send an event to this SchedulerDriver (via http) to test handlers.
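+ // The mock client posts the message to the driver's UPID; the driver
+ // should dispatch it to sched.OfferRescinded, which asserts the offer id
+ // and signals the callback channel.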
+ pbMsg := &mesos.RescindResourceOfferMessage{
+ OfferId: util.NewOfferID("test-offer-001"),
+ }
+
+ c := suite.newMockClient()
+ c.SendMessage(suite.driver.self, pbMsg)
+ suite.sched.waitForCallback(0)
+}
+
+func (suite *SchedulerIntegrationTestSuite) TestSchedulerDriverStatusUpdatedEvent() {
+ t := suite.T()
+ var wg sync.WaitGroup
+ wg.Add(2)
+ suite.config = mockServerConfigurator(func(frameworkId *mesos.FrameworkID, suite *SchedulerIntegrationTestSuite) {
+ defaultMockServerConfigurator(frameworkId, suite)
+ suite.server.On("/master/mesos.internal.StatusUpdateAcknowledgementMessage").Do(func(rsp http.ResponseWriter, req *http.Request) {
+ log.Infoln("Master rcvd ACK")
+ data, _ := ioutil.ReadAll(req.Body)
+ defer req.Body.Close()
+ assert.NotNil(t, data)
+ wg.Done()
+ log.Infof("MockMaster - Done with wait group %v \n", wg)
+ })
+ suite.sched.wg = &wg
+ })
+
+ ok := suite.configureServerWithRegisteredFramework()
+ suite.True(ok, "failed to establish running test server and driver")
+
+ // Send an event to this SchedulerDriver (via http) to test handlers.
+ pbMsg := &mesos.StatusUpdateMessage{
+ Update: util.NewStatusUpdate(
+ suite.registeredFrameworkId,
+ util.NewTaskStatus(util.NewTaskID("test-task-001"), mesos.TaskState_TASK_STARTING),
+ float64(time.Now().Unix()),
+ []byte("test-abcd-ef-3455-454-001"),
+ ),
+ Pid: proto.String(suite.driver.self.String()),
+ }
+ pbMsg.Update.SlaveId = &mesos.SlaveID{Value: proto.String("test-slave-001")}
+
+ c := suite.newMockClient()
+ c.SendMessage(suite.driver.self, pbMsg)
+ wg.Wait()
+}
+
+func (suite *SchedulerIntegrationTestSuite) TestSchedulerDriverLostSlaveEvent() {
+ ok := suite.configureServerWithRegisteredFramework()
+ suite.True(ok, "failed to establish running test server and driver")
+
+ // Send an event to this SchedulerDriver (via http) to test handlers.
+ pbMsg := &mesos.LostSlaveMessage{
+ SlaveId: util.NewSlaveID("test-slave-001"),
+ }
+
+ c := suite.newMockClient()
+ c.SendMessage(suite.driver.self, pbMsg)
+ suite.sched.waitForCallback(0)
+}
+
+func (suite *SchedulerIntegrationTestSuite) TestSchedulerDriverFrameworkMessageEvent() {
+ ok := suite.configureServerWithRegisteredFramework()
+ suite.True(ok, "failed to establish running test server and driver")
+
+ // Send an event to this SchedulerDriver (via http) to test handlers.
+ pbMsg := &mesos.ExecutorToFrameworkMessage{
+ SlaveId: util.NewSlaveID("test-slave-001"),
+ FrameworkId: suite.registeredFrameworkId,
+ ExecutorId: util.NewExecutorID("test-executor-001"),
+ Data: []byte("test-data-999"),
+ }
+
+ c := suite.newMockClient()
+ c.SendMessage(suite.driver.self, pbMsg)
+ suite.sched.waitForCallback(0)
+}
+
+func waitForConnected(t *testing.T, driver *MesosSchedulerDriver, timeout time.Duration) bool {
+ connected := make(chan struct{})
+ go func() {
+ defer close(connected)
+ for !driver.Connected() {
+ time.Sleep(200 * time.Millisecond)
+ }
+ }()
+ select {
+ case <-time.After(timeout):
+ t.Fatalf("driver failed to establish connection within %v", timeout)
+ return false
+ case <-connected:
+ return true
+ }
+}
+
+func (suite *SchedulerIntegrationTestSuite) TestSchedulerDriverFrameworkErrorEvent() {
+ ok := suite.configureServerWithRegisteredFramework()
+ suite.True(ok, "failed to establish running test server and driver")
+
+ // Send an error event to this SchedulerDriver (via http) to test handlers.
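+ // A FrameworkErrorMessage is expected to surface through Scheduler.Error
+ // and abort the driver; the final assertion below checks for DRIVER_ABORTED.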
+ pbMsg := &mesos.FrameworkErrorMessage{ + Message: proto.String("test-error-999"), + } + + c := suite.newMockClient() + c.SendMessage(suite.driver.self, pbMsg) + suite.sched.waitForCallback(0) + suite.Equal(mesos.Status_DRIVER_ABORTED, suite.driver.Status()) +} diff --git a/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/scheduler_unit_test.go b/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/scheduler_unit_test.go new file mode 100644 index 00000000000..423459f6a8a --- /dev/null +++ b/Godeps/_workspace/src/github.com/mesos/mesos-go/scheduler/scheduler_unit_test.go @@ -0,0 +1,653 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package scheduler + +import ( + "fmt" + "os" + "os/user" + "sync" + "testing" + "time" + + "github.com/gogo/protobuf/proto" + log "github.com/golang/glog" + "github.com/mesos/mesos-go/detector" + "github.com/mesos/mesos-go/detector/zoo" + mesos "github.com/mesos/mesos-go/mesosproto" + util "github.com/mesos/mesos-go/mesosutil" + "github.com/mesos/mesos-go/messenger" + "github.com/mesos/mesos-go/upid" + "github.com/samuel/go-zookeeper/zk" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/suite" +) + +var ( + registerMockDetectorOnce sync.Once +) + +func ensureMockDetectorRegistered() { + registerMockDetectorOnce.Do(func() { + var s *SchedulerTestSuite + err := s.registerMockDetector("testing://") + if err != nil { + log.Error(err) + } + }) +} + +type MockDetector struct { + mock.Mock + address string +} + +func (m *MockDetector) Detect(listener detector.MasterChanged) error { + if listener != nil { + if pid, err := upid.Parse("master(2)@" + m.address); err != nil { + return err + } else { + go listener.OnMasterChanged(detector.CreateMasterInfo(pid)) + } + } + return nil +} + +func (m *MockDetector) Done() <-chan struct{} { + return nil +} + +func (m *MockDetector) Cancel() {} + +type SchedulerTestSuiteCore struct { + master string + masterUpid string + masterId string + frameworkID string + framework *mesos.FrameworkInfo +} + +type SchedulerTestSuite struct { + suite.Suite + SchedulerTestSuiteCore +} + +func (s *SchedulerTestSuite) registerMockDetector(prefix string) error { + address := "" + if s != nil { + address = s.master + } else { + address = "127.0.0.1:8080" + } + return detector.Register(prefix, detector.PluginFactory(func(spec string) (detector.Master, error) { + return &MockDetector{address: address}, nil + })) +} + +func (s *SchedulerTestSuiteCore) SetupTest() { + s.master = "127.0.0.1:8080" + s.masterUpid = "master(2)@" + s.master + s.masterId = "some-master-id-uuid" + s.frameworkID = "some-framework-id-uuid" + s.framework = util.NewFrameworkInfo( + "test-user", + "test-name", + util.NewFrameworkID(s.frameworkID), + ) 
+}
+
+func TestSchedulerSuite(t *testing.T) {
+ t.Logf("running scheduler test suite..")
+ suite.Run(t, new(SchedulerTestSuite))
+}
+
+func newTestSchedulerDriver(t *testing.T, sched Scheduler, framework *mesos.FrameworkInfo, master string, cred *mesos.Credential) *MesosSchedulerDriver {
+ dconfig := DriverConfig{
+ Scheduler: sched,
+ Framework: framework,
+ Master: master,
+ Credential: cred,
+ }
+ driver, err := NewMesosSchedulerDriver(dconfig)
+ if err != nil {
+ t.Fatal(err)
+ }
+ return driver
+}
+
+func TestSchedulerDriverNew(t *testing.T) {
+ masterAddr := "localhost:5050"
+ driver := newTestSchedulerDriver(t, NewMockScheduler(), &mesos.FrameworkInfo{}, masterAddr, nil)
+ user, _ := user.Current()
+ assert.Equal(t, user.Username, driver.FrameworkInfo.GetUser())
+ host, _ := os.Hostname()
+ assert.Equal(t, host, driver.FrameworkInfo.GetHostname())
+}
+
+func TestSchedulerDriverNew_WithPid(t *testing.T) {
+ masterAddr := "master@127.0.0.1:5050"
+ mUpid, err := upid.Parse(masterAddr)
+ assert.NoError(t, err)
+ driver := newTestSchedulerDriver(t, NewMockScheduler(), &mesos.FrameworkInfo{}, masterAddr, nil)
+ driver.handleMasterChanged(driver.self, &mesos.InternalMasterChangeDetected{Master: &mesos.MasterInfo{Pid: proto.String(mUpid.String())}})
+ assert.True(t, driver.MasterPid.Equal(mUpid), fmt.Sprintf("expected upid %+v instead of %+v", mUpid, driver.MasterPid))
+ assert.NoError(t, err)
+}
+
+func (suite *SchedulerTestSuite) TestSchedulerDriverNew_WithZkUrl() {
+ masterAddr := "zk://127.0.0.1:5050/mesos"
+ driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, masterAddr, nil)
+ md, err := zoo.NewMockMasterDetector(masterAddr)
+ suite.NoError(err)
+ suite.NotNil(md)
+ driver.masterDetector = md // override internal master detector
+
+ md.ScheduleConnEvent(zk.StateConnected)
+
+ done := make(chan struct{})
+ driver.masterDetector.Detect(detector.OnMasterChanged(func(m *mesos.MasterInfo) {
+ suite.NotNil(m)
+ suite.NotEqual(m.GetPid(), suite.masterUpid)
+ close(done)
+ }))
+
+ //TODO(vlad) revisit, detector not responding.
+
+ //NOTE(jdef) this works for me, I wonder if the timeouts are too short, or if
+ //GOMAXPROCS settings are affecting the result?
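+ // The commented-out block below would fire a children-changed session
+ // event and wait, with a timeout, for the detector callback to close done: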
+
+ // md.ScheduleSessEvent(zk.EventNodeChildrenChanged)
+ // select {
+ // case <-done:
+ // case <-time.After(time.Millisecond * 1000):
+ // suite.T().Errorf("Timed out waiting for children event.")
+ // }
+}
+
+func (suite *SchedulerTestSuite) TestSchedulerDriverNew_WithFrameworkInfo_Override() {
+ suite.framework.Hostname = proto.String("local-host")
+ driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, "127.0.0.1:5050", nil)
+ suite.Equal(driver.FrameworkInfo.GetUser(), "test-user")
+ suite.Equal("local-host", driver.FrameworkInfo.GetHostname())
+}
+
+func (suite *SchedulerTestSuite) TestSchedulerDriverStartOK() {
+ sched := NewMockScheduler()
+
+ messenger := messenger.NewMockedMessenger()
+ messenger.On("Start").Return(nil)
+ messenger.On("UPID").Return(&upid.UPID{})
+ messenger.On("Send").Return(nil)
+ messenger.On("Stop").Return(nil)
+
+ driver := newTestSchedulerDriver(suite.T(), sched, suite.framework, suite.master, nil)
+ driver.messenger = messenger
+ suite.True(driver.Stopped())
+
+ stat, err := driver.Start()
+ suite.NoError(err)
+ suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
+ suite.False(driver.Stopped())
+}
+
+func (suite *SchedulerTestSuite) TestSchedulerDriverStartWithMessengerFailure() {
+ sched := NewMockScheduler()
+ sched.On("Error").Return()
+
+ messenger := messenger.NewMockedMessenger()
+ messenger.On("Start").Return(fmt.Errorf("Failed to start messenger"))
+ messenger.On("Stop").Return()
+
+ driver := newTestSchedulerDriver(suite.T(), sched, suite.framework, suite.master, nil)
+ driver.messenger = messenger
+ suite.True(driver.Stopped())
+
+ stat, err := driver.Start()
+ suite.Error(err)
+ suite.True(driver.Stopped())
+ suite.True(!driver.Connected())
+ suite.Equal(mesos.Status_DRIVER_NOT_STARTED, driver.Status())
+ suite.Equal(mesos.Status_DRIVER_NOT_STARTED, stat)
+
+}
+
+func (suite *SchedulerTestSuite) TestSchedulerDriverStartWithRegistrationFailure() {
+ sched := NewMockScheduler()
+ sched.On("Error").Return()
+
+ // Set expectations and return values.
+ messenger := messenger.NewMockedMessenger()
+ messenger.On("Start").Return(nil)
+ messenger.On("UPID").Return(&upid.UPID{})
+ messenger.On("Stop").Return(nil)
+
+ driver := newTestSchedulerDriver(suite.T(), sched, suite.framework, suite.master, nil)
+
+ driver.messenger = messenger
+ suite.True(driver.Stopped())
+
+ // reliable registration loops until the driver is stopped, connected, etc..
+ stat, err := driver.Start()
+ suite.NoError(err)
+ suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
+
+ time.Sleep(5 * time.Second) // wait a bit, registration should be looping...
+
+ suite.False(driver.Stopped())
+ suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status())
+
+ // stop the driver, should not panic!
+ driver.Stop(false) // not failing over
+ suite.True(driver.Stopped())
+ suite.Equal(mesos.Status_DRIVER_STOPPED, driver.Status())
+
+ messenger.AssertExpectations(suite.T())
+}
+
+func (suite *SchedulerTestSuite) TestSchedulerDriverJoinUnstarted() {
+ driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil)
+ suite.True(driver.Stopped())
+
+ stat, err := driver.Join()
+ suite.Error(err)
+ suite.Equal(mesos.Status_DRIVER_NOT_STARTED, stat)
+}
+
+func (suite *SchedulerTestSuite) TestSchedulerDriverJoinOK() {
+ // Set expectations and return values.
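+ // Join blocks until the driver stops; the test closes stopCh by hand and
+ // then receives Join's status on testCh.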
+ messenger := messenger.NewMockedMessenger()
+ messenger.On("Start").Return(nil)
+ messenger.On("UPID").Return(&upid.UPID{})
+ messenger.On("Send").Return(nil)
+ messenger.On("Stop").Return(nil)
+
+ driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil)
+ driver.messenger = messenger
+ suite.True(driver.Stopped())
+
+ stat, err := driver.Start()
+ suite.NoError(err)
+ suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
+ suite.False(driver.Stopped())
+
+ testCh := make(chan mesos.Status)
+ go func() {
+ stat, _ := driver.Join()
+ testCh <- stat
+ }()
+
+ close(driver.stopCh) // manually stopping
+ stat = <-testCh // when Stop() is called, stat will be DRIVER_STOPPED.
+}
+
+func (suite *SchedulerTestSuite) TestSchedulerDriverRun() {
+ // Set expectations and return values.
+ messenger := messenger.NewMockedMessenger()
+ messenger.On("Start").Return(nil)
+ messenger.On("UPID").Return(&upid.UPID{})
+ messenger.On("Send").Return(nil)
+ messenger.On("Stop").Return(nil)
+
+ driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil)
+ driver.messenger = messenger
+ suite.True(driver.Stopped())
+
+ go func() {
+ stat, err := driver.Run()
+ suite.NoError(err)
+ suite.Equal(mesos.Status_DRIVER_STOPPED, stat)
+ }()
+ time.Sleep(time.Millisecond * 1)
+
+ suite.False(driver.Stopped())
+ suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status())
+
+ // close it all.
+ driver.setStatus(mesos.Status_DRIVER_STOPPED)
+ close(driver.stopCh)
+ time.Sleep(time.Millisecond * 1)
+}
+
+func (suite *SchedulerTestSuite) TestSchedulerDriverStopUnstarted() {
+ driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil)
+ suite.True(driver.Stopped())
+
+ stat, err := driver.Stop(true)
+ suite.NotNil(err)
+ suite.True(driver.Stopped())
+ suite.Equal(mesos.Status_DRIVER_NOT_STARTED, stat)
+}
+
+func (suite *SchedulerTestSuite) TestSchdulerDriverStopOK() {
+ // Set expectations and return values.
+ messenger := messenger.NewMockedMessenger()
+ messenger.On("Start").Return(nil)
+ messenger.On("UPID").Return(&upid.UPID{})
+ messenger.On("Send").Return(nil)
+ messenger.On("Stop").Return(nil)
+ messenger.On("Route").Return(nil)
+
+ driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil)
+ driver.messenger = messenger
+ suite.True(driver.Stopped())
+
+ go func() {
+ stat, err := driver.Run()
+ suite.NoError(err)
+ suite.Equal(mesos.Status_DRIVER_STOPPED, stat)
+ }()
+ time.Sleep(time.Millisecond * 1)
+
+ suite.False(driver.Stopped())
+ suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status())
+
+ driver.Stop(false)
+ time.Sleep(time.Millisecond * 1)
+
+ suite.True(driver.Stopped())
+ suite.Equal(mesos.Status_DRIVER_STOPPED, driver.Status())
+}
+
+func (suite *SchedulerTestSuite) TestSchdulerDriverAbort() {
+ // Set expectations and return values.
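+ // Abort on a connected driver goes through Stop(true), which sends an
+ // UnregisterFrameworkMessage before the status settles at DRIVER_ABORTED.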
+ messenger := messenger.NewMockedMessenger()
+ messenger.On("Start").Return(nil)
+ messenger.On("UPID").Return(&upid.UPID{})
+ messenger.On("Send").Return(nil)
+ messenger.On("Stop").Return(nil)
+ messenger.On("Route").Return(nil)
+
+ driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil)
+ driver.messenger = messenger
+ suite.True(driver.Stopped())
+
+ go func() {
+ stat, err := driver.Run()
+ suite.NoError(err)
+ suite.Equal(mesos.Status_DRIVER_ABORTED, stat)
+ }()
+ time.Sleep(time.Millisecond * 1)
+ driver.setConnected(true) // simulated
+
+ suite.False(driver.Stopped())
+ suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status())
+
+ stat, err := driver.Abort()
+ time.Sleep(time.Millisecond * 1)
+ suite.NoError(err)
+ suite.True(driver.Stopped())
+ suite.Equal(mesos.Status_DRIVER_ABORTED, stat)
+ suite.Equal(mesos.Status_DRIVER_ABORTED, driver.Status())
+}
+
+func (suite *SchedulerTestSuite) TestSchdulerDriverLunchTasksUnstarted() {
+ sched := NewMockScheduler()
+ sched.On("Error").Return()
+
+ // Set expectations and return values.
+ messenger := messenger.NewMockedMessenger()
+ messenger.On("Route").Return(nil)
+
+ driver := newTestSchedulerDriver(suite.T(), sched, suite.framework, suite.master, nil)
+ driver.messenger = messenger
+ suite.True(driver.Stopped())
+
+ stat, err := driver.LaunchTasks(
+ []*mesos.OfferID{&mesos.OfferID{}},
+ []*mesos.TaskInfo{},
+ &mesos.Filters{},
+ )
+ suite.Error(err)
+ suite.Equal(mesos.Status_DRIVER_NOT_STARTED, stat)
+}
+
+func (suite *SchedulerTestSuite) TestSchdulerDriverLaunchTasksWithError() {
+ sched := NewMockScheduler()
+ sched.On("StatusUpdate").Return(nil)
+ sched.On("Error").Return()
+
+ msgr := messenger.NewMockedMessenger()
+ msgr.On("Start").Return(nil)
+ msgr.On("Send").Return(nil)
+ msgr.On("UPID").Return(&upid.UPID{})
+ msgr.On("Stop").Return(nil)
+ msgr.On("Route").Return(nil)
+
+ driver := newTestSchedulerDriver(suite.T(), sched, suite.framework, suite.master, nil)
+ driver.messenger = msgr
+ suite.True(driver.Stopped())
+
+ go func() {
+ driver.Run()
+ }()
+ time.Sleep(time.Millisecond * 1)
+ driver.setConnected(true) // simulated
+ suite.False(driver.Stopped())
+ suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status())
+
+ // to trigger error
+ msgr2 := messenger.NewMockedMessenger()
+ msgr2.On("Start").Return(nil)
+ msgr2.On("UPID").Return(&upid.UPID{})
+ msgr2.On("Send").Return(fmt.Errorf("Unable to send message"))
+ msgr2.On("Stop").Return(nil)
+ msgr2.On("Route").Return(nil)
+ driver.messenger = msgr2
+
+ // setup an offer
+ offer := util.NewOffer(
+ util.NewOfferID("test-offer-001"),
+ suite.framework.Id,
+ util.NewSlaveID("test-slave-001"),
+ "test-slave(1)@localhost:5050",
+ )
+
+ pid, err := upid.Parse("test-slave(1)@localhost:5050")
+ suite.NoError(err)
+ driver.cache.putOffer(offer, pid)
+
+ // launch task
+ task := util.NewTaskInfo(
+ "simple-task",
+ util.NewTaskID("simpe-task-1"),
+ util.NewSlaveID("test-slave-001"),
+ []*mesos.Resource{util.NewScalarResource("mem", 400)},
+ )
+ task.Command = util.NewCommandInfo("pwd")
+ task.Executor = util.NewExecutorInfo(util.NewExecutorID("test-exec"), task.Command)
+ tasks := []*mesos.TaskInfo{task}
+
+ stat, err := driver.LaunchTasks(
+ []*mesos.OfferID{offer.Id},
+ tasks,
+ &mesos.Filters{},
+ )
+ suite.Error(err)
+ suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
+
+}
+
+func (suite *SchedulerTestSuite) TestSchdulerDriverLaunchTasks() {
+ messenger := messenger.NewMockedMessenger()
+ messenger.On("Start").Return(nil)
+ 
messenger.On("UPID").Return(&upid.UPID{}) + messenger.On("Send").Return(nil) + messenger.On("Stop").Return(nil) + messenger.On("Route").Return(nil) + + driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil) + driver.messenger = messenger + suite.True(driver.Stopped()) + + go func() { + driver.Run() + }() + time.Sleep(time.Millisecond * 1) + driver.setConnected(true) // simulated + suite.False(driver.Stopped()) + suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status()) + + task := util.NewTaskInfo( + "simple-task", + util.NewTaskID("simpe-task-1"), + util.NewSlaveID("slave-1"), + []*mesos.Resource{util.NewScalarResource("mem", 400)}, + ) + task.Command = util.NewCommandInfo("pwd") + tasks := []*mesos.TaskInfo{task} + + stat, err := driver.LaunchTasks( + []*mesos.OfferID{&mesos.OfferID{}}, + tasks, + &mesos.Filters{}, + ) + suite.NoError(err) + suite.Equal(mesos.Status_DRIVER_RUNNING, stat) +} + +func (suite *SchedulerTestSuite) TestSchdulerDriverKillTask() { + messenger := messenger.NewMockedMessenger() + messenger.On("Start").Return(nil) + messenger.On("UPID").Return(&upid.UPID{}) + messenger.On("Send").Return(nil) + messenger.On("Stop").Return(nil) + messenger.On("Route").Return(nil) + + driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil) + driver.messenger = messenger + suite.True(driver.Stopped()) + + go func() { + driver.Run() + }() + time.Sleep(time.Millisecond * 1) + driver.setConnected(true) // simulated + suite.False(driver.Stopped()) + suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status()) + + stat, err := driver.KillTask(util.NewTaskID("test-task-1")) + suite.NoError(err) + suite.Equal(mesos.Status_DRIVER_RUNNING, stat) +} + +func (suite *SchedulerTestSuite) TestSchdulerDriverRequestResources() { + messenger := messenger.NewMockedMessenger() + messenger.On("Start").Return(nil) + messenger.On("UPID").Return(&upid.UPID{}) + messenger.On("Send").Return(nil) + messenger.On("Stop").Return(nil) + messenger.On("Route").Return(nil) + + driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil) + driver.messenger = messenger + suite.True(driver.Stopped()) + + driver.Start() + driver.setConnected(true) // simulated + suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status()) + + stat, err := driver.RequestResources( + []*mesos.Request{ + &mesos.Request{ + SlaveId: util.NewSlaveID("test-slave-001"), + Resources: []*mesos.Resource{ + util.NewScalarResource("test-res-001", 33.00), + }, + }, + }, + ) + suite.NoError(err) + suite.Equal(mesos.Status_DRIVER_RUNNING, stat) +} + +func (suite *SchedulerTestSuite) TestSchdulerDriverDeclineOffers() { + // see LaunchTasks test +} + +func (suite *SchedulerTestSuite) TestSchdulerDriverReviveOffers() { + messenger := messenger.NewMockedMessenger() + messenger.On("Start").Return(nil) + messenger.On("UPID").Return(&upid.UPID{}) + messenger.On("Send").Return(nil) + messenger.On("Stop").Return(nil) + messenger.On("Route").Return(nil) + + driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil) + driver.messenger = messenger + suite.True(driver.Stopped()) + + driver.Start() + driver.setConnected(true) // simulated + suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status()) + + stat, err := driver.ReviveOffers() + suite.NoError(err) + suite.Equal(mesos.Status_DRIVER_RUNNING, stat) +} + +func (suite *SchedulerTestSuite) TestSchdulerDriverSendFrameworkMessage() { + messenger := 
messenger.NewMockedMessenger()
+ messenger.On("Start").Return(nil)
+ messenger.On("UPID").Return(&upid.UPID{})
+ messenger.On("Send").Return(nil)
+ messenger.On("Stop").Return(nil)
+ messenger.On("Route").Return(nil)
+
+ driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil)
+ driver.messenger = messenger
+ suite.True(driver.Stopped())
+
+ driver.Start()
+ driver.setConnected(true) // simulated
+ suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status())
+
+ stat, err := driver.SendFrameworkMessage(
+ util.NewExecutorID("test-exec-001"),
+ util.NewSlaveID("test-slave-001"),
+ "Hello!",
+ )
+ suite.NoError(err)
+ suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
+}
+
+func (suite *SchedulerTestSuite) TestSchdulerDriverReconcileTasks() {
+ messenger := messenger.NewMockedMessenger()
+ messenger.On("Start").Return(nil)
+ messenger.On("UPID").Return(&upid.UPID{})
+ messenger.On("Send").Return(nil)
+ messenger.On("Stop").Return(nil)
+ messenger.On("Route").Return(nil)
+
+ driver := newTestSchedulerDriver(suite.T(), NewMockScheduler(), suite.framework, suite.master, nil)
+ driver.messenger = messenger
+ suite.True(driver.Stopped())
+
+ driver.Start()
+ driver.setConnected(true) // simulated
+ suite.Equal(mesos.Status_DRIVER_RUNNING, driver.Status())
+
+ stat, err := driver.ReconcileTasks(
+ []*mesos.TaskStatus{
+ util.NewTaskStatus(util.NewTaskID("test-task-001"), mesos.TaskState_TASK_FINISHED),
+ },
+ )
+ suite.NoError(err)
+ suite.Equal(mesos.Status_DRIVER_RUNNING, stat)
+}
diff --git a/build/common.sh b/build/common.sh
index 70970f3d37f..764006c0bec 100644
--- a/build/common.sh
+++ b/build/common.sh
@@ -304,10 +304,9 @@ function kube::build::ensure_golang() {
 }
 }
 
-# Set up the context directory for the kube-build image and build it.
-function kube::build::build_image() {
- local -r build_context_dir="${LOCAL_OUTPUT_IMAGE_STAGING}/${KUBE_BUILD_IMAGE}"
- local -r source=(
+# The set of source targets to include in the kube-build image
+function kube::build::source_targets() {
+ local targets=(
 api
 build
 cmd
@@ -323,11 +322,22 @@ function kube::build::build_image() {
 test
 third_party
 )
+ if [ -n "${KUBERNETES_CONTRIB:-}" ]; then
+ for contrib in ${KUBERNETES_CONTRIB}; do
+ targets+=($(eval "kube::contrib::${contrib}::source_targets"))
+ done
+ fi
+ echo "${targets[@]}"
+}
+
+# Set up the context directory for the kube-build image and build it.
+function kube::build::build_image() {
+ local -r build_context_dir="${LOCAL_OUTPUT_IMAGE_STAGING}/${KUBE_BUILD_IMAGE}"
 
 kube::build::build_image_cross
 
 mkdir -p "${build_context_dir}"
- tar czf "${build_context_dir}/kube-source.tar.gz" "${source[@]}"
+ tar czf "${build_context_dir}/kube-source.tar.gz" $(kube::build::source_targets)
 
 kube::version::get_version_vars
 kube::version::save_version_vars "${build_context_dir}/kube-version-defs"
@@ -412,8 +422,12 @@ function kube::build::run_build_command() {
 
 local -a docker_run_opts=(
 "--name=${KUBE_BUILD_CONTAINER_NAME}"
- "${DOCKER_MOUNT_ARGS[@]}"
- )
+ "${DOCKER_MOUNT_ARGS[@]}"
+ )
+
+ if [ -n "${KUBERNETES_CONTRIB:-}" ]; then
+ docker_run_opts+=(-e "KUBERNETES_CONTRIB=${KUBERNETES_CONTRIB}")
+ fi
 
 # If we have stdin we can run interactive. This allows things like 'shell.sh'
 # to work. However, if we run this way and don't have stdin, then it ends up
diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go
index ad887796287..2fb8bc9dce9 100644
--- a/cmd/kubelet/app/server.go
+++ b/cmd/kubelet/app/server.go
@@ -17,6 +17,9 @@ limitations under the License.
// Package app makes it easy to create a kubelet server for various contexts. package app +// Note: if you change code in this file, you might need to change code in +// contrib/mesos/pkg/executor/service/. + import ( "crypto/tls" "fmt" diff --git a/contrib/mesos/cmd/k8sm-executor/doc.go b/contrib/mesos/cmd/k8sm-executor/doc.go new file mode 100644 index 00000000000..2a2041eb6f6 --- /dev/null +++ b/contrib/mesos/cmd/k8sm-executor/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// This package main implements the executable Kubernetes Mesos executor. +package main diff --git a/contrib/mesos/cmd/k8sm-executor/main.go b/contrib/mesos/cmd/k8sm-executor/main.go new file mode 100644 index 00000000000..353f6b448ab --- /dev/null +++ b/contrib/mesos/cmd/k8sm-executor/main.go @@ -0,0 +1,47 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package main + +import ( + "fmt" + "os" + "runtime" + + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/service" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/hyperkube" + "github.com/GoogleCloudPlatform/kubernetes/pkg/util" + "github.com/GoogleCloudPlatform/kubernetes/pkg/version/verflag" + "github.com/spf13/pflag" +) + +func main() { + runtime.GOMAXPROCS(runtime.NumCPU()) + + s := service.NewKubeletExecutorServer() + s.AddStandaloneFlags(pflag.CommandLine) + + util.InitFlags() + util.InitLogs() + defer util.FlushLogs() + + verflag.PrintAndExitIfRequested() + + if err := s.Run(hyperkube.Nil(), pflag.CommandLine.Args()); err != nil { + fmt.Fprintf(os.Stderr, err.Error()) + os.Exit(1) + } +} diff --git a/contrib/mesos/cmd/k8sm-redirfd/doc.go b/contrib/mesos/cmd/k8sm-redirfd/doc.go new file mode 100644 index 00000000000..fdfc3b67426 --- /dev/null +++ b/contrib/mesos/cmd/k8sm-redirfd/doc.go @@ -0,0 +1,21 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+// This package main is used for testing the redirfd package.
+// Inspired by http://skarnet.org/software/execline/redirfd.html.
+// Usage:
+// k8sm-redirfd [-n] [-b] {mode} {fd} {file} {prog...}
+package main
diff --git a/contrib/mesos/cmd/k8sm-redirfd/redirfd.go b/contrib/mesos/cmd/k8sm-redirfd/redirfd.go
new file mode 100644
index 00000000000..5592b3273d5
--- /dev/null
+++ b/contrib/mesos/cmd/k8sm-redirfd/redirfd.go
@@ -0,0 +1,105 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package main
+
+import (
+ "flag"
+ "fmt"
+ "os"
+ "os/exec"
+ "syscall"
+
+ "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/redirfd"
+)
+
+func main() {
+ nonblock := flag.Bool("n", false, "open file in non-blocking mode")
+ changemode := flag.Bool("b", false, "change mode of file after opening it: to non-blocking mode if the -n option was not given, to blocking mode if it was")
+ flag.Parse()
+
+ args := flag.Args()
+ if len(args) < 4 {
+ fmt.Fprintf(os.Stderr, "expected {mode} {fd} {file} {prog...} instead of: %v\n", args)
+ os.Exit(1)
+ }
+
+ var mode redirfd.RedirectMode
+ switch m := args[0]; m {
+ case "r":
+ mode = redirfd.Read
+ case "w":
+ mode = redirfd.Write
+ case "u":
+ mode = redirfd.Update
+ case "a":
+ mode = redirfd.Append
+ case "c":
+ mode = redirfd.AppendExisting
+ case "x":
+ mode = redirfd.WriteNew
+ default:
+ fmt.Fprintf(os.Stderr, "unrecognized mode %q\n", m)
+ os.Exit(1)
+ }
+
+ fd, err := redirfd.ParseFileDescriptor(args[1])
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "failed to parse file descriptor: %v\n", err)
+ os.Exit(1)
+ }
+ file := args[2]
+
+ f, err := mode.Redirect(*nonblock, *changemode, fd, file)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "redirect failed: %q, %v\n", args[1], err)
+ os.Exit(1)
+ }
+ var pargs []string
+ if len(args) > 4 {
+ pargs = args[4:]
+ }
+ cmd := exec.Command(args[3], pargs...)
+ cmd.Stdin = os.Stdin
+ cmd.Stdout = os.Stdout
+ cmd.Stderr = os.Stderr
+ switch fd {
+ case redirfd.Stdin:
+ cmd.Stdin = f
+ case redirfd.Stdout:
+ cmd.Stdout = f
+ case redirfd.Stderr:
+ cmd.Stderr = f
+ default:
+ cmd.ExtraFiles = []*os.File{f}
+ }
+ defer f.Close()
+ if err = cmd.Run(); err != nil {
+ exiterr, ok := err.(*exec.ExitError) // not an ExitError if the command failed to start
+ if ok && exiterr.ProcessState != nil {
+ sys := exiterr.ProcessState.Sys()
+ if waitStatus, ok := sys.(syscall.WaitStatus); ok {
+ if waitStatus.Signaled() {
+ os.Exit(256 + int(waitStatus.Signal()))
+ } else {
+ os.Exit(waitStatus.ExitStatus())
+ }
+ }
+ }
+ os.Exit(3)
+ }
+}
diff --git a/contrib/mesos/cmd/k8sm-scheduler/doc.go b/contrib/mesos/cmd/k8sm-scheduler/doc.go
new file mode 100644
index 00000000000..68e44de0b54
--- /dev/null
+++ b/contrib/mesos/cmd/k8sm-scheduler/doc.go
@@ -0,0 +1,18 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// This package main implements the executable Kubernetes Mesos scheduler. +package main diff --git a/contrib/mesos/cmd/k8sm-scheduler/main.go b/contrib/mesos/cmd/k8sm-scheduler/main.go new file mode 100644 index 00000000000..8ff4a987bb7 --- /dev/null +++ b/contrib/mesos/cmd/k8sm-scheduler/main.go @@ -0,0 +1,46 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package main + +import ( + "fmt" + "os" + "runtime" + + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/hyperkube" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/service" + "github.com/GoogleCloudPlatform/kubernetes/pkg/util" + "github.com/GoogleCloudPlatform/kubernetes/pkg/version/verflag" + "github.com/spf13/pflag" +) + +func main() { + runtime.GOMAXPROCS(runtime.NumCPU()) + s := service.NewSchedulerServer() + s.AddStandaloneFlags(pflag.CommandLine) + + util.InitFlags() + util.InitLogs() + defer util.FlushLogs() + + verflag.PrintAndExitIfRequested() + + if err := s.Run(hyperkube.Nil(), pflag.CommandLine.Args()); err != nil { + fmt.Fprintf(os.Stderr, err.Error()) + os.Exit(1) + } +} diff --git a/contrib/mesos/pkg/assert/assert.go b/contrib/mesos/pkg/assert/assert.go new file mode 100644 index 00000000000..dd716465392 --- /dev/null +++ b/contrib/mesos/pkg/assert/assert.go @@ -0,0 +1,43 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package assert + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +// EventuallyTrue asserts that the given predicate becomes true within the given timeout. It +// checks the predicate regularly each 100ms. +func EventuallyTrue(t *testing.T, timeout time.Duration, fn func() bool, msgAndArgs ...interface{}) bool { + start := time.Now() + for { + if fn() { + return true + } + if time.Now().Sub(start) > timeout { + if len(msgAndArgs) > 0 { + return assert.Fail(t, msgAndArgs[0].(string), msgAndArgs[1:]...) 
+ } else {
+ return assert.Fail(t, "predicate has not become true after "+timeout.String())
+ }
+ }
+ time.Sleep(100 * time.Millisecond)
+ }
+}
diff --git a/contrib/mesos/pkg/assert/doc.go b/contrib/mesos/pkg/assert/doc.go
new file mode 100644
index 00000000000..3fb556cecc2
--- /dev/null
+++ b/contrib/mesos/pkg/assert/doc.go
@@ -0,0 +1,19 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package assert is a utility package containing reusable testing functionality
+// extending github.com/stretchr/testify/assert
+package assert
diff --git a/contrib/mesos/pkg/backoff/backoff.go b/contrib/mesos/pkg/backoff/backoff.go
new file mode 100644
index 00000000000..f2b12b26027
--- /dev/null
+++ b/contrib/mesos/pkg/backoff/backoff.go
@@ -0,0 +1,96 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package backoff
+
+import (
+ "math/rand"
+ "sync"
+ "time"
+
+ log "github.com/golang/glog"
+)
+
+type clock interface {
+ Now() time.Time
+}
+
+type realClock struct{}
+
+func (realClock) Now() time.Time {
+ return time.Now()
+}
+
+type backoffEntry struct {
+ backoff time.Duration
+ lastUpdate time.Time
+}
+
+type Backoff struct {
+ perItemBackoff map[string]*backoffEntry
+ lock sync.Mutex
+ clock clock
+ defaultDuration time.Duration
+ maxDuration time.Duration
+}
+
+func New(initial, max time.Duration) *Backoff {
+ return &Backoff{
+ perItemBackoff: map[string]*backoffEntry{},
+ clock: realClock{},
+ defaultDuration: initial,
+ maxDuration: max,
+ }
+}
+
+func (p *Backoff) getEntry(id string) *backoffEntry {
+ p.lock.Lock()
+ defer p.lock.Unlock()
+ entry, ok := p.perItemBackoff[id]
+ if !ok {
+ entry = &backoffEntry{backoff: p.defaultDuration}
+ p.perItemBackoff[id] = entry
+ }
+ entry.lastUpdate = p.clock.Now()
+ return entry
+}
+
+func (p *Backoff) Get(id string) time.Duration {
+ entry := p.getEntry(id)
+ duration := entry.backoff
+ entry.backoff *= 2
+ if entry.backoff > p.maxDuration {
+ entry.backoff = p.maxDuration
+ }
+ //TODO(jdef) parameterize use of jitter?
+ // add jitter, get better backoff distribution
+ duration = time.Duration(rand.Int63n(int64(duration)))
+ log.V(3).Infof("Backing off %v for pod %s", duration, id)
+ return duration
+}
+
+// Garbage collect records that have aged past maxDuration. Backoff users are expected
+// to invoke this periodically.
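+// A hypothetical caller (names assumed, not prescribed by this package) might
+// wire the collection up as:
+//
+//	b := backoff.New(1*time.Second, 60*time.Second)
+//	go util.Forever(b.GC, 60*time.Second)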
+func (p *Backoff) GC() { + p.lock.Lock() + defer p.lock.Unlock() + now := p.clock.Now() + for id, entry := range p.perItemBackoff { + if now.Sub(entry.lastUpdate) > p.maxDuration { + delete(p.perItemBackoff, id) + } + } +} diff --git a/contrib/mesos/pkg/backoff/doc.go b/contrib/mesos/pkg/backoff/doc.go new file mode 100644 index 00000000000..1bd98a2617d --- /dev/null +++ b/contrib/mesos/pkg/backoff/doc.go @@ -0,0 +1,19 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package backoff provides backoff functionality with a simple API. +// Originally copied from Kubernetes: plugin/pkg/scheduler/factory/factory.go +package backoff diff --git a/contrib/mesos/pkg/election/doc.go b/contrib/mesos/pkg/election/doc.go new file mode 100644 index 00000000000..35bbe4e142d --- /dev/null +++ b/contrib/mesos/pkg/election/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package election provides interfaces used for master election. +package election diff --git a/contrib/mesos/pkg/election/etcd_master.go b/contrib/mesos/pkg/election/etcd_master.go new file mode 100644 index 00000000000..17f4d71fd80 --- /dev/null +++ b/contrib/mesos/pkg/election/etcd_master.go @@ -0,0 +1,185 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package election + +import ( + "fmt" + "time" + + "github.com/GoogleCloudPlatform/kubernetes/pkg/tools" + "github.com/GoogleCloudPlatform/kubernetes/pkg/util" + "github.com/GoogleCloudPlatform/kubernetes/pkg/watch" + "github.com/coreos/go-etcd/etcd" + "github.com/golang/glog" +) + +// Master is used to announce the current elected master. +type Master string + +// IsAnAPIObject is used solely so we can work with the watch package. +// TODO(k8s): Either fix watch so this isn't necessary, or make this a real API Object. 
+// TODO(k8s): when it becomes clear how this package will be used, move these declarations
+// to the proper place.
+func (Master) IsAnAPIObject() {}
+
+// NewEtcdMasterElector returns an implementation of election.MasterElector backed by etcd.
+func NewEtcdMasterElector(h tools.EtcdGetSet) MasterElector {
+	return &etcdMasterElector{etcd: h}
+}
+
+type empty struct{}
+
+// internal implementation struct
+type etcdMasterElector struct {
+	etcd   tools.EtcdGetSet
+	done   chan empty
+	events chan watch.Event
+}
+
+// Elect implements the election.MasterElector interface.
+func (e *etcdMasterElector) Elect(path, id string) watch.Interface {
+	e.done = make(chan empty)
+	e.events = make(chan watch.Event)
+	go util.Forever(func() { e.run(path, id) }, time.Second*5)
+	return e
+}
+
+func (e *etcdMasterElector) run(path, id string) {
+	masters := make(chan string)
+	errors := make(chan error)
+	go e.master(path, id, 30, masters, errors, e.done) // TODO(jdef) extract constant
+	for {
+		select {
+		case m := <-masters:
+			e.events <- watch.Event{
+				Type:   watch.Modified,
+				Object: Master(m),
+			}
+		case e := <-errors:
+			glog.Errorf("error in election: %v", e)
+		}
+	}
+}
+
+// ResultChan implements the watch.Interface interface.
+func (e *etcdMasterElector) ResultChan() <-chan watch.Event {
+	return e.events
+}
+
+// extendMaster attempts to extend ownership of a master lock for TTL seconds.
+// returns "", nil if extension failed
+// returns id, nil if extension succeeded
+// returns "", err if an error occurred
+func (e *etcdMasterElector) extendMaster(path, id string, ttl uint64, res *etcd.Response) (string, error) {
+	// If it matches the passed in id, extend the lease by writing a new entry.
+	// Uses compare and swap, so that if we TTL out in the meantime, the write will fail.
+	// We don't handle the TTL delete w/o a write case here, it's handled in the next loop
+	// iteration.
+	_, err := e.etcd.CompareAndSwap(path, id, ttl, "", res.Node.ModifiedIndex)
+	if err != nil && !tools.IsEtcdTestFailed(err) {
+		return "", err
+	}
+	if err != nil && tools.IsEtcdTestFailed(err) {
+		return "", nil
+	}
+	return id, nil
+}
+
+// becomeMaster attempts to become the master for this lock.
+// returns "", nil if the attempt failed
+// returns id, nil if the attempt succeeded
+// returns "", err if an error occurred
+func (e *etcdMasterElector) becomeMaster(path, id string, ttl uint64) (string, error) {
+	_, err := e.etcd.Create(path, id, ttl)
+	if err != nil && !tools.IsEtcdNodeExist(err) {
+		// unexpected error
+		return "", err
+	}
+	if err != nil && tools.IsEtcdNodeExist(err) {
+		return "", nil
+	}
+	return id, nil
+}
+
+// handleMaster performs one loop of master locking.
+// on success it returns the current master, nil
+// on error it returns "", err
+// in situations where you should try again due to concurrent state changes (e.g. another actor simultaneously acquiring the lock)
+// it returns "", nil
+func (e *etcdMasterElector) handleMaster(path, id string, ttl uint64) (string, error) {
+	res, err := e.etcd.Get(path, false, false)
+
+	// Unexpected error, bail out
+	if err != nil && !tools.IsEtcdNotFound(err) {
+		return "", err
+	}
+
+	// There is no master, try to become the master.
+	if err != nil && tools.IsEtcdNotFound(err) {
+		return e.becomeMaster(path, id, ttl)
+	}
+
+	// This should never happen.
+	if res.Node == nil {
+		return "", fmt.Errorf("unexpected response: %#v", res)
+	}
+
+	// We're not the master, just return the current value
+	if res.Node.Value != id {
+		return res.Node.Value, nil
+	}
+
+	// We are the master, try to extend our lease
+	return e.extendMaster(path, id, ttl, res)
+}
+
+// master provides a distributed master election lock; it maintains the lock until failure,
+// or until someone sends something on the done channel.
+// The basic algorithm is:
+//   while !done
+//     Get the current master
+//     If there is no current master
+//       Try to become the master
+//     Otherwise
+//       If we are the master, extend the lease
+//     If the master is different than the last time through the loop, report the master
+//     Sleep 80% of TTL
+func (e *etcdMasterElector) master(path, id string, ttl uint64, masters chan<- string, errors chan<- error, done <-chan empty) {
+	lastMaster := ""
+	for {
+		master, err := e.handleMaster(path, id, ttl)
+		if err != nil {
+			errors <- err
+		} else if len(master) == 0 {
+			continue
+		} else if master != lastMaster {
+			lastMaster = master
+			masters <- master
+		}
+		// TODO(k8s): Add Watch here, skip the polling for faster reactions
+		// If done is closed, break out.
+		select {
+		case <-done:
+			return
+		case <-time.After(time.Duration((ttl*8)/10) * time.Second):
+		}
+	}
+}
+
+// Stop implements the watch.Interface interface.
+func (e *etcdMasterElector) Stop() {
+	close(e.done)
+}
diff --git a/contrib/mesos/pkg/election/etcd_master_test.go b/contrib/mesos/pkg/election/etcd_master_test.go
new file mode 100644
index 00000000000..9facd532411
--- /dev/null
+++ b/contrib/mesos/pkg/election/etcd_master_test.go
@@ -0,0 +1,98 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +package election + +import ( + "testing" + + "github.com/GoogleCloudPlatform/kubernetes/pkg/tools" + "github.com/GoogleCloudPlatform/kubernetes/pkg/watch" + "github.com/coreos/go-etcd/etcd" +) + +func TestEtcdMasterOther(t *testing.T) { + path := "foo" + etcd := tools.NewFakeEtcdClient(t) + etcd.Set(path, "baz", 0) + master := NewEtcdMasterElector(etcd) + w := master.Elect(path, "bar") + result := <-w.ResultChan() + if result.Type != watch.Modified || result.Object.(Master) != "baz" { + t.Errorf("unexpected event: %#v", result) + } + w.Stop() +} + +func TestEtcdMasterNoOther(t *testing.T) { + path := "foo" + e := tools.NewFakeEtcdClient(t) + e.TestIndex = true + e.Data["foo"] = tools.EtcdResponseWithError{ + R: &etcd.Response{ + Node: nil, + }, + E: &etcd.EtcdError{ + ErrorCode: tools.EtcdErrorCodeNotFound, + }, + } + master := NewEtcdMasterElector(e) + w := master.Elect(path, "bar") + result := <-w.ResultChan() + if result.Type != watch.Modified || result.Object.(Master) != "bar" { + t.Errorf("unexpected event: %#v", result) + } + w.Stop() +} + +func TestEtcdMasterNoOtherThenConflict(t *testing.T) { + path := "foo" + e := tools.NewFakeEtcdClient(t) + e.TestIndex = true + // Ok, so we set up a chain of responses from etcd: + // 1) Nothing there + // 2) conflict (someone else wrote) + // 3) new value (the data they wrote) + empty := tools.EtcdResponseWithError{ + R: &etcd.Response{ + Node: nil, + }, + E: &etcd.EtcdError{ + ErrorCode: tools.EtcdErrorCodeNotFound, + }, + } + empty.N = &tools.EtcdResponseWithError{ + R: &etcd.Response{}, + E: &etcd.EtcdError{ + ErrorCode: tools.EtcdErrorCodeNodeExist, + }, + } + empty.N.N = &tools.EtcdResponseWithError{ + R: &etcd.Response{ + Node: &etcd.Node{ + Value: "baz", + }, + }, + } + e.Data["foo"] = empty + master := NewEtcdMasterElector(e) + w := master.Elect(path, "bar") + result := <-w.ResultChan() + if result.Type != watch.Modified || result.Object.(Master) != "bar" { + t.Errorf("unexpected event: %#v", result) + } + w.Stop() +} diff --git a/contrib/mesos/pkg/election/fake.go b/contrib/mesos/pkg/election/fake.go new file mode 100644 index 00000000000..d4eaddfb3ec --- /dev/null +++ b/contrib/mesos/pkg/election/fake.go @@ -0,0 +1,53 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package election + +import ( + "sync" + + "github.com/GoogleCloudPlatform/kubernetes/pkg/watch" +) + +// Fake allows for testing of anything consuming a MasterElector. +type Fake struct { + mux *watch.Broadcaster + currentMaster Master + lock sync.Mutex // Protect access of currentMaster +} + +// NewFake makes a new fake MasterElector. +func NewFake() *Fake { + // 0 means block for clients. 
+	return &Fake{mux: watch.NewBroadcaster(0, watch.WaitIfChannelFull)}
+}
+
+func (f *Fake) ChangeMaster(newMaster Master) {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+	f.mux.Action(watch.Modified, newMaster)
+	f.currentMaster = newMaster
+}
+
+func (f *Fake) Elect(path, id string) watch.Interface {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+	w := f.mux.Watch()
+	if f.currentMaster != "" {
+		f.mux.Action(watch.Modified, f.currentMaster)
+	}
+	return w
+}
diff --git a/contrib/mesos/pkg/election/master.go b/contrib/mesos/pkg/election/master.go
new file mode 100644
index 00000000000..d5f1a76a7ca
--- /dev/null
+++ b/contrib/mesos/pkg/election/master.go
@@ -0,0 +1,134 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package election
+
+import (
+	"sync"
+
+	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
+
+	"github.com/golang/glog"
+)
+
+// MasterElector is an interface for services that can elect masters.
+// Important Note: MasterElectors are not inter-operable; all participants in the election
+// need to be using the same underlying implementation of this interface for correct behavior.
+type MasterElector interface {
+	// Elect makes the caller represented by 'id' enter into a master election for the
+	// distributed lock defined by 'path'.
+	// The returned watch.Interface provides a stream of Master objects which
+	// contain the current master.
+	// Calling Stop on the returned interface relinquishes ownership (if currently possessed)
+	// and removes the caller from the election.
+	Elect(path, id string) watch.Interface
+}
+
+// Service represents anything that can start and stop on demand.
+type Service interface {
+	Validate(desired, current Master)
+	Start()
+	Stop()
+}
+
+type notifier struct {
+	lock sync.Mutex
+	cond *sync.Cond
+
+	// desired is updated with every change, current is updated after
+	// Start()/Stop() finishes. 'cond' is used to signal that a change
+	// might be needed. This handles the case where mastership flops
+	// around without calling Start()/Stop() excessively.
+	desired, current Master
+
+	// for comparison, to see if we are master.
+	id Master
+
+	service Service
+}
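Before moving on to Notify: the Fake elector above gives tests a way to drive elections by hand. A minimal sketch of that flow, assuming the election and watch packages from this patch (the path and id arguments are ignored by Fake):

    package election_test

    import (
    	"testing"

    	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/election"
    	"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
    )

    func TestFakeElectionFlow(t *testing.T) {
    	f := election.NewFake()
    	w := f.Elect("/some/path", "me") // Fake ignores path and id

    	// Broadcast a new master; every watcher sees a Modified event.
    	f.ChangeMaster(election.Master("me"))

    	ev := <-w.ResultChan()
    	if ev.Type != watch.Modified || ev.Object.(election.Master) != "me" {
    		t.Errorf("unexpected event: %#v", ev)
    	}
    	w.Stop()
    }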
+
+// Notify runs Elect() on m, and calls Start()/Stop() on s when the
+// elected master starts/stops matching 'id'. Never returns.
+func Notify(m MasterElector, path, id string, s Service, abort <-chan struct{}) {
+	n := &notifier{id: Master(id), service: s}
+	n.cond = sync.NewCond(&n.lock)
+	finished := runtime.After(func() {
+		runtime.Until(func() {
+			for {
+				w := m.Elect(path, id)
+				for {
+					select {
+					case <-abort:
+						return
+					case event, open := <-w.ResultChan():
+						if !open {
+							break
+						}
+						if event.Type != watch.Modified {
+							continue
+						}
+						electedMaster, ok := event.Object.(Master)
+						if !ok {
+							glog.Errorf("Unexpected object from election channel: %v", event.Object)
+							break
+						}
+						func() {
+							n.lock.Lock()
+							defer n.lock.Unlock()
+							n.desired = electedMaster
+							if n.desired != n.current {
+								n.cond.Signal()
+							}
+						}()
+					}
+				}
+			}
+		}, 0, abort)
+	})
+	runtime.Until(func() { n.serviceLoop(finished) }, 0, abort)
+}
+
+// serviceLoop waits for changes, and calls Start()/Stop() as needed.
+func (n *notifier) serviceLoop(abort <-chan struct{}) {
+	n.lock.Lock()
+	defer n.lock.Unlock()
+	for {
+		select {
+		case <-abort:
+			return
+		default:
+			for n.desired == n.current {
+				ch := runtime.After(n.cond.Wait)
+				select {
+				case <-abort:
+					n.cond.Signal() // ensure that Wait() returns
+					<-ch
+					return
+				case <-ch:
+					// we were notified and have the lock; proceed.
+				}
+			}
+			if n.current != n.id && n.desired == n.id {
+				n.service.Validate(n.desired, n.current)
+				n.service.Start()
+			} else if n.current == n.id && n.desired != n.id {
+				n.service.Stop()
+			}
+			n.current = n.desired
+		}
+	}
+}
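The serviceLoop above hides a subtle trick: sync.Cond.Wait cannot participate in a select, so it is wrapped in a goroutine whose completion is signaled on a channel (that is what runtime.After provides). A standalone sketch of the same pattern, with an illustrative after helper standing in for runtime.After:

    package main

    import (
    	"fmt"
    	"sync"
    	"time"
    )

    // after runs f in a goroutine and returns a channel that closes when f
    // returns; this mirrors the runtime.After helper used by serviceLoop.
    func after(f func()) <-chan struct{} {
    	ch := make(chan struct{})
    	go func() {
    		defer close(ch)
    		f()
    	}()
    	return ch
    }

    func main() {
    	var mu sync.Mutex
    	cond := sync.NewCond(&mu)
    	abort := make(chan struct{})

    	// simulate an abort arriving later
    	go func() {
    		time.Sleep(100 * time.Millisecond)
    		close(abort)
    	}()

    	mu.Lock()
    	defer mu.Unlock()

    	// cond.Wait() is not selectable, but the channel wrapping it is.
    	ch := after(cond.Wait)
    	select {
    	case <-abort:
    		cond.Signal() // ensure the pending Wait() returns...
    		<-ch          // ...and wait for it, so the lock is re-acquired
    		fmt.Println("aborted cleanly")
    	case <-ch:
    		fmt.Println("notified")
    	}
    }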
diff --git a/contrib/mesos/pkg/election/master_test.go b/contrib/mesos/pkg/election/master_test.go
new file mode 100644
index 00000000000..5584ab25fae
--- /dev/null
+++ b/contrib/mesos/pkg/election/master_test.go
@@ -0,0 +1,98 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package election
+
+import (
+	"testing"
+	"time"
+
+	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
+)
+
+type slowService struct {
+	t  *testing.T
+	on bool
+	// We explicitly have no lock to prove that
+	// Start and Stop are not called concurrently.
+	changes chan<- bool
+	done    <-chan struct{}
+}
+
+func (s *slowService) Validate(d, c Master) {
+	// noop
+}
+
+func (s *slowService) Start() {
+	select {
+	case <-s.done:
+		return // avoid writing to closed changes chan
+	default:
+	}
+	if s.on {
+		s.t.Errorf("Start called on an already-started service")
+	}
+	time.Sleep(2 * time.Millisecond)
+	s.on = true
+	s.changes <- true
+}
+
+func (s *slowService) Stop() {
+	select {
+	case <-s.done:
+		return // avoid writing to closed changes chan
+	default:
+	}
+	if !s.on {
+		s.t.Errorf("Stop called on an already-stopped service")
+	}
+	time.Sleep(2 * time.Millisecond)
+	s.on = false
+	s.changes <- false
+}
+
+func Test(t *testing.T) {
+	m := NewFake()
+	changes := make(chan bool, 1500)
+	done := make(chan struct{})
+	s := &slowService{t: t, changes: changes, done: done}
+	notifyDone := runtime.After(func() { Notify(m, "", "me", s, done) })
+
+	go func() {
+		defer close(done)
+		for i := 0; i < 500; i++ {
+			for _, key := range []string{"me", "notme", "alsonotme"} {
+				m.ChangeMaster(Master(key))
+			}
+		}
+	}()
+
+	<-notifyDone
+	close(changes)
+
+	changeList := []bool{}
+	for {
+		change, ok := <-changes
+		if !ok {
+			break
+		}
+		changeList = append(changeList, change)
+	}
+
+	if len(changeList) > 1000 {
+		t.Errorf("unexpected number of changes: %v", len(changeList))
+	}
+}
diff --git a/contrib/mesos/pkg/executor/config/config.go b/contrib/mesos/pkg/executor/config/config.go
new file mode 100644
index 00000000000..999058dbc8b
--- /dev/null
+++ b/contrib/mesos/pkg/executor/config/config.go
@@ -0,0 +1,29 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package config
+
+import (
+	"time"
+)
+
+// default values to use when constructing mesos ExecutorInfo messages
+const (
+	DefaultInfoID         = "k8sm-executor"
+	DefaultInfoSource     = "kubernetes"
+	DefaultInfoName       = "Kubelet-Executor"
+	DefaultSuicideTimeout = 20 * time.Minute
+)
diff --git a/contrib/mesos/pkg/executor/config/doc.go b/contrib/mesos/pkg/executor/config/doc.go
new file mode 100644
index 00000000000..7a44f3e7b5f
--- /dev/null
+++ b/contrib/mesos/pkg/executor/config/doc.go
@@ -0,0 +1,18 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package config contains executor configuration constants.
+package config diff --git a/contrib/mesos/pkg/executor/doc.go b/contrib/mesos/pkg/executor/doc.go new file mode 100644 index 00000000000..5ac5e9d8f82 --- /dev/null +++ b/contrib/mesos/pkg/executor/doc.go @@ -0,0 +1,21 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +/* +Package executor includes a mesos executor, which contains +a kubelet as its member to manage containers. +*/ +package executor diff --git a/contrib/mesos/pkg/executor/executor.go b/contrib/mesos/pkg/executor/executor.go new file mode 100644 index 00000000000..6a024fddb93 --- /dev/null +++ b/contrib/mesos/pkg/executor/executor.go @@ -0,0 +1,847 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package executor
+
+import (
+	"encoding/json"
+	"fmt"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/messages"
+	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/meta"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/container"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
+	"github.com/fsouza/go-dockerclient"
+	"github.com/gogo/protobuf/proto"
+	log "github.com/golang/glog"
+	bindings "github.com/mesos/mesos-go/executor"
+	mesos "github.com/mesos/mesos-go/mesosproto"
+	mutil "github.com/mesos/mesos-go/mesosutil"
+)
+
+const (
+	containerPollTime = 300 * time.Millisecond
+	launchGracePeriod = 5 * time.Minute
+)
+
+type stateType int32
+
+const (
+	disconnectedState stateType = iota
+	connectedState
+	suicidalState
+	terminalState
+)
+
+func (s *stateType) get() stateType {
+	return stateType(atomic.LoadInt32((*int32)(s)))
+}
+
+func (s *stateType) transition(from, to stateType) bool {
+	return atomic.CompareAndSwapInt32((*int32)(s), int32(from), int32(to))
+}
+
+func (s *stateType) transitionTo(to stateType, unless ...stateType) bool {
+	if len(unless) == 0 {
+		atomic.StoreInt32((*int32)(s), int32(to))
+		return true
+	}
+	for {
+		state := s.get()
+		for _, x := range unless {
+			if state == x {
+				return false
+			}
+		}
+		if s.transition(state, to) {
+			return true
+		}
+	}
+}
+
+type kuberTask struct {
+	mesosTaskInfo *mesos.TaskInfo
+	podName       string
+}
+
+// func that attempts suicide
+type jumper func(bindings.ExecutorDriver, <-chan struct{})
+
+type suicideWatcher interface {
+	Next(time.Duration, bindings.ExecutorDriver, jumper) suicideWatcher
+	Reset(time.Duration) bool
+	Stop() bool
+}
+
+type podStatusFunc func() (*api.PodStatus, error)
+
+// KubernetesExecutor is a mesos executor that runs pods
+// in a minion machine.
+type KubernetesExecutor struct {
+	kl *kubelet.Kubelet // the kubelet instance.
+ updateChan chan<- interface{} // to send pod config updates to the kubelet + state stateType + tasks map[string]*kuberTask + pods map[string]*api.Pod + lock sync.RWMutex + sourcename string + client *client.Client + events <-chan watch.Event + done chan struct{} // signals shutdown + outgoing chan func() (mesos.Status, error) // outgoing queue to the mesos driver + dockerClient dockertools.DockerInterface + suicideWatch suicideWatcher + suicideTimeout time.Duration + shutdownAlert func() // invoked just prior to executor shutdown + kubeletFinished <-chan struct{} // signals that kubelet Run() died + initialRegistration sync.Once + exitFunc func(int) + podStatusFunc func(*kubelet.Kubelet, *api.Pod) (*api.PodStatus, error) +} + +type Config struct { + Kubelet *kubelet.Kubelet + Updates chan<- interface{} // to send pod config updates to the kubelet + SourceName string + APIClient *client.Client + Watch watch.Interface + Docker dockertools.DockerInterface + ShutdownAlert func() + SuicideTimeout time.Duration + KubeletFinished <-chan struct{} // signals that kubelet Run() died + ExitFunc func(int) + PodStatusFunc func(*kubelet.Kubelet, *api.Pod) (*api.PodStatus, error) +} + +func (k *KubernetesExecutor) isConnected() bool { + return connectedState == (&k.state).get() +} + +// New creates a new kubernetes executor. +func New(config Config) *KubernetesExecutor { + k := &KubernetesExecutor{ + kl: config.Kubelet, + updateChan: config.Updates, + state: disconnectedState, + tasks: make(map[string]*kuberTask), + pods: make(map[string]*api.Pod), + sourcename: config.SourceName, + client: config.APIClient, + done: make(chan struct{}), + outgoing: make(chan func() (mesos.Status, error), 1024), + dockerClient: config.Docker, + suicideTimeout: config.SuicideTimeout, + kubeletFinished: config.KubeletFinished, + suicideWatch: &suicideTimer{}, + shutdownAlert: config.ShutdownAlert, + exitFunc: config.ExitFunc, + podStatusFunc: config.PodStatusFunc, + } + //TODO(jdef) do something real with these events.. + if config.Watch != nil { + events := config.Watch.ResultChan() + if events != nil { + go func() { + for e := range events { + // e ~= watch.Event { ADDED, *api.Event } + log.V(1).Info(e) + } + }() + k.events = events + } + } + return k +} + +func (k *KubernetesExecutor) Init(driver bindings.ExecutorDriver) { + k.killKubeletContainers() + k.resetSuicideWatch(driver) + go k.sendLoop() + //TODO(jdef) monitor kubeletFinished and shutdown if it happens +} + +func (k *KubernetesExecutor) Done() <-chan struct{} { + return k.done +} + +func (k *KubernetesExecutor) isDone() bool { + select { + case <-k.done: + return true + default: + return false + } +} + +// Registered is called when the executor is successfully registered with the slave. +func (k *KubernetesExecutor) Registered(driver bindings.ExecutorDriver, + executorInfo *mesos.ExecutorInfo, frameworkInfo *mesos.FrameworkInfo, slaveInfo *mesos.SlaveInfo) { + if k.isDone() { + return + } + log.Infof("Executor %v of framework %v registered with slave %v\n", + executorInfo, frameworkInfo, slaveInfo) + if !(&k.state).transition(disconnectedState, connectedState) { + log.Errorf("failed to register/transition to a connected state") + } + k.initialRegistration.Do(k.onInitialRegistration) +} + +// Reregistered is called when the executor is successfully re-registered with the slave. +// This can happen when the slave fails over. 
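A note on the state handling used by these callbacks: stateType is only ever mutated through atomic compare-and-swap, so racing callbacks cannot double-apply a transition. A standalone sketch of the same pattern (the type and states here are illustrative copies, not the executor's own):

    package main

    import (
    	"fmt"
    	"sync/atomic"
    )

    type state int32

    const (
    	disconnected state = iota
    	connected
    )

    // transition flips from -> to atomically and reports whether it won the
    // race; a second concurrent caller with the same arguments gets false.
    func (s *state) transition(from, to state) bool {
    	return atomic.CompareAndSwapInt32((*int32)(s), int32(from), int32(to))
    }

    func main() {
    	var s state = disconnected
    	fmt.Println(s.transition(disconnected, connected)) // true
    	fmt.Println(s.transition(disconnected, connected)) // false: already connected
    }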
+func (k *KubernetesExecutor) Reregistered(driver bindings.ExecutorDriver, slaveInfo *mesos.SlaveInfo) {
+	if k.isDone() {
+		return
+	}
+	log.Infof("Reregistered with slave %v\n", slaveInfo)
+	if !(&k.state).transition(disconnectedState, connectedState) {
+		log.Errorf("failed to reregister/transition to a connected state")
+	}
+	k.initialRegistration.Do(k.onInitialRegistration)
+}
+
+func (k *KubernetesExecutor) onInitialRegistration() {
+	// emit an empty update to allow the mesos "source" to be marked as seen
+	k.updateChan <- kubelet.PodUpdate{
+		Pods:   []*api.Pod{},
+		Op:     kubelet.SET,
+		Source: k.sourcename,
+	}
+}
+
+// Disconnected is called when the executor is disconnected from the slave.
+func (k *KubernetesExecutor) Disconnected(driver bindings.ExecutorDriver) {
+	if k.isDone() {
+		return
+	}
+	log.Infof("Slave is disconnected\n")
+	if !(&k.state).transition(connectedState, disconnectedState) {
+		log.Errorf("failed to disconnect/transition to a disconnected state")
+	}
+}
+
+// LaunchTask is called when the executor receives a request to launch a task.
+// This happens when the k8sm scheduler has decided to schedule the pod
+// (which corresponds to a Mesos Task) onto the node where this executor
+// is running, but the binding is not recorded in the Kubernetes store yet.
+// This function is invoked to tell the executor to record the binding in the
+// Kubernetes store and start the pod via the Kubelet.
+func (k *KubernetesExecutor) LaunchTask(driver bindings.ExecutorDriver, taskInfo *mesos.TaskInfo) {
+	if k.isDone() {
+		return
+	}
+	log.Infof("Launch task %v\n", taskInfo)
+
+	if !k.isConnected() {
+		log.Errorf("Ignoring launch task because the executor is disconnected\n")
+		k.sendStatus(driver, newStatus(taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED,
+			messages.ExecutorUnregistered))
+		return
+	}
+
+	obj, err := api.Codec.Decode(taskInfo.GetData())
+	if err != nil {
+		log.Errorf("failed to extract yaml data from the taskInfo.data %v", err)
+		k.sendStatus(driver, newStatus(taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED,
+			messages.UnmarshalTaskDataFailure))
+		return
+	}
+	pod, ok := obj.(*api.Pod)
+	if !ok {
+		log.Errorf("expected *api.Pod instead of %T: %+v", pod, pod)
+		k.sendStatus(driver, newStatus(taskInfo.GetTaskId(), mesos.TaskState_TASK_FAILED,
+			messages.UnmarshalTaskDataFailure))
+		return
+	}
+
+	k.lock.Lock()
+	defer k.lock.Unlock()
+
+	taskId := taskInfo.GetTaskId().GetValue()
+	if _, found := k.tasks[taskId]; found {
+		log.Errorf("task already launched\n")
+		// Do not send TASK_RUNNING back here: this may be a duplicate message
+		// or a duplicate task id.
+		return
+	}
+	// remember this task so that:
+	// (a) we ignore future launches for it
+	// (b) we have a record of it so that we can kill it if needed
+	// (c) we're leaving podName == "" for now, indicates we don't need to delete containers
+	k.tasks[taskId] = &kuberTask{
+		mesosTaskInfo: taskInfo,
+	}
+	k.resetSuicideWatch(driver)
+
+	go k.launchTask(driver, taskId, pod)
+}
+
+// TODO(jdef) add metrics for this?
+type suicideTimer struct { + timer *time.Timer +} + +func (w *suicideTimer) Next(d time.Duration, driver bindings.ExecutorDriver, f jumper) suicideWatcher { + return &suicideTimer{ + timer: time.AfterFunc(d, func() { + log.Warningf("Suicide timeout (%v) expired", d) + f(driver, nil) + }), + } +} + +func (w *suicideTimer) Stop() (result bool) { + if w != nil && w.timer != nil { + log.Infoln("stopping suicide watch") //TODO(jdef) debug + result = w.timer.Stop() + } + return +} + +// return true if the timer was successfully reset +func (w *suicideTimer) Reset(d time.Duration) bool { + if w != nil && w.timer != nil { + log.Infoln("resetting suicide watch") //TODO(jdef) debug + w.timer.Reset(d) + return true + } + return false +} + +// determine whether we need to start a suicide countdown. if so, then start +// a timer that, upon expiration, causes this executor to commit suicide. +// this implementation runs asynchronously. callers that wish to wait for the +// reset to complete may wait for the returned signal chan to close. +func (k *KubernetesExecutor) resetSuicideWatch(driver bindings.ExecutorDriver) <-chan struct{} { + ch := make(chan struct{}) + go func() { + defer close(ch) + k.lock.Lock() + defer k.lock.Unlock() + + if k.suicideTimeout < 1 { + return + } + + if k.suicideWatch != nil { + if len(k.tasks) > 0 { + k.suicideWatch.Stop() + return + } + if k.suicideWatch.Reset(k.suicideTimeout) { + // valid timer, reset was successful + return + } + } + + //TODO(jdef) reduce verbosity here once we're convinced that suicide watch is working properly + log.Infof("resetting suicide watch timer for %v", k.suicideTimeout) + + k.suicideWatch = k.suicideWatch.Next(k.suicideTimeout, driver, jumper(k.attemptSuicide)) + }() + return ch +} + +func (k *KubernetesExecutor) attemptSuicide(driver bindings.ExecutorDriver, abort <-chan struct{}) { + k.lock.Lock() + defer k.lock.Unlock() + + // this attempt may have been queued and since been aborted + select { + case <-abort: + //TODO(jdef) reduce verbosity once suicide watch is working properly + log.Infof("aborting suicide attempt since watch was cancelled") + return + default: // continue + } + + // fail-safe, will abort kamikaze attempts if there are tasks + if len(k.tasks) > 0 { + ids := []string{} + for taskid := range k.tasks { + ids = append(ids, taskid) + } + log.Errorf("suicide attempt failed, there are still running tasks: %v", ids) + return + } + + log.Infoln("Attempting suicide") + if (&k.state).transitionTo(suicidalState, suicidalState, terminalState) { + //TODO(jdef) let the scheduler know? + //TODO(jdef) is suicide more graceful than slave-demanded shutdown? 
+ k.doShutdown(driver) + } +} + +// async continuation of LaunchTask +func (k *KubernetesExecutor) launchTask(driver bindings.ExecutorDriver, taskId string, pod *api.Pod) { + + //HACK(jdef): cloned binding construction from k8s plugin/pkg/scheduler/scheduler.go + binding := &api.Binding{ + ObjectMeta: api.ObjectMeta{ + Namespace: pod.Namespace, + Name: pod.Name, + Annotations: make(map[string]string), + }, + Target: api.ObjectReference{ + Kind: "Node", + Name: pod.Annotations[meta.BindingHostKey], + }, + } + + // forward the annotations that the scheduler wants to apply + for k, v := range pod.Annotations { + binding.Annotations[k] = v + } + + deleteTask := func() { + k.lock.Lock() + defer k.lock.Unlock() + delete(k.tasks, taskId) + k.resetSuicideWatch(driver) + } + + log.Infof("Binding '%v/%v' to '%v' with annotations %+v...", pod.Namespace, pod.Name, binding.Target.Name, binding.Annotations) + ctx := api.WithNamespace(api.NewContext(), binding.Namespace) + // TODO(k8s): use Pods interface for binding once clusters are upgraded + // return b.Pods(binding.Namespace).Bind(binding) + err := k.client.Post().Namespace(api.NamespaceValue(ctx)).Resource("bindings").Body(binding).Do().Error() + if err != nil { + deleteTask() + k.sendStatus(driver, newStatus(mutil.NewTaskID(taskId), mesos.TaskState_TASK_FAILED, + messages.CreateBindingFailure)) + return + } + podFullName := container.GetPodFullName(pod) + + // allow a recently failed-over scheduler the chance to recover the task/pod binding: + // it may have failed and recovered before the apiserver is able to report the updated + // binding information. replays of this status event will signal to the scheduler that + // the apiserver should be up-to-date. + data, err := json.Marshal(api.PodStatusResult{ + ObjectMeta: api.ObjectMeta{ + Name: podFullName, + SelfLink: "/podstatusresult", + }, + }) + if err != nil { + deleteTask() + log.Errorf("failed to marshal pod status result: %v", err) + k.sendStatus(driver, newStatus(mutil.NewTaskID(taskId), mesos.TaskState_TASK_FAILED, + err.Error())) + return + } + + k.lock.Lock() + defer k.lock.Unlock() + + // Add the task. + task, found := k.tasks[taskId] + if !found { + log.V(1).Infof("task %v not found, probably killed: aborting launch, reporting lost", taskId) + k.reportLostTask(driver, taskId, messages.LaunchTaskFailed) + return + } + + //TODO(jdef) check for duplicate pod name, if found send TASK_ERROR + + // from here on, we need to delete containers associated with the task + // upon it going into a terminal state + task.podName = podFullName + k.pods[podFullName] = pod + + // send the latest snapshot of the set of pods to the kubelet via the pod update channel. + // this results in the kubelet spinning up the new pod. + update := kubelet.PodUpdate{Op: kubelet.SET} + for _, p := range k.pods { + update.Pods = append(update.Pods, p) + } + k.updateChan <- update + + statusUpdate := &mesos.TaskStatus{ + TaskId: mutil.NewTaskID(taskId), + State: mesos.TaskState_TASK_STARTING.Enum(), + Message: proto.String(messages.CreateBindingSuccess), + Data: data, + } + k.sendStatus(driver, statusUpdate) + + // Delay reporting 'task running' until container is up. 
+ psf := podStatusFunc(func() (*api.PodStatus, error) { + return k.podStatusFunc(k.kl, pod) + }) + + go k._launchTask(driver, taskId, podFullName, psf) +} + +func (k *KubernetesExecutor) _launchTask(driver bindings.ExecutorDriver, taskId, podFullName string, psf podStatusFunc) { + + expired := make(chan struct{}) + time.AfterFunc(launchGracePeriod, func() { close(expired) }) + + getMarshalledInfo := func() (data []byte, cancel bool) { + // potentially long call.. + if podStatus, err := psf(); err == nil && podStatus != nil { + select { + case <-expired: + cancel = true + default: + k.lock.Lock() + defer k.lock.Unlock() + if _, found := k.tasks[taskId]; !found { + // don't bother with the pod status if the task is already gone + cancel = true + break + } else if podStatus.Phase != api.PodRunning { + // avoid sending back a running status before it's really running + break + } + log.V(2).Infof("Found pod status: '%v'", podStatus) + result := api.PodStatusResult{ + ObjectMeta: api.ObjectMeta{ + Name: podFullName, + SelfLink: "/podstatusresult", + }, + Status: *podStatus, + } + if data, err = json.Marshal(result); err != nil { + log.Errorf("failed to marshal pod status result: %v", err) + } + } + } + return + } + +waitForRunningPod: + for { + select { + case <-expired: + log.Warningf("Launch expired grace period of '%v'", launchGracePeriod) + break waitForRunningPod + case <-time.After(containerPollTime): + if data, cancel := getMarshalledInfo(); cancel { + break waitForRunningPod + } else if data == nil { + continue waitForRunningPod + } else { + k.lock.Lock() + defer k.lock.Unlock() + if _, found := k.tasks[taskId]; !found { + goto reportLost + } + + statusUpdate := &mesos.TaskStatus{ + TaskId: mutil.NewTaskID(taskId), + State: mesos.TaskState_TASK_RUNNING.Enum(), + Message: proto.String(fmt.Sprintf("pod-running:%s", podFullName)), + Data: data, + } + + k.sendStatus(driver, statusUpdate) + + // continue to monitor the health of the pod + go k.__launchTask(driver, taskId, podFullName, psf) + return + } + } + } + + k.lock.Lock() + defer k.lock.Unlock() +reportLost: + k.reportLostTask(driver, taskId, messages.LaunchTaskFailed) +} + +func (k *KubernetesExecutor) __launchTask(driver bindings.ExecutorDriver, taskId, podFullName string, psf podStatusFunc) { + // TODO(nnielsen): Monitor health of pod and report if lost. + // Should we also allow this to fail a couple of times before reporting lost? + // What if the docker daemon is restarting and we can't connect, but it's + // going to bring the pods back online as soon as it restarts? + knownPod := func() bool { + _, err := psf() + return err == nil + } + // Wait for the pod to go away and stop monitoring once it does + // TODO (jdefelice) replace with an /events watch? + for { + time.Sleep(containerPollTime) + if k.checkForLostPodTask(driver, taskId, knownPod) { + return + } + } +} + +// Intended to be executed as part of the pod monitoring loop, this fn (ultimately) checks with Docker +// whether the pod is running. It will only return false if the task is still registered and the pod is +// registered in Docker. Otherwise it returns true. If there's still a task record on file, but no pod +// in Docker, then we'll also send a TASK_LOST event. 
+func (k *KubernetesExecutor) checkForLostPodTask(driver bindings.ExecutorDriver, taskId string, isKnownPod func() bool) bool {
+	// TODO (jdefelice) don't send false alarms for deleted pods (KILLED tasks)
+	k.lock.Lock()
+	defer k.lock.Unlock()
+
+	// TODO(jdef) we should really consider k.pods here, along with what docker is reporting, since the
+	// kubelet may constantly attempt to instantiate a pod as long as it's in the pod state that we're
+	// handing to it. otherwise, we're probably reporting a TASK_LOST prematurely. Should probably
+	// consult RestartPolicy to determine appropriate behavior. Should probably also gracefully handle
+	// docker daemon restarts.
+	if _, ok := k.tasks[taskId]; ok {
+		if isKnownPod() {
+			return false
+		} else {
+			log.Warningf("Detected lost pod, reporting lost task %v", taskId)
+			k.reportLostTask(driver, taskId, messages.ContainersDisappeared)
+		}
+	} else {
+		log.V(2).Infof("Task %v no longer registered, stop monitoring for lost pods", taskId)
+	}
+	return true
+}
+
+// KillTask is called when the executor receives a request to kill a task.
+func (k *KubernetesExecutor) KillTask(driver bindings.ExecutorDriver, taskId *mesos.TaskID) {
+	if k.isDone() {
+		return
+	}
+	log.Infof("Kill task %v\n", taskId)
+
+	if !k.isConnected() {
+		//TODO(jdefelice) send TASK_LOST here?
+		log.Warningf("Ignoring kill task because the executor is disconnected\n")
+		return
+	}
+
+	k.lock.Lock()
+	defer k.lock.Unlock()
+	k.removePodTask(driver, taskId.GetValue(), messages.TaskKilled, mesos.TaskState_TASK_KILLED)
+}
+
+// Reports a lost task to the slave and updates internal task and pod tracking state.
+// Assumes that the caller is locking around pod and task state.
+func (k *KubernetesExecutor) reportLostTask(driver bindings.ExecutorDriver, tid, reason string) {
+	k.removePodTask(driver, tid, reason, mesos.TaskState_TASK_LOST)
+}
+
+// deletes the pod and task associated with the task identified by tid and sends a task
+// status update to mesos. also attempts to reset the suicide watch.
+// Assumes that the caller is locking around pod and task state.
+func (k *KubernetesExecutor) removePodTask(driver bindings.ExecutorDriver, tid, reason string, state mesos.TaskState) {
+	task, ok := k.tasks[tid]
+	if !ok {
+		log.V(1).Infof("Failed to remove task, unknown task %v\n", tid)
+		return
+	}
+	delete(k.tasks, tid)
+	k.resetSuicideWatch(driver)
+
+	pid := task.podName
+	if _, found := k.pods[pid]; !found {
+		log.Warningf("Cannot remove unknown pod %v for task %v", pid, tid)
+	} else {
+		log.V(2).Infof("deleting pod %v for task %v", pid, tid)
+		delete(k.pods, pid)
+
+		// Send the pod updates to the channel.
+		update := kubelet.PodUpdate{Op: kubelet.SET}
+		for _, p := range k.pods {
+			update.Pods = append(update.Pods, p)
+		}
+		k.updateChan <- update
+	}
+	// TODO(jdef): ensure that the update propagates, perhaps return a signal chan?
+	k.sendStatus(driver, newStatus(mutil.NewTaskID(tid), state, reason))
+}
+
+// FrameworkMessage is called when the framework sends some message to the executor
+func (k *KubernetesExecutor) FrameworkMessage(driver bindings.ExecutorDriver, message string) {
+	if k.isDone() {
+		return
+	}
+	if !k.isConnected() {
+		log.Warningf("Ignoring framework message because the executor is disconnected\n")
+		return
+	}
+
+	log.Infof("Received message from framework %v\n", message)
+	//TODO(jdef) master reported a lost task, reconcile this! @see scheduler.go:handleTaskLost
+	if strings.HasPrefix(message, "task-lost:") && len(message) > 10 {
+		taskId := message[10:]
+		if taskId != "" {
+			// clean up pod state
+			k.lock.Lock()
+			defer k.lock.Unlock()
+			k.reportLostTask(driver, taskId, messages.TaskLostAck)
+		}
+	}
+
+	switch message {
+	case messages.Kamikaze:
+		k.attemptSuicide(driver, nil)
+	}
+}
+
+// Shutdown is called when the executor receives a shutdown request.
+func (k *KubernetesExecutor) Shutdown(driver bindings.ExecutorDriver) {
+	k.lock.Lock()
+	defer k.lock.Unlock()
+	k.doShutdown(driver)
+}
+
+// assumes that caller has obtained state lock
+func (k *KubernetesExecutor) doShutdown(driver bindings.ExecutorDriver) {
+	defer func() {
+		log.Errorf("exiting with unclean shutdown: %v", recover())
+		if k.exitFunc != nil {
+			k.exitFunc(1)
+		}
+	}()
+
+	(&k.state).transitionTo(terminalState)
+
+	// signal to all listeners that this KubeletExecutor is done!
+	close(k.done)
+
+	if k.shutdownAlert != nil {
+		func() {
+			defer util.HandleCrash()
+			k.shutdownAlert()
+		}()
+	}
+
+	log.Infoln("Stopping executor driver")
+	_, err := driver.Stop()
+	if err != nil {
+		log.Warningf("failed to stop executor driver: %v", err)
+	}
+
+	log.Infoln("Shutdown the executor")
+
+	// according to docs, mesos will generate TASK_LOST updates for us
+	// if needed, so don't take extra time to do that here.
+	k.tasks = map[string]*kuberTask{}
+
+	select {
+	// the main Run() func may still be running... wait for it to finish: it will
+	// clear the pod configuration cleanly, telling k8s "there are no pods" and
+	// clean up resources (pods, volumes, etc).
+	case <-k.kubeletFinished:
+
+	//TODO(jdef) attempt to wait for events to propagate to API server?
+
+	// TODO(jdef) extract constant, should be smaller than whatever the
+	// slave graceful shutdown timeout period is.
+	case <-time.After(15 * time.Second):
+		log.Errorf("timed out waiting for kubelet Run() to die")
+	}
+
+	log.Infoln("exiting")
+	if k.exitFunc != nil {
+		k.exitFunc(0)
+	}
+}
+
+// Destroy existing k8s containers
+func (k *KubernetesExecutor) killKubeletContainers() {
+	if containers, err := dockertools.GetKubeletDockerContainers(k.dockerClient, true); err == nil {
+		opts := docker.RemoveContainerOptions{
+			RemoveVolumes: true,
+			Force:         true,
+		}
+		for _, container := range containers {
+			opts.ID = container.ID
+			log.V(2).Infof("Removing container: %v", opts.ID)
+			if err := k.dockerClient.RemoveContainer(opts); err != nil {
+				log.Warning(err)
+			}
+		}
+	} else {
+		log.Warningf("Failed to list kubelet docker containers: %v", err)
+	}
+}
+
+// Error is called when some error happens.
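As a side note, the ad-hoc "task-lost:" slicing in FrameworkMessage can equivalently be written with strings.TrimPrefix; a small, behavior-equivalent sketch (the helper name is illustrative):

    package main

    import (
    	"fmt"
    	"strings"
    )

    // parseTaskLost returns the task id from a "task-lost:<id>" framework
    // message, or "" if the message has a different shape or an empty id.
    func parseTaskLost(message string) string {
    	if !strings.HasPrefix(message, "task-lost:") {
    		return ""
    	}
    	return strings.TrimPrefix(message, "task-lost:")
    }

    func main() {
    	fmt.Println(parseTaskLost("task-lost:foo")) // "foo"
    	fmt.Println(parseTaskLost("kamikaze"))      // ""
    }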
+func (k *KubernetesExecutor) Error(driver bindings.ExecutorDriver, message string) { + log.Errorln(message) +} + +func newStatus(taskId *mesos.TaskID, state mesos.TaskState, message string) *mesos.TaskStatus { + return &mesos.TaskStatus{ + TaskId: taskId, + State: &state, + Message: proto.String(message), + } +} + +func (k *KubernetesExecutor) sendStatus(driver bindings.ExecutorDriver, status *mesos.TaskStatus) { + select { + case <-k.done: + default: + k.outgoing <- func() (mesos.Status, error) { return driver.SendStatusUpdate(status) } + } +} + +func (k *KubernetesExecutor) sendFrameworkMessage(driver bindings.ExecutorDriver, msg string) { + select { + case <-k.done: + default: + k.outgoing <- func() (mesos.Status, error) { return driver.SendFrameworkMessage(msg) } + } +} + +func (k *KubernetesExecutor) sendLoop() { + defer log.V(1).Info("sender loop exiting") + for { + select { + case <-k.done: + return + default: + if !k.isConnected() { + select { + case <-k.done: + case <-time.After(1 * time.Second): + } + continue + } + sender, ok := <-k.outgoing + if !ok { + // programming error + panic("someone closed the outgoing channel") + } + if status, err := sender(); err == nil { + continue + } else { + log.Error(err) + if status == mesos.Status_DRIVER_ABORTED { + return + } + } + // attempt to re-queue the sender + select { + case <-k.done: + case k.outgoing <- sender: + } + } + } +} diff --git a/contrib/mesos/pkg/executor/executor_test.go b/contrib/mesos/pkg/executor/executor_test.go new file mode 100644 index 00000000000..96f87270269 --- /dev/null +++ b/contrib/mesos/pkg/executor/executor_test.go @@ -0,0 +1,618 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package executor
+
+import (
+	"fmt"
+	"net/http"
+	"net/http/httptest"
+	"reflect"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	assertext "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/assert"
+	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/messages"
+	kmruntime "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
+	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/api/testapi"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/runtime"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
+
+	"github.com/golang/glog"
+	bindings "github.com/mesos/mesos-go/executor"
+	"github.com/mesos/mesos-go/mesosproto"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/mock"
+)
+
+type suicideTracker struct {
+	suicideWatcher
+	stops  uint32
+	resets uint32
+	timers uint32
+	jumps  *uint32
+}
+
+func (t *suicideTracker) Reset(d time.Duration) bool {
+	defer func() { t.resets++ }()
+	return t.suicideWatcher.Reset(d)
+}
+
+func (t *suicideTracker) Stop() bool {
+	defer func() { t.stops++ }()
+	return t.suicideWatcher.Stop()
+}
+
+func (t *suicideTracker) Next(d time.Duration, driver bindings.ExecutorDriver, f jumper) suicideWatcher {
+	tracker := &suicideTracker{
+		stops:  t.stops,
+		resets: t.resets,
+		jumps:  t.jumps,
+		timers: t.timers + 1,
+	}
+	jumper := tracker.makeJumper(f)
+	tracker.suicideWatcher = t.suicideWatcher.Next(d, driver, jumper)
+	return tracker
+}
+
+func (t *suicideTracker) makeJumper(_ jumper) jumper {
+	return jumper(func(driver bindings.ExecutorDriver, cancel <-chan struct{}) {
+		glog.Warningln("jumping?!")
+		if t.jumps != nil {
+			atomic.AddUint32(t.jumps, 1)
+		}
+	})
+}
+
+func TestSuicide_zeroTimeout(t *testing.T) {
+	defer glog.Flush()
+
+	k := New(Config{})
+	tracker := &suicideTracker{suicideWatcher: k.suicideWatch}
+	k.suicideWatch = tracker
+
+	ch := k.resetSuicideWatch(nil)
+
+	select {
+	case <-ch:
+	case <-time.After(2 * time.Second):
+		t.Fatalf("timeout waiting for reset of suicide watch")
+	}
+	if tracker.stops != 0 {
+		t.Fatalf("expected no stops since suicideWatchTimeout was never set")
+	}
+	if tracker.resets != 0 {
+		t.Fatalf("expected no resets since suicideWatchTimeout was never set")
+	}
+	if tracker.timers != 0 {
+		t.Fatalf("expected no timers since suicideWatchTimeout was never set")
+	}
+}
+
+func TestSuicide_WithTasks(t *testing.T) {
+	defer glog.Flush()
+
+	k := New(Config{
+		SuicideTimeout: 50 * time.Millisecond,
+	})
+
+	jumps := uint32(0)
+	tracker := &suicideTracker{suicideWatcher: k.suicideWatch, jumps: &jumps}
+	k.suicideWatch = tracker
+
+	k.tasks["foo"] = &kuberTask{} // prevent suicide attempts from succeeding
+
+	// call reset with a nil timer
+	glog.Infoln("resetting suicide watch with 1 task")
+	select {
+	case <-k.resetSuicideWatch(nil):
+		tracker = k.suicideWatch.(*suicideTracker)
+		if tracker.stops != 1 {
+			t.Fatalf("expected suicide attempt to Stop() since there are registered tasks")
+		}
+		if tracker.resets != 0 {
+			t.Fatalf("expected no resets since there are registered tasks")
+		}
+		if tracker.timers != 0 {
+			t.Fatalf("expected no timers since there are registered tasks")
+		}
+	case <-time.After(1 * time.Second):
+		t.Fatalf("initial suicide watch setup failed")
suicide watch setup failed") + } + + delete(k.tasks, "foo") // zero remaining tasks + k.suicideTimeout = 1500 * time.Millisecond + suicideStart := time.Now() + + // reset the suicide watch, which should actually start a timer now + glog.Infoln("resetting suicide watch with 0 tasks") + select { + case <-k.resetSuicideWatch(nil): + tracker = k.suicideWatch.(*suicideTracker) + if tracker.stops != 1 { + t.Fatalf("did not expect suicide attempt to Stop() since there are no registered tasks") + } + if tracker.resets != 1 { + t.Fatalf("expected 1 resets instead of %d", tracker.resets) + } + if tracker.timers != 1 { + t.Fatalf("expected 1 timers instead of %d", tracker.timers) + } + case <-time.After(1 * time.Second): + t.Fatalf("2nd suicide watch setup failed") + } + + k.lock.Lock() + k.tasks["foo"] = &kuberTask{} // prevent suicide attempts from succeeding + k.lock.Unlock() + + // reset the suicide watch, which should stop the existing timer + glog.Infoln("resetting suicide watch with 1 task") + select { + case <-k.resetSuicideWatch(nil): + tracker = k.suicideWatch.(*suicideTracker) + if tracker.stops != 2 { + t.Fatalf("expected 2 stops instead of %d since there are registered tasks", tracker.stops) + } + if tracker.resets != 1 { + t.Fatalf("expected 1 resets instead of %d", tracker.resets) + } + if tracker.timers != 1 { + t.Fatalf("expected 1 timers instead of %d", tracker.timers) + } + case <-time.After(1 * time.Second): + t.Fatalf("3rd suicide watch setup failed") + } + + k.lock.Lock() + delete(k.tasks, "foo") // allow suicide attempts to schedule + k.lock.Unlock() + + // reset the suicide watch, which should reset a stopped timer + glog.Infoln("resetting suicide watch with 0 tasks") + select { + case <-k.resetSuicideWatch(nil): + tracker = k.suicideWatch.(*suicideTracker) + if tracker.stops != 2 { + t.Fatalf("expected 2 stops instead of %d since there are no registered tasks", tracker.stops) + } + if tracker.resets != 2 { + t.Fatalf("expected 2 resets instead of %d", tracker.resets) + } + if tracker.timers != 1 { + t.Fatalf("expected 1 timers instead of %d", tracker.timers) + } + case <-time.After(1 * time.Second): + t.Fatalf("4th suicide watch setup failed") + } + + sinceWatch := time.Since(suicideStart) + time.Sleep(3*time.Second - sinceWatch) // give the first timer to misfire (it shouldn't since Stop() was called) + + if j := atomic.LoadUint32(&jumps); j != 1 { + t.Fatalf("expected 1 jumps instead of %d since stop was called", j) + } else { + glog.Infoln("jumps verified") // glog so we get a timestamp + } +} + +// TestExecutorRegister ensures that the executor thinks it is connected +// after Register is called. 
+
+// TestExecutorRegister ensures that the executor thinks it is connected
+// after Register is called.
+func TestExecutorRegister(t *testing.T) {
+	mockDriver := &MockExecutorDriver{}
+	updates := make(chan interface{}, 1024)
+	executor := New(Config{
+		Docker:     dockertools.ConnectToDockerOrDie("fake://"),
+		Updates:    updates,
+		SourceName: "executor_test",
+	})
+
+	executor.Init(mockDriver)
+	executor.Registered(mockDriver, nil, nil, nil)
+
+	initialPodUpdate := kubelet.PodUpdate{
+		Pods:   []*api.Pod{},
+		Op:     kubelet.SET,
+		Source: executor.sourcename,
+	}
+	receivedInitialPodUpdate := false
+	select {
+	case m := <-updates:
+		update, ok := m.(kubelet.PodUpdate)
+		if ok {
+			if reflect.DeepEqual(initialPodUpdate, update) {
+				receivedInitialPodUpdate = true
+			}
+		}
+	case <-time.After(time.Second):
+	}
+	assert.Equal(t, true, receivedInitialPodUpdate,
+		"executor should have sent an initial PodUpdate "+
+			"to the updates chan upon registration")
+
+	assert.Equal(t, true, executor.isConnected(), "executor should be connected")
+	mockDriver.AssertExpectations(t)
+}
+
+// TestExecutorDisconnect ensures that the executor thinks that it is not
+// connected after a call to Disconnected has occurred.
+func TestExecutorDisconnect(t *testing.T) {
+	mockDriver := &MockExecutorDriver{}
+	executor := NewTestKubernetesExecutor()
+
+	executor.Init(mockDriver)
+	executor.Registered(mockDriver, nil, nil, nil)
+	executor.Disconnected(mockDriver)
+
+	assert.Equal(t, false, executor.isConnected(),
+		"executor should not be connected after Disconnected")
+	mockDriver.AssertExpectations(t)
+}
+
+// TestExecutorReregister ensures that the executor thinks it is connected
+// after a connection problem happens, followed by a call to Reregistered.
+func TestExecutorReregister(t *testing.T) {
+	mockDriver := &MockExecutorDriver{}
+	executor := NewTestKubernetesExecutor()
+
+	executor.Init(mockDriver)
+	executor.Registered(mockDriver, nil, nil, nil)
+	executor.Disconnected(mockDriver)
+	executor.Reregistered(mockDriver, nil)
+
+	assert.Equal(t, true, executor.isConnected(), "executor should be connected")
+	mockDriver.AssertExpectations(t)
+}
+
+// TestExecutorLaunchAndKillTask ensures that the executor is able to launch
+// and kill tasks while properly bookkeeping its tasks.
+func TestExecutorLaunchAndKillTask(t *testing.T) {
+	// create a fake pod watch. We use that below to submit new pods to the scheduler.
+	podListWatch := NewMockPodsListWatch(api.PodList{})
+
+	// create fake apiserver
+	testApiServer := NewTestServer(t, api.NamespaceDefault, &podListWatch.list)
+	defer testApiServer.server.Close()
+
+	mockDriver := &MockExecutorDriver{}
+	updates := make(chan interface{}, 1024)
+	config := Config{
+		Docker:  dockertools.ConnectToDockerOrDie("fake://"),
+		Updates: updates,
+		APIClient: client.NewOrDie(&client.Config{
+			Host:    testApiServer.server.URL,
+			Version: testapi.Version(),
+		}),
+		Kubelet: &kubelet.Kubelet{},
+		PodStatusFunc: func(kl *kubelet.Kubelet, pod *api.Pod) (*api.PodStatus, error) {
+			return &api.PodStatus{
+				ContainerStatuses: []api.ContainerStatus{
+					{
+						Name: "foo",
+						State: api.ContainerState{
+							Running: &api.ContainerStateRunning{},
+						},
+					},
+				},
+				Phase: api.PodRunning,
+			}, nil
+		},
+	}
+	executor := New(config)
+
+	executor.Init(mockDriver)
+	executor.Registered(mockDriver, nil, nil, nil)
+
+	select {
+	case <-updates:
+	case <-time.After(time.Second):
+		t.Fatalf("Executor should send an initial update on Registration")
+	}
+
+	pod := NewTestPod(1)
+	podTask, err := podtask.New(api.NewDefaultContext(), "",
+		*pod, &mesosproto.ExecutorInfo{})
+	assert.Equal(t, nil, err, "must be able to create a task from a pod")
+
+	taskInfo := podTask.BuildTaskInfo()
+	data, err := testapi.Codec().Encode(pod)
+	assert.Equal(t, nil, err, "must be able to encode a pod's spec data")
+	taskInfo.Data = data
+	var statusUpdateCalls sync.WaitGroup
+	statusUpdateDone := func(_ mock.Arguments) { statusUpdateCalls.Done() }
+
+	statusUpdateCalls.Add(1)
+	mockDriver.On(
+		"SendStatusUpdate",
+		mesosproto.TaskState_TASK_STARTING,
+	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()
+
+	statusUpdateCalls.Add(1)
+	mockDriver.On(
+		"SendStatusUpdate",
+		mesosproto.TaskState_TASK_RUNNING,
+	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()
+
+	executor.LaunchTask(mockDriver, taskInfo)
+
+	assertext.EventuallyTrue(t, 5*time.Second, func() bool {
+		executor.lock.Lock()
+		defer executor.lock.Unlock()
+		return len(executor.tasks) == 1 && len(executor.pods) == 1
+	}, "executor must be able to create a task and a pod")
+
+	gotPodUpdate := false
+	select {
+	case m := <-updates:
+		update, ok := m.(kubelet.PodUpdate)
+		if ok && len(update.Pods) == 1 {
+			gotPodUpdate = true
+		}
+	case <-time.After(time.Second):
+	}
+	assert.Equal(t, true, gotPodUpdate,
+		"the executor should send an update about a new pod to "+
+			"the updates chan when creating a new one.")
+
+	// Allow some time for asynchronous requests to the driver.
+	finished := kmruntime.After(statusUpdateCalls.Wait)
+	select {
+	case <-finished:
+	case <-time.After(5 * time.Second):
+		t.Fatalf("timed out waiting for status update calls to finish")
+	}
+
+	statusUpdateCalls.Add(1)
+	mockDriver.On(
+		"SendStatusUpdate",
+		mesosproto.TaskState_TASK_KILLED,
+	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(statusUpdateDone).Once()
+
+	executor.KillTask(mockDriver, taskInfo.TaskId)
+
+	assertext.EventuallyTrue(t, 5*time.Second, func() bool {
+		executor.lock.Lock()
+		defer executor.lock.Unlock()
+		return len(executor.tasks) == 0 && len(executor.pods) == 0
+	}, "executor must be able to kill a created task and pod")
+
+	// Allow some time for asynchronous requests to the driver.
+	finished = kmruntime.After(statusUpdateCalls.Wait)
+	select {
+	case <-finished:
+	case <-time.After(5 * time.Second):
+		t.Fatalf("timed out waiting for status update calls to finish")
+	}
+	mockDriver.AssertExpectations(t)
+}
+
+// TestExecutorFrameworkMessage ensures that the executor is able to
+// handle messages from the framework, specifically about lost tasks
+// and Kamikaze. When a task is lost, the executor needs to clean up
+// its state. When a Kamikaze message is received, the executor should
+// attempt suicide.
+func TestExecutorFrameworkMessage(t *testing.T) {
+	mockDriver := &MockExecutorDriver{}
+	kubeletFinished := make(chan struct{})
+	config := Config{
+		Docker:  dockertools.ConnectToDockerOrDie("fake://"),
+		Updates: make(chan interface{}, 1024),
+		APIClient: client.NewOrDie(&client.Config{
+			Host:    "fakehost",
+			Version: testapi.Version(),
+		}),
+		ShutdownAlert: func() {
+			close(kubeletFinished)
+		},
+		KubeletFinished: kubeletFinished,
+	}
+	executor := New(config)
+
+	executor.Init(mockDriver)
+	executor.Registered(mockDriver, nil, nil, nil)
+
+	executor.FrameworkMessage(mockDriver, "test framework message")
+
+	// set up a pod to then lose
+	pod := NewTestPod(1)
+	podTask, _ := podtask.New(api.NewDefaultContext(), "foo",
+		*pod, &mesosproto.ExecutorInfo{})
+
+	taskInfo := podTask.BuildTaskInfo()
+	data, _ := testapi.Codec().Encode(pod)
+	taskInfo.Data = data
+
+	executor.LaunchTask(mockDriver, taskInfo)
+
+	// send task-lost message for it
+	called := make(chan struct{})
+	mockDriver.On(
+		"SendStatusUpdate",
+		mesosproto.TaskState_TASK_LOST,
+	).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once()
+
+	executor.FrameworkMessage(mockDriver, "task-lost:foo")
+	assertext.EventuallyTrue(t, 5*time.Second, func() bool {
+		executor.lock.Lock()
+		defer executor.lock.Unlock()
+		return len(executor.tasks) == 0 && len(executor.pods) == 0
+	}, "executor must be able to kill a created task and pod")
+
+	select {
+	case <-called:
+	case <-time.After(5 * time.Second):
+		t.Fatalf("timed out waiting for SendStatusUpdate")
+	}
+
+	mockDriver.On("Stop").Return(mesosproto.Status_DRIVER_STOPPED, nil).Once()
+
+	executor.FrameworkMessage(mockDriver, messages.Kamikaze)
+	assert.Equal(t, true, executor.isDone(),
+		"executor should have shut down after receiving a Kamikaze message")
+
+	mockDriver.AssertExpectations(t)
+}
+
+// NewTestPod creates a pod with the given index, requiring one port.
+func NewTestPod(i int) *api.Pod {
+	name := fmt.Sprintf("pod%d", i)
+	return &api.Pod{
+		TypeMeta: api.TypeMeta{APIVersion: testapi.Version()},
+		ObjectMeta: api.ObjectMeta{
+			Name:      name,
+			Namespace: api.NamespaceDefault,
+			SelfLink:  testapi.SelfLink("pods", name),
+		},
+		Spec: api.PodSpec{
+			Containers: []api.Container{
+				{
+					Ports: []api.ContainerPort{
+						{
+							ContainerPort: 8000 + i,
+							Protocol:      api.ProtocolTCP,
+						},
+					},
+				},
+			},
+		},
+		Status: api.PodStatus{
+			Conditions: []api.PodCondition{
+				{
+					Type:   api.PodReady,
+					Status: api.ConditionTrue,
+				},
+			},
+		},
+	}
+}
+
+// MockPodsListWatch mocks the pods ListWatch, which is normally backed by
+// the apiserver's pods watch endpoint.
+type MockPodsListWatch struct {
+	ListWatch   cache.ListWatch
+	fakeWatcher *watch.FakeWatcher
+	list        api.PodList
+}
+
+// TestServer is an apiserver mock which partially mocks the pods API.
+type TestServer struct {
+	server *httptest.Server
+	Stats  map[string]uint
+	lock   sync.Mutex
+}
+
+func NewTestServer(t *testing.T, namespace string, pods *api.PodList) *TestServer {
+	ts := TestServer{
+		Stats: map[string]uint{},
+	}
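+	// Only a bindings handler is registered below; answering every binding
+	// request with 200 OK is all these executor tests need from the apiserver.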
+ mux := http.NewServeMux() + + mux.HandleFunc(testapi.ResourcePath("bindings", namespace, ""), func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + }) + + ts.server = httptest.NewServer(mux) + return &ts +} + +func NewMockPodsListWatch(initialPodList api.PodList) *MockPodsListWatch { + lw := MockPodsListWatch{ + fakeWatcher: watch.NewFake(), + list: initialPodList, + } + lw.ListWatch = cache.ListWatch{ + WatchFunc: func(resourceVersion string) (watch.Interface, error) { + return lw.fakeWatcher, nil + }, + ListFunc: func() (runtime.Object, error) { + return &lw.list, nil + }, + } + return &lw +} + +// TestExecutorShutdown ensures that the executor properly shuts down +// when Shutdown is called. +func TestExecutorShutdown(t *testing.T) { + mockDriver := &MockExecutorDriver{} + kubeletFinished := make(chan struct{}) + var exitCalled int32 = 0 + config := Config{ + Docker: dockertools.ConnectToDockerOrDie("fake://"), + Updates: make(chan interface{}, 1024), + ShutdownAlert: func() { + close(kubeletFinished) + }, + KubeletFinished: kubeletFinished, + ExitFunc: func(_ int) { + atomic.AddInt32(&exitCalled, 1) + }, + } + executor := New(config) + + executor.Init(mockDriver) + executor.Registered(mockDriver, nil, nil, nil) + + mockDriver.On("Stop").Return(mesosproto.Status_DRIVER_STOPPED, nil).Once() + + executor.Shutdown(mockDriver) + + assert.Equal(t, false, executor.isConnected(), + "executor should not be connected after Shutdown") + assert.Equal(t, true, executor.isDone(), + "executor should be in Done state after Shutdown") + + select { + case <-executor.Done(): + default: + t.Fatal("done channel should be closed after shutdown") + } + + assert.Equal(t, true, atomic.LoadInt32(&exitCalled) > 0, + "the executor should call its ExitFunc when it is ready to close down") + + mockDriver.AssertExpectations(t) +} + +func TestExecutorsendFrameworkMessage(t *testing.T) { + mockDriver := &MockExecutorDriver{} + executor := NewTestKubernetesExecutor() + + executor.Init(mockDriver) + executor.Registered(mockDriver, nil, nil, nil) + + called := make(chan struct{}) + mockDriver.On( + "SendFrameworkMessage", + "foo bar baz", + ).Return(mesosproto.Status_DRIVER_RUNNING, nil).Run(func(_ mock.Arguments) { close(called) }).Once() + executor.sendFrameworkMessage(mockDriver, "foo bar baz") + + // guard against data race in mock driver between AssertExpectations and Called + select { + case <-called: // expected + case <-time.After(5 * time.Second): + t.Fatalf("expected call to SendFrameworkMessage") + } + mockDriver.AssertExpectations(t) +} diff --git a/contrib/mesos/pkg/executor/messages/doc.go b/contrib/mesos/pkg/executor/messages/doc.go new file mode 100644 index 00000000000..ac09f189b56 --- /dev/null +++ b/contrib/mesos/pkg/executor/messages/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package messages exposes executor event/message names as constants. 
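+//
+// For example, a framework-message handler might dispatch on these constants
+// (a sketch only; msg and driver are assumed from the surrounding handler):
+//
+//	switch msg {
+//	case messages.Kamikaze:
+//		driver.Stop()
+//	}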
+package messages diff --git a/contrib/mesos/pkg/executor/messages/messages.go b/contrib/mesos/pkg/executor/messages/messages.go new file mode 100644 index 00000000000..bf3dd1a9888 --- /dev/null +++ b/contrib/mesos/pkg/executor/messages/messages.go @@ -0,0 +1,32 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package messages + +// messages that ship with TaskStatus objects + +const ( + ContainersDisappeared = "containers-disappeared" + CreateBindingFailure = "create-binding-failure" + CreateBindingSuccess = "create-binding-success" + ExecutorUnregistered = "executor-unregistered" + ExecutorShutdown = "executor-shutdown" + LaunchTaskFailed = "launch-task-failed" + TaskKilled = "task-killed" + UnmarshalTaskDataFailure = "unmarshal-task-data-failure" + TaskLostAck = "task-lost-ack" // executor acknowledgement of forwarded TASK_LOST framework message + Kamikaze = "kamikaze" +) diff --git a/contrib/mesos/pkg/executor/mock_test.go b/contrib/mesos/pkg/executor/mock_test.go new file mode 100644 index 00000000000..4b060024a52 --- /dev/null +++ b/contrib/mesos/pkg/executor/mock_test.go @@ -0,0 +1,81 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package executor + +import ( + "testing" + + "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools" + "github.com/mesos/mesos-go/mesosproto" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" +) + +type MockExecutorDriver struct { + mock.Mock +} + +func (m *MockExecutorDriver) Start() (mesosproto.Status, error) { + args := m.Called() + return args.Get(0).(mesosproto.Status), args.Error(1) +} + +func (m *MockExecutorDriver) Stop() (mesosproto.Status, error) { + args := m.Called() + return args.Get(0).(mesosproto.Status), args.Error(1) +} + +func (m *MockExecutorDriver) Abort() (mesosproto.Status, error) { + args := m.Called() + return args.Get(0).(mesosproto.Status), args.Error(1) +} + +func (m *MockExecutorDriver) Join() (mesosproto.Status, error) { + args := m.Called() + return args.Get(0).(mesosproto.Status), args.Error(1) +} + +func (m *MockExecutorDriver) Run() (mesosproto.Status, error) { + args := m.Called() + return args.Get(0).(mesosproto.Status), args.Error(1) +} + +func (m *MockExecutorDriver) SendStatusUpdate(taskStatus *mesosproto.TaskStatus) (mesosproto.Status, error) { + args := m.Called(*taskStatus.State) + return args.Get(0).(mesosproto.Status), args.Error(1) +} + +func (m *MockExecutorDriver) SendFrameworkMessage(msg string) (mesosproto.Status, error) { + args := m.Called(msg) + return args.Get(0).(mesosproto.Status), args.Error(1) +} + +func NewTestKubernetesExecutor() *KubernetesExecutor { + return New(Config{ + Docker: dockertools.ConnectToDockerOrDie("fake://"), + Updates: make(chan interface{}, 1024), + }) +} + +func TestExecutorNew(t *testing.T) { + mockDriver := &MockExecutorDriver{} + executor := NewTestKubernetesExecutor() + executor.Init(mockDriver) + + assert.Equal(t, executor.isDone(), false, "executor should not be in Done state on initialization") + assert.Equal(t, executor.isConnected(), false, "executor should not be connected on initialization") +} diff --git a/contrib/mesos/pkg/executor/service/doc.go b/contrib/mesos/pkg/executor/service/doc.go new file mode 100644 index 00000000000..f915ee4239f --- /dev/null +++ b/contrib/mesos/pkg/executor/service/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package service contains the cmd/k8sm-executor glue code. +package service diff --git a/contrib/mesos/pkg/executor/service/service.go b/contrib/mesos/pkg/executor/service/service.go new file mode 100644 index 00000000000..5de0da0db65 --- /dev/null +++ b/contrib/mesos/pkg/executor/service/service.go @@ -0,0 +1,600 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package service + +import ( + "bufio" + "fmt" + "io" + "math/rand" + "net" + "net/http" + "os" + "os/exec" + "strconv" + "strings" + "sync" + "time" + + "github.com/GoogleCloudPlatform/kubernetes/cmd/kubelet/app" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/config" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/hyperkube" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/redirfd" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime" + "github.com/GoogleCloudPlatform/kubernetes/pkg/api" + "github.com/GoogleCloudPlatform/kubernetes/pkg/client" + "github.com/GoogleCloudPlatform/kubernetes/pkg/credentialprovider" + "github.com/GoogleCloudPlatform/kubernetes/pkg/healthz" + "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet" + "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/cadvisor" + kconfig "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/config" + "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools" + "github.com/GoogleCloudPlatform/kubernetes/pkg/util" + "github.com/GoogleCloudPlatform/kubernetes/pkg/util/mount" + log "github.com/golang/glog" + "github.com/kardianos/osext" + bindings "github.com/mesos/mesos-go/executor" + + "github.com/spf13/pflag" +) + +const ( + // if we don't use this source then the kubelet will do funny, mirror things. 
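+	// (pods arriving from a non-apiserver source are treated as static pods,
+	// for which the kubelet tries to create mirror pods on the apiserver;
+	// reusing the apiserver source name avoids that behavior.)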
+ // @see ConfigSourceAnnotationKey + MESOS_CFG_SOURCE = kubelet.ApiserverSource +) + +type KubeletExecutorServer struct { + *app.KubeletServer + RunProxy bool + ProxyLogV int + ProxyExec string + ProxyLogfile string + ProxyBindall bool + SuicideTimeout time.Duration + ShutdownFD int + ShutdownFIFO string +} + +func NewKubeletExecutorServer() *KubeletExecutorServer { + k := &KubeletExecutorServer{ + KubeletServer: app.NewKubeletServer(), + RunProxy: true, + ProxyExec: "./kube-proxy", + ProxyLogfile: "./proxy-log", + SuicideTimeout: config.DefaultSuicideTimeout, + } + if pwd, err := os.Getwd(); err != nil { + log.Warningf("failed to determine current directory: %v", err) + } else { + k.RootDirectory = pwd // mesos sandbox dir + } + k.Address = util.IP(net.ParseIP(defaultBindingAddress())) + k.ShutdownFD = -1 // indicates unspecified FD + return k +} + +func NewHyperKubeletExecutorServer() *KubeletExecutorServer { + s := NewKubeletExecutorServer() + + // cache this for later use + binary, err := osext.Executable() + if err != nil { + log.Fatalf("failed to determine currently running executable: %v", err) + } + + s.ProxyExec = binary + return s +} + +func (s *KubeletExecutorServer) addCoreFlags(fs *pflag.FlagSet) { + s.KubeletServer.AddFlags(fs) + fs.BoolVar(&s.RunProxy, "run-proxy", s.RunProxy, "Maintain a running kube-proxy instance as a child proc of this kubelet-executor.") + fs.IntVar(&s.ProxyLogV, "proxy-logv", s.ProxyLogV, "Log verbosity of the child kube-proxy.") + fs.StringVar(&s.ProxyLogfile, "proxy-logfile", s.ProxyLogfile, "Path to the kube-proxy log file.") + fs.BoolVar(&s.ProxyBindall, "proxy-bindall", s.ProxyBindall, "When true will cause kube-proxy to bind to 0.0.0.0.") + fs.DurationVar(&s.SuicideTimeout, "suicide-timeout", s.SuicideTimeout, "Self-terminate after this period of inactivity. Zero disables suicide watch.") + fs.IntVar(&s.ShutdownFD, "shutdown-fd", s.ShutdownFD, "File descriptor used to signal shutdown to external watchers, requires shutdown-fifo flag") + fs.StringVar(&s.ShutdownFIFO, "shutdown-fifo", s.ShutdownFIFO, "FIFO used to signal shutdown to external watchers, requires shutdown-fd flag") +} + +func (s *KubeletExecutorServer) AddStandaloneFlags(fs *pflag.FlagSet) { + s.addCoreFlags(fs) + fs.StringVar(&s.ProxyExec, "proxy-exec", s.ProxyExec, "Path to the kube-proxy executable.") +} + +func (s *KubeletExecutorServer) AddHyperkubeFlags(fs *pflag.FlagSet) { + s.addCoreFlags(fs) +} + +// returns a Closer that should be closed to signal impending shutdown, but only if ShutdownFD +// and ShutdownFIFO were specified. if they are specified, then this func blocks until there's +// a reader on the FIFO stream. +func (s *KubeletExecutorServer) syncExternalShutdownWatcher() (io.Closer, error) { + if s.ShutdownFD == -1 || s.ShutdownFIFO == "" { + return nil, nil + } + // redirfd -w n fifo ... # (blocks until the fifo is read) + log.Infof("blocked, waiting for shutdown reader for FD %d FIFO at %s", s.ShutdownFD, s.ShutdownFIFO) + return redirfd.Write.Redirect(true, false, redirfd.FileDescriptor(s.ShutdownFD), s.ShutdownFIFO) +} + +// Run runs the specified KubeletExecutorServer. 
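+// It builds the apiserver client, cadvisor interface, and KubeletConfig, then
+// delegates to app.RunKubelet; the trailing empty select blocks forever, since
+// teardown is driven by the executor driver rather than by this function.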
+func (s *KubeletExecutorServer) Run(hks hyperkube.Interface, _ []string) error { + rand.Seed(time.Now().UTC().UnixNano()) + + if err := util.ApplyOomScoreAdj(0, s.OOMScoreAdj); err != nil { + log.Info(err) + } + + var apiclient *client.Client + clientConfig, err := s.CreateAPIServerClientConfig() + if err == nil { + apiclient, err = client.New(clientConfig) + } + if err != nil { + // required for k8sm since we need to send api.Binding information + // back to the apiserver + log.Fatalf("No API client: %v", err) + } + + log.Infof("Using root directory: %v", s.RootDirectory) + credentialprovider.SetPreferredDockercfgPath(s.RootDirectory) + + shutdownCloser, err := s.syncExternalShutdownWatcher() + if err != nil { + return err + } + + cadvisorInterface, err := cadvisor.New(s.CadvisorPort) + if err != nil { + return err + } + + imageGCPolicy := kubelet.ImageGCPolicy{ + HighThresholdPercent: s.ImageGCHighThresholdPercent, + LowThresholdPercent: s.ImageGCLowThresholdPercent, + } + + diskSpacePolicy := kubelet.DiskSpacePolicy{ + DockerFreeDiskMB: s.LowDiskSpaceThresholdMB, + RootFreeDiskMB: s.LowDiskSpaceThresholdMB, + } + + //TODO(jdef) intentionally NOT initializing a cloud provider here since: + //(a) the kubelet doesn't actually use it + //(b) we don't need to create N-kubelet connections to zookeeper for no good reason + //cloud := cloudprovider.InitCloudProvider(s.CloudProvider, s.CloudConfigFile) + //log.Infof("Successfully initialized cloud provider: %q from the config file: %q\n", s.CloudProvider, s.CloudConfigFile) + + hostNetworkSources, err := kubelet.GetValidatedSources(strings.Split(s.HostNetworkSources, ",")) + if err != nil { + return err + } + + tlsOptions, err := s.InitializeTLS() + if err != nil { + return err + } + mounter := mount.New() + if s.Containerized { + log.V(2).Info("Running kubelet in containerized mode (experimental)") + mounter = &mount.NsenterMounter{} + } + + var dockerExecHandler dockertools.ExecHandler + switch s.DockerExecHandlerName { + case "native": + dockerExecHandler = &dockertools.NativeExecHandler{} + case "nsenter": + dockerExecHandler = &dockertools.NsenterExecHandler{} + default: + log.Warningf("Unknown Docker exec handler %q; defaulting to native", s.DockerExecHandlerName) + dockerExecHandler = &dockertools.NativeExecHandler{} + } + + kcfg := app.KubeletConfig{ + Address: s.Address, + AllowPrivileged: s.AllowPrivileged, + HostNetworkSources: hostNetworkSources, + HostnameOverride: s.HostnameOverride, + RootDirectory: s.RootDirectory, + // ConfigFile: "" + // ManifestURL: "" + // FileCheckFrequency + // HTTPCheckFrequency + PodInfraContainerImage: s.PodInfraContainerImage, + SyncFrequency: s.SyncFrequency, + RegistryPullQPS: s.RegistryPullQPS, + RegistryBurst: s.RegistryBurst, + MinimumGCAge: s.MinimumGCAge, + MaxPerPodContainerCount: s.MaxPerPodContainerCount, + MaxContainerCount: s.MaxContainerCount, + RegisterNode: s.RegisterNode, + ClusterDomain: s.ClusterDomain, + ClusterDNS: s.ClusterDNS, + Runonce: s.RunOnce, + Port: s.Port, + ReadOnlyPort: s.ReadOnlyPort, + CadvisorInterface: cadvisorInterface, + EnableServer: s.EnableServer, + EnableDebuggingHandlers: s.EnableDebuggingHandlers, + DockerClient: dockertools.ConnectToDockerOrDie(s.DockerEndpoint), + KubeClient: apiclient, + MasterServiceNamespace: s.MasterServiceNamespace, + VolumePlugins: app.ProbeVolumePlugins(), + NetworkPlugins: app.ProbeNetworkPlugins(), + NetworkPluginName: s.NetworkPluginName, + StreamingConnectionIdleTimeout: s.StreamingConnectionIdleTimeout, + TLSOptions: tlsOptions, 
+ ImageGCPolicy: imageGCPolicy, + DiskSpacePolicy: diskSpacePolicy, + Cloud: nil, // TODO(jdef) Cloud, specifying null here because we don't want all kubelets polling mesos-master; need to account for this in the cloudprovider impl + NodeStatusUpdateFrequency: s.NodeStatusUpdateFrequency, + ResourceContainer: s.ResourceContainer, + CgroupRoot: s.CgroupRoot, + ContainerRuntime: s.ContainerRuntime, + Mounter: mounter, + DockerDaemonContainer: s.DockerDaemonContainer, + SystemContainer: s.SystemContainer, + ConfigureCBR0: s.ConfigureCBR0, + MaxPods: s.MaxPods, + DockerExecHandler: dockerExecHandler, + } + + err = app.RunKubelet(&kcfg, app.KubeletBuilder(func(kc *app.KubeletConfig) (app.KubeletBootstrap, *kconfig.PodConfig, error) { + return s.createAndInitKubelet(kc, hks, clientConfig, shutdownCloser) + })) + if err != nil { + return err + } + + if s.HealthzPort > 0 { + healthz.DefaultHealthz() + go util.Forever(func() { + err := http.ListenAndServe(net.JoinHostPort(s.HealthzBindAddress.String(), strconv.Itoa(s.HealthzPort)), nil) + if err != nil { + log.Errorf("Starting health server failed: %v", err) + } + }, 5*time.Second) + } + + // block until executor is shut down or commits shutdown + select {} +} + +func defaultBindingAddress() string { + libProcessIP := os.Getenv("LIBPROCESS_IP") + if libProcessIP == "" { + return "0.0.0.0" + } else { + return libProcessIP + } +} + +func (ks *KubeletExecutorServer) createAndInitKubelet( + kc *app.KubeletConfig, + hks hyperkube.Interface, + clientConfig *client.Config, + shutdownCloser io.Closer, +) (app.KubeletBootstrap, *kconfig.PodConfig, error) { + + // TODO(k8s): block until all sources have delivered at least one update to the channel, or break the sync loop + // up into "per source" synchronizations + // TODO(k8s): KubeletConfig.KubeClient should be a client interface, but client interface misses certain methods + // used by kubelet. Since NewMainKubelet expects a client interface, we need to make sure we are not passing + // a nil pointer to it when what we really want is a nil interface. 
+ var kubeClient client.Interface + if kc.KubeClient == nil { + kubeClient = nil + } else { + kubeClient = kc.KubeClient + } + + gcPolicy := kubelet.ContainerGCPolicy{ + MinAge: kc.MinimumGCAge, + MaxPerPodContainer: kc.MaxPerPodContainerCount, + MaxContainers: kc.MaxContainerCount, + } + + pc := kconfig.NewPodConfig(kconfig.PodConfigNotificationSnapshotAndUpdates, kc.Recorder) + updates := pc.Channel(MESOS_CFG_SOURCE) + + klet, err := kubelet.NewMainKubelet( + kc.Hostname, + kc.DockerClient, + kubeClient, + kc.RootDirectory, + kc.PodInfraContainerImage, + kc.SyncFrequency, + float32(kc.RegistryPullQPS), + kc.RegistryBurst, + gcPolicy, + pc.SeenAllSources, + kc.RegisterNode, + kc.ClusterDomain, + net.IP(kc.ClusterDNS), + kc.MasterServiceNamespace, + kc.VolumePlugins, + kc.NetworkPlugins, + kc.NetworkPluginName, + kc.StreamingConnectionIdleTimeout, + kc.Recorder, + kc.CadvisorInterface, + kc.ImageGCPolicy, + kc.DiskSpacePolicy, + kc.Cloud, + kc.NodeStatusUpdateFrequency, + kc.ResourceContainer, + kc.OSInterface, + kc.CgroupRoot, + kc.ContainerRuntime, + kc.Mounter, + kc.DockerDaemonContainer, + kc.SystemContainer, + kc.ConfigureCBR0, + kc.MaxPods, + kc.DockerExecHandler, + ) + if err != nil { + return nil, nil, err + } + + //TODO(jdef) either configure Watch here with something useful, or else + // get rid of it from executor.Config + kubeletFinished := make(chan struct{}) + exec := executor.New(executor.Config{ + Kubelet: klet, + Updates: updates, + SourceName: MESOS_CFG_SOURCE, + APIClient: kc.KubeClient, + Docker: kc.DockerClient, + SuicideTimeout: ks.SuicideTimeout, + KubeletFinished: kubeletFinished, + ShutdownAlert: func() { + if shutdownCloser != nil { + if e := shutdownCloser.Close(); e != nil { + log.Warningf("failed to signal shutdown to external watcher: %v", e) + } + } + }, + ExitFunc: os.Exit, + PodStatusFunc: func(kl *kubelet.Kubelet, pod *api.Pod) (*api.PodStatus, error) { + return kl.GetRuntime().GetPodStatus(pod) + }, + }) + + k := &kubeletExecutor{ + Kubelet: klet, + runProxy: ks.RunProxy, + proxyLogV: ks.ProxyLogV, + proxyExec: ks.ProxyExec, + proxyLogfile: ks.ProxyLogfile, + proxyBindall: ks.ProxyBindall, + address: ks.Address, + dockerClient: kc.DockerClient, + hks: hks, + kubeletFinished: kubeletFinished, + executorDone: exec.Done(), + clientConfig: clientConfig, + } + + dconfig := bindings.DriverConfig{ + Executor: exec, + HostnameOverride: ks.HostnameOverride, + BindingAddress: net.IP(ks.Address), + } + if driver, err := bindings.NewMesosExecutorDriver(dconfig); err != nil { + log.Fatalf("failed to create executor driver: %v", err) + } else { + k.driver = driver + } + + log.V(2).Infof("Initialize executor driver...") + + k.BirthCry() + exec.Init(k.driver) + + k.StartGarbageCollection() + + return k, pc, nil +} + +// kubelet decorator +type kubeletExecutor struct { + *kubelet.Kubelet + initialize sync.Once + driver bindings.ExecutorDriver + runProxy bool + proxyLogV int + proxyExec string + proxyLogfile string + proxyBindall bool + address util.IP + dockerClient dockertools.DockerInterface + hks hyperkube.Interface + kubeletFinished chan struct{} // closed once kubelet.Run() returns + executorDone <-chan struct{} // from KubeletExecutor.Done() + clientConfig *client.Config +} + +func (kl *kubeletExecutor) ListenAndServe(address net.IP, port uint, tlsOptions *kubelet.TLSOptions, enableDebuggingHandlers bool) { + // this func could be called many times, depending how often the HTTP server crashes, + // so only execute certain initialization procs once + 
kl.initialize.Do(func() {
+		if kl.runProxy {
+			go runtime.Until(kl.runProxyService, 5*time.Second, kl.executorDone)
+		}
+		go func() {
+			if _, err := kl.driver.Run(); err != nil {
+				log.Fatalf("executor driver failed: %v", err)
+			}
+			log.Info("executor Run completed")
+		}()
+	})
+	log.Infof("Starting kubelet server...")
+	kubelet.ListenAndServeKubeletServer(kl, address, port, tlsOptions, enableDebuggingHandlers)
+}
+
+// runProxyService blocks as long as the proxy service is running; it is
+// intended to be executed asynchronously.
+func (kl *kubeletExecutor) runProxyService() {
+
+	log.Infof("Starting proxy process...")
+
+	const KM_PROXY = "proxy" //TODO(jdef) constant should be shared with km package
+	args := []string{}
+
+	if kl.hks.FindServer(KM_PROXY) {
+		args = append(args, KM_PROXY)
+		log.V(1).Infof("attempting to use the km proxy service")
+	} else if _, err := os.Stat(kl.proxyExec); os.IsNotExist(err) {
+		log.Errorf("failed to locate proxy executable at '%v' and km not present: %v", kl.proxyExec, err)
+		return
+	}
+
+	bindAddress := "0.0.0.0"
+	if !kl.proxyBindall {
+		bindAddress = kl.address.String()
+	}
+	args = append(args,
+		fmt.Sprintf("--bind-address=%s", bindAddress),
+		fmt.Sprintf("--v=%d", kl.proxyLogV),
+		"--logtostderr=true",
+	)
+
+	// add client.Config args here. proxy still calls client.BindClientConfigFlags
+	appendStringArg := func(name, value string) {
+		if value != "" {
+			args = append(args, fmt.Sprintf("--%s=%s", name, value))
+		}
+	}
+	appendStringArg("master", kl.clientConfig.Host)
+	/* TODO(jdef) move these flags to a config file pointed to by --kubeconfig
+	appendStringArg("api-version", kl.clientConfig.Version)
+	appendStringArg("client-certificate", kl.clientConfig.CertFile)
+	appendStringArg("client-key", kl.clientConfig.KeyFile)
+	appendStringArg("certificate-authority", kl.clientConfig.CAFile)
+	args = append(args, fmt.Sprintf("--insecure-skip-tls-verify=%t", kl.clientConfig.Insecure))
+	*/
+
+	log.Infof("Spawning process executable %s with args '%+v'", kl.proxyExec, args)
+
+	cmd := exec.Command(kl.proxyExec, args...)
+	if _, err := cmd.StdoutPipe(); err != nil {
+		log.Fatal(err)
+	}
+
+	proxylogs, err := cmd.StderrPipe()
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	//TODO(jdef) append instead of truncate? what if the disk is full?
+	logfile, err := os.Create(kl.proxyLogfile)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer logfile.Close()
+
+	ch := make(chan struct{})
+	go func() {
+		defer func() {
+			select {
+			case <-ch:
+				log.Infof("killing proxy process...")
+				if err = cmd.Process.Kill(); err != nil {
+					log.Errorf("failed to kill proxy process: %v", err)
+				}
+			default:
+			}
+		}()
+
+		writer := bufio.NewWriter(logfile)
+		defer writer.Flush()
+
+		<-ch
+		written, err := io.Copy(writer, proxylogs)
+		if err != nil {
+			log.Errorf("error writing data to proxy log: %v", err)
+		}
+
+		log.Infof("wrote %d bytes to proxy log", written)
+	}()
+
+	// if the proxy fails to start then we exit the executor, otherwise
+	// wait for the proxy process to end (and release resources after).
+	if err := cmd.Start(); err != nil {
+		log.Fatal(err)
+	}
+	close(ch)
+	if err := cmd.Wait(); err != nil {
+		log.Error(err)
+	}
+}
+
+// Run runs the main kubelet loop, closing the kubeletFinished chan when the
+// loop exits; it never returns.
+func (kl *kubeletExecutor) Run(updates <-chan kubelet.PodUpdate) {
+	defer func() {
+		close(kl.kubeletFinished)
+		util.HandleCrash()
+		log.Infoln("kubelet run terminated") //TODO(jdef) turn down verbosity
+		// important: never return! 
this is in our contract + select {} + }() + + // push updates through a closable pipe. when the executor indicates shutdown + // via Done() we want to stop the Kubelet from processing updates. + pipe := make(chan kubelet.PodUpdate) + go func() { + // closing pipe will cause our patched kubelet's syncLoop() to exit + defer close(pipe) + pipeLoop: + for { + select { + case <-kl.executorDone: + break pipeLoop + default: + select { + case u := <-updates: + select { + case pipe <- u: // noop + case <-kl.executorDone: + break pipeLoop + } + case <-kl.executorDone: + break pipeLoop + } + } + } + }() + + // we expect that Run() will complete after the pipe is closed and the + // kubelet's syncLoop() has finished processing its backlog, which hopefully + // will not take very long. Peeking into the future (current k8s master) it + // seems that the backlog has grown from 1 to 50 -- this may negatively impact + // us going forward, time will tell. + util.Until(func() { kl.Kubelet.Run(pipe) }, 0, kl.executorDone) + + //TODO(jdef) revisit this if/when executor failover lands + err := kl.SyncPods([]*api.Pod{}, nil, nil, time.Now()) + if err != nil { + log.Errorf("failed to cleanly remove all pods and associated state: %v", err) + } +} diff --git a/contrib/mesos/pkg/hyperkube/doc.go b/contrib/mesos/pkg/hyperkube/doc.go new file mode 100644 index 00000000000..c20e34402b3 --- /dev/null +++ b/contrib/mesos/pkg/hyperkube/doc.go @@ -0,0 +1,21 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package hyperkube facilitates the combination of multiple +// kubernetes-mesos components into a single binary form, providing a +// simple mechanism for intra-component discovery as per the original +// Kubernetes hyperkube package. +package hyperkube diff --git a/contrib/mesos/pkg/hyperkube/types.go b/contrib/mesos/pkg/hyperkube/types.go new file mode 100644 index 00000000000..e255f893f4f --- /dev/null +++ b/contrib/mesos/pkg/hyperkube/types.go @@ -0,0 +1,54 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package hyperkube + +import ( + "github.com/spf13/pflag" +) + +var ( + nilKube = &nilKubeType{} +) + +type Interface interface { + // FindServer will find a specific server named name. + FindServer(name string) bool + + // The executable name, used for help and soft-link invocation + Name() string + + // Flags returns a flagset for "global" flags. 
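+	// These apply to every server hosted by the binary, as opposed to
+	// per-server flag sets.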
+ Flags() *pflag.FlagSet +} + +type nilKubeType struct{} + +func (n *nilKubeType) FindServer(_ string) bool { + return false +} + +func (n *nilKubeType) Name() string { + return "" +} + +func (n *nilKubeType) Flags() *pflag.FlagSet { + return nil +} + +func Nil() Interface { + return nilKube +} diff --git a/contrib/mesos/pkg/offers/doc.go b/contrib/mesos/pkg/offers/doc.go new file mode 100644 index 00000000000..03a76f3a3cb --- /dev/null +++ b/contrib/mesos/pkg/offers/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package offers contains code that manages Mesos offers. +package offers diff --git a/contrib/mesos/pkg/offers/metrics/doc.go b/contrib/mesos/pkg/offers/metrics/doc.go new file mode 100644 index 00000000000..9660dff774c --- /dev/null +++ b/contrib/mesos/pkg/offers/metrics/doc.go @@ -0,0 +1,19 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package metrics defines and exposes instrumentation metrics related to +// Mesos offers. +package metrics diff --git a/contrib/mesos/pkg/offers/metrics/metrics.go b/contrib/mesos/pkg/offers/metrics/metrics.go new file mode 100644 index 00000000000..dbebf2f42ea --- /dev/null +++ b/contrib/mesos/pkg/offers/metrics/metrics.go @@ -0,0 +1,89 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package metrics + +import ( + "sync" + "time" + + "github.com/prometheus/client_golang/prometheus" +) + +const ( + offerSubsystem = "mesos_offers" +) + +type OfferDeclinedReason string + +const ( + OfferExpired = OfferDeclinedReason("expired") + OfferRescinded = OfferDeclinedReason("rescinded") + OfferCompat = OfferDeclinedReason("compat") +) + +var ( + OffersReceived = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Subsystem: offerSubsystem, + Name: "received", + Help: "Counter of offers received from Mesos broken out by slave host.", + }, + []string{"hostname"}, + ) + + OffersDeclined = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Subsystem: offerSubsystem, + Name: "declined", + Help: "Counter of offers declined by the framework broken out by slave host.", + }, + []string{"hostname", "reason"}, + ) + + OffersAcquired = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Subsystem: offerSubsystem, + Name: "acquired", + Help: "Counter of offers acquired for task launch broken out by slave host.", + }, + []string{"hostname"}, + ) + + OffersReleased = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Subsystem: offerSubsystem, + Name: "released", + Help: "Counter of previously-acquired offers later released, broken out by slave host.", + }, + []string{"hostname"}, + ) +) + +var registerMetrics sync.Once + +func Register() { + registerMetrics.Do(func() { + prometheus.MustRegister(OffersReceived) + prometheus.MustRegister(OffersDeclined) + prometheus.MustRegister(OffersAcquired) + prometheus.MustRegister(OffersReleased) + }) +} + +func InMicroseconds(d time.Duration) float64 { + return float64(d.Nanoseconds() / time.Microsecond.Nanoseconds()) +} diff --git a/contrib/mesos/pkg/offers/offers.go b/contrib/mesos/pkg/offers/offers.go new file mode 100644 index 00000000000..0df507d593d --- /dev/null +++ b/contrib/mesos/pkg/offers/offers.go @@ -0,0 +1,570 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package offers
+
+import (
+	"fmt"
+	"reflect"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers/metrics"
+	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/proc"
+	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/queue"
+	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
+	log "github.com/golang/glog"
+	mesos "github.com/mesos/mesos-go/mesosproto"
+)
+
+const (
+	offerListenerMaxAge      = 12              // max number of times we'll attempt to fit an offer to a listener before requiring them to re-register themselves
+	offerIdCacheTTL          = 1 * time.Second // determines expiration of cached offer ids, used in listener notification
+	deferredDeclineTtlFactor = 2               // this factor, multiplied by the offer ttl, determines how long to wait before attempting to decline previously claimed offers that were subsequently deleted, then released. see offerStorage.Delete
+	notifyListenersDelay     = 0               // delay between offer listener notification attempts
+)
+
+type Filter func(*mesos.Offer) bool
+
+type Registry interface {
+	// Initialize the instance, spawning necessary housekeeping goroutines.
+	Init(<-chan struct{})
+
+	// Add offers to this registry, rejecting those that are deemed incompatible.
+	Add([]*mesos.Offer)
+
+	// Listen for arriving offers that are acceptable to the filter, sending
+	// a signal on (by closing) the returned channel. A listener will only
+	// ever be notified once, if at all.
+	Listen(id string, f Filter) <-chan struct{}
+
+	// invoked when offers are rescinded or expired
+	Delete(string, metrics.OfferDeclinedReason)
+
+	// Get returns the offer registered for the given ID; the boolean
+	// reports whether such an offer was found.
+	Get(offerId string) (Perishable, bool)
+
+	// iterate through non-expired offers in this registry
+	Walk(Walker) error
+
+	// invalidate one or all (when offerId="") offers; offers are not declined,
+	// but are simply flagged as expired in the offer history
+	Invalidate(offerId string)
+
+	// invalidate all offers associated with the slave identified by slaveId.
+	InvalidateForSlave(slaveId string)
+}
+
+// callback that is invoked during a walk through a series of live offers,
+// returning with stop=true (or err != nil) if the walk should stop prematurely. 
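+//
+// A typical Walker tries to claim each offer and stops the walk once one is
+// acquired, e.g. (a sketch; the launch step is assumed):
+//
+//	registry.Walk(func(offer Perishable) (bool, error) {
+//		if offer.Acquire() {
+//			// launch a task using offer.Details() ...
+//			return true, nil
+//		}
+//		return false, nil
+//	})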
+type Walker func(offer Perishable) (stop bool, err error) + +type RegistryConfig struct { + DeclineOffer func(offerId string) <-chan error // tell Mesos that we're declining the offer + Compat func(*mesos.Offer) bool // returns true if offer is compatible; incompatible offers are declined + TTL time.Duration // determines a perishable offer's expiration deadline: now+ttl + LingerTTL time.Duration // if zero, offers will not linger in the FIFO past their expiration deadline + ListenerDelay time.Duration // specifies the sleep time between offer listener notifications +} + +type offerStorage struct { + RegistryConfig + offers *cache.FIFO // collection of Perishable, both live and expired + listeners *queue.DelayFIFO // collection of *offerListener + delayed *queue.DelayQueue // deadline-oriented offer-event queue + slaves *slaveStorage // slave to offer mappings +} + +type liveOffer struct { + *mesos.Offer + expiration time.Time + acquired int32 // 1 = acquired, 0 = free +} + +type expiredOffer struct { + offerSpec + deadline time.Time +} + +// subset of mesos.OfferInfo useful for recordkeeping +type offerSpec struct { + id string + hostname string +} + +// offers that may perish (all of them?) implement this interface. +// callers may expect to access these funcs concurrently so implementations +// must provide their own form of synchronization around mutable state. +type Perishable interface { + // returns true if this offer has expired + HasExpired() bool + // if not yet expired, return mesos offer details; otherwise nil + Details() *mesos.Offer + // mark this offer as acquired, returning true if it was previously unacquired. thread-safe. + Acquire() bool + // mark this offer as un-acquired. thread-safe. + Release() + // expire or delete this offer from storage + age(s *offerStorage) + // return a unique identifier for this offer + Id() string + // return the slave host for this offer + Host() string + addTo(*queue.DelayQueue) +} + +func (e *expiredOffer) addTo(q *queue.DelayQueue) { + q.Add(e) +} + +func (e *expiredOffer) Id() string { + return e.id +} + +func (e *expiredOffer) Host() string { + return e.hostname +} + +func (e *expiredOffer) HasExpired() bool { + return true +} + +func (e *expiredOffer) Details() *mesos.Offer { + return nil +} + +func (e *expiredOffer) Acquire() bool { + return false +} + +func (e *expiredOffer) Release() {} + +func (e *expiredOffer) age(s *offerStorage) { + log.V(3).Infof("Delete lingering offer: %v", e.id) + s.offers.Delete(e) + s.slaves.deleteOffer(e.id) +} + +// return the time left to linger +func (e *expiredOffer) GetDelay() time.Duration { + return e.deadline.Sub(time.Now()) +} + +func (to *liveOffer) HasExpired() bool { + return time.Now().After(to.expiration) +} + +func (to *liveOffer) Details() *mesos.Offer { + return to.Offer +} + +func (to *liveOffer) Acquire() (acquired bool) { + if acquired = atomic.CompareAndSwapInt32(&to.acquired, 0, 1); acquired { + metrics.OffersAcquired.WithLabelValues(to.Host()).Inc() + } + return +} + +func (to *liveOffer) Release() { + if released := atomic.CompareAndSwapInt32(&to.acquired, 1, 0); released { + metrics.OffersReleased.WithLabelValues(to.Host()).Inc() + } +} + +func (to *liveOffer) age(s *offerStorage) { + s.Delete(to.Id(), metrics.OfferExpired) +} + +func (to *liveOffer) Id() string { + return to.Offer.Id.GetValue() +} + +func (to *liveOffer) Host() string { + return to.Offer.GetHostname() +} + +func (to *liveOffer) addTo(q *queue.DelayQueue) { + q.Add(to) +} + +// return the time remaining before 
the offer expires
+func (to *liveOffer) GetDelay() time.Duration {
+	return to.expiration.Sub(time.Now())
+}
+
+func CreateRegistry(c RegistryConfig) Registry {
+	metrics.Register()
+	return &offerStorage{
+		RegistryConfig: c,
+		offers: cache.NewFIFO(cache.KeyFunc(func(v interface{}) (string, error) {
+			if perishable, ok := v.(Perishable); !ok {
+				return "", fmt.Errorf("expected perishable offer, not '%+v'", v)
+			} else {
+				return perishable.Id(), nil
+			}
+		})),
+		listeners: queue.NewDelayFIFO(),
+		delayed:   queue.NewDelayQueue(),
+		slaves:    newSlaveStorage(),
+	}
+}
+
+func (s *offerStorage) declineOffer(offerId, hostname string, reason metrics.OfferDeclinedReason) {
+	//TODO(jdef) might be nice to spec an abort chan here
+	runtime.Signal(proc.OnError(s.DeclineOffer(offerId), func(err error) {
+		log.Warningf("decline failed for offer id %v: %v", offerId, err)
+	}, nil)).Then(func() {
+		metrics.OffersDeclined.WithLabelValues(hostname, string(reason)).Inc()
+	})
+}
+
+func (s *offerStorage) Add(offers []*mesos.Offer) {
+	now := time.Now()
+	for _, offer := range offers {
+		if !s.Compat(offer) {
+			//TODO(jdef) would be nice to batch these up
+			offerId := offer.Id.GetValue()
+			log.V(3).Infof("Declining incompatible offer %v", offerId)
+			s.declineOffer(offerId, offer.GetHostname(), metrics.OfferCompat)
+			continue
+		}
+		timed := &liveOffer{
+			Offer:      offer,
+			expiration: now.Add(s.TTL),
+			acquired:   0,
+		}
+		log.V(3).Infof("Receiving offer %v", timed.Id())
+		s.offers.Add(timed)
+		s.delayed.Add(timed)
+		s.slaves.add(offer.SlaveId.GetValue(), timed.Id())
+		metrics.OffersReceived.WithLabelValues(timed.Host()).Inc()
+	}
+}
+
+// Delete removes an offer from storage, implicitly expiring the offer.
+func (s *offerStorage) Delete(offerId string, reason metrics.OfferDeclinedReason) {
+	if offer, ok := s.Get(offerId); ok {
+		log.V(3).Infof("Deleting offer %v", offerId)
+		// attempt to block others from consuming the offer. if it's already been
+		// claimed and is not yet lingering then don't decline it - just mark it as
+		// expired in the history: allow a prior claimant to attempt to launch with it
+		notYetClaimed := offer.Acquire()
+		if offer.Details() != nil {
+			if notYetClaimed {
+				log.V(3).Infof("Declining offer %v", offerId)
+				s.declineOffer(offerId, offer.Host(), reason)
+			} else {
+				// some pod has acquired this and may attempt to launch a task with it
+				// failed schedule/launch attempts are required to Release() any claims on the offer
+
+				// TODO(jdef): not sure what a good value is here. the goal is to provide a
+				// launchTasks (driver) operation enough time to complete so that we don't end
+				// up declining an offer that we're actually attempting to use. 
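+				// The AfterFunc below waits deferredDeclineTtlFactor*TTL and
+				// then re-checks Acquire(): only an offer that has since been
+				// released (or expired) is actually declined, so a launch that
+				// still holds the claim is never undercut.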
+				time.AfterFunc(deferredDeclineTtlFactor*s.TTL, func() {
+					// at this point the offer is in one of five states:
+					// a) permanently deleted: expired due to timeout
+					// b) permanently deleted: expired due to having been rescinded
+					// c) lingering: expired due to timeout
+					// d) lingering: expired due to having been rescinded
+					// e) claimed: task launched and is using resources from this offer
+					// we want to **avoid** declining an offer that's claimed: attempt to acquire
+					if offer.Acquire() {
+						// previously claimed offer was released, perhaps due to a launch
+						// failure, so we should attempt to decline
+						log.V(3).Infof("attempting to decline (previously claimed) offer %v", offerId)
+						s.declineOffer(offerId, offer.Host(), reason)
+					}
+				})
+			}
+		}
+		s.expireOffer(offer)
+	} // else, ignore offers not in the history
+}
+
+func (s *offerStorage) InvalidateForSlave(slaveId string) {
+	offerIds := s.slaves.deleteSlave(slaveId)
+	for oid := range offerIds {
+		s.invalidateOne(oid)
+	}
+}
+
+// Invalidate expires all known, live offers if offerId == "", otherwise only
+// the offer indicated.
+func (s *offerStorage) Invalidate(offerId string) {
+	if offerId != "" {
+		s.invalidateOne(offerId)
+		return
+	}
+	obj := s.offers.List()
+	for _, o := range obj {
+		offer, ok := o.(Perishable)
+		if !ok {
+			log.Errorf("Expected perishable offer, not %v", o)
+			continue
+		}
+		offer.Acquire() // attempt to block others from using it
+		s.expireOffer(offer)
+		// don't decline, we already know that it's an invalid offer
+	}
+}
+
+func (s *offerStorage) invalidateOne(offerId string) {
+	if offer, ok := s.Get(offerId); ok {
+		offer.Acquire() // attempt to block others from using it
+		s.expireOffer(offer)
+		// don't decline, we already know that it's an invalid offer
+	}
+}
+
+// Walk the collection of offers. The walk stops either as indicated by the
+// Walker or when the end of the offer list is reached. Expired offers are
+// never passed to a Walker.
+func (s *offerStorage) Walk(w Walker) error {
+	for _, v := range s.offers.List() {
+		offer, ok := v.(Perishable)
+		if !ok {
+			// offer disappeared...
+			continue
+		}
+		if offer.HasExpired() {
+			// never pass expired offers to walkers
+			continue
+		}
+		if stop, err := w(offer); err != nil {
+			return err
+		} else if stop {
+			return nil
+		}
+	}
+	return nil
+}
+
+func Expired(offerId, hostname string, ttl time.Duration) *expiredOffer {
+	return &expiredOffer{offerSpec{id: offerId, hostname: hostname}, time.Now().Add(ttl)}
+}
+
+func (s *offerStorage) expireOffer(offer Perishable) {
+	// the offer may or may not be expired due to TTL so check for details
+	// since that's a more reliable determinant of lingering status
+	if details := offer.Details(); details != nil {
+		// recently expired, should linger
+		offerId := details.Id.GetValue()
+		log.V(3).Infof("Expiring offer %v", offerId)
+		if s.LingerTTL > 0 {
+			log.V(3).Infof("offer will linger: %v", offerId)
+			expired := Expired(offerId, offer.Host(), s.LingerTTL)
+			s.offers.Update(expired)
+			s.delayed.Add(expired)
+		} else {
+			log.V(3).Infof("Permanently deleting offer %v", offerId)
+			s.offers.Delete(offerId)
+			s.slaves.deleteOffer(offerId)
+		}
+	} // else, it's still lingering... 
+} + +func (s *offerStorage) Get(id string) (Perishable, bool) { + if obj, ok, _ := s.offers.GetByKey(id); !ok { + return nil, false + } else { + to, ok := obj.(Perishable) + if !ok { + log.Errorf("invalid offer object in fifo '%v'", obj) + } + return to, ok + } +} + +type offerListener struct { + id string + accepts Filter + notify chan<- struct{} + age int + deadline time.Time + sawVersion uint64 +} + +func (l *offerListener) GetUID() string { + return l.id +} + +func (l *offerListener) Deadline() (time.Time, bool) { + return l.deadline, true +} + +// register a listener for new offers, whom we'll notify upon receiving such. +// notification is delivered in the form of closing the channel, nothing is ever sent. +func (s *offerStorage) Listen(id string, f Filter) <-chan struct{} { + if f == nil { + return nil + } + ch := make(chan struct{}) + listen := &offerListener{ + id: id, + accepts: f, + notify: ch, + deadline: time.Now().Add(s.ListenerDelay), + } + log.V(3).Infof("Registering offer listener %s", listen.id) + s.listeners.Offer(listen, queue.ReplaceExisting) + return ch +} + +func (s *offerStorage) ageOffers() { + offer, ok := s.delayed.Pop().(Perishable) + if !ok { + log.Errorf("Expected Perishable, not %v", offer) + return + } + if details := offer.Details(); details != nil && !offer.HasExpired() { + // live offer has not expired yet: timed out early + // FWIW: early timeouts are more frequent when GOMAXPROCS is > 1 + offer.addTo(s.delayed) + } else { + offer.age(s) + } +} + +func (s *offerStorage) nextListener() *offerListener { + obj := s.listeners.Pop() + if listen, ok := obj.(*offerListener); !ok { + //programming error + panic(fmt.Sprintf("unexpected listener object %v", obj)) + } else { + return listen + } +} + +// notify listeners if we find an acceptable offer for them. listeners +// are garbage collected after a certain age (see offerListenerMaxAge). +// ids lists offer IDs that are retrievable from offer storage. 
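+// The ids func also returns a version stamp; if a listener has already seen
+// the current version there is nothing new to match against, so the listener
+// is simply re-queued without aging.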
+func (s *offerStorage) notifyListeners(ids func() (util.StringSet, uint64)) { + listener := s.nextListener() // blocking + + offerIds, version := ids() + if listener.sawVersion == version { + // no changes to offer list, avoid growing older - just wait for new offers to arrive + listener.deadline = time.Now().Add(s.ListenerDelay) + s.listeners.Offer(listener, queue.KeepExisting) + return + } + listener.sawVersion = version + + // notify if we find an acceptable offer + for id := range offerIds { + if offer, ok := s.Get(id); !ok || offer.HasExpired() { + continue + } else if listener.accepts(offer.Details()) { + log.V(3).Infof("Notifying offer listener %s", listener.id) + close(listener.notify) + return + } + } + + // no interesting offers found, re-queue the listener + listener.age++ + if listener.age < offerListenerMaxAge { + listener.deadline = time.Now().Add(s.ListenerDelay) + s.listeners.Offer(listener, queue.KeepExisting) + } else { + // garbage collection is as simple as not re-adding the listener to the queue + log.V(3).Infof("garbage collecting offer listener %s", listener.id) + } +} + +func (s *offerStorage) Init(done <-chan struct{}) { + // zero delay, reap offers as soon as they expire + go runtime.Until(s.ageOffers, 0, done) + + // cached offer ids for the purposes of listener notification + idCache := &stringsCache{ + refill: func() util.StringSet { + result := util.NewStringSet() + for _, v := range s.offers.List() { + if offer, ok := v.(Perishable); ok { + result.Insert(offer.Id()) + } + } + return result + }, + ttl: offerIdCacheTTL, + } + + go runtime.Until(func() { s.notifyListeners(idCache.Strings) }, notifyListenersDelay, done) +} + +type stringsCache struct { + expiresAt time.Time + cached util.StringSet + ttl time.Duration + refill func() util.StringSet + version uint64 +} + +// not thread-safe +func (c *stringsCache) Strings() (util.StringSet, uint64) { + now := time.Now() + if c.expiresAt.Before(now) { + old := c.cached + c.cached = c.refill() + c.expiresAt = now.Add(c.ttl) + if !reflect.DeepEqual(old, c.cached) { + c.version++ + } + } + return c.cached, c.version +} + +type slaveStorage struct { + sync.Mutex + index map[string]string // map offerId to slaveId +} + +func newSlaveStorage() *slaveStorage { + return &slaveStorage{ + index: make(map[string]string), + } +} + +// create a mapping between a slave and an offer +func (self *slaveStorage) add(slaveId, offerId string) { + self.Lock() + defer self.Unlock() + self.index[offerId] = slaveId +} + +// delete the slave-offer mappings for slaveId, returns the IDs of the offers that were unmapped +func (self *slaveStorage) deleteSlave(slaveId string) util.StringSet { + offerIds := util.NewStringSet() + self.Lock() + defer self.Unlock() + for oid, sid := range self.index { + if sid == slaveId { + offerIds.Insert(oid) + delete(self.index, oid) + } + } + return offerIds +} + +// delete the slave-offer mappings for offerId +func (self *slaveStorage) deleteOffer(offerId string) { + self.Lock() + defer self.Unlock() + delete(self.index, offerId) +} diff --git a/contrib/mesos/pkg/offers/offers_test.go b/contrib/mesos/pkg/offers/offers_test.go new file mode 100644 index 00000000000..5b44eee5afc --- /dev/null +++ b/contrib/mesos/pkg/offers/offers_test.go @@ -0,0 +1,391 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package offers + +import ( + "errors" + "sync/atomic" + "testing" + "time" + + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/proc" + mesos "github.com/mesos/mesos-go/mesosproto" + util "github.com/mesos/mesos-go/mesosutil" +) + +func TestExpiredOffer(t *testing.T) { + t.Parallel() + + ttl := 2 * time.Second + o := Expired("test", "testhost", ttl) + + if o.Id() != "test" { + t.Error("expiredOffer does not return its Id") + } + if o.Host() != "testhost" { + t.Error("expiredOffer does not return its hostname") + } + if o.HasExpired() != true { + t.Error("expiredOffer is not expired") + } + if o.Details() != nil { + t.Error("expiredOffer does not return nil Details") + } + if o.Acquire() != false { + t.Error("expiredOffer must not be able to be acquired") + } + if delay := o.GetDelay(); !(0 < delay && delay <= ttl) { + t.Error("expiredOffer does not return a valid deadline") + } +} // TestExpiredOffer + +func TestTimedOffer(t *testing.T) { + t.Parallel() + + ttl := 2 * time.Second + now := time.Now() + o := &liveOffer{nil, now.Add(ttl), 0} + + if o.HasExpired() { + t.Errorf("offer ttl was %v and should not have expired yet", ttl) + } + if !o.Acquire() { + t.Fatal("1st acquisition of offer failed") + } + o.Release() + if !o.Acquire() { + t.Fatal("2nd acquisition of offer failed") + } + if o.Acquire() { + t.Fatal("3rd acquisition of offer passed but prior claim was not released") + } + o.Release() + if !o.Acquire() { + t.Fatal("4th acquisition of offer failed") + } + o.Release() + time.Sleep(ttl) + if !o.HasExpired() { + t.Fatal("offer not expired after ttl passed") + } + if !o.Acquire() { + t.Fatal("5th acquisition of offer failed; should not be tied to expiration") + } + if o.Acquire() { + t.Fatal("6th acquisition of offer succeeded; should already be acquired") + } +} // TestTimedOffer + +func TestOfferStorage(t *testing.T) { + ttl := time.Second / 4 + var declinedNum int32 + getDeclinedNum := func() int32 { return atomic.LoadInt32(&declinedNum) } + config := RegistryConfig{ + DeclineOffer: func(offerId string) <-chan error { + atomic.AddInt32(&declinedNum, 1) + return proc.ErrorChan(nil) + }, + Compat: func(o *mesos.Offer) bool { + return o.Hostname == nil || *o.Hostname != "incompatiblehost" + }, + TTL: ttl, + LingerTTL: 2 * ttl, + } + storage := CreateRegistry(config) + + done := make(chan struct{}) + storage.Init(done) + + // Add offer + id := util.NewOfferID("foo") + o := &mesos.Offer{Id: id} + storage.Add([]*mesos.Offer{o}) + + // Added offer should be in the storage + if obj, ok := storage.Get(id.GetValue()); obj == nil || !ok { + t.Error("offer not added") + } + if obj, _ := storage.Get(id.GetValue()); obj.Details() != o { + t.Error("added offer differs from returned offer") + } + + // Not-added offer is not in storage + if obj, ok := storage.Get("bar"); obj != nil || ok { + t.Error("offer bar should not exist in storage") + } + + // Deleted offer lingers in storage, is acquired and declined + offer, _ := storage.Get(id.GetValue()) + declinedNumBefore := getDeclinedNum() + storage.Delete(id.GetValue(), "deleted for test") + if obj, _ := storage.Get(id.GetValue()); obj 
== nil {
+		t.Error("deleted offer is not lingering")
+	}
+	if obj, _ := storage.Get(id.GetValue()); !obj.HasExpired() {
+		t.Error("deleted offer is not expired")
+	}
+	if ok := offer.Acquire(); ok {
+		t.Error("deleted offer can be acquired")
+	}
+	if getDeclinedNum() <= declinedNumBefore {
+		t.Error("deleted offer was not declined")
+	}
+
+	// Acquired offer is only declined after 2*ttl
+	id = util.NewOfferID("foo2")
+	o = &mesos.Offer{Id: id}
+	storage.Add([]*mesos.Offer{o})
+	offer, _ = storage.Get(id.GetValue())
+	declinedNumBefore = getDeclinedNum()
+	offer.Acquire()
+	storage.Delete(id.GetValue(), "deleted for test")
+	if getDeclinedNum() > declinedNumBefore {
+		t.Error("acquired offer is declined")
+	}
+
+	offer.Release()
+	time.Sleep(3 * ttl)
+	if getDeclinedNum() <= declinedNumBefore {
+		t.Error("released offer is not declined after 2*ttl")
+	}
+
+	// Added offer should be expired after ttl, but lingering
+	id = util.NewOfferID("foo3")
+	o = &mesos.Offer{Id: id}
+	storage.Add([]*mesos.Offer{o})
+
+	time.Sleep(2 * ttl)
+	obj, ok := storage.Get(id.GetValue())
+	if obj == nil || !ok {
+		t.Error("offer not lingering after ttl")
+	}
+	if !obj.HasExpired() {
+		t.Error("offer is not expired after ttl")
+	}
+
+	// Should be deleted when waiting longer than LingerTTL
+	time.Sleep(2 * ttl)
+	if obj, ok := storage.Get(id.GetValue()); obj != nil || ok {
+		t.Error("offer not deleted after LingerTTL")
+	}
+
+	// Incompatible offer is declined
+	id = util.NewOfferID("foo4")
+	incompatibleHostname := "incompatiblehost"
+	o = &mesos.Offer{Id: id, Hostname: &incompatibleHostname}
+	declinedNumBefore = getDeclinedNum()
+	storage.Add([]*mesos.Offer{o})
+	if obj, ok := storage.Get(id.GetValue()); obj != nil || ok {
+		t.Error("incompatible offer not rejected")
+	}
+	if getDeclinedNum() <= declinedNumBefore {
+		t.Error("incompatible offer is not declined")
+	}
+
+	// Invalidated offers are not declined, but expired
+	id = util.NewOfferID("foo5")
+	o = &mesos.Offer{Id: id}
+	storage.Add([]*mesos.Offer{o})
+	offer, _ = storage.Get(id.GetValue())
+	declinedNumBefore = getDeclinedNum()
+	storage.Invalidate(id.GetValue())
+	if obj, _ := storage.Get(id.GetValue()); !obj.HasExpired() {
+		t.Error("invalidated offer is not expired")
+	}
+	if getDeclinedNum() > declinedNumBefore {
+		t.Error("invalidated offer is declined")
+	}
+	if ok := offer.Acquire(); ok {
+		t.Error("invalidated offer can be acquired")
+	}
+
+	// Invalidate("") invalidates all offers
+	id = util.NewOfferID("foo6")
+	o = &mesos.Offer{Id: id}
+	storage.Add([]*mesos.Offer{o})
+	id2 := util.NewOfferID("foo7")
+	o2 := &mesos.Offer{Id: id2}
+	storage.Add([]*mesos.Offer{o2})
+	storage.Invalidate("")
+	if obj, _ := storage.Get(id.GetValue()); !obj.HasExpired() {
+		t.Error("invalidated offer is not expired")
+	}
+	if obj2, _ := storage.Get(id2.GetValue()); !obj2.HasExpired() {
+		t.Error("second invalidated offer is not expired")
+	}
+
+	// InvalidateForSlave invalidates all offers for that slave, and only those
+	id = util.NewOfferID("foo8")
+	slaveId := util.NewSlaveID("test-slave")
+	o = &mesos.Offer{Id: id, SlaveId: slaveId}
+	storage.Add([]*mesos.Offer{o})
+	id2 = util.NewOfferID("foo9")
+	o2 = &mesos.Offer{Id: id2}
+	storage.Add([]*mesos.Offer{o2})
+	storage.InvalidateForSlave(slaveId.GetValue())
+	if obj, _ := storage.Get(id.GetValue()); !obj.HasExpired() {
+		t.Error("invalidated offer for test-slave is not expired")
+	}
+	if obj2, _ := storage.Get(id2.GetValue()); obj2.HasExpired() {
+		t.Error("invalidated offer for another slave is expired")
+	}
+
+	
close(done) +} // TestOfferStorage + +func TestListen(t *testing.T) { + ttl := time.Second / 4 + config := RegistryConfig{ + DeclineOffer: func(offerId string) <-chan error { + return proc.ErrorChan(nil) + }, + Compat: func(o *mesos.Offer) bool { + return true + }, + TTL: ttl, + ListenerDelay: ttl / 2, + } + storage := CreateRegistry(config) + + done := make(chan struct{}) + storage.Init(done) + + // Create two listeners with a hostname filter + hostname1 := "hostname1" + hostname2 := "hostname2" + listener1 := storage.Listen("listener1", func(offer *mesos.Offer) bool { + return offer.GetHostname() == hostname1 + }) + listener2 := storage.Listen("listener2", func(offer *mesos.Offer) bool { + return offer.GetHostname() == hostname2 + }) + + // Add hostname1 offer + id := util.NewOfferID("foo") + o := &mesos.Offer{Id: id, Hostname: &hostname1} + storage.Add([]*mesos.Offer{o}) + + // listener1 is notified by closing channel + select { + case _, more := <-listener1: + if more { + t.Error("listener1 is not closed") + } + } + + // listener2 is not notified within ttl + select { + case <-listener2: + t.Error("listener2 is notified") + case <-time.After(ttl): + } + + close(done) +} // TestListen + +func TestWalk(t *testing.T) { + t.Parallel() + config := RegistryConfig{ + DeclineOffer: func(offerId string) <-chan error { + return proc.ErrorChan(nil) + }, + TTL: 0 * time.Second, + LingerTTL: 0 * time.Second, + ListenerDelay: 0 * time.Second, + } + storage := CreateRegistry(config) + acceptedOfferId := "" + walked := 0 + walker1 := func(p Perishable) (bool, error) { + walked++ + if p.Acquire() { + acceptedOfferId = p.Details().Id.GetValue() + return true, nil + } + return false, nil + } + // sanity check + err := storage.Walk(walker1) + if err != nil { + t.Fatalf("received impossible error %v", err) + } + if walked != 0 { + t.Fatal("walked empty storage") + } + if acceptedOfferId != "" { + t.Fatal("somehow found an offer when registry was empty") + } + impl, ok := storage.(*offerStorage) + if !ok { + t.Fatal("unexpected offer storage impl") + } + // single offer + ttl := 2 * time.Second + now := time.Now() + o := &liveOffer{&mesos.Offer{Id: util.NewOfferID("foo")}, now.Add(ttl), 0} + + impl.offers.Add(o) + err = storage.Walk(walker1) + if err != nil { + t.Fatalf("received impossible error %v", err) + } + if walked != 1 { + t.Fatalf("walk count %d", walked) + } + if acceptedOfferId != "foo" { + t.Fatalf("found offer %v", acceptedOfferId) + } + + acceptedOfferId = "" + err = storage.Walk(walker1) + if err != nil { + t.Fatalf("received impossible error %v", err) + } + if walked != 2 { + t.Fatalf("walk count %d", walked) + } + if acceptedOfferId != "" { + t.Fatalf("found offer %v", acceptedOfferId) + } + + walker2 := func(p Perishable) (bool, error) { + walked++ + return true, nil + } + err = storage.Walk(walker2) + if err != nil { + t.Fatalf("received impossible error %v", err) + } + if walked != 3 { + t.Fatalf("walk count %d", walked) + } + if acceptedOfferId != "" { + t.Fatalf("found offer %v", acceptedOfferId) + } + + walker3 := func(p Perishable) (bool, error) { + walked++ + return true, errors.New("baz") + } + err = storage.Walk(walker3) + if err == nil { + t.Fatal("expected error") + } + if walked != 4 { + t.Fatalf("walk count %d", walked) + } +} diff --git a/contrib/mesos/pkg/proc/doc.go b/contrib/mesos/pkg/proc/doc.go new file mode 100644 index 00000000000..ec3b4e0f80f --- /dev/null +++ b/contrib/mesos/pkg/proc/doc.go @@ -0,0 +1,19 @@ +/* +Copyright 2015 The Kubernetes Authors All rights 
reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package proc provides opinionated utilities for processing background +// operations and future errors, somewhat inspired by libprocess. +package proc diff --git a/contrib/mesos/pkg/proc/errors.go b/contrib/mesos/pkg/proc/errors.go new file mode 100644 index 00000000000..c7fe0f442e6 --- /dev/null +++ b/contrib/mesos/pkg/proc/errors.go @@ -0,0 +1,34 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package proc + +import ( + "errors" +) + +var ( + errProcessTerminated = errors.New("cannot execute action because process has terminated") + errIllegalState = errors.New("illegal state, cannot execute action") +) + +func IsProcessTerminated(err error) bool { + return err == errProcessTerminated +} + +func IsIllegalState(err error) bool { + return err == errIllegalState +} diff --git a/contrib/mesos/pkg/proc/proc.go b/contrib/mesos/pkg/proc/proc.go new file mode 100644 index 00000000000..159e523961f --- /dev/null +++ b/contrib/mesos/pkg/proc/proc.go @@ -0,0 +1,377 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package proc + +import ( + "fmt" + "sync" + "sync/atomic" + "time" + + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime" + log "github.com/golang/glog" +) + +const ( + // if the action processor crashes (if some Action panics) then we + // wait this long before spinning up the action processor again. 
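+	// (this guards against a hot crash loop: per the comment in begin(),
+	// runtime.Until recovers from a panicking action and waits out this
+	// delay before re-entering the processing loop)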
+	defaultActionHandlerCrashDelay = 100 * time.Millisecond
+
+	// how many actions we can store in the backlog
+	defaultActionQueueDepth = 1024
+)
+
+type procImpl struct {
+	Config
+	backlog   chan Action    // action queue
+	terminate chan struct{}  // signaled via close()
+	wg        sync.WaitGroup // End() terminates when the wait is over
+	done      runtime.Signal
+	state     *stateType
+	pid       uint32
+	writeLock sync.Mutex    // avoid data race between write and close of backlog
+	changed   *sync.Cond    // wait/signal for backlog changes
+	engine    DoerFunc      // isolated for easier unit testing later on
+	running   chan struct{} // closes once event loop processing starts
+	dead      chan struct{} // closes upon completion of process termination
+}
+
+type Config struct {
+	// cooldown period in between deferred action crashes
+	actionHandlerCrashDelay time.Duration
+
+	// determines the size of the deferred action backlog
+	actionQueueDepth uint32
+}
+
+var (
+	defaultConfig = Config{
+		actionHandlerCrashDelay: defaultActionHandlerCrashDelay,
+		actionQueueDepth:        defaultActionQueueDepth,
+	}
+	pid           uint32
+	closedErrChan <-chan error
+)
+
+func init() {
+	ch := make(chan error)
+	close(ch)
+	closedErrChan = ch
+}
+
+func New() Process {
+	return newConfigured(defaultConfig)
+}
+
+func newConfigured(config Config) Process {
+	state := stateNew
+	pi := &procImpl{
+		Config:    config,
+		backlog:   make(chan Action, config.actionQueueDepth),
+		terminate: make(chan struct{}),
+		state:     &state,
+		pid:       atomic.AddUint32(&pid, 1),
+		running:   make(chan struct{}),
+		dead:      make(chan struct{}),
+	}
+	pi.engine = DoerFunc(pi.doLater)
+	pi.changed = sync.NewCond(&pi.writeLock)
+	pi.wg.Add(1) // symmetrical to wg.Done() in End()
+	pi.done = pi.begin()
+	return pi
+}
+
+// returns a chan that closes upon termination of the action processing loop
+func (self *procImpl) Done() <-chan struct{} {
+	return self.done
+}
+
+func (self *procImpl) Running() <-chan struct{} {
+	return self.running
+}
+
+func (self *procImpl) begin() runtime.Signal {
+	if !self.state.transition(stateNew, stateRunning) {
+		panic(fmt.Errorf("failed to transition from New to Running state"))
+	}
+	defer log.V(2).Infof("started process %d", self.pid)
+	var entered runtime.Latch
+	// execute actions on the backlog chan
+	return runtime.After(func() {
+		runtime.Until(func() {
+			if entered.Acquire() {
+				close(self.running)
+				self.wg.Add(1)
+			}
+			for action := range self.backlog {
+				select {
+				case <-self.terminate:
+					return
+				default:
+					// signal to indicate there's room in the backlog now
+					self.changed.Broadcast()
+					// rely on Until to handle action panics
+					action()
+				}
+			}
+		}, self.actionHandlerCrashDelay, self.terminate)
+	}).Then(func() {
+		log.V(2).Infof("finished processing action backlog for process %d", self.pid)
+		if !entered.Acquire() {
+			self.wg.Done()
+		}
+	})
+}
+
+// execute some action in the context of the current process. Actions
+// executed via this func are to be executed in a concurrency-safe manner:
+// no two actions should execute at the same time. Invocations of this func
+// should not block for very long, unless the action backlog is full or the
+// process is terminating.
+// returns errProcessTerminated if the process already ended.
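+//
+// A minimal caller sketch (hypothetical, not part of this patch): on success
+// the returned chan is nil, otherwise it delivers the scheduling error, so
+// callers typically hand it to OnError rather than reading it directly.
+//
+//	p := New()
+//	errCh := p.Do(func() { log.V(2).Infof("action ran inside the process") })
+//	p.OnError(errCh, func(err error) { log.Errorf("not scheduled: %v", err) })
+//	<-p.End()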
+func (self *procImpl) doLater(deferredAction Action) (err <-chan error) { + a := Action(func() { + self.wg.Add(1) + defer self.wg.Done() + deferredAction() + }) + + scheduled := false + self.writeLock.Lock() + defer self.writeLock.Unlock() + + for err == nil && !scheduled { + switch s := self.state.get(); s { + case stateRunning: + select { + case self.backlog <- a: + scheduled = true + default: + self.changed.Wait() + } + case stateTerminal: + err = ErrorChan(errProcessTerminated) + default: + err = ErrorChan(errIllegalState) + } + } + return +} + +// implementation of Doer interface, schedules some action to be executed via +// the current execution engine +func (self *procImpl) Do(a Action) <-chan error { + return self.engine(a) +} + +// spawn a goroutine that waits for an error. if a non-nil error is read from the +// channel then the handler func is invoked, otherwise (nil error or closed chan) +// the handler is skipped. if a nil handler is specified then it's not invoked. +// the signal chan that's returned closes once the error process logic (and handler, +// if any) has completed. +func OnError(ch <-chan error, f func(error), abort <-chan struct{}) <-chan struct{} { + return runtime.After(func() { + if ch == nil { + return + } + select { + case err, ok := <-ch: + if ok && err != nil && f != nil { + f(err) + } + case <-abort: + if f != nil { + f(errProcessTerminated) + } + } + }) +} + +func (self *procImpl) OnError(ch <-chan error, f func(error)) <-chan struct{} { + return OnError(ch, f, self.Done()) +} + +func (self *procImpl) flush() { + log.V(2).Infof("flushing action backlog for process %d", self.pid) + i := 0 + //TODO: replace with `for range self.backlog` once Go 1.3 support is dropped + for { + _, open := <-self.backlog + if !open { + break + } + i++ + } + log.V(2).Infof("flushed %d backlog actions for process %d", i, self.pid) +} + +func (self *procImpl) End() <-chan struct{} { + if self.state.transitionTo(stateTerminal, stateTerminal) { + go func() { + defer close(self.dead) + self.writeLock.Lock() + defer self.writeLock.Unlock() + + log.V(2).Infof("terminating process %d", self.pid) + + close(self.backlog) + close(self.terminate) + self.wg.Done() + self.changed.Broadcast() + + log.V(2).Infof("waiting for deferred actions to complete") + + // wait for all pending actions to complete, then flush the backlog + self.wg.Wait() + self.flush() + }() + } + return self.dead +} + +type errorOnce struct { + once sync.Once + err chan error + abort <-chan struct{} +} + +func NewErrorOnce(abort <-chan struct{}) ErrorOnce { + return &errorOnce{ + err: make(chan error, 1), + abort: abort, + } +} + +func (b *errorOnce) Err() <-chan error { + return b.err +} + +func (b *errorOnce) Reportf(msg string, args ...interface{}) { + b.Report(fmt.Errorf(msg, args...)) +} + +func (b *errorOnce) Report(err error) { + b.once.Do(func() { + select { + case b.err <- err: + default: + } + }) +} + +func (b *errorOnce) Send(errIn <-chan error) ErrorOnce { + go b.forward(errIn) + return b +} + +func (b *errorOnce) forward(errIn <-chan error) { + if errIn == nil { + b.Report(nil) + return + } + select { + case err, _ := <-errIn: + b.Report(err) + case <-b.abort: + b.Report(errProcessTerminated) + } +} + +type processAdapter struct { + parent Process + delegate Doer +} + +func (p *processAdapter) Do(a Action) <-chan error { + if p == nil || p.parent == nil || p.delegate == nil { + return ErrorChan(errIllegalState) + } + errCh := NewErrorOnce(p.Done()) + go func() { + errOuter := p.parent.Do(func() { + 
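+			// run the action through the delegate inside the parent's
+			// execution context; the delegate's result (errInner) and any
+			// scheduling failure from the parent (errOuter) feed the same
+			// ErrorOnce, so the caller observes at most one error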
errInner := p.delegate.Do(a)
+			errCh.forward(errInner)
+		})
+		// if the outer err is !nil then either the parent failed to schedule
+		// the action, or else it backgrounded the scheduling task.
+		if errOuter != nil {
+			errCh.forward(errOuter)
+		}
+	}()
+	return errCh.Err()
+}
+
+func (p *processAdapter) End() <-chan struct{} {
+	if p != nil && p.parent != nil {
+		return p.parent.End()
+	}
+	return nil
+}
+
+func (p *processAdapter) Done() <-chan struct{} {
+	if p != nil && p.parent != nil {
+		return p.parent.Done()
+	}
+	return nil
+}
+
+func (p *processAdapter) Running() <-chan struct{} {
+	if p != nil && p.parent != nil {
+		return p.parent.Running()
+	}
+	return nil
+}
+
+func (p *processAdapter) OnError(ch <-chan error, f func(error)) <-chan struct{} {
+	if p != nil && p.parent != nil {
+		return p.parent.OnError(ch, f)
+	}
+	return nil
+}
+
+// returns a process that, within its execution context, delegates to the specified Doer.
+// if the given Doer instance is nil, a valid Process is still returned though calls to its
+// Do() implementation will always return errIllegalState.
+// if the given Process instance is nil then in addition to the behavior in the prior sentence,
+// calls to End() and Done() are effectively noops.
+func DoWith(other Process, d Doer) Process {
+	return &processAdapter{
+		parent:   other,
+		delegate: d,
+	}
+}
+
+func ErrorChanf(msg string, args ...interface{}) <-chan error {
+	return ErrorChan(fmt.Errorf(msg, args...))
+}
+
+func ErrorChan(err error) <-chan error {
+	if err == nil {
+		return closedErrChan
+	}
+	ch := make(chan error, 1)
+	ch <- err
+	return ch
+}
+
+// invoke f on action a. returns an illegal state error if f is nil.
+func (f DoerFunc) Do(a Action) <-chan error {
+	if f != nil {
+		return f(a)
+	}
+	return ErrorChan(errIllegalState)
+} diff --git a/contrib/mesos/pkg/proc/proc_test.go b/contrib/mesos/pkg/proc/proc_test.go new file mode 100644 index 00000000000..31c034465f6 --- /dev/null +++ b/contrib/mesos/pkg/proc/proc_test.go @@ -0,0 +1,373 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package proc
+
+import (
+	"fmt"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
+	log "github.com/golang/glog"
+)
+
+// logs a testing.Fatalf if the elapsed time d passes before signal chan done is closed
+func fatalAfter(t *testing.T, done <-chan struct{}, d time.Duration, msg string, args ...interface{}) {
+	select {
+	case <-done:
+	case <-time.After(d):
+		t.Fatalf(msg, args...)
+	}
+}
+
+func errorAfter(errOnce ErrorOnce, done <-chan struct{}, d time.Duration, msg string, args ...interface{}) {
+	select {
+	case <-done:
+	case <-time.After(d):
+		errOnce.Reportf(msg, args...)
+	}
+}
+
+// logs a testing.Fatalf if the signal chan closes before the elapsed time d passes
+func fatalOn(t *testing.T, done <-chan struct{}, d time.Duration, msg string, args ...interface{}) {
+	select {
+	case <-done:
+		t.Fatalf(msg, args...)
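+	// reaching the timeout without done closing is the success path here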
+	case <-time.After(d):
+	}
+}
+
+func TestProc_manyEndings(t *testing.T) {
+	p := New()
+	const COUNT = 20
+	var wg sync.WaitGroup
+	wg.Add(COUNT)
+	for i := 0; i < COUNT; i++ {
+		runtime.On(p.End(), wg.Done)
+	}
+	fatalAfter(t, runtime.After(wg.Wait), 5*time.Second, "timed out waiting for loose End()s")
+	fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
+}
+
+func TestProc_singleAction(t *testing.T) {
+	p := New()
+	scheduled := make(chan struct{})
+	called := make(chan struct{})
+
+	go func() {
+		log.Infof("do'ing deferred action")
+		defer close(scheduled)
+		err := p.Do(func() {
+			defer close(called)
+			log.Infof("deferred action invoked")
+		})
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+	}()
+
+	fatalAfter(t, scheduled, 5*time.Second, "timed out waiting for deferred action to be scheduled")
+	fatalAfter(t, called, 5*time.Second, "timed out waiting for deferred action to be invoked")
+
+	p.End()
+
+	fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
+}
+
+func TestProc_singleActionEnd(t *testing.T) {
+	p := New()
+	scheduled := make(chan struct{})
+	called := make(chan struct{})
+
+	go func() {
+		log.Infof("do'ing deferred action")
+		defer close(scheduled)
+		err := p.Do(func() {
+			defer close(called)
+			log.Infof("deferred action invoked")
+			p.End()
+		})
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+	}()
+
+	fatalAfter(t, scheduled, 5*time.Second, "timed out waiting for deferred action to be scheduled")
+	fatalAfter(t, called, 5*time.Second, "timed out waiting for deferred action to be invoked")
+	fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
+}
+
+func TestProc_multiAction(t *testing.T) {
+	p := New()
+	const COUNT = 10
+	var called sync.WaitGroup
+	called.Add(COUNT)
+
+	// test FIFO property
+	next := 0
+	for i := 0; i < COUNT; i++ {
+		log.Infof("do'ing deferred action %d", i)
+		idx := i
+		err := p.Do(func() {
+			defer called.Done()
+			log.Infof("deferred action invoked")
+			if next != idx {
+				t.Fatalf("expected index %d instead of %d", idx, next)
+			}
+			next++
+		})
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+	}
+
+	fatalAfter(t, runtime.After(called.Wait), 2*time.Second, "timed out waiting for deferred actions to be invoked")
+
+	p.End()
+
+	fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
+}
+
+func TestProc_goodLifecycle(t *testing.T) {
+	p := New()
+	p.End()
+	fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
+}
+
+func TestProc_doWithDeadProc(t *testing.T) {
+	p := New()
+	p.End()
+	time.Sleep(100 * time.Millisecond)
+
+	errUnexpected := fmt.Errorf("unexpected execution of delegated action")
+	decorated := DoWith(p, DoerFunc(func(_ Action) <-chan error {
+		return ErrorChan(errUnexpected)
+	}))
+
+	decorated.Do(func() {})
+	fatalAfter(t, decorated.Done(), 5*time.Second, "timed out waiting for process death")
+}
+
+func TestProc_doWith(t *testing.T) {
+	p := New()
+
+	delegated := false
+	decorated := DoWith(p, DoerFunc(func(a Action) <-chan error {
+		delegated = true
+		a()
+		return nil
+	}))
+
+	executed := make(chan struct{})
+	err := decorated.Do(func() {
+		defer close(executed)
+		if !delegated {
+			t.Fatalf("expected delegated execution")
+		}
+	})
+	if err == nil {
+		t.Fatalf("expected !nil error chan")
+	}
+
+	fatalAfter(t, executed, 5*time.Second, "timed out waiting for deferred execution")
+	fatalAfter(t, decorated.OnError(err, func(e error) {
+		t.Fatalf("unexpected error: %v", e)
+	}), 1*time.Second, "timed out waiting for doer result")
+
+	decorated.End()
+	fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
+}
+
+func TestProc_doWithNestedTwice(t *testing.T) {
+	p := New()
+
+	delegated := false
+	decorated := DoWith(p, DoerFunc(func(a Action) <-chan error {
+		a()
+		return nil
+	}))
+
+	decorated2 := DoWith(decorated, DoerFunc(func(a Action) <-chan error {
+		delegated = true
+		a()
+		return nil
+	}))
+
+	executed := make(chan struct{})
+	err := decorated2.Do(func() {
+		defer close(executed)
+		if !delegated {
+			t.Fatalf("expected delegated execution")
+		}
+	})
+	if err == nil {
+		t.Fatalf("expected !nil error chan")
+	}
+
+	fatalAfter(t, executed, 5*time.Second, "timed out waiting for deferred execution")
+	fatalAfter(t, decorated2.OnError(err, func(e error) {
+		t.Fatalf("unexpected error: %v", e)
+	}), 1*time.Second, "timed out waiting for doer result")
+
+	decorated2.End()
+	fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
+}
+
+func TestProc_doWithNestedErrorPropagation(t *testing.T) {
+	p := New()
+
+	delegated := false
+	decorated := DoWith(p, DoerFunc(func(a Action) <-chan error {
+		a()
+		return nil
+	}))
+
+	expectedErr := fmt.Errorf("expecting this")
+	errOnce := NewErrorOnce(p.Done())
+	decorated2 := DoWith(decorated, DoerFunc(func(a Action) <-chan error {
+		delegated = true
+		a()
+		errOnce.Reportf("unexpected error in decorator2")
+		return ErrorChanf("another unexpected error in decorator2")
+	}))
+
+	executed := make(chan struct{})
+	err := decorated2.Do(func() {
+		defer close(executed)
+		if !delegated {
+			t.Fatalf("expected delegated execution")
+		}
+		errOnce.Report(expectedErr)
+	})
+	if err == nil {
+		t.Fatalf("expected !nil error chan")
+	}
+	errOnce.Send(err)
+
+	foundError := false
+	fatalAfter(t, executed, 1*time.Second, "timed out waiting for deferred execution")
+	fatalAfter(t, decorated2.OnError(errOnce.Err(), func(e error) {
+		if e != expectedErr {
+			t.Fatalf("unexpected error: %v", e)
+		} else {
+			foundError = true
+		}
+	}), 1*time.Second, "timed out waiting for doer result")
+
+	if !foundError {
+		t.Fatalf("expected a propagated error")
+	}
+
+	decorated2.End()
+	fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death")
+}
+
+func runDelegationTest(t *testing.T, p Process, name string, errOnce ErrorOnce) {
+	defer func() {
+		t.Logf("runDelegationTest finished at " + time.Now().String())
+	}()
+	var decorated Process
+	decorated = p
+
+	const DEPTH = 100
+	var wg sync.WaitGroup
+	wg.Add(DEPTH)
+	y := 0
+
+	for x := 1; x <= DEPTH; x++ {
+		x := x
+		nextp := DoWith(decorated, DoerFunc(func(a Action) <-chan error {
+			if x == 1 {
+				t.Logf("delegate chain invoked for " + name)
+			}
+			y++
+			if y != x {
+				return ErrorChanf("out of order delegated execution")
+			}
+			defer wg.Done()
+			a()
+			return nil
+		}))
+		decorated = nextp
+	}
+
+	executed := make(chan struct{})
+	errCh := decorated.Do(func() {
+		defer close(executed)
+		if y != DEPTH {
+			errOnce.Reportf("expected delegated execution")
+		}
+		t.Logf("executing deferred action: " + name + " at " + time.Now().String())
+		errOnce.Send(nil) // we completed without error, let the listener know
+	})
+	if errCh == nil {
+		t.Fatalf("expected !nil error chan")
+	}
+
+	// forward any scheduling errors to the listener; NOTHING else should attempt to read
+	// from errCh after this point
+	errOnce.Send(errCh)
+
+	errorAfter(errOnce, executed, 5*time.Second, "timed out waiting for deferred execution")
+	t.Logf("runDelegationTest received executed signal 
at " + time.Now().String()) +} + +func TestProc_doWithNestedX(t *testing.T) { + t.Logf("starting test case at " + time.Now().String()) + p := New() + errOnce := NewErrorOnce(p.Done()) + runDelegationTest(t, p, "nested", errOnce) + <-p.End() + select { + case err := <-errOnce.Err(): + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + case <-time.After(5 * time.Second): + t.Fatalf("timed out waiting for doer result") + } + fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death") +} + +// intended to be run with -race +func TestProc_doWithNestedXConcurrent(t *testing.T) { + p := New() + errOnce := NewErrorOnce(p.Done()) + var wg sync.WaitGroup + const CONC = 20 + wg.Add(CONC) + for i := 0; i < CONC; i++ { + i := i + runtime.After(func() { runDelegationTest(t, p, fmt.Sprintf("nested%d", i), errOnce) }).Then(wg.Done) + } + ch := runtime.After(wg.Wait) + fatalAfter(t, ch, 10*time.Second, "timed out waiting for concurrent delegates") + + <-p.End() + + select { + case err := <-errOnce.Err(): + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + case <-time.After(5 * time.Second): + t.Fatalf("timed out waiting for doer result") + } + + fatalAfter(t, p.Done(), 5*time.Second, "timed out waiting for process death") +} diff --git a/contrib/mesos/pkg/proc/state.go b/contrib/mesos/pkg/proc/state.go new file mode 100644 index 00000000000..f35a2ea8382 --- /dev/null +++ b/contrib/mesos/pkg/proc/state.go @@ -0,0 +1,55 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package proc + +import ( + "sync/atomic" +) + +type stateType int32 + +const ( + stateNew stateType = iota + stateRunning + stateTerminal +) + +func (s *stateType) get() stateType { + return stateType(atomic.LoadInt32((*int32)(s))) +} + +func (s *stateType) transition(from, to stateType) bool { + return atomic.CompareAndSwapInt32((*int32)(s), int32(from), int32(to)) +} + +func (s *stateType) transitionTo(to stateType, unless ...stateType) bool { + if len(unless) == 0 { + atomic.StoreInt32((*int32)(s), int32(to)) + return true + } + for { + state := s.get() + for _, x := range unless { + if state == x { + return false + } + } + if s.transition(state, to) { + return true + } + } +} diff --git a/contrib/mesos/pkg/proc/types.go b/contrib/mesos/pkg/proc/types.go new file mode 100644 index 00000000000..d2cae458b15 --- /dev/null +++ b/contrib/mesos/pkg/proc/types.go @@ -0,0 +1,71 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package proc + +// something that executes in the context of a process +type Action func() + +type Context interface { + // end (terminate) the execution context + End() <-chan struct{} + + // return a signal chan that will close upon the termination of this process + Done() <-chan struct{} +} + +type Doer interface { + // execute some action in some context. actions are to be executed in a + // concurrency-safe manner: no two actions should execute at the same time. + // errors are generated if the action cannot be executed (not by the execution + // of the action) and should be testable with the error API of this package, + // for example, IsProcessTerminated. + Do(Action) <-chan error +} + +// adapter func for Doer interface +type DoerFunc func(Action) <-chan error + +type Process interface { + Context + Doer + + // see top level OnError func. this implementation will terminate upon the arrival of + // an error (and subsequently invoke the error handler, if given) or else the termination + // of the process (testable via IsProcessTerminated). + OnError(<-chan error, func(error)) <-chan struct{} + + // return a signal chan that will close once the process is ready to run actions + Running() <-chan struct{} +} + +// this is an error promise. if we ever start building out support for other promise types it will probably +// make sense to group them in some sort of "promises" package. +type ErrorOnce interface { + // return a chan that only ever sends one error, either obtained via Report() or Forward() + Err() <-chan error + + // reports the given error via Err(), but only if no other errors have been reported or forwarded + Report(error) + Reportf(string, ...interface{}) + + // waits for an error on the incoming chan, the result of which is later obtained via Err() (if no + // other errors have been reported or forwarded) + forward(<-chan error) + + // non-blocking, spins up a goroutine that reports an error (if any) that occurs on the error chan. + Send(<-chan error) ErrorOnce +} diff --git a/contrib/mesos/pkg/profile/doc.go b/contrib/mesos/pkg/profile/doc.go new file mode 100644 index 00000000000..041a3c914d8 --- /dev/null +++ b/contrib/mesos/pkg/profile/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package profile contains reusable code for profiling Go programs with pprof. +package profile diff --git a/contrib/mesos/pkg/profile/profile.go b/contrib/mesos/pkg/profile/profile.go new file mode 100644 index 00000000000..a24fe8a07ae --- /dev/null +++ b/contrib/mesos/pkg/profile/profile.go @@ -0,0 +1,27 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package profile + +import "net/http" +import "net/http/pprof" + +func InstallHandler(m *http.ServeMux) { + // register similar endpoints as net/http/pprof.init() does + m.Handle("/debug/pprof/", http.HandlerFunc(pprof.Index)) + m.Handle("/debug/pprof/profile", http.HandlerFunc(pprof.Profile)) + m.Handle("/debug/pprof/symbol", http.HandlerFunc(pprof.Symbol)) +} diff --git a/contrib/mesos/pkg/queue/delay.go b/contrib/mesos/pkg/queue/delay.go new file mode 100644 index 00000000000..39e93281cd6 --- /dev/null +++ b/contrib/mesos/pkg/queue/delay.go @@ -0,0 +1,373 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package queue + +import ( + "container/heap" + "sync" + "time" + + "github.com/GoogleCloudPlatform/kubernetes/pkg/util" +) + +type qitem struct { + value interface{} + priority Priority + index int + readd func(item *qitem) // re-add the value of the item to the queue +} + +// A priorityQueue implements heap.Interface and holds qitems. +type priorityQueue []*qitem + +func (pq priorityQueue) Len() int { return len(pq) } + +func (pq priorityQueue) Less(i, j int) bool { + return pq[i].priority.ts.Before(pq[j].priority.ts) +} + +func (pq priorityQueue) Swap(i, j int) { + pq[i], pq[j] = pq[j], pq[i] + pq[i].index = i + pq[j].index = j +} + +func (pq *priorityQueue) Push(x interface{}) { + n := len(*pq) + item := x.(*qitem) + item.index = n + *pq = append(*pq, item) +} + +func (pq *priorityQueue) Pop() interface{} { + old := *pq + n := len(old) + item := old[n-1] + item.index = -1 // for safety + *pq = old[0 : n-1] + return item +} + +// concurrency-safe, deadline-oriented queue that returns items after their +// delay period has expired. +type DelayQueue struct { + queue priorityQueue + lock sync.RWMutex + cond sync.Cond +} + +func NewDelayQueue() *DelayQueue { + q := &DelayQueue{} + q.cond.L = &q.lock + return q +} + +func (q *DelayQueue) Add(d Delayed) { + deadline := extractFromDelayed(d) + + q.lock.Lock() + defer q.lock.Unlock() + + // readd using the original deadline computed from the original delay + var readd func(*qitem) + readd = func(qp *qitem) { + q.lock.Lock() + defer q.lock.Unlock() + heap.Push(&q.queue, &qitem{ + value: d, + priority: deadline, + readd: readd, + }) + q.cond.Broadcast() + } + heap.Push(&q.queue, &qitem{ + value: d, + priority: deadline, + readd: readd, + }) + q.cond.Broadcast() +} + +// If there's a deadline reported by d.Deadline() then `d` is added to the +// queue and this func returns true. 
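+//
+// A rough sketch of Add vs. Offer semantics (testjob is the helper type from
+// delay_test.go in this same patch; illustrative only, not canonical usage):
+//
+//	dq := NewDelayQueue()
+//	dq.Add(&testjob{d: time.Second}) // unconditionally queued, pops after ~1s
+//	deadline := time.Now().Add(time.Second)
+//	if !dq.Offer(&testjob{deadline: &deadline}) {
+//		// only reached when Deadline() reports ok == false
+//	}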
+func (q *DelayQueue) Offer(d Deadlined) bool { + deadline, ok := extractFromDeadlined(d) + if ok { + q.lock.Lock() + defer q.lock.Unlock() + heap.Push(&q.queue, &qitem{ + value: d, + priority: deadline, + readd: func(qp *qitem) { + q.Offer(qp.value.(Deadlined)) + }, + }) + q.cond.Broadcast() + } + return ok +} + +// wait for the delay of the next item in the queue to expire, blocking if +// there are no items in the queue. does not guarantee first-come-first-serve +// ordering with respect to clients. +func (q *DelayQueue) Pop() interface{} { + // doesn't implement cancellation, will always return a non-nil value + return q.pop(func() *qitem { + q.lock.Lock() + defer q.lock.Unlock() + for q.queue.Len() == 0 { + q.cond.Wait() + } + x := heap.Pop(&q.queue) + item := x.(*qitem) + return item + }, nil) +} + +// returns a non-nil value from the queue, or else nil if/when cancelled; if cancel +// is nil then cancellation is disabled and this func must return a non-nil value. +func (q *DelayQueue) pop(next func() *qitem, cancel <-chan struct{}) interface{} { + var ch chan struct{} + for { + item := next() + if item == nil { + // cancelled + return nil + } + x := item.value + waitingPeriod := item.priority.ts.Sub(time.Now()) + if waitingPeriod >= 0 { + // listen for calls to Add() while we're waiting for the deadline + if ch == nil { + ch = make(chan struct{}, 1) + } + go func() { + q.lock.Lock() + defer q.lock.Unlock() + q.cond.Wait() + ch <- struct{}{} + }() + select { + case <-cancel: + item.readd(item) + return nil + case <-ch: + // we may no longer have the earliest deadline, re-try + item.readd(item) + continue + case <-time.After(waitingPeriod): + // noop + case <-item.priority.notify: + // noop + } + } + return x + } +} + +// If multiple adds/updates of a single item happen while an item is in the +// queue before it has been processed, it will only be processed once, and +// when it is processed, the most recent version will be processed. Items are +// popped in order of their priority, currently controlled by a delay or +// deadline assigned to each item in the queue. +type DelayFIFO struct { + // internal deadline-based priority queue + delegate *DelayQueue + // We depend on the property that items in the set are in the queue and vice versa. + items map[string]*qitem + deadlinePolicy DeadlinePolicy +} + +func (q *DelayFIFO) lock() { + q.delegate.lock.Lock() +} + +func (q *DelayFIFO) unlock() { + q.delegate.lock.Unlock() +} + +func (q *DelayFIFO) rlock() { + q.delegate.lock.RLock() +} + +func (q *DelayFIFO) runlock() { + q.delegate.lock.RUnlock() +} + +func (q *DelayFIFO) queue() *priorityQueue { + return &q.delegate.queue +} + +func (q *DelayFIFO) cond() *sync.Cond { + return &q.delegate.cond +} + +// Add inserts an item, and puts it in the queue. The item is only enqueued +// if it doesn't already exist in the set. 
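+//
+// A short sketch of the replacement semantics, mirroring TestDFIFO_sanity_check
+// in delay_test.go (testjob comes from that file; illustrative only):
+//
+//	fifo := NewDelayFIFO()
+//	fifo.Add(&testjob{d: time.Second, uid: "a", instance: 1}, ReplaceExisting)
+//	fifo.Add(&testjob{d: time.Second, uid: "a", instance: 2}, ReplaceExisting)
+//	job, ok := fifo.Get("a") // ok == true, instance == 2; KeepExisting would
+//	                         // have left instance 1 in place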
+func (q *DelayFIFO) Add(d UniqueDelayed, rp ReplacementPolicy) { + deadline := extractFromDelayed(d) + id := d.GetUID() + var adder func(*qitem) + adder = func(*qitem) { + q.add(id, deadline, d, KeepExisting, adder) + } + q.add(id, deadline, d, rp, adder) +} + +func (q *DelayFIFO) Offer(d UniqueDeadlined, rp ReplacementPolicy) bool { + if deadline, ok := extractFromDeadlined(d); ok { + id := d.GetUID() + q.add(id, deadline, d, rp, func(qp *qitem) { q.Offer(qp.value.(UniqueDeadlined), KeepExisting) }) + return true + } + return false +} + +func (q *DelayFIFO) add(id string, deadline Priority, value interface{}, rp ReplacementPolicy, adder func(*qitem)) { + q.lock() + defer q.unlock() + if item, exists := q.items[id]; !exists { + item = &qitem{ + value: value, + priority: deadline, + readd: adder, + } + heap.Push(q.queue(), item) + q.items[id] = item + } else { + // this is an update of an existing item + item.value = rp.replacementValue(item.value, value) + item.priority = q.deadlinePolicy.nextDeadline(item.priority, deadline) + heap.Fix(q.queue(), item.index) + } + q.cond().Broadcast() +} + +// Delete removes an item. It doesn't add it to the queue, because +// this implementation assumes the consumer only cares about the objects, +// not their priority order. +func (f *DelayFIFO) Delete(id string) { + f.lock() + defer f.unlock() + delete(f.items, id) +} + +// List returns a list of all the items. +func (f *DelayFIFO) List() []UniqueID { + f.rlock() + defer f.runlock() + list := make([]UniqueID, 0, len(f.items)) + for _, item := range f.items { + list = append(list, item.value.(UniqueDelayed)) + } + return list +} + +// ContainedIDs returns a util.StringSet containing all IDs of the stored items. +// This is a snapshot of a moment in time, and one should keep in mind that +// other go routines can add or remove items after you call this. +func (c *DelayFIFO) ContainedIDs() util.StringSet { + c.rlock() + defer c.runlock() + set := util.StringSet{} + for id := range c.items { + set.Insert(id) + } + return set +} + +// Get returns the requested item, or sets exists=false. +func (f *DelayFIFO) Get(id string) (UniqueID, bool) { + f.rlock() + defer f.runlock() + if item, exists := f.items[id]; exists { + return item.value.(UniqueID), true + } + return nil, false +} + +// Variant of DelayQueue.Pop() for UniqueDelayed items +func (q *DelayFIFO) Await(timeout time.Duration) UniqueID { + cancel := make(chan struct{}) + ch := make(chan interface{}, 1) + go func() { ch <- q.pop(cancel) }() + var x interface{} + select { + case <-time.After(timeout): + close(cancel) + x = <-ch + case x = <-ch: + // noop + } + if x != nil { + return x.(UniqueID) + } + return nil +} + +// Variant of DelayQueue.Pop() for UniqueDelayed items +func (q *DelayFIFO) Pop() UniqueID { + return q.pop(nil).(UniqueID) +} + +// variant of DelayQueue.Pop that implements optional cancellation +func (q *DelayFIFO) pop(cancel chan struct{}) interface{} { + next := func() *qitem { + q.lock() + defer q.unlock() + for { + for q.queue().Len() == 0 { + signal := make(chan struct{}) + go func() { + defer close(signal) + q.cond().Wait() + }() + select { + case <-cancel: + // we may not have the lock yet, so + // broadcast to abort Wait, then + // return after lock re-acquisition + q.cond().Broadcast() + <-signal + return nil + case <-signal: + // we have the lock, re-check + // the queue for data... 
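+					// (cond.Wait re-acquires the mutex before
+					// signal closes, so the loop condition is
+					// safe to re-evaluate at this point)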
+			}
+		}
+		x := heap.Pop(q.queue())
+		item := x.(*qitem)
+		unique := item.value.(UniqueID)
+		uid := unique.GetUID()
+		if _, ok := q.items[uid]; !ok {
+			// item was deleted, keep looking
+			continue
+		}
+		delete(q.items, uid)
+		return item
+	}
+	}
+	return q.delegate.pop(next, cancel)
+}
+
+func NewDelayFIFO() *DelayFIFO {
+	f := &DelayFIFO{
+		delegate: NewDelayQueue(),
+		items:    map[string]*qitem{},
+	}
+	return f
+} diff --git a/contrib/mesos/pkg/queue/delay_test.go b/contrib/mesos/pkg/queue/delay_test.go new file mode 100644 index 00000000000..df0ea940a07 --- /dev/null +++ b/contrib/mesos/pkg/queue/delay_test.go @@ -0,0 +1,406 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package queue
+
+import (
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+)
+
+const (
+	tolerance = 100 * time.Millisecond // go time delays aren't perfect, this is our tolerance for errors WRT expected timeouts
+)
+
+func timedPriority(t time.Time) Priority {
+	return Priority{ts: t}
+}
+
+func TestPQ(t *testing.T) {
+	t.Parallel()
+
+	var pq priorityQueue
+	if pq.Len() != 0 {
+		t.Fatalf("pq should be empty")
+	}
+
+	now := timedPriority(time.Now())
+	now2 := timedPriority(now.ts.Add(2 * time.Second))
+	pq.Push(&qitem{priority: now2})
+	if pq.Len() != 1 {
+		t.Fatalf("pq.len should be 1")
+	}
+	x := pq.Pop()
+	if x == nil {
+		t.Fatalf("x is nil")
+	}
+	if pq.Len() != 0 {
+		t.Fatalf("pq should be empty")
+	}
+	item := x.(*qitem)
+	if !item.priority.Equal(now2) {
+		t.Fatalf("item.priority != now2")
+	}
+
+	pq.Push(&qitem{priority: now2})
+	pq.Push(&qitem{priority: now2})
+	pq.Push(&qitem{priority: now2})
+	pq.Push(&qitem{priority: now2})
+	pq.Push(&qitem{priority: now2})
+	pq.Pop()
+	pq.Pop()
+	pq.Pop()
+	pq.Pop()
+	pq.Pop()
+	if pq.Len() != 0 {
+		t.Fatalf("pq should be empty")
+	}
+	now4 := timedPriority(now.ts.Add(4 * time.Second))
+	now6 := timedPriority(now.ts.Add(6 * time.Second))
+	pq.Push(&qitem{priority: now2})
+	pq.Push(&qitem{priority: now4})
+	pq.Push(&qitem{priority: now6})
+	pq.Swap(0, 2)
+	if !pq[0].priority.Equal(now6) || !pq[2].priority.Equal(now2) {
+		t.Fatalf("swap failed")
+	}
+	if pq.Less(1, 2) {
+		t.Fatalf("now4 < now2")
+	}
+}
+
+func TestPopEmptyPQ(t *testing.T) {
+	t.Parallel()
+	defer func() {
+		if r := recover(); r == nil {
+			t.Fatalf("Expected panic from popping an empty PQ")
+		}
+	}()
+	var pq priorityQueue
+	pq.Pop()
+}
+
+type testjob struct {
+	d        time.Duration
+	t        time.Time
+	deadline *time.Time
+	uid      string
+	instance int
+}
+
+func (j *testjob) GetDelay() time.Duration {
+	return j.d
+}
+
+func (j testjob) GetUID() string {
+	return j.uid
+}
+
+func (td *testjob) Deadline() (deadline time.Time, ok bool) {
+	if td.deadline != nil {
+		return *td.deadline, true
+	} else {
+		return time.Now(), false
+	}
+}
+
+func TestDQ_sanity_check(t *testing.T) {
+	t.Parallel()
+
+	dq := NewDelayQueue()
+	delay := 2 * time.Second
+	dq.Add(&testjob{d: delay})
+
+	before := time.Now()
+	x := dq.Pop()
+
+	now := time.Now()
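+	// Pop should have blocked for roughly the configured delay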
+	waitPeriod := now.Sub(before)
+
+	if waitPeriod+tolerance < delay {
+		t.Fatalf("delay too short: %v, expected: %v", waitPeriod, delay)
+	}
+	if x == nil {
+		t.Fatalf("x is nil")
+	}
+	item := x.(*testjob)
+	if item.d != delay {
+		t.Fatalf("d != delay")
+	}
+}
+
+func TestDQ_Offer(t *testing.T) {
+	t.Parallel()
+	assert := assert.New(t)
+
+	dq := NewDelayQueue()
+	delay := time.Second
+
+	added := dq.Offer(&testjob{})
+	if added {
+		t.Fatalf("DelayQueue should not add offered job without deadline")
+	}
+
+	deadline := time.Now().Add(delay)
+	added = dq.Offer(&testjob{deadline: &deadline})
+	if !added {
+		t.Fatalf("DelayQueue should add offered job with deadline")
+	}
+
+	before := time.Now()
+	x := dq.Pop()
+
+	now := time.Now()
+	waitPeriod := now.Sub(before)
+
+	if waitPeriod+tolerance < delay {
+		t.Fatalf("delay too short: %v, expected: %v", waitPeriod, delay)
+	}
+	assert.NotNil(x)
+	assert.Equal(x.(*testjob).deadline, &deadline)
+}
+
+func TestDQ_ordered_add_pop(t *testing.T) {
+	t.Parallel()
+
+	dq := NewDelayQueue()
+	dq.Add(&testjob{d: 2 * time.Second})
+	dq.Add(&testjob{d: 1 * time.Second})
+	dq.Add(&testjob{d: 3 * time.Second})
+
+	var finished [3]*testjob
+	before := time.Now()
+	idx := int32(-1)
+	ch := make(chan bool, 3)
+	//TODO: replace with `for range finished` once Go 1.3 support is dropped
+	for n := 0; n < len(finished); n++ {
+		go func() {
+			var ok bool
+			x := dq.Pop()
+			i := atomic.AddInt32(&idx, 1)
+			if finished[i], ok = x.(*testjob); !ok {
+				t.Fatalf("expected a *testjob, not %v", x)
+			}
+			finished[i].t = time.Now()
+			ch <- true
+		}()
+	}
+	<-ch
+	<-ch
+	<-ch
+
+	after := time.Now()
+	totalDelay := after.Sub(before)
+	if totalDelay+tolerance < (3 * time.Second) {
+		t.Fatalf("totalDelay < 3s: %v", totalDelay)
+	}
+	for i, v := range finished {
+		if v == nil {
+			t.Fatalf("task %d was nil", i)
+		}
+		expected := time.Duration(i+1) * time.Second
+		if v.d != expected {
+			t.Fatalf("task %d had delay-priority %v, expected %v", i, v.d, expected)
+		}
+		actualDelay := v.t.Sub(before)
+		if actualDelay+tolerance < v.d {
+			t.Fatalf("task %d had actual-delay %v < expected delay %v", i, actualDelay, v.d)
+		}
+	}
+}
+
+func TestDQ_always_pop_earliest_deadline(t *testing.T) {
+	t.Parallel()
+
+	// add a testjob with delay of 2s
+	// spawn a func f1 that attempts to Pop() and wait for f1 to begin
+	// add a testjob with a delay of 1s
+	// check that the func f1 actually popped the 1s task (not the 2s task)
+
+	dq := NewDelayQueue()
+	dq.Add(&testjob{d: 2 * time.Second})
+	ch := make(chan *testjob)
+	started := make(chan bool)
+
+	go func() {
+		started <- true
+		x := dq.Pop()
+		job := x.(*testjob)
+		job.t = time.Now()
+		ch <- job
+	}()
+
+	<-started
+	time.Sleep(500 * time.Millisecond) // give plenty of time for Pop() to enter
+	expected := 1 * time.Second
+	dq.Add(&testjob{d: expected})
+	job := <-ch
+
+	if expected != job.d {
+		t.Fatalf("Expected delay-priority of %v, instead got %v", expected, job.d)
+	}
+
+	job = dq.Pop().(*testjob)
+	expected = 2 * time.Second
+	if expected != job.d {
+		t.Fatalf("Expected delay-priority of %v, instead got %v", expected, job.d)
+	}
+}
+
+func TestDQ_always_pop_earliest_deadline_multi(t *testing.T) {
+	t.Parallel()
+
+	dq := NewDelayQueue()
+	dq.Add(&testjob{d: 2 * time.Second})
+
+	ch := make(chan *testjob)
+	multi := 10
+	started := make(chan bool, multi)
+
+	go func() {
+		started <- true
+		for i := 0; i < multi; i++ {
+			x := dq.Pop()
+			job := x.(*testjob)
+			job.t = time.Now()
+			ch <- job
+		}
+	}()
+
+	<-started
+	time.Sleep(500 * time.Millisecond) // give plenty of time for Pop() to enter
+	expected := 1 * time.Second
+
+	for i := 0; i < multi; i++ {
+		dq.Add(&testjob{d: expected})
+	}
+	for i := 0; i < multi; i++ {
+		job := <-ch
+		if expected != job.d {
+			t.Fatalf("Expected delay-priority of %v, instead got %v", expected, job.d)
+		}
+	}
+
+	job := dq.Pop().(*testjob)
+	expected = 2 * time.Second
+	if expected != job.d {
+		t.Fatalf("Expected delay-priority of %v, instead got %v", expected, job.d)
+	}
+}
+
+func TestDQ_negative_delay(t *testing.T) {
+	t.Parallel()
+
+	dq := NewDelayQueue()
+	delay := -2 * time.Second
+	dq.Add(&testjob{d: delay})
+
+	before := time.Now()
+	x := dq.Pop()
+
+	now := time.Now()
+	waitPeriod := now.Sub(before)
+
+	if waitPeriod > tolerance {
+		t.Fatalf("delay too long: %v, expected something less than: %v", waitPeriod, tolerance)
+	}
+	if x == nil {
+		t.Fatalf("x is nil")
+	}
+	item := x.(*testjob)
+	if item.d != delay {
+		t.Fatalf("d != delay")
+	}
+}
+
+func TestDFIFO_sanity_check(t *testing.T) {
+	t.Parallel()
+	assert := assert.New(t)
+
+	df := NewDelayFIFO()
+	delay := 2 * time.Second
+	df.Add(&testjob{d: delay, uid: "a", instance: 1}, ReplaceExisting)
+	assert.True(df.ContainedIDs().Has("a"))
+
+	// re-add by ReplaceExisting
+	df.Add(&testjob{d: delay, uid: "a", instance: 2}, ReplaceExisting)
+	assert.True(df.ContainedIDs().Has("a"))
+
+	a, ok := df.Get("a")
+	assert.True(ok)
+	assert.Equal(a.(*testjob).instance, 2)
+
+	// re-add by KeepExisting
+	df.Add(&testjob{d: delay, uid: "a", instance: 3}, KeepExisting)
+	assert.True(df.ContainedIDs().Has("a"))
+
+	a, ok = df.Get("a")
+	assert.True(ok)
+	assert.Equal(a.(*testjob).instance, 2)
+
+	// pop last
+	before := time.Now()
+	x := df.Pop()
+	assert.Equal(x.(*testjob).instance, 2)
+
+	now := time.Now()
+	waitPeriod := now.Sub(before)
+
+	if waitPeriod+tolerance < delay {
+		t.Fatalf("delay too short: %v, expected: %v", waitPeriod, delay)
+	}
+	if x == nil {
+		t.Fatalf("x is nil")
+	}
+	item := x.(*testjob)
+	if item.d != delay {
+		t.Fatalf("d != delay")
+	}
+}
+
+func TestDFIFO_Offer(t *testing.T) {
+	t.Parallel()
+	assert := assert.New(t)
+
+	dq := NewDelayFIFO()
+	delay := time.Second
+
+	added := dq.Offer(&testjob{instance: 1}, ReplaceExisting)
+	if added {
+		t.Fatalf("DelayFIFO should not add offered job without deadline")
+	}
+
+	deadline := time.Now().Add(delay)
+	added = dq.Offer(&testjob{deadline: &deadline, instance: 2}, ReplaceExisting)
+	if !added {
+		t.Fatalf("DelayFIFO should add offered job with deadline")
+	}
+
+	before := time.Now()
+	x := dq.Pop()
+
+	now := time.Now()
+	waitPeriod := now.Sub(before)
+
+	if waitPeriod+tolerance < delay {
+		t.Fatalf("delay too short: %v, expected: %v", waitPeriod, delay)
+	}
+	assert.NotNil(x)
+	assert.Equal(x.(*testjob).instance, 2)
+} diff --git a/contrib/mesos/pkg/queue/doc.go b/contrib/mesos/pkg/queue/doc.go new file mode 100644 index 00000000000..c35bd971bc7 --- /dev/null +++ b/contrib/mesos/pkg/queue/doc.go @@ -0,0 +1,19 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package queue provides several queue implementations, originally +// inspired by Kubernetes pkg/client/cache/fifo. +package queue diff --git a/contrib/mesos/pkg/queue/historical.go b/contrib/mesos/pkg/queue/historical.go new file mode 100644 index 00000000000..09148acdb83 --- /dev/null +++ b/contrib/mesos/pkg/queue/historical.go @@ -0,0 +1,403 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package queue + +import ( + "fmt" + "reflect" + "sync" + "time" + + "github.com/GoogleCloudPlatform/kubernetes/pkg/util" +) + +type entry struct { + value UniqueCopyable + event EventType +} + +type deletedEntry struct { + *entry + expiration time.Time +} + +func (e *entry) Value() UniqueCopyable { + return e.value +} + +func (e *entry) Copy() Copyable { + if e == nil { + return nil + } + return &entry{e.value.Copy().(UniqueCopyable), e.event} +} + +func (e *entry) Is(types EventType) bool { + return types&e.event != 0 +} + +func (e *deletedEntry) Copy() Copyable { + if e == nil { + return nil + } + return &deletedEntry{e.entry.Copy().(*entry), e.expiration} +} + +// deliver a message +type pigeon func(msg Entry) + +func dead(msg Entry) { + // intentionally blank +} + +// HistoricalFIFO receives adds and updates from a Reflector, and puts them in a queue for +// FIFO order processing. If multiple adds/updates of a single item happen while +// an item is in the queue before it has been processed, it will only be +// processed once, and when it is processed, the most recent version will be +// processed. This can't be done with a channel. +type HistoricalFIFO struct { + lock sync.RWMutex + cond sync.Cond + items map[string]Entry // We depend on the property that items in the queue are in the set. + queue []string + carrier pigeon // may be dead, but never nil + gcc int + lingerTTL time.Duration +} + +// panics if obj doesn't implement UniqueCopyable; otherwise returns the same, typecast object +func checkType(obj interface{}) UniqueCopyable { + if v, ok := obj.(UniqueCopyable); !ok { + panic(fmt.Sprintf("Illegal object type, expected UniqueCopyable: %T", obj)) + } else { + return v + } +} + +// Add inserts an item, and puts it in the queue. The item is only enqueued +// if it doesn't already exist in the set. +func (f *HistoricalFIFO) Add(v interface{}) error { + obj := checkType(v) + notifications := []Entry(nil) + defer func() { + for _, e := range notifications { + f.carrier(e) + } + }() + + f.lock.Lock() + defer f.lock.Unlock() + + id := obj.GetUID() + if entry, exists := f.items[id]; !exists { + f.queue = append(f.queue, id) + } else { + if entry.Is(DELETE_EVENT | POP_EVENT) { + f.queue = append(f.queue, id) + } + } + notifications = f.merge(id, obj) + f.cond.Broadcast() + return nil +} + +// Update is the same as Add in this implementation. 
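+// (Add only re-queues an id whose latest event is DELETE_EVENT or POP_EVENT;
+// for a live entry it merges the new value in place, so calling Update on an
+// already-queued object never produces a duplicate queue entry.)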
+func (f *HistoricalFIFO) Update(obj interface{}) error {
+	return f.Add(obj)
+}
+
+// Delete removes an item. It doesn't add it to the queue, because
+// this implementation assumes the consumer only cares about the objects,
+// not the order in which they were created/added.
+func (f *HistoricalFIFO) Delete(v interface{}) error {
+	obj := checkType(v)
+	deleteEvent := (Entry)(nil)
+	defer func() {
+		f.carrier(deleteEvent)
+	}()
+
+	f.lock.Lock()
+	defer f.lock.Unlock()
+	id := obj.GetUID()
+	item, exists := f.items[id]
+	if exists && !item.Is(DELETE_EVENT) {
+		e := item.(*entry)
+		e.event = DELETE_EVENT
+		deleteEvent = &deletedEntry{e, time.Now().Add(f.lingerTTL)}
+		f.items[id] = deleteEvent
+	}
+	return nil
+}
+
+// List returns a list of all the items.
+func (f *HistoricalFIFO) List() []interface{} {
+	f.lock.RLock()
+	defer f.lock.RUnlock()
+
+	// TODO(jdef): slightly overallocates b/c of deleted items
+	list := make([]interface{}, 0, len(f.queue))
+
+	for _, entry := range f.items {
+		if entry.Is(DELETE_EVENT | POP_EVENT) {
+			continue
+		}
+		list = append(list, entry.Value().Copy())
+	}
+	return list
+}
+
+// ListKeys returns the keys of all the items, excluding deleted and popped entries.
+func (f *HistoricalFIFO) ListKeys() []string {
+	f.lock.RLock()
+	defer f.lock.RUnlock()
+
+	// TODO(jdef): slightly overallocates b/c of deleted items
+	list := make([]string, 0, len(f.queue))
+
+	for key, entry := range f.items {
+		if entry.Is(DELETE_EVENT | POP_EVENT) {
+			continue
+		}
+		list = append(list, key)
+	}
+	return list
+}
+
+// ContainedIDs returns a util.StringSet containing all IDs of the stored items.
+// This is a snapshot of a moment in time, and one should keep in mind that
+// other go routines can add or remove items after you call this.
+func (c *HistoricalFIFO) ContainedIDs() util.StringSet {
+	c.lock.RLock()
+	defer c.lock.RUnlock()
+	set := util.StringSet{}
+	for id, entry := range c.items {
+		if entry.Is(DELETE_EVENT | POP_EVENT) {
+			continue
+		}
+		set.Insert(id)
+	}
+	return set
+}
+
+// Get returns the requested item, or sets exists=false.
+func (f *HistoricalFIFO) Get(v interface{}) (interface{}, bool, error) {
+	obj := checkType(v)
+	return f.GetByKey(obj.GetUID())
+}
+
+// GetByKey returns the requested item, or sets exists=false.
+func (f *HistoricalFIFO) GetByKey(id string) (interface{}, bool, error) {
+	f.lock.RLock()
+	defer f.lock.RUnlock()
+	entry, exists := f.items[id]
+	if exists && !entry.Is(DELETE_EVENT|POP_EVENT) {
+		return entry.Value().Copy(), true, nil
+	}
+	return nil, false, nil
+}
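+
+// The sketch below is an editor's illustration only, not part of the original
+// change: it shows one way a consumer might combine Add, Await, and GetByKey.
+// The job argument stands for any hypothetical UniqueCopyable implementation.
+func exampleConsume(f FIFO, job UniqueCopyable) {
+	_ = f.Add(job)
+	if x := f.Await(time.Second); x != nil {
+		// x is a copy of the most recent version of the item; once popped,
+		// the item no longer shows up via Get/GetByKey or List.
+		_, exists, _ := f.GetByKey(x.(UniqueCopyable).GetUID())
+		_ = exists // false at this point
+	}
+}
+
+// Poll returns true if an entry exists for the given id and its state matches
+// the given event mask.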
+func (f *HistoricalFIFO) Poll(id string, t EventType) bool {
+	f.lock.RLock()
+	defer f.lock.RUnlock()
+	entry, exists := f.items[id]
+	return exists && entry.Is(t)
+}
+
+// Await is a variant of Pop() that gives up and returns nil if no item
+// becomes available within the given timeout.
+func (q *HistoricalFIFO) Await(timeout time.Duration) interface{} {
+	cancel := make(chan struct{})
+	ch := make(chan interface{}, 1)
+	go func() { ch <- q.pop(cancel) }()
+	select {
+	case <-time.After(timeout):
+		close(cancel)
+		return <-ch
+	case x := <-ch:
+		return x
+	}
+}
+
+func (f *HistoricalFIFO) Pop() interface{} {
+	return f.pop(nil)
+}
+
+func (f *HistoricalFIFO) pop(cancel chan struct{}) interface{} {
+	popEvent := (Entry)(nil)
+	defer func() {
+		f.carrier(popEvent)
+	}()
+
+	f.lock.Lock()
+	defer f.lock.Unlock()
+	for {
+		for len(f.queue) == 0 {
+			signal := make(chan struct{})
+			go func() {
+				defer close(signal)
+				f.cond.Wait()
+			}()
+			select {
+			case <-cancel:
+				// we may not have the lock yet, so
+				// broadcast to abort Wait, then
+				// return after lock re-acquisition
+				f.cond.Broadcast()
+				<-signal
+				return nil
+			case <-signal:
+				// we have the lock, re-check
+				// the queue for data...
+			}
+		}
+		id := f.queue[0]
+		f.queue = f.queue[1:]
+		item, ok := f.items[id]
+		if !ok || item.Is(DELETE_EVENT|POP_EVENT) {
+			// Item may have been deleted subsequently.
+			continue
+		}
+		value := item.Value()
+		popEvent = &entry{value, POP_EVENT}
+		f.items[id] = popEvent
+		return value.Copy()
+	}
+}
+
+func (f *HistoricalFIFO) Replace(objs []interface{}) error {
+	notifications := make([]Entry, 0, len(objs))
+	defer func() {
+		for _, e := range notifications {
+			f.carrier(e)
+		}
+	}()
+
+	idToObj := make(map[string]interface{})
+	for _, v := range objs {
+		obj := checkType(v)
+		idToObj[obj.GetUID()] = v
+	}
+
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	f.queue = f.queue[:0]
+	now := time.Now()
+	for id, v := range f.items {
+		if _, exists := idToObj[id]; !exists && !v.Is(DELETE_EVENT) {
+			// a non-deleted entry in the items list that doesn't show up in the
+			// new list: mark it as deleted
+			ent := v.(*entry)
+			ent.event = DELETE_EVENT
+			e := &deletedEntry{ent, now.Add(f.lingerTTL)}
+			f.items[id] = e
+			notifications = append(notifications, e)
+		}
+	}
+	for id, v := range idToObj {
+		obj := checkType(v)
+		f.queue = append(f.queue, id)
+		n := f.merge(id, obj)
+		notifications = append(notifications, n...)
+	}
+	if len(f.queue) > 0 {
+		f.cond.Broadcast()
+	}
+	return nil
+}
+
+// garbage collect DELETEd items whose TTL has expired; the IDs of such items are removed
+// from the queue. This impl assumes that caller has acquired state lock.
+func (f *HistoricalFIFO) gc() {
+	now := time.Now()
+	deleted := make(map[string]struct{})
+	for id, v := range f.items {
+		if v.Is(DELETE_EVENT) {
+			ent := v.(*deletedEntry)
+			if ent.expiration.Before(now) {
+				delete(f.items, id)
+				deleted[id] = struct{}{}
+			}
+		}
+	}
+	// remove deleted items from the queue, will likely (slightly) overallocate here
+	queue := make([]string, 0, len(f.queue))
+	for _, id := range f.queue {
+		if _, exists := deleted[id]; !exists {
+			queue = append(queue, id)
+		}
+	}
+	f.queue = queue
+}
+
+// Assumes that the caller has acquired the state lock.
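+// The notifications produced by merge are, in order (editor's summary of the
+// logic below, for reference):
+//
+//	no prior entry for id                  -> ADD_EVENT
+//	prior live entry with a different UID  -> DELETE_EVENT, then ADD_EVENT
+//	prior entry, value not deep-equal      -> UPDATE_EVENT
+//	prior entry, value unchanged           -> (no notification)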
+func (f *HistoricalFIFO) merge(id string, obj UniqueCopyable) (notifications []Entry) {
+	item, exists := f.items[id]
+	now := time.Now()
+	if !exists {
+		e := &entry{obj.Copy().(UniqueCopyable), ADD_EVENT}
+		f.items[id] = e
+		notifications = append(notifications, e)
+	} else {
+		if !item.Is(DELETE_EVENT) && item.Value().GetUID() != obj.GetUID() {
+			// hidden DELETE!
+			// (1) append a DELETE
+			// (2) append an ADD
+			// .. and notify listeners in that order
+			ent := item.(*entry)
+			ent.event = DELETE_EVENT
+			e1 := &deletedEntry{ent, now.Add(f.lingerTTL)}
+			e2 := &entry{obj.Copy().(UniqueCopyable), ADD_EVENT}
+			f.items[id] = e2
+			notifications = append(notifications, e1, e2)
+		} else if !reflect.DeepEqual(obj, item.Value()) {
+			//TODO(jdef): it would be nice if we could rely on resource versions
+			//instead of doing a DeepEqual. Maybe someday we'll be able to.
+			e := &entry{obj.Copy().(UniqueCopyable), UPDATE_EVENT}
+			f.items[id] = e
+			notifications = append(notifications, e)
+		}
+	}
+	// check for garbage collection
+	f.gcc++
+	if f.gcc%256 == 0 { //TODO(jdef): extract constant
+		f.gcc = 0
+		f.gc()
+	}
+	return
+}
+
+// NewHistorical returns a FIFO that can be used to queue up items to
+// process. If a non-nil channel is provided, then modifications to the
+// FIFO are delivered on it as they occur.
+func NewHistorical(ch chan<- Entry) FIFO {
+	carrier := dead
+	if ch != nil {
+		carrier = func(msg Entry) {
+			if msg != nil {
+				ch <- msg.Copy().(Entry)
+			}
+		}
+	}
+	f := &HistoricalFIFO{
+		items:     map[string]Entry{},
+		queue:     []string{},
+		carrier:   carrier,
+		lingerTTL: 5 * time.Minute, // TODO(jdef): extract constant
+	}
+	f.cond.L = &f.lock
+	return f
+}
diff --git a/contrib/mesos/pkg/queue/historical_test.go b/contrib/mesos/pkg/queue/historical_test.go
new file mode 100644
index 00000000000..4477601beda
--- /dev/null
+++ b/contrib/mesos/pkg/queue/historical_test.go
@@ -0,0 +1,191 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +package queue + +import ( + "fmt" + "testing" + "time" +) + +type _int int +type _uint uint + +func (i _int) Copy() Copyable { + return i +} + +func (i _int) GetUID() string { + return fmt.Sprintf("INT%d", int(i)) +} + +func (i _uint) Copy() Copyable { + return i +} + +func (i _uint) GetUID() string { + return fmt.Sprintf("UINT%d", uint64(i)) +} + +type testObj struct { + id string + value int +} + +func (i *testObj) Copy() Copyable { + if i == nil { + return nil + } else { + return &testObj{i.id, i.value} + } +} + +func (i *testObj) GetUID() string { + return i.id +} + +func TestFIFO_basic(t *testing.T) { + f := NewHistorical(nil) + const amount = 500 + go func() { + for i := 0; i < amount; i++ { + f.Add(_int(i + 1)) + } + }() + go func() { + for u := uint(0); u < amount; u++ { + f.Add(_uint(u + 1)) + } + }() + + lastInt := _int(0) + lastUint := _uint(0) + for i := 0; i < amount*2; i++ { + switch obj := f.Pop().(type) { + case _int: + if obj <= lastInt { + t.Errorf("got %v (int) out of order, last was %v", obj, lastInt) + } + lastInt = obj + case _uint: + if obj <= lastUint { + t.Errorf("got %v (uint) out of order, last was %v", obj, lastUint) + } else { + lastUint = obj + } + default: + t.Fatalf("unexpected type %#v", obj) + } + } +} + +func TestFIFO_addUpdate(t *testing.T) { + f := NewHistorical(nil) + f.Add(&testObj{"foo", 10}) + f.Update(&testObj{"foo", 15}) + got := make(chan *testObj, 2) + go func() { + for { + got <- f.Pop().(*testObj) + } + }() + + first := <-got + if e, a := 15, first.value; e != a { + t.Errorf("Didn't get updated value (%v), got %v", e, a) + } + select { + case unexpected := <-got: + t.Errorf("Got second value %v", unexpected) + case <-time.After(50 * time.Millisecond): + } + _, exists, _ := f.GetByKey("foo") + if exists { + t.Errorf("item did not get removed") + } +} + +func TestFIFO_addReplace(t *testing.T) { + f := NewHistorical(nil) + f.Add(&testObj{"foo", 10}) + f.Replace([]interface{}{&testObj{"foo", 15}}) + got := make(chan *testObj, 2) + go func() { + for { + got <- f.Pop().(*testObj) + } + }() + + first := <-got + if e, a := 15, first.value; e != a { + t.Errorf("Didn't get updated value (%v), got %v", e, a) + } + select { + case unexpected := <-got: + t.Errorf("Got second value %v", unexpected) + case <-time.After(50 * time.Millisecond): + } + _, exists, _ := f.GetByKey("foo") + if exists { + t.Errorf("item did not get removed") + } +} + +func TestFIFO_detectLineJumpers(t *testing.T) { + f := NewHistorical(nil) + + f.Add(&testObj{"foo", 10}) + f.Add(&testObj{"bar", 1}) + f.Add(&testObj{"foo", 11}) + f.Add(&testObj{"foo", 13}) + f.Add(&testObj{"zab", 30}) + + err := error(nil) + done := make(chan struct{}) + go func() { + defer close(done) + if e, a := 13, f.Pop().(*testObj).value; a != e { + err = fmt.Errorf("expected %d, got %d", e, a) + return + } + + f.Add(&testObj{"foo", 14}) // ensure foo doesn't jump back in line + + if e, a := 1, f.Pop().(*testObj).value; a != e { + err = fmt.Errorf("expected %d, got %d", e, a) + return + } + + if e, a := 30, f.Pop().(*testObj).value; a != e { + err = fmt.Errorf("expected %d, got %d", e, a) + return + } + + if e, a := 14, f.Pop().(*testObj).value; a != e { + err = fmt.Errorf("expected %d, got %d", e, a) + return + } + }() + select { + case <-done: + if err != nil { + t.Fatal(err) + } + case <-time.After(1 * time.Second): + t.Fatal("Deadlocked unit test") + } +} diff --git a/contrib/mesos/pkg/queue/interface.go b/contrib/mesos/pkg/queue/interface.go new file mode 100644 index 00000000000..7191552bfbd --- 
/dev/null
+++ b/contrib/mesos/pkg/queue/interface.go
@@ -0,0 +1,103 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package queue
+
+import (
+	"time"
+
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
+)
+
+type EventType int
+
+const (
+	ADD_EVENT EventType = 1 << iota
+	UPDATE_EVENT
+	DELETE_EVENT
+	POP_EVENT
+)
+
+type Entry interface {
+	Copyable
+	Value() UniqueCopyable
+	// types is a logically OR'd combination of EventType, e.g. ADD_EVENT|UPDATE_EVENT
+	Is(types EventType) bool
+}
+
+type Copyable interface {
+	// return an independent copy (deep clone) of the current object
+	Copy() Copyable
+}
+
+type UniqueID interface {
+	GetUID() string
+}
+
+type UniqueCopyable interface {
+	Copyable
+	UniqueID
+}
+
+type FIFO interface {
+	cache.Store
+
+	// Pop waits until an item is ready and returns it. If multiple items are
+	// ready, they are returned in the order in which they were added/updated.
+	// The item is removed from the queue (and the store) before it is returned,
+	// so if you don't successfully process it, you need to add it back with Add().
+	Pop() interface{}
+
+	// Await attempts to Pop within the given interval; upon success the non-nil
+	// item is returned, otherwise nil
+	Await(timeout time.Duration) interface{}
+
+	// Poll reports whether an entry exists for the id that matches the event mask.
+	Poll(id string, types EventType) bool
+}
+
+type Delayed interface {
+	// return the remaining delay; a non-positive value indicates no delay
+	GetDelay() time.Duration
+}
+
+type Deadlined interface {
+	// when ok, returns the time when this object should be activated/executed/evaluated
+	Deadline() (deadline time.Time, ok bool)
+}
+
+// No objects are ever expected to be sent over this channel. References to BreakChan
+// instances may be nil (always blocking). Signalling over this channel is performed by
+// closing the channel. As such there can only ever be a single signal sent over the
+// lifetime of the channel.
+type BreakChan <-chan struct{}
+
+// an optional interface to be implemented by Delayed objects; returning a nil
+// channel from Breaker() results in waiting the full delay duration
+type Breakout interface {
+	// return a channel that signals early departure from a blocking delay
+	Breaker() BreakChan
+}
+
+type UniqueDelayed interface {
+	UniqueID
+	Delayed
+}
+
+type UniqueDeadlined interface {
+	UniqueID
+	Deadlined
+}
diff --git a/contrib/mesos/pkg/queue/policy.go b/contrib/mesos/pkg/queue/policy.go
new file mode 100644
index 00000000000..5798aec927d
--- /dev/null
+++ b/contrib/mesos/pkg/queue/policy.go
@@ -0,0 +1,70 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package queue
+
+// Decide whether a pre-existing deadline for an item in a delay-queue should be
+// updated if an attempt is made to offer/add a new deadline for said item. Whether
+// the deadline changes or not has zero impact on the data blob associated with the
+// entry in the queue.
+type DeadlinePolicy int
+
+const (
+	PreferLatest DeadlinePolicy = iota
+	PreferEarliest
+)
+
+// Decide whether a pre-existing data blob in a delay-queue should be replaced if
+// an attempt is made to add/offer a new data blob in its place. Whether the data is
+// replaced has no bearing on the deadline (priority) of the item in the queue.
+type ReplacementPolicy int
+
+const (
+	KeepExisting ReplacementPolicy = iota
+	ReplaceExisting
+)
+
+func (rp ReplacementPolicy) replacementValue(original, replacement interface{}) (result interface{}) {
+	switch rp {
+	case KeepExisting:
+		result = original
+	case ReplaceExisting:
+		fallthrough
+	default:
+		result = replacement
+	}
+	return
+}
+
+func (dp DeadlinePolicy) nextDeadline(a, b Priority) (result Priority) {
+	switch dp {
+	case PreferEarliest:
+		if a.ts.Before(b.ts) {
+			result = a
+		} else {
+			result = b
+		}
+	case PreferLatest:
+		fallthrough
+	default:
+		if a.ts.After(b.ts) {
+			result = a
+		} else {
+			result = b
+		}
+	}
+	return
+}
diff --git a/contrib/mesos/pkg/queue/priority.go b/contrib/mesos/pkg/queue/priority.go
new file mode 100644
index 00000000000..f2ccb8b735e
--- /dev/null
+++ b/contrib/mesos/pkg/queue/priority.go
@@ -0,0 +1,56 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +package queue + +import ( + "time" +) + +type Priority struct { + ts time.Time // timestamp + notify BreakChan // notification channel +} + +func (p Priority) Equal(other Priority) bool { + return p.ts.Equal(other.ts) && p.notify == other.notify +} + +func extractFromDelayed(d Delayed) Priority { + deadline := time.Now().Add(d.GetDelay()) + breaker := BreakChan(nil) + if breakout, good := d.(Breakout); good { + breaker = breakout.Breaker() + } + return Priority{ + ts: deadline, + notify: breaker, + } +} + +func extractFromDeadlined(d Deadlined) (Priority, bool) { + if ts, ok := d.Deadline(); ok { + breaker := BreakChan(nil) + if breakout, good := d.(Breakout); good { + breaker = breakout.Breaker() + } + return Priority{ + ts: ts, + notify: breaker, + }, true + } + return Priority{}, false +} diff --git a/contrib/mesos/pkg/redirfd/doc.go b/contrib/mesos/pkg/redirfd/doc.go new file mode 100644 index 00000000000..1092ad941d4 --- /dev/null +++ b/contrib/mesos/pkg/redirfd/doc.go @@ -0,0 +1,19 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Some file descriptor manipulation funcs (Unix-Only), inspired by +// https://github.com/skarnet/execline/blob/master/src/execline/redirfd.c +package redirfd diff --git a/contrib/mesos/pkg/redirfd/file_descriptor.go b/contrib/mesos/pkg/redirfd/file_descriptor.go new file mode 100644 index 00000000000..2c717e15c9f --- /dev/null +++ b/contrib/mesos/pkg/redirfd/file_descriptor.go @@ -0,0 +1,41 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package redirfd + +import ( + "fmt" + "strconv" +) + +// FileDescriptor mirrors unix-specific indexes for cross-platform use +type FileDescriptor int + +const ( + InvalidFD FileDescriptor = -1 + Stdin FileDescriptor = 0 + Stdout FileDescriptor = 1 + Stderr FileDescriptor = 2 +) + +// ParseFileDescriptor parses a string formatted file descriptor +func ParseFileDescriptor(fdstr string) (FileDescriptor, error) { + fdint, err := strconv.Atoi(fdstr) + if err != nil { + return InvalidFD, fmt.Errorf("file descriptor must be an integer: %q", fdstr) + } + return FileDescriptor(fdint), nil +} diff --git a/contrib/mesos/pkg/redirfd/file_descriptor_test.go b/contrib/mesos/pkg/redirfd/file_descriptor_test.go new file mode 100644 index 00000000000..787f2294455 --- /dev/null +++ b/contrib/mesos/pkg/redirfd/file_descriptor_test.go @@ -0,0 +1,54 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package redirfd + +import ( + "testing" + + . "github.com/onsi/gomega" +) + +func TestParseFileDescriptor(t *testing.T) { + RegisterTestingT(t) + + valid := map[string]FileDescriptor{ + "-1": InvalidFD, + "0": Stdin, + "1": Stdout, + "2": Stderr, + "3": FileDescriptor(3), + } + + for input, expected := range valid { + fd, err := ParseFileDescriptor(input) + Expect(err).ToNot(HaveOccurred(), "Input: '%s'", input) + Expect(fd).To(Equal(expected), "Input: '%s'", input) + } + + invalid := []string{ + "a", + " 1", + "blue", + "stderr", + "STDERR", + } + + for _, input := range invalid { + _, err := ParseFileDescriptor(input) + Expect(err).To(HaveOccurred(), "Input: '%s'", input) + } +} diff --git a/contrib/mesos/pkg/redirfd/redirfd_unix.go b/contrib/mesos/pkg/redirfd/redirfd_unix.go new file mode 100644 index 00000000000..a2159e1c98e --- /dev/null +++ b/contrib/mesos/pkg/redirfd/redirfd_unix.go @@ -0,0 +1,208 @@ +// +build !windows + +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package redirfd
+
+import (
+	"fmt"
+	"os"
+	"syscall"
+)
+
+type RedirectMode int
+
+const (
+	Read           RedirectMode = iota // open file for reading
+	Write                              // open file for writing, truncating if it exists
+	Update                             // open file for read & write
+	Append                             // open file for append, create if it does not exist
+	AppendExisting                     // open file for append, do not create if it does not already exist
+	WriteNew                           // open file for writing, creating it, failing if it already exists
+)
+
+// see https://github.com/skarnet/execline/blob/master/src/execline/redirfd.c
+func (mode RedirectMode) Redirect(nonblock, changemode bool, fd FileDescriptor, name string) (*os.File, error) {
+	flags := 0
+	what := -1
+
+	switch mode {
+	case Read:
+		what = syscall.O_RDONLY
+		flags &= ^(syscall.O_APPEND | syscall.O_CREAT | syscall.O_TRUNC | syscall.O_EXCL)
+	case Write:
+		what = syscall.O_WRONLY
+		flags |= syscall.O_CREAT | syscall.O_TRUNC
+		flags &= ^(syscall.O_APPEND | syscall.O_EXCL)
+	case Update:
+		what = syscall.O_RDWR
+		flags &= ^(syscall.O_APPEND | syscall.O_CREAT | syscall.O_TRUNC | syscall.O_EXCL)
+	case Append:
+		what = syscall.O_WRONLY
+		flags |= syscall.O_CREAT | syscall.O_APPEND
+		flags &= ^(syscall.O_TRUNC | syscall.O_EXCL)
+	case AppendExisting:
+		what = syscall.O_WRONLY
+		flags |= syscall.O_APPEND
+		flags &= ^(syscall.O_CREAT | syscall.O_TRUNC | syscall.O_EXCL)
+	case WriteNew:
+		what = syscall.O_WRONLY
+		flags |= syscall.O_CREAT | syscall.O_EXCL
+		flags &= ^(syscall.O_APPEND | syscall.O_TRUNC)
+	default:
+		return nil, fmt.Errorf("unexpected mode %d", mode)
+	}
+	if nonblock {
+		flags |= syscall.O_NONBLOCK
+	}
+	flags |= what
+
+	fd2, e := open(name, flags, 0666)
+	if (what == syscall.O_WRONLY) && (e == syscall.ENXIO) {
+		// Opens file in read-only, non-blocking mode. Returns a valid fd number if it succeeds, or -1 (and sets errno) if it fails.
+		fdr, e2 := open(name, syscall.O_RDONLY|syscall.O_NONBLOCK, 0)
+		if e2 != nil {
+			return nil, &os.PathError{"open_read", name, e2}
+		}
+		fd2, e = open(name, flags, 0666)
+		fd_close(fdr)
+	}
+	if e != nil {
+		return nil, &os.PathError{"open", name, e}
+	}
+	if e = fd_move(fd, fd2); e != nil {
+		return nil, &os.PathError{"fd_move", name, e}
+	}
+	if changemode {
+		if nonblock {
+			e = ndelay_off(fd)
+		} else {
+			e = ndelay_on(fd)
+		}
+		if e != nil {
+			return nil, &os.PathError{"ndelay", name, e}
+		}
+	}
+	// fd_move dup'd fd2 onto fd and closed fd2, so fd now refers to the open file
+	return os.NewFile(uintptr(fd), name), nil
+}
+
+// proxy to return a FileDescriptor
+func open(path string, openmode int, perm uint32) (FileDescriptor, error) {
+	fdint, err := syscall.Open(path, openmode, perm)
+	return FileDescriptor(fdint), err
+}
+
+// see https://github.com/skarnet/skalibs/blob/master/src/libstddjb/fd_move.c
+func fd_move(to, from FileDescriptor) (err error) {
+	if to == from {
+		return
+	}
+	for {
+		_, _, e1 := syscall.RawSyscall(syscall.SYS_DUP2, uintptr(from), uintptr(to), 0)
+		if e1 != syscall.EINTR {
+			if e1 != 0 {
+				err = e1
+			}
+			break
+		}
+	}
+	// the dup succeeded, so the duplicate descriptor must be closed
+	if err == nil {
+		err = fd_close(from)
+	}
+	return
+	/*
+	   do
+	     r = dup2(from, to) ;
+	   while ((r == -1) && (errno == EINTR)) ;
+	   return (r == -1) ? -1 : fd_close(from) ;
+	*/
+}
+
+// see https://github.com/skarnet/skalibs/blob/master/src/libstddjb/fd_close.c
+func fd_close(fd FileDescriptor) (err error) {
+	i := 0
+	var e error
+	for {
+		if e = syscall.Close(int(fd)); e == nil {
+			return nil
+		}
+		i++
+		if e != syscall.EINTR {
+			break
+		}
+	}
+	if e == syscall.EBADF && i > 1 {
+		return nil
+	}
+	return e
+}
+
+/*
+int fd_close (int fd)
+{
+  register unsigned int i = 0 ;
+doit:
+  if (!close(fd)) return 0 ;
+  i++ ;
+  if (errno == EINTR) goto doit ;
+  return ((errno == EBADF) && (i > 1)) ? 0 : -1 ;
+}
+*/
+
+// see https://github.com/skarnet/skalibs/blob/master/src/libstddjb/ndelay_on.c
+func ndelay_on(fd FileDescriptor) error {
+	// 32-bit will likely break because it needs SYS_FCNTL64
+	got, _, e := syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd), uintptr(syscall.F_GETFL), 0)
+	if e != 0 {
+		return e
+	}
+	_, _, e = syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd), uintptr(syscall.F_SETFL), uintptr(got|syscall.O_NONBLOCK))
+	if e != 0 {
+		return e
+	}
+	return nil
+}
+
+/*
+int ndelay_on (int fd)
+{
+  register int got = fcntl(fd, F_GETFL) ;
+  return (got == -1) ? -1 : fcntl(fd, F_SETFL, got | O_NONBLOCK) ;
+}
+*/
+
+// see https://github.com/skarnet/skalibs/blob/master/src/libstddjb/ndelay_off.c
+func ndelay_off(fd FileDescriptor) error {
+	// 32-bit will likely break because it needs SYS_FCNTL64
+	got, _, e := syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd), uintptr(syscall.F_GETFL), 0)
+	if e != 0 {
+		return e
+	}
+	_, _, e = syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd), uintptr(syscall.F_SETFL), uintptr(int(got) & ^syscall.O_NONBLOCK))
+	if e != 0 {
+		return e
+	}
+	return nil
+}
+
+/*
+int ndelay_off (int fd)
+{
+  register int got = fcntl(fd, F_GETFL) ;
+  return (got == -1) ? -1 : fcntl(fd, F_SETFL, got & ^O_NONBLOCK) ;
+}
+*/
diff --git a/contrib/mesos/pkg/redirfd/redirfd_windows.go b/contrib/mesos/pkg/redirfd/redirfd_windows.go
new file mode 100644
index 00000000000..609d158d2d4
--- /dev/null
+++ b/contrib/mesos/pkg/redirfd/redirfd_windows.go
@@ -0,0 +1,39 @@
+// +build windows
+
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package redirfd
+
+import (
+	"fmt"
+	"os"
+)
+
+type RedirectMode int
+
+const (
+	Read           RedirectMode = iota // open file for reading
+	Write                              // open file for writing, truncating if it exists
+	Update                             // open file for read & write
+	Append                             // open file for append, create if it does not exist
+	AppendExisting                     // open file for append, do not create if it does not already exist
+	WriteNew                           // open file for writing, creating it, failing if it already exists
+)
+
+func (mode RedirectMode) Redirect(nonblock, changemode bool, fd FileDescriptor, name string) (*os.File, error) {
+	return nil, fmt.Errorf("Redirect(%v, %v, %d, %q) not supported on windows", nonblock, changemode, fd, name)
+}
diff --git a/contrib/mesos/pkg/runtime/doc.go b/contrib/mesos/pkg/runtime/doc.go
new file mode 100644
index 00000000000..7acc851bb99
--- /dev/null
+++ b/contrib/mesos/pkg/runtime/doc.go
@@ -0,0 +1,19 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package runtime provides utilities for semaphores (chan struct{}),
+// a simple Latch implementation, and metrics for reporting handled panics.
+package runtime
diff --git a/contrib/mesos/pkg/runtime/latch.go b/contrib/mesos/pkg/runtime/latch.go
new file mode 100644
index 00000000000..93514ae46c7
--- /dev/null
+++ b/contrib/mesos/pkg/runtime/latch.go
@@ -0,0 +1,35 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package runtime
+
+import (
+	"sync/atomic"
+)
+
+type Latch struct {
+	int32
+}
+
+// return true if this latch was successfully acquired. concurrency safe. will only return true
+// upon the first invocation, all subsequent invocations will return false. always returns false
+// when self is nil.
+func (self *Latch) Acquire() bool {
+	if self == nil {
+		return false
+	}
+	return atomic.CompareAndSwapInt32(&self.int32, 0, 1)
+}
diff --git a/contrib/mesos/pkg/runtime/latch_test.go b/contrib/mesos/pkg/runtime/latch_test.go
new file mode 100644
index 00000000000..5bb4600f02d
--- /dev/null
+++ b/contrib/mesos/pkg/runtime/latch_test.go
@@ -0,0 +1,61 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package runtime + +import ( + "sync" + "sync/atomic" + "testing" + "time" +) + +func Test_LatchAcquireBasic(t *testing.T) { + var x Latch + if !x.Acquire() { + t.Fatalf("expected first acquire to succeed") + } + if x.Acquire() { + t.Fatalf("expected second acquire to fail") + } + if x.Acquire() { + t.Fatalf("expected third acquire to fail") + } +} + +func Test_LatchAcquireConcurrent(t *testing.T) { + var x Latch + const NUM = 10 + ch := make(chan struct{}) + var success int32 + var wg sync.WaitGroup + wg.Add(NUM) + for i := 0; i < NUM; i++ { + go func() { + defer wg.Done() + <-ch + if x.Acquire() { + atomic.AddInt32(&success, 1) + } + }() + } + time.Sleep(200 * time.Millisecond) + close(ch) + wg.Wait() + if success != 1 { + t.Fatalf("expected single acquire to succeed instead of %d", success) + } +} diff --git a/contrib/mesos/pkg/runtime/metrics.go b/contrib/mesos/pkg/runtime/metrics.go new file mode 100644 index 00000000000..a2b15966803 --- /dev/null +++ b/contrib/mesos/pkg/runtime/metrics.go @@ -0,0 +1,47 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package runtime + +import ( + "sync" + + "github.com/GoogleCloudPlatform/kubernetes/pkg/util" + "github.com/prometheus/client_golang/prometheus" +) + +const ( + runtimeSubsystem = "mesos_runtime" +) + +var ( + panicCounter = prometheus.NewCounter( + prometheus.CounterOpts{ + Subsystem: runtimeSubsystem, + Name: "panics", + Help: "Counter of panics handled by the internal crash handler.", + }, + ) +) + +var registerMetrics sync.Once + +func Register() { + registerMetrics.Do(func() { + prometheus.MustRegister(panicCounter) + util.PanicHandlers = append(util.PanicHandlers, func(interface{}) { panicCounter.Inc() }) + }) +} diff --git a/contrib/mesos/pkg/runtime/util.go b/contrib/mesos/pkg/runtime/util.go new file mode 100644 index 00000000000..ed7974245eb --- /dev/null +++ b/contrib/mesos/pkg/runtime/util.go @@ -0,0 +1,122 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package runtime
+
+import (
+	"os"
+	"sync"
+	"time"
+
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
+)
+
+type Signal <-chan struct{}
+
+// return a func that will close the signal chan.
+// multiple invocations of the returned func will not generate a panic.
+// two funcs from separate invocations of Closer() (on the same sig chan) will cause a panic if both invoked.
+// for example:
+//	// good
+//	sig := make(chan struct{})
+//	f := runtime.Closer(sig)
+//	f()
+//	f()
+//
+//	// bad
+//	sig := make(chan struct{})
+//	f := runtime.Closer(sig)
+//	g := runtime.Closer(sig)
+//	f()
+//	g() // this will panic
+func Closer(sig chan<- struct{}) func() {
+	var once sync.Once
+	return func() {
+		once.Do(func() { close(sig) })
+	}
+}
+
+// upon receiving signal sig invoke function f and immediately return a signal
+// that indicates f's completion. used to chain handler funcs, for example:
+// On(job.Done(), response.Send).Then(wg.Done)
+func (sig Signal) Then(f func()) Signal {
+	if sig == nil {
+		return nil
+	}
+	return On(sig, f)
+}
+
+// execute a callback function after the specified signal chan closes.
+// immediately returns a signal that indicates f's completion.
+func On(sig <-chan struct{}, f func()) Signal {
+	if sig == nil {
+		return nil
+	}
+	return After(func() {
+		<-sig
+		if f != nil {
+			f()
+		}
+	})
+}
+
+func OnOSSignal(sig <-chan os.Signal, f func(os.Signal)) Signal {
+	if sig == nil {
+		return nil
+	}
+	return After(func() {
+		if s, ok := <-sig; ok && f != nil {
+			f(s)
+		}
+	})
+}
+
+// spawn a goroutine to execute a func, immediately returns a chan that closes
+// upon completion of the func. if the given func is nil, the returned chan
+// still closes immediately.
+func After(f func()) Signal {
+	ch := make(chan struct{})
+	go func() {
+		defer close(ch)
+		defer util.HandleCrash()
+		if f != nil {
+			f()
+		}
+	}()
+	return Signal(ch)
+}
+
+// periodically execute the given function, stopping once stopCh is closed.
+// this func blocks until stopCh is closed, it's intended to be run as a goroutine.
+func Until(f func(), period time.Duration, stopCh <-chan struct{}) {
+	if f == nil {
+		return
+	}
+	for {
+		select {
+		case <-stopCh:
+			return
+		default:
+		}
+		func() {
+			defer util.HandleCrash()
+			f()
+		}()
+		select {
+		case <-stopCh:
+		case <-time.After(period):
+		}
+	}
+}
diff --git a/contrib/mesos/pkg/runtime/util_test.go b/contrib/mesos/pkg/runtime/util_test.go
new file mode 100644
index 00000000000..7c9cc1fb22c
--- /dev/null
+++ b/contrib/mesos/pkg/runtime/util_test.go
@@ -0,0 +1,64 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +package runtime + +import ( + "testing" + "time" +) + +func TestUntil(t *testing.T) { + ch := make(chan struct{}) + close(ch) + Until(func() { + t.Fatal("should not have been invoked") + }, 0, ch) + + //-- + ch = make(chan struct{}) + called := make(chan struct{}) + After(func() { + Until(func() { + called <- struct{}{} + }, 0, ch) + }).Then(func() { close(called) }) + + <-called + close(ch) + <-called + + //-- + ch = make(chan struct{}) + called = make(chan struct{}) + running := make(chan struct{}) + After(func() { + Until(func() { + close(running) + called <- struct{}{} + }, 2*time.Second, ch) + }).Then(func() { close(called) }) + + <-running + close(ch) + <-called // unblock the goroutine + now := time.Now() + + <-called + if time.Since(now) > 1800*time.Millisecond { + t.Fatalf("Until should not have waited the full timeout period since we closed the stop chan") + } +} diff --git a/contrib/mesos/pkg/scheduler/config/config.go b/contrib/mesos/pkg/scheduler/config/config.go new file mode 100644 index 00000000000..5290729b482 --- /dev/null +++ b/contrib/mesos/pkg/scheduler/config/config.go @@ -0,0 +1,109 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package config + +import ( + "io" + "time" + + "code.google.com/p/gcfg" +) + +const ( + DefaultOfferTTL = 5 * time.Second // duration an offer is viable, prior to being expired + DefaultOfferLingerTTL = 120 * time.Second // duration an expired offer lingers in history + DefaultListenerDelay = 1 * time.Second // duration between offer listener notifications + DefaultUpdatesBacklog = 2048 // size of the pod updates channel + DefaultFrameworkIdRefreshInterval = 30 * time.Second // interval we update the frameworkId stored in etcd + DefaultInitialImplicitReconciliationDelay = 15 * time.Second // wait this amount of time after initial registration before attempting implicit reconciliation + DefaultExplicitReconciliationMaxBackoff = 2 * time.Minute // interval in between internal task status checks/updates + DefaultExplicitReconciliationAbortTimeout = 30 * time.Second // waiting period after attempting to cancel an ongoing reconciliation + DefaultInitialPodBackoff = 1 * time.Second + DefaultMaxPodBackoff = 60 * time.Second + DefaultHttpHandlerTimeout = 10 * time.Second + DefaultHttpBindInterval = 5 * time.Second +) + +// Example scheduler configuration file: +// +// [scheduler] +// info-name = Kubernetes +// offer-ttl = 5s +// offer-linger-ttl = 2m + +type ConfigWrapper struct { + Scheduler Config +} + +type Config struct { + OfferTTL WrappedDuration `gcfg:"offer-ttl"` + OfferLingerTTL WrappedDuration `gcfg:"offer-linger-ttl"` + ListenerDelay WrappedDuration `gcfg:"listener-delay"` + UpdatesBacklog int `gcfg:"updates-backlog"` + FrameworkIdRefreshInterval WrappedDuration `gcfg:"framework-id-refresh-interval"` + InitialImplicitReconciliationDelay WrappedDuration `gcfg:"initial-implicit-reconciliation-delay"` + ExplicitReconciliationMaxBackoff WrappedDuration 
`gcfg:"explicit-reconciliation-max-backoff"`
+	ExplicitReconciliationAbortTimeout WrappedDuration `gcfg:"explicit-reconciliation-abort-timeout"`
+	InitialPodBackoff                  WrappedDuration `gcfg:"initial-pod-backoff"`
+	MaxPodBackoff                      WrappedDuration `gcfg:"max-pod-backoff"`
+	HttpHandlerTimeout                 WrappedDuration `gcfg:"http-handler-timeout"`
+	HttpBindInterval                   WrappedDuration `gcfg:"http-bind-interval"`
+}
+
+type WrappedDuration struct {
+	time.Duration
+}
+
+func (wd *WrappedDuration) UnmarshalText(data []byte) error {
+	d, err := time.ParseDuration(string(data))
+	if err == nil {
+		wd.Duration = d
+	}
+	return err
+}
+
+func (c *Config) SetDefaults() {
+	c.OfferTTL = WrappedDuration{DefaultOfferTTL}
+	c.OfferLingerTTL = WrappedDuration{DefaultOfferLingerTTL}
+	c.ListenerDelay = WrappedDuration{DefaultListenerDelay}
+	c.UpdatesBacklog = DefaultUpdatesBacklog
+	c.FrameworkIdRefreshInterval = WrappedDuration{DefaultFrameworkIdRefreshInterval}
+	c.InitialImplicitReconciliationDelay = WrappedDuration{DefaultInitialImplicitReconciliationDelay}
+	c.ExplicitReconciliationMaxBackoff = WrappedDuration{DefaultExplicitReconciliationMaxBackoff}
+	c.ExplicitReconciliationAbortTimeout = WrappedDuration{DefaultExplicitReconciliationAbortTimeout}
+	c.InitialPodBackoff = WrappedDuration{DefaultInitialPodBackoff}
+	c.MaxPodBackoff = WrappedDuration{DefaultMaxPodBackoff}
+	c.HttpHandlerTimeout = WrappedDuration{DefaultHttpHandlerTimeout}
+	c.HttpBindInterval = WrappedDuration{DefaultHttpBindInterval}
+}
+
+func CreateDefaultConfig() *Config {
+	c := &Config{}
+	c.SetDefaults()
+	return c
+}
+
+func (c *Config) Read(configReader io.Reader) error {
+	wrapper := &ConfigWrapper{Scheduler: *c}
+	if configReader != nil {
+		if err := gcfg.ReadInto(wrapper, configReader); err != nil {
+			return err
+		}
+		*c = wrapper.Scheduler
+	}
+	return nil
+}
diff --git a/contrib/mesos/pkg/scheduler/config/config_test.go b/contrib/mesos/pkg/scheduler/config/config_test.go
new file mode 100644
index 00000000000..c316b3bc557
--- /dev/null
+++ b/contrib/mesos/pkg/scheduler/config/config_test.go
@@ -0,0 +1,112 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package config
+
+import (
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func is_default(c *Config, t *testing.T) {
+	assert := assert.New(t)
+
+	assert.Equal(DefaultOfferTTL, c.OfferTTL.Duration)
+	assert.Equal(DefaultOfferLingerTTL, c.OfferLingerTTL.Duration)
+	assert.Equal(DefaultListenerDelay, c.ListenerDelay.Duration)
+	assert.Equal(DefaultUpdatesBacklog, c.UpdatesBacklog)
+	assert.Equal(DefaultFrameworkIdRefreshInterval, c.FrameworkIdRefreshInterval.Duration)
+	assert.Equal(DefaultInitialImplicitReconciliationDelay, c.InitialImplicitReconciliationDelay.Duration)
+	assert.Equal(DefaultExplicitReconciliationMaxBackoff, c.ExplicitReconciliationMaxBackoff.Duration)
+	assert.Equal(DefaultExplicitReconciliationAbortTimeout, c.ExplicitReconciliationAbortTimeout.Duration)
+	assert.Equal(DefaultInitialPodBackoff, c.InitialPodBackoff.Duration)
+	assert.Equal(DefaultMaxPodBackoff, c.MaxPodBackoff.Duration)
+	assert.Equal(DefaultHttpHandlerTimeout, c.HttpHandlerTimeout.Duration)
+	assert.Equal(DefaultHttpBindInterval, c.HttpBindInterval.Duration)
+}
+
+// Check that SetDefaults sets the default values
+func TestConfig_SetDefaults(t *testing.T) {
+	c := &Config{}
+	c.SetDefaults()
+	is_default(c, t)
+}
+
+// Check that CreateDefaultConfig returns a default config
+func TestConfig_CreateDefaultConfig(t *testing.T) {
+	c := CreateDefaultConfig()
+	is_default(c, t)
+}
+
+// Check that a config string can be parsed
+func TestConfig_Read(t *testing.T) {
+	assert := assert.New(t)
+
+	c := CreateDefaultConfig()
+	reader := strings.NewReader(`
+	[scheduler]
+	offer-ttl=42s
+	offer-linger-ttl=42s
+	listener-delay=42s
+	updates-backlog=42
+	framework-id-refresh-interval=42s
+	initial-implicit-reconciliation-delay=42s
+	explicit-reconciliation-max-backoff=42s
+	explicit-reconciliation-abort-timeout=42s
+	initial-pod-backoff=42s
+	max-pod-backoff=42s
+	http-handler-timeout=42s
+	http-bind-interval=42s
+	`)
+	err := c.Read(reader)
+	if err != nil {
+		t.Fatal("Cannot parse scheduler config: " + err.Error())
+	}
+
+	assert.Equal(42*time.Second, c.OfferTTL.Duration)
+	assert.Equal(42*time.Second, c.OfferLingerTTL.Duration)
+	assert.Equal(42*time.Second, c.ListenerDelay.Duration)
+	assert.Equal(42, c.UpdatesBacklog)
+	assert.Equal(42*time.Second, c.FrameworkIdRefreshInterval.Duration)
+	assert.Equal(42*time.Second, c.InitialImplicitReconciliationDelay.Duration)
+	assert.Equal(42*time.Second, c.ExplicitReconciliationMaxBackoff.Duration)
+	assert.Equal(42*time.Second, c.ExplicitReconciliationAbortTimeout.Duration)
+	assert.Equal(42*time.Second, c.InitialPodBackoff.Duration)
+	assert.Equal(42*time.Second, c.MaxPodBackoff.Duration)
+	assert.Equal(42*time.Second, c.HttpHandlerTimeout.Duration)
+	assert.Equal(42*time.Second, c.HttpBindInterval.Duration)
+}
+
+// Check that an invalid config is rejected and none of the values are overwritten
+func TestConfig_ReadError(t *testing.T) {
+	assert := assert.New(t)
+
+	c := CreateDefaultConfig()
+	reader := strings.NewReader(`
+	[scheduler]
+	offer-ttl = 42s
+	invalid-setting = 42s
+	`)
+	err := c.Read(reader)
+	if err == nil {
+		t.Fatal("Invalid scheduler config should lead to an error")
+	}
+
+	assert.NotEqual(42*time.Second, c.OfferTTL.Duration)
+}
diff --git a/contrib/mesos/pkg/scheduler/config/doc.go b/contrib/mesos/pkg/scheduler/config/doc.go
new file mode 100644
index 00000000000..7ce9a982e95
--- /dev/null
+++ b/contrib/mesos/pkg/scheduler/config/doc.go
@@ -0,0 +1,18 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights
reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package config provides mechanisms for low-level scheduler tuning. +package config diff --git a/contrib/mesos/pkg/scheduler/constraint/constraint.go b/contrib/mesos/pkg/scheduler/constraint/constraint.go new file mode 100644 index 00000000000..a2a90b3c377 --- /dev/null +++ b/contrib/mesos/pkg/scheduler/constraint/constraint.go @@ -0,0 +1,106 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package constraint + +import ( + "encoding/json" + "fmt" +) + +type OperatorType int + +const ( + UniqueOperator OperatorType = iota + LikeOperator + ClusterOperator + GroupByOperator + UnlikeOperator +) + +var ( + labels = []string{ + "UNIQUE", + "LIKE", + "CLUSTER", + "GROUP_BY", + "UNLIKE", + } + + labelToType map[string]OperatorType +) + +func init() { + labelToType = make(map[string]OperatorType) + for i, s := range labels { + labelToType[s] = OperatorType(i) + } +} + +func (t OperatorType) String() string { + switch t { + case UniqueOperator, LikeOperator, ClusterOperator, GroupByOperator, UnlikeOperator: + return labels[int(t)] + default: + panic(fmt.Sprintf("unrecognized operator type: %d", int(t))) + } +} + +func parseOperatorType(s string) (OperatorType, error) { + t, found := labelToType[s] + if !found { + return UniqueOperator, fmt.Errorf("unrecognized operator %q", s) + } + return t, nil +} + +type Constraint struct { + Field string // required + Operator OperatorType // required + Value string // optional +} + +func (c *Constraint) MarshalJSON() ([]byte, error) { + var a []string + if c != nil { + if c.Value != "" { + a = append(a, c.Field, c.Operator.String(), c.Value) + } else { + a = append(a, c.Field, c.Operator.String()) + } + } + return json.Marshal(a) +} + +func (c *Constraint) UnmarshalJSON(buf []byte) (err error) { + var a []string + if err = json.Unmarshal(buf, &a); err != nil { + return err + } + switch x := len(a); { + case x < 2: + err = fmt.Errorf("not enough arguments to form constraint") + case x > 3: + err = fmt.Errorf("too many arguments to form constraint") + case x == 3: + c.Value = a[2] + fallthrough + case x == 2: + c.Field = a[0] + c.Operator, err = parseOperatorType(a[1]) + } + return err +} diff --git a/contrib/mesos/pkg/scheduler/constraint/constraint_test.go b/contrib/mesos/pkg/scheduler/constraint/constraint_test.go new file mode 100644 index 00000000000..2869e2d2109 --- /dev/null +++ b/contrib/mesos/pkg/scheduler/constraint/constraint_test.go @@ -0,0 +1,79 @@ +/* 
+Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package constraint + +import ( + "encoding/json" + "testing" +) + +func TestDeserialize(t *testing.T) { + shouldMatch := func(js string, field string, operator OperatorType, value string) (err error) { + constraint := Constraint{} + if err = json.Unmarshal(([]byte)(js), &constraint); err != nil { + return + } + if field != constraint.Field { + t.Fatalf("expected field %q instead of %q", field, constraint.Field) + } + if operator != constraint.Operator { + t.Fatalf("expected operator %v instead of %v", operator, constraint.Operator) + } + if value != constraint.Value { + t.Fatalf("expected value %q instead of %q", value, constraint.Value) + } + return + } + failOnError := func(err error) { + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + } + failOnError(shouldMatch(`["hostname","UNIQUE"]`, "hostname", UniqueOperator, "")) + failOnError(shouldMatch(`["rackid","GROUP_BY","1"]`, "rackid", GroupByOperator, "1")) + failOnError(shouldMatch(`["jdk","LIKE","7"]`, "jdk", LikeOperator, "7")) + failOnError(shouldMatch(`["jdk","UNLIKE","7"]`, "jdk", UnlikeOperator, "7")) + failOnError(shouldMatch(`["bob","CLUSTER","foo"]`, "bob", ClusterOperator, "foo")) + err := shouldMatch(`["bill","NOT_REALLY_AN_OPERATOR","pete"]`, "bill", ClusterOperator, "pete") + if err == nil { + t.Fatalf("expected unmarshalling error for invalid operator") + } +} + +func TestSerialize(t *testing.T) { + shouldMatch := func(expected string, constraint *Constraint) error { + data, err := json.Marshal(constraint) + if err != nil { + return err + } + js := string(data) + if js != expected { + t.Fatalf("expected json %q instead of %q", expected, js) + } + return nil + } + failOnError := func(err error) { + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + } + failOnError(shouldMatch(`["hostname","UNIQUE"]`, &Constraint{"hostname", UniqueOperator, ""})) + failOnError(shouldMatch(`["rackid","GROUP_BY","1"]`, &Constraint{"rackid", GroupByOperator, "1"})) + failOnError(shouldMatch(`["jdk","LIKE","7"]`, &Constraint{"jdk", LikeOperator, "7"})) + failOnError(shouldMatch(`["jdk","UNLIKE","7"]`, &Constraint{"jdk", UnlikeOperator, "7"})) + failOnError(shouldMatch(`["bob","CLUSTER","foo"]`, &Constraint{"bob", ClusterOperator, "foo"})) +} diff --git a/contrib/mesos/pkg/scheduler/constraint/doc.go b/contrib/mesos/pkg/scheduler/constraint/doc.go new file mode 100644 index 00000000000..c21082848b7 --- /dev/null +++ b/contrib/mesos/pkg/scheduler/constraint/doc.go @@ -0,0 +1,21 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package constraint exposes Marathon-like constraints for scheduling pods. +// Incomplete. +// TODO(jdef) We need better alignment between k8s-mesos and k8s scheduling +// constraints (read: a common constraints API). +package constraint diff --git a/contrib/mesos/pkg/scheduler/doc.go b/contrib/mesos/pkg/scheduler/doc.go new file mode 100644 index 00000000000..40552fc1a7d --- /dev/null +++ b/contrib/mesos/pkg/scheduler/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package scheduler implements the Kubernetes Mesos scheduler. +package scheduler diff --git a/contrib/mesos/pkg/scheduler/fcfs.go b/contrib/mesos/pkg/scheduler/fcfs.go new file mode 100644 index 00000000000..761c49c362a --- /dev/null +++ b/contrib/mesos/pkg/scheduler/fcfs.go @@ -0,0 +1,57 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package scheduler
+
+import (
+	"fmt"
+	log "github.com/golang/glog"
+
+	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers"
+	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask"
+)
+
+// FCFSScheduleFunc implements a first-come-first-served scheduling policy:
+// it acquires the first offer that can support the task.
+func FCFSScheduleFunc(r offers.Registry, unused SlaveIndex, task *podtask.T) (offers.Perishable, error) {
+	podName := fmt.Sprintf("%s/%s", task.Pod.Namespace, task.Pod.Name)
+	var acceptedOffer offers.Perishable
+	err := r.Walk(func(p offers.Perishable) (bool, error) {
+		offer := p.Details()
+		if offer == nil {
+			return false, fmt.Errorf("nil offer while scheduling task %v", task.ID)
+		}
+		if task.AcceptOffer(offer) {
+			if p.Acquire() {
+				acceptedOffer = p
+				log.V(3).Infof("Pod %s accepted offer %v", podName, offer.Id.GetValue())
+				return true, nil // stop, we found an offer
+			}
+		}
+		return false, nil // continue
+	})
+	if acceptedOffer != nil {
+		if err != nil {
+			log.Warningf("problems walking the offer registry: %v, attempting to continue", err)
+		}
+		return acceptedOffer, nil
+	}
+	if err != nil {
+		log.V(2).Infof("failed to find a fit for pod: %s, err = %v", podName, err)
+		return nil, err
+	}
+	log.V(2).Infof("failed to find a fit for pod: %s", podName)
+	return nil, noSuitableOffersErr
+}
diff --git a/contrib/mesos/pkg/scheduler/ha/doc.go b/contrib/mesos/pkg/scheduler/ha/doc.go
new file mode 100644
index 00000000000..4e6fc0beda5
--- /dev/null
+++ b/contrib/mesos/pkg/scheduler/ha/doc.go
@@ -0,0 +1,18 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +package ha + +import ( + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/election" + log "github.com/golang/glog" +) + +type roleType int + +const ( + followerRole roleType = iota + masterRole + retiredRole +) + +type candidateService struct { + sched *SchedulerProcess + newDriver DriverFactory + role roleType + valid ValidationFunc +} + +type ValidationFunc func(desiredUid, currentUid string) + +func NewCandidate(s *SchedulerProcess, f DriverFactory, v ValidationFunc) election.Service { + return &candidateService{ + sched: s, + newDriver: f, + role: followerRole, + valid: v, + } +} + +func (self *candidateService) Validate(desired, current election.Master) { + if self.valid != nil { + self.valid(string(desired), string(current)) + } +} + +func (self *candidateService) Start() { + if self.role == followerRole { + log.Info("elected as master") + self.role = masterRole + self.sched.Elect(self.newDriver) + } +} + +func (self *candidateService) Stop() { + if self.role == masterRole { + log.Info("retiring from master") + self.role = retiredRole + // order is important here, watchers of a SchedulerProcess will + // check SchedulerProcess.Failover() once Done() is closed. + close(self.sched.failover) + self.sched.End() + } +} diff --git a/contrib/mesos/pkg/scheduler/ha/ha.go b/contrib/mesos/pkg/scheduler/ha/ha.go new file mode 100644 index 00000000000..cdfc0c0c5cf --- /dev/null +++ b/contrib/mesos/pkg/scheduler/ha/ha.go @@ -0,0 +1,285 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package ha + +import ( + "fmt" + "sync/atomic" + + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/proc" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime" + log "github.com/golang/glog" + mesos "github.com/mesos/mesos-go/mesosproto" + bindings "github.com/mesos/mesos-go/scheduler" +) + +type DriverFactory func() (bindings.SchedulerDriver, error) + +type stageType int32 + +const ( + initStage stageType = iota + standbyStage + masterStage + finStage +) + +func (stage *stageType) transition(from, to stageType) bool { + return atomic.CompareAndSwapInt32((*int32)(stage), int32(from), int32(to)) +} + +func (s *stageType) transitionTo(to stageType, unless ...stageType) bool { + if len(unless) == 0 { + atomic.StoreInt32((*int32)(s), int32(to)) + return true + } + for { + state := s.get() + for _, x := range unless { + if state == x { + return false + } + } + if s.transition(state, to) { + return true + } + } +} + +func (stage *stageType) get() stageType { + return stageType(atomic.LoadInt32((*int32)(stage))) +} + +// execute some action in the deferred context of the process, but only if we +// match the stage of the process at the time the action is executed. 
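+// For example (an illustrative sketch, not additional API surface):
+//
+//	errCh := masterStage.Do(p, a) // a runs only once p has been elected master
+//	err := <-errCh                // nil on success, or a lifecycle error
+//
+// standbyStage.Do defers the action until the process reaches standby, while
+// finStage.Do drops the action and reports an error, since by then the
+// process is already dying.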
+func (stage stageType) Do(p *SchedulerProcess, a proc.Action) <-chan error { + errOnce := proc.NewErrorOnce(p.fin) + errOuter := p.Do(proc.Action(func() { + switch stage { + case standbyStage: + //await standby signal or death + select { + case <-p.standby: + case <-p.Done(): + } + case masterStage: + //await elected signal or death + select { + case <-p.elected: + case <-p.Done(): + } + case finStage: + errOnce.Reportf("scheduler process is dying, dropping action") + return + default: + } + errOnce.Report(stage.When(p, a)) + })) + return errOnce.Send(errOuter).Err() +} + +// execute some action only if we match the stage of the scheduler process +func (stage stageType) When(p *SchedulerProcess, a proc.Action) (err error) { + if stage != (&p.stage).get() { + err = fmt.Errorf("failed to execute deferred action, expected lifecycle stage %v instead of %v", stage, p.stage) + } else { + a() + } + return +} + +type SchedulerProcess struct { + proc.Process + bindings.Scheduler + stage stageType + elected chan struct{} // upon close we've been elected + failover chan struct{} // closed indicates that we should failover upon End() + standby chan struct{} + fin chan struct{} +} + +func New(sched bindings.Scheduler) *SchedulerProcess { + p := &SchedulerProcess{ + Process: proc.New(), + Scheduler: sched, + stage: initStage, + elected: make(chan struct{}), + failover: make(chan struct{}), + standby: make(chan struct{}), + fin: make(chan struct{}), + } + runtime.On(p.Running(), p.begin) + return p +} + +func (self *SchedulerProcess) begin() { + if (&self.stage).transition(initStage, standbyStage) { + close(self.standby) + log.Infoln("scheduler process entered standby stage") + } else { + log.Errorf("failed to transition from init to standby stage") + } +} + +func (self *SchedulerProcess) End() <-chan struct{} { + if (&self.stage).transitionTo(finStage, finStage) { + defer close(self.fin) + log.Infoln("scheduler process entered fin stage") + } + return self.Process.End() +} + +func (self *SchedulerProcess) Elect(newDriver DriverFactory) { + errOnce := proc.NewErrorOnce(self.fin) + proc.OnError(errOnce.Send(standbyStage.Do(self, proc.Action(func() { + if !(&self.stage).transition(standbyStage, masterStage) { + log.Errorf("failed to transition from standby to master stage, aborting") + self.End() + return + } + log.Infoln("scheduler process entered master stage") + drv, err := newDriver() + if err != nil { + log.Errorf("failed to fetch scheduler driver: %v", err) + self.End() + return + } + log.V(1).Infoln("starting driver...") + stat, err := drv.Start() + if stat == mesos.Status_DRIVER_RUNNING && err == nil { + log.Infoln("driver started successfully and is running") + close(self.elected) + go func() { + defer self.End() + _, err := drv.Join() + if err != nil { + log.Errorf("driver failed with error: %v", err) + } + errOnce.Report(err) + }() + return + } + defer self.End() + if err != nil { + log.Errorf("failed to start scheduler driver: %v", err) + } else { + log.Errorf("expected RUNNING status, not %v", stat) + } + }))).Err(), func(err error) { + defer self.End() + log.Errorf("failed to handle election event, aborting: %v", err) + }, self.fin) +} + +func (self *SchedulerProcess) Terminal() <-chan struct{} { + return self.fin +} + +func (self *SchedulerProcess) Elected() <-chan struct{} { + return self.elected +} + +func (self *SchedulerProcess) Failover() <-chan struct{} { + return self.failover +} + +type masterProcess struct { + *SchedulerProcess + doer proc.Doer +} + +func (self *masterProcess) 
Done() <-chan struct{} { + return self.SchedulerProcess.Terminal() +} + +func (self *masterProcess) Do(a proc.Action) <-chan error { + return self.doer.Do(a) +} + +// returns a Process instance that will only execute a proc.Action if the scheduler is the elected master +func (self *SchedulerProcess) Master() proc.Process { + return &masterProcess{ + SchedulerProcess: self, + doer: proc.DoWith(self, proc.DoerFunc(func(a proc.Action) <-chan error { + return proc.ErrorChan(masterStage.When(self, a)) + })), + } +} + +func (self *SchedulerProcess) logError(ch <-chan error) { + self.OnError(ch, func(err error) { + log.Errorf("failed to execute scheduler action: %v", err) + }) +} + +func (self *SchedulerProcess) Registered(drv bindings.SchedulerDriver, fid *mesos.FrameworkID, mi *mesos.MasterInfo) { + self.logError(self.Master().Do(proc.Action(func() { + self.Scheduler.Registered(drv, fid, mi) + }))) +} + +func (self *SchedulerProcess) Reregistered(drv bindings.SchedulerDriver, mi *mesos.MasterInfo) { + self.logError(self.Master().Do(proc.Action(func() { + self.Scheduler.Reregistered(drv, mi) + }))) +} + +func (self *SchedulerProcess) Disconnected(drv bindings.SchedulerDriver) { + self.logError(self.Master().Do(proc.Action(func() { + self.Scheduler.Disconnected(drv) + }))) +} + +func (self *SchedulerProcess) ResourceOffers(drv bindings.SchedulerDriver, off []*mesos.Offer) { + self.logError(self.Master().Do(proc.Action(func() { + self.Scheduler.ResourceOffers(drv, off) + }))) +} + +func (self *SchedulerProcess) OfferRescinded(drv bindings.SchedulerDriver, oid *mesos.OfferID) { + self.logError(self.Master().Do(proc.Action(func() { + self.Scheduler.OfferRescinded(drv, oid) + }))) +} + +func (self *SchedulerProcess) StatusUpdate(drv bindings.SchedulerDriver, ts *mesos.TaskStatus) { + self.logError(self.Master().Do(proc.Action(func() { + self.Scheduler.StatusUpdate(drv, ts) + }))) +} + +func (self *SchedulerProcess) FrameworkMessage(drv bindings.SchedulerDriver, eid *mesos.ExecutorID, sid *mesos.SlaveID, m string) { + self.logError(self.Master().Do(proc.Action(func() { + self.Scheduler.FrameworkMessage(drv, eid, sid, m) + }))) +} + +func (self *SchedulerProcess) SlaveLost(drv bindings.SchedulerDriver, sid *mesos.SlaveID) { + self.logError(self.Master().Do(proc.Action(func() { + self.Scheduler.SlaveLost(drv, sid) + }))) +} + +func (self *SchedulerProcess) ExecutorLost(drv bindings.SchedulerDriver, eid *mesos.ExecutorID, sid *mesos.SlaveID, x int) { + self.logError(self.Master().Do(proc.Action(func() { + self.Scheduler.ExecutorLost(drv, eid, sid, x) + }))) +} + +func (self *SchedulerProcess) Error(drv bindings.SchedulerDriver, msg string) { + self.Scheduler.Error(drv, msg) +} diff --git a/contrib/mesos/pkg/scheduler/meta/annotations.go b/contrib/mesos/pkg/scheduler/meta/annotations.go new file mode 100644 index 00000000000..5c9bf099182 --- /dev/null +++ b/contrib/mesos/pkg/scheduler/meta/annotations.go @@ -0,0 +1,30 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package meta + +// kubernetes api object annotations +const ( + BindingHostKey = "k8s.mesosphere.io/bindingHost" + TaskIdKey = "k8s.mesosphere.io/taskId" + SlaveIdKey = "k8s.mesosphere.io/slaveId" + OfferIdKey = "k8s.mesosphere.io/offerId" + ExecutorIdKey = "k8s.mesosphere.io/executorId" + PortMappingKeyPrefix = "k8s.mesosphere.io/port_" + PortMappingKeyFormat = PortMappingKeyPrefix + "%s_%d" + PortNameMappingKeyPrefix = "k8s.mesosphere.io/portName_" + PortNameMappingKeyFormat = PortNameMappingKeyPrefix + "%s_%s" +) diff --git a/contrib/mesos/pkg/scheduler/meta/doc.go b/contrib/mesos/pkg/scheduler/meta/doc.go new file mode 100644 index 00000000000..e9d834c94c5 --- /dev/null +++ b/contrib/mesos/pkg/scheduler/meta/doc.go @@ -0,0 +1,22 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package meta defines framework constants used as keys in k8s annotations +// that are attached to k8s pods. The scheduler uses some of these annotations +// for reconciliation upon failover. Other annotations are used as part of +// the host-to-pod port-mapping implementation understood by the k8s-mesos +// scheduler and custom endpoints-controller implementation. +package meta diff --git a/contrib/mesos/pkg/scheduler/meta/store.go b/contrib/mesos/pkg/scheduler/meta/store.go new file mode 100644 index 00000000000..7203a12c948 --- /dev/null +++ b/contrib/mesos/pkg/scheduler/meta/store.go @@ -0,0 +1,24 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package meta + +// keys for things that we store +const ( + //TODO(jdef) this should also be a format instead of a fixed path + FrameworkIDKey = "/mesos/k8sm/frameworkid" + DefaultElectionFormat = "/mesos/k8sm/framework/%s/leader" +) diff --git a/contrib/mesos/pkg/scheduler/metrics/doc.go b/contrib/mesos/pkg/scheduler/metrics/doc.go new file mode 100644 index 00000000000..861c0205c61 --- /dev/null +++ b/contrib/mesos/pkg/scheduler/metrics/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package metrics defines and exposes instrumentation metrics of the scheduler.
+package metrics
diff --git a/contrib/mesos/pkg/scheduler/metrics/metrics.go b/contrib/mesos/pkg/scheduler/metrics/metrics.go
new file mode 100644
index 00000000000..5bdb6f00196
--- /dev/null
+++ b/contrib/mesos/pkg/scheduler/metrics/metrics.go
@@ -0,0 +1,102 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package metrics
+
+import (
+	"sync"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+const (
+	schedulerSubsystem = "mesos_scheduler"
+)
+
+var (
+	QueueWaitTime = prometheus.NewSummary(
+		prometheus.SummaryOpts{
+			Subsystem: schedulerSubsystem,
+			Name:      "queue_wait_time_microseconds",
+			Help:      "Launch queue wait time in microseconds",
+		},
+	)
+	BindLatency = prometheus.NewSummary(
+		prometheus.SummaryOpts{
+			Subsystem: schedulerSubsystem,
+			Name:      "bind_latency_microseconds",
+			Help:      "Latency in microseconds between pod-task launch and pod binding.",
+		},
+	)
+	StatusUpdates = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Subsystem: schedulerSubsystem,
+			Name:      "status_updates",
+			Help:      "Counter of TaskStatus updates, broken out by source, reason, state.",
+		},
+		[]string{"source", "reason", "state"},
+	)
+	ReconciliationLatency = prometheus.NewSummary(
+		prometheus.SummaryOpts{
+			Subsystem: schedulerSubsystem,
+			Name:      "reconciliation_latency_microseconds",
+			Help:      "Latency in microseconds to execute explicit task reconciliation.",
+		},
+	)
+	ReconciliationRequested = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Subsystem: schedulerSubsystem,
+			Name:      "reconciliation_requested",
+			Help:      "Counter of requested task reconciliations, broken out by kind.",
+		},
+		[]string{"kind"},
+	)
+	ReconciliationExecuted = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Subsystem: schedulerSubsystem,
+			Name:      "reconciliation_executed",
+			Help:      "Counter of executed task reconciliation requests, broken out by kind.",
+		},
+		[]string{"kind"},
+	)
+	ReconciliationCancelled = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Subsystem: schedulerSubsystem,
+			Name:      "reconciliation_cancelled",
+			Help:      "Counter of cancelled task reconciliation requests, broken out by kind.",
+		},
+		[]string{"kind"},
+	)
+)
+
+var registerMetrics sync.Once
+
+func Register() {
+	registerMetrics.Do(func() {
+		prometheus.MustRegister(QueueWaitTime)
+		prometheus.MustRegister(BindLatency)
+		prometheus.MustRegister(StatusUpdates)
+		prometheus.MustRegister(ReconciliationLatency)
+		prometheus.MustRegister(ReconciliationRequested)
+		prometheus.MustRegister(ReconciliationExecuted)
+		prometheus.MustRegister(ReconciliationCancelled)
+	})
+}
+
+func InMicroseconds(d time.Duration) float64 {
+	return float64(d.Nanoseconds() / time.Microsecond.Nanoseconds())
+}
diff --git a/contrib/mesos/pkg/scheduler/mock_test.go b/contrib/mesos/pkg/scheduler/mock_test.go
new file mode 100644
index 00000000000..1dbb9da78d0
--- /dev/null
+++ b/contrib/mesos/pkg/scheduler/mock_test.go
@@ -0,0 +1,203 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package scheduler
+
+import (
+	"sync"
+	"testing"
+
+	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers"
+	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
+	mesos "github.com/mesos/mesos-go/mesosproto"
+	"github.com/stretchr/testify/mock"
+)
+
+// implements SchedulerInterface
+type MockScheduler struct {
+	sync.RWMutex
+	mock.Mock
+}
+
+func (m *MockScheduler) slaveFor(id string) (slave *Slave, ok bool) {
+	args := m.Called(id)
+	x := args.Get(0)
+	if x != nil {
+		slave = x.(*Slave)
+	}
+	ok = args.Bool(1)
+	return
+}
+func (m *MockScheduler) algorithm() (f PodScheduleFunc) {
+	args := m.Called()
+	x := args.Get(0)
+	if x != nil {
+		f = x.(PodScheduleFunc)
+	}
+	return
+}
+func (m *MockScheduler) createPodTask(ctx api.Context, pod *api.Pod) (task *podtask.T, err error) {
+	args := m.Called(ctx, pod)
+	x := args.Get(0)
+	if x != nil {
+		task = x.(*podtask.T)
+	}
+	err = args.Error(1)
+	return
+}
+func (m *MockScheduler) offers() (f offers.Registry) {
+	args := m.Called()
+	x := args.Get(0)
+	if x != nil {
+		f = x.(offers.Registry)
+	}
+	return
+}
+func (m *MockScheduler) tasks() (f podtask.Registry) {
+	args := m.Called()
+	x := args.Get(0)
+	if x != nil {
+		f = x.(podtask.Registry)
+	}
+	return
+}
+func (m *MockScheduler) killTask(taskId string) error {
+	args := m.Called(taskId)
+	return args.Error(0)
+}
+func (m *MockScheduler) launchTask(task *podtask.T) error {
+	args := m.Called(task)
+	return args.Error(0)
+}
+
+// @deprecated this is a placeholder for me to test the mock package
+func TestNoSlavesYet(t *testing.T) {
+	obj := &MockScheduler{}
+	obj.On("slaveFor", "foo").Return(nil, false)
+	obj.slaveFor("foo")
+	obj.AssertExpectations(t)
+}
+
+/*-----------------------------------------------------------------------------
+ |
+ |	this really belongs in the mesos-go package, but that's being updated soon
+ |	anyway so just keep it here for now unless we *really* need it there.
+ |
+ \-----------------------------------------------------------------------------
+
+// Scheduler defines the interfaces that need to be implemented.
+type Scheduler interface {
+	Registered(SchedulerDriver, *FrameworkID, *MasterInfo)
+	Reregistered(SchedulerDriver, *MasterInfo)
+	Disconnected(SchedulerDriver)
+	ResourceOffers(SchedulerDriver, []*Offer)
+	OfferRescinded(SchedulerDriver, *OfferID)
+	StatusUpdate(SchedulerDriver, *TaskStatus)
+	FrameworkMessage(SchedulerDriver, *ExecutorID, *SlaveID, string)
+	SlaveLost(SchedulerDriver, *SlaveID)
+	ExecutorLost(SchedulerDriver, *ExecutorID, *SlaveID, int)
+	Error(SchedulerDriver, string)
+}
+*/
+
+func status(args mock.Arguments, at int) (val mesos.Status) {
+	if x := args.Get(at); x != nil {
+		val = x.(mesos.Status)
+	}
+	return
+}
+
+type extendedMock struct {
+	mock.Mock
+}
+
+// Upon returns a chan that closes upon the execution of the most recently registered call.
+func (m *extendedMock) Upon() <-chan struct{} {
+	ch := make(chan struct{})
+	call := &m.ExpectedCalls[len(m.ExpectedCalls)-1]
+	f := call.Run
+	call.Run = func(args mock.Arguments) {
+		defer close(ch)
+		if f != nil {
+			f(args)
+		}
+	}
+	return ch
+}
+
+type MockSchedulerDriver struct {
+	extendedMock
+}
+
+func (m *MockSchedulerDriver) Init() error {
+	args := m.Called()
+	return args.Error(0)
+}
+func (m *MockSchedulerDriver) Start() (mesos.Status, error) {
+	args := m.Called()
+	return status(args, 0), args.Error(1)
+}
+func (m *MockSchedulerDriver) Stop(b bool) (mesos.Status, error) {
+	args := m.Called(b)
+	return status(args, 0), args.Error(1)
+}
+func (m *MockSchedulerDriver) Abort() (mesos.Status, error) {
+	args := m.Called()
+	return status(args, 0), args.Error(1)
+}
+func (m *MockSchedulerDriver) Join() (mesos.Status, error) {
+	args := m.Called()
+	return status(args, 0), args.Error(1)
+}
+func (m *MockSchedulerDriver) Run() (mesos.Status, error) {
+	args := m.Called()
+	return status(args, 0), args.Error(1)
+}
+func (m *MockSchedulerDriver) RequestResources(r []*mesos.Request) (mesos.Status, error) {
+	args := m.Called(r)
+	return status(args, 0), args.Error(1)
+}
+func (m *MockSchedulerDriver) ReconcileTasks(statuses []*mesos.TaskStatus) (mesos.Status, error) {
+	args := m.Called(statuses)
+	return status(args, 0), args.Error(1)
+}
+func (m *MockSchedulerDriver) LaunchTasks(offerIds []*mesos.OfferID, ti []*mesos.TaskInfo, f *mesos.Filters) (mesos.Status, error) {
+	args := m.Called(offerIds, ti, f)
+	return status(args, 0), args.Error(1)
+}
+func (m *MockSchedulerDriver) KillTask(tid *mesos.TaskID) (mesos.Status, error) {
+	args := m.Called(tid)
+	return status(args, 0), args.Error(1)
+}
+func (m *MockSchedulerDriver) DeclineOffer(oid *mesos.OfferID, f *mesos.Filters) (mesos.Status, error) {
+	args := m.Called(oid, f)
+	return status(args, 0), args.Error(1)
+}
+func (m *MockSchedulerDriver) ReviveOffers() (mesos.Status, error) {
+	args := m.Called()
+	return status(args, 0), args.Error(1)
+}
+func (m *MockSchedulerDriver) SendFrameworkMessage(eid *mesos.ExecutorID, sid *mesos.SlaveID, s string) (mesos.Status, error) {
+	args := m.Called(eid, sid, s)
+	return status(args, 0), args.Error(1)
+}
+func (m *MockSchedulerDriver) Destroy() {
+	m.Called()
+}
+func (m *MockSchedulerDriver) Wait() {
+	m.Called()
+}
diff --git a/contrib/mesos/pkg/scheduler/plugin.go b/contrib/mesos/pkg/scheduler/plugin.go
new file mode 100644
index 00000000000..1fc02e7a000
--- /dev/null
+++ b/contrib/mesos/pkg/scheduler/plugin.go
@@ -0,0 +1,879 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package scheduler + +import ( + "fmt" + "io" + "net/http" + "strconv" + "sync" + "time" + + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/backoff" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/queue" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime" + annotation "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/meta" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask" + "github.com/GoogleCloudPlatform/kubernetes/pkg/api" + "github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors" + "github.com/GoogleCloudPlatform/kubernetes/pkg/client" + "github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache" + "github.com/GoogleCloudPlatform/kubernetes/pkg/client/record" + "github.com/GoogleCloudPlatform/kubernetes/pkg/fields" + "github.com/GoogleCloudPlatform/kubernetes/pkg/util" + plugin "github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler" + "github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm" + log "github.com/golang/glog" + mesos "github.com/mesos/mesos-go/mesosproto" + mutil "github.com/mesos/mesos-go/mesosutil" +) + +const ( + enqueuePopTimeout = 200 * time.Millisecond + enqueueWaitTimeout = 1 * time.Second + yieldPopTimeout = 200 * time.Millisecond + yieldWaitTimeout = 1 * time.Second + pluginRecoveryDelay = 100 * time.Millisecond // delay after scheduler plugin crashes, before we resume scheduling +) + +// scheduler abstraction to allow for easier unit testing +type schedulerInterface interface { + sync.Locker // synchronize scheduler plugin operations + SlaveIndex + algorithm() PodScheduleFunc // see types.go + offers() offers.Registry + tasks() podtask.Registry + + // driver calls + + killTask(taskId string) error + launchTask(*podtask.T) error + + // convenience + + createPodTask(api.Context, *api.Pod) (*podtask.T, error) +} + +type k8smScheduler struct { + sync.Mutex + internal *KubernetesScheduler +} + +func (k *k8smScheduler) algorithm() PodScheduleFunc { + return k.internal.scheduleFunc +} + +func (k *k8smScheduler) offers() offers.Registry { + return k.internal.offers +} + +func (k *k8smScheduler) tasks() podtask.Registry { + return k.internal.taskRegistry +} + +func (k *k8smScheduler) createPodTask(ctx api.Context, pod *api.Pod) (*podtask.T, error) { + return podtask.New(ctx, "", *pod, k.internal.executor) +} + +func (k *k8smScheduler) slaveFor(id string) (slave *Slave, ok bool) { + slave, ok = k.internal.slaves.getSlave(id) + return +} + +func (k *k8smScheduler) killTask(taskId string) error { + killTaskId := mutil.NewTaskID(taskId) + _, err := k.internal.driver.KillTask(killTaskId) + return err +} + +func (k *k8smScheduler) launchTask(task *podtask.T) error { + // assume caller is holding scheduler lock + taskList := []*mesos.TaskInfo{task.BuildTaskInfo()} + offerIds := []*mesos.OfferID{task.Offer.Details().Id} + filters := &mesos.Filters{} + 
_, err := k.internal.driver.LaunchTasks(offerIds, taskList, filters)
+	return err
+}
+
+type binder struct {
+	api schedulerInterface
+}
+
+// implements binding.Registry, launches the pod-associated-task in mesos
+func (b *binder) Bind(binding *api.Binding) error {
+
+	ctx := api.WithNamespace(api.NewContext(), binding.Namespace)
+
+	// default upstream scheduler passes pod.Name as binding.Name
+	podKey, err := podtask.MakePodKey(ctx, binding.Name)
+	if err != nil {
+		return err
+	}
+
+	b.api.Lock()
+	defer b.api.Unlock()
+
+	switch task, state := b.api.tasks().ForPod(podKey); state {
+	case podtask.StatePending:
+		return b.bind(ctx, binding, task)
+	default:
+		// in this case it's likely that the pod has been deleted between Schedule
+		// and Bind calls
+		log.Infof("No pending task for pod %s", podKey)
+		return noSuchPodErr //TODO(jdef) this error is somewhat misleading since the task could be running?!
+	}
+}
+
+func (b *binder) rollback(task *podtask.T, err error) error {
+	task.Offer.Release()
+	task.Reset()
+	if err2 := b.api.tasks().Update(task); err2 != nil {
+		log.Errorf("failed to update pod task: %v", err2)
+	}
+	return err
+}
+
+// assumes that: caller has acquired scheduler lock and that the task is still pending
+func (b *binder) bind(ctx api.Context, binding *api.Binding, task *podtask.T) (err error) {
+	// sanity check: ensure the task still HasAcceptedOffer(); it's possible that
+	// between Schedule() and now the offer for this task was rescinded or
+	// invalidated (we should never see this here)
+	if !task.HasAcceptedOffer() {
+		return fmt.Errorf("task has not accepted a valid offer %v", task.ID)
+	}
+
+	// By this time, there is a chance that the slave is disconnected.
+	offerId := task.GetOfferId()
+	if offer, ok := b.api.offers().Get(offerId); !ok || offer.HasExpired() {
+		// already rescinded or timed out or otherwise invalidated
+		return b.rollback(task, fmt.Errorf("failed prior to launchTask due to expired offer for task %v", task.ID))
+	}
+
+	if err = b.prepareTaskForLaunch(ctx, binding.Target.Name, task, offerId); err == nil {
+		log.V(2).Infof("launching task: %q on target %q slave %q for pod \"%v/%v\"",
+			task.ID, binding.Target.Name, task.Spec.SlaveID, task.Pod.Namespace, task.Pod.Name)
+		if err = b.api.launchTask(task); err == nil {
+			b.api.offers().Invalidate(offerId)
+			task.Set(podtask.Launched)
+			if err = b.api.tasks().Update(task); err != nil {
+				// this should only happen if the task has been removed or has changed status,
+				// which SHOULD NOT HAPPEN as long as we're synchronizing correctly
+				log.Errorf("failed to update task w/ Launched status: %v", err)
+			}
+			return
+		}
+	}
+	return b.rollback(task, fmt.Errorf("Failed to launch task %v: %v", task.ID, err))
+}
+
+//TODO(jdef) unit test this, ensure that task's copy of api.Pod is not modified
+func (b *binder) prepareTaskForLaunch(ctx api.Context, machine string, task *podtask.T, offerId string) error {
+	pod := task.Pod
+
+	// we make an effort here to avoid making changes to the task's copy of the pod, since
+	// we want that to reflect the initial user spec, and not the modified spec that we
+	// build for the executor to consume.
+	oemCt := pod.Spec.Containers
+	pod.Spec.Containers = append([]api.Container{}, oemCt...)
// (shallow) clone before mod + + if pod.Annotations == nil { + pod.Annotations = make(map[string]string) + } else { + oemAnn := pod.Annotations + pod.Annotations = make(map[string]string) + for k, v := range oemAnn { + pod.Annotations[k] = v + } + } + pod.Annotations[annotation.BindingHostKey] = machine + task.SaveRecoveryInfo(pod.Annotations) + + for _, entry := range task.Spec.PortMap { + oemPorts := pod.Spec.Containers[entry.ContainerIdx].Ports + ports := append([]api.ContainerPort{}, oemPorts...) + p := &ports[entry.PortIdx] + p.HostPort = int(entry.OfferPort) + op := strconv.FormatUint(entry.OfferPort, 10) + pod.Annotations[fmt.Sprintf(annotation.PortMappingKeyFormat, p.Protocol, p.ContainerPort)] = op + if p.Name != "" { + pod.Annotations[fmt.Sprintf(annotation.PortNameMappingKeyFormat, p.Protocol, p.Name)] = op + } + pod.Spec.Containers[entry.ContainerIdx].Ports = ports + } + + // the kubelet-executor uses this to instantiate the pod + log.V(3).Infof("prepared pod spec: %+v", pod) + + data, err := api.Codec.Encode(&pod) + if err != nil { + log.V(2).Infof("Failed to marshal the pod spec: %v", err) + return err + } + task.Spec.Data = data + return nil +} + +type kubeScheduler struct { + api schedulerInterface + podUpdates queue.FIFO +} + +// Schedule implements the Scheduler interface of Kubernetes. +// It returns the selectedMachine's name and error (if there's any). +func (k *kubeScheduler) Schedule(pod *api.Pod, unused algorithm.MinionLister) (string, error) { + log.Infof("Try to schedule pod %v\n", pod.Name) + ctx := api.WithNamespace(api.NewDefaultContext(), pod.Namespace) + + // default upstream scheduler passes pod.Name as binding.PodID + podKey, err := podtask.MakePodKey(ctx, pod.Name) + if err != nil { + return "", err + } + + k.api.Lock() + defer k.api.Unlock() + + switch task, state := k.api.tasks().ForPod(podKey); state { + case podtask.StateUnknown: + // There's a bit of a potential race here, a pod could have been yielded() and + // then before we get *here* it could be deleted. + // We use meta to index the pod in the store since that's what k8s reflector does. 
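+		// (e.g. cache.MetaNamespaceKeyFunc yields "default/nginx" for a pod
+		// named "nginx" in the "default" namespace)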
+		podName, err := cache.MetaNamespaceKeyFunc(pod)
+		if err != nil {
+			log.Warningf("aborting Schedule, unable to understand pod object %+v", pod)
+			return "", noSuchPodErr
+		}
+		if deleted := k.podUpdates.Poll(podName, queue.DELETE_EVENT); deleted {
+			// avoid scheduling a pod that's been deleted between yieldPod() and Schedule()
+			log.Infof("aborting Schedule, pod has been deleted %+v", pod)
+			return "", noSuchPodErr
+		}
+		return k.doSchedule(k.api.tasks().Register(k.api.createPodTask(ctx, pod)))
+
+	//TODO(jdef) it's possible that the pod state has diverged from what
+	//we knew previously, we should probably update the task.Pod state here
+	//before proceeding with scheduling
+	case podtask.StatePending:
+		if pod.UID != task.Pod.UID {
+			// we're dealing with a brand new pod spec here, so the old one must have been
+			// deleted -- and so our task store is out of sync w/ respect to reality
+			//TODO(jdef) reconcile task
+			return "", fmt.Errorf("task %v spec is out of sync with pod %v spec, aborting schedule", task.ID, pod.Name)
+		} else if task.Has(podtask.Launched) {
+			// task has been marked as "launched" but the pod binding creation may have failed in k8s,
+			// but we're going to let someone else handle it, probably the mesos task error handler
+			return "", fmt.Errorf("task %s has already been launched, aborting schedule", task.ID)
+		} else {
+			return k.doSchedule(task, nil)
+		}
+
+	default:
+		return "", fmt.Errorf("task %s is not pending, nothing to schedule", task.ID)
+	}
+}
+
+// Call ScheduleFunc and subtract some resources, returning the name of the machine the task is scheduled on
+func (k *kubeScheduler) doSchedule(task *podtask.T, err error) (string, error) {
+	var offer offers.Perishable
+	if task.HasAcceptedOffer() {
+		// verify that the offer is still on the table
+		offerId := task.GetOfferId()
+		if o, ok := k.api.offers().Get(offerId); ok && !o.HasExpired() {
+			// skip tasks that already have assigned offers
+			offer = task.Offer
+		} else {
+			task.Offer.Release()
+			task.Reset()
+			if err = k.api.tasks().Update(task); err != nil {
+				return "", err
+			}
+		}
+	}
+	if err == nil && offer == nil {
+		offer, err = k.api.algorithm()(k.api.offers(), k.api, task)
+	}
+	if err != nil {
+		return "", err
+	}
+	details := offer.Details()
+	if details == nil {
+		return "", fmt.Errorf("offer already invalid/expired for task %v", task.ID)
+	}
+	slaveId := details.GetSlaveId().GetValue()
+	if slave, ok := k.api.slaveFor(slaveId); !ok {
+		// not much sense in Release()ing the offer here since its owner died
+		offer.Release()
+		k.api.offers().Invalidate(details.Id.GetValue())
+		return "", fmt.Errorf("Slave disappeared (%v) while scheduling task %v", slaveId, task.ID)
+	} else {
+		if task.Offer != nil && task.Offer != offer {
+			return "", fmt.Errorf("task.offer assignment must be idempotent, task %+v: offer %+v", task, offer)
+		}
+		task.Offer = offer
+		//TODO(jdef) FillFromDetails currently allocates fixed (hardwired) cpu and memory resources for all
+		//tasks. This will be fixed once we properly integrate parent-cgroup support into the kubelet-executor.
+		//For now we are completely ignoring the resources specified in the pod.
+		//see: https://github.com/mesosphere/kubernetes-mesos/issues/68
+		task.FillFromDetails(details)
+		if err := k.api.tasks().Update(task); err != nil {
+			offer.Release()
+			return "", err
+		}
+		return slave.HostName, nil
+	}
+}
+
+type queuer struct {
+	lock            sync.Mutex       // shared by condition variables of this struct
+	podUpdates      queue.FIFO       // queue of pod updates to be processed
+	podQueue        *queue.DelayFIFO // queue of pods to be scheduled
+	deltaCond       sync.Cond        // pod changes are available for processing
+	unscheduledCond sync.Cond        // there are unscheduled pods for processing
+}
+
+func newQueuer(store queue.FIFO) *queuer {
+	q := &queuer{
+		podQueue:   queue.NewDelayFIFO(),
+		podUpdates: store,
+	}
+	q.deltaCond.L = &q.lock
+	q.unscheduledCond.L = &q.lock
+	return q
+}
+
+func (q *queuer) installDebugHandlers(mux *http.ServeMux) {
+	mux.HandleFunc("/debug/scheduler/podqueue", func(w http.ResponseWriter, r *http.Request) {
+		for _, x := range q.podQueue.List() {
+			if _, err := io.WriteString(w, fmt.Sprintf("%+v\n", x)); err != nil {
+				break
+			}
+		}
+	})
+	mux.HandleFunc("/debug/scheduler/podstore", func(w http.ResponseWriter, r *http.Request) {
+		for _, x := range q.podUpdates.List() {
+			if _, err := io.WriteString(w, fmt.Sprintf("%+v\n", x)); err != nil {
+				break
+			}
+		}
+	})
+}
+
+// signal that there are probably pod updates waiting to be processed
+func (q *queuer) updatesAvailable() {
+	q.deltaCond.Broadcast()
+}
+
+// delete a pod from the to-be-scheduled queue
+func (q *queuer) dequeue(id string) {
+	q.podQueue.Delete(id)
+}
+
+// re-add a pod to the to-be-scheduled queue, will not overwrite existing pod data (that
+// may have already changed).
+func (q *queuer) requeue(pod *Pod) {
+	// use KeepExisting in case the pod has already been updated (can happen if binding fails
+	// due to constraint violations); we don't want to overwrite a newer entry with stale data.
+	q.podQueue.Add(pod, queue.KeepExisting)
+	q.unscheduledCond.Broadcast()
+}
+
+// same as requeue but calls podQueue.Offer instead of podQueue.Add
+func (q *queuer) reoffer(pod *Pod) {
+	// use KeepExisting in case the pod has already been updated (can happen if binding fails
+	// due to constraint violations); we don't want to overwrite a newer entry with stale data.
+	if q.podQueue.Offer(pod, queue.KeepExisting) {
+		q.unscheduledCond.Broadcast()
+	}
+}
+
+// spawns a go-routine to watch for unscheduled pods and queue them up
+// for scheduling. returns immediately.
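+//
+// A typical wiring (illustrative sketch):
+//
+//	q := newQueuer(podUpdates)
+//	q.Run(terminate)  // spawns the enqueue loop, returns immediately
+//	pod := q.yield()  // blocks until an unscheduled pod is available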
+func (q *queuer) Run(done <-chan struct{}) { + go runtime.Until(func() { + log.Info("Watching for newly created pods") + q.lock.Lock() + defer q.lock.Unlock() + + for { + // limit blocking here for short intervals so that scheduling + // may proceed even if there have been no recent pod changes + p := q.podUpdates.Await(enqueuePopTimeout) + if p == nil { + signalled := runtime.After(q.deltaCond.Wait) + // we've yielded the lock + select { + case <-time.After(enqueueWaitTimeout): + q.deltaCond.Broadcast() // abort Wait() + <-signalled // wait for lock re-acquisition + log.V(4).Infoln("timed out waiting for a pod update") + case <-signalled: + // we've acquired the lock and there may be + // changes for us to process now + } + continue + } + + pod := p.(*Pod) + if pod.Spec.NodeName != "" { + log.V(3).Infof("dequeuing pod for scheduling: %v", pod.Pod.Name) + q.dequeue(pod.GetUID()) + } else { + // use ReplaceExisting because we are always pushing the latest state + now := time.Now() + pod.deadline = &now + if q.podQueue.Offer(pod, queue.ReplaceExisting) { + q.unscheduledCond.Broadcast() + log.V(3).Infof("queued pod for scheduling: %v", pod.Pod.Name) + } else { + log.Warningf("failed to queue pod for scheduling: %v", pod.Pod.Name) + } + } + } + }, 1*time.Second, done) +} + +// implementation of scheduling plugin's NextPod func; see k8s plugin/pkg/scheduler +func (q *queuer) yield() *api.Pod { + log.V(2).Info("attempting to yield a pod") + q.lock.Lock() + defer q.lock.Unlock() + + for { + // limit blocking here to short intervals so that we don't block the + // enqueuer Run() routine for very long + kpod := q.podQueue.Await(yieldPopTimeout) + if kpod == nil { + signalled := runtime.After(q.unscheduledCond.Wait) + // lock is yielded at this point and we're going to wait for either + // a timeout, or a signal that there's data + select { + case <-time.After(yieldWaitTimeout): + q.unscheduledCond.Broadcast() // abort Wait() + <-signalled // wait for the go-routine, and the lock + log.V(4).Infoln("timed out waiting for a pod to yield") + case <-signalled: + // we have acquired the lock, and there + // may be a pod for us to pop now + } + continue + } + + pod := kpod.(*Pod).Pod + if podName, err := cache.MetaNamespaceKeyFunc(pod); err != nil { + log.Warningf("yield unable to understand pod object %+v, will skip: %v", pod, err) + } else if !q.podUpdates.Poll(podName, queue.POP_EVENT) { + log.V(1).Infof("yield popped a transitioning pod, skipping: %+v", pod) + } else if pod.Spec.NodeName != "" { + // should never happen if enqueuePods is filtering properly + log.Warningf("yield popped an already-scheduled pod, skipping: %+v", pod) + } else { + return pod + } + } +} + +type errorHandler struct { + api schedulerInterface + backoff *backoff.Backoff + qr *queuer +} + +// implementation of scheduling plugin's Error func; see plugin/pkg/scheduler +func (k *errorHandler) handleSchedulingError(pod *api.Pod, schedulingErr error) { + + if schedulingErr == noSuchPodErr { + log.V(2).Infof("Not rescheduling non-existent pod %v", pod.Name) + return + } + + log.Infof("Error scheduling %v: %v; retrying", pod.Name, schedulingErr) + defer util.HandleCrash() + + // default upstream scheduler passes pod.Name as binding.PodID + ctx := api.WithNamespace(api.NewDefaultContext(), pod.Namespace) + podKey, err := podtask.MakePodKey(ctx, pod.Name) + if err != nil { + log.Errorf("Failed to construct pod key, aborting scheduling for pod %v: %v", pod.Name, err) + return + } + + k.backoff.GC() + k.api.Lock() + defer k.api.Unlock() 
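+
+	// the pod's task state may have changed while this handler waited for the
+	// lock, so re-resolve it before deciding whether to requeue the pod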
+	switch task, state := k.api.tasks().ForPod(podKey); state {
+	case podtask.StateUnknown:
+		// if we don't have a mapping here any more then someone deleted the pod
+		log.V(2).Infof("Could not resolve pod to task, aborting pod reschedule: %s", podKey)
+		return
+
+	case podtask.StatePending:
+		if task.Has(podtask.Launched) {
+			log.V(2).Infof("Skipping re-scheduling for already-launched pod %v", podKey)
+			return
+		}
+		breakoutEarly := queue.BreakChan(nil)
+		if schedulingErr == noSuitableOffersErr {
+			log.V(3).Infof("adding backoff breakout handler for pod %v", podKey)
+			breakoutEarly = queue.BreakChan(k.api.offers().Listen(podKey, func(offer *mesos.Offer) bool {
+				k.api.Lock()
+				defer k.api.Unlock()
+				switch task, state := k.api.tasks().Get(task.ID); state {
+				case podtask.StatePending:
+					return !task.Has(podtask.Launched) && task.AcceptOffer(offer)
+				default:
+					// no point in continuing to check for matching offers
+					return true
+				}
+			}))
+		}
+		delay := k.backoff.Get(podKey)
+		log.V(3).Infof("requeuing pod %v with delay %v", podKey, delay)
+		k.qr.requeue(&Pod{Pod: pod, delay: &delay, notify: breakoutEarly})
+
+	default:
+		log.V(2).Infof("Task is no longer pending, aborting reschedule for pod %v", podKey)
+	}
+}
+
+type deleter struct {
+	api schedulerInterface
+	qr  *queuer
+}
+
+// currently monitors for "pod deleted" events, upon which deleteOne()
+// is invoked.
+func (k *deleter) Run(updates <-chan queue.Entry, done <-chan struct{}) {
+	go runtime.Until(func() {
+		for {
+			entry := <-updates
+			pod := entry.Value().(*Pod)
+			if entry.Is(queue.DELETE_EVENT) {
+				if err := k.deleteOne(pod); err != nil {
+					log.Error(err)
+				}
+			} else if !entry.Is(queue.POP_EVENT) {
+				k.qr.updatesAvailable()
+			}
+		}
+	}, 1*time.Second, done)
+}
+
+func (k *deleter) deleteOne(pod *Pod) error {
+	ctx := api.WithNamespace(api.NewDefaultContext(), pod.Namespace)
+	podKey, err := podtask.MakePodKey(ctx, pod.Name)
+	if err != nil {
+		return err
+	}
+
+	log.V(2).Infof("pod deleted: %v", podKey)
+
+	// order is important here: we want to make sure we have the lock before
+	// removing the pod from the scheduling queue. this makes the concurrent
+	// execution of scheduler-error-handling and delete-handling easier to
+	// reason about.
+	k.api.Lock()
+	defer k.api.Unlock()
+
+	// prevent the scheduler from attempting to pop this; it's also possible that
+	// it's concurrently being scheduled (somewhere between pod scheduling and
+	// binding) - if so, then we'll end up removing it from taskRegistry which
+	// will abort Bind()ing
+	k.qr.dequeue(pod.GetUID())
+
+	switch task, state := k.api.tasks().ForPod(podKey); state {
+	case podtask.StateUnknown:
+		log.V(2).Infof("Could not resolve pod '%s' to task id", podKey)
+		return noSuchPodErr
+
+	// determine if the task has already been launched to mesos, if not then
+	// cleanup is easier (unregister) since there's no state to sync
+	case podtask.StatePending:
+		if !task.Has(podtask.Launched) {
+			// we've been invoked in between Schedule() and Bind()
+			if task.HasAcceptedOffer() {
+				task.Offer.Release()
+				task.Reset()
+				task.Set(podtask.Deleted)
+				//TODO(jdef) probably want better handling here
+				if err := k.api.tasks().Update(task); err != nil {
+					return err
+				}
+			}
+			k.api.tasks().Unregister(task)
+			return nil
+		}
+		fallthrough
+
+	case podtask.StateRunning:
+		// signal to watchers that the related pod is going down
+		task.Set(podtask.Deleted)
+		if err := k.api.tasks().Update(task); err != nil {
+			log.Errorf("failed to update task w/ Deleted status: %v", err)
+		}
+		return k.api.killTask(task.ID)
+
+	default:
+		log.Infof("cannot kill pod '%s': non-terminal task not found %v", podKey, task.ID)
+		return noSuchTaskErr
+	}
+}
+
+// NewDefaultPluginConfig creates a scheduler plugin config and all supporting background functions.
+func (k *KubernetesScheduler) NewDefaultPluginConfig(terminate <-chan struct{}, mux *http.ServeMux) *PluginConfig {
+	// use ListWatch watching pods using the client by default
+	return k.NewPluginConfig(terminate, mux, createAllPodsLW(k.client))
+}
+
+func (k *KubernetesScheduler) NewPluginConfig(terminate <-chan struct{}, mux *http.ServeMux,
+	podsWatcher *cache.ListWatch) *PluginConfig {
+
+	// Watch and queue pods that need scheduling.
+	updates := make(chan queue.Entry, k.schedcfg.UpdatesBacklog)
+	podUpdates := &podStoreAdapter{queue.NewHistorical(updates)}
+	reflector := cache.NewReflector(podsWatcher, &api.Pod{}, podUpdates, 0)
+
+	// lock that guards critical sections that involve transferring pods from
+	// the store (cache) to the scheduling queue; its purpose is to maintain
+	// an ordering (vs interleaving) of operations that's easier to reason about.
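+	// (the k8smScheduler constructed below embeds that lock; the binder,
+	// error handler, and deleter all acquire it through the schedulerInterface)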
+ kapi := &k8smScheduler{internal: k} + q := newQueuer(podUpdates) + podDeleter := &deleter{ + api: kapi, + qr: q, + } + eh := &errorHandler{ + api: kapi, + backoff: backoff.New(k.schedcfg.InitialPodBackoff.Duration, k.schedcfg.MaxPodBackoff.Duration), + qr: q, + } + startLatch := make(chan struct{}) + eventBroadcaster := record.NewBroadcaster() + runtime.On(startLatch, func() { + eventBroadcaster.StartRecordingToSink(k.client.Events("")) + reflector.Run() // TODO(jdef) should listen for termination + podDeleter.Run(updates, terminate) + q.Run(terminate) + + q.installDebugHandlers(mux) + podtask.InstallDebugHandlers(k.taskRegistry, mux) + }) + return &PluginConfig{ + Config: &plugin.Config{ + MinionLister: nil, + Algorithm: &kubeScheduler{ + api: kapi, + podUpdates: podUpdates, + }, + Binder: &binder{api: kapi}, + NextPod: q.yield, + Error: eh.handleSchedulingError, + Recorder: eventBroadcaster.NewRecorder(api.EventSource{Component: "scheduler"}), + }, + api: kapi, + client: k.client, + qr: q, + deleter: podDeleter, + starting: startLatch, + } +} + +type PluginConfig struct { + *plugin.Config + api schedulerInterface + client *client.Client + qr *queuer + deleter *deleter + starting chan struct{} // startup latch +} + +func NewPlugin(c *PluginConfig) PluginInterface { + return &schedulingPlugin{ + config: c.Config, + api: c.api, + client: c.client, + qr: c.qr, + deleter: c.deleter, + starting: c.starting, + } +} + +type schedulingPlugin struct { + config *plugin.Config + api schedulerInterface + client *client.Client + qr *queuer + deleter *deleter + starting chan struct{} +} + +func (s *schedulingPlugin) Run(done <-chan struct{}) { + defer close(s.starting) + go runtime.Until(s.scheduleOne, pluginRecoveryDelay, done) +} + +// hacked from GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/scheduler.go, +// with the Modeler stuff removed since we don't use it because we have mesos. +func (s *schedulingPlugin) scheduleOne() { + pod := s.config.NextPod() + log.V(3).Infof("Attempting to schedule: %v", pod) + dest, err := s.config.Algorithm.Schedule(pod, s.config.MinionLister) // call kubeScheduler.Schedule + if err != nil { + log.V(1).Infof("Failed to schedule: %v", pod) + s.config.Recorder.Eventf(pod, "failedScheduling", "Error scheduling: %v", err) + s.config.Error(pod, err) + return + } + b := &api.Binding{ + ObjectMeta: api.ObjectMeta{Namespace: pod.Namespace, Name: pod.Name}, + Target: api.ObjectReference{ + Kind: "Node", + Name: dest, + }, + } + if err := s.config.Binder.Bind(b); err != nil { + log.V(1).Infof("Failed to bind pod: %v", err) + s.config.Recorder.Eventf(pod, "failedScheduling", "Binding rejected: %v", err) + s.config.Error(pod, err) + return + } + s.config.Recorder.Eventf(pod, "scheduled", "Successfully assigned %v to %v", pod.Name, dest) +} + +// this pod may be out of sync with respect to the API server registry: +// this pod | apiserver registry +// -------------|---------------------- +// host=.* | 404 ; pod was deleted +// host=.* | 5xx ; failed to sync, try again later? +// host="" | host="" ; perhaps no updates to process? +// host="" | host="..." ; pod has been scheduled and assigned, is there a task assigned? (check TaskIdKey in binding?) +// host="..." | host="" ; pod is no longer scheduled, does it need to be re-queued? +// host="..." | host="..." ; perhaps no updates to process? 
+//
+// TODO(jdef) this needs an integration test
+func (s *schedulingPlugin) reconcilePod(oldPod api.Pod) {
+	log.V(1).Infof("reconcile pod %v", oldPod.Name)
+	ctx := api.WithNamespace(api.NewDefaultContext(), oldPod.Namespace)
+	pod, err := s.client.Pods(api.NamespaceValue(ctx)).Get(oldPod.Name)
+	if err != nil {
+		if errors.IsNotFound(err) {
+			// attempt to delete
+			if err = s.deleter.deleteOne(&Pod{Pod: &oldPod}); err != nil && err != noSuchPodErr && err != noSuchTaskErr {
+				log.Errorf("failed to delete pod: %v: %v", oldPod.Name, err)
+			}
+		} else {
+			//TODO(jdef) other errors should probably trigger a retry (w/ backoff).
+			//For now, drop the pod on the floor
+			log.Warningf("aborting reconciliation for pod %v: %v", oldPod.Name, err)
+		}
+		return
+	}
+	if oldPod.Spec.NodeName != pod.Spec.NodeName {
+		if pod.Spec.NodeName == "" {
+			// pod is unscheduled.
+			// it's possible that we dropped the pod in the scheduler error handler
+			// because of task misalignment with the pod (task.Has(podtask.Launched) == true)
+
+			podKey, err := podtask.MakePodKey(ctx, pod.Name)
+			if err != nil {
+				log.Error(err)
+				return
+			}
+
+			s.api.Lock()
+			defer s.api.Unlock()
+
+			if _, state := s.api.tasks().ForPod(podKey); state != podtask.StateUnknown {
+				//TODO(jdef) reconcile the task
+				log.Errorf("task already registered for pod %v", pod.Name)
+				return
+			}
+
+			now := time.Now()
+			log.V(3).Infof("reoffering pod %v", podKey)
+			s.qr.reoffer(&Pod{
+				Pod:      pod,
+				deadline: &now,
+			})
+		} else {
+			// pod is scheduled.
+			// not sure how this happened behind our backs. attempt to reconstruct
+			// at least a partial podtask.T record.
+			//TODO(jdef) reconcile the task
+			log.Errorf("pod already scheduled: %v", pod.Name)
+		}
+	} else {
+		//TODO(jdef) for now, ignore the fact that the rest of the spec may be different
+		//and assume that our knowledge of the pod aligns with that of the apiserver
+		log.Error("pod reconciliation does not support updates; not yet implemented")
+	}
+}
+
+func parseSelectorOrDie(s string) fields.Selector {
+	selector, err := fields.ParseSelector(s)
+	if err != nil {
+		panic(err)
+	}
+	return selector
+}
+
+// createAllPodsLW returns a listWatch that finds all pods
+func createAllPodsLW(cl *client.Client) *cache.ListWatch {
+	return cache.NewListWatchFromClient(cl, "pods", api.NamespaceAll, parseSelectorOrDie(""))
+}
+
+// Consumes *api.Pod, produces *Pod; the k8s reflector wants to push *api.Pod
+// objects at us, but we want to store the more flexible (Pod) type defined in
+// this package. The adapter implementation facilitates this. It's a little
+// hackish since the object type going in is different from the object type
+// coming out -- you've been warned.
+type podStoreAdapter struct {
+	queue.FIFO
+}
+
+func (psa *podStoreAdapter) Add(obj interface{}) error {
+	pod := obj.(*api.Pod)
+	return psa.FIFO.Add(&Pod{Pod: pod})
+}
+
+func (psa *podStoreAdapter) Update(obj interface{}) error {
+	pod := obj.(*api.Pod)
+	return psa.FIFO.Update(&Pod{Pod: pod})
+}
+
+func (psa *podStoreAdapter) Delete(obj interface{}) error {
+	pod := obj.(*api.Pod)
+	return psa.FIFO.Delete(&Pod{Pod: pod})
+}
+
+func (psa *podStoreAdapter) Get(obj interface{}) (interface{}, bool, error) {
+	pod := obj.(*api.Pod)
+	return psa.FIFO.Get(&Pod{Pod: pod})
+}
+
+// Replace will delete the contents of the store, using instead the
+// given list. This store implementation does NOT take ownership of the list.
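+// (e.g. when the reflector relists, it hands the adapter a fresh slice of
+// *api.Pod; each element is wrapped as a *Pod before reaching the FIFO)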
+func (psa *podStoreAdapter) Replace(objs []interface{}) error {
+	newobjs := make([]interface{}, len(objs))
+	for i, v := range objs {
+		pod := v.(*api.Pod)
+		newobjs[i] = &Pod{Pod: pod}
+	}
+	return psa.FIFO.Replace(newobjs)
+}
diff --git a/contrib/mesos/pkg/scheduler/plugin_test.go b/contrib/mesos/pkg/scheduler/plugin_test.go
new file mode 100644
index 00000000000..637086b2bd9
--- /dev/null
+++ b/contrib/mesos/pkg/scheduler/plugin_test.go
@@ -0,0 +1,700 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package scheduler
+
+import (
+	"fmt"
+	"net/http"
+	"net/http/httptest"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/api/testapi"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/runtime"
+	kutil "github.com/GoogleCloudPlatform/kubernetes/pkg/util"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"
+
+	assertext "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/assert"
+	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/messages"
+	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/queue"
+	schedcfg "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/config"
+	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/ha"
+	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask"
+	log "github.com/golang/glog"
+	mesos "github.com/mesos/mesos-go/mesosproto"
+	util "github.com/mesos/mesos-go/mesosutil"
+	bindings "github.com/mesos/mesos-go/scheduler"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/mock"
+)
+
+// An apiserver mock which partially mocks the pods API
+type TestServer struct {
+	server *httptest.Server
+	stats  map[string]uint
+	lock   sync.Mutex
+}
+
+func NewTestServer(t *testing.T, namespace string, mockPodListWatch *MockPodsListWatch) *TestServer {
+	ts := TestServer{
+		stats: map[string]uint{},
+	}
+	mux := http.NewServeMux()
+
+	mux.HandleFunc(testapi.ResourcePath("pods", namespace, ""), func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+		pods := mockPodListWatch.Pods()
+		w.Write([]byte(runtime.EncodeOrDie(testapi.Codec(), &pods)))
+	})
+
+	podsPrefix := testapi.ResourcePath("pods", namespace, "") + "/"
+	mux.HandleFunc(podsPrefix, func(w http.ResponseWriter, r *http.Request) {
+		name := r.URL.Path[len(podsPrefix):]
+
+		// update statistics for this pod
+		ts.lock.Lock()
+		defer ts.lock.Unlock()
+		ts.stats[name] = ts.stats[name] + 1
+
+		p := mockPodListWatch.GetPod(name)
+		if p != nil {
+			w.WriteHeader(http.StatusOK)
+			w.Write([]byte(runtime.EncodeOrDie(testapi.Codec(), p)))
+			return
+		}
+		w.WriteHeader(http.StatusNotFound)
+	})
+
+	mux.HandleFunc(testapi.ResourcePath("events", namespace, ""), func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	})
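+
+	// any request that falls through the handlers above is unexpected:
+	// report it as a test error and answer 404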
+ + mux.HandleFunc("/", func(res http.ResponseWriter, req *http.Request) { + t.Errorf("unexpected request: %v", req.RequestURI) + res.WriteHeader(http.StatusNotFound) + }) + + ts.server = httptest.NewServer(mux) + return &ts +} +func (ts *TestServer) Stats(name string) uint { + ts.lock.Lock() + defer ts.lock.Unlock() + + return ts.stats[name] +} + +// Create mock of pods ListWatch, usually listening on the apiserver pods watch endpoint +type MockPodsListWatch struct { + ListWatch cache.ListWatch + fakeWatcher *watch.FakeWatcher + list api.PodList + lock sync.Mutex +} + +func NewMockPodsListWatch(initialPodList api.PodList) *MockPodsListWatch { + lw := MockPodsListWatch{ + fakeWatcher: watch.NewFake(), + list: initialPodList, + } + lw.ListWatch = cache.ListWatch{ + WatchFunc: func(resourceVersion string) (watch.Interface, error) { + return lw.fakeWatcher, nil + }, + ListFunc: func() (runtime.Object, error) { + return &lw.list, nil + }, + } + return &lw +} +func (lw *MockPodsListWatch) Pods() api.PodList { + lw.lock.Lock() + defer lw.lock.Unlock() + + return lw.list +} +func (lw *MockPodsListWatch) GetPod(name string) *api.Pod { + lw.lock.Lock() + defer lw.lock.Unlock() + + for _, p := range lw.list.Items { + if p.Name == name { + return &p + } + } + + return nil +} +func (lw *MockPodsListWatch) Add(pod *api.Pod, notify bool) { + lw.lock.Lock() + defer lw.lock.Unlock() + + lw.list.Items = append(lw.list.Items, *pod) + if notify { + lw.fakeWatcher.Add(pod) + } +} +func (lw *MockPodsListWatch) Modify(pod *api.Pod, notify bool) { + lw.lock.Lock() + defer lw.lock.Unlock() + + for i, otherPod := range lw.list.Items { + if otherPod.Name == pod.Name { + lw.list.Items[i] = *pod + if notify { + lw.fakeWatcher.Modify(pod) + } + return + } + } + log.Fatalf("Cannot find pod %v to modify in MockPodsListWatch", pod.Name) +} +func (lw *MockPodsListWatch) Delete(pod *api.Pod, notify bool) { + lw.lock.Lock() + defer lw.lock.Unlock() + + for i, otherPod := range lw.list.Items { + if otherPod.Name == pod.Name { + lw.list.Items = append(lw.list.Items[:i], lw.list.Items[i+1:]...) 
+ if notify { + lw.fakeWatcher.Delete(&otherPod) + } + return + } + } + log.Fatalf("Cannot find pod %v to delete in MockPodsListWatch", pod.Name) +} + +// Create a pod with a given index, requiring one port +func NewTestPod(i int) *api.Pod { + name := fmt.Sprintf("pod%d", i) + return &api.Pod{ + TypeMeta: api.TypeMeta{APIVersion: testapi.Version()}, + ObjectMeta: api.ObjectMeta{ + Name: name, + Namespace: "default", + SelfLink: fmt.Sprintf("http://1.2.3.4/api/v1beta1/pods/%s", name), + }, + Spec: api.PodSpec{ + Containers: []api.Container{ + { + Ports: []api.ContainerPort{ + { + ContainerPort: 8000 + i, + Protocol: api.ProtocolTCP, + }, + }, + }, + }, + }, + Status: api.PodStatus{ + PodIP: fmt.Sprintf("1.2.3.%d", 4+i), + Conditions: []api.PodCondition{ + { + Type: api.PodReady, + Status: api.ConditionTrue, + }, + }, + }, + } +} + +// Offering some cpus and memory and the 8000-9000 port range +func NewTestOffer(i int) *mesos.Offer { + hostname := fmt.Sprintf("h%d", i) + cpus := util.NewScalarResource("cpus", 3.75) + mem := util.NewScalarResource("mem", 940) + var port8000 uint64 = 8000 + var port9000 uint64 = 9000 + ports8000to9000 := mesos.Value_Range{Begin: &port8000, End: &port9000} + ports := util.NewRangesResource("ports", []*mesos.Value_Range{&ports8000to9000}) + return &mesos.Offer{ + Id: util.NewOfferID(fmt.Sprintf("offer%d", i)), + Hostname: &hostname, + SlaveId: util.NewSlaveID(hostname), + Resources: []*mesos.Resource{cpus, mem, ports}, + } +} + +// Add assertions to reason about event streams +type Event struct { + Object runtime.Object + Reason string + Message string +} + +type EventPredicate func(e Event) bool + +type EventAssertions struct { + assert.Assertions +} + +// EventObserver implements record.EventRecorder for the purposes of validation via EventAssertions. +type EventObserver struct { + fifo chan Event +} + +func NewEventObserver() *EventObserver { + return &EventObserver{ + fifo: make(chan Event, 1000), + } +} +func (o *EventObserver) Event(object runtime.Object, reason, message string) { + o.fifo <- Event{Object: object, Reason: reason, Message: message} +} +func (o *EventObserver) Eventf(object runtime.Object, reason, messageFmt string, args ...interface{}) { + o.fifo <- Event{Object: object, Reason: reason, Message: fmt.Sprintf(messageFmt, args...)} +} +func (o *EventObserver) PastEventf(object runtime.Object, timestamp kutil.Time, reason, messageFmt string, args ...interface{}) { + o.fifo <- Event{Object: object, Reason: reason, Message: fmt.Sprintf(messageFmt, args...)} +} + +func (a *EventAssertions) Event(observer *EventObserver, pred EventPredicate, msgAndArgs ...interface{}) bool { + // parse msgAndArgs: first possibly a duration, otherwise a format string with further args + timeout := time.Second * 2 + msg := "event not received" + msgArgStart := 0 + if len(msgAndArgs) > 0 { + switch msgAndArgs[0].(type) { + case time.Duration: + timeout = msgAndArgs[0].(time.Duration) + msgArgStart += 1 + } + } + if len(msgAndArgs) > msgArgStart { + msg = fmt.Sprintf(msgAndArgs[msgArgStart].(string), msgAndArgs[msgArgStart+1:]...) 
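
The msgAndArgs handling just above implements a small variadic convention: an optional leading time.Duration overrides the default timeout, and whatever follows is treated as a format string plus arguments. A self-contained sketch of the same parsing, with the default values copied from the code above:

```go
package main

import (
	"fmt"
	"time"
)

// parseMsgAndArgs mirrors the convention used by EventAssertions.Event:
// an optional leading time.Duration, then an optional format string + args.
func parseMsgAndArgs(msgAndArgs ...interface{}) (time.Duration, string) {
	timeout := 2 * time.Second   // same default as the test helper
	msg := "event not received"  // same default message
	i := 0
	if len(msgAndArgs) > 0 {
		if d, ok := msgAndArgs[0].(time.Duration); ok {
			timeout = d
			i++
		}
	}
	if len(msgAndArgs) > i {
		msg = fmt.Sprintf(msgAndArgs[i].(string), msgAndArgs[i+1:]...)
	}
	return timeout, msg
}

func main() {
	d, m := parseMsgAndArgs(5*time.Second, "pod %s not scheduled", "pod1")
	fmt.Println(d, m) // 5s "pod pod1 not scheduled"
}
```
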
+ } + + // watch events + result := make(chan bool) + stop := make(chan struct{}) + go func() { + for { + select { + case e, ok := <-observer.fifo: + if !ok { + result <- false + return + } else if pred(e) { + log.V(3).Infof("found asserted event for reason '%v': %v", e.Reason, e.Message) + result <- true + return + } else { + log.V(5).Infof("ignoring not-asserted event for reason '%v': %v", e.Reason, e.Message) + } + case _, ok := <-stop: + if !ok { + return + } + } + } + }() + defer close(stop) + + // wait for watch to match or timeout + select { + case matched := <-result: + return matched + case <-time.After(timeout): + return a.Fail(msg) + } +} +func (a *EventAssertions) EventWithReason(observer *EventObserver, reason string, msgAndArgs ...interface{}) bool { + return a.Event(observer, func(e Event) bool { + return e.Reason == reason + }, msgAndArgs...) +} + +type joinableDriver struct { + MockSchedulerDriver + joinFunc func() (mesos.Status, error) +} + +// Join invokes joinFunc if it has been set, otherwise blocks forever +func (m *joinableDriver) Join() (mesos.Status, error) { + if m.joinFunc != nil { + return m.joinFunc() + } + select {} +} + +// Create mesos.TaskStatus for a given task +func newTaskStatusForTask(task *mesos.TaskInfo, state mesos.TaskState) *mesos.TaskStatus { + healthy := state == mesos.TaskState_TASK_RUNNING + ts := float64(time.Now().Nanosecond()) / 1000000000.0 + source := mesos.TaskStatus_SOURCE_EXECUTOR + return &mesos.TaskStatus{ + TaskId: task.TaskId, + State: &state, + SlaveId: task.SlaveId, + ExecutorId: task.Executor.ExecutorId, + Timestamp: &ts, + Healthy: &healthy, + Source: &source, + Data: task.Data, + } +} + +// Test to create the scheduler plugin with an empty plugin config +func TestPlugin_New(t *testing.T) { + assert := assert.New(t) + + c := PluginConfig{} + p := NewPlugin(&c) + assert.NotNil(p) +} + +// Test to create the scheduler plugin with the config returned by the scheduler, +// and play through the whole life cycle of the plugin while creating pods, deleting +// and failing them. +func TestPlugin_LifeCycle(t *testing.T) { + assert := &EventAssertions{*assert.New(t)} + + // create a fake pod watch. 
We use that below to submit new pods to the scheduler + podListWatch := NewMockPodsListWatch(api.PodList{}) + + // create fake apiserver + testApiServer := NewTestServer(t, api.NamespaceDefault, podListWatch) + defer testApiServer.server.Close() + + // create scheduler + testScheduler := New(Config{ + Executor: util.NewExecutorInfo( + util.NewExecutorID("executor-id"), + util.NewCommandInfo("executor-cmd"), + ), + Client: client.NewOrDie(&client.Config{Host: testApiServer.server.URL, Version: testapi.Version()}), + ScheduleFunc: FCFSScheduleFunc, + Schedcfg: *schedcfg.CreateDefaultConfig(), + }) + + assert.NotNil(testScheduler.client, "client is nil") + assert.NotNil(testScheduler.executor, "executor is nil") + assert.NotNil(testScheduler.offers, "offer registry is nil") + + // create scheduler process + schedulerProcess := ha.New(testScheduler) + + // get plugin config from it + c := testScheduler.NewPluginConfig(schedulerProcess.Terminal(), http.DefaultServeMux, &podListWatch.ListWatch) + assert.NotNil(c) + + // make events observable + eventObserver := NewEventObserver() + c.Recorder = eventObserver + + // create plugin + p := NewPlugin(c) + assert.NotNil(p) + + // run plugin + p.Run(schedulerProcess.Terminal()) + defer schedulerProcess.End() + + // init scheduler + err := testScheduler.Init(schedulerProcess.Master(), p, http.DefaultServeMux) + assert.NoError(err) + + // create mock mesos scheduler driver + mockDriver := &joinableDriver{} + mockDriver.On("Start").Return(mesos.Status_DRIVER_RUNNING, nil).Once() + started := mockDriver.Upon() + + mAny := mock.AnythingOfType + mockDriver.On("ReconcileTasks", mAny("[]*mesosproto.TaskStatus")).Return(mesos.Status_DRIVER_RUNNING, nil) + mockDriver.On("SendFrameworkMessage", mAny("*mesosproto.ExecutorID"), mAny("*mesosproto.SlaveID"), mAny("string")). + Return(mesos.Status_DRIVER_RUNNING, nil) + + launchedTasks := make(chan *mesos.TaskInfo, 1) + launchTasksCalledFunc := func(args mock.Arguments) { + taskInfos := args.Get(1).([]*mesos.TaskInfo) + assert.Equal(1, len(taskInfos)) + launchedTasks <- taskInfos[0] + } + mockDriver.On("LaunchTasks", mAny("[]*mesosproto.OfferID"), mAny("[]*mesosproto.TaskInfo"), mAny("*mesosproto.Filters")). 
+		Return(mesos.Status_DRIVER_RUNNING, nil).Run(launchTasksCalledFunc)
+
+	// elect master with mock driver
+	driverFactory := ha.DriverFactory(func() (bindings.SchedulerDriver, error) {
+		return mockDriver, nil
+	})
+	schedulerProcess.Elect(driverFactory)
+	elected := schedulerProcess.Elected()
+
+	// driver will be started
+	<-started
+
+	// tell scheduler to be registered
+	testScheduler.Registered(
+		mockDriver,
+		util.NewFrameworkID("kubernetes-id"),
+		util.NewMasterInfo("master-id", (192<<24)+(168<<16)+(0<<8)+1, 5050),
+	)
+
+	// wait until elected
+	<-elected
+
+	//TODO(jdef) refactor things above here into a test suite setup of some sort
+
+	// fake new, unscheduled pod
+	pod1 := NewTestPod(1)
+	podListWatch.Add(pod1, true) // notify watchers
+
+	// wait for failedScheduling event because there is no offer
+	assert.EventWithReason(eventObserver, "failedScheduling", "failedScheduling event not received")
+
+	// add some matching offer
+	offers1 := []*mesos.Offer{NewTestOffer(1)}
+	testScheduler.ResourceOffers(nil, offers1)
+
+	// and wait for scheduled pod
+	assert.EventWithReason(eventObserver, "scheduled")
+	select {
+	case launchedTask := <-launchedTasks:
+		// report back that the task has been staged, and then started by mesos
+		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_STAGING))
+		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_RUNNING))
+
+		// report back that the task has been lost
+		mockDriver.AssertNumberOfCalls(t, "SendFrameworkMessage", 0)
+		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_LOST))
+
+		// and wait that framework message is sent to executor
+		mockDriver.AssertNumberOfCalls(t, "SendFrameworkMessage", 1)
+
+	case <-time.After(5 * time.Second):
+		t.Fatalf("timed out waiting for launchTasks call")
+	}
+
+	// start another pod
+	podNum := 1
+	startPod := func(offers []*mesos.Offer) (*api.Pod, *mesos.TaskInfo) {
+		podNum++
+
+		// create pod and matching offer
+		pod := NewTestPod(podNum)
+		podListWatch.Add(pod, true) // notify watchers
+		testScheduler.ResourceOffers(mockDriver, offers)
+		assert.EventWithReason(eventObserver, "scheduled")
+
+		// wait for driver.launchTasks call
+		select {
+		case launchedTask := <-launchedTasks:
+			testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_STAGING))
+			testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_RUNNING))
+			return pod, launchedTask
+
+		case <-time.After(5 * time.Second):
+			t.Fatal("timed out waiting for launchTasks")
+			return nil, nil
+		}
+	}
+
+	pod, launchedTask := startPod(offers1)
+
+	// mock driver.KillTask; it should be invoked when a pod is deleted
+	mockDriver.On("KillTask", mAny("*mesosproto.TaskID")).Return(mesos.Status_DRIVER_RUNNING, nil).Run(func(args mock.Arguments) {
+		killedTaskId := *(args.Get(0).(*mesos.TaskID))
+		assert.Equal(*launchedTask.TaskId, killedTaskId, "expected same TaskID as during launch")
+	})
+	killTaskCalled := mockDriver.Upon()
+
+	// stop it again via the apiserver mock
+	podListWatch.Delete(pod, true) // notify watchers
+
+	// and wait for the driver killTask call with the correct TaskId
+	select {
+	case <-killTaskCalled:
+		// report back that the task is finished
+		testScheduler.StatusUpdate(mockDriver, newTaskStatusForTask(launchedTask, mesos.TaskState_TASK_FINISHED))
+
+	case <-time.After(5 * time.Second):
+		t.Fatal("timed out waiting for
KillTask") + } + + // start pods: + // - which are failing while binding, + // - leading to reconciliation + // - with different states on the apiserver + + failPodFromExecutor := func(task *mesos.TaskInfo) { + beforePodLookups := testApiServer.Stats(pod.Name) + status := newTaskStatusForTask(task, mesos.TaskState_TASK_FAILED) + message := messages.CreateBindingFailure + status.Message = &message + testScheduler.StatusUpdate(mockDriver, status) + + // wait until pod is looked up at the apiserver + assertext.EventuallyTrue(t, time.Second, func() bool { + return testApiServer.Stats(pod.Name) == beforePodLookups+1 + }, "expect that reconcilePod will access apiserver for pod %v", pod.Name) + } + + // 1. with pod deleted from the apiserver + pod, launchedTask = startPod(offers1) + podListWatch.Delete(pod, false) // not notifying the watchers + failPodFromExecutor(launchedTask) + + // 2. with pod still on the apiserver, not bound + pod, launchedTask = startPod(offers1) + failPodFromExecutor(launchedTask) + + // 3. with pod still on the apiserver, bound i.e. host!="" + pod, launchedTask = startPod(offers1) + pod.Spec.NodeName = *offers1[0].Hostname + podListWatch.Modify(pod, false) // not notifying the watchers + failPodFromExecutor(launchedTask) + + // 4. with pod still on the apiserver, bound i.e. host!="", notified via ListWatch + pod, launchedTask = startPod(offers1) + pod.Spec.NodeName = *offers1[0].Hostname + podListWatch.Modify(pod, true) // notifying the watchers + time.Sleep(time.Second / 2) + failPodFromExecutor(launchedTask) +} + +func TestDeleteOne_NonexistentPod(t *testing.T) { + assert := assert.New(t) + obj := &MockScheduler{} + reg := podtask.NewInMemoryRegistry() + obj.On("tasks").Return(reg) + + qr := newQueuer(nil) + assert.Equal(0, len(qr.podQueue.List())) + d := &deleter{ + api: obj, + qr: qr, + } + pod := &Pod{Pod: &api.Pod{ + ObjectMeta: api.ObjectMeta{ + Name: "foo", + Namespace: api.NamespaceDefault, + }}} + err := d.deleteOne(pod) + assert.Equal(err, noSuchPodErr) + obj.AssertExpectations(t) +} + +func TestDeleteOne_PendingPod(t *testing.T) { + assert := assert.New(t) + obj := &MockScheduler{} + reg := podtask.NewInMemoryRegistry() + obj.On("tasks").Return(reg) + + pod := &Pod{Pod: &api.Pod{ + ObjectMeta: api.ObjectMeta{ + Name: "foo", + UID: "foo0", + Namespace: api.NamespaceDefault, + }}} + _, err := reg.Register(podtask.New(api.NewDefaultContext(), "bar", *pod.Pod, &mesos.ExecutorInfo{})) + if err != nil { + t.Fatalf("failed to create task: %v", err) + } + + // preconditions + qr := newQueuer(nil) + qr.podQueue.Add(pod, queue.ReplaceExisting) + assert.Equal(1, len(qr.podQueue.List())) + _, found := qr.podQueue.Get("default/foo") + assert.True(found) + + // exec & post conditions + d := &deleter{ + api: obj, + qr: qr, + } + err = d.deleteOne(pod) + assert.Nil(err) + _, found = qr.podQueue.Get("foo0") + assert.False(found) + assert.Equal(0, len(qr.podQueue.List())) + obj.AssertExpectations(t) +} + +func TestDeleteOne_Running(t *testing.T) { + assert := assert.New(t) + obj := &MockScheduler{} + reg := podtask.NewInMemoryRegistry() + obj.On("tasks").Return(reg) + + pod := &Pod{Pod: &api.Pod{ + ObjectMeta: api.ObjectMeta{ + Name: "foo", + UID: "foo0", + Namespace: api.NamespaceDefault, + }}} + task, err := reg.Register(podtask.New(api.NewDefaultContext(), "bar", *pod.Pod, &mesos.ExecutorInfo{})) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + task.Set(podtask.Launched) + err = reg.Update(task) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } 
+ + // preconditions + qr := newQueuer(nil) + qr.podQueue.Add(pod, queue.ReplaceExisting) + assert.Equal(1, len(qr.podQueue.List())) + _, found := qr.podQueue.Get("default/foo") + assert.True(found) + + obj.On("killTask", task.ID).Return(nil) + + // exec & post conditions + d := &deleter{ + api: obj, + qr: qr, + } + err = d.deleteOne(pod) + assert.Nil(err) + _, found = qr.podQueue.Get("foo0") + assert.False(found) + assert.Equal(0, len(qr.podQueue.List())) + obj.AssertExpectations(t) +} + +func TestDeleteOne_badPodNaming(t *testing.T) { + assert := assert.New(t) + obj := &MockScheduler{} + pod := &Pod{Pod: &api.Pod{}} + d := &deleter{ + api: obj, + qr: newQueuer(nil), + } + + err := d.deleteOne(pod) + assert.NotNil(err) + + pod.Pod.ObjectMeta.Name = "foo" + err = d.deleteOne(pod) + assert.NotNil(err) + + pod.Pod.ObjectMeta.Name = "" + pod.Pod.ObjectMeta.Namespace = "bar" + err = d.deleteOne(pod) + assert.NotNil(err) + + obj.AssertExpectations(t) +} diff --git a/contrib/mesos/pkg/scheduler/pod.go b/contrib/mesos/pkg/scheduler/pod.go new file mode 100644 index 00000000000..4a9a9388b8f --- /dev/null +++ b/contrib/mesos/pkg/scheduler/pod.go @@ -0,0 +1,80 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package scheduler + +import ( + "fmt" + "time" + + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/queue" + "github.com/GoogleCloudPlatform/kubernetes/pkg/api" + "github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache" +) + +// wrapper for the k8s pod type so that we can define additional methods on a "pod" +type Pod struct { + *api.Pod + deadline *time.Time + delay *time.Duration + notify queue.BreakChan +} + +// implements Copyable +func (p *Pod) Copy() queue.Copyable { + if p == nil { + return nil + } + //TODO(jdef) we may need a better "deep-copy" implementation + pod := *(p.Pod) + return &Pod{Pod: &pod} +} + +// implements Unique +func (p *Pod) GetUID() string { + if id, err := cache.MetaNamespaceKeyFunc(p.Pod); err != nil { + panic(fmt.Sprintf("failed to determine pod id for '%+v'", p.Pod)) + } else { + return id + } +} + +// implements Deadlined +func (dp *Pod) Deadline() (time.Time, bool) { + if dp.deadline != nil { + return *(dp.deadline), true + } + return time.Time{}, false +} + +func (dp *Pod) GetDelay() time.Duration { + if dp.delay != nil { + return *(dp.delay) + } + return 0 +} + +func (p *Pod) Breaker() queue.BreakChan { + return p.notify +} + +func (p *Pod) String() string { + displayDeadline := "" + if deadline, ok := p.Deadline(); ok { + displayDeadline = deadline.String() + } + return fmt.Sprintf("{pod:%v, deadline:%v, delay:%v}", p.Pod.Name, displayDeadline, p.GetDelay()) +} diff --git a/contrib/mesos/pkg/scheduler/podtask/debug.go b/contrib/mesos/pkg/scheduler/podtask/debug.go new file mode 100644 index 00000000000..72d1a6b788d --- /dev/null +++ b/contrib/mesos/pkg/scheduler/podtask/debug.go @@ -0,0 +1,54 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package podtask + +import ( + "fmt" + "io" + "net/http" + + log "github.com/golang/glog" +) + +//TODO(jdef) we use a Locker to guard against concurrent task state changes, but it would be +//really, really nice to avoid doing this. Maybe someday the registry won't return data ptrs +//but plain structs instead. +func InstallDebugHandlers(reg Registry, mux *http.ServeMux) { + mux.HandleFunc("/debug/registry/tasks", func(w http.ResponseWriter, r *http.Request) { + //TODO(jdef) support filtering tasks based on status + alltasks := reg.List(nil) + io.WriteString(w, fmt.Sprintf("task_count=%d\n", len(alltasks))) + for _, task := range alltasks { + if err := func() (err error) { + podName := task.Pod.Name + podNamespace := task.Pod.Namespace + offerId := "" + if task.Offer != nil { + offerId = task.Offer.Id() + } + _, err = io.WriteString(w, fmt.Sprintf("%v\t%v/%v\t%v\t%v\n", task.ID, podNamespace, podName, task.State, offerId)) + return + }(); err != nil { + log.Warningf("aborting debug handler: %v", err) + break // stop listing on I/O errors + } + } + if flusher, ok := w.(http.Flusher); ok { + flusher.Flush() + } + }) +} diff --git a/contrib/mesos/pkg/scheduler/podtask/doc.go b/contrib/mesos/pkg/scheduler/podtask/doc.go new file mode 100644 index 00000000000..7c36ae5116b --- /dev/null +++ b/contrib/mesos/pkg/scheduler/podtask/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package podtask maps Kubernetes pods to Mesos tasks. +package podtask diff --git a/contrib/mesos/pkg/scheduler/podtask/leaky.go b/contrib/mesos/pkg/scheduler/podtask/leaky.go new file mode 100644 index 00000000000..a0a66d7edc5 --- /dev/null +++ b/contrib/mesos/pkg/scheduler/podtask/leaky.go @@ -0,0 +1,29 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package podtask + +// Concepts that have leaked to where they should not have. 
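
InstallDebugHandlers above exposes the task registry as a plain-text listing under /debug/registry/tasks, bailing out on the first write error. A self-contained sketch of that endpoint shape, with a hypothetical taskRow standing in for podtask.T:

```go
package main

import (
	"fmt"
	"io"
	"net/http"
)

// taskRow is a stand-in for podtask.T; the real handler prints
// ID, namespace/name, state and offer id per line.
type taskRow struct {
	ID, Pod, State string
}

func main() {
	tasks := []taskRow{{"pod.123", "default/pod1", "pending"}}

	mux := http.NewServeMux()
	mux.HandleFunc("/debug/registry/tasks", func(w http.ResponseWriter, r *http.Request) {
		io.WriteString(w, fmt.Sprintf("task_count=%d\n", len(tasks)))
		for _, t := range tasks {
			if _, err := io.WriteString(w, fmt.Sprintf("%v\t%v\t%v\n", t.ID, t.Pod, t.State)); err != nil {
				break // stop listing on I/O errors, as the real handler does
			}
		}
		if flusher, ok := w.(http.Flusher); ok {
			flusher.Flush()
		}
	})

	http.ListenAndServe(":8080", mux) // blocks; sketch only
}
```
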
+
+import (
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/registry/etcd"
+)
+
+// MakePodKey constructs etcd paths to pod items, enforcing namespace rules.
+func MakePodKey(ctx api.Context, id string) (string, error) {
+	return etcd.MakeEtcdItemKey(ctx, PodPath, id)
+}
diff --git a/contrib/mesos/pkg/scheduler/podtask/pod_task.go b/contrib/mesos/pkg/scheduler/podtask/pod_task.go
new file mode 100644
index 00000000000..a90aa1d3c00
--- /dev/null
+++ b/contrib/mesos/pkg/scheduler/podtask/pod_task.go
@@ -0,0 +1,374 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package podtask
+
+import (
+	"fmt"
+	"strings"
+	"time"
+
+	"code.google.com/p/go-uuid/uuid"
+	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers"
+	annotation "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/meta"
+	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/metrics"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
+	"github.com/gogo/protobuf/proto"
+	log "github.com/golang/glog"
+	mesos "github.com/mesos/mesos-go/mesosproto"
+	mutil "github.com/mesos/mesos-go/mesosutil"
+)
+
+const (
+	containerCpus = 0.25 // initial CPU allocated for executor
+	containerMem  = 64   // initial MB of memory allocated for executor
+)
+
+type StateType int
+
+const (
+	StatePending StateType = iota
+	StateRunning
+	StateFinished
+	StateUnknown
+)
+
+type FlagType string
+
+const (
+	Launched = FlagType("launched")
+	Bound    = FlagType("bound")
+	Deleted  = FlagType("deleted")
+)
+
+// A struct that describes a pod task.
+type T struct {
+	ID          string
+	Pod         api.Pod
+	Spec        Spec
+	Offer       offers.Perishable // thread-safe
+	State       StateType
+	Flags       map[FlagType]struct{}
+	CreateTime  time.Time
+	UpdatedTime time.Time // time of the most recent StatusUpdate we've seen from the mesos master
+
+	podStatus  api.PodStatus
+	executor   *mesos.ExecutorInfo // readonly
+	podKey     string
+	launchTime time.Time
+	bindTime   time.Time
+	mapper     HostPortMappingType
+}
+
+type Spec struct {
+	SlaveID string
+	CPU     float64
+	Memory  float64
+	PortMap []HostPortMapping
+	Ports   []uint64
+	Data    []byte
+}
+
+// mostly-clone this pod task. the clone will actually share some fields:
+//   - executor // OK because it's read only
+//   - Offer    // OK because it guarantees safe concurrent access
+func (t *T) Clone() *T {
+	if t == nil {
+		return nil
+	}
+
+	// shallow-copy
+	clone := *t
+
+	// deep copy
+	(&t.Spec).copyTo(&clone.Spec)
+	clone.Flags = map[FlagType]struct{}{}
+	for k := range t.Flags {
+		clone.Flags[k] = struct{}{}
+	}
+	return &clone
+}
+
+func (old *Spec) copyTo(new *Spec) {
+	if len(old.PortMap) > 0 {
+		new.PortMap = append(([]HostPortMapping)(nil), old.PortMap...)
+	}
+	if len(old.Ports) > 0 {
+		new.Ports = append(([]uint64)(nil), old.Ports...)
+	}
+	if len(old.Data) > 0 {
+		new.Data = append(([]byte)(nil), old.Data...)
+	}
+}
+
+func (t *T) HasAcceptedOffer() bool {
+	return t.Spec.SlaveID != ""
+}
+
+func (t *T) GetOfferId() string {
+	if t.Offer == nil {
+		return ""
+	}
+	return t.Offer.Details().Id.GetValue()
+}
+
+func generateTaskName(pod *api.Pod) string {
+	ns := pod.Namespace
+	if ns == "" {
+		ns = api.NamespaceDefault
+	}
+	return fmt.Sprintf("%s.%s.pods", pod.Name, ns)
+}
+
+func (t *T) BuildTaskInfo() *mesos.TaskInfo {
+	info := &mesos.TaskInfo{
+		Name:     proto.String(generateTaskName(&t.Pod)),
+		TaskId:   mutil.NewTaskID(t.ID),
+		SlaveId:  mutil.NewSlaveID(t.Spec.SlaveID),
+		Executor: t.executor,
+		Data:     t.Spec.Data,
+		Resources: []*mesos.Resource{
+			mutil.NewScalarResource("cpus", t.Spec.CPU),
+			mutil.NewScalarResource("mem", t.Spec.Memory),
+		},
+	}
+	if portsResource := rangeResource("ports", t.Spec.Ports); portsResource != nil {
+		info.Resources = append(info.Resources, portsResource)
+	}
+	return info
+}
+
+// Fill the Spec in the T; should be called during k8s scheduling, before binding.
+// TODO(jdef): remove hardcoded values and make use of actual pod resource settings
+func (t *T) FillFromDetails(details *mesos.Offer) error {
+	if details == nil {
+		//programming error
+		panic("offer details are nil")
+	}
+
+	log.V(3).Infof("Recording offer(s) %v against pod %v", details.Id, t.Pod.Name)
+
+	t.Spec = Spec{
+		SlaveID: details.GetSlaveId().GetValue(),
+		CPU:     containerCpus,
+		Memory:  containerMem,
+	}
+
+	if mapping, err := t.mapper.Generate(t, details); err != nil {
+		t.Reset()
+		return err
+	} else {
+		ports := []uint64{}
+		for _, entry := range mapping {
+			ports = append(ports, entry.OfferPort)
+		}
+		t.Spec.PortMap = mapping
+		t.Spec.Ports = ports
+	}
+
+	// the executor's hostname needs to match that of the offer, otherwise
+	// the kubelet node status checker/updater is very unhappy
+	const HOSTNAME_OVERRIDE_FLAG = "--hostname-override="
+	hostname := details.GetHostname() // required field, non-empty
+	hostnameOverride := HOSTNAME_OVERRIDE_FLAG + hostname
+
+	argv := t.executor.Command.Arguments
+	overwrite := false
+	for i, arg := range argv {
+		if strings.HasPrefix(arg, HOSTNAME_OVERRIDE_FLAG) {
+			overwrite = true
+			argv[i] = hostnameOverride
+			break
+		}
+	}
+	if !overwrite {
+		t.executor.Command.Arguments = append(argv, hostnameOverride)
+	}
+	return nil
+}
+
+// Clear offer-related details from the task; should be called if/when an offer
+// has already been assigned to a task but for some reason is no longer valid.
+func (t *T) Reset() {
+	log.V(3).Infof("Clearing offer(s) from pod %v", t.Pod.Name)
+	t.Offer = nil
+	t.Spec = Spec{}
+}
+
+func (t *T) AcceptOffer(offer *mesos.Offer) bool {
+	if offer == nil {
+		return false
+	}
+	var (
+		cpus float64 = 0
+		mem  float64 = 0
+	)
+	for _, resource := range offer.Resources {
+		if resource.GetName() == "cpus" {
+			cpus = *resource.GetScalar().Value
+		}
+
+		if resource.GetName() == "mem" {
+			mem = *resource.GetScalar().Value
+		}
+	}
+	if _, err := t.mapper.Generate(t, offer); err != nil {
+		log.V(3).Info(err)
+		return false
+	}
+
+	// for now hard-coded, constant values are used for cpus and mem. This is necessary
+	// until parent-cgroup integration is finished for mesos and k8sm. Then the k8sm
+	// executor can become the parent of pods and subsume their resource usage and
+	// therefore be compliant with expectations of mesos executors w/ respect to
+	// resource allocation and management.
+ // + // TODO(jdef): remove hardcoded values and make use of actual pod resource settings + if (cpus < containerCpus) || (mem < containerMem) { + log.V(3).Infof("not enough resources: cpus: %f mem: %f", cpus, mem) + return false + } + return true +} + +func (t *T) Set(f FlagType) { + t.Flags[f] = struct{}{} + if Launched == f { + t.launchTime = time.Now() + queueWaitTime := t.launchTime.Sub(t.CreateTime) + metrics.QueueWaitTime.Observe(metrics.InMicroseconds(queueWaitTime)) + } +} + +func (t *T) Has(f FlagType) (exists bool) { + _, exists = t.Flags[f] + return +} + +func New(ctx api.Context, id string, pod api.Pod, executor *mesos.ExecutorInfo) (*T, error) { + if executor == nil { + return nil, fmt.Errorf("illegal argument: executor was nil") + } + key, err := MakePodKey(ctx, pod.Name) + if err != nil { + return nil, err + } + if id == "" { + id = "pod." + uuid.NewUUID().String() + } + task := &T{ + ID: id, + Pod: pod, + State: StatePending, + podKey: key, + mapper: MappingTypeForPod(&pod), + Flags: make(map[FlagType]struct{}), + executor: proto.Clone(executor).(*mesos.ExecutorInfo), + } + task.CreateTime = time.Now() + return task, nil +} + +func (t *T) SaveRecoveryInfo(dict map[string]string) { + dict[annotation.TaskIdKey] = t.ID + dict[annotation.SlaveIdKey] = t.Spec.SlaveID + dict[annotation.OfferIdKey] = t.Offer.Details().Id.GetValue() + dict[annotation.ExecutorIdKey] = t.executor.ExecutorId.GetValue() +} + +// reconstruct a task from metadata stashed in a pod entry. there are limited pod states that +// support reconstruction. if we expect to be able to reconstruct state but encounter errors +// in the process then those errors are returned. if the pod is in a seemingly valid state but +// otherwise does not support task reconstruction return false. if we're able to reconstruct +// state then return a reconstructed task and true. +// +// at this time task reconstruction is only supported for pods that have been annotated with +// binding metadata, which implies that they've previously been associated with a task and +// that mesos knows about it. +// +// assumes that the pod data comes from the k8s registry and reflects the desired state. +// +func RecoverFrom(pod api.Pod) (*T, bool, error) { + // we only expect annotations if pod has been bound, which implies that it has already + // been scheduled and launched + if pod.Spec.NodeName == "" && len(pod.Annotations) == 0 { + log.V(1).Infof("skipping recovery for unbound pod %v/%v", pod.Namespace, pod.Name) + return nil, false, nil + } + + // only process pods that are not in a terminal state + switch pod.Status.Phase { + case api.PodPending, api.PodRunning, api.PodUnknown: // continue + default: + log.V(1).Infof("skipping recovery for terminal pod %v/%v", pod.Namespace, pod.Name) + return nil, false, nil + } + + ctx := api.WithNamespace(api.NewDefaultContext(), pod.Namespace) + key, err := MakePodKey(ctx, pod.Name) + if err != nil { + return nil, false, err + } + + //TODO(jdef) recover ports (and other resource requirements?) from the pod spec as well + + now := time.Now() + t := &T{ + Pod: pod, + CreateTime: now, + podKey: key, + State: StatePending, // possibly running? 
mesos will tell us during reconciliation + Flags: make(map[FlagType]struct{}), + mapper: MappingTypeForPod(&pod), + launchTime: now, + bindTime: now, + } + var ( + offerId string + hostname string + ) + for _, k := range []string{ + annotation.BindingHostKey, + annotation.TaskIdKey, + annotation.SlaveIdKey, + annotation.OfferIdKey, + annotation.ExecutorIdKey, + } { + v, found := pod.Annotations[k] + if !found { + return nil, false, fmt.Errorf("incomplete metadata: missing value for pod annotation: %v", k) + } + switch k { + case annotation.BindingHostKey: + hostname = v + case annotation.SlaveIdKey: + t.Spec.SlaveID = v + case annotation.OfferIdKey: + offerId = v + case annotation.TaskIdKey: + t.ID = v + case annotation.ExecutorIdKey: + // this is nowhere near sufficient to re-launch a task, but we really just + // want this for tracking + t.executor = &mesos.ExecutorInfo{ExecutorId: mutil.NewExecutorID(v)} + } + } + t.Offer = offers.Expired(offerId, hostname, 0) + t.Flags[Launched] = struct{}{} + t.Flags[Bound] = struct{}{} + return t, true, nil +} diff --git a/contrib/mesos/pkg/scheduler/podtask/pod_task_test.go b/contrib/mesos/pkg/scheduler/podtask/pod_task_test.go new file mode 100644 index 00000000000..02506c5df9a --- /dev/null +++ b/contrib/mesos/pkg/scheduler/podtask/pod_task_test.go @@ -0,0 +1,153 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package podtask
+
+import (
+	"testing"
+
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
+	mesos "github.com/mesos/mesos-go/mesosproto"
+	mutil "github.com/mesos/mesos-go/mesosutil"
+)
+
+const (
+	t_min_cpu = 128
+	t_min_mem = 128
+)
+
+func fakePodTask(id string) (*T, error) {
+	return New(api.NewDefaultContext(), "", api.Pod{
+		ObjectMeta: api.ObjectMeta{
+			Name:      id,
+			Namespace: api.NamespaceDefault,
+		},
+	}, &mesos.ExecutorInfo{})
+}
+
+func TestEmptyOffer(t *testing.T) {
+	t.Parallel()
+	task, err := fakePodTask("foo")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if ok := task.AcceptOffer(nil); ok {
+		t.Fatalf("accepted nil offer")
+	}
+	if ok := task.AcceptOffer(&mesos.Offer{}); ok {
+		t.Fatalf("accepted empty offer")
+	}
+}
+
+func TestNoPortsInPodOrOffer(t *testing.T) {
+	t.Parallel()
+	task, err := fakePodTask("foo")
+	if err != nil || task == nil {
+		t.Fatal(err)
+	}
+
+	offer := &mesos.Offer{
+		Resources: []*mesos.Resource{
+			mutil.NewScalarResource("cpus", 0.001),
+			mutil.NewScalarResource("mem", 0.001),
+		},
+	}
+	if ok := task.AcceptOffer(offer); ok {
+		t.Fatalf("accepted offer %v", offer)
+	}
+
+	offer = &mesos.Offer{
+		Resources: []*mesos.Resource{
+			mutil.NewScalarResource("cpus", t_min_cpu),
+			mutil.NewScalarResource("mem", t_min_mem),
+		},
+	}
+	if ok := task.AcceptOffer(offer); !ok {
+		t.Fatalf("did not accept offer %v", offer)
+	}
+}
+
+func TestAcceptOfferPorts(t *testing.T) {
+	t.Parallel()
+	task, _ := fakePodTask("foo")
+	pod := &task.Pod
+
+	offer := &mesos.Offer{
+		Resources: []*mesos.Resource{
+			mutil.NewScalarResource("cpus", t_min_cpu),
+			mutil.NewScalarResource("mem", t_min_mem),
+			rangeResource("ports", []uint64{1, 1}),
+		},
+	}
+	if ok := task.AcceptOffer(offer); !ok {
+		t.Fatalf("did not accept offer %v", offer)
+	}
+
+	pod.Spec = api.PodSpec{
+		Containers: []api.Container{{
+			Ports: []api.ContainerPort{{
+				HostPort: 123,
+			}},
+		}},
+	}
+	if ok := task.AcceptOffer(offer); ok {
+		t.Fatalf("accepted offer %v", offer)
+	}
+
+	pod.Spec.Containers[0].Ports[0].HostPort = 1
+	if ok := task.AcceptOffer(offer); !ok {
+		t.Fatalf("did not accept offer %v", offer)
+	}
+
+	pod.Spec.Containers[0].Ports[0].HostPort = 0
+	if ok := task.AcceptOffer(offer); !ok {
+		t.Fatalf("did not accept offer %v", offer)
+	}
+
+	offer.Resources = []*mesos.Resource{
+		mutil.NewScalarResource("cpus", t_min_cpu),
+		mutil.NewScalarResource("mem", t_min_mem),
+	}
+	if ok := task.AcceptOffer(offer); ok {
+		t.Fatalf("accepted offer %v", offer)
+	}
+
+	pod.Spec.Containers[0].Ports[0].HostPort = 1
+	if ok := task.AcceptOffer(offer); ok {
+		t.Fatalf("accepted offer %v", offer)
+	}
+}
+
+func TestGeneratePodName(t *testing.T) {
+	p := &api.Pod{
+		ObjectMeta: api.ObjectMeta{
+			Name:      "foo",
+			Namespace: "bar",
+		},
+	}
+	name := generateTaskName(p)
+	expected := "foo.bar.pods"
+	if name != expected {
+		t.Fatalf("expected %q instead of %q", expected, name)
+	}
+
+	p.Namespace = ""
+	name = generateTaskName(p)
+	expected = "foo.default.pods"
+	if name != expected {
+		t.Fatalf("expected %q instead of %q", expected, name)
+	}
}
diff --git a/contrib/mesos/pkg/scheduler/podtask/port_mapping.go b/contrib/mesos/pkg/scheduler/podtask/port_mapping.go
new file mode 100644
index 00000000000..9c90ef15b70
--- /dev/null
+++ b/contrib/mesos/pkg/scheduler/podtask/port_mapping.go
@@ -0,0 +1,185 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package podtask
+
+import (
+	"fmt"
+
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
+	log "github.com/golang/glog"
+	mesos "github.com/mesos/mesos-go/mesosproto"
+)
+
+type HostPortMappingType string
+
+const (
+	// maps a Container.HostPort to the same exact offered host port, ignores .HostPort = 0
+	HostPortMappingFixed HostPortMappingType = "fixed"
+	// same as HostPortMappingFixed, except that .HostPort of 0 is mapped to any port offered
+	HostPortMappingWildcard HostPortMappingType = "wildcard"
+)
+
+type HostPortMapper interface {
+	// abstracts the way that host ports are mapped to pod container ports
+	Generate(t *T, offer *mesos.Offer) ([]HostPortMapping, error)
+}
+
+type HostPortMapping struct {
+	ContainerIdx int // index of the container in the pod spec
+	PortIdx      int // index of the port in a container's port spec
+	OfferPort    uint64
+}
+
+func (self HostPortMappingType) Generate(t *T, offer *mesos.Offer) ([]HostPortMapping, error) {
+	switch self {
+	case HostPortMappingWildcard:
+		return wildcardHostPortMapping(t, offer)
+	case HostPortMappingFixed:
+	default:
+		log.Warningf("illegal host-port mapping spec %q, defaulting to %q", self, HostPortMappingFixed)
+	}
+	return defaultHostPortMapping(t, offer)
+}
+
+type PortAllocationError struct {
+	PodId string
+	Ports []uint64
+}
+
+func (err *PortAllocationError) Error() string {
+	return fmt.Sprintf("Could not schedule pod %s: %d port(s) could not be allocated", err.PodId, len(err.Ports))
+}
+
+type DuplicateHostPortError struct {
+	m1, m2 HostPortMapping
+}
+
+func (err *DuplicateHostPortError) Error() string {
+	return fmt.Sprintf(
+		"Host port %d is specified for container %d, port %d and container %d, port %d",
+		err.m1.OfferPort, err.m1.ContainerIdx, err.m1.PortIdx, err.m2.ContainerIdx, err.m2.PortIdx)
+}
+
+// wildcard k8s host port mapping implementation: hostPort == 0 gets mapped to any available offer port
+func wildcardHostPortMapping(t *T, offer *mesos.Offer) ([]HostPortMapping, error) {
+	mapping, err := defaultHostPortMapping(t, offer)
+	if err != nil {
+		return nil, err
+	}
+	taken := make(map[uint64]struct{})
+	for _, entry := range mapping {
+		taken[entry.OfferPort] = struct{}{}
+	}
+	wildports := []HostPortMapping{}
+	for i, container := range t.Pod.Spec.Containers {
+		for pi, port := range container.Ports {
+			if port.HostPort == 0 {
+				wildports = append(wildports, HostPortMapping{
+					ContainerIdx: i,
+					PortIdx:      pi,
+				})
+			}
+		}
+	}
+	remaining := len(wildports)
+	foreachRange(offer, "ports", func(bp, ep uint64) {
+		log.V(3).Infof("Searching for wildcard port in range {%d:%d}", bp, ep)
+		// iterate by index: ranging by value would mutate a copy, so an entry
+		// assigned in one range callback would still look unassigned in the
+		// next one and could be mapped twice
+		for i := range wildports {
+			if wildports[i].OfferPort != 0 {
+				continue
+			}
+			for port := bp; port <= ep && remaining > 0; port++ {
+				if _, inuse := taken[port]; inuse {
+					continue
+				}
+				wildports[i].OfferPort = port
+				mapping = append(mapping, wildports[i])
+				remaining--
+				taken[port] = struct{}{}
+				break
+			}
+		}
+	})
+	if remaining > 0 {
+		err := &PortAllocationError{
+			PodId: t.Pod.Name,
+		}
+		// it doesn't make sense to include a port list here because they were all zero (wildcards)
+		return
nil, err + } + return mapping, nil +} + +// default k8s host port mapping implementation: hostPort == 0 means containerPort remains pod-private, and so +// no offer ports will be mapped to such Container ports. +func defaultHostPortMapping(t *T, offer *mesos.Offer) ([]HostPortMapping, error) { + requiredPorts := make(map[uint64]HostPortMapping) + mapping := []HostPortMapping{} + for i, container := range t.Pod.Spec.Containers { + // strip all port==0 from this array; k8s already knows what to do with zero- + // ports (it does not create 'port bindings' on the minion-host); we need to + // remove the wildcards from this array since they don't consume host resources + for pi, port := range container.Ports { + if port.HostPort == 0 { + continue // ignore + } + m := HostPortMapping{ + ContainerIdx: i, + PortIdx: pi, + OfferPort: uint64(port.HostPort), + } + if entry, inuse := requiredPorts[uint64(port.HostPort)]; inuse { + return nil, &DuplicateHostPortError{entry, m} + } + requiredPorts[uint64(port.HostPort)] = m + } + } + foreachRange(offer, "ports", func(bp, ep uint64) { + for port := range requiredPorts { + log.V(3).Infof("evaluating port range {%d:%d} %d", bp, ep, port) + if (bp <= port) && (port <= ep) { + mapping = append(mapping, requiredPorts[port]) + delete(requiredPorts, port) + } + } + }) + unsatisfiedPorts := len(requiredPorts) + if unsatisfiedPorts > 0 { + err := &PortAllocationError{ + PodId: t.Pod.Name, + } + for p := range requiredPorts { + err.Ports = append(err.Ports, p) + } + return nil, err + } + return mapping, nil +} + +const PortMappingLabelKey = "k8s.mesosphere.io/portMapping" + +func MappingTypeForPod(pod *api.Pod) HostPortMappingType { + filter := map[string]string{ + PortMappingLabelKey: string(HostPortMappingFixed), + } + selector := labels.Set(filter).AsSelector() + if selector.Matches(labels.Set(pod.Labels)) { + return HostPortMappingFixed + } + return HostPortMappingWildcard +} diff --git a/contrib/mesos/pkg/scheduler/podtask/port_mapping_test.go b/contrib/mesos/pkg/scheduler/podtask/port_mapping_test.go new file mode 100644 index 00000000000..d5c911f3bf2 --- /dev/null +++ b/contrib/mesos/pkg/scheduler/podtask/port_mapping_test.go @@ -0,0 +1,205 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package podtask + +import ( + "testing" + + "github.com/GoogleCloudPlatform/kubernetes/pkg/api" + mesos "github.com/mesos/mesos-go/mesosproto" +) + +func TestDefaultHostPortMatching(t *testing.T) { + t.Parallel() + task, _ := fakePodTask("foo") + pod := &task.Pod + + offer := &mesos.Offer{ + Resources: []*mesos.Resource{ + rangeResource("ports", []uint64{1, 1}), + }, + } + mapping, err := defaultHostPortMapping(task, offer) + if err != nil { + t.Fatal(err) + } + if len(mapping) > 0 { + t.Fatalf("Found mappings for a pod without ports: %v", pod) + } + + //-- + pod.Spec = api.PodSpec{ + Containers: []api.Container{{ + Ports: []api.ContainerPort{{ + HostPort: 123, + }, { + HostPort: 123, + }}, + }}, + } + task, err = New(api.NewDefaultContext(), "", *pod, &mesos.ExecutorInfo{}) + if err != nil { + t.Fatal(err) + } + _, err = defaultHostPortMapping(task, offer) + if err, _ := err.(*DuplicateHostPortError); err == nil { + t.Fatal("Expected duplicate port error") + } else if err.m1.OfferPort != 123 { + t.Fatal("Expected duplicate host port 123") + } +} + +func TestWildcardHostPortMatching(t *testing.T) { + t.Parallel() + task, _ := fakePodTask("foo") + pod := &task.Pod + + offer := &mesos.Offer{} + mapping, err := wildcardHostPortMapping(task, offer) + if err != nil { + t.Fatal(err) + } + if len(mapping) > 0 { + t.Fatalf("Found mappings for an empty offer and a pod without ports: %v", pod) + } + + //-- + offer = &mesos.Offer{ + Resources: []*mesos.Resource{ + rangeResource("ports", []uint64{1, 1}), + }, + } + mapping, err = wildcardHostPortMapping(task, offer) + if err != nil { + t.Fatal(err) + } + if len(mapping) > 0 { + t.Fatalf("Found mappings for a pod without ports: %v", pod) + } + + //-- + pod.Spec = api.PodSpec{ + Containers: []api.Container{{ + Ports: []api.ContainerPort{{ + HostPort: 123, + }}, + }}, + } + task, err = New(api.NewDefaultContext(), "", *pod, &mesos.ExecutorInfo{}) + if err != nil { + t.Fatal(err) + } + mapping, err = wildcardHostPortMapping(task, offer) + if err == nil { + t.Fatalf("expected error instead of mappings: %#v", mapping) + } else if err, _ := err.(*PortAllocationError); err == nil { + t.Fatal("Expected port allocation error") + } else if !(len(err.Ports) == 1 && err.Ports[0] == 123) { + t.Fatal("Expected port allocation error for host port 123") + } + + //-- + pod.Spec = api.PodSpec{ + Containers: []api.Container{{ + Ports: []api.ContainerPort{{ + HostPort: 0, + }, { + HostPort: 123, + }}, + }}, + } + task, err = New(api.NewDefaultContext(), "", *pod, &mesos.ExecutorInfo{}) + if err != nil { + t.Fatal(err) + } + mapping, err = wildcardHostPortMapping(task, offer) + if err, _ := err.(*PortAllocationError); err == nil { + t.Fatal("Expected port allocation error") + } else if !(len(err.Ports) == 1 && err.Ports[0] == 123) { + t.Fatal("Expected port allocation error for host port 123") + } + + //-- + pod.Spec = api.PodSpec{ + Containers: []api.Container{{ + Ports: []api.ContainerPort{{ + HostPort: 0, + }, { + HostPort: 1, + }}, + }}, + } + task, err = New(api.NewDefaultContext(), "", *pod, &mesos.ExecutorInfo{}) + if err != nil { + t.Fatal(err) + } + mapping, err = wildcardHostPortMapping(task, offer) + if err, _ := err.(*PortAllocationError); err == nil { + t.Fatal("Expected port allocation error") + } else if len(err.Ports) != 0 { + t.Fatal("Expected port allocation error for wildcard port") + } + + //-- + offer = &mesos.Offer{ + Resources: []*mesos.Resource{ + rangeResource("ports", []uint64{1, 2}), + }, + } + mapping, err = wildcardHostPortMapping(task, 
offer) + if err != nil { + t.Fatal(err) + } else if len(mapping) != 2 { + t.Fatal("Expected both ports allocated") + } + valid := 0 + for _, entry := range mapping { + if entry.ContainerIdx == 0 && entry.PortIdx == 0 && entry.OfferPort == 2 { + valid++ + } + if entry.ContainerIdx == 0 && entry.PortIdx == 1 && entry.OfferPort == 1 { + valid++ + } + } + if valid < 2 { + t.Fatalf("Expected 2 valid port mappings, not %d", valid) + } +} + +func TestMappingTypeForPod(t *testing.T) { + pod := &api.Pod{ + ObjectMeta: api.ObjectMeta{ + Labels: map[string]string{}, + }, + } + mt := MappingTypeForPod(pod) + if mt != HostPortMappingWildcard { + t.Fatalf("expected wildcard mapping") + } + + pod.Labels[PortMappingLabelKey] = string(HostPortMappingFixed) + mt = MappingTypeForPod(pod) + if mt != HostPortMappingFixed { + t.Fatalf("expected fixed mapping") + } + + pod.Labels[PortMappingLabelKey] = string(HostPortMappingWildcard) + mt = MappingTypeForPod(pod) + if mt != HostPortMappingWildcard { + t.Fatalf("expected wildcard mapping") + } +} diff --git a/contrib/mesos/pkg/scheduler/podtask/protobuf.go b/contrib/mesos/pkg/scheduler/podtask/protobuf.go new file mode 100644 index 00000000000..c8245425549 --- /dev/null +++ b/contrib/mesos/pkg/scheduler/podtask/protobuf.go @@ -0,0 +1,57 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package podtask + +import ( + "github.com/gogo/protobuf/proto" + mesos "github.com/mesos/mesos-go/mesosproto" +) + +// create a range resource for the listed ports +func rangeResource(name string, ports []uint64) *mesos.Resource { + if len(ports) == 0 { + // pod may consist of a container that doesn't expose any ports on the host + return nil + } + return &mesos.Resource{ + Name: proto.String(name), + Type: mesos.Value_RANGES.Enum(), + Ranges: newRanges(ports), + } +} + +// generate port ranges from a list of ports. this implementation is very naive +func newRanges(ports []uint64) *mesos.Value_Ranges { + r := make([]*mesos.Value_Range, 0) + for _, port := range ports { + x := proto.Uint64(port) + r = append(r, &mesos.Value_Range{Begin: x, End: x}) + } + return &mesos.Value_Ranges{Range: r} +} + +func foreachRange(offer *mesos.Offer, resourceName string, f func(begin, end uint64)) { + for _, resource := range offer.Resources { + if resource.GetName() == resourceName { + for _, r := range (*resource).GetRanges().Range { + bp := r.GetBegin() + ep := r.GetEnd() + f(bp, ep) + } + } + } +} diff --git a/contrib/mesos/pkg/scheduler/podtask/registry.go b/contrib/mesos/pkg/scheduler/podtask/registry.go new file mode 100644 index 00000000000..589484fb143 --- /dev/null +++ b/contrib/mesos/pkg/scheduler/podtask/registry.go @@ -0,0 +1,335 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package podtask
+
+import (
+	"container/ring"
+	"encoding/json"
+	"fmt"
+	"sync"
+	"time"
+
+	"github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/metrics"
+	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
+	log "github.com/golang/glog"
+	mesos "github.com/mesos/mesos-go/mesosproto"
+)
+
+const (
+	//TODO(jdef) move this somewhere else
+	PodPath = "/pods"
+
+	// length of historical record of finished tasks
+	defaultFinishedTasksSize = 1024
+)
+
+// state store for pod tasks
+type Registry interface {
+	// register the specified task with this registry, as long as the current error
+	// condition is nil. if no errors occur then return a copy of the registered task.
+	Register(*T, error) (*T, error)
+
+	// unregister the specified task from this registry
+	Unregister(*T)
+
+	// update state for the registered task identified by task.ID
+	Update(task *T) error
+
+	// return the task registered for the specified task ID and its current state.
+	// if there is no such task then StateUnknown is returned.
+	Get(taskId string) (task *T, currentState StateType)
+
+	// return the non-terminal task corresponding to the specified pod ID
+	ForPod(podID string) (task *T, currentState StateType)
+
+	// update the task status given the specified mesos task status update, returning a
+	// copy of the updated task (if any) and its state.
+	UpdateStatus(status *mesos.TaskStatus) (*T, StateType)
+
+	// return a list of task ID's that match the given filter, or all task ID's if filter == nil.
+	List(filter func(*T) bool) []*T
+}
+
+type inMemoryRegistry struct {
+	rw            sync.RWMutex
+	taskRegistry  map[string]*T
+	tasksFinished *ring.Ring
+	podToTask     map[string]string
+}
+
+func NewInMemoryRegistry() Registry {
+	return &inMemoryRegistry{
+		taskRegistry:  make(map[string]*T),
+		tasksFinished: ring.New(defaultFinishedTasksSize),
+		podToTask:     make(map[string]string),
+	}
+}
+
+func (k *inMemoryRegistry) List(accepts func(t *T) bool) (tasks []*T) {
+	k.rw.RLock()
+	defer k.rw.RUnlock()
+	for _, task := range k.taskRegistry {
+		if accepts == nil || accepts(task) {
+			tasks = append(tasks, task.Clone())
+		}
+	}
+	return
+}
+
+func (k *inMemoryRegistry) ForPod(podID string) (task *T, currentState StateType) {
+	k.rw.RLock()
+	defer k.rw.RUnlock()
+	tid, ok := k.podToTask[podID]
+	if !ok {
+		return nil, StateUnknown
+	}
+	t, state := k._get(tid)
+	return t.Clone(), state
+}
+
+// registers a pod task, as long as the supplied error is nil
+func (k *inMemoryRegistry) Register(task *T, err error) (*T, error) {
+	if err == nil {
+		k.rw.Lock()
+		defer k.rw.Unlock()
+		if _, found := k.podToTask[task.podKey]; found {
+			return nil, fmt.Errorf("task already registered for pod key %q", task.podKey)
+		}
+		if _, found := k.taskRegistry[task.ID]; found {
+			return nil, fmt.Errorf("task already registered for id %q", task.ID)
+		}
+		k.podToTask[task.podKey] = task.ID
+		k.taskRegistry[task.ID] = task
+	}
+	return task.Clone(), err
+}
+
+// updates internal task state.
updates are limited to Spec, Flags, and Offer for +// StatePending tasks, and are limited to Flag updates (additive only) for StateRunning tasks. +func (k *inMemoryRegistry) Update(task *T) error { + if task == nil { + return nil + } + k.rw.Lock() + defer k.rw.Unlock() + switch internal, state := k._get(task.ID); state { + case StateUnknown: + return fmt.Errorf("no such task: %v", task.ID) + case StatePending: + internal.Offer = task.Offer + internal.Spec = task.Spec + (&task.Spec).copyTo(&internal.Spec) + internal.Flags = map[FlagType]struct{}{} + fallthrough + case StateRunning: + for k, v := range task.Flags { + internal.Flags[k] = v + } + return nil + default: + return fmt.Errorf("may not update task %v in state %v", task.ID, state) + } +} + +func (k *inMemoryRegistry) Unregister(task *T) { + k.rw.Lock() + defer k.rw.Unlock() + delete(k.podToTask, task.podKey) + delete(k.taskRegistry, task.ID) +} + +func (k *inMemoryRegistry) Get(taskId string) (*T, StateType) { + k.rw.RLock() + defer k.rw.RUnlock() + t, state := k._get(taskId) + return t.Clone(), state +} + +// assume that the caller has already locked around access to task state. +// the caller is also responsible for cloning the task object before it leaves +// the context of this registry. +func (k *inMemoryRegistry) _get(taskId string) (*T, StateType) { + if task, found := k.taskRegistry[taskId]; found { + return task, task.State + } + return nil, StateUnknown +} + +func (k *inMemoryRegistry) UpdateStatus(status *mesos.TaskStatus) (*T, StateType) { + taskId := status.GetTaskId().GetValue() + + k.rw.Lock() + defer k.rw.Unlock() + task, state := k._get(taskId) + + switch status.GetState() { + case mesos.TaskState_TASK_STAGING: + k.handleTaskStaging(task, state, status) + case mesos.TaskState_TASK_STARTING: + k.handleTaskStarting(task, state, status) + case mesos.TaskState_TASK_RUNNING: + k.handleTaskRunning(task, state, status) + case mesos.TaskState_TASK_FINISHED: + k.handleTaskFinished(task, state, status) + case mesos.TaskState_TASK_FAILED: + k.handleTaskFailed(task, state, status) + case mesos.TaskState_TASK_KILLED: + k.handleTaskKilled(task, state, status) + case mesos.TaskState_TASK_LOST: + k.handleTaskLost(task, state, status) + default: + log.Warningf("unhandled status update for task: %v", taskId) + } + return task.Clone(), state +} + +func (k *inMemoryRegistry) handleTaskStaging(task *T, state StateType, status *mesos.TaskStatus) { + if status.GetSource() != mesos.TaskStatus_SOURCE_MASTER { + log.Errorf("received STAGING for task %v with unexpected source: %v", + status.GetTaskId().GetValue(), status.GetSource()) + } +} + +func (k *inMemoryRegistry) handleTaskStarting(task *T, state StateType, status *mesos.TaskStatus) { + // we expect to receive this when a launched task is finally "bound" + // via the API server. however, there's nothing specific for us to do here. 
+// we expect to receive this when a launched task is finally "bound"
+// via the API server. mark the task Bound and record the bind latency
+// the first time this update is seen for a pending task.
+func (k *inMemoryRegistry) handleTaskStarting(task *T, state StateType, status *mesos.TaskStatus) {
+    switch state {
+    case StatePending:
+        task.UpdatedTime = time.Now()
+        if !task.Has(Bound) {
+            task.Set(Bound)
+            task.bindTime = task.UpdatedTime
+            timeToBind := task.bindTime.Sub(task.launchTime)
+            metrics.BindLatency.Observe(metrics.InMicroseconds(timeToBind))
+        }
+    default:
+        taskId := status.GetTaskId().GetValue()
+        log.Warningf("Ignore status TASK_STARTING because the task %v is not pending", taskId)
+    }
+}
+
+func (k *inMemoryRegistry) handleTaskRunning(task *T, state StateType, status *mesos.TaskStatus) {
+    taskId := status.GetTaskId().GetValue()
+    switch state {
+    case StatePending:
+        task.UpdatedTime = time.Now()
+        log.Infof("Received running status for pending task: %v", taskId)
+        fillRunningPodInfo(task, status)
+        task.State = StateRunning
+    case StateRunning:
+        task.UpdatedTime = time.Now()
+        log.V(2).Infof("Ignore status TASK_RUNNING because the task %v is already running", taskId)
+    case StateFinished:
+        log.Warningf("Ignore status TASK_RUNNING because the task %v is already finished", taskId)
+    default:
+        log.Warningf("Ignore status TASK_RUNNING because the task %v is discarded", taskId)
+    }
+}
+
+func ParsePodStatusResult(taskStatus *mesos.TaskStatus) (result api.PodStatusResult, err error) {
+    if taskStatus.Data != nil {
+        err = json.Unmarshal(taskStatus.Data, &result)
+    } else {
+        err = fmt.Errorf("missing TaskStatus.Data")
+    }
+    return
+}
+
+func fillRunningPodInfo(task *T, taskStatus *mesos.TaskStatus) {
+    if taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION && taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER {
+        // there is no data
+        return
+    }
+    //TODO(jdef) determine the usefulness of this information (if any)
+    if result, err := ParsePodStatusResult(taskStatus); err != nil {
+        log.Errorf("invalid TaskStatus.Data for task '%v': %v", task.ID, err)
+    } else {
+        task.podStatus = result.Status
+        log.Infof("received pod status for task %v: %+v", task.ID, result.Status)
+    }
+}
+
+func (k *inMemoryRegistry) handleTaskFinished(task *T, state StateType, status *mesos.TaskStatus) {
+    taskId := status.GetTaskId().GetValue()
+    switch state {
+    case StatePending:
+        panic(fmt.Sprintf("Pending task %v finished, this should never happen", taskId))
+    case StateRunning:
+        log.V(2).Infof("received finished status for running task: %v", taskId)
+        delete(k.podToTask, task.podKey)
+        task.State = StateFinished
+        task.UpdatedTime = time.Now()
+        k.tasksFinished = k.recordFinishedTask(task.ID)
+    case StateFinished:
+        log.Warningf("Ignore status TASK_FINISHED because the task %v is already finished", taskId)
+    default:
+        log.Warningf("Ignore status TASK_FINISHED because the task %v is not running", taskId)
+    }
+}
+
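+// For example, with defaultFinishedTasksSize = 1024: when the 1025th task
+// finishes, the oldest slot in the ring is reused and that task's entry is
+// evicted from taskRegistry (provided it is still StateFinished).
+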
+// record that a task has finished.
+// older records are expunged one at a time once the historical ring buffer is saturated.
+// assumes caller is holding state lock.
+func (k *inMemoryRegistry) recordFinishedTask(taskId string) *ring.Ring {
+    slot := k.tasksFinished.Next()
+    if slot.Value != nil {
+        // garbage collect older finished task from the registry
+        gctaskId := slot.Value.(string)
+        if gctask, found := k.taskRegistry[gctaskId]; found && gctask.State == StateFinished {
+            delete(k.taskRegistry, gctaskId)
+        }
+    }
+    slot.Value = taskId
+    return slot
+}
+
+func (k *inMemoryRegistry) handleTaskFailed(task *T, state StateType, status *mesos.TaskStatus) {
+    switch state {
+    case StatePending, StateRunning:
+        delete(k.taskRegistry, task.ID)
+        delete(k.podToTask, task.podKey)
+    }
+}
+
+func (k *inMemoryRegistry) handleTaskKilled(task *T, state StateType, status *mesos.TaskStatus) {
+    defer func() {
+        msg := fmt.Sprintf("task killed: %+v, task %+v", status, task)
+        if task != nil && task.Has(Deleted) {
+            // we were expecting this, nothing out of the ordinary
+            log.V(2).Infoln(msg)
+        } else {
+            log.Errorln(msg)
+        }
+    }()
+    switch state {
+    case StatePending, StateRunning:
+        delete(k.taskRegistry, task.ID)
+        delete(k.podToTask, task.podKey)
+    }
+}
+
+func (k *inMemoryRegistry) handleTaskLost(task *T, state StateType, status *mesos.TaskStatus) {
+    switch state {
+    case StateRunning, StatePending:
+        delete(k.taskRegistry, task.ID)
+        delete(k.podToTask, task.podKey)
+    }
+}
diff --git a/contrib/mesos/pkg/scheduler/podtask/registry_test.go b/contrib/mesos/pkg/scheduler/podtask/registry_test.go
new file mode 100644
index 00000000000..92716b2bb81
--- /dev/null
+++ b/contrib/mesos/pkg/scheduler/podtask/registry_test.go
@@ -0,0 +1,320 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +package podtask + +import ( + "testing" + "time" + + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/proc" + mesos "github.com/mesos/mesos-go/mesosproto" + "github.com/mesos/mesos-go/mesosutil" + "github.com/stretchr/testify/assert" +) + +func TestInMemoryRegistry_RegisterGetUnregister(t *testing.T) { + assert := assert.New(t) + + registry := NewInMemoryRegistry() + + // it's empty at the beginning + tasks := registry.List(func(t *T) bool { return true }) + assert.Empty(tasks) + + // add a task + a, _ := fakePodTask("a") + a_clone, err := registry.Register(a, nil) + assert.NoError(err) + assert.Equal(a_clone.ID, a.ID) + assert.Equal(a_clone.podKey, a.podKey) + + // add another task + b, _ := fakePodTask("b") + b_clone, err := registry.Register(b, nil) + assert.NoError(err) + assert.Equal(b_clone.ID, b.ID) + assert.Equal(b_clone.podKey, b.podKey) + + // find tasks in the registry + tasks = registry.List(func(t *T) bool { return true }) + assert.Len(tasks, 2) + assert.Contains(tasks, a_clone) + assert.Contains(tasks, b_clone) + + tasks = registry.List(func(t *T) bool { return t.ID == a.ID }) + assert.Len(tasks, 1) + assert.Contains(tasks, a_clone) + + task, _ := registry.ForPod(a.podKey) + assert.NotNil(task) + assert.Equal(task.ID, a.ID) + + task, _ = registry.ForPod(b.podKey) + assert.NotNil(task) + assert.Equal(task.ID, b.ID) + + task, _ = registry.ForPod("no-pod-key") + assert.Nil(task) + + task, _ = registry.Get(a.ID) + assert.NotNil(task) + assert.Equal(task.ID, a.ID) + + task, _ = registry.Get("unknown-task-id") + assert.Nil(task) + + // re-add a task + a_clone, err = registry.Register(a, nil) + assert.Error(err) + assert.Nil(a_clone) + + // re-add a task with another podKey, but same task id + another_a := a.Clone() + another_a.podKey = "another-pod" + another_a_clone, err := registry.Register(another_a, nil) + assert.Error(err) + assert.Nil(another_a_clone) + + // re-add a task with another task ID, but same podKey + another_b := b.Clone() + another_b.ID = "another-task-id" + another_b_clone, err := registry.Register(another_b, nil) + assert.Error(err) + assert.Nil(another_b_clone) + + // unregister a task + registry.Unregister(b) + + tasks = registry.List(func(t *T) bool { return true }) + assert.Len(tasks, 1) + assert.Contains(tasks, a) + + // unregister a task not registered + unregistered_task, _ := fakePodTask("unregistered-task") + registry.Unregister(unregistered_task) +} + +func fakeStatusUpdate(taskId string, state mesos.TaskState) *mesos.TaskStatus { + status := mesosutil.NewTaskStatus(mesosutil.NewTaskID(taskId), state) + status.Data = []byte("{}") // empty json + masterSource := mesos.TaskStatus_SOURCE_MASTER + status.Source = &masterSource + return status +} + +func TestInMemoryRegistry_State(t *testing.T) { + assert := assert.New(t) + + registry := NewInMemoryRegistry() + + // add a task + a, _ := fakePodTask("a") + a_clone, err := registry.Register(a, nil) + assert.NoError(err) + assert.Equal(a.State, a_clone.State) + + // update the status + assert.Equal(a_clone.State, StatePending) + a_clone, state := registry.UpdateStatus(fakeStatusUpdate(a.ID, mesos.TaskState_TASK_RUNNING)) + assert.Equal(state, StatePending) // old state + assert.Equal(a_clone.State, StateRunning) // new state + + // update unknown task + unknown_clone, state := registry.UpdateStatus(fakeStatusUpdate("unknown-task-id", mesos.TaskState_TASK_RUNNING)) + assert.Nil(unknown_clone) + assert.Equal(state, 
StateUnknown) +} + +func TestInMemoryRegistry_Update(t *testing.T) { + assert := assert.New(t) + + // create offers registry + ttl := time.Second / 4 + config := offers.RegistryConfig{ + DeclineOffer: func(offerId string) <-chan error { + return proc.ErrorChan(nil) + }, + Compat: func(o *mesos.Offer) bool { + return true + }, + TTL: ttl, + LingerTTL: 2 * ttl, + } + storage := offers.CreateRegistry(config) + + // Add offer + offerId := mesosutil.NewOfferID("foo") + mesosOffer := &mesos.Offer{Id: offerId} + storage.Add([]*mesos.Offer{mesosOffer}) + offer, ok := storage.Get(offerId.GetValue()) + assert.True(ok) + + // create registry + registry := NewInMemoryRegistry() + a, _ := fakePodTask("a") + registry.Register(a.Clone(), nil) // here clone a because we change it below + + // state changes are ignored + a.State = StateRunning + err := registry.Update(a) + assert.NoError(err) + a_clone, _ := registry.Get(a.ID) + assert.Equal(StatePending, a_clone.State) + + // offer is updated while pending + a.Offer = offer + err = registry.Update(a) + assert.NoError(err) + a_clone, _ = registry.Get(a.ID) + assert.Equal(offer.Id(), a_clone.Offer.Id()) + + // spec is updated while pending + a.Spec = Spec{SlaveID: "slave-1"} + err = registry.Update(a) + assert.NoError(err) + a_clone, _ = registry.Get(a.ID) + assert.Equal("slave-1", a_clone.Spec.SlaveID) + + // flags are updated while pending + a.Flags[Launched] = struct{}{} + err = registry.Update(a) + assert.NoError(err) + a_clone, _ = registry.Get(a.ID) + + _, found_launched := a_clone.Flags[Launched] + assert.True(found_launched) + + // flags are updated while running + registry.UpdateStatus(fakeStatusUpdate(a.ID, mesos.TaskState_TASK_RUNNING)) + a.Flags[Bound] = struct{}{} + err = registry.Update(a) + assert.NoError(err) + a_clone, _ = registry.Get(a.ID) + + _, found_launched = a_clone.Flags[Launched] + assert.True(found_launched) + _, found_bound := a_clone.Flags[Bound] + assert.True(found_bound) + + // spec is ignored while running + a.Spec = Spec{SlaveID: "slave-2"} + err = registry.Update(a) + assert.NoError(err) + a_clone, _ = registry.Get(a.ID) + assert.Equal("slave-1", a_clone.Spec.SlaveID) + + // error when finished + registry.UpdateStatus(fakeStatusUpdate(a.ID, mesos.TaskState_TASK_FINISHED)) + err = registry.Update(a) + assert.Error(err) + + // update unknown task + unknown_task, _ := fakePodTask("unknown-task") + err = registry.Update(unknown_task) + assert.Error(err) + + // update nil task + err = registry.Update(nil) + assert.Nil(err) +} + +type transition struct { + statusUpdate mesos.TaskState + expectedState *StateType + expectPanic bool +} + +func NewTransition(statusUpdate mesos.TaskState, expectedState StateType) transition { + return transition{statusUpdate: statusUpdate, expectedState: &expectedState, expectPanic: false} +} + +func NewTransitionToDeletedTask(statusUpdate mesos.TaskState) transition { + return transition{statusUpdate: statusUpdate, expectedState: nil, expectPanic: false} +} + +func NewTransitionWhichPanics(statusUpdate mesos.TaskState) transition { + return transition{statusUpdate: statusUpdate, expectPanic: true} +} + +func testStateTrace(t *testing.T, transitions []transition) *Registry { + assert := assert.New(t) + + registry := NewInMemoryRegistry() + a, _ := fakePodTask("a") + a, _ = registry.Register(a, nil) + + // initial pending state + assert.Equal(a.State, StatePending) + + for _, transition := range transitions { + if transition.expectPanic { + assert.Panics(func() { + 
registry.UpdateStatus(fakeStatusUpdate(a.ID, transition.statusUpdate))
+            })
+        } else {
+            a, _ = registry.UpdateStatus(fakeStatusUpdate(a.ID, transition.statusUpdate))
+            if transition.expectedState == nil {
+                a, _ = registry.Get(a.ID)
+                assert.Nil(a, "expected task to be deleted from registry after status update to %v", transition.statusUpdate)
+            } else {
+                assert.Equal(a.State, *transition.expectedState)
+            }
+        }
+    }
+
+    return &registry
+}
+
+func TestInMemoryRegistry_TaskLifeCycle(t *testing.T) {
+    testStateTrace(t, []transition{
+        NewTransition(mesos.TaskState_TASK_STAGING, StatePending),
+        NewTransition(mesos.TaskState_TASK_STARTING, StatePending),
+        NewTransitionWhichPanics(mesos.TaskState_TASK_FINISHED),
+        NewTransition(mesos.TaskState_TASK_RUNNING, StateRunning),
+        NewTransition(mesos.TaskState_TASK_RUNNING, StateRunning),
+        NewTransition(mesos.TaskState_TASK_STARTING, StateRunning),
+        NewTransition(mesos.TaskState_TASK_FINISHED, StateFinished),
+        NewTransition(mesos.TaskState_TASK_FINISHED, StateFinished),
+        NewTransition(mesos.TaskState_TASK_RUNNING, StateFinished),
+    })
+}
+
+func TestInMemoryRegistry_NotFinished(t *testing.T) {
+    // all these behave the same
+    notFinishedStates := []mesos.TaskState{
+        mesos.TaskState_TASK_FAILED,
+        mesos.TaskState_TASK_KILLED,
+        mesos.TaskState_TASK_LOST,
+    }
+    for _, notFinishedState := range notFinishedStates {
+        testStateTrace(t, []transition{
+            NewTransitionToDeletedTask(notFinishedState),
+        })
+
+        testStateTrace(t, []transition{
+            NewTransition(mesos.TaskState_TASK_RUNNING, StateRunning),
+            NewTransitionToDeletedTask(notFinishedState),
+        })
+
+        testStateTrace(t, []transition{
+            NewTransition(mesos.TaskState_TASK_RUNNING, StateRunning),
+            NewTransition(mesos.TaskState_TASK_FINISHED, StateFinished),
+            NewTransition(notFinishedState, StateFinished),
+        })
+    }
+}
diff --git a/contrib/mesos/pkg/scheduler/scheduler.go b/contrib/mesos/pkg/scheduler/scheduler.go
new file mode 100644
index 00000000000..f6633cdacf6
--- /dev/null
+++ b/contrib/mesos/pkg/scheduler/scheduler.go
@@ -0,0 +1,924 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package scheduler
+
+import (
+    "fmt"
+    "io"
+    "math"
+    "net/http"
+    "reflect"
+    "sync"
+    "time"
+
+    execcfg "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/config"
+    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/messages"
+    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers"
+    offerMetrics "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers/metrics"
+    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/proc"
+    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime"
+    schedcfg "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/config"
+    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/meta"
+    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/metrics"
+    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask"
+    "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/uid"
+    "github.com/GoogleCloudPlatform/kubernetes/pkg/api"
+    "github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors"
+    "github.com/GoogleCloudPlatform/kubernetes/pkg/client"
+    "github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
+    "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/container"
+    "github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
+    "github.com/GoogleCloudPlatform/kubernetes/pkg/tools"
+    "github.com/GoogleCloudPlatform/kubernetes/pkg/util"
+    log "github.com/golang/glog"
+    mesos "github.com/mesos/mesos-go/mesosproto"
+    mutil "github.com/mesos/mesos-go/mesosutil"
+    bindings "github.com/mesos/mesos-go/scheduler"
+)
+
+type Slave struct {
+    HostName string
+}
+
+func newSlave(hostName string) *Slave {
+    return &Slave{
+        HostName: hostName,
+    }
+}
+
+type slaveStorage struct {
+    sync.Mutex
+    slaves map[string]*Slave // SlaveID => slave.
+}
+
+func newSlaveStorage() *slaveStorage {
+    return &slaveStorage{
+        slaves: make(map[string]*Slave),
+    }
+}
+
+// Create a mapping between a slaveID and slave, if one does not already exist.
+func (self *slaveStorage) checkAndAdd(slaveId, slaveHostname string) {
+    self.Lock()
+    defer self.Unlock()
+    _, exists := self.slaves[slaveId]
+    if !exists {
+        self.slaves[slaveId] = newSlave(slaveHostname)
+    }
+}
+
+func (self *slaveStorage) getSlaveIds() []string {
+    self.Lock()
+    defer self.Unlock()
+    slaveIds := make([]string, 0, len(self.slaves))
+    for slaveID := range self.slaves {
+        slaveIds = append(slaveIds, slaveID)
+    }
+    return slaveIds
+}
+
+func (self *slaveStorage) getSlave(slaveId string) (*Slave, bool) {
+    self.Lock()
+    defer self.Unlock()
+    slave, exists := self.slaves[slaveId]
+    return slave, exists
+}
+
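+// Illustrative only: checkAndAdd is idempotent, e.g.
+//
+//    s := newSlaveStorage()
+//    s.checkAndAdd("slave-1", "host-1")
+//    s.checkAndAdd("slave-1", "host-1") // no-op, already present
+//    ids := s.getSlaveIds()             // []string{"slave-1"}
+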
+type PluginInterface interface {
+    // the apiserver may have a different state for the pod than we do,
+    // so reconcile our records, but only for this one pod
+    reconcilePod(api.Pod)
+
+    // execute the scheduling plugin; should start a goroutine and return immediately
+    Run(<-chan struct{})
+}
+
+// KubernetesScheduler implements:
+// 1: A mesos scheduler.
+// 2: A kubernetes scheduler plugin.
+// 3: A kubernetes pod.Registry.
+type KubernetesScheduler struct {
+    // We use a lock here to avoid races
+    // between invoking the mesos callbacks
+    // and invoking the pod registry interfaces.
+    // In particular, changes to podtask.T objects are currently guarded by this lock.
+    *sync.RWMutex
+
+    // Config related, write-once
+
+    schedcfg          *schedcfg.Config
+    executor          *mesos.ExecutorInfo
+    executorGroup     uint64
+    scheduleFunc      PodScheduleFunc
+    client            *client.Client
+    etcdClient        tools.EtcdGetSet
+    failoverTimeout   float64 // in seconds
+    reconcileInterval int64
+
+    // Mesos context.
+
+    driver         bindings.SchedulerDriver // late initialization
+    frameworkId    *mesos.FrameworkID
+    masterInfo     *mesos.MasterInfo
+    registered     bool
+    registration   chan struct{} // signal chan that closes upon first successful registration
+    onRegistration sync.Once
+    offers         offers.Registry
+    slaves         *slaveStorage
+
+    // unsafe state, needs to be guarded
+
+    taskRegistry podtask.Registry
+
+    // via deferred init
+
+    plugin             PluginInterface
+    reconciler         *Reconciler
+    reconcileCooldown  time.Duration
+    asRegisteredMaster proc.Doer
+    terminate          <-chan struct{} // signal chan, closes when we should kill background tasks
+}
+
+type Config struct {
+    Schedcfg          schedcfg.Config
+    Executor          *mesos.ExecutorInfo
+    ScheduleFunc      PodScheduleFunc
+    Client            *client.Client
+    EtcdClient        tools.EtcdGetSet
+    FailoverTimeout   float64
+    ReconcileInterval int64
+    ReconcileCooldown time.Duration
+}
+
+// New creates a new KubernetesScheduler
+func New(config Config) *KubernetesScheduler {
+    var k *KubernetesScheduler
+    k = &KubernetesScheduler{
+        schedcfg:          &config.Schedcfg,
+        RWMutex:           new(sync.RWMutex),
+        executor:          config.Executor,
+        executorGroup:     uid.Parse(config.Executor.ExecutorId.GetValue()).Group(),
+        scheduleFunc:      config.ScheduleFunc,
+        client:            config.Client,
+        etcdClient:        config.EtcdClient,
+        failoverTimeout:   config.FailoverTimeout,
+        reconcileInterval: config.ReconcileInterval,
+        offers: offers.CreateRegistry(offers.RegistryConfig{
+            Compat: func(o *mesos.Offer) bool {
+                // filter the offers: the executor IDs must not identify a kubelet-
+                // executor with a group that doesn't match ours
+                for _, eid := range o.GetExecutorIds() {
+                    execuid := uid.Parse(eid.GetValue())
+                    if execuid.Name() == execcfg.DefaultInfoID && execuid.Group() != k.executorGroup {
+                        return false
+                    }
+                }
+                return true
+            },
+            DeclineOffer: func(id string) <-chan error {
+                errOnce := proc.NewErrorOnce(k.terminate)
+                errOuter := k.asRegisteredMaster.Do(func() {
+                    var err error
+                    defer func() { errOnce.Report(err) }() // report err as assigned below, not its value at defer time
+                    offerId := mutil.NewOfferID(id)
+                    filters := &mesos.Filters{}
+                    _, err = k.driver.DeclineOffer(offerId, filters)
+                })
+                return errOnce.Send(errOuter).Err()
+            },
+            // remember expired offers so that we can tell if a previously scheduled pod still relies on one
+            LingerTTL:     config.Schedcfg.OfferLingerTTL.Duration,
+            TTL:           config.Schedcfg.OfferTTL.Duration,
+            ListenerDelay: config.Schedcfg.ListenerDelay.Duration,
+        }),
+        slaves:            newSlaveStorage(),
+        taskRegistry:      podtask.NewInMemoryRegistry(),
+        reconcileCooldown: config.ReconcileCooldown,
+        registration:      make(chan struct{}),
+        asRegisteredMaster: proc.DoerFunc(func(proc.Action) <-chan error {
+            return proc.ErrorChanf("cannot execute action with unregistered scheduler")
+        }),
+    }
+    return k
+}
+
+func (k *KubernetesScheduler) Init(electedMaster proc.Process, pl PluginInterface, mux *http.ServeMux) error {
+    log.V(1).Infoln("initializing kubernetes mesos scheduler")
+
+    k.asRegisteredMaster = proc.DoerFunc(func(a proc.Action) <-chan error {
+        if !k.registered {
+            return proc.ErrorChanf("failed to execute action, scheduler is disconnected")
+        }
+        return electedMaster.Do(a)
+    })
+    k.terminate = electedMaster.Done()
+    k.plugin = pl
+    k.offers.Init(k.terminate)
+    k.InstallDebugHandlers(mux)
+    return k.recoverTasks()
+}
+
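+// asMaster returns the current asRegisteredMaster Doer, read under the scheduler
+// lock; actions submitted through it only execute while the scheduler is
+// registered (see the Doer installed in Init above).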
+func (k *KubernetesScheduler) asMaster() proc.Doer {
+    k.RLock()
+    defer k.RUnlock()
+    return k.asRegisteredMaster
+}
+
+func (k *KubernetesScheduler) InstallDebugHandlers(mux *http.ServeMux) {
+    wrappedHandler := func(uri string, h http.Handler) {
+        mux.HandleFunc(uri, func(w http.ResponseWriter, r *http.Request) {
+            ch := make(chan struct{})
+            closer := runtime.Closer(ch)
+            proc.OnError(k.asMaster().Do(func() {
+                defer closer()
+                h.ServeHTTP(w, r)
+            }), func(err error) {
+                defer closer()
+                log.Warningf("failed HTTP request for %s: %v", uri, err)
+                w.WriteHeader(http.StatusServiceUnavailable)
+            }, k.terminate)
+            select {
+            case <-time.After(k.schedcfg.HttpHandlerTimeout.Duration):
+                log.Warningf("timed out waiting for request to be processed")
+                w.WriteHeader(http.StatusServiceUnavailable)
+                return
+            case <-ch: // noop
+            }
+        })
+    }
+    requestReconciliation := func(uri string, requestAction func()) {
+        wrappedHandler(uri, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+            requestAction()
+            w.WriteHeader(http.StatusNoContent)
+        }))
+    }
+    requestReconciliation("/debug/actions/requestExplicit", k.reconciler.RequestExplicit)
+    requestReconciliation("/debug/actions/requestImplicit", k.reconciler.RequestImplicit)
+
+    wrappedHandler("/debug/actions/kamikaze", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+        slaves := k.slaves.getSlaveIds()
+        for _, slaveId := range slaves {
+            _, err := k.driver.SendFrameworkMessage(
+                k.executor.ExecutorId,
+                mutil.NewSlaveID(slaveId),
+                messages.Kamikaze)
+            if err != nil {
+                log.Warningf("failed to send kamikaze message to slave %s: %v", slaveId, err)
+            } else {
+                io.WriteString(w, fmt.Sprintf("kamikaze slave %s\n", slaveId))
+            }
+        }
+        io.WriteString(w, "OK")
+    }))
+}
+
+func (k *KubernetesScheduler) Registration() <-chan struct{} {
+    return k.registration
+}
+
+// Registered is called when the scheduler registered with the master successfully.
+func (k *KubernetesScheduler) Registered(drv bindings.SchedulerDriver, fid *mesos.FrameworkID, mi *mesos.MasterInfo) {
+    log.Infof("Scheduler registered with the master: %v with frameworkId: %v\n", mi, fid)
+
+    k.driver = drv
+    k.frameworkId = fid
+    k.masterInfo = mi
+    k.registered = true
+
+    k.onRegistration.Do(func() { k.onInitialRegistration(drv) })
+    k.reconciler.RequestExplicit()
+}
+
+func (k *KubernetesScheduler) storeFrameworkId() {
+    // TODO(jdef): port FrameworkId store to generic Kubernetes config store as soon as available
+    _, err := k.etcdClient.Set(meta.FrameworkIDKey, k.frameworkId.GetValue(), uint64(k.failoverTimeout))
+    if err != nil {
+        log.Errorf("failed to renew frameworkId TTL: %v", err)
+    }
+}
+
+// Reregistered is called when the scheduler re-registered with the master successfully.
+// This happens when the master fails over.
+func (k *KubernetesScheduler) Reregistered(drv bindings.SchedulerDriver, mi *mesos.MasterInfo) {
+    log.Infof("Scheduler reregistered with the master: %v\n", mi)
+
+    k.driver = drv
+    k.masterInfo = mi
+    k.registered = true
+
+    k.onRegistration.Do(func() { k.onInitialRegistration(drv) })
+    k.reconciler.RequestExplicit()
+}
+
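+// For example (values assumed for illustration): with failoverTimeout=60s and a
+// configured FrameworkIdRefreshInterval longer than that, the frameworkId record
+// is refreshed every max(1, 60/2) = 30 seconds, so the etcd TTL written by
+// storeFrameworkId never lapses while the scheduler is healthy.
+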
+// perform one-time initialization actions upon the first registration event received from Mesos.
+func (k *KubernetesScheduler) onInitialRegistration(driver bindings.SchedulerDriver) {
+    defer close(k.registration)
+
+    if k.failoverTimeout > 0 {
+        refreshInterval := k.schedcfg.FrameworkIdRefreshInterval.Duration
+        if k.failoverTimeout < k.schedcfg.FrameworkIdRefreshInterval.Duration.Seconds() {
+            refreshInterval = time.Duration(math.Max(1, k.failoverTimeout/2)) * time.Second
+        }
+        go runtime.Until(k.storeFrameworkId, refreshInterval, k.terminate)
+    }
+
+    r1 := k.makeTaskRegistryReconciler()
+    r2 := k.makePodRegistryReconciler()
+
+    k.reconciler = newReconciler(k.asRegisteredMaster, k.makeCompositeReconciler(r1, r2),
+        k.reconcileCooldown, k.schedcfg.ExplicitReconciliationAbortTimeout.Duration, k.terminate)
+    go k.reconciler.Run(driver)
+
+    if k.reconcileInterval > 0 {
+        ri := time.Duration(k.reconcileInterval) * time.Second
+        time.AfterFunc(k.schedcfg.InitialImplicitReconciliationDelay.Duration, func() { runtime.Until(k.reconciler.RequestImplicit, ri, k.terminate) })
+        log.Infof("will perform implicit task reconciliation at interval: %v after %v", ri, k.schedcfg.InitialImplicitReconciliationDelay.Duration)
+    }
+}
+
+// Disconnected is called when the scheduler loses connection to the master.
+func (k *KubernetesScheduler) Disconnected(driver bindings.SchedulerDriver) {
+    log.Infof("Master disconnected!\n")
+
+    k.registered = false
+
+    // discard all cached offers to avoid unnecessary TASK_LOST updates
+    k.offers.Invalidate("")
+}
+
+// ResourceOffers is called when the scheduler receives some offers from the master.
+func (k *KubernetesScheduler) ResourceOffers(driver bindings.SchedulerDriver, offers []*mesos.Offer) {
+    log.V(2).Infof("Received offers %+v", offers)
+
+    // Record the offers in the global offer map as well as each slave's offer map.
+    k.offers.Add(offers)
+    for _, offer := range offers {
+        slaveId := offer.GetSlaveId().GetValue()
+        k.slaves.checkAndAdd(slaveId, offer.GetHostname())
+    }
+}
+
+// OfferRescinded is called when the resources are rescinded from the scheduler.
+func (k *KubernetesScheduler) OfferRescinded(driver bindings.SchedulerDriver, offerId *mesos.OfferID) {
+    log.Infof("Offer rescinded %v\n", offerId)
+
+    oid := offerId.GetValue()
+    k.offers.Delete(oid, offerMetrics.OfferRescinded)
+}
+
+// StatusUpdate is called when a status update message is sent to the scheduler.
+func (k *KubernetesScheduler) StatusUpdate(driver bindings.SchedulerDriver, taskStatus *mesos.TaskStatus) {
+
+    source, reason := "none", "none"
+    if taskStatus.Source != nil {
+        source = (*taskStatus.Source).String()
+    }
+    if taskStatus.Reason != nil {
+        reason = (*taskStatus.Reason).String()
+    }
+    taskState := taskStatus.GetState()
+    metrics.StatusUpdates.WithLabelValues(source, reason, taskState.String()).Inc()
+
+    log.Infof(
+        "task status update %q from %q for task %q on slave %q executor %q for reason %q",
+        taskState.String(),
+        source,
+        taskStatus.TaskId.GetValue(),
+        taskStatus.SlaveId.GetValue(),
+        taskStatus.ExecutorId.GetValue(),
+        reason)
+
+    switch taskState {
+    case mesos.TaskState_TASK_RUNNING, mesos.TaskState_TASK_FINISHED, mesos.TaskState_TASK_STARTING, mesos.TaskState_TASK_STAGING:
+        if _, state := k.taskRegistry.UpdateStatus(taskStatus); state == podtask.StateUnknown {
+            if taskState != mesos.TaskState_TASK_FINISHED {
+                //TODO(jdef) what if I receive this after a TASK_LOST or TASK_KILLED?
+                //I don't want to reincarnate then.. TASK_LOST is a special case because
+                //the master is stateless and there are scenarios where I may get TASK_LOST
+                //followed by TASK_RUNNING.
+                //TODO(jdef) consider running this asynchronously since there are API server
+                //calls that may be made
+                k.reconcileNonTerminalTask(driver, taskStatus)
+            } // else, we don't really care about FINISHED tasks that aren't registered
+            return
+        }
+        if _, exists := k.slaves.getSlave(taskStatus.GetSlaveId().GetValue()); !exists {
+            // a registered task has an update reported by a slave that we don't recognize.
+            // this should never happen! So we don't reconcile it.
+            log.Errorf("Ignore status %+v because the slave does not exist", taskStatus)
+            return
+        }
+    case mesos.TaskState_TASK_FAILED:
+        if task, _ := k.taskRegistry.UpdateStatus(taskStatus); task != nil {
+            if task.Has(podtask.Launched) && !task.Has(podtask.Bound) {
+                go k.plugin.reconcilePod(task.Pod)
+                return
+            }
+        } else {
+            // unknown task failed, not much we can do about it
+            return
+        }
+        // last-ditch effort to reconcile our records
+        fallthrough
+    case mesos.TaskState_TASK_LOST, mesos.TaskState_TASK_KILLED:
+        k.reconcileTerminalTask(driver, taskStatus)
+    }
+}
+
+func (k *KubernetesScheduler) reconcileTerminalTask(driver bindings.SchedulerDriver, taskStatus *mesos.TaskStatus) {
+    task, state := k.taskRegistry.UpdateStatus(taskStatus)
+
+    if (state == podtask.StateRunning || state == podtask.StatePending) && taskStatus.SlaveId != nil &&
+        ((taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER && taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION) ||
+            (taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED) ||
+            (taskStatus.GetSource() == mesos.TaskStatus_SOURCE_SLAVE && taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED)) {
+        //--
+        // pod-task has metadata that refers to:
+        // (1) a task that Mesos no longer knows about, or else
+        // (2) a pod that the Kubelet will never report as "failed"
+        // For now, destroy the pod and hope that there's a replication controller backing it up.
+        // TODO(jdef) for case #2 don't delete the pod, just update its status to Failed
+        pod := &task.Pod
+        log.Warningf("deleting rogue pod %v/%v for lost task %v", pod.Namespace, pod.Name, task.ID)
+        if err := k.client.Pods(pod.Namespace).Delete(pod.Name, nil); err != nil && !errors.IsNotFound(err) {
+            log.Errorf("failed to delete pod %v/%v for terminal task %v: %v", pod.Namespace, pod.Name, task.ID, err)
+        }
+    } else if taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_TERMINATED || taskStatus.GetReason() == mesos.TaskStatus_REASON_EXECUTOR_UNREGISTERED {
+        // attempt to prevent dangling pods in the pod and task registries
+        log.V(1).Infof("request explicit reconciliation to clean up for task %v after executor reported (terminated/unregistered)", taskStatus.TaskId.GetValue())
+        k.reconciler.RequestExplicit()
+    } else if taskStatus.GetState() == mesos.TaskState_TASK_LOST && state == podtask.StateRunning && taskStatus.ExecutorId != nil && taskStatus.SlaveId != nil {
+        //TODO(jdef) this may not be meaningful once we have proper checkpointing and master detection
+        //If we're reconciling and receive this then the executor may be
+        //running a task that we need it to kill. It's possible that the framework
+        //is unrecognized by the master at this point, so KillTask is not guaranteed
+        //to do anything. The underlying driver transport may be able to send a
+        //FrameworkMessage directly to the slave to terminate the task.
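+        //The "task-lost:<taskId>" payload below is an ad hoc convention; the
+        //executor on the slave is expected to kill the named task when it receives
+        //the message (see the TODO about using a real message type).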
+        log.V(2).Infof("forwarding TASK_LOST message to executor %v on slave %v", taskStatus.ExecutorId, taskStatus.SlaveId)
+        data := fmt.Sprintf("task-lost:%s", task.ID) //TODO(jdef) use a real message type
+        if _, err := driver.SendFrameworkMessage(taskStatus.ExecutorId, taskStatus.SlaveId, data); err != nil {
+            log.Error(err.Error())
+        }
+    }
+}
+
+// reconcile an unknown (from the perspective of our registry) non-terminal task
+func (k *KubernetesScheduler) reconcileNonTerminalTask(driver bindings.SchedulerDriver, taskStatus *mesos.TaskStatus) {
+    // attempt to recover task from pod info:
+    // - task data may contain an api.PodStatusResult; if status.reason == REASON_RECONCILIATION then status.data == nil
+    // - the Name can be parsed by container.ParseFullName() to yield a pod Name and Namespace
+    // - pull the pod metadata down from the api server
+    // - perform task recovery based on pod metadata
+    taskId := taskStatus.TaskId.GetValue()
+    if taskStatus.GetReason() == mesos.TaskStatus_REASON_RECONCILIATION && taskStatus.GetSource() == mesos.TaskStatus_SOURCE_MASTER {
+        // there will be no data in the task status that we can use to determine the associated pod
+        switch taskStatus.GetState() {
+        case mesos.TaskState_TASK_STAGING:
+            // there is still hope for this task, don't kill it just yet
+            //TODO(jdef) there should probably be a limit for how long we tolerate tasks stuck in this state
+            return
+        default:
+            // for TASK_{STARTING,RUNNING} we should already have attempted recovery via recoverTasks().
+            // if the scheduler failed over before the executor fired TASK_STARTING, then we should *not*
+            // be processing this reconciliation update before we process the one from the executor.
+            // point: we don't know what this task is (perhaps there was unrecoverable metadata in the pod),
+            // so it gets killed.
+            log.Errorf("killing non-terminal, unrecoverable task %v", taskId)
+        }
+    } else if podStatus, err := podtask.ParsePodStatusResult(taskStatus); err != nil {
+        // possible rogue pod exists at this point because we can't identify it; should kill the task
+        log.Errorf("possible rogue pod; illegal task status data for task %v, expected an api.PodStatusResult: %v", taskId, err)
+    } else if name, namespace, err := container.ParsePodFullName(podStatus.Name); err != nil {
+        // possible rogue pod exists at this point because we can't identify it; should kill the task
+        log.Errorf("possible rogue pod; illegal api.PodStatusResult, unable to parse full pod name from: '%v' for task %v: %v",
+            podStatus.Name, taskId, err)
+    } else if pod, err := k.client.Pods(namespace).Get(name); err == nil {
+        if t, ok, err := podtask.RecoverFrom(*pod); ok {
+            log.Infof("recovered task %v from metadata in pod %v/%v", taskId, namespace, name)
+            _, err := k.taskRegistry.Register(t, nil)
+            if err != nil {
+                // someone beat us to it?!
+ log.Warningf("failed to register recovered task: %v", err) + return + } else { + k.taskRegistry.UpdateStatus(taskStatus) + } + return + } else if err != nil { + //should kill the pod and the task + log.Errorf("killing pod, failed to recover task from pod %v/%v: %v", namespace, name, err) + if err := k.client.Pods(namespace).Delete(name, nil); err != nil { + log.Errorf("failed to delete pod %v/%v: %v", namespace, name, err) + } + } else { + //this is pretty unexpected: we received a TASK_{STARTING,RUNNING} message, but the apiserver's pod + //metadata is not appropriate for task reconstruction -- which should almost certainly never + //be the case unless someone swapped out the pod on us (and kept the same namespace/name) while + //we were failed over. + + //kill this task, allow the newly launched scheduler to schedule the new pod + log.Warningf("unexpected pod metadata for task %v in apiserver, assuming new unscheduled pod spec: %+v", taskId, pod) + } + } else if errors.IsNotFound(err) { + // pod lookup failed, should delete the task since the pod is no longer valid; may be redundant, that's ok + log.Infof("killing task %v since pod %v/%v no longer exists", taskId, namespace, name) + } else if errors.IsServerTimeout(err) { + log.V(2).Infof("failed to reconcile task due to API server timeout: %v", err) + return + } else { + log.Errorf("unexpected API server error, aborting reconcile for task %v: %v", taskId, err) + return + } + if _, err := driver.KillTask(taskStatus.TaskId); err != nil { + log.Errorf("failed to kill task %v: %v", taskId, err) + } +} + +// FrameworkMessage is called when the scheduler receives a message from the executor. +func (k *KubernetesScheduler) FrameworkMessage(driver bindings.SchedulerDriver, + executorId *mesos.ExecutorID, slaveId *mesos.SlaveID, message string) { + log.Infof("Received messages from executor %v of slave %v, %v\n", executorId, slaveId, message) +} + +// SlaveLost is called when some slave is lost. +func (k *KubernetesScheduler) SlaveLost(driver bindings.SchedulerDriver, slaveId *mesos.SlaveID) { + log.Infof("Slave %v is lost\n", slaveId) + + sid := slaveId.GetValue() + k.offers.InvalidateForSlave(sid) + + // TODO(jdef): delete slave from our internal list? probably not since we may need to reconcile + // tasks. it would be nice to somehow flag the slave as lost so that, perhaps, we can periodically + // flush lost slaves older than X, and for which no tasks or pods reference. + + // unfinished tasks/pods will be dropped. use a replication controller if you want pods to + // be restarted when slaves die. +} + +// ExecutorLost is called when some executor is lost. +func (k *KubernetesScheduler) ExecutorLost(driver bindings.SchedulerDriver, executorId *mesos.ExecutorID, slaveId *mesos.SlaveID, status int) { + log.Infof("Executor %v of slave %v is lost, status: %v\n", executorId, slaveId, status) + // TODO(yifan): Restart any unfinished tasks of the executor. +} + +// Error is called when there is an unrecoverable error in the scheduler or scheduler driver. +// The driver should have been aborted before this is invoked. +func (k *KubernetesScheduler) Error(driver bindings.SchedulerDriver, message string) { + log.Fatalf("fatal scheduler error: %v\n", message) +} + +// filter func used for explicit task reconciliation, selects only non-terminal tasks which +// have been communicated to mesos (read: launched). 
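+// For example: a StatePending task that has not yet been launched is excluded,
+// which keeps reconciliation from killing tasks that were never sent to mesos.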
+func explicitTaskFilter(t *podtask.T) bool {
+    switch t.State {
+    case podtask.StateRunning:
+        return true
+    case podtask.StatePending:
+        return t.Has(podtask.Launched)
+    default:
+        return false
+    }
+}
+
+// invoke the given ReconcilerAction funcs in sequence, aborting the sequence if
+// reconciliation is cancelled, the scheduler is terminating, or an action reports
+// an error; the first error generated by the sequence is reported.
+func (k *KubernetesScheduler) makeCompositeReconciler(actions ...ReconcilerAction) ReconcilerAction {
+    if x := len(actions); x == 0 {
+        // programming error
+        panic("no actions specified for composite reconciler")
+    } else if x == 1 {
+        return actions[0]
+    }
+    chained := func(d bindings.SchedulerDriver, c <-chan struct{}, a, b ReconcilerAction) <-chan error {
+        ech := a(d, c)
+        ch := make(chan error, 1)
+        go func() {
+            select {
+            case <-k.terminate:
+            case <-c:
+            case e := <-ech:
+                if e != nil {
+                    ch <- e
+                    return
+                }
+                ech = b(d, c)
+                select {
+                case <-k.terminate:
+                case <-c:
+                case e := <-ech:
+                    if e != nil {
+                        ch <- e
+                        return
+                    }
+                    close(ch)
+                    return
+                }
+            }
+            ch <- fmt.Errorf("aborting composite reconciler action")
+        }()
+        return ch
+    }
+    result := func(d bindings.SchedulerDriver, c <-chan struct{}) <-chan error {
+        return chained(d, c, actions[0], actions[1])
+    }
+    for i := 2; i < len(actions); i++ {
+        i := i
+        next := func(d bindings.SchedulerDriver, c <-chan struct{}) <-chan error {
+            return chained(d, c, ReconcilerAction(result), actions[i])
+        }
+        result = next
+    }
+    return ReconcilerAction(result)
+}
+
+// reconciler action factory, performs explicit task reconciliation for non-terminal
+// tasks listed in the scheduler's internal taskRegistry.
+func (k *KubernetesScheduler) makeTaskRegistryReconciler() ReconcilerAction {
+    return ReconcilerAction(func(drv bindings.SchedulerDriver, cancel <-chan struct{}) <-chan error {
+        taskToSlave := make(map[string]string)
+        for _, t := range k.taskRegistry.List(explicitTaskFilter) {
+            if t.Spec.SlaveID != "" {
+                taskToSlave[t.ID] = t.Spec.SlaveID
+            }
+        }
+        return proc.ErrorChan(k.explicitlyReconcileTasks(drv, taskToSlave, cancel))
+    })
+}
+
+// reconciler action factory, performs explicit task reconciliation for non-terminal
+// tasks identified by annotations in the Kubernetes pod registry.
+func (k *KubernetesScheduler) makePodRegistryReconciler() ReconcilerAction { + return ReconcilerAction(func(drv bindings.SchedulerDriver, cancel <-chan struct{}) <-chan error { + ctx := api.NewDefaultContext() + podList, err := k.client.Pods(api.NamespaceValue(ctx)).List(labels.Everything(), fields.Everything()) + if err != nil { + return proc.ErrorChanf("failed to reconcile pod registry: %v", err) + } + taskToSlave := make(map[string]string) + for _, pod := range podList.Items { + if len(pod.Annotations) == 0 { + continue + } + taskId, found := pod.Annotations[meta.TaskIdKey] + if !found { + continue + } + slaveId, found := pod.Annotations[meta.SlaveIdKey] + if !found { + continue + } + taskToSlave[taskId] = slaveId + } + return proc.ErrorChan(k.explicitlyReconcileTasks(drv, taskToSlave, cancel)) + }) +} + +// execute an explicit task reconciliation, as per http://mesos.apache.org/documentation/latest/reconciliation/ +func (k *KubernetesScheduler) explicitlyReconcileTasks(driver bindings.SchedulerDriver, taskToSlave map[string]string, cancel <-chan struct{}) error { + log.Info("explicit reconcile tasks") + + // tell mesos to send us the latest status updates for all the non-terminal tasks that we know about + statusList := []*mesos.TaskStatus{} + remaining := util.KeySet(reflect.ValueOf(taskToSlave)) + for taskId, slaveId := range taskToSlave { + if slaveId == "" { + delete(taskToSlave, taskId) + continue + } + statusList = append(statusList, &mesos.TaskStatus{ + TaskId: mutil.NewTaskID(taskId), + SlaveId: mutil.NewSlaveID(slaveId), + State: mesos.TaskState_TASK_RUNNING.Enum(), // req'd field, doesn't have to reflect reality + }) + } + + select { + case <-cancel: + return reconciliationCancelledErr + default: + if _, err := driver.ReconcileTasks(statusList); err != nil { + return err + } + } + + start := time.Now() + first := true + for backoff := 1 * time.Second; first || remaining.Len() > 0; backoff = backoff * 2 { + first = false + // nothing to do here other than wait for status updates.. 
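+        // the backoff doubles on each pass (1s, 2s, 4s, ...) and is capped just
+        // below at ExplicitReconciliationMaxBackoff.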
+        if backoff > k.schedcfg.ExplicitReconciliationMaxBackoff.Duration {
+            backoff = k.schedcfg.ExplicitReconciliationMaxBackoff.Duration
+        }
+        select {
+        case <-cancel:
+            return reconciliationCancelledErr
+        case <-time.After(backoff):
+            for taskId := range remaining {
+                if task, _ := k.taskRegistry.Get(taskId); task != nil && explicitTaskFilter(task) && task.UpdatedTime.Before(start) {
+                    // keep this task in remaining list
+                    continue
+                }
+                remaining.Delete(taskId)
+            }
+        }
+    }
+    return nil
+}
+
+var (
+    reconciliationCancelledErr = fmt.Errorf("explicit task reconciliation cancelled")
+)
+
+type ReconcilerAction func(driver bindings.SchedulerDriver, cancel <-chan struct{}) <-chan error
+
+type Reconciler struct {
+    proc.Doer
+    Action                             ReconcilerAction
+    explicit                           chan struct{}   // send an empty struct to trigger explicit reconciliation
+    implicit                           chan struct{}   // send an empty struct to trigger implicit reconciliation
+    done                               <-chan struct{} // close this when you want the reconciler to exit
+    cooldown                           time.Duration
+    explicitReconciliationAbortTimeout time.Duration
+}
+
+func newReconciler(doer proc.Doer, action ReconcilerAction,
+    cooldown, explicitReconciliationAbortTimeout time.Duration, done <-chan struct{}) *Reconciler {
+    return &Reconciler{
+        Doer:     doer,
+        explicit: make(chan struct{}, 1),
+        implicit: make(chan struct{}, 1),
+        cooldown: cooldown,
+        explicitReconciliationAbortTimeout: explicitReconciliationAbortTimeout,
+        done: done,
+        Action: func(driver bindings.SchedulerDriver, cancel <-chan struct{}) <-chan error {
+            // triggers the reconciler action in the doer's execution context,
+            // but it could take a while and the scheduler needs to be able to
+            // process updates, the callbacks for which ALSO execute in the SAME
+            // deferred execution context -- so the action MUST be executed async.
+            errOnce := proc.NewErrorOnce(cancel)
+            return errOnce.Send(doer.Do(func() {
+                // only triggers the action if we're the currently elected,
+                // registered master and runs the action async.
+                go func() {
+                    var err <-chan error
+                    defer func() { errOnce.Send(err) }() // send the chan assigned below, not its nil value at defer time
+                    err = action(driver, cancel)
+                }()
+            })).Err()
+        },
+    }
+}
+
+func (r *Reconciler) RequestExplicit() {
+    select {
+    case r.explicit <- struct{}{}: // noop
+    default: // request queue full; noop
+    }
+}
+
+func (r *Reconciler) RequestImplicit() {
+    select {
+    case r.implicit <- struct{}{}: // noop
+    default: // request queue full; noop
+    }
+}
+
+// execute task reconciliation, returns when r.done is closed. intended to run as a goroutine.
+// if reconciliation is requested while another is in progress, the in-progress operation will be
+// cancelled before the new reconciliation operation begins.
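+// requests are coalesced: explicit and implicit are buffered channels of size 1,
+// so at most one request of each kind can be pending at a time (see
+// RequestExplicit/RequestImplicit above).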
+func (r *Reconciler) Run(driver bindings.SchedulerDriver) {
+    var cancel, finished chan struct{}
+requestLoop:
+    for {
+        select {
+        case <-r.done:
+            return
+        default: // proceed
+        }
+        select {
+        case <-r.implicit:
+            metrics.ReconciliationRequested.WithLabelValues("implicit").Inc()
+            select {
+            case <-r.done:
+                return
+            case <-r.explicit:
+                break // give preference to a pending request for explicit
+            default: // continue
+                // don't run implicit reconciliation while explicit is ongoing
+                if finished != nil {
+                    select {
+                    case <-finished: // continue w/ implicit
+                    default:
+                        log.Infoln("skipping implicit reconcile because explicit reconcile is ongoing")
+                        continue requestLoop
+                    }
+                }
+                errOnce := proc.NewErrorOnce(r.done)
+                errCh := r.Do(func() {
+                    var err error
+                    defer func() { errOnce.Report(err) }() // report err as assigned below, not its value at defer time
+                    log.Infoln("implicit reconcile tasks")
+                    metrics.ReconciliationExecuted.WithLabelValues("implicit").Inc()
+                    if _, err = driver.ReconcileTasks([]*mesos.TaskStatus{}); err != nil {
+                        log.V(1).Infof("failed to request implicit reconciliation from mesos: %v", err)
+                    }
+                })
+                proc.OnError(errOnce.Send(errCh).Err(), func(err error) {
+                    log.Errorf("failed to run implicit reconciliation: %v", err)
+                }, r.done)
+                goto slowdown
+            }
+        case <-r.done:
+            return
+        case <-r.explicit: // continue
+            metrics.ReconciliationRequested.WithLabelValues("explicit").Inc()
+        }
+
+        if cancel != nil {
+            close(cancel)
+            cancel = nil
+
+            // play nice and wait for the prior operation to finish, complain
+            // if it doesn't
+            select {
+            case <-r.done:
+                return
+            case <-finished: // noop, expected
+            case <-time.After(r.explicitReconciliationAbortTimeout): // very unexpected
+                log.Error("reconciler action failed to stop upon cancellation")
+            }
+        }
+        // copy 'finished' to 'fin' here in case we end up with simultaneous go-routines,
+        // if cancellation takes too long or fails - we don't want to close the same chan
+        // more than once
+        cancel = make(chan struct{})
+        finished = make(chan struct{})
+        go func(fin chan struct{}) {
+            startedAt := time.Now()
+            defer func() {
+                metrics.ReconciliationLatency.Observe(metrics.InMicroseconds(time.Since(startedAt)))
+            }()
+
+            metrics.ReconciliationExecuted.WithLabelValues("explicit").Inc()
+            defer close(fin)
+            err := <-r.Action(driver, cancel)
+            if err == reconciliationCancelledErr {
+                metrics.ReconciliationCancelled.WithLabelValues("explicit").Inc()
+                log.Infoln(err.Error())
+            } else if err != nil {
+                log.Errorf("reconciler action failed: %v", err)
+            }
+        }(finished)
+    slowdown:
+        // don't allow reconciliation to run very frequently, either explicit or implicit
+        select {
+        case <-r.done:
+            return
+        case <-time.After(r.cooldown): // noop
+        }
+    } // for
+}
+
+func (ks *KubernetesScheduler) recoverTasks() error {
+    ctx := api.NewDefaultContext()
+    podList, err := ks.client.Pods(api.NamespaceValue(ctx)).List(labels.Everything(), fields.Everything())
+    if err != nil {
+        log.V(1).Infof("failed to recover pod registry, madness may ensue: %v", err)
+        return err
+    }
+    recoverSlave := func(t *podtask.T) {
+        slaveId := t.Spec.SlaveID
+        ks.slaves.checkAndAdd(slaveId, t.Offer.Host())
+    }
+    for _, pod := range podList.Items {
+        if t, ok, err := podtask.RecoverFrom(pod); err != nil {
+            log.Errorf("failed to recover task from pod, will attempt to delete '%v/%v': %v", pod.Namespace, pod.Name, err)
+            err := ks.client.Pods(pod.Namespace).Delete(pod.Name, nil)
+            //TODO(jdef) check for temporary or not-found errors
+            if err != nil {
+                log.Errorf("failed to delete pod '%v/%v': %v", pod.Namespace, pod.Name, err)
+            }
+        }
else if ok { + ks.taskRegistry.Register(t, nil) + recoverSlave(t) + log.Infof("recovered task %v from pod %v/%v", t.ID, pod.Namespace, pod.Name) + } + } + return nil +} diff --git a/contrib/mesos/pkg/scheduler/scheduler_test.go b/contrib/mesos/pkg/scheduler/scheduler_test.go new file mode 100644 index 00000000000..e4fe2577e25 --- /dev/null +++ b/contrib/mesos/pkg/scheduler/scheduler_test.go @@ -0,0 +1,350 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package scheduler + +import ( + "testing" + + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/proc" + schedcfg "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/config" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask" + mesos "github.com/mesos/mesos-go/mesosproto" + util "github.com/mesos/mesos-go/mesosutil" + "github.com/stretchr/testify/assert" +) + +// Check that same slave is only added once. +func TestSlaveStorage_checkAndAdd(t *testing.T) { + assert := assert.New(t) + + slaveStorage := newSlaveStorage() + assert.Equal(0, len(slaveStorage.slaves)) + + slaveId := "slave1" + slaveHostname := "slave1Hostname" + slaveStorage.checkAndAdd(slaveId, slaveHostname) + assert.Equal(1, len(slaveStorage.getSlaveIds())) + + slaveStorage.checkAndAdd(slaveId, slaveHostname) + assert.Equal(1, len(slaveStorage.getSlaveIds())) +} + +// Check that getSlave returns notExist for nonexisting slave. +func TestSlaveStorage_getSlave(t *testing.T) { + assert := assert.New(t) + + slaveStorage := newSlaveStorage() + assert.Equal(0, len(slaveStorage.slaves)) + + slaveId := "slave1" + slaveHostname := "slave1Hostname" + + _, exists := slaveStorage.getSlave(slaveId) + assert.Equal(false, exists) + + slaveStorage.checkAndAdd(slaveId, slaveHostname) + assert.Equal(1, len(slaveStorage.getSlaveIds())) + + _, exists = slaveStorage.getSlave(slaveId) + assert.Equal(true, exists) +} + +// Check that getSlaveIds returns array with all slaveIds. 
+func TestSlaveStorage_getSlaveIds(t *testing.T) {
+    assert := assert.New(t)
+
+    slaveStorage := newSlaveStorage()
+    assert.Equal(0, len(slaveStorage.slaves))
+
+    slaveId := "1"
+    slaveHostname := "hn1"
+    slaveStorage.checkAndAdd(slaveId, slaveHostname)
+    assert.Equal(1, len(slaveStorage.getSlaveIds()))
+
+    slaveId = "2"
+    slaveHostname = "hn2"
+    slaveStorage.checkAndAdd(slaveId, slaveHostname)
+    assert.Equal(2, len(slaveStorage.getSlaveIds()))
+
+    slaveIds := slaveStorage.getSlaveIds()
+
+    slaveIdsMap := make(map[string]bool, len(slaveIds))
+    for _, s := range slaveIds {
+        slaveIdsMap[s] = true
+    }
+
+    _, ok := slaveIdsMap["1"]
+    assert.Equal(ok, true)
+
+    _, ok = slaveIdsMap["2"]
+    assert.Equal(ok, true)
+}
+
+// get the number of non-expired offers from the offer registry
+func getNumberOffers(os offers.Registry) int {
+    // walk the offers, counting those stored in the registry
+    walked := 0
+    walker1 := func(p offers.Perishable) (bool, error) {
+        walked++
+        return false, nil
+    }
+    os.Walk(walker1)
+    return walked
+}
+
+// test adding of resource offers; they should be added to the offer registry and slaves
+func TestResourceOffer_Add(t *testing.T) {
+    assert := assert.New(t)
+
+    testScheduler := &KubernetesScheduler{
+        offers: offers.CreateRegistry(offers.RegistryConfig{
+            Compat: func(o *mesos.Offer) bool {
+                return true
+            },
+            DeclineOffer: func(offerId string) <-chan error {
+                return proc.ErrorChan(nil)
+            },
+            // remember expired offers so that we can tell if a previously scheduled pod still relies on one
+            LingerTTL:     schedcfg.DefaultOfferLingerTTL,
+            TTL:           schedcfg.DefaultOfferTTL,
+            ListenerDelay: schedcfg.DefaultListenerDelay,
+        }),
+        slaves: newSlaveStorage(),
+    }
+
+    hostname := "h1"
+    offerID1 := util.NewOfferID("test1")
+    offer1 := &mesos.Offer{Id: offerID1, Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)}
+    offers1 := []*mesos.Offer{offer1}
+    testScheduler.ResourceOffers(nil, offers1)
+
+    assert.Equal(1, getNumberOffers(testScheduler.offers))
+    // check slave hostname
+    assert.Equal(1, len(testScheduler.slaves.getSlaveIds()))
+
+    // add another offer
+    hostname2 := "h2"
+    offer2 := &mesos.Offer{Id: util.NewOfferID("test2"), Hostname: &hostname2, SlaveId: util.NewSlaveID(hostname2)}
+    offers2 := []*mesos.Offer{offer2}
+    testScheduler.ResourceOffers(nil, offers2)
+
+    // check it is stored in the registry
+    assert.Equal(2, getNumberOffers(testScheduler.offers))
+
+    // check slave hostnames
+    assert.Equal(2, len(testScheduler.slaves.getSlaveIds()))
+}
+
+// test adding and rescinding of resource offers; rescinded offers should be removed from the registry
+func TestResourceOffer_Add_Rescind(t *testing.T) {
+    assert := assert.New(t)
+
+    testScheduler := &KubernetesScheduler{
+        offers: offers.CreateRegistry(offers.RegistryConfig{
+            Compat: func(o *mesos.Offer) bool {
+                return true
+            },
+            DeclineOffer: func(offerId string) <-chan error {
+                return proc.ErrorChan(nil)
+            },
+            // remember expired offers so that we can tell if a previously scheduled pod still relies on one
+            LingerTTL:     schedcfg.DefaultOfferLingerTTL,
+            TTL:           schedcfg.DefaultOfferTTL,
+            ListenerDelay: schedcfg.DefaultListenerDelay,
+        }),
+        slaves: newSlaveStorage(),
+    }
+
+    hostname := "h1"
+    offerID1 := util.NewOfferID("test1")
+    offer1 := &mesos.Offer{Id: offerID1, Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)}
+    offers1 := []*mesos.Offer{offer1}
+    testScheduler.ResourceOffers(nil, offers1)
+
+    assert.Equal(1, getNumberOffers(testScheduler.offers))
+
+    // check slave hostname
+    assert.Equal(1, len(testScheduler.slaves.getSlaveIds()))
+
+    // add another offer
+    hostname2 := "h2"
+    offer2 := &mesos.Offer{Id: util.NewOfferID("test2"), Hostname: &hostname2, SlaveId: util.NewSlaveID(hostname2)}
+    offers2 := []*mesos.Offer{offer2}
+    testScheduler.ResourceOffers(nil, offers2)
+
+    assert.Equal(2, getNumberOffers(testScheduler.offers))
+
+    // check slave hostnames
+    assert.Equal(2, len(testScheduler.slaves.getSlaveIds()))
+
+    // next, check whether offers can be rescinded
+    testScheduler.OfferRescinded(nil, offerID1)
+    assert.Equal(1, getNumberOffers(testScheduler.offers))
+
+    testScheduler.OfferRescinded(nil, util.NewOfferID("test2"))
+    // walk the offers again and check that they are removed from the registry
+    assert.Equal(0, getNumberOffers(testScheduler.offers))
+
+    // remove a non-existing ID
+    testScheduler.OfferRescinded(nil, util.NewOfferID("notExist"))
+}
+
+// test that when a slave is lost we remove all of its offers
+func TestSlave_Lost(t *testing.T) {
+    assert := assert.New(t)
+
+    testScheduler := &KubernetesScheduler{
+        offers: offers.CreateRegistry(offers.RegistryConfig{
+            Compat: func(o *mesos.Offer) bool {
+                return true
+            },
+            // remember expired offers so that we can tell if a previously scheduled pod still relies on one
+            LingerTTL:     schedcfg.DefaultOfferLingerTTL,
+            TTL:           schedcfg.DefaultOfferTTL,
+            ListenerDelay: schedcfg.DefaultListenerDelay,
+        }),
+        slaves: newSlaveStorage(),
+    }
+
+    hostname := "h1"
+    offer1 := &mesos.Offer{Id: util.NewOfferID("test1"), Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)}
+    offers1 := []*mesos.Offer{offer1}
+    testScheduler.ResourceOffers(nil, offers1)
+    offer2 := &mesos.Offer{Id: util.NewOfferID("test2"), Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)}
+    offers2 := []*mesos.Offer{offer2}
+    testScheduler.ResourceOffers(nil, offers2)
+
+    // add another offer from a different slaveID
+    hostname2 := "h2"
+    offer3 := &mesos.Offer{Id: util.NewOfferID("test3"), Hostname: &hostname2, SlaveId: util.NewSlaveID(hostname2)}
+    offers3 := []*mesos.Offer{offer3}
+    testScheduler.ResourceOffers(nil, offers3)
+
+    // test precondition
+    assert.Equal(3, getNumberOffers(testScheduler.offers))
+    assert.Equal(2, len(testScheduler.slaves.getSlaveIds()))
+
+    // remove the first slave
+    testScheduler.SlaveLost(nil, util.NewSlaveID(hostname))
+
+    // offers should be removed
+    assert.Equal(1, getNumberOffers(testScheduler.offers))
+    // slave hostnames should still all be present
+    assert.Equal(2, len(testScheduler.slaves.getSlaveIds()))
+
+    // remove the second slave
+    testScheduler.SlaveLost(nil, util.NewSlaveID(hostname2))
+
+    // offers should be removed
+    assert.Equal(0, getNumberOffers(testScheduler.offers))
+    // slave hostnames should still all be present
+    assert.Equal(2, len(testScheduler.slaves.getSlaveIds()))
+
+    // try to remove a non-existing slave
+    testScheduler.SlaveLost(nil, util.NewSlaveID("notExist"))
+}
+
+// test that when we lose the connection to the master we invalidate all cached offers
+func TestDisconnect(t *testing.T) {
+    assert := assert.New(t)
+
+    testScheduler := &KubernetesScheduler{
+        offers: offers.CreateRegistry(offers.RegistryConfig{
+            Compat: func(o *mesos.Offer) bool {
+                return true
+            },
+            // remember expired offers so that we can tell if a previously scheduled pod still relies on one
+            LingerTTL:     schedcfg.DefaultOfferLingerTTL,
+            TTL:           schedcfg.DefaultOfferTTL,
+            ListenerDelay: schedcfg.DefaultListenerDelay,
+        }),
+        slaves: newSlaveStorage(),
+    }
+
+    hostname := "h1"
+    offer1 := &mesos.Offer{Id: util.NewOfferID("test1"), Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)}
+    offers1 := []*mesos.Offer{offer1}
[]*mesos.Offer{offer1} + testScheduler.ResourceOffers(nil, offers1) + offer2 := &mesos.Offer{Id: util.NewOfferID("test2"), Hostname: &hostname, SlaveId: util.NewSlaveID(hostname)} + offers2 := []*mesos.Offer{offer2} + testScheduler.ResourceOffers(nil, offers2) + + //add another offer from a different slave + hostname2 := "h2" + offer3 := &mesos.Offer{Id: util.NewOfferID("test3"), Hostname: &hostname2, SlaveId: util.NewSlaveID(hostname2)} + offers3 := []*mesos.Offer{offer3} + testScheduler.ResourceOffers(nil, offers3) + + //disconnect + testScheduler.Disconnected(nil) + + //all offers should be removed + assert.Equal(0, getNumberOffers(testScheduler.offers)) + //slave hostnames should all still be present + assert.Equal(2, len(testScheduler.slaves.getSlaveIds())) +} + +//test that we can handle different status updates; TODO: check state transitions +func TestStatus_Update(t *testing.T) { + + mockdriver := MockSchedulerDriver{} + // setup expectations + mockdriver.On("KillTask", util.NewTaskID("test-task-001")).Return(mesos.Status_DRIVER_RUNNING, nil) + + testScheduler := &KubernetesScheduler{ + offers: offers.CreateRegistry(offers.RegistryConfig{ + Compat: func(o *mesos.Offer) bool { + return true + }, + // remember expired offers so that we can tell if a previously scheduled pod relies on one + LingerTTL: schedcfg.DefaultOfferLingerTTL, + TTL: schedcfg.DefaultOfferTTL, + ListenerDelay: schedcfg.DefaultListenerDelay, + }), + slaves: newSlaveStorage(), + driver: &mockdriver, + taskRegistry: podtask.NewInMemoryRegistry(), + } + + taskStatus_task_starting := util.NewTaskStatus( + util.NewTaskID("test-task-001"), + mesos.TaskState_TASK_RUNNING, + ) + testScheduler.StatusUpdate(testScheduler.driver, taskStatus_task_starting) + + taskStatus_task_running := util.NewTaskStatus( + util.NewTaskID("test-task-001"), + mesos.TaskState_TASK_RUNNING, + ) + testScheduler.StatusUpdate(testScheduler.driver, taskStatus_task_running) + + taskStatus_task_failed := util.NewTaskStatus( + util.NewTaskID("test-task-001"), + mesos.TaskState_TASK_FAILED, + ) + testScheduler.StatusUpdate(testScheduler.driver, taskStatus_task_failed) + + //assert that the mock was invoked + mockdriver.AssertExpectations(t) +} diff --git a/contrib/mesos/pkg/scheduler/service/compat_testing.go b/contrib/mesos/pkg/scheduler/service/compat_testing.go new file mode 100644 index 00000000000..8e7ba2bb033 --- /dev/null +++ b/contrib/mesos/pkg/scheduler/service/compat_testing.go @@ -0,0 +1,32 @@ +// +build unit_test + +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+*/ + +package service + +import ( + "os" + "syscall" +) + +func makeFailoverSigChan() <-chan os.Signal { + return nil +} + +func makeDisownedProcAttr() *syscall.SysProcAttr { + return nil +} diff --git a/contrib/mesos/pkg/scheduler/service/compat_unix.go b/contrib/mesos/pkg/scheduler/service/compat_unix.go new file mode 100644 index 00000000000..90d3bdeff18 --- /dev/null +++ b/contrib/mesos/pkg/scheduler/service/compat_unix.go @@ -0,0 +1,38 @@ +// +build darwin dragonfly freebsd linux netbsd openbsd +// +build !unit_test + +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package service + +import ( + "os" + "os/signal" + "syscall" +) + +func makeFailoverSigChan() <-chan os.Signal { + ch := make(chan os.Signal, 1) + signal.Notify(ch, syscall.SIGUSR1) + return ch +} + +func makeDisownedProcAttr() *syscall.SysProcAttr { + return &syscall.SysProcAttr{ + Setpgid: true, // disown the spawned scheduler + } +} diff --git a/contrib/mesos/pkg/scheduler/service/compat_windows.go b/contrib/mesos/pkg/scheduler/service/compat_windows.go new file mode 100644 index 00000000000..5ce9a5d7edd --- /dev/null +++ b/contrib/mesos/pkg/scheduler/service/compat_windows.go @@ -0,0 +1,51 @@ +// +build windows +// +build !unit_test + +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package service + +import ( + "os" + "syscall" +) + +func makeFailoverSigChan() <-chan os.Signal { + /* TODO(jdef) + from go's windows compatibility test, it looks like we need to provide a filtered + signal channel here + + c := make(chan os.Signal, 10) + signal.Notify(c) + select { + case s := <-c: + if s != os.Interrupt { + log.Fatalf("Wrong signal received: got %q, want %q\n", s, os.Interrupt) + } + case <-time.After(3 * time.Second): + log.Fatalf("Timeout waiting for Ctrl+Break\n") + } + */ + return nil +} + +func makeDisownedProcAttr() *syscall.SysProcAttr { + //TODO(jdef) test this somehow?!?! + return &syscall.SysProcAttr{ + CreationFlags: syscall.CREATE_NEW_PROCESS_GROUP | syscall.CREATE_UNICODE_ENVIRONMENT, + } +} diff --git a/contrib/mesos/pkg/scheduler/service/doc.go b/contrib/mesos/pkg/scheduler/service/doc.go new file mode 100644 index 00000000000..61ffbcecfff --- /dev/null +++ b/contrib/mesos/pkg/scheduler/service/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package service contains the cmd/k8sm-scheduler glue code +package service diff --git a/contrib/mesos/pkg/scheduler/service/publish.go b/contrib/mesos/pkg/scheduler/service/publish.go new file mode 100644 index 00000000000..2a356e5af3e --- /dev/null +++ b/contrib/mesos/pkg/scheduler/service/publish.go @@ -0,0 +1,121 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package service + +import ( + "net" + "reflect" + "time" + + "github.com/GoogleCloudPlatform/kubernetes/pkg/api" + "github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors" + "github.com/GoogleCloudPlatform/kubernetes/pkg/master/ports" + + "github.com/golang/glog" +) + +const ( + SCHEDULER_SERVICE_NAME = "k8sm-scheduler" +) + +func (m *SchedulerServer) newServiceWriter(stop <-chan struct{}) func() { + return func() { + for { + // Update service & endpoint records. + // TODO(k8s): when it becomes possible to change this stuff, + // stop polling and start watching. + if err := m.createSchedulerServiceIfNeeded(SCHEDULER_SERVICE_NAME, ports.SchedulerPort); err != nil { + glog.Errorf("Can't create scheduler service: %v", err) + } + + if err := m.setEndpoints(SCHEDULER_SERVICE_NAME, net.IP(m.Address), m.Port); err != nil { + glog.Errorf("Can't create scheduler endpoints: %v", err) + } + + select { + case <-stop: + return + case <-time.After(10 * time.Second): + } + } + } +} + +// createSchedulerServiceIfNeeded will create the specified service if it +// doesn't already exist. +func (m *SchedulerServer) createSchedulerServiceIfNeeded(serviceName string, servicePort int) error { + ctx := api.NewDefaultContext() + if _, err := m.client.Services(api.NamespaceValue(ctx)).Get(serviceName); err == nil { + // The service already exists. + return nil + } + svc := &api.Service{ + ObjectMeta: api.ObjectMeta{ + Name: serviceName, + Namespace: api.NamespaceDefault, + Labels: map[string]string{"provider": "k8sm", "component": "scheduler"}, + }, + Spec: api.ServiceSpec{ + Ports: []api.ServicePort{{Port: servicePort, Protocol: api.ProtocolTCP}}, + // maintained by this code, not by the pod selector + Selector: nil, + SessionAffinity: api.ServiceAffinityNone, + }, + } + if m.ServiceAddress != nil { + svc.Spec.ClusterIP = m.ServiceAddress.String() + } + _, err := m.client.Services(api.NamespaceValue(ctx)).Create(svc) + if err != nil && errors.IsAlreadyExists(err) { + err = nil + } + return err +} + +// setEndpoints sets the endpoints for the given service. +// in a multi-master scenario only the elected leader will be publishing an endpoint. +// see SchedulerServer.bootstrap.
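+// NOTE: the desired subsets are compared with the current state via reflect.DeepEqual below, so the periodic calls from newServiceWriter are cheap no-ops unless the address or port actually changed.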
+func (m *SchedulerServer) setEndpoints(serviceName string, ip net.IP, port int) error { + // The setting we want to find. + want := []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{IP: ip.String()}}, + Ports: []api.EndpointPort{{Port: port, Protocol: api.ProtocolTCP}}, + }} + + ctx := api.NewDefaultContext() + e, err := m.client.Endpoints(api.NamespaceValue(ctx)).Get(serviceName) + createOrUpdate := m.client.Endpoints(api.NamespaceValue(ctx)).Update + if err != nil { + if errors.IsNotFound(err) { + createOrUpdate = m.client.Endpoints(api.NamespaceValue(ctx)).Create + } + e = &api.Endpoints{ + ObjectMeta: api.ObjectMeta{ + Name: serviceName, + Namespace: api.NamespaceDefault, + }, + } + } + if !reflect.DeepEqual(e.Subsets, want) { + e.Subsets = want + glog.Infof("setting endpoints for master service %q to %#v", serviceName, e) + _, err = createOrUpdate(e) + return err + } + // We didn't make any changes, no need to actually call update. + return nil +} diff --git a/contrib/mesos/pkg/scheduler/service/service.go b/contrib/mesos/pkg/scheduler/service/service.go new file mode 100644 index 00000000000..8acc7517be8 --- /dev/null +++ b/contrib/mesos/pkg/scheduler/service/service.go @@ -0,0 +1,751 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package service + +import ( + "bufio" + "errors" + "fmt" + "io/ioutil" + "net" + "net/http" + "os" + "os/exec" + "os/user" + "strconv" + "strings" + "sync" + "time" + + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/election" + execcfg "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/executor/config" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/hyperkube" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/profile" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/runtime" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler" + schedcfg "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/config" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/ha" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/meta" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/metrics" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/uid" + "github.com/GoogleCloudPlatform/kubernetes/pkg/client" + "github.com/GoogleCloudPlatform/kubernetes/pkg/clientauth" + "github.com/GoogleCloudPlatform/kubernetes/pkg/master/ports" + "github.com/GoogleCloudPlatform/kubernetes/pkg/tools" + "github.com/GoogleCloudPlatform/kubernetes/pkg/util" + "github.com/coreos/go-etcd/etcd" + "github.com/gogo/protobuf/proto" + log "github.com/golang/glog" + "github.com/kardianos/osext" + "github.com/mesos/mesos-go/auth" + "github.com/mesos/mesos-go/auth/sasl" + "github.com/mesos/mesos-go/auth/sasl/mech" + mesos "github.com/mesos/mesos-go/mesosproto" + mutil "github.com/mesos/mesos-go/mesosutil" + bindings "github.com/mesos/mesos-go/scheduler" + "github.com/prometheus/client_golang/prometheus" + "github.com/spf13/pflag" + "golang.org/x/net/context" +) + +const ( + defaultMesosMaster = "localhost:5050" + defaultMesosUser = "root" // should have privs to execute docker and iptables commands + defaultReconcileInterval = 300 // 5m default task reconciliation interval + defaultReconcileCooldown = 15 * time.Second + defaultFrameworkName = "Kubernetes" +) + +type SchedulerServer struct { + Port int + Address util.IP + EnableProfiling bool + AuthPath string + APIServerList util.StringList + EtcdServerList util.StringList + EtcdConfigFile string + AllowPrivileged bool + ExecutorPath string + ProxyPath string + MesosMaster string + MesosUser string + MesosRole string + MesosAuthPrincipal string + MesosAuthSecretFile string + Checkpoint bool + FailoverTimeout float64 + ExecutorBindall bool + ExecutorRunProxy bool + ExecutorProxyBindall bool + ExecutorLogV int + ExecutorSuicideTimeout time.Duration + MesosAuthProvider string + DriverPort uint + HostnameOverride string + ReconcileInterval int64 + ReconcileCooldown time.Duration + SchedulerConfigFileName string + Graceful bool + FrameworkName string + FrameworkWebURI string + HA bool + AdvertisedAddress string + ServiceAddress util.IP + HADomain string + KMPath string + ClusterDNS util.IP + ClusterDomain string + KubeletRootDirectory string + KubeletDockerEndpoint string + KubeletPodInfraContainerImage string + KubeletCadvisorPort uint + KubeletHostNetworkSources string + KubeletSyncFrequency time.Duration + KubeletNetworkPluginName string + + executable string // path to the binary running this service + client *client.Client + driver bindings.SchedulerDriver + driverMutex sync.RWMutex + mux *http.ServeMux +} + +// useful for unit testing specific funcs +type schedulerProcessInterface interface { + End() 
<-chan struct{} + Failover() <-chan struct{} + Terminal() <-chan struct{} +} + +// NewSchedulerServer creates a new SchedulerServer with default parameters +func NewSchedulerServer() *SchedulerServer { + s := SchedulerServer{ + Port: ports.SchedulerPort, + Address: util.IP(net.ParseIP("127.0.0.1")), + FailoverTimeout: time.Duration((1 << 62) - 1).Seconds(), + ExecutorRunProxy: true, + ExecutorSuicideTimeout: execcfg.DefaultSuicideTimeout, + MesosAuthProvider: sasl.ProviderName, + MesosMaster: defaultMesosMaster, + MesosUser: defaultMesosUser, + ReconcileInterval: defaultReconcileInterval, + ReconcileCooldown: defaultReconcileCooldown, + Checkpoint: true, + FrameworkName: defaultFrameworkName, + HA: false, + mux: http.NewServeMux(), + KubeletCadvisorPort: 4194, // copied from github.com/GoogleCloudPlatform/kubernetes/blob/release-0.14/cmd/kubelet/app/server.go + KubeletSyncFrequency: 10 * time.Second, + } + // cache this for later use. also useful in case the original binary gets deleted, e.g. + // during upgrades, development deployments, etc. + if filename, err := osext.Executable(); err != nil { + log.Fatalf("failed to determine path to currently running executable: %v", err) + } else { + s.executable = filename + s.KMPath = filename + } + + return &s +} + +func (s *SchedulerServer) addCoreFlags(fs *pflag.FlagSet) { + fs.IntVar(&s.Port, "port", s.Port, "The port that the scheduler's http service runs on") + fs.Var(&s.Address, "address", "The IP address to serve on (set to 0.0.0.0 for all interfaces)") + fs.BoolVar(&s.EnableProfiling, "profiling", s.EnableProfiling, "Enable profiling via web interface host:port/debug/pprof/") + fs.Var(&s.APIServerList, "api-servers", "List of Kubernetes API servers for publishing events, and reading pods and services. (ip:port), comma separated.") + fs.StringVar(&s.AuthPath, "auth-path", s.AuthPath, "Path to .kubernetes_auth file, specifying how to authenticate to API server.") + fs.Var(&s.EtcdServerList, "etcd-servers", "List of etcd servers to watch (http://ip:port), comma separated. Mutually exclusive with --etcd-config") + fs.StringVar(&s.EtcdConfigFile, "etcd-config", s.EtcdConfigFile, "The config file for the etcd client. Mutually exclusive with --etcd-servers.") + fs.BoolVar(&s.AllowPrivileged, "allow-privileged", s.AllowPrivileged, "If true, allow privileged containers.") + fs.StringVar(&s.ClusterDomain, "cluster-domain", s.ClusterDomain, "Domain for this cluster. If set, kubelet will configure all containers to search this domain in addition to the host's search domains") + fs.Var(&s.ClusterDNS, "cluster-dns", "IP address for a cluster DNS server. If set, kubelet will configure all containers to use this for DNS resolution in addition to the host's DNS servers") + + fs.StringVar(&s.MesosMaster, "mesos-master", s.MesosMaster, "Location of the Mesos master. The format is a comma-delimited list of hosts like zk://host1:port,host2:port/mesos. If using ZooKeeper, pay particular attention to the leading zk:// and trailing /mesos!
If not using ZooKeeper, standard URLs like http://localhost are also acceptable.") + fs.StringVar(&s.MesosUser, "mesos-user", s.MesosUser, "Mesos user for this framework, defaults to root.") + fs.StringVar(&s.MesosRole, "mesos-role", s.MesosRole, "Mesos role for this framework, defaults to none.") + fs.StringVar(&s.MesosAuthPrincipal, "mesos-authentication-principal", s.MesosAuthPrincipal, "Mesos authentication principal.") + fs.StringVar(&s.MesosAuthSecretFile, "mesos-authentication-secret-file", s.MesosAuthSecretFile, "Mesos authentication secret file.") + fs.StringVar(&s.MesosAuthProvider, "mesos-authentication-provider", s.MesosAuthProvider, fmt.Sprintf("Authentication provider to use, default is SASL that supports mechanisms: %+v", mech.ListSupported())) + fs.BoolVar(&s.Checkpoint, "checkpoint", s.Checkpoint, "Enable/disable checkpointing for the kubernetes-mesos framework.") + fs.Float64Var(&s.FailoverTimeout, "failover-timeout", s.FailoverTimeout, "Framework failover timeout, in sec.") + fs.UintVar(&s.DriverPort, "driver-port", s.DriverPort, "Port that the Mesos scheduler driver process should listen on.") + fs.StringVar(&s.HostnameOverride, "hostname-override", s.HostnameOverride, "If non-empty, will use this string as identification instead of the actual hostname.") + fs.Int64Var(&s.ReconcileInterval, "reconcile-interval", s.ReconcileInterval, "Interval at which to execute task reconciliation, in sec. Zero disables.") + fs.DurationVar(&s.ReconcileCooldown, "reconcile-cooldown", s.ReconcileCooldown, "Minimum rest period between task reconciliation operations.") + fs.StringVar(&s.SchedulerConfigFileName, "scheduler-config", s.SchedulerConfigFileName, "An ini-style configuration file with low-level scheduler settings.") + fs.BoolVar(&s.Graceful, "graceful", s.Graceful, "Indicator of a graceful failover, intended for internal use only.") + fs.BoolVar(&s.HA, "ha", s.HA, "Run the scheduler in high availability mode with leader election. All peers should be configured exactly the same.") + fs.StringVar(&s.FrameworkName, "framework-name", s.FrameworkName, "The framework name to register with Mesos.") + fs.StringVar(&s.FrameworkWebURI, "framework-weburi", s.FrameworkWebURI, "A URI that points to a web-based interface for interacting with the framework.") + fs.StringVar(&s.AdvertisedAddress, "advertised-address", s.AdvertisedAddress, "host:port address that is advertised to clients. May be used to construct artifact download URIs.") + fs.Var(&s.ServiceAddress, "service-address", "The service portal IP address that the scheduler should register with (if unset, chooses randomly)") + + fs.BoolVar(&s.ExecutorBindall, "executor-bindall", s.ExecutorBindall, "When true, set the executor's -address to 0.0.0.0.") + fs.IntVar(&s.ExecutorLogV, "executor-logv", s.ExecutorLogV, "Logging verbosity of spawned executor processes.") + fs.BoolVar(&s.ExecutorProxyBindall, "executor-proxy-bindall", s.ExecutorProxyBindall, "When true, pass -proxy-bindall to the executor.") + fs.BoolVar(&s.ExecutorRunProxy, "executor-run-proxy", s.ExecutorRunProxy, "Run the kube-proxy as a child process of the executor.") + fs.DurationVar(&s.ExecutorSuicideTimeout, "executor-suicide-timeout", s.ExecutorSuicideTimeout, "Executor self-terminates after this period of inactivity. Zero disables suicide watch.") + + fs.StringVar(&s.KubeletRootDirectory, "kubelet-root-dir", s.KubeletRootDirectory, "Directory path for managing kubelet files (volume mounts, etc.).
Defaults to executor sandbox.") + fs.StringVar(&s.KubeletDockerEndpoint, "kubelet-docker-endpoint", s.KubeletDockerEndpoint, "If non-empty, kubelet will use this docker endpoint to communicate with.") + fs.StringVar(&s.KubeletPodInfraContainerImage, "kubelet-pod-infra-container-image", s.KubeletPodInfraContainerImage, "The image whose network/ipc namespaces containers in each pod will use.") + fs.UintVar(&s.KubeletCadvisorPort, "kubelet-cadvisor-port", s.KubeletCadvisorPort, "The port of the kubelet's local cAdvisor endpoint") + fs.StringVar(&s.KubeletHostNetworkSources, "kubelet-host-network-sources", s.KubeletHostNetworkSources, "Comma-separated list of sources from which the Kubelet allows pods to use the host network. For all sources use \"*\" [default=\"file\"]") + fs.DurationVar(&s.KubeletSyncFrequency, "kubelet-sync-frequency", s.KubeletSyncFrequency, "Max period between synchronizing running containers and config") + fs.StringVar(&s.KubeletNetworkPluginName, "kubelet-network-plugin", s.KubeletNetworkPluginName, "The name of the network plugin to be invoked for various events in kubelet/pod lifecycle") + + //TODO(jdef) support this flag once we have a better handle on mesos-dns and k8s DNS integration + //fs.StringVar(&s.HADomain, "ha-domain", s.HADomain, "Domain of the HA scheduler service, only used in HA mode. If specified may be used to construct artifact download URIs.") +} + +func (s *SchedulerServer) AddStandaloneFlags(fs *pflag.FlagSet) { + s.addCoreFlags(fs) + fs.StringVar(&s.ExecutorPath, "executor-path", s.ExecutorPath, "Location of the kubernetes executor executable") + fs.StringVar(&s.ProxyPath, "proxy-path", s.ProxyPath, "Location of the kubernetes proxy executable") +} + +func (s *SchedulerServer) AddHyperkubeFlags(fs *pflag.FlagSet) { + s.addCoreFlags(fs) + fs.StringVar(&s.KMPath, "km-path", s.KMPath, "Location of the km executable, may be a URI or an absolute file path.") +} + +// returns (downloadURI, basename(path)) +func (s *SchedulerServer) serveFrameworkArtifact(path string) (string, string) { + serveFile := func(pattern string, filename string) { + s.mux.HandleFunc(pattern, func(w http.ResponseWriter, r *http.Request) { + http.ServeFile(w, r, filename) + }) + } + + // serve the artifact at /<basename of path> + pathSplit := strings.Split(path, "/") + var base string + if len(pathSplit) > 0 { + base = pathSplit[len(pathSplit)-1] + } else { + base = path + } + serveFile("/"+base, path) + + hostURI := "" + if s.AdvertisedAddress != "" { + hostURI = fmt.Sprintf("http://%s/%s", s.AdvertisedAddress, base) + } else if s.HA && s.HADomain != "" { + hostURI = fmt.Sprintf("http://%s.%s:%d/%s", SCHEDULER_SERVICE_NAME, s.HADomain, ports.SchedulerPort, base) + } else { + hostURI = fmt.Sprintf("http://%s:%d/%s", s.Address.String(), s.Port, base) + } + log.V(2).Infof("Hosting artifact '%s' at '%s'", path, hostURI) + + return hostURI, base +} + +func (s *SchedulerServer) prepareExecutorInfo(hks hyperkube.Interface) (*mesos.ExecutorInfo, *uid.UID, error) { + ci := &mesos.CommandInfo{ + Shell: proto.Bool(false), + } + + //TODO(jdef) these should be shared constants with km + const ( + KM_EXECUTOR = "executor" + KM_PROXY = "proxy" + ) + + if s.ExecutorPath != "" { + uri, executorCmd := s.serveFrameworkArtifact(s.ExecutorPath) + ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri), Executable: proto.Bool(true)}) + ci.Value = proto.String(fmt.Sprintf("./%s", executorCmd)) + } else if !hks.FindServer(KM_EXECUTOR) { + return nil, nil, fmt.Errorf("either run
this scheduler via km or else --executor-path is required") + } else { + if strings.Index(s.KMPath, "://") > 0 { + // URI could point directly to executable, e.g. hdfs:///km + // or else indirectly, e.g. http://acmestorage/tarball.tgz + // so we assume that in this case the command will always be "km" + ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(s.KMPath), Executable: proto.Bool(true)}) + ci.Value = proto.String("./km") // TODO(jdef) extract constant + } else if s.KMPath != "" { + uri, kmCmd := s.serveFrameworkArtifact(s.KMPath) + ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri), Executable: proto.Bool(true)}) + ci.Value = proto.String(fmt.Sprintf("./%s", kmCmd)) + } else { + uri, kmCmd := s.serveFrameworkArtifact(s.executable) + ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri), Executable: proto.Bool(true)}) + ci.Value = proto.String(fmt.Sprintf("./%s", kmCmd)) + } + ci.Arguments = append(ci.Arguments, KM_EXECUTOR) + } + + if s.ProxyPath != "" { + uri, proxyCmd := s.serveFrameworkArtifact(s.ProxyPath) + ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri), Executable: proto.Bool(true)}) + ci.Arguments = append(ci.Arguments, fmt.Sprintf("--proxy-exec=./%s", proxyCmd)) + } else if !hks.FindServer(KM_PROXY) { + return nil, nil, fmt.Errorf("either run this scheduler via km or else --proxy-path is required") + } else if s.ExecutorPath != "" { + return nil, nil, fmt.Errorf("proxy can only use km binary if executor does the same") + } // else, executor is smart enough to know when proxy-path is required, or to use km + + //TODO(jdef): provide some way (env var?) for users to customize executor config + //TODO(jdef): set -address to 127.0.0.1 if `address` is 127.0.0.1 + //TODO(jdef): propagate dockercfg from RootDirectory? + + apiServerArgs := strings.Join(s.APIServerList, ",") + ci.Arguments = append(ci.Arguments, fmt.Sprintf("--api-servers=%s", apiServerArgs)) + ci.Arguments = append(ci.Arguments, fmt.Sprintf("--v=%d", s.ExecutorLogV)) + ci.Arguments = append(ci.Arguments, fmt.Sprintf("--allow-privileged=%t", s.AllowPrivileged)) + ci.Arguments = append(ci.Arguments, fmt.Sprintf("--suicide-timeout=%v", s.ExecutorSuicideTimeout)) + + if s.ExecutorBindall { + //TODO(jdef) determine whether hostname-override is really needed for bindall because + //it conflicts with kubelet node status checks/updates + //ci.Arguments = append(ci.Arguments, "--hostname-override=0.0.0.0") + ci.Arguments = append(ci.Arguments, "--address=0.0.0.0") + } + + ci.Arguments = append(ci.Arguments, fmt.Sprintf("--proxy-bindall=%v", s.ExecutorProxyBindall)) + ci.Arguments = append(ci.Arguments, fmt.Sprintf("--run-proxy=%v", s.ExecutorRunProxy)) + ci.Arguments = append(ci.Arguments, fmt.Sprintf("--cadvisor-port=%v", s.KubeletCadvisorPort)) + ci.Arguments = append(ci.Arguments, fmt.Sprintf("--sync-frequency=%v", s.KubeletSyncFrequency)) + + if s.AuthPath != "" { + //TODO(jdef) should probably support non-local files, e.g.
hdfs:///some/config/file + uri, basename := s.serveFrameworkArtifact(s.AuthPath) + ci.Uris = append(ci.Uris, &mesos.CommandInfo_URI{Value: proto.String(uri)}) + ci.Arguments = append(ci.Arguments, fmt.Sprintf("--auth-path=%s", basename)) + } + appendOptional := func(name string, value string) { + if value != "" { + ci.Arguments = append(ci.Arguments, fmt.Sprintf("--%s=%s", name, value)) + } + } + if s.ClusterDNS != nil { + appendOptional("cluster-dns", s.ClusterDNS.String()) + } + appendOptional("cluster-domain", s.ClusterDomain) + appendOptional("root-dir", s.KubeletRootDirectory) + appendOptional("docker-endpoint", s.KubeletDockerEndpoint) + appendOptional("pod-infra-container-image", s.KubeletPodInfraContainerImage) + appendOptional("host-network-sources", s.KubeletHostNetworkSources) + appendOptional("network-plugin", s.KubeletNetworkPluginName) + + log.V(1).Infof("prepared executor command %q with args '%+v'", ci.GetValue(), ci.Arguments) + + // Assemble the executor info. + info := &mesos.ExecutorInfo{ + Command: ci, + Name: proto.String(execcfg.DefaultInfoName), + Source: proto.String(execcfg.DefaultInfoSource), + } + + // calculate ExecutorInfo hash to be used for validating compatibility + // of ExecutorInfos generated by other HA schedulers. + ehash := hashExecutorInfo(info) + eid := uid.New(ehash, execcfg.DefaultInfoID) + info.ExecutorId = &mesos.ExecutorID{Value: proto.String(eid.String())} + + return info, eid, nil +} + +// TODO(jdef): hacked from kubelet/server/server.go +// TODO(k8s): replace this with clientcmd +func (s *SchedulerServer) createAPIServerClient() (*client.Client, error) { + authInfo, err := clientauth.LoadFromFile(s.AuthPath) + if err != nil { + log.Warningf("Could not load kubernetes auth path: %v. Continuing with defaults.", err) + } + if authInfo == nil { + // authInfo didn't load correctly - continue with defaults. + authInfo = &clientauth.Info{} + } + clientConfig, err := authInfo.MergeWithConfig(client.Config{}) + if err != nil { + return nil, err + } + if len(s.APIServerList) < 1 { + return nil, fmt.Errorf("no api servers specified") + } + // TODO: adapt Kube client to support LB over several servers + if len(s.APIServerList) > 1 { + log.Infof("Multiple api servers specified.
Picking first one") + } + clientConfig.Host = s.APIServerList[0] + c, err := client.New(&clientConfig) + if err != nil { + return nil, err + } + return c, nil +} + +func (s *SchedulerServer) setDriver(driver bindings.SchedulerDriver) { + s.driverMutex.Lock() + defer s.driverMutex.Unlock() + s.driver = driver +} + +func (s *SchedulerServer) getDriver() (driver bindings.SchedulerDriver) { + s.driverMutex.RLock() + defer s.driverMutex.RUnlock() + return s.driver +} + +func (s *SchedulerServer) Run(hks hyperkube.Interface, _ []string) error { + // get scheduler low-level config + sc := schedcfg.CreateDefaultConfig() + if s.SchedulerConfigFileName != "" { + f, err := os.Open(s.SchedulerConfigFileName) + if err != nil { + log.Fatalf("Cannot open scheduler config file: %v", err) + } + + err = sc.Read(bufio.NewReader(f)) + if err != nil { + log.Fatalf("Invalid scheduler config file: %v", err) + } + } + + schedulerProcess, driverFactory, etcdClient, eid := s.bootstrap(hks, sc) + + if s.EnableProfiling { + profile.InstallHandler(s.mux) + } + go runtime.Until(func() { + log.V(1).Info("Starting HTTP interface") + log.Error(http.ListenAndServe(net.JoinHostPort(s.Address.String(), strconv.Itoa(s.Port)), s.mux)) + }, sc.HttpBindInterval.Duration, schedulerProcess.Terminal()) + + if s.HA { + validation := ha.ValidationFunc(validateLeadershipTransition) + srv := ha.NewCandidate(schedulerProcess, driverFactory, validation) + path := fmt.Sprintf(meta.DefaultElectionFormat, s.FrameworkName) + sid := uid.New(eid.Group(), "").String() + log.Infof("registering for election at %v with id %v", path, sid) + go election.Notify(election.NewEtcdMasterElector(etcdClient), path, sid, srv, nil) + } else { + log.Infoln("self-electing in non-HA mode") + schedulerProcess.Elect(driverFactory) + } + return s.awaitFailover(schedulerProcess, func() error { return s.failover(s.getDriver(), hks) }) +} + +// watch the scheduler process for failover signals and handle them properly. may never return. +func (s *SchedulerServer) awaitFailover(schedulerProcess schedulerProcessInterface, handler func() error) error { + + // we only want to return the first error (if any), everyone else can block forever + errCh := make(chan error, 1) + doFailover := func() error { + // we really don't expect handler to return; if it does, something went seriously wrong + err := handler() + if err != nil { + defer schedulerProcess.End() + err = fmt.Errorf("failover failed, scheduler will terminate: %v", err) + } + return err + } + + // guard for failover signal processing, first signal processor wins + failoverLatch := &runtime.Latch{} + runtime.On(schedulerProcess.Terminal(), func() { + if !failoverLatch.Acquire() { + log.V(1).Infof("scheduler process ending, already failing over") + select {} + } + var err error + defer func() { errCh <- err }() + select { + case <-schedulerProcess.Failover(): + err = doFailover() + default: + if s.HA { + err = fmt.Errorf("ha scheduler exiting instead of failing over") + } else { + log.Infof("exiting scheduler") + } + } + }) + runtime.OnOSSignal(makeFailoverSigChan(), func(_ os.Signal) { + if !failoverLatch.Acquire() { + log.V(1).Infof("scheduler process signalled, already failing over") + select {} + } + errCh <- doFailover() + }) + return <-errCh +} + +func validateLeadershipTransition(desired, current string) { + log.Infof("validating leadership transition") + d := uid.Parse(desired).Group() + c := uid.Parse(current).Group() + if d == 0 { + // should *never* happen, but..
+ log.Fatalf("illegal scheduler UID: %q", desired) + } + if d != c && c != 0 { + log.Fatalf("desired scheduler group (%x) != current scheduler group (%x)", d, c) + } +} + +// hacked from https://github.com/GoogleCloudPlatform/kubernetes/blob/release-0.14/cmd/kube-apiserver/app/server.go +func newEtcd(etcdConfigFile string, etcdServerList util.StringList) (client tools.EtcdGetSet, err error) { + if etcdConfigFile != "" { + client, err = etcd.NewClientFromFile(etcdConfigFile) + } else { + client = etcd.NewClient(etcdServerList) + } + return +} + +func (s *SchedulerServer) bootstrap(hks hyperkube.Interface, sc *schedcfg.Config) (*ha.SchedulerProcess, ha.DriverFactory, tools.EtcdGetSet, *uid.UID) { + + s.FrameworkName = strings.TrimSpace(s.FrameworkName) + if s.FrameworkName == "" { + log.Fatalf("framework-name must be a non-empty string") + } + s.FrameworkWebURI = strings.TrimSpace(s.FrameworkWebURI) + + metrics.Register() + runtime.Register() + s.mux.Handle("/metrics", prometheus.Handler()) + + if (s.EtcdConfigFile != "" && len(s.EtcdServerList) != 0) || (s.EtcdConfigFile == "" && len(s.EtcdServerList) == 0) { + log.Fatalf("specify either --etcd-servers or --etcd-config") + } + + if len(s.APIServerList) < 1 { + log.Fatal("No api servers specified.") + } + + client, err := s.createAPIServerClient() + if err != nil { + log.Fatalf("Unable to make apiserver client: %v", err) + } + s.client = client + + if s.ReconcileCooldown < defaultReconcileCooldown { + s.ReconcileCooldown = defaultReconcileCooldown + log.Warningf("user-specified reconcile cooldown too small, defaulting to %v", s.ReconcileCooldown) + } + + executor, eid, err := s.prepareExecutorInfo(hks) + if err != nil { + log.Fatalf("misconfigured executor: %v", err) + } + + // TODO(jdef): remove the dependency on etcd as soon as + // (1) the generic config store is available for the FrameworkId storage + // (2) the generic master election is provided by the apiserver + // Compare docs/proposals/high-availability.md + etcdClient, err := newEtcd(s.EtcdConfigFile, s.EtcdServerList) + if err != nil { + log.Fatalf("misconfigured etcd: %v", err) + } + + mesosPodScheduler := scheduler.New(scheduler.Config{ + Schedcfg: *sc, + Executor: executor, + ScheduleFunc: scheduler.FCFSScheduleFunc, + Client: client, + EtcdClient: etcdClient, + FailoverTimeout: s.FailoverTimeout, + ReconcileInterval: s.ReconcileInterval, + ReconcileCooldown: s.ReconcileCooldown, + }) + + masterUri := s.MesosMaster + info, cred, err := s.buildFrameworkInfo() + if err != nil { + log.Fatalf("Misconfigured mesos framework: %v", err) + } + + schedulerProcess := ha.New(mesosPodScheduler) + dconfig := &bindings.DriverConfig{ + Scheduler: schedulerProcess, + Framework: info, + Master: masterUri, + Credential: cred, + BindingAddress: net.IP(s.Address), + BindingPort: uint16(s.DriverPort), + HostnameOverride: s.HostnameOverride, + WithAuthContext: func(ctx context.Context) context.Context { + ctx = auth.WithLoginProvider(ctx, s.MesosAuthProvider) + ctx = sasl.WithBindingAddress(ctx, net.IP(s.Address)) + return ctx + }, + } + + kpl := scheduler.NewPlugin(mesosPodScheduler.NewDefaultPluginConfig(schedulerProcess.Terminal(), s.mux)) + runtime.On(mesosPodScheduler.Registration(), func() { kpl.Run(schedulerProcess.Terminal()) }) + runtime.On(mesosPodScheduler.Registration(), s.newServiceWriter(schedulerProcess.Terminal())) + + driverFactory := ha.DriverFactory(func() (drv bindings.SchedulerDriver, err error) { + log.V(1).Infoln("performing deferred initialization") + if err = 
mesosPodScheduler.Init(schedulerProcess.Master(), kpl, s.mux); err != nil { + return nil, fmt.Errorf("failed to initialize pod scheduler: %v", err) + } + log.V(1).Infoln("deferred init complete") + // defer obtaining framework ID to prevent multiple schedulers + // from overwriting each other's framework IDs + dconfig.Framework.Id, err = s.fetchFrameworkID(etcdClient) + if err != nil { + return nil, fmt.Errorf("failed to fetch framework ID from etcd: %v", err) + } + log.V(1).Infoln("constructing mesos scheduler driver") + drv, err = bindings.NewMesosSchedulerDriver(*dconfig) + if err != nil { + return nil, fmt.Errorf("failed to construct scheduler driver: %v", err) + } + log.V(1).Infoln("constructed mesos scheduler driver:", drv) + s.setDriver(drv) + return drv, nil + }) + + return schedulerProcess, driverFactory, etcdClient, eid +} + +func (s *SchedulerServer) failover(driver bindings.SchedulerDriver, hks hyperkube.Interface) error { + if driver != nil { + stat, err := driver.Stop(true) + if stat != mesos.Status_DRIVER_STOPPED { + return fmt.Errorf("failed to stop driver for failover, received unexpected status code: %v", stat) + } else if err != nil { + return err + } + } + + // there's no guarantee that all goroutines are actually programmed intelligently with 'done' + // signals, so we'll need to restart if we want to really stop everything + + // run the same command that we were launched with + //TODO(jdef) assumption here is that the scheduler is the only service running in this process; we should probably validate that somehow + args := []string{} + flags := pflag.CommandLine + if hks != nil { + args = append(args, hks.Name()) + flags = hks.Flags() + } + flags.Visit(func(flag *pflag.Flag) { + if flag.Name != "api-servers" && flag.Name != "etcd-servers" { + args = append(args, fmt.Sprintf("--%s=%s", flag.Name, flag.Value.String())) + } + }) + if !s.Graceful { + args = append(args, "--graceful") + } + if len(s.APIServerList) > 0 { + args = append(args, "--api-servers="+strings.Join(s.APIServerList, ",")) + } + if len(s.EtcdServerList) > 0 { + args = append(args, "--etcd-servers="+strings.Join(s.EtcdServerList, ",")) + } + args = append(args, flags.Args()...) + + log.V(1).Infof("spawning scheduler for graceful failover: %s %+v", s.executable, args) + + cmd := exec.Command(s.executable, args...)
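+ // NOTE: makeDisownedProcAttr (see compat_unix.go / compat_windows.go) places the child in its own process group, so the spawned scheduler is not torn down along with this process.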
+ cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + cmd.SysProcAttr = makeDisownedProcAttr() + + // TODO(jdef) pass in a pipe FD so that we can block, waiting for the child proc to be ready + //cmd.ExtraFiles = []*os.File{} + + exitcode := 0 + log.Flush() // TODO(jdef) it would be really nice to ensure that no one else in our process was still logging + if err := cmd.Start(); err != nil { + //log to stdout here to avoid conflicts with normal stderr logging + fmt.Fprintf(os.Stdout, "failed to spawn failover process: %v\n", err) + os.Exit(1) + } + os.Exit(exitcode) + select {} // will never reach here +} + +func (s *SchedulerServer) buildFrameworkInfo() (info *mesos.FrameworkInfo, cred *mesos.Credential, err error) { + username, err := s.getUsername() + if err != nil { + return nil, nil, err + } + log.V(2).Infof("Framework configured with mesos user %v", username) + info = &mesos.FrameworkInfo{ + Name: proto.String(s.FrameworkName), + User: proto.String(username), + Checkpoint: proto.Bool(s.Checkpoint), + } + if s.FrameworkWebURI != "" { + info.WebuiUrl = proto.String(s.FrameworkWebURI) + } + if s.FailoverTimeout > 0 { + info.FailoverTimeout = proto.Float64(s.FailoverTimeout) + } + if s.MesosRole != "" { + info.Role = proto.String(s.MesosRole) + } + if s.MesosAuthPrincipal != "" { + info.Principal = proto.String(s.MesosAuthPrincipal) + if s.MesosAuthSecretFile == "" { + return nil, nil, errors.New("authentication principal specified without the required credentials file") + } + secret, err := ioutil.ReadFile(s.MesosAuthSecretFile) + if err != nil { + return nil, nil, err + } + cred = &mesos.Credential{ + Principal: proto.String(s.MesosAuthPrincipal), + Secret: secret, + } + } + return +} + +func (s *SchedulerServer) fetchFrameworkID(client tools.EtcdGetSet) (*mesos.FrameworkID, error) { + if s.FailoverTimeout > 0 { + if response, err := client.Get(meta.FrameworkIDKey, false, false); err != nil { + if !tools.IsEtcdNotFound(err) { + return nil, fmt.Errorf("unexpected failure attempting to load framework ID from etcd: %v", err) + } + log.V(1).Infof("did not find framework ID in etcd") + } else if response.Node.Value != "" { + log.Infof("configuring FrameworkInfo with Id found in etcd: '%s'", response.Node.Value) + return mutil.NewFrameworkID(response.Node.Value), nil + } + } else { + //TODO(jdef) this seems like a totally hackish way to clean up the framework ID + if _, err := client.Delete(meta.FrameworkIDKey, true); err != nil { + if !tools.IsEtcdNotFound(err) { + return nil, fmt.Errorf("failed to delete framework ID from etcd: %v", err) + } + log.V(1).Infof("nothing to delete: did not find framework ID in etcd") + } + } + return nil, nil +} + +func (s *SchedulerServer) getUsername() (username string, err error) { + username = s.MesosUser + if username == "" { + if u, err := user.Current(); err == nil { + username = u.Username + if username == "" { + username = defaultMesosUser + } + } + } + return +} diff --git a/contrib/mesos/pkg/scheduler/service/service_test.go b/contrib/mesos/pkg/scheduler/service/service_test.go new file mode 100644 index 00000000000..5db9c6726c1 --- /dev/null +++ b/contrib/mesos/pkg/scheduler/service/service_test.go @@ -0,0 +1,108 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// +build unit_test + +package service + +import ( + "testing" + "time" +) + +type fakeSchedulerProcess struct { + doneFunc func() <-chan struct{} + failoverFunc func() <-chan struct{} +} + +func (self *fakeSchedulerProcess) Terminal() <-chan struct{} { + if self == nil || self.doneFunc == nil { + return nil + } + return self.doneFunc() +} + +func (self *fakeSchedulerProcess) Failover() <-chan struct{} { + if self == nil || self.failoverFunc == nil { + return nil + } + return self.failoverFunc() +} + +func (self *fakeSchedulerProcess) End() <-chan struct{} { + ch := make(chan struct{}) + close(ch) + return ch +} + +func Test_awaitFailoverDone(t *testing.T) { + done := make(chan struct{}) + p := &fakeSchedulerProcess{ + doneFunc: func() <-chan struct{} { return done }, + } + ss := &SchedulerServer{} + failoverHandlerCalled := false + failoverFailedHandler := func() error { + failoverHandlerCalled = true + return nil + } + errCh := make(chan error, 1) + go func() { + errCh <- ss.awaitFailover(p, failoverFailedHandler) + }() + close(done) + select { + case err := <-errCh: + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + case <-time.After(1 * time.Second): + t.Fatalf("timed out waiting for failover") + } + if failoverHandlerCalled { + t.Fatalf("unexpected call to failover handler") + } +} + +func Test_awaitFailoverDoneFailover(t *testing.T) { + ch := make(chan struct{}) + p := &fakeSchedulerProcess{ + doneFunc: func() <-chan struct{} { return ch }, + failoverFunc: func() <-chan struct{} { return ch }, + } + ss := &SchedulerServer{} + failoverHandlerCalled := false + failoverFailedHandler := func() error { + failoverHandlerCalled = true + return nil + } + errCh := make(chan error, 1) + go func() { + errCh <- ss.awaitFailover(p, failoverFailedHandler) + }() + close(ch) + select { + case err := <-errCh: + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + case <-time.After(1 * time.Second): + t.Fatalf("timed out waiting for failover") + } + if !failoverHandlerCalled { + t.Fatalf("expected call to failover handler") + } +} diff --git a/contrib/mesos/pkg/scheduler/service/util.go b/contrib/mesos/pkg/scheduler/service/util.go new file mode 100644 index 00000000000..33b4a1057f8 --- /dev/null +++ b/contrib/mesos/pkg/scheduler/service/util.go @@ -0,0 +1,88 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package service + +import ( + "bytes" + "fmt" + "hash/crc64" + "sort" + "strconv" + + mesos "github.com/mesos/mesos-go/mesosproto" +) + +// compute a hashcode for ExecutorInfo that may be used as a reasonable litmus test +// with respect to compatibility across HA schedulers. the intent is that an HA scheduler +// should fail-fast if it doesn't pass this test, rather than generating (potentially many) +// errors at run-time because a Mesos master decides that the ExecutorInfo generated by a +// secondary scheduler doesn't match that of the primary scheduler. +// +// see https://github.com/apache/mesos/blob/0.22.0/src/common/type_utils.cpp#L110 +func hashExecutorInfo(info *mesos.ExecutorInfo) uint64 { + // !!! we specifically do NOT include: + // - Framework ID because it's a value that's initialized too late for us to use + // - Executor ID because it's a value that includes a copy of this hash + buf := &bytes.Buffer{} + buf.WriteString(info.GetName()) + buf.WriteString(info.GetSource()) + buf.Write(info.Data) + + if info.Command != nil { + buf.WriteString(info.Command.GetValue()) + buf.WriteString(info.Command.GetUser()) + buf.WriteString(strconv.FormatBool(info.Command.GetShell())) + if sz := len(info.Command.Arguments); sz > 0 { + x := make([]string, sz) + copy(x, info.Command.Arguments) + sort.Strings(x) + for _, item := range x { + buf.WriteString(item) + } + } + if vars := info.Command.Environment.GetVariables(); len(vars) > 0 { + names := []string{} + e := make(map[string]string) + + for _, v := range vars { + if name := v.GetName(); name != "" { + names = append(names, name) + e[name] = v.GetValue() + } + } + sort.Strings(names) + for _, n := range names { + buf.WriteString(n) + buf.WriteString("=") + buf.WriteString(e[n]) + } + } + if uris := info.Command.GetUris(); len(uris) > 0 { + su := []string{} + for _, uri := range uris { + su = append(su, fmt.Sprintf("%s%t%t", uri.GetValue(), uri.GetExecutable(), uri.GetExtract())) + } + sort.Strings(su) + for _, uri := range su { + buf.WriteString(uri) + } + } + //TODO(jdef) add support for Resources and Container + } + table := crc64.MakeTable(crc64.ECMA) + return crc64.Checksum(buf.Bytes(), table) +} diff --git a/contrib/mesos/pkg/scheduler/types.go b/contrib/mesos/pkg/scheduler/types.go new file mode 100644 index 00000000000..a8ab9bd399a --- /dev/null +++ b/contrib/mesos/pkg/scheduler/types.go @@ -0,0 +1,49 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package scheduler + +import ( + "errors" + + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/offers" + "github.com/GoogleCloudPlatform/kubernetes/contrib/mesos/pkg/scheduler/podtask" +) + +// PodScheduleFunc implements how to schedule pods among slaves. +// We can have different implementations for different scheduling policies. +// +// The Schedule function accepts a group of slaves (each contains offers from +// that slave) and a single pod, which aligns well with the k8s scheduling +// algorithm.
It returns an offerId that is acceptable for the pod, otherwise +// nil. The caller is responsible for filling in task state w/ relevant offer +// details. +// +// See the FCFSScheduleFunc for example. +type PodScheduleFunc func(r offers.Registry, slaves SlaveIndex, task *podtask.T) (offers.Perishable, error) + +// A minimal placeholder +type empty struct{} + +var ( + noSuitableOffersErr = errors.New("No suitable offers for pod/task") + noSuchPodErr = errors.New("No such pod exists") + noSuchTaskErr = errors.New("No such task exists") +) + +type SlaveIndex interface { + slaveFor(id string) (*Slave, bool) +} diff --git a/contrib/mesos/pkg/scheduler/uid/doc.go b/contrib/mesos/pkg/scheduler/uid/doc.go new file mode 100644 index 00000000000..cc8c35432cc --- /dev/null +++ b/contrib/mesos/pkg/scheduler/uid/doc.go @@ -0,0 +1,18 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package uid encapsulates unique identifiers code used by the scheduler. +package uid diff --git a/contrib/mesos/pkg/scheduler/uid/uid.go b/contrib/mesos/pkg/scheduler/uid/uid.go new file mode 100644 index 00000000000..37f4701d373 --- /dev/null +++ b/contrib/mesos/pkg/scheduler/uid/uid.go @@ -0,0 +1,85 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package uid + +import ( + "fmt" + "strconv" + "strings" + + "code.google.com/p/go-uuid/uuid" + log "github.com/golang/glog" +) + +type UID struct { + group uint64 + name string + ser string +} + +func New(group uint64, name string) *UID { + if name == "" { + name = uuid.New() + } + return &UID{ + group: group, + name: name, + ser: fmt.Sprintf("%x_%s", group, name), + } +} + +func (self *UID) Name() string { + if self != nil { + return self.name + } + return "" +} + +func (self *UID) Group() uint64 { + if self != nil { + return self.group + } + return 0 +} + +func (self *UID) String() string { + if self != nil { + return self.ser + } + return "" +} + +func Parse(ser string) *UID { + parts := strings.SplitN(ser, "_", 2) + if len(parts) != 2 { + return nil + } + group, err := strconv.ParseUint(parts[0], 16, 64) + if err != nil { + log.Errorf("illegal UID group %q: %v", parts[0], err) + return nil + } + if parts[1] == "" { + log.Errorf("missing UID name: %q", ser) + return nil + } + return &UID{ + group: group, + name: parts[1], + ser: ser, + } +} diff --git a/contrib/mesos/pkg/scheduler/uid/uid_test.go b/contrib/mesos/pkg/scheduler/uid/uid_test.go new file mode 100644 index 00000000000..67e60fdf14a --- /dev/null +++ b/contrib/mesos/pkg/scheduler/uid/uid_test.go @@ -0,0 +1,47 @@ +/* +Copyright 2015 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package uid + +import ( + "testing" +) + +func TestUID_Parse(t *testing.T) { + valid := []string{"1234567890abcdef_foo", "123_bar", "face_time"} + groups := []uint64{0x1234567890abcdef, 0x123, 0xface} + + for i, good := range valid { + u := Parse(good) + if u == nil { + t.Errorf("expected parsed UID, not nil") + } + if groups[i] != u.Group() { + t.Errorf("expected matching group instead of %x", u.Group()) + } + if good != u.String() { + t.Errorf("expected %q instead of %q", good, u.String()) + } + } + + invalid := []string{"", "bad"} + for _, bad := range invalid { + u := Parse(bad) + if u != nil { + t.Errorf("expected nil UID instead of %v", u) + } + } +} diff --git a/contrib/mesos/target.sh b/contrib/mesos/target.sh new file mode 100644 index 00000000000..e305e81d6c1 --- /dev/null +++ b/contrib/mesos/target.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +# Copyright 2014 The Kubernetes Authors All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
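+# NOTE: these helper functions are sourced from hack/lib/golang.sh (and build/common.sh) when KUBERNETES_CONTRIB includes "mesos"; each echoes a space-separated list of build targets.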
+ + # The set of server targets that we are only building for Linux + # Used by hack/lib/golang.sh + kube::contrib::mesos::server_targets() { + local -r targets=( + contrib/mesos/cmd/k8sm-scheduler + contrib/mesos/cmd/k8sm-executor + contrib/mesos/cmd/k8sm-controller-manager + contrib/mesos/cmd/km + ) + echo "${targets[@]}" +} + +# The set of test targets that we are building for all platforms +# Used by hack/lib/golang.sh +kube::contrib::mesos::test_targets() { + local -r targets=( + contrib/mesos/cmd/k8sm-redirfd + ) + echo "${targets[@]}" +} + +# The set of source targets to include in the kube-build image +# Used by build/common.sh +kube::contrib::mesos::source_targets() { + local -r targets=( + contrib/mesos + ) + echo "${targets[@]}" +} diff --git a/hack/lib/golang.sh b/hack/lib/golang.sh index 97aff8e394d..e2c83a9db79 100644 --- a/hack/lib/golang.sh +++ b/hack/lib/golang.sh @@ -18,16 +18,32 @@ readonly KUBE_GO_PACKAGE=github.com/GoogleCloudPlatform/kubernetes readonly KUBE_GOPATH="${KUBE_OUTPUT}/go" +# Load contrib target functions +if [ -n "${KUBERNETES_CONTRIB:-}" ]; then + for contrib in ${KUBERNETES_CONTRIB}; do + source "${KUBE_ROOT}/contrib/${contrib}/target.sh" + done +fi + # The set of server targets that we are only building for Linux -readonly KUBE_SERVER_TARGETS=( - cmd/kube-proxy - cmd/kube-apiserver - cmd/kube-controller-manager - cmd/kubelet - cmd/hyperkube - cmd/kubernetes - plugin/cmd/kube-scheduler -) +kube::golang::server_targets() { + local targets=( + cmd/kube-proxy + cmd/kube-apiserver + cmd/kube-controller-manager + cmd/kubelet + cmd/hyperkube + cmd/kubernetes + plugin/cmd/kube-scheduler + ) + if [ -n "${KUBERNETES_CONTRIB:-}" ]; then + for contrib in ${KUBERNETES_CONTRIB}; do + targets+=($(eval "kube::contrib::${contrib}::server_targets")) + done + fi + echo "${targets[@]}" +} +readonly KUBE_SERVER_TARGETS=($(kube::golang::server_targets)) readonly KUBE_SERVER_BINARIES=("${KUBE_SERVER_TARGETS[@]##*/}") # The server platform we are building on. @@ -43,17 +59,26 @@ readonly KUBE_CLIENT_BINARIES=("${KUBE_CLIENT_TARGETS[@]##*/}") readonly KUBE_CLIENT_BINARIES_WIN=("${KUBE_CLIENT_BINARIES[@]/%/.exe}") # The set of test targets that we are building for all platforms -readonly KUBE_TEST_TARGETS=( - cmd/integration - cmd/gendocs - cmd/genman - cmd/genbashcomp - cmd/genconversion - cmd/gendeepcopy - examples/k8petstore/web-server - github.com/onsi/ginkgo/ginkgo - test/e2e/e2e.test -) +kube::golang::test_targets() { + local targets=( + cmd/integration + cmd/gendocs + cmd/genman + cmd/genbashcomp + cmd/genconversion + cmd/gendeepcopy + examples/k8petstore/web-server + github.com/onsi/ginkgo/ginkgo + test/e2e/e2e.test + ) + if [ -n "${KUBERNETES_CONTRIB:-}" ]; then + for contrib in ${KUBERNETES_CONTRIB}; do + targets+=($(eval "kube::contrib::${contrib}::test_targets")) + done + fi + echo "${targets[@]}" +} +readonly KUBE_TEST_TARGETS=($(kube::golang::test_targets)) readonly KUBE_TEST_BINARIES=("${KUBE_TEST_TARGETS[@]##*/}") readonly KUBE_TEST_BINARIES_WIN=("${KUBE_TEST_BINARIES[@]/%/.exe}") readonly KUBE_TEST_PORTABLE=( diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 5207dbf7e6a..c0981db355f 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -16,6 +16,9 @@ limitations under the License. package kubelet +// Note: if you change code in this file, you might need to change code in +// contrib/mesos/pkg/executor/.
+ import ( "errors" "fmt" diff --git a/plugin/pkg/scheduler/scheduler.go b/plugin/pkg/scheduler/scheduler.go index 57d99cbfa2f..ac56c2461fb 100644 --- a/plugin/pkg/scheduler/scheduler.go +++ b/plugin/pkg/scheduler/scheduler.go @@ -16,6 +16,9 @@ limitations under the License. package scheduler +// Note: if you change code in this file, you might need to change code in +// contrib/mesos/pkg/scheduler/. + import ( "time"