moved cgroup-root detection to minion service; kube proxy now configured to run in mesos container

This commit is contained in:
James DeFelice 2015-08-30 19:16:40 +00:00
parent 66ff3c133b
commit 7fbd290167
6 changed files with 46 additions and 42 deletions

View File

@ -26,5 +26,4 @@ const (
DefaultInfoSource = "kubernetes" DefaultInfoSource = "kubernetes"
DefaultInfoName = "Kubelet-Executor" DefaultInfoName = "Kubelet-Executor"
DefaultSuicideTimeout = 20 * time.Minute DefaultSuicideTimeout = 20 * time.Minute
DefaultCgroupPrefix = "mesos"
) )

View File

@ -17,13 +17,11 @@ limitations under the License.
package service package service
import ( import (
"fmt"
"io" "io"
"math/rand" "math/rand"
"net" "net"
"net/http" "net/http"
"os" "os"
"path"
"path/filepath" "path/filepath"
"strconv" "strconv"
"strings" "strings"
@ -63,33 +61,12 @@ type KubeletExecutorServer struct {
SuicideTimeout time.Duration SuicideTimeout time.Duration
ShutdownFD int ShutdownFD int
ShutdownFIFO string ShutdownFIFO string
cgroupRoot string
cgroupPrefix string
}
func findMesosCgroup(prefix string) string {
// derive our cgroup from MESOS_DIRECTORY environment
mesosDir := os.Getenv("MESOS_DIRECTORY")
if mesosDir == "" {
log.V(2).Infof("cannot derive executor's cgroup because MESOS_DIRECTORY is empty")
return ""
}
containerId := path.Base(mesosDir)
if containerId == "" {
log.V(2).Infof("cannot derive executor's cgroup from MESOS_DIRECTORY=%q", mesosDir)
return ""
}
trimmedPrefix := strings.Trim(prefix, "/")
cgroupRoot := fmt.Sprintf("/%s/%v", trimmedPrefix, containerId)
return cgroupRoot
} }
func NewKubeletExecutorServer() *KubeletExecutorServer { func NewKubeletExecutorServer() *KubeletExecutorServer {
k := &KubeletExecutorServer{ k := &KubeletExecutorServer{
KubeletServer: app.NewKubeletServer(), KubeletServer: app.NewKubeletServer(),
SuicideTimeout: config.DefaultSuicideTimeout, SuicideTimeout: config.DefaultSuicideTimeout,
cgroupPrefix: config.DefaultCgroupPrefix,
} }
if pwd, err := os.Getwd(); err != nil { if pwd, err := os.Getwd(); err != nil {
log.Warningf("failed to determine current directory: %v", err) log.Warningf("failed to determine current directory: %v", err)
@ -107,7 +84,6 @@ func (s *KubeletExecutorServer) AddFlags(fs *pflag.FlagSet) {
fs.DurationVar(&s.SuicideTimeout, "suicide-timeout", s.SuicideTimeout, "Self-terminate after this period of inactivity. Zero disables suicide watch.") fs.DurationVar(&s.SuicideTimeout, "suicide-timeout", s.SuicideTimeout, "Self-terminate after this period of inactivity. Zero disables suicide watch.")
fs.IntVar(&s.ShutdownFD, "shutdown-fd", s.ShutdownFD, "File descriptor used to signal shutdown to external watchers, requires shutdown-fifo flag") fs.IntVar(&s.ShutdownFD, "shutdown-fd", s.ShutdownFD, "File descriptor used to signal shutdown to external watchers, requires shutdown-fifo flag")
fs.StringVar(&s.ShutdownFIFO, "shutdown-fifo", s.ShutdownFIFO, "FIFO used to signal shutdown to external watchers, requires shutdown-fd flag") fs.StringVar(&s.ShutdownFIFO, "shutdown-fifo", s.ShutdownFIFO, "FIFO used to signal shutdown to external watchers, requires shutdown-fd flag")
fs.StringVar(&s.cgroupPrefix, "cgroup-prefix", s.cgroupPrefix, "The cgroup prefix concatenated with MESOS_DIRECTORY must give the executor cgroup set by Mesos")
} }
// returns a Closer that should be closed to signal impending shutdown, but only if ShutdownFD // returns a Closer that should be closed to signal impending shutdown, but only if ShutdownFD
@ -131,21 +107,11 @@ func (s *KubeletExecutorServer) Run(hks hyperkube.Interface, _ []string) error {
log.Info(err) log.Info(err)
} }
// derive the executor cgroup and use it as docker container cgroup root
mesosCgroup := findMesosCgroup(s.cgroupPrefix)
s.cgroupRoot = mesosCgroup
log.V(2).Infof("passing cgroup %q to the kubelet as cgroup root", s.CgroupRoot)
// empty string for the docker and system containers (= cgroup paths). This // empty string for the docker and system containers (= cgroup paths). This
// stops the kubelet taking any control over other system processes. // stops the kubelet taking any control over other system processes.
s.SystemContainer = "" s.SystemContainer = ""
s.DockerDaemonContainer = "" s.DockerDaemonContainer = ""
// We set kubelet container to its own cgroup below the executor cgroup.
// In contrast to the docker and system container, this has no other
// undesired side-effects.
s.ResourceContainer = mesosCgroup + "/kubelet"
// create apiserver client // create apiserver client
var apiclient *client.Client var apiclient *client.Client
clientConfig, err := s.CreateAPIServerClientConfig() clientConfig, err := s.CreateAPIServerClientConfig()
@ -253,7 +219,7 @@ func (s *KubeletExecutorServer) Run(hks hyperkube.Interface, _ []string) error {
Cloud: nil, // TODO(jdef) Cloud, specifying null here because we don't want all kubelets polling mesos-master; need to account for this in the cloudprovider impl Cloud: nil, // TODO(jdef) Cloud, specifying null here because we don't want all kubelets polling mesos-master; need to account for this in the cloudprovider impl
NodeStatusUpdateFrequency: s.NodeStatusUpdateFrequency, NodeStatusUpdateFrequency: s.NodeStatusUpdateFrequency,
ResourceContainer: s.ResourceContainer, ResourceContainer: s.ResourceContainer,
CgroupRoot: s.cgroupRoot, CgroupRoot: s.CgroupRoot,
ContainerRuntime: s.ContainerRuntime, ContainerRuntime: s.ContainerRuntime,
Mounter: mounter, Mounter: mounter,
DockerDaemonContainer: s.DockerDaemonContainer, DockerDaemonContainer: s.DockerDaemonContainer,

View File

@ -23,6 +23,8 @@ import (
const ( const (
DefaultLogMaxBackups = 5 // how many backup to keep DefaultLogMaxBackups = 5 // how many backup to keep
DefaultLogMaxAgeInDays = 7 // after how many days to rotate at most DefaultLogMaxAgeInDays = 7 // after how many days to rotate at most
DefaultCgroupPrefix = "mesos"
) )
// DefaultLogMaxSize returns the maximal log file size before rotation // DefaultLogMaxSize returns the maximal log file size before rotation

View File

@ -22,6 +22,7 @@ import (
"io/ioutil" "io/ioutil"
"os" "os"
"os/exec" "os/exec"
"path"
"strings" "strings"
"time" "time"
@ -51,6 +52,8 @@ type MinionServer struct {
exit chan error // to signal fatal errors exit chan error // to signal fatal errors
pathOverride string // the PATH environment for the sub-processes pathOverride string // the PATH environment for the sub-processes
cgroupPrefix string // e.g. mesos
cgroupRoot string // e.g. /mesos/{container-id}, determined at runtime
logMaxSize resource.Quantity logMaxSize resource.Quantity
logMaxBackups int logMaxBackups int
@ -69,6 +72,7 @@ func NewMinionServer() *MinionServer {
done: make(chan struct{}), done: make(chan struct{}),
exit: make(chan error), exit: make(chan error),
cgroupPrefix: config.DefaultCgroupPrefix,
logMaxSize: config.DefaultLogMaxSize(), logMaxSize: config.DefaultLogMaxSize(),
logMaxBackups: config.DefaultLogMaxBackups, logMaxBackups: config.DefaultLogMaxBackups,
logMaxAgeInDays: config.DefaultLogMaxAgeInDays, logMaxAgeInDays: config.DefaultLogMaxAgeInDays,
@ -102,6 +106,22 @@ func filterArgsByFlagSet(args []string, flags *pflag.FlagSet) ([]string, []strin
return matched, notMatched return matched, notMatched
} }
// findMesosCgroup derives the cgroup of the Mesos executor container from the
// MESOS_DIRECTORY environment variable. The last path element of that
// directory is taken as the container ID and joined below prefix, yielding an
// absolute, cleaned path such as /mesos/{container-id}.
//
// It returns "" when MESOS_DIRECTORY is unset or empty, or when its last path
// element is not usable as a container ID.
func findMesosCgroup(prefix string) string {
	// derive our cgroup from MESOS_DIRECTORY environment
	mesosDir := os.Getenv("MESOS_DIRECTORY")
	if mesosDir == "" {
		log.V(2).Infof("cannot derive executor's cgroup because MESOS_DIRECTORY is empty")
		return ""
	}

	containerID := path.Base(mesosDir)
	switch containerID {
	case "", ".", "..", "/":
		// path.Base never returns "": it yields "." for an empty path and "/"
		// for a path made only of slashes. None of these names a container,
		// and joining them below prefix would silently produce the prefix
		// itself (or its parent) as the cgroup root.
		log.V(2).Infof("cannot derive executor's cgroup from MESOS_DIRECTORY=%q", mesosDir)
		return ""
	}

	// path.Join cleans the result, so leading/trailing slashes in prefix are harmless.
	return path.Join("/", prefix, containerID)
}
func (ms *MinionServer) launchProxyServer() { func (ms *MinionServer) launchProxyServer() {
bindAddress := "0.0.0.0" bindAddress := "0.0.0.0"
if !ms.proxyBindall { if !ms.proxyBindall {
@ -111,6 +131,7 @@ func (ms *MinionServer) launchProxyServer() {
fmt.Sprintf("--bind-address=%s", bindAddress), fmt.Sprintf("--bind-address=%s", bindAddress),
fmt.Sprintf("--v=%d", ms.proxyLogV), fmt.Sprintf("--v=%d", ms.proxyLogV),
"--logtostderr=true", "--logtostderr=true",
"--resource-container=" + path.Join("/", ms.cgroupRoot, "kube-proxy"),
} }
if ms.clientConfig.Host != "" { if ms.clientConfig.Host != "" {
@ -132,6 +153,11 @@ func (ms *MinionServer) launchExecutorServer() {
ms.AddExecutorFlags(executorFlags) ms.AddExecutorFlags(executorFlags)
executorArgs, _ := filterArgsByFlagSet(allArgs, executorFlags) executorArgs, _ := filterArgsByFlagSet(allArgs, executorFlags)
executorArgs = append(executorArgs, "--resource-container="+path.Join("/", ms.cgroupRoot, "kubelet"))
if ms.cgroupRoot != "" {
executorArgs = append(executorArgs, "--cgroup-root="+ms.cgroupRoot)
}
// run executor and quit minion server when this exits cleanly // run executor and quit minion server when this exits cleanly
err := ms.launchHyperkubeServer(hyperkube.CommandExecutor, &executorArgs, "executor.log") err := ms.launchHyperkubeServer(hyperkube.CommandExecutor, &executorArgs, "executor.log")
if err != nil { if err != nil {
@ -257,6 +283,17 @@ func (ms *MinionServer) Run(hks hyperkube.Interface, _ []string) error {
} }
ms.clientConfig = clientConfig ms.clientConfig = clientConfig
// derive the executor cgroup and use it as:
// - pod container cgroup root (e.g. docker cgroup-parent)
// - parent of kubelet container
// - parent of kube-proxy container
ms.cgroupRoot = findMesosCgroup(ms.cgroupPrefix)
cgroupLogger := log.Infof
if ms.cgroupRoot == "" {
cgroupLogger = log.Warningf
}
cgroupLogger("using cgroup-root %q", ms.cgroupRoot)
// run subprocesses until ms.done is closed on return of this function // run subprocesses until ms.done is closed on return of this function
defer close(ms.done) defer close(ms.done)
if ms.runProxy { if ms.runProxy {
@ -275,6 +312,7 @@ func (ms *MinionServer) AddExecutorFlags(fs *pflag.FlagSet) {
func (ms *MinionServer) AddMinionFlags(fs *pflag.FlagSet) { func (ms *MinionServer) AddMinionFlags(fs *pflag.FlagSet) {
// general minion flags // general minion flags
fs.StringVar(&ms.cgroupPrefix, "mesos-cgroup-prefix", ms.cgroupPrefix, "The cgroup prefix concatenated with MESOS_DIRECTORY must give the executor cgroup set by Mesos")
fs.BoolVar(&ms.privateMountNS, "private-mountns", ms.privateMountNS, "Enter a private mount NS before spawning procs (linux only). Experimental, not yet compatible with k8s volumes.") fs.BoolVar(&ms.privateMountNS, "private-mountns", ms.privateMountNS, "Enter a private mount NS before spawning procs (linux only). Experimental, not yet compatible with k8s volumes.")
fs.StringVar(&ms.pathOverride, "path-override", ms.pathOverride, "Override the PATH in the environment of the sub-processes.") fs.StringVar(&ms.pathOverride, "path-override", ms.pathOverride, "Override the PATH in the environment of the sub-processes.")

View File

@ -95,13 +95,13 @@ type SchedulerServer struct {
MesosRole string MesosRole string
MesosAuthPrincipal string MesosAuthPrincipal string
MesosAuthSecretFile string MesosAuthSecretFile string
MesosCgroupPrefix string
Checkpoint bool Checkpoint bool
FailoverTimeout float64 FailoverTimeout float64
ExecutorLogV int ExecutorLogV int
ExecutorBindall bool ExecutorBindall bool
ExecutorSuicideTimeout time.Duration ExecutorSuicideTimeout time.Duration
ExecutorCgroupPrefix string
RunProxy bool RunProxy bool
ProxyBindall bool ProxyBindall bool
@ -163,7 +163,6 @@ func NewSchedulerServer() *SchedulerServer {
RunProxy: true, RunProxy: true,
ExecutorSuicideTimeout: execcfg.DefaultSuicideTimeout, ExecutorSuicideTimeout: execcfg.DefaultSuicideTimeout,
ExecutorCgroupPrefix: execcfg.DefaultCgroupPrefix,
DefaultContainerCPULimit: mresource.DefaultDefaultContainerCPULimit, DefaultContainerCPULimit: mresource.DefaultDefaultContainerCPULimit,
DefaultContainerMemLimit: mresource.DefaultDefaultContainerMemLimit, DefaultContainerMemLimit: mresource.DefaultDefaultContainerMemLimit,
@ -172,6 +171,7 @@ func NewSchedulerServer() *SchedulerServer {
MinionLogMaxAgeInDays: minioncfg.DefaultLogMaxAgeInDays, MinionLogMaxAgeInDays: minioncfg.DefaultLogMaxAgeInDays,
MesosAuthProvider: sasl.ProviderName, MesosAuthProvider: sasl.ProviderName,
MesosCgroupPrefix: minioncfg.DefaultCgroupPrefix,
MesosMaster: defaultMesosMaster, MesosMaster: defaultMesosMaster,
MesosUser: defaultMesosUser, MesosUser: defaultMesosUser,
ReconcileInterval: defaultReconcileInterval, ReconcileInterval: defaultReconcileInterval,
@ -215,6 +215,7 @@ func (s *SchedulerServer) addCoreFlags(fs *pflag.FlagSet) {
fs.StringVar(&s.MesosAuthSecretFile, "mesos-authentication-secret-file", s.MesosAuthSecretFile, "Mesos authentication secret file.") fs.StringVar(&s.MesosAuthSecretFile, "mesos-authentication-secret-file", s.MesosAuthSecretFile, "Mesos authentication secret file.")
fs.StringVar(&s.MesosAuthProvider, "mesos-authentication-provider", s.MesosAuthProvider, fmt.Sprintf("Authentication provider to use, default is SASL that supports mechanisms: %+v", mech.ListSupported())) fs.StringVar(&s.MesosAuthProvider, "mesos-authentication-provider", s.MesosAuthProvider, fmt.Sprintf("Authentication provider to use, default is SASL that supports mechanisms: %+v", mech.ListSupported()))
fs.StringVar(&s.DockerCfgPath, "dockercfg-path", s.DockerCfgPath, "Path to a dockercfg file that will be used by the docker instance of the minions.") fs.StringVar(&s.DockerCfgPath, "dockercfg-path", s.DockerCfgPath, "Path to a dockercfg file that will be used by the docker instance of the minions.")
fs.StringVar(&s.MesosCgroupPrefix, "mesos-cgroup-prefix", s.MesosCgroupPrefix, "The cgroup prefix concatenated with MESOS_DIRECTORY must give the executor cgroup set by Mesos")
fs.BoolVar(&s.Checkpoint, "checkpoint", s.Checkpoint, "Enable/disable checkpointing for the kubernetes-mesos framework.") fs.BoolVar(&s.Checkpoint, "checkpoint", s.Checkpoint, "Enable/disable checkpointing for the kubernetes-mesos framework.")
fs.Float64Var(&s.FailoverTimeout, "failover-timeout", s.FailoverTimeout, fmt.Sprintf("Framework failover timeout, in sec.")) fs.Float64Var(&s.FailoverTimeout, "failover-timeout", s.FailoverTimeout, fmt.Sprintf("Framework failover timeout, in sec."))
fs.UintVar(&s.DriverPort, "driver-port", s.DriverPort, "Port that the Mesos scheduler driver process should listen on.") fs.UintVar(&s.DriverPort, "driver-port", s.DriverPort, "Port that the Mesos scheduler driver process should listen on.")
@ -234,7 +235,6 @@ func (s *SchedulerServer) addCoreFlags(fs *pflag.FlagSet) {
fs.IntVar(&s.ExecutorLogV, "executor-logv", s.ExecutorLogV, "Logging verbosity of spawned minion and executor processes.") fs.IntVar(&s.ExecutorLogV, "executor-logv", s.ExecutorLogV, "Logging verbosity of spawned minion and executor processes.")
fs.BoolVar(&s.ExecutorBindall, "executor-bindall", s.ExecutorBindall, "When true will set -address of the executor to 0.0.0.0.") fs.BoolVar(&s.ExecutorBindall, "executor-bindall", s.ExecutorBindall, "When true will set -address of the executor to 0.0.0.0.")
fs.DurationVar(&s.ExecutorSuicideTimeout, "executor-suicide-timeout", s.ExecutorSuicideTimeout, "Executor self-terminates after this period of inactivity. Zero disables suicide watch.") fs.DurationVar(&s.ExecutorSuicideTimeout, "executor-suicide-timeout", s.ExecutorSuicideTimeout, "Executor self-terminates after this period of inactivity. Zero disables suicide watch.")
fs.StringVar(&s.ExecutorCgroupPrefix, "executor-cgroup-prefix", s.ExecutorCgroupPrefix, "The cgroup prefix concatenated with MESOS_DIRECTORY must give the executor cgroup set by Mesos")
fs.BoolVar(&s.ProxyBindall, "proxy-bindall", s.ProxyBindall, "When true pass -proxy-bindall to the executor.") fs.BoolVar(&s.ProxyBindall, "proxy-bindall", s.ProxyBindall, "When true pass -proxy-bindall to the executor.")
fs.BoolVar(&s.RunProxy, "run-proxy", s.RunProxy, "Run the kube-proxy as a side process of the executor.") fs.BoolVar(&s.RunProxy, "run-proxy", s.RunProxy, "Run the kube-proxy as a side process of the executor.")
@ -364,7 +364,7 @@ func (s *SchedulerServer) prepareExecutorInfo(hks hyperkube.Interface) (*mesos.E
ci.Arguments = append(ci.Arguments, "--address=0.0.0.0") ci.Arguments = append(ci.Arguments, "--address=0.0.0.0")
} }
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--cgroup-prefix=%v", s.ExecutorCgroupPrefix)) ci.Arguments = append(ci.Arguments, fmt.Sprintf("--mesos-cgroup-prefix=%v", s.MesosCgroupPrefix))
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--cadvisor-port=%v", s.KubeletCadvisorPort)) ci.Arguments = append(ci.Arguments, fmt.Sprintf("--cadvisor-port=%v", s.KubeletCadvisorPort))
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--sync-frequency=%v", s.KubeletSyncFrequency)) ci.Arguments = append(ci.Arguments, fmt.Sprintf("--sync-frequency=%v", s.KubeletSyncFrequency))

View File

@ -28,7 +28,6 @@ bind-pods-qps
cadvisor-port cadvisor-port
cert-dir cert-dir
certificate-authority certificate-authority
cgroup-prefix
cgroup-root cgroup-root
chaos-chance chaos-chance
cleanup-iptables cleanup-iptables
@ -74,7 +73,6 @@ etcd-server
etcd-servers etcd-servers
event-ttl event-ttl
executor-bindall executor-bindall
executor-cgroup-prefix
executor-logv executor-logv
executor-path executor-path
executor-suicide-timeout executor-suicide-timeout
@ -152,6 +150,7 @@ max-requests-inflight
mesos-authentication-principal mesos-authentication-principal
mesos-authentication-provider mesos-authentication-provider
mesos-authentication-secret-file mesos-authentication-secret-file
mesos-cgroup-prefix
mesos-master mesos-master
mesos-role mesos-role
mesos-user mesos-user