avoid updating nf_conntrack-related settings, by default, when running k8s on mesos

This commit is contained in:
James DeFelice 2016-01-05 22:45:34 +00:00
parent e663dbc302
commit 58407ca8a2
3 changed files with 63 additions and 39 deletions

View File

@ -22,6 +22,16 @@ It is **strongly** recommended that all of the Kubernetes-Mesos executors are de
Not following the above steps prior to upgrading the scheduler can result in a cluster wherein pods will never again be scheduled upon one or more nodes.
This issue is being tracked here: https://github.com/mesosphere/kubernetes-mesos/issues/572.
### Netfilter Connection Tracking
The scheduler offers flags to tweak connection tracking for kube-proxy instances that are launched on slave nodes:
- conntrack-max (do **NOT** set this to a non-zero value if the Mesos slave process is running in a non-root network namespace)
- conntrack-tcp-timeout-established
By default, both of these are set to 0 when running Kubernetes-Mesos, which leaves the slave's existing connection tracking settings as-is.
Setting either of these flags to a non-zero value may impact connection tracking for the entire slave.
### Port Specifications
In order for pods (replicated, or otherwise) to be scheduled on the cluster, it is strongly recommended that:

View File

@ -23,6 +23,7 @@ import (
"os"
"os/signal"
"path"
"strconv"
"strings"
"syscall"
@ -70,6 +71,8 @@ type MinionServer struct {
proxyLogV int
proxyBindall bool
proxyMode string
conntrackMax int
conntrackTCPTimeoutEstablished int
}
// NewMinionServer creates the MinionServer struct with default values to be used by hyperkube
@ -139,11 +142,8 @@ func (ms *MinionServer) launchProxyServer() {
"--logtostderr=true",
"--resource-container=" + path.Join("/", ms.mesosCgroup, "kube-proxy"),
"--proxy-mode=" + ms.proxyMode,
// TODO(jdef) this is a temporary hack to fix failing smoke tests. a following PR
// will more properly fix the smoke tests as well as make these flags configurable
// at the framework level (as opposed to hardcoded here)
"--conntrack-max=0",
"--conntrack-tcp-timeout-established=0",
"--conntrack-max=" + strconv.Itoa(ms.conntrackMax),
"--conntrack-tcp-timeout-established=" + strconv.Itoa(ms.conntrackTCPTimeoutEstablished),
}
if ms.clientConfig.Host != "" {
@ -351,4 +351,6 @@ func (ms *MinionServer) AddMinionFlags(fs *pflag.FlagSet) {
fs.IntVar(&ms.proxyLogV, "proxy-logv", ms.proxyLogV, "Log verbosity of the child kube-proxy.")
fs.BoolVar(&ms.proxyBindall, "proxy-bindall", ms.proxyBindall, "When true will cause kube-proxy to bind to 0.0.0.0.")
fs.StringVar(&ms.proxyMode, "proxy-mode", ms.proxyMode, "Which proxy mode to use: 'userspace' (older) or 'iptables' (faster). If the iptables proxy is selected, regardless of how, but the system's kernel or iptables versions are insufficient, this always falls back to the userspace proxy.")
fs.IntVar(&ms.conntrackMax, "conntrack-max", ms.conntrackMax, "Maximum number of NAT connections to track on agent nodes (0 to leave as-is)")
fs.IntVar(&ms.conntrackTCPTimeoutEstablished, "conntrack-tcp-timeout-established", ms.conntrackTCPTimeoutEstablished, "Idle timeout for established TCP connections on agent nodes (0 to leave as-is)")
}

View File

@ -163,6 +163,8 @@ type SchedulerServer struct {
containPodResources bool
nodeRelistPeriod time.Duration
sandboxOverlay string
conntrackMax int
conntrackTCPTimeoutEstablished int
executable string // path to the binary running this service
client *client.Client
@ -216,6 +218,12 @@ func NewSchedulerServer() *SchedulerServer {
kubeletEnableDebuggingHandlers: true,
containPodResources: true,
nodeRelistPeriod: defaultNodeRelistPeriod,
conntrackTCPTimeoutEstablished: 0, // non-zero values may require hand-tuning other sysctl's on the host; do so with caution
// non-zero values can trigger failures when updating /sys/module/nf_conntrack/parameters/hashsize
// when kube-proxy is running in a non-root netns (i.e. any netns other than init_net); setting this to
// a non-zero value will impact connection tracking for the entire host on which kube-proxy is running. xref (k8s#19182)
conntrackMax: 0,
}
// cache this for later use. also useful in case the original binary gets deleted, e.g.
// during upgrades, development deployments, etc.
@ -294,6 +302,8 @@ func (s *SchedulerServer) addCoreFlags(fs *pflag.FlagSet) {
fs.DurationVar(&s.kubeletSyncFrequency, "kubelet-sync-frequency", s.kubeletSyncFrequency, "Max period between synchronizing running containers and config")
fs.StringVar(&s.kubeletNetworkPluginName, "kubelet-network-plugin", s.kubeletNetworkPluginName, "<Warning: Alpha feature> The name of the network plugin to be invoked for various events in kubelet/pod lifecycle")
fs.BoolVar(&s.kubeletEnableDebuggingHandlers, "kubelet-enable-debugging-handlers", s.kubeletEnableDebuggingHandlers, "Enables kubelet endpoints for log collection and local running of containers and commands")
fs.IntVar(&s.conntrackMax, "conntrack-max", s.conntrackMax, "Maximum number of NAT connections to track on agent nodes (0 to leave as-is)")
fs.IntVar(&s.conntrackTCPTimeoutEstablished, "conntrack-tcp-timeout-established", s.conntrackTCPTimeoutEstablished, "Idle timeout for established TCP connections on agent nodes (0 to leave as-is)")
//TODO(jdef) support this flag once we have a better handle on mesos-dns and k8s DNS integration
//fs.StringVar(&s.HADomain, "ha-domain", s.HADomain, "Domain of the HA scheduler service, only used in HA mode. If specified may be used to construct artifact download URIs.")
@ -413,6 +423,8 @@ func (s *SchedulerServer) prepareExecutorInfo(hks hyperkube.Interface) (*mesos.E
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--sync-frequency=%v", s.kubeletSyncFrequency))
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--contain-pod-resources=%t", s.containPodResources))
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--enable-debugging-handlers=%t", s.kubeletEnableDebuggingHandlers))
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--conntrack-max=%d", s.conntrackMax))
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--conntrack-tcp-timeout-established=%d", s.conntrackTCPTimeoutEstablished))
if s.authPath != "" {
//TODO(jdef) should probably support non-local files, e.g. hdfs:///some/config/file