mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-28 14:07:14 +00:00
avoid updating nf_conntrack-related settings, by default, when running k8s on mesos
This commit is contained in:
parent
e663dbc302
commit
58407ca8a2
@ -22,6 +22,16 @@ It is **strongly** recommended that all of the Kubernetes-Mesos executors are de
|
|||||||
Not following the above steps prior to upgrading the scheduler can result in a cluster wherein pods will never again be scheduled upon one or more nodes.
|
Not following the above steps prior to upgrading the scheduler can result in a cluster wherein pods will never again be scheduled upon one or more nodes.
|
||||||
This issue is being tracked here: https://github.com/mesosphere/kubernetes-mesos/issues/572.
|
This issue is being tracked here: https://github.com/mesosphere/kubernetes-mesos/issues/572.
|
||||||
|
|
||||||
|
### Netfilter Connection Tracking
|
||||||
|
|
||||||
|
The scheduler offers flags to tweak connection tracking for kube-proxy instances that are launched on slave nodes:
|
||||||
|
|
||||||
|
- conntrack-max (do **NOT** set this to a non-zero value if the Mesos slave process is running in a non-root network namespace)
|
||||||
|
- conntrack-tcp-timeout-established
|
||||||
|
|
||||||
|
By default, both of these are set to 0 when running Kubernetes-Mesos, which leaves the slave's existing connection tracking settings as-is.
|
||||||
|
Setting either of these flags to a non-zero value may impact connection tracking for the entire slave.
|
||||||
|
|
||||||
### Port Specifications
|
### Port Specifications
|
||||||
|
|
||||||
In order for pods (replicated, or otherwise) to be scheduled on the cluster, it is strongly recommended that:
|
In order for pods (replicated, or otherwise) to be scheduled on the cluster, it is strongly recommended that:
|
||||||
|
@ -23,6 +23,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"os/signal"
|
"os/signal"
|
||||||
"path"
|
"path"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
|
|
||||||
@ -66,10 +67,12 @@ type MinionServer struct {
|
|||||||
logMaxAgeInDays int
|
logMaxAgeInDays int
|
||||||
logVerbosity int32 // see glog.Level
|
logVerbosity int32 // see glog.Level
|
||||||
|
|
||||||
runProxy bool
|
runProxy bool
|
||||||
proxyLogV int
|
proxyLogV int
|
||||||
proxyBindall bool
|
proxyBindall bool
|
||||||
proxyMode string
|
proxyMode string
|
||||||
|
conntrackMax int
|
||||||
|
conntrackTCPTimeoutEstablished int
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewMinionServer creates the MinionServer struct with default values to be used by hyperkube
|
// NewMinionServer creates the MinionServer struct with default values to be used by hyperkube
|
||||||
@ -139,11 +142,8 @@ func (ms *MinionServer) launchProxyServer() {
|
|||||||
"--logtostderr=true",
|
"--logtostderr=true",
|
||||||
"--resource-container=" + path.Join("/", ms.mesosCgroup, "kube-proxy"),
|
"--resource-container=" + path.Join("/", ms.mesosCgroup, "kube-proxy"),
|
||||||
"--proxy-mode=" + ms.proxyMode,
|
"--proxy-mode=" + ms.proxyMode,
|
||||||
// TODO(jdef) this is a temporary hack to fix failing smoke tests. a following PR
|
"--conntrack-max=" + strconv.Itoa(ms.conntrackMax),
|
||||||
// will more properly fix the smoke tests as well as make these flags configurable
|
"--conntrack-tcp-timeout-established=" + strconv.Itoa(ms.conntrackTCPTimeoutEstablished),
|
||||||
// at the framework level (as opposed to hardcoded here)
|
|
||||||
"--conntrack-max=0",
|
|
||||||
"--conntrack-tcp-timeout-established=0",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ms.clientConfig.Host != "" {
|
if ms.clientConfig.Host != "" {
|
||||||
@ -351,4 +351,6 @@ func (ms *MinionServer) AddMinionFlags(fs *pflag.FlagSet) {
|
|||||||
fs.IntVar(&ms.proxyLogV, "proxy-logv", ms.proxyLogV, "Log verbosity of the child kube-proxy.")
|
fs.IntVar(&ms.proxyLogV, "proxy-logv", ms.proxyLogV, "Log verbosity of the child kube-proxy.")
|
||||||
fs.BoolVar(&ms.proxyBindall, "proxy-bindall", ms.proxyBindall, "When true will cause kube-proxy to bind to 0.0.0.0.")
|
fs.BoolVar(&ms.proxyBindall, "proxy-bindall", ms.proxyBindall, "When true will cause kube-proxy to bind to 0.0.0.0.")
|
||||||
fs.StringVar(&ms.proxyMode, "proxy-mode", ms.proxyMode, "Which proxy mode to use: 'userspace' (older) or 'iptables' (faster). If the iptables proxy is selected, regardless of how, but the system's kernel or iptables versions are insufficient, this always falls back to the userspace proxy.")
|
fs.StringVar(&ms.proxyMode, "proxy-mode", ms.proxyMode, "Which proxy mode to use: 'userspace' (older) or 'iptables' (faster). If the iptables proxy is selected, regardless of how, but the system's kernel or iptables versions are insufficient, this always falls back to the userspace proxy.")
|
||||||
|
fs.IntVar(&ms.conntrackMax, "conntrack-max", ms.conntrackMax, "Maximum number of NAT connections to track on agent nodes (0 to leave as-is)")
|
||||||
|
fs.IntVar(&ms.conntrackTCPTimeoutEstablished, "conntrack-tcp-timeout-established", ms.conntrackTCPTimeoutEstablished, "Idle timeout for established TCP connections on agent nodes (0 to leave as-is)")
|
||||||
}
|
}
|
||||||
|
@ -133,36 +133,38 @@ type SchedulerServer struct {
|
|||||||
minionLogMaxBackups int
|
minionLogMaxBackups int
|
||||||
minionLogMaxAgeInDays int
|
minionLogMaxAgeInDays int
|
||||||
|
|
||||||
mesosAuthProvider string
|
mesosAuthProvider string
|
||||||
driverPort uint
|
driverPort uint
|
||||||
hostnameOverride string
|
hostnameOverride string
|
||||||
reconcileInterval int64
|
reconcileInterval int64
|
||||||
reconcileCooldown time.Duration
|
reconcileCooldown time.Duration
|
||||||
defaultContainerCPULimit mresource.CPUShares
|
defaultContainerCPULimit mresource.CPUShares
|
||||||
defaultContainerMemLimit mresource.MegaBytes
|
defaultContainerMemLimit mresource.MegaBytes
|
||||||
schedulerConfigFileName string
|
schedulerConfigFileName string
|
||||||
graceful bool
|
graceful bool
|
||||||
frameworkName string
|
frameworkName string
|
||||||
frameworkWebURI string
|
frameworkWebURI string
|
||||||
ha bool
|
ha bool
|
||||||
advertisedAddress string
|
advertisedAddress string
|
||||||
serviceAddress net.IP
|
serviceAddress net.IP
|
||||||
haDomain string
|
haDomain string
|
||||||
kmPath string
|
kmPath string
|
||||||
clusterDNS net.IP
|
clusterDNS net.IP
|
||||||
clusterDomain string
|
clusterDomain string
|
||||||
kubeletRootDirectory string
|
kubeletRootDirectory string
|
||||||
kubeletDockerEndpoint string
|
kubeletDockerEndpoint string
|
||||||
kubeletPodInfraContainerImage string
|
kubeletPodInfraContainerImage string
|
||||||
kubeletCadvisorPort uint
|
kubeletCadvisorPort uint
|
||||||
kubeletHostNetworkSources string
|
kubeletHostNetworkSources string
|
||||||
kubeletSyncFrequency time.Duration
|
kubeletSyncFrequency time.Duration
|
||||||
kubeletNetworkPluginName string
|
kubeletNetworkPluginName string
|
||||||
staticPodsConfigPath string
|
staticPodsConfigPath string
|
||||||
dockerCfgPath string
|
dockerCfgPath string
|
||||||
containPodResources bool
|
containPodResources bool
|
||||||
nodeRelistPeriod time.Duration
|
nodeRelistPeriod time.Duration
|
||||||
sandboxOverlay string
|
sandboxOverlay string
|
||||||
|
conntrackMax int
|
||||||
|
conntrackTCPTimeoutEstablished int
|
||||||
|
|
||||||
executable string // path to the binary running this service
|
executable string // path to the binary running this service
|
||||||
client *client.Client
|
client *client.Client
|
||||||
@ -216,6 +218,12 @@ func NewSchedulerServer() *SchedulerServer {
|
|||||||
kubeletEnableDebuggingHandlers: true,
|
kubeletEnableDebuggingHandlers: true,
|
||||||
containPodResources: true,
|
containPodResources: true,
|
||||||
nodeRelistPeriod: defaultNodeRelistPeriod,
|
nodeRelistPeriod: defaultNodeRelistPeriod,
|
||||||
|
conntrackTCPTimeoutEstablished: 0, // non-zero values may require hand-tuning other sysctl's on the host; do so with caution
|
||||||
|
|
||||||
|
// non-zero values can trigger failures when updating /sys/module/nf_conntrack/parameters/hashsize
|
||||||
|
// when kube-proxy is running in a non-root netns (i.e. not init_net); setting this to a non-zero value will
|
||||||
|
// impact connection tracking for the entire host on which kube-proxy is running. xref (k8s#19182)
|
||||||
|
conntrackMax: 0,
|
||||||
}
|
}
|
||||||
// cache this for later use. also useful in case the original binary gets deleted, e.g.
|
// cache this for later use. also useful in case the original binary gets deleted, e.g.
|
||||||
// during upgrades, development deployments, etc.
|
// during upgrades, development deployments, etc.
|
||||||
@ -294,6 +302,8 @@ func (s *SchedulerServer) addCoreFlags(fs *pflag.FlagSet) {
|
|||||||
fs.DurationVar(&s.kubeletSyncFrequency, "kubelet-sync-frequency", s.kubeletSyncFrequency, "Max period between synchronizing running containers and config")
|
fs.DurationVar(&s.kubeletSyncFrequency, "kubelet-sync-frequency", s.kubeletSyncFrequency, "Max period between synchronizing running containers and config")
|
||||||
fs.StringVar(&s.kubeletNetworkPluginName, "kubelet-network-plugin", s.kubeletNetworkPluginName, "<Warning: Alpha feature> The name of the network plugin to be invoked for various events in kubelet/pod lifecycle")
|
fs.StringVar(&s.kubeletNetworkPluginName, "kubelet-network-plugin", s.kubeletNetworkPluginName, "<Warning: Alpha feature> The name of the network plugin to be invoked for various events in kubelet/pod lifecycle")
|
||||||
fs.BoolVar(&s.kubeletEnableDebuggingHandlers, "kubelet-enable-debugging-handlers", s.kubeletEnableDebuggingHandlers, "Enables kubelet endpoints for log collection and local running of containers and commands")
|
fs.BoolVar(&s.kubeletEnableDebuggingHandlers, "kubelet-enable-debugging-handlers", s.kubeletEnableDebuggingHandlers, "Enables kubelet endpoints for log collection and local running of containers and commands")
|
||||||
|
fs.IntVar(&s.conntrackMax, "conntrack-max", s.conntrackMax, "Maximum number of NAT connections to track on agent nodes (0 to leave as-is)")
|
||||||
|
fs.IntVar(&s.conntrackTCPTimeoutEstablished, "conntrack-tcp-timeout-established", s.conntrackTCPTimeoutEstablished, "Idle timeout for established TCP connections on agent nodes (0 to leave as-is)")
|
||||||
|
|
||||||
//TODO(jdef) support this flag once we have a better handle on mesos-dns and k8s DNS integration
|
//TODO(jdef) support this flag once we have a better handle on mesos-dns and k8s DNS integration
|
||||||
//fs.StringVar(&s.HADomain, "ha-domain", s.HADomain, "Domain of the HA scheduler service, only used in HA mode. If specified may be used to construct artifact download URIs.")
|
//fs.StringVar(&s.HADomain, "ha-domain", s.HADomain, "Domain of the HA scheduler service, only used in HA mode. If specified may be used to construct artifact download URIs.")
|
||||||
@ -413,6 +423,8 @@ func (s *SchedulerServer) prepareExecutorInfo(hks hyperkube.Interface) (*mesos.E
|
|||||||
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--sync-frequency=%v", s.kubeletSyncFrequency))
|
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--sync-frequency=%v", s.kubeletSyncFrequency))
|
||||||
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--contain-pod-resources=%t", s.containPodResources))
|
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--contain-pod-resources=%t", s.containPodResources))
|
||||||
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--enable-debugging-handlers=%t", s.kubeletEnableDebuggingHandlers))
|
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--enable-debugging-handlers=%t", s.kubeletEnableDebuggingHandlers))
|
||||||
|
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--conntrack-max=%d", s.conntrackMax))
|
||||||
|
ci.Arguments = append(ci.Arguments, fmt.Sprintf("--conntrack-tcp-timeout-established=%d", s.conntrackTCPTimeoutEstablished))
|
||||||
|
|
||||||
if s.authPath != "" {
|
if s.authPath != "" {
|
||||||
//TODO(jdef) should probably support non-local files, e.g. hdfs:///some/config/file
|
//TODO(jdef) should probably support non-local files, e.g. hdfs:///some/config/file
|
||||||
|
Loading…
Reference in New Issue
Block a user