From 6248939e11a4d5b422da5ffdc7ec52a6c1ded54a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 4 Feb 2016 09:06:42 -0600 Subject: [PATCH] Push responsibility for bridge-nf-call-iptables to kubelet network plugins bridge-nf-call-iptables appears to only be relevant when the containers are attached to a Linux bridge, which is usually the case with default Kubernetes setups, docker, and flannel. That ensures that the container traffic is actually subject to the iptables rules since it traverses a Linux bridge and bridged traffic is only subject to iptables when bridge-nf-call-iptables=1. But with other networking solutions (like openshift-sdn) that don't use Linux bridges, bridge-nf-call-iptables may not be relevant, because iptables is invoked at other points not involving a Linux bridge. The decision to set bridge-nf-call-iptables should be influenced by networking plugins, so push the responsibility out to them. If no network plugin is specified, fall back to the existing bridge-nf-call-iptables=1 behavior. --- docs/admin/network-plugins.md | 6 ++++++ pkg/kubelet/network/plugins.go | 20 ++++++++++++++++++++ pkg/proxy/iptables/proxier.go | 19 +++++++++++++------ 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/docs/admin/network-plugins.md b/docs/admin/network-plugins.md index 41249f83b37..424ab701f86 100644 --- a/docs/admin/network-plugins.md +++ b/docs/admin/network-plugins.md @@ -42,6 +42,12 @@ The kubelet has a single default network plugin, and a default network common to * `network-plugin-dir`: Kubelet probes this directory for plugins on startup * `network-plugin`: The network plugin to use from `network-plugin-dir`. It must match the name reported by a plugin probed from the plugin directory. For CNI plugins, this is simply "cni". 
+## Network Plugin Requirements + +Besides providing the [`NetworkPlugin` interface](../../pkg/kubelet/network/plugins.go) to configure and clean up pod networking, the plugin may also need specific support for kube-proxy. The iptables proxy obviously depends on iptables, and the plugin may need to ensure that container traffic is made available to iptables. For example, if the plugin connects containers to a Linux bridge, the plugin must set the `net/bridge/bridge-nf-call-iptables` sysctl to `1` to ensure that the iptables proxy functions correctly. If the plugin does not use a Linux bridge (but instead something like Open vSwitch or some other mechanism) it should ensure container traffic is appropriately routed for the proxy. + +By default if no kubelet network plugin is specified, the `noop` plugin is used, which sets `net/bridge/bridge-nf-call-iptables=1` to ensure simple configurations (like docker with a bridge) work correctly with the iptables proxy. + ### Exec Place plugins in `network-plugin-dir/plugin-name/plugin-name`, i.e if you have a bridge plugin and `network-plugin-dir` is `/usr/lib/kubernetes`, you'd place the bridge plugin executable at `/usr/lib/kubernetes/bridge/bridge`. See [this comment](../../pkg/kubelet/network/exec/exec.go) for more details. 
diff --git a/pkg/kubelet/network/plugins.go b/pkg/kubelet/network/plugins.go index 25ab60447ce..dc56d9caee4 100644 --- a/pkg/kubelet/network/plugins.go +++ b/pkg/kubelet/network/plugins.go @@ -28,6 +28,8 @@ import ( "k8s.io/kubernetes/pkg/api/unversioned" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" utilerrors "k8s.io/kubernetes/pkg/util/errors" + utilexec "k8s.io/kubernetes/pkg/util/exec" + utilsysctl "k8s.io/kubernetes/pkg/util/sysctl" "k8s.io/kubernetes/pkg/util/validation" ) @@ -93,6 +95,9 @@ func InitNetworkPlugin(plugins []NetworkPlugin, networkPluginName string, host H if networkPluginName == "" { // default to the no_op plugin plug := &noopNetworkPlugin{} + if err := plug.Init(host); err != nil { + return nil, err + } return plug, nil } @@ -135,7 +140,22 @@ func UnescapePluginName(in string) string { type noopNetworkPlugin struct { } +const sysctlBridgeCallIptables = "net/bridge/bridge-nf-call-iptables" + func (plugin *noopNetworkPlugin) Init(host Host) error { + // Set bridge-nf-call-iptables=1 to maintain compatibility with older + // kubernetes versions to ensure the iptables-based kube proxy functions + // correctly. Other plugins are responsible for setting this correctly + // depending on whether or not they connect containers to Linux bridges + // or use some other mechanism (ie, SDN vswitch). + + // Ensure the netfilter module is loaded on kernel >= 3.18; previously + // it was built-in. 
+ utilexec.New().Command("modprobe", "br-netfilter").CombinedOutput() + if err := utilsysctl.SetSysctl(sysctlBridgeCallIptables, 1); err != nil { + glog.Warningf("can't set sysctl %s: %v", sysctlBridgeCallIptables, err) + } + return nil } diff --git a/pkg/proxy/iptables/proxier.go b/pkg/proxy/iptables/proxier.go index 6c4f0ede9d9..2e5db0461fe 100644 --- a/pkg/proxy/iptables/proxier.go +++ b/pkg/proxy/iptables/proxier.go @@ -26,6 +26,7 @@ import ( "encoding/base32" "fmt" "net" + "os" "reflect" "strconv" "strings" @@ -190,12 +191,18 @@ func NewProxier(ipt utiliptables.Interface, exec utilexec.Interface, syncPeriod return nil, fmt.Errorf("can't set sysctl %s: %v", sysctlRouteLocalnet, err) } - // Load the module. It's OK if this fails (e.g. the module is not present) - // because we'll catch the error on the sysctl, which is what we actually - // care about. - exec.Command("modprobe", "br-netfilter").CombinedOutput() - if err := utilsysctl.SetSysctl(sysctlBridgeCallIptables, 1); err != nil { - glog.Warningf("can't set sysctl %s: %v", sysctlBridgeCallIptables, err) + // Proxy needs br_netfilter and bridge-nf-call-iptables=1 when containers + // are connected to a Linux bridge (but not SDN bridges). Until most + // plugins handle this, log when config is missing + warnBrNetfilter := false + if _, err := os.Stat("/sys/module/br_netfilter"); os.IsNotExist(err) { + warnBrNetfilter = true + } + if val, err := utilsysctl.GetSysctl(sysctlBridgeCallIptables); err == nil && val != 1 { + warnBrNetfilter = true + } + if warnBrNetfilter { + glog.Infof("missing br-netfilter module or unset br-nf-call-iptables; proxy may not work as intended") } // Generate the masquerade mark to use for SNAT rules.