diff --git a/cluster/saltbase/salt/base.sls b/cluster/saltbase/salt/base.sls index 80ea7f3f5c2..91639730dd0 100644 --- a/cluster/saltbase/salt/base.sls +++ b/cluster/saltbase/salt/base.sls @@ -2,6 +2,7 @@ pkg-core: pkg.installed: - names: - curl + - ebtables {% if grains['os_family'] == 'RedHat' %} - python - git diff --git a/pkg/kubelet/network/kubenet/kubenet_linux.go b/pkg/kubelet/network/kubenet/kubenet_linux.go index ab2314792ac..996524f7b26 100644 --- a/pkg/kubelet/network/kubenet/kubenet_linux.go +++ b/pkg/kubelet/network/kubenet/kubenet_linux.go @@ -37,6 +37,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/network" "k8s.io/kubernetes/pkg/util/bandwidth" utildbus "k8s.io/kubernetes/pkg/util/dbus" + utilebtables "k8s.io/kubernetes/pkg/util/ebtables" utilerrors "k8s.io/kubernetes/pkg/util/errors" utilexec "k8s.io/kubernetes/pkg/util/exec" utiliptables "k8s.io/kubernetes/pkg/util/iptables" @@ -59,7 +60,13 @@ const ( fallbackMTU = 1460 // private mac prefix safe to use + // Universally administered and locally administered addresses are distinguished by setting the second-least-significant + // bit of the first octet of the address. If it is 1, the address is locally administered. For example, for address 0a:00:00:00:00:00, + // the first cotet is 0a(hex), the binary form of which is 00001010, where the second-least-significant bit is 1. privateMACPrefix = "0a:58" + + // ebtables Chain to store dedup rules + dedupChain = utilebtables.Chain("KUBE-DEDUP") ) type kubenetNetworkPlugin struct { @@ -79,12 +86,13 @@ type kubenetNetworkPlugin struct { hostportHandler hostport.HostportHandler iptables utiliptables.Interface sysctl utilsysctl.Interface + ebtables utilebtables.Interface // vendorDir is passed by kubelet network-plugin-dir parameter. // kubenet will search for cni binaries in DefaultCNIDir first, then continue to vendorDir. vendorDir string nonMasqueradeCIDR string - podCidr string - gateway net.IP + podCidr string + gateway net.IP } func NewPlugin(networkPluginDir string) network.NetworkPlugin { @@ -335,7 +343,6 @@ func (plugin *kubenetNetworkPlugin) setup(namespace string, name string, id kube return fmt.Errorf("CNI plugin reported an invalid IPv4 address for container %v: %+v.", id, res.IP4) } - // Explicitly assign mac address to cbr0. If bridge mac address is not explicitly set will adopt the lowest MAC address of the attached veths. // TODO: Remove this once upstream cni bridge plugin handles this link, err := netlink.LinkByName(BridgeName) @@ -363,8 +370,8 @@ func (plugin *kubenetNetworkPlugin) setup(namespace string, name string, id kube return fmt.Errorf("Error setting promiscuous mode on %s: %v", BridgeName, err) } } - - + // configure the ebtables rules to eliminate duplicate packets by best effort + plugin.syncEbtablesDedupRules(macAddr) } // The first SetUpPod call creates the bridge; get a shaper for the sake of @@ -611,6 +618,48 @@ func (plugin *kubenetNetworkPlugin) shaper() bandwidth.BandwidthShaper { return plugin.bandwidthShaper } +//TODO: make this into a goroutine and rectify the dedup rules periodically +func (plugin *kubenetNetworkPlugin) syncEbtablesDedupRules(macAddr net.HardwareAddr) { + if plugin.ebtables == nil { + plugin.ebtables = utilebtables.New(plugin.execer) + glog.V(3).Infof("Flushing dedup chain") + if err := plugin.ebtables.FlushChain(utilebtables.TableFilter, dedupChain); err != nil { + glog.Errorf("Failed to flush dedup chain: %v", err) + } + } + _, err := plugin.ebtables.GetVersion() + if err != nil { + glog.Warningf("Failed to get ebtables version. Skip syncing ebtables dedup rules: %v", err) + return + } + + glog.V(3).Infof("Filtering packets with ebtables on mac address: %v, gateway: %v, pod CIDR: %v", macAddr.String(), plugin.gateway.String(), plugin.podCidr) + _, err = plugin.ebtables.EnsureChain(utilebtables.TableFilter, dedupChain) + if err != nil { + glog.Errorf("Failed to ensure %v chain %v", utilebtables.TableFilter, dedupChain) + return + } + + _, err = plugin.ebtables.EnsureRule(utilebtables.Append, utilebtables.TableFilter, utilebtables.ChainOutput, "-j", string(dedupChain)) + if err != nil { + glog.Errorf("Failed to ensure %v chain %v jump to %v chain: %v", utilebtables.TableFilter, utilebtables.ChainOutput, dedupChain, err) + return + } + + commonArgs := []string{"-p", "IPv4", "-s", macAddr.String(), "-o", "veth+"} + _, err = plugin.ebtables.EnsureRule(utilebtables.Prepend, utilebtables.TableFilter, dedupChain, append(commonArgs, "--ip-src", plugin.gateway.String(), "-j", "ACCEPT")...) + if err != nil { + glog.Errorf("Failed to ensure packets from cbr0 gateway to be accepted") + return + + } + _, err = plugin.ebtables.EnsureRule(utilebtables.Append, utilebtables.TableFilter, dedupChain, append(commonArgs, "--ip-src", plugin.podCidr, "-j", "DROP")...) + if err != nil { + glog.Errorf("Failed to ensure packets from podCidr but has mac address of cbr0 to get dropped.") + return + } +} + // generateHardwareAddr generates 48 bit virtual mac addresses based on the IP input. func generateHardwareAddr(ip net.IP) (net.HardwareAddr, error) { if ip.To4() == nil { @@ -627,4 +676,4 @@ func generateHardwareAddr(ip net.IP) (net.HardwareAddr, error) { return nil, fmt.Errorf("Failed to parse mac address %s generated based on ip %s due to: %v", mac, ip, err) } return hwAddr, nil -} \ No newline at end of file +}