From d86d1defa1e619b60031d173ed401b00a2d8957f Mon Sep 17 00:00:00 2001 From: Mike Spreitzer Date: Mon, 26 Aug 2019 22:47:21 -0400 Subject: [PATCH] Made IPVS and iptables modes of kube-proxy fully randomize masquerading if possible Work around Linux kernel bug that sometimes causes multiple flows to get mapped to the same IP:PORT and consequently some suffer packet drops. Also made the same update in kubelet. Also added cross-pointers between the two bodies of code, in comments. Some day we should eliminate the duplicate code. But today is not that day. --- .../network/hostport/fake_iptables.go | 4 +++ pkg/kubelet/kubelet_network_linux.go | 16 ++++++++-- pkg/proxy/iptables/proxier.go | 12 ++++++-- pkg/proxy/iptables/proxier_test.go | 28 +++++++++++++++++ pkg/proxy/ipvs/proxier.go | 12 ++++++-- pkg/proxy/ipvs/proxier_test.go | 30 +++++++++++++++++++ pkg/util/iptables/iptables.go | 14 +++++++++ pkg/util/iptables/testing/fake.go | 15 ++++++++-- 8 files changed, 123 insertions(+), 8 deletions(-) diff --git a/pkg/kubelet/dockershim/network/hostport/fake_iptables.go b/pkg/kubelet/dockershim/network/hostport/fake_iptables.go index 2755bf05815..3b7dc695116 100644 --- a/pkg/kubelet/dockershim/network/hostport/fake_iptables.go +++ b/pkg/kubelet/dockershim/network/hostport/fake_iptables.go @@ -347,3 +347,7 @@ func (f *fakeIPTables) isBuiltinChain(tableName utiliptables.Table, chainName ut } return false } + +func (f *fakeIPTables) HasRandomFully() bool { + return false +} diff --git a/pkg/kubelet/kubelet_network_linux.go b/pkg/kubelet/kubelet_network_linux.go index ec7d41d9557..1c9ad46b989 100644 --- a/pkg/kubelet/kubelet_network_linux.go +++ b/pkg/kubelet/kubelet_network_linux.go @@ -96,9 +96,21 @@ func (kl *Kubelet) syncNetworkUtil() { klog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableNAT, utiliptables.ChainPostrouting, KubePostroutingChain, err) return } - if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubePostroutingChain, + // Establish the masquerading rule. + // NB: THIS MUST MATCH the corresponding code in the iptables and ipvs + // modes of kube-proxy + masqRule := []string{ "-m", "comment", "--comment", "kubernetes service traffic requiring SNAT", - "-m", "mark", "--mark", masqueradeMark, "-j", "MASQUERADE"); err != nil { + "-m", "mark", "--mark", masqueradeMark, + "-j", "MASQUERADE", + } + if kl.iptClient.HasRandomFully() { + masqRule = append(masqRule, "--random-fully") + klog.V(3).Info("Using `--random-fully` in the MASQUERADE rule for iptables") + } else { + klog.V(2).Info("Not using `--random-fully` in the MASQUERADE rule for iptables because the local version of iptables does not support it") + } + if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubePostroutingChain, masqRule...); err != nil { klog.Errorf("Failed to ensure SNAT rule for packets marked by %v in %v chain %v: %v", KubeMarkMasqChain, utiliptables.TableNAT, KubePostroutingChain, err) return } diff --git a/pkg/proxy/iptables/proxier.go b/pkg/proxy/iptables/proxier.go index 5b4741b9d4c..c537d5bfae7 100644 --- a/pkg/proxy/iptables/proxier.go +++ b/pkg/proxy/iptables/proxier.go @@ -780,12 +780,20 @@ func (proxier *Proxier) syncProxyRules() { // Install the kubernetes-specific postrouting rules. We use a whole chain for // this so that it is easier to flush and change, for example if the mark // value should ever change. - writeLine(proxier.natRules, []string{ + // NB: THIS MUST MATCH the corresponding code in the kubelet + masqRule := []string{ "-A", string(kubePostroutingChain), "-m", "comment", "--comment", `"kubernetes service traffic requiring SNAT"`, "-m", "mark", "--mark", proxier.masqueradeMark, "-j", "MASQUERADE", - }...) + } + if proxier.iptables.HasRandomFully() { + masqRule = append(masqRule, "--random-fully") + klog.V(3).Info("Using `--random-fully` in the MASQUERADE rule for iptables") + } else { + klog.V(2).Info("Not using `--random-fully` in the MASQUERADE rule for iptables because the local version of iptables does not support it") + } + writeLine(proxier.natRules, masqRule...) // Install the kubernetes-specific masquerade mark rule. We use a whole chain for // this so that it is easier to flush and change, for example if the mark diff --git a/pkg/proxy/iptables/proxier_test.go b/pkg/proxy/iptables/proxier_test.go index 41be026e368..407461a5ac3 100644 --- a/pkg/proxy/iptables/proxier_test.go +++ b/pkg/proxy/iptables/proxier_test.go @@ -438,6 +438,15 @@ func hasSrcType(rules []iptablestest.Rule, srcType string) bool { return false } +func hasMasqRandomFully(rules []iptablestest.Rule) bool { + for _, r := range rules { + if r[iptablestest.Masquerade] == "--random-fully" { + return true + } + } + return false +} + func TestHasJump(t *testing.T) { testCases := map[string]struct { rules []iptablestest.Rule @@ -784,6 +793,25 @@ func TestNodePort(t *testing.T) { } } +func TestMasqueradeRule(t *testing.T) { + for _, testcase := range []bool{false, true} { + ipt := iptablestest.NewFake().SetHasRandomFully(testcase) + fp := NewFakeProxier(ipt, false) + makeServiceMap(fp) + makeEndpointsMap(fp) + fp.syncProxyRules() + + postRoutingRules := ipt.GetRules(string(kubePostroutingChain)) + if !hasJump(postRoutingRules, "MASQUERADE", "", 0) { + errorf(fmt.Sprintf("Failed to find -j MASQUERADE in %s chain", kubePostroutingChain), postRoutingRules, t) + } + if hasMasqRandomFully(postRoutingRules) != testcase { + probs := map[bool]string{false: "found", true: "did not find"} + errorf(fmt.Sprintf("%s --random-fully in -j MASQUERADE rule in %s chain when HasRandomFully()==%v", probs[testcase], kubePostroutingChain, testcase), postRoutingRules, t) + } + } +} + func TestExternalIPsReject(t *testing.T) { ipt := iptablestest.NewFake() fp := NewFakeProxier(ipt, false) diff --git a/pkg/proxy/ipvs/proxier.go b/pkg/proxy/ipvs/proxier.go index 8bd805b8b87..b8a3267ef29 100644 --- a/pkg/proxy/ipvs/proxier.go +++ b/pkg/proxy/ipvs/proxier.go @@ -1654,12 +1654,20 @@ func (proxier *Proxier) createAndLinkeKubeChain() { // Install the kubernetes-specific postrouting rules. We use a whole chain for // this so that it is easier to flush and change, for example if the mark // value should ever change. - writeLine(proxier.natRules, []string{ + // NB: THIS MUST MATCH the corresponding code in the kubelet + masqRule := []string{ "-A", string(kubePostroutingChain), "-m", "comment", "--comment", `"kubernetes service traffic requiring SNAT"`, "-m", "mark", "--mark", proxier.masqueradeMark, "-j", "MASQUERADE", - }...) + } + if proxier.iptables.HasRandomFully() { + masqRule = append(masqRule, "--random-fully") + klog.V(3).Info("Using `--random-fully` in the MASQUERADE rule for iptables") + } else { + klog.V(2).Info("Not using `--random-fully` in the MASQUERADE rule for iptables because the local version of iptables does not support it") + } + writeLine(proxier.natRules, masqRule...) // Install the kubernetes-specific masquerade mark rule. We use a whole chain for // this so that it is easier to flush and change, for example if the mark diff --git a/pkg/proxy/ipvs/proxier_test.go b/pkg/proxy/ipvs/proxier_test.go index 7b5600c09f9..36f444de46f 100644 --- a/pkg/proxy/ipvs/proxier_test.go +++ b/pkg/proxy/ipvs/proxier_test.go @@ -1120,6 +1120,27 @@ func TestClusterIP(t *testing.T) { } } +func TestMasqueradeRule(t *testing.T) { + for _, testcase := range []bool{false, true} { + ipt := iptablestest.NewFake().SetHasRandomFully(testcase) + ipvs := ipvstest.NewFake() + ipset := ipsettest.NewFake(testIPSetVersion) + fp := NewFakeProxier(ipt, ipvs, ipset, nil, nil, false) + makeServiceMap(fp) + makeEndpointsMap(fp) + fp.syncProxyRules() + + postRoutingRules := ipt.GetRules(string(kubePostroutingChain)) + if !hasJump(postRoutingRules, "MASQUERADE", "") { + t.Errorf("Failed to find -j MASQUERADE in %s chain", kubePostroutingChain) + } + if hasMasqRandomFully(postRoutingRules) != testcase { + probs := map[bool]string{false: "found", true: "did not find"} + t.Errorf("%s --random-fully in -j MASQUERADE rule in %s chain for HasRandomFully()=%v", probs[testcase], kubePostroutingChain, testcase) + } + } +} + func TestExternalIPsNoEndpoint(t *testing.T) { ipt := iptablestest.NewFake() ipvs := ipvstest.NewFake() @@ -3112,6 +3133,15 @@ func hasJump(rules []iptablestest.Rule, destChain, ipSet string) bool { return false } +func hasMasqRandomFully(rules []iptablestest.Rule) bool { + for _, r := range rules { + if r[iptablestest.Masquerade] == "--random-fully" { + return true + } + } + return false +} + // checkIptabless to check expected iptables chain and rules func checkIptables(t *testing.T, ipt *iptablestest.FakeIPTables, epIpt netlinktest.ExpectedIptablesChain) { for epChain, epRules := range epIpt { diff --git a/pkg/util/iptables/iptables.go b/pkg/util/iptables/iptables.go index 2f1faed3e07..a874bb8a58c 100644 --- a/pkg/util/iptables/iptables.go +++ b/pkg/util/iptables/iptables.go @@ -69,6 +69,12 @@ type Interface interface { AddReloadFunc(reloadFunc func()) // Destroy cleans up resources used by the Interface Destroy() + // HasRandomFully reveals whether `-j MASQUERADE` takes the + // `--random-fully` option. This is helpful to work around a + // Linux kernel bug that sometimes causes multiple flows to get + // mapped to the same IP:PORT and consequently some suffer packet + // drops. + HasRandomFully() bool } type Protocol byte @@ -121,6 +127,8 @@ const NoFlushTables FlushFlag = false // (test whether a rule exists). var MinCheckVersion = utilversion.MustParseGeneric("1.4.11") +var RandomFullyMinVersion = utilversion.MustParseGeneric("1.6.2") + // Minimum iptables versions supporting the -w and -w flags var WaitMinVersion = utilversion.MustParseGeneric("1.4.20") var WaitSecondsMinVersion = utilversion.MustParseGeneric("1.4.22") @@ -139,6 +147,7 @@ type runner struct { protocol Protocol hasCheck bool hasListener bool + hasRandomFully bool waitFlag []string restoreWaitFlag []string lockfilePath string @@ -166,6 +175,7 @@ func newInternal(exec utilexec.Interface, dbus utildbus.Interface, protocol Prot protocol: protocol, hasCheck: version.AtLeast(MinCheckVersion), hasListener: false, + hasRandomFully: version.AtLeast(RandomFullyMinVersion), waitFlag: getIPTablesWaitFlag(version), restoreWaitFlag: getIPTablesRestoreWaitFlag(version), lockfilePath: lockfilePath, @@ -632,6 +642,10 @@ func (runner *runner) reload() { } } +func (runner *runner) HasRandomFully() bool { + return runner.hasRandomFully +} + var iptablesNotFoundStrings = []string{ // iptables-legacy [-A|-I] BAD-CHAIN [...] // iptables-legacy [-C|-D] GOOD-CHAIN [...non-matching rule...] diff --git a/pkg/util/iptables/testing/fake.go b/pkg/util/iptables/testing/fake.go index 6470eb86289..16382355915 100644 --- a/pkg/util/iptables/testing/fake.go +++ b/pkg/util/iptables/testing/fake.go @@ -35,19 +35,26 @@ const ( Recent = "recent " MatchSet = "--match-set " SrcType = "--src-type " + Masquerade = "MASQUERADE " ) type Rule map[string]string // no-op implementation of iptables Interface type FakeIPTables struct { - Lines []byte + hasRandomFully bool + Lines []byte } func NewFake() *FakeIPTables { return &FakeIPTables{} } +func (f *FakeIPTables) SetHasRandomFully(can bool) *FakeIPTables { + f.hasRandomFully = can + return f +} + func (*FakeIPTables) EnsureChain(table iptables.Table, chain iptables.Chain) (bool, error) { return true, nil } @@ -110,7 +117,7 @@ func (f *FakeIPTables) GetRules(chainName string) (rules []Rule) { for _, l := range strings.Split(string(f.Lines), "\n") { if strings.Contains(l, fmt.Sprintf("-A %v", chainName)) { newRule := Rule(map[string]string{}) - for _, arg := range []string{Destination, Source, DPort, Protocol, Jump, ToDest, Recent, MatchSet, SrcType} { + for _, arg := range []string{Destination, Source, DPort, Protocol, Jump, ToDest, Recent, MatchSet, SrcType, Masquerade} { tok := getToken(l, arg) if tok != "" { newRule[arg] = tok @@ -122,4 +129,8 @@ func (f *FakeIPTables) GetRules(chainName string) (rules []Rule) { return } +func (f *FakeIPTables) HasRandomFully() bool { + return f.hasRandomFully +} + var _ = iptables.Interface(&FakeIPTables{})