diff --git a/pkg/proxy/iptables/proxier.go b/pkg/proxy/iptables/proxier.go index df7a282111f..e8442ff9632 100644 --- a/pkg/proxy/iptables/proxier.go +++ b/pkg/proxy/iptables/proxier.go @@ -92,6 +92,7 @@ const ( const sysctlRouteLocalnet = "net/ipv4/conf/all/route_localnet" const sysctlBridgeCallIPTables = "net/bridge/bridge-nf-call-iptables" +const sysctlNFConntrackTCPBeLiberal = "net/netfilter/nf_conntrack_tcp_be_liberal" // internal struct for string service information type servicePortInfo struct { @@ -196,6 +197,10 @@ type Proxier struct { // localhostNodePorts indicates whether we allow NodePort services to be accessed // via localhost. localhostNodePorts bool + + // conntrackTCPLiberal indicates whether the system sets the kernel nf_conntrack_tcp_be_liberal + conntrackTCPLiberal bool + // nodePortAddresses selects the interfaces where nodePort works. nodePortAddresses *proxyutil.NodePortAddresses // networkInterfacer defines an interface for several net library functions. @@ -241,6 +246,14 @@ func NewProxier(ipFamily v1.IPFamily, } } + // Be conservative in what you do, be liberal in what you accept from others. + // If it's non-zero, we mark only out of window RST segments as INVALID. + // Ref: https://docs.kernel.org/networking/nf_conntrack-sysctl.html + conntrackTCPLiberal := false + if val, err := sysctl.GetSysctl(sysctlNFConntrackTCPBeLiberal); err == nil && val != 0 { + conntrackTCPLiberal = true + klog.InfoS("nf_conntrack_tcp_be_liberal set, not installing DROP rules for INVALID packets") + } // Proxy needs br_netfilter and bridge-nf-call-iptables=1 when containers // are connected to a Linux bridge (but not SDN bridges). Until most // plugins handle this, log when config is missing @@ -282,6 +295,7 @@ func NewProxier(ipFamily v1.IPFamily, localhostNodePorts: localhostNodePorts, nodePortAddresses: nodePortAddresses, networkInterfacer: proxyutil.RealNetwork{}, + conntrackTCPLiberal: conntrackTCPLiberal, } burstSyncs := 2 @@ -1443,14 +1457,17 @@ func (proxier *Proxier) syncProxyRules() { } // Drop the packets in INVALID state, which would potentially cause - // unexpected connection reset. - // https://github.com/kubernetes/kubernetes/issues/74839 - proxier.filterRules.Write( - "-A", string(kubeForwardChain), - "-m", "conntrack", - "--ctstate", "INVALID", - "-j", "DROP", - ) + // unexpected connection reset if nf_conntrack_tcp_be_liberal is not set. + // Ref: https://github.com/kubernetes/kubernetes/issues/74839 + // Ref: https://github.com/kubernetes/kubernetes/issues/117924 + if !proxier.conntrackTCPLiberal { + proxier.filterRules.Write( + "-A", string(kubeForwardChain), + "-m", "conntrack", + "--ctstate", "INVALID", + "-j", "DROP", + ) + } // If the masqueradeMark has been added then we want to forward that same // traffic, this allows NodePort traffic to be forwarded even if the default diff --git a/pkg/proxy/iptables/proxier_test.go b/pkg/proxy/iptables/proxier_test.go index 591a87aee8f..ca00461124b 100644 --- a/pkg/proxy/iptables/proxier_test.go +++ b/pkg/proxy/iptables/proxier_test.go @@ -2565,6 +2565,49 @@ func TestHealthCheckNodePort(t *testing.T) { }) } +func TestDropInvalidRule(t *testing.T) { + for _, testcase := range []bool{false, true} { + t.Run(fmt.Sprintf("tcpLiberal %t", testcase), func(t *testing.T) { + ipt := iptablestest.NewFake() + fp := NewFakeProxier(ipt) + fp.conntrackTCPLiberal = testcase + fp.syncProxyRules() + + expected := dedent.Dedent(` + *filter + :KUBE-NODEPORTS - [0:0] + :KUBE-SERVICES - [0:0] + :KUBE-EXTERNAL-SERVICES - [0:0] + :KUBE-FIREWALL - [0:0] + :KUBE-FORWARD - [0:0] + :KUBE-PROXY-FIREWALL - [0:0] + -A KUBE-FIREWALL -m comment --comment "block incoming localnet connections" -d 127.0.0.0/8 ! -s 127.0.0.0/8 -m conntrack ! --ctstate RELATED,ESTABLISHED,DNAT -j DROP`) + if !testcase { + expected += "\n-A KUBE-FORWARD -m conntrack --ctstate INVALID -j DROP" + } + + expected += dedent.Dedent(` + -A KUBE-FORWARD -m comment --comment "kubernetes forwarding rules" -m mark --mark 0x4000/0x4000 -j ACCEPT + -A KUBE-FORWARD -m comment --comment "kubernetes forwarding conntrack rule" -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT + COMMIT + *nat + :KUBE-NODEPORTS - [0:0] + :KUBE-SERVICES - [0:0] + :KUBE-MARK-MASQ - [0:0] + :KUBE-POSTROUTING - [0:0] + -A KUBE-SERVICES -m comment --comment "kubernetes service nodeports; NOTE: this must be the last rule in this chain" -m addrtype --dst-type LOCAL -j KUBE-NODEPORTS + -A KUBE-MARK-MASQ -j MARK --or-mark 0x4000 + -A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN + -A KUBE-POSTROUTING -j MARK --xor-mark 0x4000 + -A KUBE-POSTROUTING -m comment --comment "kubernetes service traffic requiring SNAT" -j MASQUERADE + COMMIT + `) + + assertIPTablesRulesEqual(t, getLine(), true, expected, fp.iptablesData.String()) + }) + } +} + func TestMasqueradeRule(t *testing.T) { for _, testcase := range []bool{false, true} { ipt := iptablestest.NewFake().SetHasRandomFully(testcase)