Merge pull request #122663 from aroradaman/drop-ct-state-invalid-rule

pkg/proxy/nftables: drop conntrack state invalid rule
This commit is contained in:
Kubernetes Prow Robot 2024-01-13 19:01:16 +01:00 committed by GitHub
commit 12fc215656
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 16 additions and 85 deletions

View File

@ -144,14 +144,6 @@ func diffNFTablesChain(nft *knftables.Fake, chain, expected string) string {
return cmp.Diff(expected, result)
}
// assertNFTablesChainEqual asserts that the indicated chain in nft's table contains
// exactly the rules in expected (in that order).
func assertNFTablesChainEqual(t *testing.T, line string, nft *knftables.Fake, chain, expected string) {
if diff := diffNFTablesChain(nft, chain, expected); diff != "" {
t.Errorf("rules do not match%s:\ndiff:\n%s", line, diff)
}
}
// nftablesTracer holds data used while virtually tracing a packet through a set of
// iptables rules
type nftablesTracer struct {
@ -310,10 +302,6 @@ var ignoredRegexp = regexp.MustCompile(strings.Join(
// The trace tests only check new connections, so for our purposes, this
// check always succeeds (and thus can be ignored).
`^ct state new`,
// Likewise, this rule never matches and thus never drops anything, and so
// can be ignored.
`^ct state invalid drop$`,
},
"|",
))
@ -640,8 +628,6 @@ func runPacketFlowTests(t *testing.T, line string, nft *knftables.Fake, nodeIPs
var testInput = dedent.Dedent(`
add table ip testing { comment "rules for kube-proxy" ; }
add chain ip testing forward
add rule ip testing forward ct state invalid drop
add chain ip testing mark-for-masquerade
add rule ip testing mark-for-masquerade mark set mark or 0x4000
add chain ip testing masquerading
@ -697,7 +683,6 @@ var testExpected = dedent.Dedent(`
add chain ip testing external-42NFTM6N-ns2/svc2/tcp/p80
add chain ip testing firewall-allow-check
add chain ip testing firewall-check
add chain ip testing forward
add chain ip testing mark-for-masquerade
add chain ip testing masquerading
add chain ip testing service-42NFTM6N-ns2/svc2/tcp/p80
@ -712,7 +697,6 @@ var testExpected = dedent.Dedent(`
add rule ip testing firewall-allow-check ip daddr . meta l4proto . th dport . ip saddr @firewall-allow return
add rule ip testing firewall-allow-check drop
add rule ip testing firewall-check ip daddr . meta l4proto . th dport @firewall jump firewall-allow-check
add rule ip testing forward ct state invalid drop
add rule ip testing mark-for-masquerade mark set mark or 0x4000
add rule ip testing masquerading mark and 0x4000 == 0 return
add rule ip testing masquerading mark set mark xor 0x4000

View File

@ -84,13 +84,8 @@ const (
// masquerading
kubeMarkMasqChain = "mark-for-masquerade"
kubeMasqueradingChain = "masquerading"
// chain for special filtering rules
kubeForwardChain = "forward"
)
const sysctlNFConntrackTCPBeLiberal = "net/netfilter/nf_conntrack_tcp_be_liberal"
// internal struct for string service information
type servicePortInfo struct {
*proxy.BaseServicePortInfo
@ -176,9 +171,6 @@ type Proxier struct {
serviceHealthServer healthcheck.ServiceHealthServer
healthzServer *healthcheck.ProxierHealthServer
// conntrackTCPLiberal indicates whether the system sets the kernel nf_conntrack_tcp_be_liberal
conntrackTCPLiberal bool
// nodePortAddresses selects the interfaces where nodePort works.
nodePortAddresses *proxyutil.NodePortAddresses
// networkInterfacer defines an interface for several net library functions.
@ -211,15 +203,6 @@ func NewProxier(ipFamily v1.IPFamily,
) (*Proxier, error) {
nodePortAddresses := proxyutil.NewNodePortAddresses(ipFamily, nodePortAddressStrings)
// Be conservative in what you do, be liberal in what you accept from others.
// If it's non-zero, we mark only out of window RST segments as INVALID.
// Ref: https://docs.kernel.org/networking/nf_conntrack-sysctl.html
conntrackTCPLiberal := false
if val, err := sysctl.GetSysctl(sysctlNFConntrackTCPBeLiberal); err == nil && val != 0 {
conntrackTCPLiberal = true
klog.InfoS("nf_conntrack_tcp_be_liberal set, not installing DROP rules for INVALID packets")
}
if initOnly {
klog.InfoS("System initialized and --init-only specified")
return nil, nil
@ -262,7 +245,6 @@ func NewProxier(ipFamily v1.IPFamily,
healthzServer: healthzServer,
nodePortAddresses: nodePortAddresses,
networkInterfacer: proxyutil.RealNetwork{},
conntrackTCPLiberal: conntrackTCPLiberal,
staleChains: make(map[string]time.Time),
}
@ -353,8 +335,6 @@ var nftablesJumpChains = []nftablesJumpChain{
{kubeEndpointsCheckChain, "filter-forward", "ct state new"},
{kubeEndpointsCheckChain, "filter-output", "ct state new"},
{kubeForwardChain, "filter-forward", ""},
{kubeFirewallCheckChain, "filter-prerouting", "ct state new"},
{kubeFirewallCheckChain, "filter-output", "ct state new"},
@ -419,7 +399,7 @@ func (proxier *Proxier) setupNFTables(tx *knftables.Transaction) {
}
// Ensure all of our other "top-level" chains exist
for _, chain := range []string{kubeServicesChain, kubeForwardChain, kubeMasqueradingChain, kubeMarkMasqChain} {
for _, chain := range []string{kubeServicesChain, kubeMasqueradingChain, kubeMarkMasqChain} {
ensureChain(chain, tx, createdChains)
}
@ -449,17 +429,6 @@ func (proxier *Proxier) setupNFTables(tx *knftables.Transaction) {
Rule: "masquerade fully-random",
})
// Drop the packets in INVALID state, which would potentially cause
// unexpected connection reset if nf_conntrack_tcp_be_liberal is not set.
// Ref: https://github.com/kubernetes/kubernetes/issues/74839
// Ref: https://github.com/kubernetes/kubernetes/issues/117924
if !proxier.conntrackTCPLiberal {
tx.Add(&knftables.Rule{
Chain: kubeForwardChain,
Rule: "ct state invalid drop",
})
}
// Fill in nodeport-ips set if needed (or delete it if not). (We do "add+delete"
// rather than just "delete" when we want to ensure the set doesn't exist, because
// doing just "delete" would return an error if the set didn't exist.)

View File

@ -499,8 +499,6 @@ func TestOverallNFTablesRules(t *testing.T) {
expected := dedent.Dedent(`
add table ip kube-proxy { comment "rules for kube-proxy" ; }
add chain ip kube-proxy forward
add rule ip kube-proxy forward ct state invalid drop
add chain ip kube-proxy mark-for-masquerade
add rule ip kube-proxy mark-for-masquerade mark set mark or 0x4000
add chain ip kube-proxy masquerading
@ -512,7 +510,6 @@ func TestOverallNFTablesRules(t *testing.T) {
add rule ip kube-proxy filter-prerouting ct state new jump firewall-check
add chain ip kube-proxy filter-forward { type filter hook forward priority -110 ; }
add rule ip kube-proxy filter-forward ct state new jump endpoints-check
add rule ip kube-proxy filter-forward jump forward
add chain ip kube-proxy filter-input { type filter hook input priority -110 ; }
add rule ip kube-proxy filter-input ct state new jump endpoints-check
add chain ip kube-proxy filter-output { type filter hook output priority -110 ; }
@ -1226,23 +1223,6 @@ func TestNodePorts(t *testing.T) {
}
}
func TestDropInvalidRule(t *testing.T) {
for _, tcpLiberal := range []bool{false, true} {
t.Run(fmt.Sprintf("tcpLiberal %t", tcpLiberal), func(t *testing.T) {
nft, fp := NewFakeProxier(v1.IPv4Protocol)
fp.conntrackTCPLiberal = tcpLiberal
fp.syncProxyRules()
var expected string
if !tcpLiberal {
expected = "ct state invalid drop"
}
assertNFTablesChainEqual(t, getLine(), nft, kubeForwardChain, expected)
})
}
}
// TestExternalTrafficPolicyLocal tests that traffic to externally-facing IPs does not get
// masqueraded when using Local traffic policy. For traffic from external sources, that
// means it can also only be routed to local endpoints, but for traffic from internal
@ -4268,7 +4248,6 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
add chain ip kube-proxy filter-input { type filter hook input priority -110 ; }
add chain ip kube-proxy filter-output { type filter hook output priority -110 ; }
add chain ip kube-proxy firewall-check
add chain ip kube-proxy forward
add chain ip kube-proxy mark-for-masquerade
add chain ip kube-proxy masquerading
add chain ip kube-proxy nat-output { type nat hook output priority -100 ; }
@ -4281,12 +4260,10 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
add rule ip kube-proxy endpoints-check fib daddr type local ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @no-endpoint-nodeports
add rule ip kube-proxy filter-prerouting ct state new jump firewall-check
add rule ip kube-proxy filter-forward ct state new jump endpoints-check
add rule ip kube-proxy filter-forward jump forward
add rule ip kube-proxy filter-input ct state new jump endpoints-check
add rule ip kube-proxy filter-output ct state new jump endpoints-check
add rule ip kube-proxy filter-output ct state new jump firewall-check
add rule ip kube-proxy firewall-check ip daddr . meta l4proto . th dport vmap @firewall-ips
add rule ip kube-proxy forward ct state invalid drop
add rule ip kube-proxy mark-for-masquerade mark set mark or 0x4000
add rule ip kube-proxy masquerading mark and 0x4000 == 0 return
add rule ip kube-proxy masquerading mark set mark xor 0x4000

View File

@ -433,13 +433,15 @@ var _ = common.SIGDescribe("Conntrack", func() {
})
// Regression test for #74839, where:
// Packets considered INVALID by conntrack are now dropped. In particular, this fixes
// a problem where spurious retransmits in a long-running TCP connection to a service
// IP could result in the connection being closed with the error "Connection reset by
// peer"
// Regression test for #74839
// Packets considered INVALID by conntrack are not NATed, this can result
// in a problem where spurious retransmits in a long-running TCP connection
// to a service IP ends up with "Connection reset by peer" error.
// Proxy implementations (which leverage conntrack) can either drop packets
// marked INVALID by conntrack or enforce `nf_conntrack_tcp_be_liberal` to
// overcome this.
// xref: https://kubernetes.io/blog/2019/03/29/kube-proxy-subtleties-debugging-an-intermittent-connection-reset/
ginkgo.It("should drop INVALID conntrack entries [Privileged]", func(ctx context.Context) {
ginkgo.It("proxy implementation should not be vulnerable to the invalid conntrack state bug [Privileged]", func(ctx context.Context) {
serverLabel := map[string]string{
"app": "boom-server",
}
@ -535,22 +537,21 @@ var _ = common.SIGDescribe("Conntrack", func() {
e2epod.NewPodClient(fr).CreateSync(ctx, pod)
ginkgo.By("Client pod created")
// The client will open connections against the server
// The server will inject invalid packets
// if conntrack does not drop the invalid packets it will go through without NAT
// so the client will receive an unexpected TCP connection and RST the connection
// the server will log ERROR if that happens
ginkgo.By("checking client pod does not RST the TCP connection because it receives an INVALID packet")
// The client will open connections against the server.
// The server will inject packets with out-of-window sequence numbers and
// if these packets go without NAT client will receive an unexpected TCP
// packet and RST the connection, the server will log ERROR if that happens.
ginkgo.By("checking client pod does not RST the TCP connection because it receives an out-of-window packet")
if err := wait.PollUntilContextTimeout(ctx, 5*time.Second, time.Minute, true, logContainsFn("ERROR", "boom-server")); err == nil {
logs, err := e2epod.GetPodLogs(ctx, cs, ns, "boom-server", "boom-server")
framework.ExpectNoError(err)
framework.Logf("boom-server pod logs: %s", logs)
framework.Failf("boom-server pod received a RST from the client")
framework.Failf("boom-server pod received a RST from the client, enabling nf_conntrack_tcp_be_liberal or dropping packets marked invalid by conntrack might help here.")
}
logs, err := e2epod.GetPodLogs(ctx, cs, ns, "boom-server", "boom-server")
framework.ExpectNoError(err)
if !strings.Contains(string(logs), "connection established") {
if !strings.Contains(logs, "connection established") {
framework.Logf("boom-server pod logs: %s", logs)
framework.Failf("boom-server pod did not send any bad packet to the client")
}