diff --git a/pkg/proxy/nftables/helpers_test.go b/pkg/proxy/nftables/helpers_test.go index 0b727c8cdd3..3a681e13cc9 100644 --- a/pkg/proxy/nftables/helpers_test.go +++ b/pkg/proxy/nftables/helpers_test.go @@ -583,31 +583,22 @@ func (tracer *nftablesTracer) runChain(chname, sourceIP, protocol, destIP, destP func tracePacket(t *testing.T, nft *knftables.Fake, sourceIP, protocol, destIP, destPort string, nodeIPs []string) ([]string, string, bool) { tracer := newNFTablesTracer(t, nft, nodeIPs) - // Collect "base chains" (ie, the chains that are run by netfilter directly rather - // than only being run when they are jumped to). Skip postrouting because it only - // does masquerading and we handle that separately. - var baseChains []string - for chname, ch := range nft.Table.Chains { - if ch.Priority != nil && chname != "nat-postrouting" { - baseChains = append(baseChains, chname) - } + // filter-prerouting goes first, then nat-prerouting if not terminated. + if tracer.runChain("filter-prerouting", sourceIP, protocol, destIP, destPort) { + return tracer.matches, strings.Join(tracer.outputs, ", "), tracer.markMasq + } + tracer.runChain("nat-prerouting", sourceIP, protocol, destIP, destPort) + // After the prerouting rules run, pending DNATs are processed (which would affect + // the destination IP that later rules match against). + if len(tracer.outputs) != 0 { + destIP = strings.Split(tracer.outputs[0], ":")[0] } - // Sort by priority - sort.Slice(baseChains, func(i, j int) bool { - // FIXME: IPv4 vs IPv6 doesn't actually matter here - iprio, _ := knftables.ParsePriority(knftables.IPv4Family, string(*nft.Table.Chains[baseChains[i]].Priority)) - jprio, _ := knftables.ParsePriority(knftables.IPv4Family, string(*nft.Table.Chains[baseChains[j]].Priority)) - return iprio < jprio - }) - - for _, chname := range baseChains { - terminated := tracer.runChain(chname, sourceIP, protocol, destIP, destPort) - if terminated { - break - } - } + // Run filter-forward, skip filter-input as it ought to be fully redundant with the filter-forward chain. + tracer.runChain("filter-forward", sourceIP, protocol, destIP, destPort) + // Skip filter-output and nat-output as they ought to be fully redundant with the prerouting chains. + // Skip nat-postrouting because it only does masquerading and we handle that separately. return tracer.matches, strings.Join(tracer.outputs, ", "), tracer.markMasq } diff --git a/pkg/proxy/nftables/proxier.go b/pkg/proxy/nftables/proxier.go index d6281089007..4328663a31b 100644 --- a/pkg/proxy/nftables/proxier.go +++ b/pkg/proxy/nftables/proxier.go @@ -328,9 +328,10 @@ type nftablesBaseChain struct { var nftablesBaseChains = []nftablesBaseChain{ // We want our filtering rules to operate on pre-DNAT dest IPs, so our filter // chains have to run before DNAT. - {"filter-input", knftables.FilterType, knftables.InputHook, knftables.DNATPriority + "-1"}, - {"filter-forward", knftables.FilterType, knftables.ForwardHook, knftables.DNATPriority + "-1"}, - {"filter-output", knftables.FilterType, knftables.OutputHook, knftables.DNATPriority + "-1"}, + {"filter-prerouting", knftables.FilterType, knftables.PreroutingHook, knftables.DNATPriority + "-10"}, + {"filter-input", knftables.FilterType, knftables.InputHook, knftables.DNATPriority + "-10"}, + {"filter-forward", knftables.FilterType, knftables.ForwardHook, knftables.DNATPriority + "-10"}, + {"filter-output", knftables.FilterType, knftables.OutputHook, knftables.DNATPriority + "-10"}, {"nat-prerouting", knftables.NATType, knftables.PreroutingHook, knftables.DNATPriority}, {"nat-output", knftables.NATType, knftables.OutputHook, knftables.DNATPriority}, {"nat-postrouting", knftables.NATType, knftables.PostroutingHook, knftables.SNATPriority}, @@ -346,15 +347,17 @@ type nftablesJumpChain struct { } var nftablesJumpChains = []nftablesJumpChain{ + // We can't jump to kubeEndpointsCheckChain from filter-prerouting like + // kubeFirewallCheckChain because reject action is only valid in chains using the + // input, forward or output hooks. {kubeEndpointsCheckChain, "filter-input", "ct state new"}, {kubeEndpointsCheckChain, "filter-forward", "ct state new"}, {kubeEndpointsCheckChain, "filter-output", "ct state new"}, {kubeForwardChain, "filter-forward", ""}, - {kubeFirewallCheckChain, "filter-input", "ct state new"}, + {kubeFirewallCheckChain, "filter-prerouting", "ct state new"}, {kubeFirewallCheckChain, "filter-output", "ct state new"}, - {kubeFirewallCheckChain, "filter-forward", "ct state new"}, {kubeServicesChain, "nat-output", ""}, {kubeServicesChain, "nat-prerouting", ""}, diff --git a/pkg/proxy/nftables/proxier_test.go b/pkg/proxy/nftables/proxier_test.go index e67c89a9a7a..c2bec90c653 100644 --- a/pkg/proxy/nftables/proxier_test.go +++ b/pkg/proxy/nftables/proxier_test.go @@ -508,14 +508,14 @@ func TestOverallNFTablesRules(t *testing.T) { add rule ip kube-proxy masquerading mark set mark xor 0x4000 add rule ip kube-proxy masquerading masquerade fully-random add chain ip kube-proxy services - add chain ip kube-proxy filter-forward { type filter hook forward priority -101 ; } + add chain ip kube-proxy filter-prerouting { type filter hook prerouting priority -110 ; } + add rule ip kube-proxy filter-prerouting ct state new jump firewall-check + add chain ip kube-proxy filter-forward { type filter hook forward priority -110 ; } add rule ip kube-proxy filter-forward ct state new jump endpoints-check add rule ip kube-proxy filter-forward jump forward - add rule ip kube-proxy filter-forward ct state new jump firewall-check - add chain ip kube-proxy filter-input { type filter hook input priority -101 ; } + add chain ip kube-proxy filter-input { type filter hook input priority -110 ; } add rule ip kube-proxy filter-input ct state new jump endpoints-check - add rule ip kube-proxy filter-input ct state new jump firewall-check - add chain ip kube-proxy filter-output { type filter hook output priority -101 ; } + add chain ip kube-proxy filter-output { type filter hook output priority -110 ; } add rule ip kube-proxy filter-output ct state new jump endpoints-check add rule ip kube-proxy filter-output ct state new jump firewall-check add chain ip kube-proxy nat-output { type nat hook output priority -100 ; } @@ -4263,9 +4263,10 @@ func TestSyncProxyRulesRepeated(t *testing.T) { add table ip kube-proxy { comment "rules for kube-proxy" ; } add chain ip kube-proxy endpoints-check - add chain ip kube-proxy filter-forward { type filter hook forward priority -101 ; } - add chain ip kube-proxy filter-input { type filter hook input priority -101 ; } - add chain ip kube-proxy filter-output { type filter hook output priority -101 ; } + add chain ip kube-proxy filter-prerouting { type filter hook prerouting priority -110 ; } + add chain ip kube-proxy filter-forward { type filter hook forward priority -110 ; } + add chain ip kube-proxy filter-input { type filter hook input priority -110 ; } + add chain ip kube-proxy filter-output { type filter hook output priority -110 ; } add chain ip kube-proxy firewall-check add chain ip kube-proxy forward add chain ip kube-proxy mark-for-masquerade @@ -4278,11 +4279,10 @@ func TestSyncProxyRulesRepeated(t *testing.T) { add rule ip kube-proxy endpoints-check ip daddr . meta l4proto . th dport vmap @no-endpoint-services add rule ip kube-proxy endpoints-check fib daddr type local ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @no-endpoint-nodeports + add rule ip kube-proxy filter-prerouting ct state new jump firewall-check add rule ip kube-proxy filter-forward ct state new jump endpoints-check add rule ip kube-proxy filter-forward jump forward - add rule ip kube-proxy filter-forward ct state new jump firewall-check add rule ip kube-proxy filter-input ct state new jump endpoints-check - add rule ip kube-proxy filter-input ct state new jump firewall-check add rule ip kube-proxy filter-output ct state new jump endpoints-check add rule ip kube-proxy filter-output ct state new jump firewall-check add rule ip kube-proxy firewall-check ip daddr . meta l4proto . th dport vmap @firewall-ips