diff --git a/pkg/proxy/nftables/README.md b/pkg/proxy/nftables/README.md index 94488ce756d..57e0369ad66 100644 --- a/pkg/proxy/nftables/README.md +++ b/pkg/proxy/nftables/README.md @@ -91,13 +91,9 @@ This is implemented as follows: explicitly before or after any other rules (since they match packets that wouldn't be matched by any other rules). But with kernels before 5.9, `reject` is not allowed in `prerouting`, so we can't just do them in the same place as the source ranges - firewall. So we do these checks from `input`, `forward`, and `output`, to cover all - three paths. (In fact, we only need to check `@no-endpoint-nodeports` on the `input` - hook, but it's easier to just check them both in one place, and this code is likely to - be rewritten later anyway. Note that the converse statement "we only need to check - `@no-endpoint-services` on the `forward` and `output` hooks" is *not* true, because - `@no-endpoint-services` may include externalIPs/LB IPs that are assigned to local - interfaces.) + firewall. So we check `@no-endpoint-services` from `input`, `forward`, and `output`, + and `@no-endpoint-nodeports` from `input` only; together these cover all the + possible paths. - Masquerading has to happen in the `postrouting` hook, because "masquerade" means "SNAT to the IP of the interface the packet is going out on", so it has to happen after the diff --git a/pkg/proxy/nftables/helpers_test.go b/pkg/proxy/nftables/helpers_test.go index bba8f399c1b..f3b047500dc 100644 --- a/pkg/proxy/nftables/helpers_test.go +++ b/pkg/proxy/nftables/helpers_test.go @@ -587,8 +587,13 @@ func tracePacket(t *testing.T, nft *knftables.Fake, sourceIP, protocol, destIP, } } - // Run filter-forward, skip filter-input as it ought to be fully redundant with the filter-forward chain. - tracer.runChain("filter-forward", sourceIP, protocol, destIP, destPort) + // Run filter-forward, return if packet is terminated. 
+ if tracer.runChain("filter-forward", sourceIP, protocol, destIP, destPort) { + return tracer.matches, strings.Join(tracer.outputs, ", "), tracer.markMasq + } + + // Run filter-input + tracer.runChain("filter-input", sourceIP, protocol, destIP, destPort) // Skip filter-output and nat-output as they ought to be fully redundant with the prerouting chains. // Skip nat-postrouting because it only does masquerading and we handle that separately. diff --git a/pkg/proxy/nftables/proxier.go b/pkg/proxy/nftables/proxier.go index 25e9aabc8e1..b0504295980 100644 --- a/pkg/proxy/nftables/proxier.go +++ b/pkg/proxy/nftables/proxier.go @@ -86,10 +86,11 @@ const ( clusterIPsSet = "cluster-ips" // handling for services with no endpoints - endpointsCheckChain = "endpoints-check" - noEndpointServicesMap = "no-endpoint-services" - noEndpointNodePortsMap = "no-endpoint-nodeports" - rejectChain = "reject-chain" + serviceEndpointsCheckChain = "service-endpoints-check" + nodePortEndpointsCheckChain = "nodeport-endpoints-check" + noEndpointServicesMap = "no-endpoint-services" + noEndpointNodePortsMap = "no-endpoint-nodeports" + rejectChain = "reject-chain" // handling traffic to unallocated ClusterIPs and undefined ports of ClusterIPs clusterIPsCheckChain = "cluster-ips-check" @@ -353,9 +354,10 @@ var nftablesJumpChains = []nftablesJumpChain{ // We can't jump to endpointsCheckChain from filter-prerouting like // firewallCheckChain because reject action is only valid in chains using the // input, forward or output hooks. 
- {endpointsCheckChain, filterInputChain, "ct state new"}, - {endpointsCheckChain, filterForwardChain, "ct state new"}, - {endpointsCheckChain, filterOutputChain, "ct state new"}, + {nodePortEndpointsCheckChain, filterInputChain, "ct state new"}, + {serviceEndpointsCheckChain, filterInputChain, "ct state new"}, + {serviceEndpointsCheckChain, filterForwardChain, "ct state new"}, + {serviceEndpointsCheckChain, filterOutputChain, "ct state new"}, {firewallCheckChain, filterPreroutingChain, "ct state new"}, {firewallCheckChain, filterOutputChain, "ct state new"}, @@ -541,7 +543,7 @@ func (proxier *Proxier) setupNFTables(tx *knftables.Transaction) { }) tx.Add(&knftables.Rule{ - Chain: endpointsCheckChain, + Chain: serviceEndpointsCheckChain, Rule: knftables.Concat( ipX, "daddr", ".", "meta l4proto", ".", "th dport", "vmap", "@", noEndpointServicesMap, @@ -550,9 +552,8 @@ func (proxier *Proxier) setupNFTables(tx *knftables.Transaction) { if proxier.nodePortAddresses.MatchAll() { tx.Add(&knftables.Rule{ - Chain: endpointsCheckChain, + Chain: nodePortEndpointsCheckChain, Rule: knftables.Concat( - "fib daddr type local", noLocalhost, "meta l4proto . th dport", "vmap", "@", noEndpointNodePortsMap, @@ -560,7 +561,7 @@ func (proxier *Proxier) setupNFTables(tx *knftables.Transaction) { }) } else { tx.Add(&knftables.Rule{ - Chain: endpointsCheckChain, + Chain: nodePortEndpointsCheckChain, Rule: knftables.Concat( ipX, "daddr", "@", nodePortIPsSet, "meta l4proto . 
th dport", diff --git a/pkg/proxy/nftables/proxier_test.go b/pkg/proxy/nftables/proxier_test.go index 49ac7dc0cd3..c45c96a50dd 100644 --- a/pkg/proxy/nftables/proxier_test.go +++ b/pkg/proxy/nftables/proxier_test.go @@ -513,12 +513,13 @@ func TestOverallNFTablesRules(t *testing.T) { add chain ip kube-proxy filter-prerouting { type filter hook prerouting priority -110 ; } add rule ip kube-proxy filter-prerouting ct state new jump firewall-check add chain ip kube-proxy filter-forward { type filter hook forward priority -110 ; } - add rule ip kube-proxy filter-forward ct state new jump endpoints-check + add rule ip kube-proxy filter-forward ct state new jump service-endpoints-check add rule ip kube-proxy filter-forward ct state new jump cluster-ips-check add chain ip kube-proxy filter-input { type filter hook input priority -110 ; } - add rule ip kube-proxy filter-input ct state new jump endpoints-check + add rule ip kube-proxy filter-input ct state new jump nodeport-endpoints-check + add rule ip kube-proxy filter-input ct state new jump service-endpoints-check add chain ip kube-proxy filter-output { type filter hook output priority -110 ; } - add rule ip kube-proxy filter-output ct state new jump endpoints-check + add rule ip kube-proxy filter-output ct state new jump service-endpoints-check add rule ip kube-proxy filter-output ct state new jump firewall-check add chain ip kube-proxy filter-output-post-dnat { type filter hook output priority -90 ; } add rule ip kube-proxy filter-output-post-dnat ct state new jump cluster-ips-check @@ -544,9 +545,10 @@ func TestOverallNFTablesRules(t *testing.T) { add map ip kube-proxy no-endpoint-services { type ipv4_addr . inet_proto . inet_service : verdict ; comment "vmap to drop or reject packets to services with no endpoints" ; } add map ip kube-proxy no-endpoint-nodeports { type inet_proto . 
inet_service : verdict ; comment "vmap to drop or reject packets to service nodeports with no endpoints" ; } - add chain ip kube-proxy endpoints-check - add rule ip kube-proxy endpoints-check ip daddr . meta l4proto . th dport vmap @no-endpoint-services - add rule ip kube-proxy endpoints-check fib daddr type local ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @no-endpoint-nodeports + add chain ip kube-proxy nodeport-endpoints-check + add rule ip kube-proxy nodeport-endpoints-check ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @no-endpoint-nodeports + add chain ip kube-proxy service-endpoints-check + add rule ip kube-proxy service-endpoints-check ip daddr . meta l4proto . th dport vmap @no-endpoint-services add map ip kube-proxy service-ips { type ipv4_addr . inet_proto . inet_service : verdict ; comment "ClusterIP, ExternalIP and LoadBalancer IP traffic" ; } add map ip kube-proxy service-nodeports { type inet_proto . inet_service : verdict ; comment "NodePort traffic" ; } @@ -4268,7 +4270,6 @@ func TestSyncProxyRulesRepeated(t *testing.T) { add table ip kube-proxy { comment "rules for kube-proxy" ; } add chain ip kube-proxy cluster-ips-check - add chain ip kube-proxy endpoints-check add chain ip kube-proxy filter-prerouting { type filter hook prerouting priority -110 ; } add chain ip kube-proxy filter-forward { type filter hook forward priority -110 ; } add chain ip kube-proxy filter-input { type filter hook input priority -110 ; } @@ -4280,18 +4281,19 @@ func TestSyncProxyRulesRepeated(t *testing.T) { add chain ip kube-proxy nat-output { type nat hook output priority -100 ; } add chain ip kube-proxy nat-postrouting { type nat hook postrouting priority 100 ; } add chain ip kube-proxy nat-prerouting { type nat hook prerouting priority -100 ; } + add chain ip kube-proxy nodeport-endpoints-check add chain ip kube-proxy reject-chain { comment "helper for @no-endpoint-services / @no-endpoint-nodeports" ; } add chain ip kube-proxy services + add chain ip 
kube-proxy service-endpoints-check add rule ip kube-proxy cluster-ips-check ip daddr @cluster-ips reject comment "Reject traffic to invalid ports of ClusterIPs" add rule ip kube-proxy cluster-ips-check ip daddr { 172.30.0.0/16 } drop comment "Drop traffic to unallocated ClusterIPs" - add rule ip kube-proxy endpoints-check ip daddr . meta l4proto . th dport vmap @no-endpoint-services - add rule ip kube-proxy endpoints-check fib daddr type local ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @no-endpoint-nodeports add rule ip kube-proxy filter-prerouting ct state new jump firewall-check - add rule ip kube-proxy filter-forward ct state new jump endpoints-check + add rule ip kube-proxy filter-forward ct state new jump service-endpoints-check add rule ip kube-proxy filter-forward ct state new jump cluster-ips-check - add rule ip kube-proxy filter-input ct state new jump endpoints-check - add rule ip kube-proxy filter-output ct state new jump endpoints-check + add rule ip kube-proxy filter-input ct state new jump nodeport-endpoints-check + add rule ip kube-proxy filter-input ct state new jump service-endpoints-check + add rule ip kube-proxy filter-output ct state new jump service-endpoints-check add rule ip kube-proxy filter-output ct state new jump firewall-check add rule ip kube-proxy filter-output-post-dnat ct state new jump cluster-ips-check add rule ip kube-proxy firewall-check ip daddr . meta l4proto . th dport vmap @firewall-ips @@ -4302,9 +4304,11 @@ func TestSyncProxyRulesRepeated(t *testing.T) { add rule ip kube-proxy nat-output jump services add rule ip kube-proxy nat-postrouting jump masquerading add rule ip kube-proxy nat-prerouting jump services + add rule ip kube-proxy nodeport-endpoints-check ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @no-endpoint-nodeports add rule ip kube-proxy reject-chain reject add rule ip kube-proxy services ip daddr . meta l4proto . 
th dport vmap @service-ips add rule ip kube-proxy services fib daddr type local ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @service-nodeports + add rule ip kube-proxy service-endpoints-check ip daddr . meta l4proto . th dport vmap @no-endpoint-services add set ip kube-proxy cluster-ips { type ipv4_addr ; comment "Active ClusterIPs" ; }