pkg/proxy/nftables: handle traffic to node ports with no endpoints

commit 25a40b1c7c (parent 4b40299133)

The nftables proxy no longer installs drop and reject rules for node port
services with no endpoints in the chains attached to the forward and output
hooks; those checks are now done only from the input hook.

Signed-off-by: Daman Arora <aroradaman@gmail.com>
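For orientation, here is a condensed before/after of the endpoint-check jump rules, copied from the IPv4 test expectations further down and stored as Go strings in the same style the tests use (only the endpoint-check jumps are shown; the other jumps in those chains are unchanged and omitted):

```go
package sketch

// Condensed excerpt of the expected IPv4 rules from the test diffs below;
// only the endpoint-check jumps are shown.
const (
	// Before: one combined check chain, reached from the input, forward and
	// output hooks, so NodePort drop/reject rules applied on all three paths.
	endpointJumpsBefore = `
add rule ip kube-proxy filter-input ct state new jump endpoints-check
add rule ip kube-proxy filter-forward ct state new jump endpoints-check
add rule ip kube-proxy filter-output ct state new jump endpoints-check`

	// After: the NodePort check is reached only from the input hook.
	endpointJumpsAfter = `
add rule ip kube-proxy filter-input ct state new jump nodeport-endpoints-check
add rule ip kube-proxy filter-input ct state new jump service-endpoints-check
add rule ip kube-proxy filter-forward ct state new jump service-endpoints-check
add rule ip kube-proxy filter-output ct state new jump service-endpoints-check`
)
```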
@@ -91,13 +91,9 @@ This is implemented as follows:
 explicitly before or after any other rules (since they match packets that wouldn't be
 matched by any other rules). But with kernels before 5.9, `reject` is not allowed in
 `prerouting`, so we can't just do them in the same place as the source ranges
-firewall. So we do these checks from `input`, `forward`, and `output`, to cover all
-three paths. (In fact, we only need to check `@no-endpoint-nodeports` on the `input`
-hook, but it's easier to just check them both in one place, and this code is likely to
-be rewritten later anyway. Note that the converse statement "we only need to check
-`@no-endpoint-services` on the `forward` and `output` hooks" is *not* true, because
-`@no-endpoint-services` may include externalIPs/LB IPs that are assigned to local
-interfaces.)
+firewall. So we do these checks from `input`, `forward`, and `output` for
+`@no-endpoint-services` and from `input` for `@no-endpoint-nodeports` to cover all
+the possible paths.

 - Masquerading has to happen in the `postrouting` hook, because "masquerade" means "SNAT
 to the IP of the interface the packet is going out on", so it has to happen after the
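As a concrete illustration of the new wording, here is a minimal sketch of what the two check chains contain after this change, taken from the IPv4 expectations in TestOverallNFTablesRules further down and presented as a Go string in the tests' own style (not a complete dump):

```go
package sketch

// checkChains is a condensed excerpt of the expected IPv4 ruleset from the
// test expectations below. @no-endpoint-services is checked on every entry
// path, while @no-endpoint-nodeports only needs the input path.
const checkChains = `
add chain ip kube-proxy service-endpoints-check
add rule ip kube-proxy service-endpoints-check ip daddr . meta l4proto . th dport vmap @no-endpoint-services
add chain ip kube-proxy nodeport-endpoints-check
add rule ip kube-proxy nodeport-endpoints-check ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @no-endpoint-nodeports`
```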
@@ -587,8 +587,13 @@ func tracePacket(t *testing.T, nft *knftables.Fake, sourceIP, protocol, destIP,
 		}
 	}

-	// Run filter-forward, skip filter-input as it ought to be fully redundant with the filter-forward chain.
-	tracer.runChain("filter-forward", sourceIP, protocol, destIP, destPort)
+	// Run filter-forward, return if packet is terminated.
+	if tracer.runChain("filter-forward", sourceIP, protocol, destIP, destPort) {
+		return tracer.matches, strings.Join(tracer.outputs, ", "), tracer.markMasq
+	}
+
+	// Run filter-input
+	tracer.runChain("filter-input", sourceIP, protocol, destIP, destPort)

 	// Skip filter-output and nat-output as they ought to be fully redundant with the prerouting chains.
 	// Skip nat-postrouting because it only does masquerading and we handle that separately.
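To summarize the new tracing order in isolation, here is a minimal, self-contained sketch; the tracer type and runChain stub below are simplified stand-ins for the test's helpers (runChain is assumed to report whether the packet was terminated in that chain), not the actual test code:

```go
package sketch

import "strings"

// tracer is a simplified stand-in for the test's nftables tracer; only the
// fields used by the control flow below are modeled.
type tracer struct {
	matches  []string // rules the packet matched
	outputs  []string // verdicts/outputs produced
	markMasq bool     // whether the masquerade mark was set
}

// runChain evaluates one chain and reports whether the packet was terminated
// (dropped or rejected) there. The real evaluation is elided in this sketch.
func (tr *tracer) runChain(chain, sourceIP, protocol, destIP, destPort string) bool {
	return false
}

// trace mirrors the ordering introduced above: filter-forward runs first, and
// filter-input is only consulted if the packet survives filter-forward.
func (tr *tracer) trace(sourceIP, protocol, destIP, destPort string) ([]string, string, bool) {
	if tr.runChain("filter-forward", sourceIP, protocol, destIP, destPort) {
		return tr.matches, strings.Join(tr.outputs, ", "), tr.markMasq
	}
	tr.runChain("filter-input", sourceIP, protocol, destIP, destPort)
	return tr.matches, strings.Join(tr.outputs, ", "), tr.markMasq
}
```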
@@ -86,10 +86,11 @@ const (
 	clusterIPsSet = "cluster-ips"

 	// handling for services with no endpoints
-	endpointsCheckChain    = "endpoints-check"
-	noEndpointServicesMap  = "no-endpoint-services"
-	noEndpointNodePortsMap = "no-endpoint-nodeports"
-	rejectChain            = "reject-chain"
+	serviceEndpointsCheckChain  = "service-endpoints-check"
+	nodePortEndpointsCheckChain = "nodeport-endpoints-check"
+	noEndpointServicesMap       = "no-endpoint-services"
+	noEndpointNodePortsMap      = "no-endpoint-nodeports"
+	rejectChain                 = "reject-chain"

 	// handling traffic to unallocated ClusterIPs and undefined ports of ClusterIPs
 	clusterIPsCheckChain = "cluster-ips-check"
@@ -353,9 +354,10 @@ var nftablesJumpChains = []nftablesJumpChain{
 	// We can't jump to endpointsCheckChain from filter-prerouting like
 	// firewallCheckChain because reject action is only valid in chains using the
 	// input, forward or output hooks.
-	{endpointsCheckChain, filterInputChain, "ct state new"},
-	{endpointsCheckChain, filterForwardChain, "ct state new"},
-	{endpointsCheckChain, filterOutputChain, "ct state new"},
+	{nodePortEndpointsCheckChain, filterInputChain, "ct state new"},
+	{serviceEndpointsCheckChain, filterInputChain, "ct state new"},
+	{serviceEndpointsCheckChain, filterForwardChain, "ct state new"},
+	{serviceEndpointsCheckChain, filterOutputChain, "ct state new"},

 	{firewallCheckChain, filterPreroutingChain, "ct state new"},
 	{firewallCheckChain, filterOutputChain, "ct state new"},
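For illustration, a sketch of how entries like these could be expanded into the jump rules seen in the test expectations, using the knftables calls already shown in this diff; the jumpChain field names, the helper, and the import path below are assumptions made for the sketch, not the proxier's actual implementation:

```go
package sketch

import "sigs.k8s.io/knftables"

// jumpChain mirrors the shape of the entries above; the field names here are
// assumptions made for this sketch.
type jumpChain struct {
	to        string // chain jumped to, e.g. "service-endpoints-check"
	from      string // base chain the rule is added to, e.g. "filter-input"
	extraArgs string // extra match expression, e.g. "ct state new"
}

// addJumpRules is a hypothetical helper showing how each entry expands into a
// rule such as "add rule ip kube-proxy filter-input ct state new jump
// service-endpoints-check" (compare the expected rules in the tests below).
func addJumpRules(tx *knftables.Transaction, chains []jumpChain) {
	for _, c := range chains {
		tx.Add(&knftables.Rule{
			Chain: c.from,
			Rule:  knftables.Concat(c.extraArgs, "jump", c.to),
		})
	}
}
```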
@@ -541,7 +543,7 @@ func (proxier *Proxier) setupNFTables(tx *knftables.Transaction) {
 	})

 	tx.Add(&knftables.Rule{
-		Chain: endpointsCheckChain,
+		Chain: serviceEndpointsCheckChain,
 		Rule: knftables.Concat(
 			ipX, "daddr", ".", "meta l4proto", ".", "th dport",
 			"vmap", "@", noEndpointServicesMap,
@@ -550,9 +552,8 @@ func (proxier *Proxier) setupNFTables(tx *knftables.Transaction) {

 	if proxier.nodePortAddresses.MatchAll() {
 		tx.Add(&knftables.Rule{
-			Chain: endpointsCheckChain,
+			Chain: nodePortEndpointsCheckChain,
 			Rule: knftables.Concat(
-				"fib daddr type local",
 				noLocalhost,
 				"meta l4proto . th dport",
 				"vmap", "@", noEndpointNodePortsMap,
@@ -560,7 +561,7 @@ func (proxier *Proxier) setupNFTables(tx *knftables.Transaction) {
 		})
 	} else {
 		tx.Add(&knftables.Rule{
-			Chain: endpointsCheckChain,
+			Chain: nodePortEndpointsCheckChain,
 			Rule: knftables.Concat(
 				ipX, "daddr", "@", nodePortIPsSet,
 				"meta l4proto . th dport",
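Putting the two branches above side by side, these are roughly the nodeport-endpoints-check rules they generate for IPv4. The first matches the test expectation below; the second (the non-MatchAll branch) is inferred from the code above, and the `@nodeport-ips` set name is an assumption since nodePortIPsSet's value is not shown in this diff:

```go
package sketch

// Approximate IPv4 rules produced by the two branches above.
const (
	// nodePortAddresses.MatchAll(): any destination except localhost.
	nodePortCheckMatchAll = `add rule ip kube-proxy nodeport-endpoints-check ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @no-endpoint-nodeports`

	// Otherwise: only destinations in the configured node-port address set
	// (set name assumed here).
	nodePortCheckSubset = `add rule ip kube-proxy nodeport-endpoints-check ip daddr @nodeport-ips meta l4proto . th dport vmap @no-endpoint-nodeports`
)
```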
@@ -513,12 +513,13 @@ func TestOverallNFTablesRules(t *testing.T) {
 add chain ip kube-proxy filter-prerouting { type filter hook prerouting priority -110 ; }
 add rule ip kube-proxy filter-prerouting ct state new jump firewall-check
 add chain ip kube-proxy filter-forward { type filter hook forward priority -110 ; }
-add rule ip kube-proxy filter-forward ct state new jump endpoints-check
+add rule ip kube-proxy filter-forward ct state new jump service-endpoints-check
 add rule ip kube-proxy filter-forward ct state new jump cluster-ips-check
 add chain ip kube-proxy filter-input { type filter hook input priority -110 ; }
-add rule ip kube-proxy filter-input ct state new jump endpoints-check
+add rule ip kube-proxy filter-input ct state new jump nodeport-endpoints-check
+add rule ip kube-proxy filter-input ct state new jump service-endpoints-check
 add chain ip kube-proxy filter-output { type filter hook output priority -110 ; }
-add rule ip kube-proxy filter-output ct state new jump endpoints-check
+add rule ip kube-proxy filter-output ct state new jump service-endpoints-check
 add rule ip kube-proxy filter-output ct state new jump firewall-check
 add chain ip kube-proxy filter-output-post-dnat { type filter hook output priority -90 ; }
 add rule ip kube-proxy filter-output-post-dnat ct state new jump cluster-ips-check
@@ -544,9 +545,10 @@ func TestOverallNFTablesRules(t *testing.T) {
 add map ip kube-proxy no-endpoint-services { type ipv4_addr . inet_proto . inet_service : verdict ; comment "vmap to drop or reject packets to services with no endpoints" ; }
 add map ip kube-proxy no-endpoint-nodeports { type inet_proto . inet_service : verdict ; comment "vmap to drop or reject packets to service nodeports with no endpoints" ; }

-add chain ip kube-proxy endpoints-check
-add rule ip kube-proxy endpoints-check ip daddr . meta l4proto . th dport vmap @no-endpoint-services
-add rule ip kube-proxy endpoints-check fib daddr type local ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @no-endpoint-nodeports
+add chain ip kube-proxy nodeport-endpoints-check
+add rule ip kube-proxy nodeport-endpoints-check ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @no-endpoint-nodeports
+add chain ip kube-proxy service-endpoints-check
+add rule ip kube-proxy service-endpoints-check ip daddr . meta l4proto . th dport vmap @no-endpoint-services

 add map ip kube-proxy service-ips { type ipv4_addr . inet_proto . inet_service : verdict ; comment "ClusterIP, ExternalIP and LoadBalancer IP traffic" ; }
 add map ip kube-proxy service-nodeports { type inet_proto . inet_service : verdict ; comment "NodePort traffic" ; }
@@ -4268,7 +4270,6 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
 add table ip kube-proxy { comment "rules for kube-proxy" ; }

 add chain ip kube-proxy cluster-ips-check
-add chain ip kube-proxy endpoints-check
 add chain ip kube-proxy filter-prerouting { type filter hook prerouting priority -110 ; }
 add chain ip kube-proxy filter-forward { type filter hook forward priority -110 ; }
 add chain ip kube-proxy filter-input { type filter hook input priority -110 ; }
@@ -4280,18 +4281,19 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
 add chain ip kube-proxy nat-output { type nat hook output priority -100 ; }
 add chain ip kube-proxy nat-postrouting { type nat hook postrouting priority 100 ; }
 add chain ip kube-proxy nat-prerouting { type nat hook prerouting priority -100 ; }
+add chain ip kube-proxy nodeport-endpoints-check
 add chain ip kube-proxy reject-chain { comment "helper for @no-endpoint-services / @no-endpoint-nodeports" ; }
 add chain ip kube-proxy services
+add chain ip kube-proxy service-endpoints-check

 add rule ip kube-proxy cluster-ips-check ip daddr @cluster-ips reject comment "Reject traffic to invalid ports of ClusterIPs"
 add rule ip kube-proxy cluster-ips-check ip daddr { 172.30.0.0/16 } drop comment "Drop traffic to unallocated ClusterIPs"
-add rule ip kube-proxy endpoints-check ip daddr . meta l4proto . th dport vmap @no-endpoint-services
-add rule ip kube-proxy endpoints-check fib daddr type local ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @no-endpoint-nodeports
 add rule ip kube-proxy filter-prerouting ct state new jump firewall-check
-add rule ip kube-proxy filter-forward ct state new jump endpoints-check
+add rule ip kube-proxy filter-forward ct state new jump service-endpoints-check
 add rule ip kube-proxy filter-forward ct state new jump cluster-ips-check
-add rule ip kube-proxy filter-input ct state new jump endpoints-check
-add rule ip kube-proxy filter-output ct state new jump endpoints-check
+add rule ip kube-proxy filter-input ct state new jump nodeport-endpoints-check
+add rule ip kube-proxy filter-input ct state new jump service-endpoints-check
+add rule ip kube-proxy filter-output ct state new jump service-endpoints-check
 add rule ip kube-proxy filter-output ct state new jump firewall-check
 add rule ip kube-proxy filter-output-post-dnat ct state new jump cluster-ips-check
 add rule ip kube-proxy firewall-check ip daddr . meta l4proto . th dport vmap @firewall-ips
@@ -4302,9 +4304,11 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
 add rule ip kube-proxy nat-output jump services
 add rule ip kube-proxy nat-postrouting jump masquerading
 add rule ip kube-proxy nat-prerouting jump services
+add rule ip kube-proxy nodeport-endpoints-check ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @no-endpoint-nodeports
 add rule ip kube-proxy reject-chain reject
 add rule ip kube-proxy services ip daddr . meta l4proto . th dport vmap @service-ips
 add rule ip kube-proxy services fib daddr type local ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @service-nodeports
+add rule ip kube-proxy service-endpoints-check ip daddr . meta l4proto . th dport vmap @no-endpoint-services

 add set ip kube-proxy cluster-ips { type ipv4_addr ; comment "Active ClusterIPs" ; }
