From 28253f603077c7fdf035d010b7142a2bcbeaa9fe Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Sat, 11 Jun 2022 13:30:51 -0400 Subject: [PATCH] proxy/ipvs: Use DROP directly rather than KUBE-MARK-DROP The ipvs proxier was figuring out LoadBalancerSourceRanges matches in the nat table and using KUBE-MARK-DROP to mark unmatched packets to be dropped later. But with ipvs, unlike with iptables, DNAT happens after the packet is "delivered" to the dummy interface, so the packet will still be unmodified when it reaches the filter table (the first time) so there's no reason to split the work between the nat and filter tables; we can just do it all from the filter table and call DROP directly. Before: - KUBE-LOAD-BALANCER (in nat) uses kubeLoadBalancerFWSet to match LB traffic for services using LoadBalancerSourceRanges, and sends it to KUBE-FIREWALL. - KUBE-FIREWALL uses kubeLoadBalancerSourceCIDRSet and kubeLoadBalancerSourceIPSet to match allowed source/dest combos and calls "-j RETURN". - All remaining traffic that doesn't escape KUBE-FIREWALL is sent to KUBE-MARK-DROP. - Traffic sent to KUBE-MARK-DROP later gets dropped by chains in filter created by kubelet. After: - All INPUT and FORWARD traffic gets routed to KUBE-PROXY-FIREWALL (in filter). (We don't use "KUBE-FIREWALL" any more because there's already a chain in filter by that name that belongs to kubelet.) - KUBE-PROXY-FIREWALL sends traffic matching kubeLoadbalancerFWSet to KUBE-SOURCE-RANGES-FIREWALL - KUBE-SOURCE-RANGES-FIREWALL uses kubeLoadBalancerSourceCIDRSet and kubeLoadBalancerSourceIPSet to match allowed source/dest combos and calls "-j RETURN". - All remaining traffic that doesn't escape KUBE-SOURCE-RANGES-FIREWALL is dropped (directly via "-j DROP"). - (KUBE-LOAD-BALANCER in nat is now used only to set up masquerading) --- pkg/proxy/ipvs/proxier.go | 48 ++++++++++++++-------------------- pkg/proxy/ipvs/proxier_test.go | 13 +++++---- 2 files changed, 25 insertions(+), 36 deletions(-) diff --git a/pkg/proxy/ipvs/proxier.go b/pkg/proxy/ipvs/proxier.go index ddee133472e..74e2f1c471c 100644 --- a/pkg/proxy/ipvs/proxier.go +++ b/pkg/proxy/ipvs/proxier.go @@ -63,8 +63,11 @@ const ( // kubeServicesChain is the services portal chain kubeServicesChain utiliptables.Chain = "KUBE-SERVICES" - // kubeFirewallChain is the kubernetes firewall chain. - kubeFirewallChain utiliptables.Chain = "KUBE-FIREWALL" + // kubeProxyFirewallChain is the kube-proxy firewall chain. + kubeProxyFirewallChain utiliptables.Chain = "KUBE-PROXY-FIREWALL" + + // kubeSourceRangesFirewallChain is the firewall subchain for LoadBalancerSourceRanges. + kubeSourceRangesFirewallChain utiliptables.Chain = "KUBE-SOURCE-RANGES-FIREWALL" // kubePostroutingChain is the kubernetes postrouting chain kubePostroutingChain utiliptables.Chain = "KUBE-POSTROUTING" @@ -75,9 +78,6 @@ const ( // kubeNodePortChain is the kubernetes node port chain kubeNodePortChain utiliptables.Chain = "KUBE-NODE-PORT" - // KubeMarkDropChain is the mark-for-drop chain - kubeMarkDropChain utiliptables.Chain = "KUBE-MARK-DROP" - // kubeForwardChain is the kubernetes forward chain kubeForwardChain utiliptables.Chain = "KUBE-FORWARD" @@ -110,6 +110,8 @@ var iptablesJumpChain = []struct { {utiliptables.TableNAT, utiliptables.ChainPostrouting, kubePostroutingChain, "kubernetes postrouting rules"}, {utiliptables.TableFilter, utiliptables.ChainForward, kubeForwardChain, "kubernetes forwarding rules"}, {utiliptables.TableFilter, utiliptables.ChainInput, kubeNodePortChain, "kubernetes health check rules"}, + {utiliptables.TableFilter, utiliptables.ChainInput, kubeProxyFirewallChain, "kube-proxy firewall rules"}, + {utiliptables.TableFilter, utiliptables.ChainForward, kubeProxyFirewallChain, "kube-proxy firewall rules"}, } var iptablesChains = []struct { @@ -118,19 +120,13 @@ var iptablesChains = []struct { }{ {utiliptables.TableNAT, kubeServicesChain}, {utiliptables.TableNAT, kubePostroutingChain}, - {utiliptables.TableNAT, kubeFirewallChain}, {utiliptables.TableNAT, kubeNodePortChain}, {utiliptables.TableNAT, kubeLoadBalancerChain}, {utiliptables.TableNAT, kubeMarkMasqChain}, {utiliptables.TableFilter, kubeForwardChain}, {utiliptables.TableFilter, kubeNodePortChain}, -} - -var iptablesEnsureChains = []struct { - table utiliptables.Table - chain utiliptables.Chain -}{ - {utiliptables.TableNAT, kubeMarkDropChain}, + {utiliptables.TableFilter, kubeProxyFirewallChain}, + {utiliptables.TableFilter, kubeSourceRangesFirewallChain}, } var iptablesCleanupChains = []struct { @@ -139,11 +135,12 @@ var iptablesCleanupChains = []struct { }{ {utiliptables.TableNAT, kubeServicesChain}, {utiliptables.TableNAT, kubePostroutingChain}, - {utiliptables.TableNAT, kubeFirewallChain}, {utiliptables.TableNAT, kubeNodePortChain}, {utiliptables.TableNAT, kubeLoadBalancerChain}, {utiliptables.TableFilter, kubeForwardChain}, {utiliptables.TableFilter, kubeNodePortChain}, + {utiliptables.TableFilter, kubeProxyFirewallChain}, + {utiliptables.TableFilter, kubeSourceRangesFirewallChain}, } // ipsetInfo is all ipset we needed in ipvs proxier @@ -185,9 +182,6 @@ var ipsetWithIptablesChain = []struct { }{ {kubeLoopBackIPSet, utiliptables.TableNAT, string(kubePostroutingChain), "MASQUERADE", "dst,dst,src", ""}, {kubeLoadBalancerSet, utiliptables.TableNAT, string(kubeServicesChain), string(kubeLoadBalancerChain), "dst,dst", ""}, - {kubeLoadBalancerFWSet, utiliptables.TableNAT, string(kubeLoadBalancerChain), string(kubeFirewallChain), "dst,dst", ""}, - {kubeLoadBalancerSourceCIDRSet, utiliptables.TableNAT, string(kubeFirewallChain), "RETURN", "dst,dst,src", ""}, - {kubeLoadBalancerSourceIPSet, utiliptables.TableNAT, string(kubeFirewallChain), "RETURN", "dst,dst,src", ""}, {kubeLoadBalancerLocalSet, utiliptables.TableNAT, string(kubeLoadBalancerChain), "RETURN", "dst,dst", ""}, {kubeNodePortLocalSetTCP, utiliptables.TableNAT, string(kubeNodePortChain), "RETURN", "dst", utilipset.ProtocolTCP}, {kubeNodePortSetTCP, utiliptables.TableNAT, string(kubeNodePortChain), string(kubeMarkMasqChain), "dst", utilipset.ProtocolTCP}, @@ -195,6 +189,10 @@ var ipsetWithIptablesChain = []struct { {kubeNodePortSetUDP, utiliptables.TableNAT, string(kubeNodePortChain), string(kubeMarkMasqChain), "dst", utilipset.ProtocolUDP}, {kubeNodePortLocalSetSCTP, utiliptables.TableNAT, string(kubeNodePortChain), "RETURN", "dst,dst", utilipset.ProtocolSCTP}, {kubeNodePortSetSCTP, utiliptables.TableNAT, string(kubeNodePortChain), string(kubeMarkMasqChain), "dst,dst", utilipset.ProtocolSCTP}, + + {kubeLoadBalancerFWSet, utiliptables.TableFilter, string(kubeProxyFirewallChain), string(kubeSourceRangesFirewallChain), "dst,dst", ""}, + {kubeLoadBalancerSourceCIDRSet, utiliptables.TableFilter, string(kubeSourceRangesFirewallChain), "RETURN", "dst,dst,src", ""}, + {kubeLoadBalancerSourceIPSet, utiliptables.TableFilter, string(kubeSourceRangesFirewallChain), "RETURN", "dst,dst,src", ""}, } // In IPVS proxy mode, the following flags need to be set @@ -1742,10 +1740,10 @@ func (proxier *Proxier) writeIptablesRules() { "-j", string(kubeMarkMasqChain), ) - // mark drop for KUBE-FIREWALL - proxier.natRules.Write( - "-A", string(kubeFirewallChain), - "-j", string(kubeMarkDropChain), + // drop packets filtered by KUBE-SOURCE-RANGES-FIREWALL + proxier.filterRules.Write( + "-A", string(kubeSourceRangesFirewallChain), + "-j", "DROP", ) // Accept all traffic with destination of ipvs virtual service, in case other iptables rules @@ -1844,14 +1842,6 @@ func (proxier *Proxier) createAndLinkKubeChain() { existingFilterChains := proxier.getExistingChains(proxier.filterChainsData, utiliptables.TableFilter) existingNATChains := proxier.getExistingChains(proxier.iptablesData, utiliptables.TableNAT) - // ensure KUBE-MARK-DROP chain exist but do not change any rules - for _, ch := range iptablesEnsureChains { - if _, err := proxier.iptables.EnsureChain(ch.table, ch.chain); err != nil { - klog.ErrorS(err, "Failed to ensure chain exists", "table", ch.table, "chain", ch.chain) - return - } - } - // Make sure we keep stats for the top-level chains for _, ch := range iptablesChains { if _, err := proxier.iptables.EnsureChain(ch.table, ch.chain); err != nil { diff --git a/pkg/proxy/ipvs/proxier_test.go b/pkg/proxy/ipvs/proxier_test.go index 47634137b7e..5a149d0a8d3 100644 --- a/pkg/proxy/ipvs/proxier_test.go +++ b/pkg/proxy/ipvs/proxier_test.go @@ -2243,15 +2243,13 @@ func TestLoadBalancerSourceRanges(t *testing.T) { }, { JumpChain: "ACCEPT", MatchSet: kubeLoadBalancerSet, }}, - string(kubeLoadBalancerChain): {{ - JumpChain: string(kubeFirewallChain), MatchSet: kubeLoadBalancerFWSet, - }, { - JumpChain: string(kubeMarkMasqChain), MatchSet: "", + string(kubeProxyFirewallChain): {{ + JumpChain: string(kubeSourceRangesFirewallChain), MatchSet: kubeLoadBalancerFWSet, }}, - string(kubeFirewallChain): {{ + string(kubeSourceRangesFirewallChain): {{ JumpChain: "RETURN", MatchSet: kubeLoadBalancerSourceCIDRSet, }, { - JumpChain: string(kubeMarkDropChain), MatchSet: "", + JumpChain: "DROP", MatchSet: "", }}, } checkIptables(t, ipt, epIpt) @@ -4675,13 +4673,14 @@ func TestCreateAndLinkKubeChain(t *testing.T) { fp.createAndLinkKubeChain() expectedNATChains := `:KUBE-SERVICES - [0:0] :KUBE-POSTROUTING - [0:0] -:KUBE-FIREWALL - [0:0] :KUBE-NODE-PORT - [0:0] :KUBE-LOAD-BALANCER - [0:0] :KUBE-MARK-MASQ - [0:0] ` expectedFilterChains := `:KUBE-FORWARD - [0:0] :KUBE-NODE-PORT - [0:0] +:KUBE-PROXY-FIREWALL - [0:0] +:KUBE-SOURCE-RANGES-FIREWALL - [0:0] ` assert.Equal(t, expectedNATChains, string(fp.natChains.Bytes())) assert.Equal(t, expectedFilterChains, string(fp.filterChains.Bytes()))