diff --git a/pkg/proxy/nftables/helpers_test.go b/pkg/proxy/nftables/helpers_test.go
index b5286b952a3..c49360b672b 100644
--- a/pkg/proxy/nftables/helpers_test.go
+++ b/pkg/proxy/nftables/helpers_test.go
@@ -207,6 +207,17 @@ func (tracer *nftablesTracer) addressMatches(ipStr, not, ruleAddress string) boo
 	}
 }
 
+func (tracer *nftablesTracer) noneAddressesMatch(ipStr, ruleAddress string) bool {
+	ruleAddress = strings.ReplaceAll(ruleAddress, " ", "")
+	addresses := strings.Split(ruleAddress, ",")
+	for _, address := range addresses {
+		if tracer.addressMatches(ipStr, "", address) {
+			return false
+		}
+	}
+	return true
+}
+
 // matchDestIPOnly checks an "ip daddr" against a set/map, and returns the matching
 // Element, if found.
 func (tracer *nftablesTracer) matchDestIPOnly(elements []*knftables.Element, destIP string) *knftables.Element {
@@ -264,6 +275,7 @@ func (tracer *nftablesTracer) matchDestPort(elements []*knftables.Element, proto
 // match verdictRegexp.
 
 var destAddrRegexp = regexp.MustCompile(`^ip6* daddr (!= )?(\S+)`)
+var destAddrLookupRegexp = regexp.MustCompile(`^ip6* daddr != \{([^}]*)\}`)
 var destAddrLocalRegexp = regexp.MustCompile(`^fib daddr type local`)
 var destPortRegexp = regexp.MustCompile(`^(tcp|udp|sctp) dport (\d+)`)
 var destIPOnlyLookupRegexp = regexp.MustCompile(`^ip6* daddr @(\S+)`)
@@ -275,6 +287,7 @@ var destDispatchRegexp = regexp.MustCompile(`^ip6* daddr \. meta l4proto \. th d
 var destPortDispatchRegexp = regexp.MustCompile(`^meta l4proto \. th dport vmap @(\S+)$`)
 
 var sourceAddrRegexp = regexp.MustCompile(`^ip6* saddr (!= )?(\S+)`)
+var sourceAddrLookupRegexp = regexp.MustCompile(`^ip6* saddr != \{([^}]*)\}`)
 var sourceAddrLocalRegexp = regexp.MustCompile(`^fib saddr type local`)
 
 var endpointVMAPRegexp = regexp.MustCompile(`^numgen random mod \d+ vmap \{(.*)\}$`)
@@ -397,6 +410,16 @@ func (tracer *nftablesTracer) runChain(chname, sourceIP, protocol, destIP, destP
 				rule = element.Value[0]
 			}
 
+		case destAddrLookupRegexp.MatchString(rule):
+			// `^ip6* daddr != \{([^}]*)\}`
+			// Tests whether destIP doesn't match an anonymous set.
+			match := destAddrLookupRegexp.FindStringSubmatch(rule)
+			rule = strings.TrimPrefix(rule, match[0])
+			if !tracer.noneAddressesMatch(destIP, match[1]) {
+				rule = ""
+				break
+			}
+
 		case destAddrRegexp.MatchString(rule):
 			// `^ip6* daddr (!= )?(\S+)`
 			// Tests whether destIP does/doesn't match a literal.
@@ -429,6 +452,16 @@ func (tracer *nftablesTracer) runChain(chname, sourceIP, protocol, destIP, destP
 				break
 			}
 
+		case sourceAddrLookupRegexp.MatchString(rule):
+			// `^ip6* saddr != \{([^}]*)\}`
+			// Tests whether sourceIP doesn't match an anonymous set.
+			match := sourceAddrLookupRegexp.FindStringSubmatch(rule)
+			rule = strings.TrimPrefix(rule, match[0])
+			if !tracer.noneAddressesMatch(sourceIP, match[1]) {
+				rule = ""
+				break
+			}
+
 		case sourceAddrRegexp.MatchString(rule):
 			// `^ip6* saddr (!= )?(\S+)`
 			// Tests whether sourceIP does/doesn't match a literal.
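The two tracer branches added above handle negated anonymous-set matches such as "ip saddr != { 203.0.113.0/25, 5.6.7.8 } drop": the rule matches only when the packet address is in none of the comma-separated members, each of which may be a bare IP or a CIDR. A minimal standalone sketch of that semantics, assuming net/netip for parsing (the helper name is illustrative and is not part of the patch):

package main

import (
	"fmt"
	"net/netip"
	"strings"
)

// noneMatch reports whether ipStr matches none of the comma-separated
// members (plain IPs or CIDRs) of an anonymous nftables set.
func noneMatch(ipStr, members string) bool {
	addr, err := netip.ParseAddr(ipStr)
	if err != nil {
		return false
	}
	for _, m := range strings.Split(strings.ReplaceAll(members, " ", ""), ",") {
		if strings.Contains(m, "/") {
			if pfx, err := netip.ParsePrefix(m); err == nil && pfx.Contains(addr) {
				return false // ipStr falls inside a CIDR member
			}
		} else if other, err := netip.ParseAddr(m); err == nil && other == addr {
			return false // exact-IP member matches
		}
	}
	return true
}

func main() {
	fmt.Println(noneMatch("203.0.113.5", "203.0.113.0/25, 5.6.7.8")) // false: allowed source, "!=" does not match
	fmt.Println(noneMatch("192.168.0.3", "203.0.113.0/25, 5.6.7.8")) // true: "!=" matches, packet would be dropped
}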
diff --git a/pkg/proxy/nftables/proxier.go b/pkg/proxy/nftables/proxier.go
index 57374f5ce20..0d992a4c258 100644
--- a/pkg/proxy/nftables/proxier.go
+++ b/pkg/proxy/nftables/proxier.go
@@ -76,10 +76,8 @@ const (
 	kubeRejectChain = "reject-chain"
 
 	// LoadBalancerSourceRanges handling
-	kubeFirewallSet             = "firewall"
-	kubeFirewallCheckChain      = "firewall-check"
-	kubeFirewallAllowSet        = "firewall-allow"
-	kubeFirewallAllowCheckChain = "firewall-allow-check"
+	kubeFirewallIPsMap     = "firewall-ips"
+	kubeFirewallCheckChain = "firewall-check"
 
 	// masquerading
 	kubeMarkMasqChain = "mark-for-masquerade"
@@ -99,6 +97,7 @@ type servicePortInfo struct {
 	clusterPolicyChainName string
 	localPolicyChainName   string
 	externalChainName      string
+	firewallChainName      string
 }
 
 // returns a new proxy.ServicePort which abstracts a serviceInfo
@@ -114,6 +113,7 @@ func newServiceInfo(port *v1.ServicePort, service *v1.Service, bsvcPortInfo *pro
 	svcPort.clusterPolicyChainName = servicePortPolicyClusterChainNamePrefix + chainNameBase
 	svcPort.localPolicyChainName = servicePortPolicyLocalChainNamePrefix + chainNameBase
 	svcPort.externalChainName = serviceExternalChainNamePrefix + chainNameBase
+	svcPort.firewallChainName = servicePortFirewallChainNamePrefix + chainNameBase
 
 	return svcPort
 }
@@ -543,38 +543,20 @@ func (proxier *Proxier) setupNFTables(tx *knftables.Transaction) {
 	}
 
 	// Set up LoadBalancerSourceRanges firewalling
-	tx.Add(&knftables.Set{
-		Name:    kubeFirewallSet,
-		Type:    ipvX_addr + " . inet_proto . inet_service",
+	tx.Add(&knftables.Map{
+		Name:    kubeFirewallIPsMap,
+		Type:    ipvX_addr + " . inet_proto . inet_service : verdict",
 		Comment: ptr.To("destinations that are subject to LoadBalancerSourceRanges"),
 	})
-	tx.Add(&knftables.Set{
-		Name:    kubeFirewallAllowSet,
-		Type:    ipvX_addr + " . inet_proto . inet_service . " + ipvX_addr,
-		Flags:   []knftables.SetFlag{knftables.IntervalFlag},
-		Comment: ptr.To("destinations+sources that are allowed by LoadBalancerSourceRanges"),
-	})
 
 	ensureChain(kubeFirewallCheckChain, tx, createdChains)
-	ensureChain(kubeFirewallAllowCheckChain, tx, createdChains)
 	tx.Add(&knftables.Rule{
 		Chain: kubeFirewallCheckChain,
 		Rule: knftables.Concat(
-			ipX, "daddr", ".", "meta l4proto", ".", "th dport", "@", kubeFirewallSet,
-			"jump", kubeFirewallAllowCheckChain,
+			ipX, "daddr", ".", "meta l4proto", ".", "th dport",
+			"vmap", "@", kubeFirewallIPsMap,
 		),
 	})
-	tx.Add(&knftables.Rule{
-		Chain: kubeFirewallAllowCheckChain,
-		Rule: knftables.Concat(
-			ipX, "daddr", ".", "meta l4proto", ".", "th dport", ".", ipX, "saddr", "@", kubeFirewallAllowSet,
-			"return",
-		),
-	})
-	tx.Add(&knftables.Rule{
-		Chain: kubeFirewallAllowCheckChain,
-		Rule: "drop",
-	})
 
 	// Set up service dispatch
 	tx.Add(&knftables.Map{
@@ -824,6 +806,7 @@ const (
 	serviceExternalChainNamePrefix         = "external-"
 	servicePortEndpointChainNamePrefix     = "endpoint-"
 	servicePortEndpointAffinityNamePrefix  = "affinity-"
+	servicePortFirewallChainNamePrefix     = "firewall-"
 )
 
 // hashAndTruncate prefixes name with a hash of itself and then truncates to
@@ -998,11 +981,8 @@ func (proxier *Proxier) syncProxyRules() {
 	}
 
 	// We currently fully-rebuild our sets and maps on each resync
-	tx.Flush(&knftables.Set{
-		Name: kubeFirewallSet,
-	})
-	tx.Flush(&knftables.Set{
-		Name: kubeFirewallAllowSet,
+	tx.Flush(&knftables.Map{
+		Name: kubeFirewallIPsMap,
 	})
 	tx.Flush(&knftables.Map{
 		Name: kubeNoEndpointServicesMap,
@@ -1205,6 +1185,44 @@ func (proxier *Proxier) syncProxyRules() {
 			}
 		}
 
+		usesFWChain := len(svcInfo.LoadBalancerVIPStrings()) > 0 && len(svcInfo.LoadBalancerSourceRanges()) > 0
+		fwChain := svcInfo.firewallChainName
+		if usesFWChain {
+			ensureChain(fwChain, tx, activeChains)
+			var sources []string
+			allowFromNode := false
+			for _, src := range svcInfo.LoadBalancerSourceRanges() {
+				_, cidr, _ := netutils.ParseCIDRSloppy(src)
+				if cidr == nil {
+					continue
+				}
+				if len(sources) > 0 {
+					sources = append(sources, ",")
+				}
+				sources = append(sources, src)
+				if cidr.Contains(proxier.nodeIP) {
+					allowFromNode = true
+				}
+			}
+			// For VIP-like LBs, the VIP is often added as a local
+			// address (via an IP route rule). In that case, a request
+			// from a node to the VIP will not hit the loadbalancer but
+			// will loop back with the source IP set to the VIP. We
+			// need the following rules to allow requests from this node.
+			if allowFromNode {
+				for _, lbip := range svcInfo.LoadBalancerVIPStrings() {
+					sources = append(sources, ",", lbip)
+				}
+			}
+			tx.Add(&knftables.Rule{
+				Chain: fwChain,
+				Rule: knftables.Concat(
+					ipX, "saddr", "!=", "{", sources, "}",
+					"drop",
+				),
+			})
+		}
+
 		// Capture load-balancer ingress.
 		for _, lbip := range svcInfo.LoadBalancerVIPStrings() {
 			if hasEndpoints {
@@ -1221,53 +1239,19 @@ func (proxier *Proxier) syncProxyRules() {
 				})
 			}
 
-			if len(svcInfo.LoadBalancerSourceRanges()) > 0 {
+			if usesFWChain {
 				tx.Add(&knftables.Element{
-					Set: kubeFirewallSet,
+					Map: kubeFirewallIPsMap,
 					Key: []string{
 						lbip,
 						protocol,
 						strconv.Itoa(svcInfo.Port()),
 					},
+					Value: []string{
+						fmt.Sprintf("goto %s", fwChain),
+					},
 					Comment: &svcPortNameString,
 				})
-
-				allowFromNode := false
-				for _, src := range svcInfo.LoadBalancerSourceRanges() {
-					_, cidr, _ := netutils.ParseCIDRSloppy(src)
-					if cidr == nil {
-						continue
-					}
-					tx.Add(&knftables.Element{
-						Set: kubeFirewallAllowSet,
-						Key: []string{
-							lbip,
-							protocol,
-							strconv.Itoa(svcInfo.Port()),
-							src,
-						},
-						Comment: &svcPortNameString,
-					})
-					if cidr.Contains(proxier.nodeIP) {
-						allowFromNode = true
-					}
-				}
-				// For VIP-like LBs, the VIP is often added as a local
-				// address (via an IP route rule). In that case, a request
-				// from a node to the VIP will not hit the loadbalancer but
-				// will loop back with the source IP set to the VIP. We
-				// need the following rules to allow requests from this node.
-				if allowFromNode {
-					tx.Add(&knftables.Element{
-						Set: kubeFirewallAllowSet,
-						Key: []string{
-							lbip,
-							protocol,
-							strconv.Itoa(svcInfo.Port()),
-							lbip,
-						},
-					})
-				}
 			}
 		}
 		if !hasExternalEndpoints {
diff --git a/pkg/proxy/nftables/proxier_test.go b/pkg/proxy/nftables/proxier_test.go
index c5ea7db8760..0ab904953b8 100644
--- a/pkg/proxy/nftables/proxier_test.go
+++ b/pkg/proxy/nftables/proxier_test.go
@@ -521,13 +521,9 @@ func TestOverallNFTablesRules(t *testing.T) {
 		add chain ip kube-proxy nat-prerouting { type nat hook prerouting priority -100 ; }
 		add rule ip kube-proxy nat-prerouting jump services
 
-		add set ip kube-proxy firewall { type ipv4_addr . inet_proto . inet_service ; comment "destinations that are subject to LoadBalancerSourceRanges" ; }
-		add set ip kube-proxy firewall-allow { type ipv4_addr . inet_proto . inet_service . ipv4_addr ; flags interval ; comment "destinations+sources that are allowed by LoadBalancerSourceRanges" ; }
+		add map ip kube-proxy firewall-ips { type ipv4_addr . inet_proto . inet_service : verdict ; comment "destinations that are subject to LoadBalancerSourceRanges" ; }
 		add chain ip kube-proxy firewall-check
-		add chain ip kube-proxy firewall-allow-check
-		add rule ip kube-proxy firewall-allow-check ip daddr . meta l4proto . th dport . ip saddr @firewall-allow return
-		add rule ip kube-proxy firewall-allow-check drop
-		add rule ip kube-proxy firewall-check ip daddr . meta l4proto . th dport @firewall jump firewall-allow-check
+		add rule ip kube-proxy firewall-check ip daddr . meta l4proto . th dport vmap @firewall-ips
 
 		add chain ip kube-proxy reject-chain { comment "helper for @no-endpoint-services / @no-endpoint-nodeports" ; }
 		add rule ip kube-proxy reject-chain reject
@@ -622,11 +618,13 @@ func TestOverallNFTablesRules(t *testing.T) {
 		add rule ip kube-proxy endpoint-GTK6MW7G-ns5/svc5/tcp/p80__10.180.0.3/80 update @affinity-GTK6MW7G-ns5/svc5/tcp/p80__10.180.0.3/80 { ip saddr }
 		add rule ip kube-proxy endpoint-GTK6MW7G-ns5/svc5/tcp/p80__10.180.0.3/80 meta l4proto tcp dnat to 10.180.0.3:80
 
+		add chain ip kube-proxy firewall-HVFWP5L3-ns5/svc5/tcp/p80
+		add rule ip kube-proxy firewall-HVFWP5L3-ns5/svc5/tcp/p80 ip saddr != { 203.0.113.0/25 } drop
+
 		add element ip kube-proxy service-ips { 172.30.0.45 . tcp . 80 : goto service-HVFWP5L3-ns5/svc5/tcp/p80 }
 		add element ip kube-proxy service-ips { 5.6.7.8 . tcp . 80 : goto external-HVFWP5L3-ns5/svc5/tcp/p80 }
 		add element ip kube-proxy service-nodeports { tcp . 3002 : goto external-HVFWP5L3-ns5/svc5/tcp/p80 }
-		add element ip kube-proxy firewall { 5.6.7.8 . tcp . 80 comment "ns5/svc5:p80" }
-		add element ip kube-proxy firewall-allow { 5.6.7.8 . tcp . 80 . 203.0.113.0/25 comment "ns5/svc5:p80" }
+		add element ip kube-proxy firewall-ips { 5.6.7.8 . tcp . 80 comment "ns5/svc5:p80" : goto firewall-HVFWP5L3-ns5/svc5/tcp/p80 }
 
 		# svc6
 		add element ip kube-proxy no-endpoint-services { 172.30.0.46 . tcp . 80 comment "ns6/svc6:p80" : goto reject-chain }
@@ -4264,7 +4262,6 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
 		add chain ip kube-proxy filter-forward { type filter hook forward priority -101 ; }
 		add chain ip kube-proxy filter-input { type filter hook input priority -101 ; }
 		add chain ip kube-proxy filter-output { type filter hook output priority -101 ; }
-		add chain ip kube-proxy firewall-allow-check
 		add chain ip kube-proxy firewall-check
 		add chain ip kube-proxy forward
 		add chain ip kube-proxy mark-for-masquerade
@@ -4284,9 +4281,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
 		add rule ip kube-proxy filter-input ct state new jump firewall-check
 		add rule ip kube-proxy filter-output ct state new jump endpoints-check
 		add rule ip kube-proxy filter-output ct state new jump firewall-check
-		add rule ip kube-proxy firewall-allow-check ip daddr . meta l4proto . th dport . ip saddr @firewall-allow return
-		add rule ip kube-proxy firewall-allow-check drop
-		add rule ip kube-proxy firewall-check ip daddr . meta l4proto . th dport @firewall jump firewall-allow-check
+		add rule ip kube-proxy firewall-check ip daddr . meta l4proto . th dport vmap @firewall-ips
 		add rule ip kube-proxy forward ct state invalid drop
 		add rule ip kube-proxy mark-for-masquerade mark set mark or 0x4000
 		add rule ip kube-proxy masquerading mark and 0x4000 == 0 return
@@ -4299,8 +4294,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
 		add rule ip kube-proxy services ip daddr . meta l4proto . th dport vmap @service-ips
 		add rule ip kube-proxy services fib daddr type local ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @service-nodeports
 
-		add set ip kube-proxy firewall { type ipv4_addr . inet_proto . inet_service ; comment "destinations that are subject to LoadBalancerSourceRanges" ; }
-		add set ip kube-proxy firewall-allow { type ipv4_addr . inet_proto . inet_service . ipv4_addr ; flags interval ; comment "destinations+sources that are allowed by LoadBalancerSourceRanges" ; }
+		add map ip kube-proxy firewall-ips { type ipv4_addr . inet_proto . inet_service : verdict ; comment "destinations that are subject to LoadBalancerSourceRanges" ; }
 		add map ip kube-proxy no-endpoint-nodeports { type inet_proto . inet_service : verdict ; comment "vmap to drop or reject packets to service nodeports with no endpoints" ; }
 		add map ip kube-proxy no-endpoint-services { type ipv4_addr . inet_proto . inet_service : verdict ; comment "vmap to drop or reject packets to services with no endpoints" ; }
 		add map ip kube-proxy service-ips { type ipv4_addr . inet_proto . inet_service : verdict ; comment "ClusterIP, ExternalIP and LoadBalancer IP traffic" ; }
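Putting the pieces together, the updated golden output corresponds to a two-stage check: firewall-check looks the destination up in the firewall-ips verdict map and, if present, hands the packet to the per-service firewall chain, which drops anything whose source is outside the allowed ranges. A simplified standalone model of that evaluation, reusing the svc5 values from the test output (a Go map stands in for the nft vmap; this is illustrative, not kube-proxy code):

package main

import (
	"fmt"
	"net/netip"
)

type dest struct {
	daddr string
	proto string
	dport uint16
}

func main() {
	// firewall-ips vmap: "daddr . l4proto . dport" -> per-service firewall chain.
	firewallIPs := map[dest]string{
		{"5.6.7.8", "tcp", 80}: "firewall-HVFWP5L3-ns5/svc5/tcp/p80",
	}
	// Per-service chain contents: the prefixes allowed by LoadBalancerSourceRanges.
	allowedSources := map[string][]netip.Prefix{
		"firewall-HVFWP5L3-ns5/svc5/tcp/p80": {netip.MustParsePrefix("203.0.113.0/25")},
	}

	verdict := func(saddr string, d dest) string {
		chain, subject := firewallIPs[d]
		if !subject {
			return "accept" // destination not subject to LoadBalancerSourceRanges
		}
		src := netip.MustParseAddr(saddr)
		for _, pfx := range allowedSources[chain] {
			if pfx.Contains(src) {
				return "accept" // "ip saddr != { ... }" does not match, so no drop
			}
		}
		return "drop"
	}

	fmt.Println(verdict("203.0.113.42", dest{"5.6.7.8", "tcp", 80})) // accept
	fmt.Println(verdict("192.168.0.3", dest{"5.6.7.8", "tcp", 80}))  // drop
}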