Merge pull request #122296 from tnqn/nftables-kernel-requirement

kube-proxy: change implementation of LoadBalancerSourceRanges for wider kernel support

commit f538feed8c
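At a glance: the old implementation matched firewalled destinations against a `firewall` set and allowed sources against a `firewall-allow` set whose type concatenates `daddr . proto . dport . saddr` with `flags interval`. Concatenated interval sets are a relatively recent nftables feature (roughly Linux 5.6+), which is presumably the kernel requirement the title refers to. The new implementation avoids them entirely: a `firewall-ips` verdict map dispatches each firewalled VIP/protocol/port to a per-service chain, and that chain drops disallowed sources with an ordinary anonymous set. The before/after rule shapes, taken from the updated test expectations below:

```
# before: concatenated interval set plus a shared allow-check chain
add rule ip kube-proxy firewall-check ip daddr . meta l4proto . th dport @firewall jump firewall-allow-check
add rule ip kube-proxy firewall-allow-check ip daddr . meta l4proto . th dport . ip saddr @firewall-allow return
add rule ip kube-proxy firewall-allow-check drop

# after: a verdict map plus one drop rule per service
add rule ip kube-proxy firewall-check ip daddr . meta l4proto . th dport vmap @firewall-ips
add rule ip kube-proxy firewall-HVFWP5L3-ns5/svc5/tcp/p80 ip saddr != { 203.0.113.0/25 } drop
```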
pkg/proxy/nftables/helpers_test.go

@@ -210,6 +210,17 @@ func (tracer *nftablesTracer) addressMatches(ipStr, not, ruleAddress string) bool
     }
 }
 
+func (tracer *nftablesTracer) noneAddressesMatch(ipStr, ruleAddress string) bool {
+    ruleAddress = strings.ReplaceAll(ruleAddress, " ", "")
+    addresses := strings.Split(ruleAddress, ",")
+    for _, address := range addresses {
+        if tracer.addressMatches(ipStr, "", address) {
+            return false
+        }
+    }
+    return true
+}
+
 // matchDestIPOnly checks an "ip daddr" against a set/map, and returns the matching
 // Element, if found.
 func (tracer *nftablesTracer) matchDestIPOnly(elements []*knftables.Element, destIP string) *knftables.Element {
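A minimal, self-contained sketch of the new helper's behavior (assumption: the hypothetical `matches` below stands in for the tracer's `addressMatches` and handles only plain IPs and CIDR prefixes). `noneAddressesMatch` returns true only when the IP matches no element of the comma-separated set, mirroring how nftables evaluates a negated anonymous set:

```go
package main

import (
	"fmt"
	"net/netip"
	"strings"
)

// matches is a simplified stand-in for tracer.addressMatches: it handles
// only a plain IP or a CIDR prefix, which is all these rules use.
func matches(ipStr, address string) bool {
	if prefix, err := netip.ParsePrefix(address); err == nil {
		return prefix.Contains(netip.MustParseAddr(ipStr))
	}
	return address == ipStr
}

// noneAddressesMatch mirrors the helper added above: true only if ipStr
// matches no element of the comma-separated anonymous set.
func noneAddressesMatch(ipStr, ruleAddress string) bool {
	ruleAddress = strings.ReplaceAll(ruleAddress, " ", "")
	for _, address := range strings.Split(ruleAddress, ",") {
		if matches(ipStr, address) {
			return false
		}
	}
	return true
}

func main() {
	fmt.Println(noneAddressesMatch("80.80.80.80", " 203.0.113.0/25 ")) // true: outside the range
	fmt.Println(noneAddressesMatch("203.0.113.1", " 203.0.113.0/25 ")) // false: inside the range
}
```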
@@ -267,6 +278,7 @@ func (tracer *nftablesTracer) matchDestPort(elements []*knftables.Element, proto
 // match verdictRegexp.
 
 var destAddrRegexp = regexp.MustCompile(`^ip6* daddr (!= )?(\S+)`)
+var destAddrLookupRegexp = regexp.MustCompile(`^ip6* daddr != \{([^}]*)\}`)
 var destAddrLocalRegexp = regexp.MustCompile(`^fib daddr type local`)
 var destPortRegexp = regexp.MustCompile(`^(tcp|udp|sctp) dport (\d+)`)
 var destIPOnlyLookupRegexp = regexp.MustCompile(`^ip6* daddr @(\S+)`)
@@ -278,6 +290,7 @@ var destDispatchRegexp = regexp.MustCompile(`^ip6* daddr \. meta l4proto \. th d
 var destPortDispatchRegexp = regexp.MustCompile(`^meta l4proto \. th dport vmap @(\S+)$`)
 
 var sourceAddrRegexp = regexp.MustCompile(`^ip6* saddr (!= )?(\S+)`)
+var sourceAddrLookupRegexp = regexp.MustCompile(`^ip6* saddr != \{([^}]*)\}`)
 var sourceAddrLocalRegexp = regexp.MustCompile(`^fib saddr type local`)
 
 var endpointVMAPRegexp = regexp.MustCompile(`^numgen random mod \d+ vmap \{(.*)\}$`)
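The two new regexps capture the body of a negated anonymous set (`!= { ... }`) so that `runChain`, below, can hand it to `noneAddressesMatch`. A quick demonstration; the rule text is borrowed from the updated test expectations further down:

```go
package main

import (
	"fmt"
	"regexp"
)

var sourceAddrLookupRegexp = regexp.MustCompile(`^ip6* saddr != \{([^}]*)\}`)

func main() {
	rule := "ip saddr != { 203.0.113.0/25 } drop"
	match := sourceAddrLookupRegexp.FindStringSubmatch(rule)
	fmt.Printf("consumed: %q\n", match[0]) // "ip saddr != { 203.0.113.0/25 }"
	fmt.Printf("set body: %q\n", match[1]) // " 203.0.113.0/25 " (noneAddressesMatch strips the spaces)
}
```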
@@ -400,6 +413,16 @@ func (tracer *nftablesTracer) runChain(chname, sourceIP, protocol, destIP, destP
             rule = element.Value[0]
         }
 
+        case destAddrLookupRegexp.MatchString(rule):
+            // `^ip6* daddr != \{([^}]*)\}`
+            // Tests whether destIP doesn't match an anonymous set.
+            match := destAddrLookupRegexp.FindStringSubmatch(rule)
+            rule = strings.TrimPrefix(rule, match[0])
+            if !tracer.noneAddressesMatch(destIP, match[1]) {
+                rule = ""
+                break
+            }
+
         case destAddrRegexp.MatchString(rule):
             // `^ip6* daddr (!= )?(\S+)`
             // Tests whether destIP does/doesn't match a literal.
@@ -432,6 +455,16 @@ func (tracer *nftablesTracer) runChain(chname, sourceIP, protocol, destIP, destP
                 break
             }
 
+        case sourceAddrLookupRegexp.MatchString(rule):
+            // `^ip6* saddr != \{([^}]*)\}`
+            // Tests whether sourceIP doesn't match an anonymous set.
+            match := sourceAddrLookupRegexp.FindStringSubmatch(rule)
+            rule = strings.TrimPrefix(rule, match[0])
+            if !tracer.noneAddressesMatch(sourceIP, match[1]) {
+                rule = ""
+                break
+            }
+
         case sourceAddrRegexp.MatchString(rule):
             // `^ip6* saddr (!= )?(\S+)`
             // Tests whether sourceIP does/doesn't match a literal.
pkg/proxy/nftables/proxier.go

@@ -79,10 +79,8 @@ const (
     kubeRejectChain = "reject-chain"
 
     // LoadBalancerSourceRanges handling
-    kubeFirewallSet             = "firewall"
-    kubeFirewallCheckChain      = "firewall-check"
-    kubeFirewallAllowSet        = "firewall-allow"
-    kubeFirewallAllowCheckChain = "firewall-allow-check"
+    kubeFirewallIPsMap     = "firewall-ips"
+    kubeFirewallCheckChain = "firewall-check"
 
     // masquerading
     kubeMarkMasqChain = "mark-for-masquerade"
@@ -102,6 +100,7 @@ type servicePortInfo struct {
     clusterPolicyChainName string
     localPolicyChainName   string
     externalChainName      string
+    firewallChainName      string
 }
 
 // returns a new proxy.ServicePort which abstracts a serviceInfo
@@ -117,6 +116,7 @@ func newServiceInfo(port *v1.ServicePort, service *v1.Service, bsvcPortInfo *pro
     svcPort.clusterPolicyChainName = servicePortPolicyClusterChainNamePrefix + chainNameBase
     svcPort.localPolicyChainName = servicePortPolicyLocalChainNamePrefix + chainNameBase
     svcPort.externalChainName = serviceExternalChainNamePrefix + chainNameBase
+    svcPort.firewallChainName = servicePortFirewallChainNamePrefix + chainNameBase
 
     return svcPort
 }
@@ -546,38 +546,20 @@ func (proxier *Proxier) setupNFTables(tx *knftables.Transaction) {
     }
 
     // Set up LoadBalancerSourceRanges firewalling
-    tx.Add(&knftables.Set{
-        Name:    kubeFirewallSet,
-        Type:    ipvX_addr + " . inet_proto . inet_service",
+    tx.Add(&knftables.Map{
+        Name:    kubeFirewallIPsMap,
+        Type:    ipvX_addr + " . inet_proto . inet_service : verdict",
         Comment: ptr.To("destinations that are subject to LoadBalancerSourceRanges"),
     })
-    tx.Add(&knftables.Set{
-        Name:    kubeFirewallAllowSet,
-        Type:    ipvX_addr + " . inet_proto . inet_service . " + ipvX_addr,
-        Flags:   []knftables.SetFlag{knftables.IntervalFlag},
-        Comment: ptr.To("destinations+sources that are allowed by LoadBalancerSourceRanges"),
-    })
 
     ensureChain(kubeFirewallCheckChain, tx, createdChains)
-    ensureChain(kubeFirewallAllowCheckChain, tx, createdChains)
     tx.Add(&knftables.Rule{
         Chain: kubeFirewallCheckChain,
         Rule: knftables.Concat(
-            ipX, "daddr", ".", "meta l4proto", ".", "th dport", "@", kubeFirewallSet,
-            "jump", kubeFirewallAllowCheckChain,
+            ipX, "daddr", ".", "meta l4proto", ".", "th dport",
+            "vmap", "@", kubeFirewallIPsMap,
         ),
     })
-    tx.Add(&knftables.Rule{
-        Chain: kubeFirewallAllowCheckChain,
-        Rule: knftables.Concat(
-            ipX, "daddr", ".", "meta l4proto", ".", "th dport", ".", ipX, "saddr", "@", kubeFirewallAllowSet,
-            "return",
-        ),
-    })
-    tx.Add(&knftables.Rule{
-        Chain: kubeFirewallAllowCheckChain,
-        Rule:  "drop",
-    })
 
     // Set up service dispatch
     tx.Add(&knftables.Map{
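In nft terms, the rewritten setup boils down to one map and one dispatch rule; these lines from the updated TestOverallNFTablesRules expectations show the result:

```
add map ip kube-proxy firewall-ips { type ipv4_addr . inet_proto . inet_service : verdict ; comment "destinations that are subject to LoadBalancerSourceRanges" ; }
add chain ip kube-proxy firewall-check
add rule ip kube-proxy firewall-check ip daddr . meta l4proto . th dport vmap @firewall-ips
```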
@@ -827,6 +809,7 @@ const (
     serviceExternalChainNamePrefix        = "external-"
     servicePortEndpointChainNamePrefix    = "endpoint-"
     servicePortEndpointAffinityNamePrefix = "affinity-"
+    servicePortFirewallChainNamePrefix    = "firewall-"
 )
 
 // hashAndTruncate prefixes name with a hash of itself and then truncates to
@@ -1001,11 +984,8 @@ func (proxier *Proxier) syncProxyRules() {
     }
 
     // We currently fully-rebuild our sets and maps on each resync
-    tx.Flush(&knftables.Set{
-        Name: kubeFirewallSet,
-    })
-    tx.Flush(&knftables.Set{
-        Name: kubeFirewallAllowSet,
+    tx.Flush(&knftables.Map{
+        Name: kubeFirewallIPsMap,
     })
     tx.Flush(&knftables.Map{
         Name: kubeNoEndpointServicesMap,
@@ -1208,6 +1188,44 @@ func (proxier *Proxier) syncProxyRules() {
             }
         }
 
+        usesFWChain := len(svcInfo.LoadBalancerVIPStrings()) > 0 && len(svcInfo.LoadBalancerSourceRanges()) > 0
+        fwChain := svcInfo.firewallChainName
+        if usesFWChain {
+            ensureChain(fwChain, tx, activeChains)
+            var sources []string
+            allowFromNode := false
+            for _, src := range svcInfo.LoadBalancerSourceRanges() {
+                _, cidr, _ := netutils.ParseCIDRSloppy(src)
+                if cidr == nil {
+                    continue
+                }
+                if len(sources) > 0 {
+                    sources = append(sources, ",")
+                }
+                sources = append(sources, src)
+                if cidr.Contains(proxier.nodeIP) {
+                    allowFromNode = true
+                }
+            }
+            // For VIP-like LBs, the VIP is often added as a local
+            // address (via an IP route rule). In that case, a request
+            // from a node to the VIP will not hit the loadbalancer but
+            // will loop back with the source IP set to the VIP. We
+            // need the following rules to allow requests from this node.
+            if allowFromNode {
+                for _, lbip := range svcInfo.LoadBalancerVIPStrings() {
+                    sources = append(sources, ",", lbip)
+                }
+            }
+            tx.Add(&knftables.Rule{
+                Chain: fwChain,
+                Rule: knftables.Concat(
+                    ipX, "saddr", "!=", "{", sources, "}",
+                    "drop",
+                ),
+            })
+        }
+
         // Capture load-balancer ingress.
         for _, lbip := range svcInfo.LoadBalancerVIPStrings() {
             if hasEndpoints {
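A sketch of what the loop above builds (hypothetical input values; `knftables.Concat` joins its arguments with single spaces, flattening `[]string` arguments element by element, so the `","` entries land between set elements):

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// Assumed inputs: two LoadBalancerSourceRanges and one LB VIP, with the
	// node's IP falling inside one of the ranges (allowFromNode == true).
	ranges := []string{"203.0.113.0/25", "198.51.100.0/24"}
	lbVIPs := []string{"5.6.7.8"}
	allowFromNode := true

	var sources []string
	for _, src := range ranges {
		if len(sources) > 0 {
			sources = append(sources, ",")
		}
		sources = append(sources, src)
	}
	if allowFromNode {
		// The VIP itself is allowed, so node-originated traffic that loops
		// back with the VIP as its source is not dropped.
		for _, lbip := range lbVIPs {
			sources = append(sources, ",", lbip)
		}
	}

	// Stand-in for knftables.Concat("ip", "saddr", "!=", "{", sources, "}", "drop"):
	rule := "ip saddr != { " + strings.Join(sources, " ") + " } drop"
	fmt.Println(rule)
	// ip saddr != { 203.0.113.0/25 , 198.51.100.0/24 , 5.6.7.8 } drop
}
```

Any packet whose source matches one of the listed elements falls off the end of the chain and continues to service dispatch; everything else is dropped.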
@@ -1224,53 +1242,19 @@ func (proxier *Proxier) syncProxyRules() {
                 })
             }
 
-            if len(svcInfo.LoadBalancerSourceRanges()) > 0 {
+            if usesFWChain {
                 tx.Add(&knftables.Element{
-                    Set: kubeFirewallSet,
+                    Map: kubeFirewallIPsMap,
                     Key: []string{
                         lbip,
                         protocol,
                         strconv.Itoa(svcInfo.Port()),
                     },
+                    Value: []string{
+                        fmt.Sprintf("goto %s", fwChain),
+                    },
                     Comment: &svcPortNameString,
                 })
-
-                allowFromNode := false
-                for _, src := range svcInfo.LoadBalancerSourceRanges() {
-                    _, cidr, _ := netutils.ParseCIDRSloppy(src)
-                    if cidr == nil {
-                        continue
-                    }
-                    tx.Add(&knftables.Element{
-                        Set: kubeFirewallAllowSet,
-                        Key: []string{
-                            lbip,
-                            protocol,
-                            strconv.Itoa(svcInfo.Port()),
-                            src,
-                        },
-                        Comment: &svcPortNameString,
-                    })
-                    if cidr.Contains(proxier.nodeIP) {
-                        allowFromNode = true
-                    }
-                }
-                // For VIP-like LBs, the VIP is often added as a local
-                // address (via an IP route rule). In that case, a request
-                // from a node to the VIP will not hit the loadbalancer but
-                // will loop back with the source IP set to the VIP. We
-                // need the following rules to allow requests from this node.
-                if allowFromNode {
-                    tx.Add(&knftables.Element{
-                        Set: kubeFirewallAllowSet,
-                        Key: []string{
-                            lbip,
-                            protocol,
-                            strconv.Itoa(svcInfo.Port()),
-                            lbip,
-                        },
-                    })
-                }
             }
         }
         if !hasExternalEndpoints {
pkg/proxy/nftables/proxier_test.go

@@ -524,13 +524,9 @@ func TestOverallNFTablesRules(t *testing.T) {
 add chain ip kube-proxy nat-prerouting { type nat hook prerouting priority -100 ; }
 add rule ip kube-proxy nat-prerouting jump services
 
-add set ip kube-proxy firewall { type ipv4_addr . inet_proto . inet_service ; comment "destinations that are subject to LoadBalancerSourceRanges" ; }
-add set ip kube-proxy firewall-allow { type ipv4_addr . inet_proto . inet_service . ipv4_addr ; flags interval ; comment "destinations+sources that are allowed by LoadBalancerSourceRanges" ; }
+add map ip kube-proxy firewall-ips { type ipv4_addr . inet_proto . inet_service : verdict ; comment "destinations that are subject to LoadBalancerSourceRanges" ; }
 add chain ip kube-proxy firewall-check
-add chain ip kube-proxy firewall-allow-check
-add rule ip kube-proxy firewall-allow-check ip daddr . meta l4proto . th dport . ip saddr @firewall-allow return
-add rule ip kube-proxy firewall-allow-check drop
-add rule ip kube-proxy firewall-check ip daddr . meta l4proto . th dport @firewall jump firewall-allow-check
+add rule ip kube-proxy firewall-check ip daddr . meta l4proto . th dport vmap @firewall-ips
 
 add chain ip kube-proxy reject-chain { comment "helper for @no-endpoint-services / @no-endpoint-nodeports" ; }
 add rule ip kube-proxy reject-chain reject
@@ -625,11 +621,13 @@ func TestOverallNFTablesRules(t *testing.T) {
 add rule ip kube-proxy endpoint-GTK6MW7G-ns5/svc5/tcp/p80__10.180.0.3/80 update @affinity-GTK6MW7G-ns5/svc5/tcp/p80__10.180.0.3/80 { ip saddr }
 add rule ip kube-proxy endpoint-GTK6MW7G-ns5/svc5/tcp/p80__10.180.0.3/80 meta l4proto tcp dnat to 10.180.0.3:80
 
+add chain ip kube-proxy firewall-HVFWP5L3-ns5/svc5/tcp/p80
+add rule ip kube-proxy firewall-HVFWP5L3-ns5/svc5/tcp/p80 ip saddr != { 203.0.113.0/25 } drop
+
 add element ip kube-proxy service-ips { 172.30.0.45 . tcp . 80 : goto service-HVFWP5L3-ns5/svc5/tcp/p80 }
 add element ip kube-proxy service-ips { 5.6.7.8 . tcp . 80 : goto external-HVFWP5L3-ns5/svc5/tcp/p80 }
 add element ip kube-proxy service-nodeports { tcp . 3002 : goto external-HVFWP5L3-ns5/svc5/tcp/p80 }
-add element ip kube-proxy firewall { 5.6.7.8 . tcp . 80 comment "ns5/svc5:p80" }
-add element ip kube-proxy firewall-allow { 5.6.7.8 . tcp . 80 . 203.0.113.0/25 comment "ns5/svc5:p80" }
+add element ip kube-proxy firewall-ips { 5.6.7.8 . tcp . 80 comment "ns5/svc5:p80" : goto firewall-HVFWP5L3-ns5/svc5/tcp/p80 }
 
 # svc6
 add element ip kube-proxy no-endpoint-services { 172.30.0.46 . tcp . 80 comment "ns6/svc6:p80" : goto reject-chain }
@@ -4267,7 +4265,6 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
 add chain ip kube-proxy filter-forward { type filter hook forward priority -101 ; }
 add chain ip kube-proxy filter-input { type filter hook input priority -101 ; }
 add chain ip kube-proxy filter-output { type filter hook output priority -101 ; }
-add chain ip kube-proxy firewall-allow-check
 add chain ip kube-proxy firewall-check
 add chain ip kube-proxy forward
 add chain ip kube-proxy mark-for-masquerade
@@ -4287,9 +4284,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
 add rule ip kube-proxy filter-input ct state new jump firewall-check
 add rule ip kube-proxy filter-output ct state new jump endpoints-check
 add rule ip kube-proxy filter-output ct state new jump firewall-check
-add rule ip kube-proxy firewall-allow-check ip daddr . meta l4proto . th dport . ip saddr @firewall-allow return
-add rule ip kube-proxy firewall-allow-check drop
-add rule ip kube-proxy firewall-check ip daddr . meta l4proto . th dport @firewall jump firewall-allow-check
+add rule ip kube-proxy firewall-check ip daddr . meta l4proto . th dport vmap @firewall-ips
 add rule ip kube-proxy forward ct state invalid drop
 add rule ip kube-proxy mark-for-masquerade mark set mark or 0x4000
 add rule ip kube-proxy masquerading mark and 0x4000 == 0 return
@@ -4302,8 +4297,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
 add rule ip kube-proxy services ip daddr . meta l4proto . th dport vmap @service-ips
 add rule ip kube-proxy services fib daddr type local ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @service-nodeports
 
-add set ip kube-proxy firewall { type ipv4_addr . inet_proto . inet_service ; comment "destinations that are subject to LoadBalancerSourceRanges" ; }
-add set ip kube-proxy firewall-allow { type ipv4_addr . inet_proto . inet_service . ipv4_addr ; flags interval ; comment "destinations+sources that are allowed by LoadBalancerSourceRanges" ; }
+add map ip kube-proxy firewall-ips { type ipv4_addr . inet_proto . inet_service : verdict ; comment "destinations that are subject to LoadBalancerSourceRanges" ; }
 add map ip kube-proxy no-endpoint-nodeports { type inet_proto . inet_service : verdict ; comment "vmap to drop or reject packets to service nodeports with no endpoints" ; }
 add map ip kube-proxy no-endpoint-services { type ipv4_addr . inet_proto . inet_service : verdict ; comment "vmap to drop or reject packets to services with no endpoints" ; }
 add map ip kube-proxy service-ips { type ipv4_addr . inet_proto . inet_service : verdict ; comment "ClusterIP, ExternalIP and LoadBalancer IP traffic" ; }
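Putting the pieces together for the test's svc5 (LB VIP 5.6.7.8:80, allowed source range 203.0.113.0/25), the packet path through the new rules, assembled from the expectations above, is:

```
ct state new jump firewall-check                        # from the filter-input / filter-output hooks
ip daddr . meta l4proto . th dport vmap @firewall-ips   # 5.6.7.8 . tcp . 80 : goto firewall-HVFWP5L3-ns5/svc5/tcp/p80
ip saddr != { 203.0.113.0/25 } drop                     # in the per-service firewall chain
```

Traffic from an allowed source matches none of these drop conditions and falls through to normal service dispatch; everything else destined for the firewalled VIP is dropped, with no concatenated interval set anywhere on the path.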