Redo no-endpoint handling with maps

This commit is contained in:
Dan Winship 2023-10-04 21:19:51 -04:00
parent 4128631d0f
commit 9d71513ac1
3 changed files with 201 additions and 67 deletions

View File

@ -62,8 +62,6 @@ var objectOrder = map[string]int{
// with per-service rules, we don't know what order syncProxyRules is going to output them
// in, but the order doesn't matter anyway. So we sort the rules in those chains.
var sortedChains = sets.New(
kubeServicesFilterChain,
kubeExternalServicesChain,
kubeServicesChain,
kubeNodePortsChain,
)
@ -256,6 +254,17 @@ func (tracer *nftablesTracer) matchDestAndSource(elements []*knftables.Element,
return nil
}
// matchDestPort checks an "meta l4proto . th dport" against a set/map, and returns the
// matching Element, if found.
func (tracer *nftablesTracer) matchDestPort(elements []*knftables.Element, protocol, destPort string) *knftables.Element {
for _, element := range elements {
if element.Key[0] == protocol && element.Key[1] == destPort {
return element
}
}
return nil
}
// We intentionally don't try to parse arbitrary nftables rules, as the syntax is quite
// complicated and context sensitive. (E.g., "ip daddr" could be the start of an address
// comparison, or it could be the start of a set/map lookup.) Instead, we just have
@ -273,6 +282,10 @@ var destPortRegexp = regexp.MustCompile(`^(tcp|udp|sctp) dport (\d+)`)
var destIPOnlyLookupRegexp = regexp.MustCompile(`^ip6* daddr @(\S+)`)
var destLookupRegexp = regexp.MustCompile(`^ip6* daddr \. meta l4proto \. th dport @(\S+)`)
var destSourceLookupRegexp = regexp.MustCompile(`^ip6* daddr \. meta l4proto \. th dport \. ip6* saddr @(\S+)`)
var destPortLookupRegexp = regexp.MustCompile(`^meta l4proto \. th dport @(\S+)`)
var destDispatchRegexp = regexp.MustCompile(`^ip6* daddr \. meta l4proto \. th dport vmap @(\S+)$`)
var destPortDispatchRegexp = regexp.MustCompile(`^meta l4proto \. th dport vmap @(\S+)$`)
var sourceAddrRegexp = regexp.MustCompile(`^ip6* saddr (!= )?(\S+)`)
var sourceAddrLocalRegexp = regexp.MustCompile(`^fib saddr type local`)
@ -357,6 +370,46 @@ func (tracer *nftablesTracer) runChain(chname, sourceIP, protocol, destIP, destP
break
}
case destPortLookupRegexp.MatchString(rule):
// `^meta l4proto . th dport @(\S+)`
// Tests whether "protocol . destPort" is a member of the
// indicated set.
match := destPortLookupRegexp.FindStringSubmatch(rule)
rule = strings.TrimPrefix(rule, match[0])
set := match[1]
if tracer.matchDestPort(tracer.nft.Table.Sets[set].Elements, protocol, destPort) == nil {
rule = ""
break
}
case destDispatchRegexp.MatchString(rule):
// `^ip6* daddr \. meta l4proto \. th dport vmap @(\S+)$`
// Looks up "destIP . protocol . destPort" in the indicated
// verdict map, and if found, runs the assocated verdict.
match := destDispatchRegexp.FindStringSubmatch(rule)
mapName := match[1]
element := tracer.matchDest(tracer.nft.Table.Maps[mapName].Elements, destIP, protocol, destPort)
if element == nil {
rule = ""
break
} else {
rule = element.Value[0]
}
case destPortDispatchRegexp.MatchString(rule):
// `^meta l4proto \. th dport vmap @(\S+)$`
// Looks up "protocol . destPort" in the indicated verdict map,
// and if found, runs the assocated verdict.
match := destPortDispatchRegexp.FindStringSubmatch(rule)
mapName := match[1]
element := tracer.matchDestPort(tracer.nft.Table.Maps[mapName].Elements, protocol, destPort)
if element == nil {
rule = ""
break
} else {
rule = element.Value[0]
}
case destAddrRegexp.MatchString(rule):
// `^ip6* daddr (!= )?(\S+)`
// Tests whether destIP does/doesn't match a literal.

View File

@ -69,8 +69,10 @@ const (
kubeNodePortIPsSet = "nodeport-ips"
// handling for services with no endpoints
kubeServicesFilterChain = "services-filter"
kubeExternalServicesChain = "external-services"
kubeEndpointsCheckChain = "endpoints-check"
kubeNoEndpointServicesMap = "no-endpoint-services"
kubeNoEndpointNodePortsMap = "no-endpoint-nodeports"
kubeRejectChain = "reject-chain"
// LoadBalancerSourceRanges handling
kubeFirewallSet = "firewall"
@ -340,10 +342,10 @@ type nftablesJumpChain struct {
}
var nftablesJumpChains = []nftablesJumpChain{
{kubeExternalServicesChain, "filter-input", "ct state new"},
{kubeExternalServicesChain, "filter-forward", "ct state new"},
{kubeServicesFilterChain, "filter-forward", "ct state new"},
{kubeServicesFilterChain, "filter-output", "ct state new"},
{kubeEndpointsCheckChain, "filter-input", "ct state new"},
{kubeEndpointsCheckChain, "filter-forward", "ct state new"},
{kubeEndpointsCheckChain, "filter-output", "ct state new"},
{kubeForwardChain, "filter-forward", ""},
{kubeFirewallCheckChain, "filter-input", "ct state new"},
@ -374,9 +376,11 @@ func ensureChain(chain string, tx *knftables.Transaction, createdChains sets.Set
func (proxier *Proxier) setupNFTables(tx *knftables.Transaction) {
ipX := "ip"
ipvX_addr := "ipv4_addr" //nolint:stylecheck // var name intentionally resembles value
noLocalhost := "ip daddr != 127.0.0.0/8"
if proxier.ipFamily == v1.IPv6Protocol {
ipX = "ip6"
ipvX_addr = "ipv6_addr"
noLocalhost = "ip6 daddr != ::1"
}
tx.Add(&knftables.Table{
@ -409,7 +413,7 @@ func (proxier *Proxier) setupNFTables(tx *knftables.Transaction) {
}
// Ensure all of our other "top-level" chains exist
for _, chain := range []string{kubeServicesFilterChain, kubeServicesChain, kubeExternalServicesChain, kubeForwardChain, kubeNodePortsChain, kubeMasqueradingChain, kubeMarkMasqChain} {
for _, chain := range []string{kubeServicesChain, kubeForwardChain, kubeNodePortsChain, kubeMasqueradingChain, kubeMarkMasqChain} {
ensureChain(chain, tx, createdChains)
}
@ -484,6 +488,59 @@ func (proxier *Proxier) setupNFTables(tx *knftables.Transaction) {
}
}
// Set up "no endpoints" drop/reject handling
tx.Add(&knftables.Map{
Name: kubeNoEndpointServicesMap,
Type: ipvX_addr + " . inet_proto . inet_service : verdict",
Comment: ptr.To("vmap to drop or reject packets to services with no endpoints"),
})
tx.Add(&knftables.Map{
Name: kubeNoEndpointNodePortsMap,
Type: "inet_proto . inet_service : verdict",
Comment: ptr.To("vmap to drop or reject packets to service nodeports with no endpoints"),
})
tx.Add(&knftables.Chain{
Name: kubeRejectChain,
Comment: ptr.To("helper for @no-endpoint-services / @no-endpoint-nodeports"),
})
tx.Flush(&knftables.Chain{
Name: kubeRejectChain,
})
tx.Add(&knftables.Rule{
Chain: kubeRejectChain,
Rule: "reject",
})
tx.Add(&knftables.Rule{
Chain: kubeEndpointsCheckChain,
Rule: knftables.Concat(
ipX, "daddr", ".", "meta l4proto", ".", "th dport",
"vmap", "@", kubeNoEndpointServicesMap,
),
})
if proxier.nodePortAddresses.MatchAll() {
tx.Add(&knftables.Rule{
Chain: kubeEndpointsCheckChain,
Rule: knftables.Concat(
"fib daddr type local",
noLocalhost,
"meta l4proto . th dport",
"vmap", "@", kubeNoEndpointNodePortsMap,
),
})
} else {
tx.Add(&knftables.Rule{
Chain: kubeEndpointsCheckChain,
Rule: knftables.Concat(
ipX, "daddr", "@", kubeNodePortIPsSet,
"meta l4proto . th dport",
"vmap", "@", kubeNoEndpointNodePortsMap,
),
})
}
// Set up LoadBalancerSourceRanges firewalling
tx.Add(&knftables.Set{
Name: kubeFirewallSet,
@ -907,6 +964,12 @@ func (proxier *Proxier) syncProxyRules() {
tx.Flush(&knftables.Set{
Name: kubeFirewallAllowSet,
})
tx.Flush(&knftables.Map{
Name: kubeNoEndpointServicesMap,
})
tx.Flush(&knftables.Map{
Name: kubeNoEndpointNodePortsMap,
})
// Accumulate service/endpoint chains and affinity sets to keep.
activeChains := sets.New[string]()
@ -1004,25 +1067,21 @@ func (proxier *Proxier) syncProxyRules() {
ensureChain(externalTrafficChain, tx, activeChains)
}
var internalTrafficFilterVerdict, internalTrafficFilterComment string
var externalTrafficFilterVerdict, externalTrafficFilterComment string
var internalTrafficFilterVerdict, externalTrafficFilterVerdict string
if !hasEndpoints {
// The service has no endpoints at all; hasInternalEndpoints and
// hasExternalEndpoints will also be false, and we will not
// generate any chains in the "nat" table for the service; only
// rules in the "filter" table rejecting incoming packets for
// the service's IPs.
internalTrafficFilterVerdict = "reject"
internalTrafficFilterComment = fmt.Sprintf("%s has no endpoints", svcPortNameString)
externalTrafficFilterVerdict = "reject"
externalTrafficFilterComment = internalTrafficFilterComment
internalTrafficFilterVerdict = fmt.Sprintf("goto %s", kubeRejectChain)
externalTrafficFilterVerdict = fmt.Sprintf("goto %s", kubeRejectChain)
} else {
if !hasInternalEndpoints {
// The internalTrafficPolicy is "Local" but there are no local
// endpoints. Traffic to the clusterIP will be dropped, but
// external traffic may still be accepted.
internalTrafficFilterVerdict = "drop"
internalTrafficFilterComment = fmt.Sprintf("%s has no local endpoints", svcPortNameString)
serviceNoLocalEndpointsTotalInternal++
}
if !hasExternalEndpoints {
@ -1031,7 +1090,6 @@ func (proxier *Proxier) syncProxyRules() {
// the cluster will be dropped, but traffic from inside
// the cluster may still be accepted.
externalTrafficFilterVerdict = "drop"
externalTrafficFilterComment = fmt.Sprintf("%s has no local endpoints", svcPortNameString)
serviceNoLocalEndpointsTotalExternal++
}
}
@ -1048,14 +1106,17 @@ func (proxier *Proxier) syncProxyRules() {
})
} else {
// No endpoints.
tx.Add(&knftables.Rule{
Chain: kubeServicesFilterChain,
Rule: knftables.Concat(
ipX, "daddr", svcInfo.ClusterIP(),
protocol, "dport", svcInfo.Port(),
tx.Add(&knftables.Element{
Map: kubeNoEndpointServicesMap,
Key: []string{
svcInfo.ClusterIP().String(),
protocol,
strconv.Itoa(svcInfo.Port()),
},
Value: []string{
internalTrafficFilterVerdict,
),
Comment: &internalTrafficFilterComment,
},
Comment: &svcPortNameString,
})
}
@ -1077,14 +1138,17 @@ func (proxier *Proxier) syncProxyRules() {
// Either no endpoints at all (REJECT) or no endpoints for
// external traffic (DROP anything that didn't get
// short-circuited by the EXT chain.)
tx.Add(&knftables.Rule{
Chain: kubeExternalServicesChain,
Rule: knftables.Concat(
ipX, "daddr", externalIP,
protocol, "dport", svcInfo.Port(),
tx.Add(&knftables.Element{
Map: kubeNoEndpointServicesMap,
Key: []string{
externalIP,
protocol,
strconv.Itoa(svcInfo.Port()),
},
Value: []string{
externalTrafficFilterVerdict,
),
Comment: &externalTrafficFilterComment,
},
Comment: &svcPortNameString,
})
}
}
@ -1156,14 +1220,17 @@ func (proxier *Proxier) syncProxyRules() {
// external traffic (DROP anything that didn't get short-circuited
// by the EXT chain.)
for _, lbip := range svcInfo.LoadBalancerVIPStrings() {
tx.Add(&knftables.Rule{
Chain: kubeExternalServicesChain,
Rule: knftables.Concat(
ipX, "daddr", lbip,
protocol, "dport", svcInfo.Port(),
tx.Add(&knftables.Element{
Map: kubeNoEndpointServicesMap,
Key: []string{
lbip,
protocol,
strconv.Itoa(svcInfo.Port()),
},
Value: []string{
externalTrafficFilterVerdict,
),
Comment: &externalTrafficFilterComment,
},
Comment: &svcPortNameString,
})
}
}
@ -1186,14 +1253,16 @@ func (proxier *Proxier) syncProxyRules() {
// Either no endpoints at all (REJECT) or no endpoints for
// external traffic (DROP anything that didn't get
// short-circuited by the EXT chain.)
tx.Add(&knftables.Rule{
Chain: kubeExternalServicesChain,
Rule: knftables.Concat(
"fib daddr type local",
protocol, "dport", svcInfo.NodePort(),
tx.Add(&knftables.Element{
Map: kubeNoEndpointNodePortsMap,
Key: []string{
protocol,
strconv.Itoa(svcInfo.NodePort()),
},
Value: []string{
externalTrafficFilterVerdict,
),
Comment: &externalTrafficFilterComment,
},
Comment: &svcPortNameString,
})
}
}

View File

@ -495,7 +495,6 @@ func TestOverallNFTablesRules(t *testing.T) {
expected := dedent.Dedent(`
add table ip kube-proxy { comment "rules for kube-proxy" ; }
add chain ip kube-proxy external-services
add chain ip kube-proxy forward
add rule ip kube-proxy forward ct state invalid drop
add chain ip kube-proxy mark-for-masquerade
@ -506,17 +505,16 @@ func TestOverallNFTablesRules(t *testing.T) {
add rule ip kube-proxy masquerading mark set mark xor 0x4000
add rule ip kube-proxy masquerading masquerade fully-random
add chain ip kube-proxy services
add chain ip kube-proxy services-filter
add chain ip kube-proxy firewall-check
add chain ip kube-proxy firewall-allow-check
add chain ip kube-proxy filter-forward { type filter hook forward priority -101 ; }
add rule ip kube-proxy filter-forward ct state new jump external-services
add rule ip kube-proxy filter-forward ct state new jump services-filter
add rule ip kube-proxy filter-forward ct state new jump endpoints-check
add rule ip kube-proxy filter-forward jump forward
add rule ip kube-proxy filter-forward ct state new jump firewall-check
add chain ip kube-proxy filter-input { type filter hook input priority -101 ; }
add rule ip kube-proxy filter-input ct state new jump external-services
add rule ip kube-proxy filter-input ct state new jump endpoints-check
add rule ip kube-proxy filter-input ct state new jump firewall-check
add chain ip kube-proxy filter-output { type filter hook output priority -101 ; }
add rule ip kube-proxy filter-output ct state new jump services-filter
add rule ip kube-proxy filter-output ct state new jump endpoints-check
add rule ip kube-proxy filter-output ct state new jump firewall-check
add chain ip kube-proxy nat-output { type nat hook output priority -100 ; }
add rule ip kube-proxy nat-output jump services
add chain ip kube-proxy nat-postrouting { type nat hook postrouting priority 100 ; }
@ -526,12 +524,21 @@ func TestOverallNFTablesRules(t *testing.T) {
add set ip kube-proxy firewall { type ipv4_addr . inet_proto . inet_service ; comment "destinations that are subject to LoadBalancerSourceRanges" ; }
add set ip kube-proxy firewall-allow { type ipv4_addr . inet_proto . inet_service . ipv4_addr ; flags interval ; comment "destinations+sources that are allowed by LoadBalancerSourceRanges" ; }
add chain ip kube-proxy firewall-check
add chain ip kube-proxy firewall-allow-check
add rule ip kube-proxy firewall-allow-check ip daddr . meta l4proto . th dport . ip saddr @firewall-allow return
add rule ip kube-proxy firewall-allow-check drop
add rule ip kube-proxy firewall-check ip daddr . meta l4proto . th dport @firewall jump firewall-allow-check
add rule ip kube-proxy filter-forward ct state new jump firewall-check
add rule ip kube-proxy filter-input ct state new jump firewall-check
add rule ip kube-proxy filter-output ct state new jump firewall-check
add chain ip kube-proxy reject-chain { comment "helper for @no-endpoint-services / @no-endpoint-nodeports" ; }
add rule ip kube-proxy reject-chain reject
add map ip kube-proxy no-endpoint-services { type ipv4_addr . inet_proto . inet_service : verdict ; comment "vmap to drop or reject packets to services with no endpoints" ; }
add map ip kube-proxy no-endpoint-nodeports { type inet_proto . inet_service : verdict ; comment "vmap to drop or reject packets to service nodeports with no endpoints" ; }
add chain ip kube-proxy endpoints-check
add rule ip kube-proxy endpoints-check ip daddr . meta l4proto . th dport vmap @no-endpoint-services
add rule ip kube-proxy endpoints-check fib daddr type local ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @no-endpoint-nodeports
# svc1
add chain ip kube-proxy service-ULMVA6XW-ns1/svc1/tcp/p80
@ -558,11 +565,12 @@ func TestOverallNFTablesRules(t *testing.T) {
add rule ip kube-proxy services ip daddr 172.30.0.42 tcp dport 80 goto service-42NFTM6N-ns2/svc2/tcp/p80
add rule ip kube-proxy services ip daddr 192.168.99.22 tcp dport 80 goto external-42NFTM6N-ns2/svc2/tcp/p80
add rule ip kube-proxy services ip daddr 1.2.3.4 tcp dport 80 goto external-42NFTM6N-ns2/svc2/tcp/p80
add rule ip kube-proxy external-services ip daddr 192.168.99.22 tcp dport 80 drop comment "ns2/svc2:p80 has no local endpoints"
add rule ip kube-proxy external-services ip daddr 1.2.3.4 tcp dport 80 drop comment "ns2/svc2:p80 has no local endpoints"
add rule ip kube-proxy external-services fib daddr type local tcp dport 3001 drop comment "ns2/svc2:p80 has no local endpoints"
add rule ip kube-proxy nodeports tcp dport 3001 goto external-42NFTM6N-ns2/svc2/tcp/p80
add element ip kube-proxy no-endpoint-nodeports { tcp . 3001 comment "ns2/svc2:p80" : drop }
add element ip kube-proxy no-endpoint-services { 1.2.3.4 . tcp . 80 comment "ns2/svc2:p80" : drop }
add element ip kube-proxy no-endpoint-services { 192.168.99.22 . tcp . 80 comment "ns2/svc2:p80" : drop }
# svc3
add chain ip kube-proxy service-4AT6LBPK-ns3/svc3/tcp/p80
add rule ip kube-proxy service-4AT6LBPK-ns3/svc3/tcp/p80 ip daddr 172.30.0.43 tcp dport 80 ip saddr != 10.0.0.0/8 jump mark-for-masquerade
@ -613,7 +621,7 @@ func TestOverallNFTablesRules(t *testing.T) {
add element ip kube-proxy firewall-allow { 5.6.7.8 . tcp . 80 . 203.0.113.0/25 comment "ns5/svc5:p80" }
# svc6
add rule ip kube-proxy services-filter ip daddr 172.30.0.46 tcp dport 80 reject comment "ns6/svc6:p80 has no endpoints"
add element ip kube-proxy no-endpoint-services { 172.30.0.46 . tcp . 80 comment "ns6/svc6:p80" : goto reject-chain }
add rule ip kube-proxy services fib daddr type local ip daddr != 127.0.0.0/8 jump nodeports comment "kubernetes service nodeports; NOTE: this must be the last rule in this chain"
`)
@ -4246,7 +4254,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
baseRules := dedent.Dedent(`
add table ip kube-proxy { comment "rules for kube-proxy" ; }
add chain ip kube-proxy external-services
add chain ip kube-proxy endpoints-check
add chain ip kube-proxy filter-forward { type filter hook forward priority -101 ; }
add chain ip kube-proxy filter-input { type filter hook input priority -101 ; }
add chain ip kube-proxy filter-output { type filter hook output priority -101 ; }
@ -4259,16 +4267,17 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
add chain ip kube-proxy nat-postrouting { type nat hook postrouting priority 100 ; }
add chain ip kube-proxy nat-prerouting { type nat hook prerouting priority -100 ; }
add chain ip kube-proxy nodeports
add chain ip kube-proxy reject-chain { comment "helper for @no-endpoint-services / @no-endpoint-nodeports" ; }
add chain ip kube-proxy services
add chain ip kube-proxy services-filter
add rule ip kube-proxy filter-forward ct state new jump external-services
add rule ip kube-proxy filter-forward ct state new jump services-filter
add rule ip kube-proxy endpoints-check ip daddr . meta l4proto . th dport vmap @no-endpoint-services
add rule ip kube-proxy endpoints-check fib daddr type local ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @no-endpoint-nodeports
add rule ip kube-proxy filter-forward ct state new jump endpoints-check
add rule ip kube-proxy filter-forward jump forward
add rule ip kube-proxy filter-forward ct state new jump firewall-check
add rule ip kube-proxy filter-input ct state new jump external-services
add rule ip kube-proxy filter-input ct state new jump endpoints-check
add rule ip kube-proxy filter-input ct state new jump firewall-check
add rule ip kube-proxy filter-output ct state new jump services-filter
add rule ip kube-proxy filter-output ct state new jump endpoints-check
add rule ip kube-proxy filter-output ct state new jump firewall-check
add rule ip kube-proxy firewall-allow-check ip daddr . meta l4proto . th dport . ip saddr @firewall-allow return
add rule ip kube-proxy firewall-allow-check drop
@ -4281,9 +4290,12 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
add rule ip kube-proxy nat-output jump services
add rule ip kube-proxy nat-postrouting jump masquerading
add rule ip kube-proxy nat-prerouting jump services
add rule ip kube-proxy reject-chain reject
add set ip kube-proxy firewall { type ipv4_addr . inet_proto . inet_service ; comment "destinations that are subject to LoadBalancerSourceRanges" ; }
add set ip kube-proxy firewall-allow { type ipv4_addr . inet_proto . inet_service . ipv4_addr ; flags interval ; comment "destinations+sources that are allowed by LoadBalancerSourceRanges" ; }
add map ip kube-proxy no-endpoint-nodeports { type inet_proto . inet_service : verdict ; comment "vmap to drop or reject packets to service nodeports with no endpoints" ; }
add map ip kube-proxy no-endpoint-services { type ipv4_addr . inet_proto . inet_service : verdict ; comment "vmap to drop or reject packets to services with no endpoints" ; }
`)
// Helper function to make it look like time has passed (from the point of view of
@ -4508,7 +4520,7 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
add rule ip kube-proxy endpoint-2OCDJSZQ-ns3/svc3/tcp/p80__10.0.3.1/80 ip saddr 10.0.3.1 jump mark-for-masquerade
add rule ip kube-proxy endpoint-2OCDJSZQ-ns3/svc3/tcp/p80__10.0.3.1/80 meta l4proto tcp dnat to 10.0.3.1:80
add rule ip kube-proxy services-filter ip daddr 172.30.0.44 tcp dport 80 reject comment "ns4/svc4:p80 has no endpoints"
add element ip kube-proxy no-endpoint-services { 172.30.0.44 . tcp . 80 comment "ns4/svc4:p80" : goto reject-chain }
`)
assertNFTablesTransactionEqual(t, getLine(), expected, nft.Dump())