mirror of https://github.com/k3s-io/kubernetes.git (synced 2025-07-23 19:56:01 +00:00)
Merge pull request #57461 from danwinship/proxier-no-dummy-nat-rules
Automatic merge from submit-queue (batch tested with PRs 55637, 57461, 60268, 60290, 60210). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

**Don't create no-op iptables rules for services with no endpoints**

Currently, for every service we create `-t nat -A KUBE-SERVICES` rules that match the destination IPs (ClusterIP, ExternalIP, NodePort IPs, etc.) and then jump to the corresponding `KUBE-SVC-XXXXXX` chain. But if the service has no endpoints, the `KUBE-SVC-XXXXXX` chain is empty, so nothing happens except that we waste time (a) forcing iptables-restore to parse the match rules, and (b) forcing the kernel to test matches that can never have any effect.

This PR gets rid of the match rules in that case. That is, every incoming service packet is now matched *either* by nat rules that rewrite it *or* by filter rules that ICMP-reject it, but not both. (Actually, that's not quite true: there are no filter rules to reject Ingress-addressed packets, and I *think* that's a bug?)

I also got rid of some comments that seemed redundant. The patch is mostly reindentation, so it is best viewed with `diff -w`.

Partial fix for #56842. Related to #56164, which this conflicts with; I'll fix that after one or the other merges.

**Release note**:

```release-note
Removed some redundant rules created by the iptables proxier, to improve performance on systems with very many services.
```
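To make the before/after concrete, here is a rough sketch of the rules kube-proxy generates for a hypothetical service `ns/svc` with ClusterIP 10.0.0.10 on TCP port 80 (the service name, address, and chain-name hash are invented for illustration; the argument order mirrors what the proxier writes):

```
# Service WITH endpoints (unchanged by this PR): a nat rule rewrites the packet.
-t nat -A KUBE-SERVICES -m comment --comment "ns/svc cluster IP" -m tcp -p tcp -d 10.0.0.10/32 --dport 80 -j KUBE-SVC-XXXXXXXXXXXXXXXX

# Service WITHOUT endpoints, before this PR: the nat rule above was still
# emitted, jumping to an empty KUBE-SVC chain, in addition to this REJECT.
# After this PR, only the REJECT rule is emitted.
-t filter -A KUBE-SERVICES -m comment --comment "ns/svc has no endpoints" -m tcp -p tcp -d 10.0.0.10/32 --dport 80 -j REJECT
```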
This commit is contained in: commit e6c2a5de10
```diff
@@ -832,7 +832,6 @@ func (proxier *Proxier) syncProxyRules() {
 	args := make([]string, 64)
 
 	// Build rules for each service.
-	var svcNameString string
 	for svcName, svc := range proxier.serviceMap {
 		svcInfo, ok := svc.(*serviceInfo)
 		if !ok {
@@ -841,16 +840,19 @@ func (proxier *Proxier) syncProxyRules() {
 		}
 		isIPv6 := utilproxy.IsIPv6(svcInfo.clusterIP)
 		protocol := strings.ToLower(string(svcInfo.protocol))
-		svcNameString = svcInfo.serviceNameString
+		svcNameString := svcInfo.serviceNameString
+		hasEndpoints := len(proxier.endpointsMap[svcName]) > 0
 
-		// Create the per-service chain, retaining counters if possible.
 		svcChain := svcInfo.servicePortChainName
+		if hasEndpoints {
+			// Create the per-service chain, retaining counters if possible.
 			if chain, ok := existingNATChains[svcChain]; ok {
 				writeLine(proxier.natChains, chain)
 			} else {
 				writeLine(proxier.natChains, utiliptables.MakeChainLine(svcChain))
 			}
 			activeNATChains[svcChain] = true
+		}
 
 		svcXlbChain := svcInfo.serviceLBChainName
 		if svcInfo.onlyNodeLocalEndpoints {
@@ -862,12 +864,10 @@ func (proxier *Proxier) syncProxyRules() {
 				writeLine(proxier.natChains, utiliptables.MakeChainLine(svcXlbChain))
 			}
 			activeNATChains[svcXlbChain] = true
 		} else if activeNATChains[svcXlbChain] {
 			// Cleanup the previously created XLB chain for this service
 			delete(activeNATChains, svcXlbChain)
 		}
 
 		// Capture the clusterIP.
+		if hasEndpoints {
 			args = append(args[:0],
 				"-A", string(kubeServicesChain),
 				"-m", "comment", "--comment", fmt.Sprintf(`"%s cluster IP"`, svcNameString),
@@ -886,6 +886,16 @@ func (proxier *Proxier) syncProxyRules() {
 				writeLine(proxier.natRules, append(args, "! -s", proxier.clusterCIDR, "-j", string(KubeMarkMasqChain))...)
 			}
 			writeLine(proxier.natRules, append(args, "-j", string(svcChain))...)
+		} else {
+			writeLine(proxier.filterRules,
+				"-A", string(kubeServicesChain),
+				"-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcNameString),
+				"-m", protocol, "-p", protocol,
+				"-d", utilproxy.ToCIDR(svcInfo.clusterIP),
+				"--dport", strconv.Itoa(svcInfo.port),
+				"-j", "REJECT",
+			)
+		}
 
 		// Capture externalIPs.
 		for _, externalIP := range svcInfo.externalIPs {
@@ -921,7 +931,9 @@ func (proxier *Proxier) syncProxyRules() {
 					}
 					replacementPortsMap[lp] = socket
-				}
-			} // We're holding the port, so it's OK to install iptables rules.
+				} // We're holding the port, so it's OK to install iptables rules.
+			}
+
+			if hasEndpoints {
 				args = append(args[:0],
 					"-A", string(kubeServicesChain),
 					"-m", "comment", "--comment", fmt.Sprintf(`"%s external IP"`, svcNameString),
@@ -944,10 +956,7 @@ func (proxier *Proxier) syncProxyRules() {
 				// Allow traffic bound for external IPs that happen to be recognized as local IPs to stay local.
 				// This covers cases like GCE load-balancers which get added to the local routing table.
 				writeLine(proxier.natRules, append(dstLocalOnlyArgs, "-j", string(svcChain))...)
-
-			// If the service has no endpoints then reject packets coming via externalIP
-			// Install ICMP Reject rule in filter table for destination=externalIP and dport=svcport
-			if len(proxier.endpointsMap[svcName]) == 0 {
+			} else {
 				writeLine(proxier.filterRules,
 					"-A", string(kubeExternalServicesChain),
 					"-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcNameString),
@@ -960,6 +969,7 @@ func (proxier *Proxier) syncProxyRules() {
 		}
 
 		// Capture load-balancer ingress.
+		if hasEndpoints {
 			fwChain := svcInfo.serviceFirewallChainName
 			for _, ingress := range svcInfo.loadBalancerStatus.Ingress {
 				if ingress.IP != "" {
@@ -1025,6 +1035,8 @@ func (proxier *Proxier) syncProxyRules() {
 					writeLine(proxier.natRules, append(args, "-j", string(KubeMarkDropChain))...)
 				}
 			}
+		}
+		// FIXME: do we need REJECT rules for load-balancer ingress if !hasEndpoints?
 
 		// Capture nodeports. If we had more than 2 rules it might be
 		// worthwhile to make a new per-service chain for nodeport rules, but
@@ -1058,8 +1070,9 @@ func (proxier *Proxier) syncProxyRules() {
 					}
 				}
 				replacementPortsMap[lp] = socket
 			} // We're holding the port, so it's OK to install iptables rules.
 		}
+		if hasEndpoints {
 			args = append(args[:0],
 				"-A", string(kubeNodePortsChain),
 				"-m", "comment", "--comment", svcNameString,
@@ -1083,12 +1096,7 @@ func (proxier *Proxier) syncProxyRules() {
 				writeLine(proxier.natRules, append(args, "-s", loopback, "-j", string(KubeMarkMasqChain))...)
 				writeLine(proxier.natRules, append(args, "-j", string(svcXlbChain))...)
 			}
-
-			// If the service has no endpoints then reject packets. The filter
-			// table doesn't currently have the same per-service structure that
-			// the nat table does, so we just stick this into the kube-services
-			// chain.
-			if len(proxier.endpointsMap[svcName]) == 0 {
+		} else {
 			writeLine(proxier.filterRules,
 				"-A", string(kubeExternalServicesChain),
 				"-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcNameString),
@@ -1100,21 +1108,10 @@ func (proxier *Proxier) syncProxyRules() {
 			}
 		}
 
-		// If the service has no endpoints then reject packets.
-		if len(proxier.endpointsMap[svcName]) == 0 {
-			writeLine(proxier.filterRules,
-				"-A", string(kubeServicesChain),
-				"-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcNameString),
-				"-m", protocol, "-p", protocol,
-				"-d", utilproxy.ToCIDR(svcInfo.clusterIP),
-				"--dport", strconv.Itoa(svcInfo.port),
-				"-j", "REJECT",
-			)
+		if !hasEndpoints {
+			continue
 		}
 
+		// From here on, we assume there are active endpoints.
+
 		// Generate the per-endpoint chains. We do this in multiple passes so we
 		// can group rules together.
 		// These two slices parallel each other - keep in sync
```
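Stripped of the iptables details, the restructuring reduces to one `hasEndpoints` test per service. The following is a minimal, self-contained Go sketch of that decision, not the kube-proxy source: the `service` type, `rulesFor` helper, and rule strings are hypothetical, and the argument order mirrors the rules written in the diff above.

```go
package main

import "fmt"

// service is a toy stand-in for the proxier's per-service info.
type service struct {
	name      string
	clusterIP string
	port      int
	protocol  string
	endpoints []string
}

// rulesFor emits either a nat rewrite rule or a filter REJECT rule,
// never both, which is the invariant this PR establishes.
func rulesFor(svc service) []string {
	hasEndpoints := len(svc.endpoints) > 0
	if !hasEndpoints {
		// Only a filter-table REJECT; no nat match rule and no empty KUBE-SVC chain.
		return []string{fmt.Sprintf(
			"-t filter -A KUBE-SERVICES -m comment --comment %q -m %s -p %s -d %s/32 --dport %d -j REJECT",
			svc.name+" has no endpoints", svc.protocol, svc.protocol, svc.clusterIP, svc.port)}
	}
	// With endpoints: a nat match rule jumping to the per-service chain.
	return []string{fmt.Sprintf(
		"-t nat -A KUBE-SERVICES -m comment --comment %q -m %s -p %s -d %s/32 --dport %d -j KUBE-SVC-XXXX",
		svc.name+" cluster IP", svc.protocol, svc.protocol, svc.clusterIP, svc.port)}
}

func main() {
	for _, svc := range []service{
		{"ns/with-eps", "10.0.0.10", 80, "tcp", []string{"10.244.1.5:8080"}},
		{"ns/no-eps", "10.0.0.11", 80, "tcp", nil},
	} {
		for _, r := range rulesFor(svc) {
			fmt.Println(r)
		}
	}
}
```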