From 477b14b3c4545d121d68a98e41b1256760f92c82 Mon Sep 17 00:00:00 2001
From: Surya Seetharaman
Date: Thu, 1 Oct 2020 14:22:39 +0200
Subject: [PATCH] Kube-proxy: Perf-fix: Shrink INPUT chain

In #56164, we split the reject rules for existing services without
endpoints into the KUBE-EXTERNAL-SERVICES chain in order to avoid
calling KUBE-SERVICES from INPUT. However, in #74394 KUBE-SERVICES was
re-added to INPUT. As noted in #56164, the kernel is sensitive to the
size of the INPUT chain.

This patch stops calling the KUBE-SERVICES chain from INPUT and instead
adds the LB reject rule to the KUBE-EXTERNAL-SERVICES chain, which is
now called from both INPUT and FORWARD. (A sketch of the resulting
filter-table rules follows the diff.)
---
 pkg/proxy/iptables/proxier.go      |  9 ++++--
 pkg/proxy/iptables/proxier_test.go | 49 ++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 3 deletions(-)

diff --git a/pkg/proxy/iptables/proxier.go b/pkg/proxy/iptables/proxier.go
index 5e618e12918..05d9036f614 100644
--- a/pkg/proxy/iptables/proxier.go
+++ b/pkg/proxy/iptables/proxier.go
@@ -390,9 +390,9 @@ type iptablesJumpChain struct {
 
 var iptablesJumpChains = []iptablesJumpChain{
 	{utiliptables.TableFilter, kubeExternalServicesChain, utiliptables.ChainInput, "kubernetes externally-visible service portals", []string{"-m", "conntrack", "--ctstate", "NEW"}},
+	{utiliptables.TableFilter, kubeExternalServicesChain, utiliptables.ChainForward, "kubernetes externally-visible service portals", []string{"-m", "conntrack", "--ctstate", "NEW"}},
 	{utiliptables.TableFilter, kubeServicesChain, utiliptables.ChainForward, "kubernetes service portals", []string{"-m", "conntrack", "--ctstate", "NEW"}},
 	{utiliptables.TableFilter, kubeServicesChain, utiliptables.ChainOutput, "kubernetes service portals", []string{"-m", "conntrack", "--ctstate", "NEW"}},
-	{utiliptables.TableFilter, kubeServicesChain, utiliptables.ChainInput, "kubernetes service portals", []string{"-m", "conntrack", "--ctstate", "NEW"}},
 	{utiliptables.TableFilter, kubeForwardChain, utiliptables.ChainForward, "kubernetes forwarding rules", nil},
 	{utiliptables.TableNAT, kubeServicesChain, utiliptables.ChainOutput, "kubernetes service portals", nil},
 	{utiliptables.TableNAT, kubeServicesChain, utiliptables.ChainPrerouting, "kubernetes service portals", nil},
@@ -406,7 +406,10 @@ var iptablesEnsureChains = []struct {
 	{utiliptables.TableNAT, KubeMarkDropChain},
 }
 
-var iptablesCleanupOnlyChains = []iptablesJumpChain{}
+var iptablesCleanupOnlyChains = []iptablesJumpChain{
+	// Present in kube 1.13 - 1.19. Removed by #95252 in favor of adding reject rules for incoming/forwarding packets to kubeExternalServicesChain
+	{utiliptables.TableFilter, kubeServicesChain, utiliptables.ChainInput, "kubernetes service portals", []string{"-m", "conntrack", "--ctstate", "NEW"}},
+}
 
 // CleanupLeftovers removes all iptables rules and chains created by the Proxier
 // It returns true if an error was encountered. Errors are logged.
@@ -1219,7 +1222,7 @@ func (proxier *Proxier) syncProxyRules() {
 				} else {
 					// No endpoints.
 					writeLine(proxier.filterRules,
-						"-A", string(kubeServicesChain),
+						"-A", string(kubeExternalServicesChain),
 						"-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcNameString),
 						"-m", protocol, "-p", protocol,
 						"-d", utilproxy.ToCIDR(net.ParseIP(ingress)),
diff --git a/pkg/proxy/iptables/proxier_test.go b/pkg/proxy/iptables/proxier_test.go
index 0b63d022d32..8877549ff9f 100644
--- a/pkg/proxy/iptables/proxier_test.go
+++ b/pkg/proxy/iptables/proxier_test.go
@@ -941,6 +941,55 @@ func TestNodePortReject(t *testing.T) {
 	}
 }
 
+func TestLoadBalancerReject(t *testing.T) {
+	ipt := iptablestest.NewFake()
+	fp := NewFakeProxier(ipt, false)
+	svcIP := "10.20.30.41"
+	svcPort := 80
+	svcNodePort := 3001
+	svcLBIP := "1.2.3.4"
+	svcPortName := proxy.ServicePortName{
+		NamespacedName: makeNSN("ns1", "svc1"),
+		Port:           "p80",
+		Protocol:       v1.ProtocolTCP,
+	}
+	svcSessionAffinityTimeout := int32(10800)
+	makeServiceMap(fp,
+		makeTestService(svcPortName.Namespace, svcPortName.Name, func(svc *v1.Service) {
+			svc.Spec.Type = "LoadBalancer"
+			svc.Spec.ClusterIP = svcIP
+			svc.Spec.Ports = []v1.ServicePort{{
+				Name:     svcPortName.Port,
+				Port:     int32(svcPort),
+				Protocol: v1.ProtocolTCP,
+				NodePort: int32(svcNodePort),
+			}}
+			svc.Status.LoadBalancer.Ingress = []v1.LoadBalancerIngress{{
+				IP: svcLBIP,
+			}}
+			svc.Spec.ExternalTrafficPolicy = v1.ServiceExternalTrafficPolicyTypeLocal
+			svc.Spec.SessionAffinity = v1.ServiceAffinityClientIP
+			svc.Spec.SessionAffinityConfig = &v1.SessionAffinityConfig{
+				ClientIP: &v1.ClientIPConfig{TimeoutSeconds: &svcSessionAffinityTimeout},
+			}
+		}),
+	)
+	makeEndpointsMap(fp)
+
+	fp.syncProxyRules()
+
+	kubeSvcExtRules := ipt.GetRules(string(kubeExternalServicesChain))
+	if !hasJump(kubeSvcExtRules, iptablestest.Reject, svcLBIP, svcPort) {
+		errorf(fmt.Sprintf("Failed to find a %v rule for LoadBalancer %v with no endpoints", iptablestest.Reject, svcPortName), kubeSvcExtRules, t)
+	}
+
+	kubeSvcRules := ipt.GetRules(string(kubeServicesChain))
+	if hasJump(kubeSvcRules, iptablestest.Reject, svcLBIP, svcPort) {
+		errorf(fmt.Sprintf("Found a %v rule for LoadBalancer %v with no endpoints in kubeServicesChain", iptablestest.Reject, svcPortName), kubeSvcRules, t)
+	}
+
+}
+
 func TestOnlyLocalLoadBalancing(t *testing.T) {
 	ipt := iptablestest.NewFake()
 	fp := NewFakeProxier(ipt, false)
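
Note (illustration only, not part of the patch to be applied): with the
LoadBalancer Service used in TestLoadBalancerReject above (ns1/svc1:p80,
ingress IP 1.2.3.4) and no ready endpoints, the filter-table rules the
proxier programs should end up looking roughly like the lines below. The
jump rules follow directly from the iptablesJumpChains change; the tail of
the reject rule (the --dport match and the REJECT target) lies outside the
hunk context shown above and is reconstructed here, so treat the exact
match order and tail as an approximation rather than literal proxier
output.

    -A INPUT -m conntrack --ctstate NEW -m comment --comment "kubernetes externally-visible service portals" -j KUBE-EXTERNAL-SERVICES
    -A FORWARD -m conntrack --ctstate NEW -m comment --comment "kubernetes externally-visible service portals" -j KUBE-EXTERNAL-SERVICES
    -A FORWARD -m conntrack --ctstate NEW -m comment --comment "kubernetes service portals" -j KUBE-SERVICES
    -A OUTPUT -m conntrack --ctstate NEW -m comment --comment "kubernetes service portals" -j KUBE-SERVICES
    -A KUBE-EXTERNAL-SERVICES -m comment --comment "ns1/svc1:p80 has no endpoints" -m tcp -p tcp -d 1.2.3.4/32 --dport 80 -j REJECT

After this change, iptablesJumpChains leaves KUBE-EXTERNAL-SERVICES as the
only jump it installs into filter INPUT, which is what shrinks the INPUT
chain; TestLoadBalancerReject asserts the same split by requiring the
REJECT rule in KUBE-EXTERNAL-SERVICES and its absence from KUBE-SERVICES.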