diff --git a/pkg/proxy/iptables/proxier.go b/pkg/proxy/iptables/proxier.go index 54593353a92..cd5f0b5eaec 100644 --- a/pkg/proxy/iptables/proxier.go +++ b/pkg/proxy/iptables/proxier.go @@ -80,6 +80,11 @@ const ( // kube proxy canary chain is used for monitoring rule reload kubeProxyCanaryChain utiliptables.Chain = "KUBE-PROXY-CANARY" + + // largeClusterEndpointsThreshold is the number of endpoints at which + // we switch into "large cluster mode" and optimize for iptables + // performance over iptables debuggability + largeClusterEndpointsThreshold = 1000 ) // KernelCompatTester tests whether the required kernel capabilities are @@ -219,11 +224,10 @@ type Proxier struct { natChains utilproxy.LineBuffer natRules utilproxy.LineBuffer - // endpointChainsNumber is the total amount of endpointChains across all - // services that we will generate (it is computed at the beginning of - // syncProxyRules method). If that is large enough, comments in some - // iptable rules are dropped to improve performance. - endpointChainsNumber int + // largeClusterMode is set at the beginning of syncProxyRules if we are + // going to end up outputting "lots" of iptables rules and so we need to + // optimize for performance over debuggability. + largeClusterMode bool // Values are as a parameter to select the interfaces where nodeport works. nodePortAddresses []string @@ -787,14 +791,12 @@ func (proxier *Proxier) deleteEndpointConnections(connectionMap []proxy.ServiceE } } -const endpointChainsNumberThreshold = 1000 - // Assumes proxier.mu is held. func (proxier *Proxier) appendServiceCommentLocked(args []string, svcName string) []string { // Not printing these comments, can reduce size of iptables (in case of large // number of endpoints) even by 40%+. So if total number of endpoint chains // is large enough, we simply drop those comments. - if proxier.endpointChainsNumber > endpointChainsNumberThreshold { + if proxier.largeClusterMode { return args } return append(args, "-m", "comment", "--comment", svcName) @@ -956,11 +958,13 @@ func (proxier *Proxier) syncProxyRules() { // is just for efficiency, not correctness. args := make([]string, 64) - // Compute total number of endpoint chains across all services. - proxier.endpointChainsNumber = 0 + // Compute total number of endpoint chains across all services to get + // a sense of how big the cluster is. + totalEndpoints := 0 for svcName := range proxier.serviceMap { - proxier.endpointChainsNumber += len(proxier.endpointsMap[svcName]) + totalEndpoints += len(proxier.endpointsMap[svcName]) } + proxier.largeClusterMode = (totalEndpoints > largeClusterEndpointsThreshold) nodeAddresses, err := utilproxy.GetNodeAddresses(proxier.nodePortAddresses, proxier.networkInterfacer) if err != nil { @@ -1422,7 +1426,7 @@ func (proxier *Proxier) syncProxyRules() { klog.V(2).InfoS("Reloading service iptables data", "numServices", len(proxier.serviceMap), - "numEndpoints", proxier.endpointChainsNumber, + "numEndpoints", totalEndpoints, "numFilterChains", proxier.filterChains.Lines(), "numFilterRules", proxier.filterRules.Lines(), "numNATChains", proxier.natChains.Lines(), diff --git a/pkg/proxy/iptables/proxier_test.go b/pkg/proxy/iptables/proxier_test.go index 2c98c1edd90..38e8d24910b 100644 --- a/pkg/proxy/iptables/proxier_test.go +++ b/pkg/proxy/iptables/proxier_test.go @@ -7434,7 +7434,7 @@ func countEndpointsAndComments(iptablesData string, matchEndpoint string) (strin return matched, numEndpoints, numComments } -func TestEndpointCommentElision(t *testing.T) { +func TestSyncProxyRulesLargeClusterMode(t *testing.T) { ipt := iptablestest.NewFake() fp := NewFakeProxier(ipt) fp.masqueradeAll = true @@ -7473,7 +7473,7 @@ func TestEndpointCommentElision(t *testing.T) { populateEndpointSlices(fp, makeTestEndpointSlice("ns1", "svc1", 1, func(eps *discovery.EndpointSlice) { eps.AddressType = discovery.AddressTypeIPv4 - eps.Endpoints = make([]discovery.Endpoint, endpointChainsNumberThreshold/2-1) + eps.Endpoints = make([]discovery.Endpoint, largeClusterEndpointsThreshold/2-1) for i := range eps.Endpoints { eps.Endpoints[i].Addresses = []string{fmt.Sprintf("10.0.%d.%d", i%256, i/256)} } @@ -7485,7 +7485,7 @@ func TestEndpointCommentElision(t *testing.T) { }), makeTestEndpointSlice("ns2", "svc2", 1, func(eps *discovery.EndpointSlice) { eps.AddressType = discovery.AddressTypeIPv4 - eps.Endpoints = make([]discovery.Endpoint, endpointChainsNumberThreshold/2-1) + eps.Endpoints = make([]discovery.Endpoint, largeClusterEndpointsThreshold/2-1) for i := range eps.Endpoints { eps.Endpoints[i].Addresses = []string{fmt.Sprintf("10.1.%d.%d", i%256, i/256)} } @@ -7498,15 +7498,15 @@ func TestEndpointCommentElision(t *testing.T) { ) fp.syncProxyRules() + expectedEndpoints := 2 * (largeClusterEndpointsThreshold/2 - 1) - expectedEndpoints := 2 * (endpointChainsNumberThreshold/2 - 1) firstEndpoint, numEndpoints, numComments := countEndpointsAndComments(fp.iptablesData.String(), "10.0.0.0") assert.Equal(t, "-A KUBE-SEP-DKGQUZGBKLTPAR56 -m comment --comment ns1/svc1:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.0.0:80", firstEndpoint) if numEndpoints != expectedEndpoints { t.Errorf("Found wrong number of endpoints: expected %d, got %d", expectedEndpoints, numEndpoints) } if numComments != numEndpoints { - t.Errorf("numComments (%d) != numEndpoints (%d) when numEndpoints < threshold (%d)", numComments, numEndpoints, endpointChainsNumberThreshold) + t.Errorf("numComments (%d) != numEndpoints (%d) when numEndpoints < threshold (%d)", numComments, numEndpoints, largeClusterEndpointsThreshold) } fp.OnEndpointSliceAdd(makeTestEndpointSlice("ns3", "svc3", 1, func(eps *discovery.EndpointSlice) { @@ -7525,15 +7525,15 @@ func TestEndpointCommentElision(t *testing.T) { }} })) fp.syncProxyRules() - expectedEndpoints += 3 + firstEndpoint, numEndpoints, numComments = countEndpointsAndComments(fp.iptablesData.String(), "10.0.0.0") assert.Equal(t, "-A KUBE-SEP-DKGQUZGBKLTPAR56 -m tcp -p tcp -j DNAT --to-destination 10.0.0.0:80", firstEndpoint) if numEndpoints != expectedEndpoints { t.Errorf("Found wrong number of endpoints: expected %d, got %d", expectedEndpoints, numEndpoints) } if numComments != 0 { - t.Errorf("numComments (%d) != 0 when numEndpoints (%d) > threshold (%d)", numComments, numEndpoints, endpointChainsNumberThreshold) + t.Errorf("numComments (%d) != 0 when numEndpoints (%d) > threshold (%d)", numComments, numEndpoints, largeClusterEndpointsThreshold) } }