proxy/iptables: abstract the "endpointChainsNumberThreshold" a bit

Turn this into a generic "large cluster mode" that determines whether
we optimize for performance or debuggability.
This commit is contained in:
Dan Winship 2022-06-01 14:21:58 -04:00
parent c12da17838
commit 1cd461bd24
2 changed files with 23 additions and 19 deletions

View File

@ -80,6 +80,11 @@ const (
// kube proxy canary chain is used for monitoring rule reload
kubeProxyCanaryChain utiliptables.Chain = "KUBE-PROXY-CANARY"
// largeClusterEndpointsThreshold is the number of endpoints at which
// we switch into "large cluster mode" and optimize for iptables
// performance over iptables debuggability
largeClusterEndpointsThreshold = 1000
)
// KernelCompatTester tests whether the required kernel capabilities are
@ -219,11 +224,10 @@ type Proxier struct {
natChains utilproxy.LineBuffer
natRules utilproxy.LineBuffer
// endpointChainsNumber is the total amount of endpointChains across all
// services that we will generate (it is computed at the beginning of
// syncProxyRules method). If that is large enough, comments in some
// iptable rules are dropped to improve performance.
endpointChainsNumber int
// largeClusterMode is set at the beginning of syncProxyRules if we are
// going to end up outputting "lots" of iptables rules and so we need to
// optimize for performance over debuggability.
largeClusterMode bool
// Values are as a parameter to select the interfaces where nodeport works.
nodePortAddresses []string
@ -787,14 +791,12 @@ func (proxier *Proxier) deleteEndpointConnections(connectionMap []proxy.ServiceE
}
}
const endpointChainsNumberThreshold = 1000
// Assumes proxier.mu is held.
func (proxier *Proxier) appendServiceCommentLocked(args []string, svcName string) []string {
// Not printing these comments, can reduce size of iptables (in case of large
// number of endpoints) even by 40%+. So if total number of endpoint chains
// is large enough, we simply drop those comments.
if proxier.endpointChainsNumber > endpointChainsNumberThreshold {
if proxier.largeClusterMode {
return args
}
return append(args, "-m", "comment", "--comment", svcName)
@ -956,11 +958,13 @@ func (proxier *Proxier) syncProxyRules() {
// is just for efficiency, not correctness.
args := make([]string, 64)
// Compute total number of endpoint chains across all services.
proxier.endpointChainsNumber = 0
// Compute total number of endpoint chains across all services to get
// a sense of how big the cluster is.
totalEndpoints := 0
for svcName := range proxier.serviceMap {
proxier.endpointChainsNumber += len(proxier.endpointsMap[svcName])
totalEndpoints += len(proxier.endpointsMap[svcName])
}
proxier.largeClusterMode = (totalEndpoints > largeClusterEndpointsThreshold)
nodeAddresses, err := utilproxy.GetNodeAddresses(proxier.nodePortAddresses, proxier.networkInterfacer)
if err != nil {
@ -1422,7 +1426,7 @@ func (proxier *Proxier) syncProxyRules() {
klog.V(2).InfoS("Reloading service iptables data",
"numServices", len(proxier.serviceMap),
"numEndpoints", proxier.endpointChainsNumber,
"numEndpoints", totalEndpoints,
"numFilterChains", proxier.filterChains.Lines(),
"numFilterRules", proxier.filterRules.Lines(),
"numNATChains", proxier.natChains.Lines(),

View File

@ -7434,7 +7434,7 @@ func countEndpointsAndComments(iptablesData string, matchEndpoint string) (strin
return matched, numEndpoints, numComments
}
func TestEndpointCommentElision(t *testing.T) {
func TestSyncProxyRulesLargeClusterMode(t *testing.T) {
ipt := iptablestest.NewFake()
fp := NewFakeProxier(ipt)
fp.masqueradeAll = true
@ -7473,7 +7473,7 @@ func TestEndpointCommentElision(t *testing.T) {
populateEndpointSlices(fp,
makeTestEndpointSlice("ns1", "svc1", 1, func(eps *discovery.EndpointSlice) {
eps.AddressType = discovery.AddressTypeIPv4
eps.Endpoints = make([]discovery.Endpoint, endpointChainsNumberThreshold/2-1)
eps.Endpoints = make([]discovery.Endpoint, largeClusterEndpointsThreshold/2-1)
for i := range eps.Endpoints {
eps.Endpoints[i].Addresses = []string{fmt.Sprintf("10.0.%d.%d", i%256, i/256)}
}
@ -7485,7 +7485,7 @@ func TestEndpointCommentElision(t *testing.T) {
}),
makeTestEndpointSlice("ns2", "svc2", 1, func(eps *discovery.EndpointSlice) {
eps.AddressType = discovery.AddressTypeIPv4
eps.Endpoints = make([]discovery.Endpoint, endpointChainsNumberThreshold/2-1)
eps.Endpoints = make([]discovery.Endpoint, largeClusterEndpointsThreshold/2-1)
for i := range eps.Endpoints {
eps.Endpoints[i].Addresses = []string{fmt.Sprintf("10.1.%d.%d", i%256, i/256)}
}
@ -7498,15 +7498,15 @@ func TestEndpointCommentElision(t *testing.T) {
)
fp.syncProxyRules()
expectedEndpoints := 2 * (largeClusterEndpointsThreshold/2 - 1)
expectedEndpoints := 2 * (endpointChainsNumberThreshold/2 - 1)
firstEndpoint, numEndpoints, numComments := countEndpointsAndComments(fp.iptablesData.String(), "10.0.0.0")
assert.Equal(t, "-A KUBE-SEP-DKGQUZGBKLTPAR56 -m comment --comment ns1/svc1:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.0.0:80", firstEndpoint)
if numEndpoints != expectedEndpoints {
t.Errorf("Found wrong number of endpoints: expected %d, got %d", expectedEndpoints, numEndpoints)
}
if numComments != numEndpoints {
t.Errorf("numComments (%d) != numEndpoints (%d) when numEndpoints < threshold (%d)", numComments, numEndpoints, endpointChainsNumberThreshold)
t.Errorf("numComments (%d) != numEndpoints (%d) when numEndpoints < threshold (%d)", numComments, numEndpoints, largeClusterEndpointsThreshold)
}
fp.OnEndpointSliceAdd(makeTestEndpointSlice("ns3", "svc3", 1, func(eps *discovery.EndpointSlice) {
@ -7525,15 +7525,15 @@ func TestEndpointCommentElision(t *testing.T) {
}}
}))
fp.syncProxyRules()
expectedEndpoints += 3
firstEndpoint, numEndpoints, numComments = countEndpointsAndComments(fp.iptablesData.String(), "10.0.0.0")
assert.Equal(t, "-A KUBE-SEP-DKGQUZGBKLTPAR56 -m tcp -p tcp -j DNAT --to-destination 10.0.0.0:80", firstEndpoint)
if numEndpoints != expectedEndpoints {
t.Errorf("Found wrong number of endpoints: expected %d, got %d", expectedEndpoints, numEndpoints)
}
if numComments != 0 {
t.Errorf("numComments (%d) != 0 when numEndpoints (%d) > threshold (%d)", numComments, numEndpoints, endpointChainsNumberThreshold)
t.Errorf("numComments (%d) != 0 when numEndpoints (%d) > threshold (%d)", numComments, numEndpoints, largeClusterEndpointsThreshold)
}
}