proxy/iptables: abstract the "endpointChainsNumberThreshold" a bit

Turn this threshold into a generic "large cluster mode" flag that determines
whether we optimize the generated iptables rules for performance or for debuggability.
This commit is contained in:
Dan Winship 2022-06-01 14:21:58 -04:00
parent c12da17838
commit 1cd461bd24
2 changed files with 23 additions and 19 deletions

View File

@ -80,6 +80,11 @@ const (
// kube proxy canary chain is used for monitoring rule reload // kube proxy canary chain is used for monitoring rule reload
kubeProxyCanaryChain utiliptables.Chain = "KUBE-PROXY-CANARY" kubeProxyCanaryChain utiliptables.Chain = "KUBE-PROXY-CANARY"
// largeClusterEndpointsThreshold is the number of endpoints at which
// we switch into "large cluster mode" and optimize for iptables
// performance over iptables debuggability
largeClusterEndpointsThreshold = 1000
) )
// KernelCompatTester tests whether the required kernel capabilities are // KernelCompatTester tests whether the required kernel capabilities are
@ -219,11 +224,10 @@ type Proxier struct {
natChains utilproxy.LineBuffer natChains utilproxy.LineBuffer
natRules utilproxy.LineBuffer natRules utilproxy.LineBuffer
// endpointChainsNumber is the total amount of endpointChains across all // largeClusterMode is set at the beginning of syncProxyRules if we are
// services that we will generate (it is computed at the beginning of // going to end up outputting "lots" of iptables rules and so we need to
// syncProxyRules method). If that is large enough, comments in some // optimize for performance over debuggability.
// iptable rules are dropped to improve performance. largeClusterMode bool
endpointChainsNumber int
// Values are as a parameter to select the interfaces where nodeport works. // Values are as a parameter to select the interfaces where nodeport works.
nodePortAddresses []string nodePortAddresses []string
@ -787,14 +791,12 @@ func (proxier *Proxier) deleteEndpointConnections(connectionMap []proxy.ServiceE
} }
} }
const endpointChainsNumberThreshold = 1000
// Assumes proxier.mu is held. // Assumes proxier.mu is held.
func (proxier *Proxier) appendServiceCommentLocked(args []string, svcName string) []string { func (proxier *Proxier) appendServiceCommentLocked(args []string, svcName string) []string {
// Not printing these comments, can reduce size of iptables (in case of large // Not printing these comments, can reduce size of iptables (in case of large
// number of endpoints) even by 40%+. So if total number of endpoint chains // number of endpoints) even by 40%+. So if total number of endpoint chains
// is large enough, we simply drop those comments. // is large enough, we simply drop those comments.
if proxier.endpointChainsNumber > endpointChainsNumberThreshold { if proxier.largeClusterMode {
return args return args
} }
return append(args, "-m", "comment", "--comment", svcName) return append(args, "-m", "comment", "--comment", svcName)
@ -956,11 +958,13 @@ func (proxier *Proxier) syncProxyRules() {
// is just for efficiency, not correctness. // is just for efficiency, not correctness.
args := make([]string, 64) args := make([]string, 64)
// Compute total number of endpoint chains across all services. // Compute total number of endpoint chains across all services to get
proxier.endpointChainsNumber = 0 // a sense of how big the cluster is.
totalEndpoints := 0
for svcName := range proxier.serviceMap { for svcName := range proxier.serviceMap {
proxier.endpointChainsNumber += len(proxier.endpointsMap[svcName]) totalEndpoints += len(proxier.endpointsMap[svcName])
} }
proxier.largeClusterMode = (totalEndpoints > largeClusterEndpointsThreshold)
nodeAddresses, err := utilproxy.GetNodeAddresses(proxier.nodePortAddresses, proxier.networkInterfacer) nodeAddresses, err := utilproxy.GetNodeAddresses(proxier.nodePortAddresses, proxier.networkInterfacer)
if err != nil { if err != nil {
@ -1422,7 +1426,7 @@ func (proxier *Proxier) syncProxyRules() {
klog.V(2).InfoS("Reloading service iptables data", klog.V(2).InfoS("Reloading service iptables data",
"numServices", len(proxier.serviceMap), "numServices", len(proxier.serviceMap),
"numEndpoints", proxier.endpointChainsNumber, "numEndpoints", totalEndpoints,
"numFilterChains", proxier.filterChains.Lines(), "numFilterChains", proxier.filterChains.Lines(),
"numFilterRules", proxier.filterRules.Lines(), "numFilterRules", proxier.filterRules.Lines(),
"numNATChains", proxier.natChains.Lines(), "numNATChains", proxier.natChains.Lines(),

View File

@ -7434,7 +7434,7 @@ func countEndpointsAndComments(iptablesData string, matchEndpoint string) (strin
return matched, numEndpoints, numComments return matched, numEndpoints, numComments
} }
func TestEndpointCommentElision(t *testing.T) { func TestSyncProxyRulesLargeClusterMode(t *testing.T) {
ipt := iptablestest.NewFake() ipt := iptablestest.NewFake()
fp := NewFakeProxier(ipt) fp := NewFakeProxier(ipt)
fp.masqueradeAll = true fp.masqueradeAll = true
@ -7473,7 +7473,7 @@ func TestEndpointCommentElision(t *testing.T) {
populateEndpointSlices(fp, populateEndpointSlices(fp,
makeTestEndpointSlice("ns1", "svc1", 1, func(eps *discovery.EndpointSlice) { makeTestEndpointSlice("ns1", "svc1", 1, func(eps *discovery.EndpointSlice) {
eps.AddressType = discovery.AddressTypeIPv4 eps.AddressType = discovery.AddressTypeIPv4
eps.Endpoints = make([]discovery.Endpoint, endpointChainsNumberThreshold/2-1) eps.Endpoints = make([]discovery.Endpoint, largeClusterEndpointsThreshold/2-1)
for i := range eps.Endpoints { for i := range eps.Endpoints {
eps.Endpoints[i].Addresses = []string{fmt.Sprintf("10.0.%d.%d", i%256, i/256)} eps.Endpoints[i].Addresses = []string{fmt.Sprintf("10.0.%d.%d", i%256, i/256)}
} }
@ -7485,7 +7485,7 @@ func TestEndpointCommentElision(t *testing.T) {
}), }),
makeTestEndpointSlice("ns2", "svc2", 1, func(eps *discovery.EndpointSlice) { makeTestEndpointSlice("ns2", "svc2", 1, func(eps *discovery.EndpointSlice) {
eps.AddressType = discovery.AddressTypeIPv4 eps.AddressType = discovery.AddressTypeIPv4
eps.Endpoints = make([]discovery.Endpoint, endpointChainsNumberThreshold/2-1) eps.Endpoints = make([]discovery.Endpoint, largeClusterEndpointsThreshold/2-1)
for i := range eps.Endpoints { for i := range eps.Endpoints {
eps.Endpoints[i].Addresses = []string{fmt.Sprintf("10.1.%d.%d", i%256, i/256)} eps.Endpoints[i].Addresses = []string{fmt.Sprintf("10.1.%d.%d", i%256, i/256)}
} }
@ -7498,15 +7498,15 @@ func TestEndpointCommentElision(t *testing.T) {
) )
fp.syncProxyRules() fp.syncProxyRules()
expectedEndpoints := 2 * (largeClusterEndpointsThreshold/2 - 1)
expectedEndpoints := 2 * (endpointChainsNumberThreshold/2 - 1)
firstEndpoint, numEndpoints, numComments := countEndpointsAndComments(fp.iptablesData.String(), "10.0.0.0") firstEndpoint, numEndpoints, numComments := countEndpointsAndComments(fp.iptablesData.String(), "10.0.0.0")
assert.Equal(t, "-A KUBE-SEP-DKGQUZGBKLTPAR56 -m comment --comment ns1/svc1:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.0.0:80", firstEndpoint) assert.Equal(t, "-A KUBE-SEP-DKGQUZGBKLTPAR56 -m comment --comment ns1/svc1:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.0.0:80", firstEndpoint)
if numEndpoints != expectedEndpoints { if numEndpoints != expectedEndpoints {
t.Errorf("Found wrong number of endpoints: expected %d, got %d", expectedEndpoints, numEndpoints) t.Errorf("Found wrong number of endpoints: expected %d, got %d", expectedEndpoints, numEndpoints)
} }
if numComments != numEndpoints { if numComments != numEndpoints {
t.Errorf("numComments (%d) != numEndpoints (%d) when numEndpoints < threshold (%d)", numComments, numEndpoints, endpointChainsNumberThreshold) t.Errorf("numComments (%d) != numEndpoints (%d) when numEndpoints < threshold (%d)", numComments, numEndpoints, largeClusterEndpointsThreshold)
} }
fp.OnEndpointSliceAdd(makeTestEndpointSlice("ns3", "svc3", 1, func(eps *discovery.EndpointSlice) { fp.OnEndpointSliceAdd(makeTestEndpointSlice("ns3", "svc3", 1, func(eps *discovery.EndpointSlice) {
@ -7525,15 +7525,15 @@ func TestEndpointCommentElision(t *testing.T) {
}} }}
})) }))
fp.syncProxyRules() fp.syncProxyRules()
expectedEndpoints += 3 expectedEndpoints += 3
firstEndpoint, numEndpoints, numComments = countEndpointsAndComments(fp.iptablesData.String(), "10.0.0.0") firstEndpoint, numEndpoints, numComments = countEndpointsAndComments(fp.iptablesData.String(), "10.0.0.0")
assert.Equal(t, "-A KUBE-SEP-DKGQUZGBKLTPAR56 -m tcp -p tcp -j DNAT --to-destination 10.0.0.0:80", firstEndpoint) assert.Equal(t, "-A KUBE-SEP-DKGQUZGBKLTPAR56 -m tcp -p tcp -j DNAT --to-destination 10.0.0.0:80", firstEndpoint)
if numEndpoints != expectedEndpoints { if numEndpoints != expectedEndpoints {
t.Errorf("Found wrong number of endpoints: expected %d, got %d", expectedEndpoints, numEndpoints) t.Errorf("Found wrong number of endpoints: expected %d, got %d", expectedEndpoints, numEndpoints)
} }
if numComments != 0 { if numComments != 0 {
t.Errorf("numComments (%d) != 0 when numEndpoints (%d) > threshold (%d)", numComments, numEndpoints, endpointChainsNumberThreshold) t.Errorf("numComments (%d) != 0 when numEndpoints (%d) > threshold (%d)", numComments, numEndpoints, largeClusterEndpointsThreshold)
} }
} }