From c12da17838a91917d379f4cc52079c417241530a Mon Sep 17 00:00:00 2001
From: Dan Winship
Date: Fri, 24 Jun 2022 09:07:56 -0400
Subject: [PATCH 1/3] proxy/iptables: Add a unit test with multiple resyncs

---
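Note (below the cut line, not part of the commit message): each assertion
in this test passes getLine() so that a mismatch is reported against the
line in the test body that made the call. A minimal sketch of that helper,
assuming it is built on runtime.Caller as in the existing test file:

	// getLine returns the line number of the code that called it, so a
	// shared assertion helper can point failures back at the test body.
	func getLine() int {
		_, _, line, ok := runtime.Caller(1)
		if ok {
			return line
		}
		return 0
	}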
 pkg/proxy/iptables/proxier_test.go | 488 +++++++++++++++++++++++++++++
 1 file changed, 488 insertions(+)

diff --git a/pkg/proxy/iptables/proxier_test.go b/pkg/proxy/iptables/proxier_test.go
index 9b062a7120a..2c98c1edd90 100644
--- a/pkg/proxy/iptables/proxier_test.go
+++ b/pkg/proxy/iptables/proxier_test.go
@@ -7537,6 +7537,494 @@ func TestEndpointCommentElision(t *testing.T) {
 	}
 }
 
+// Test calling syncProxyRules() multiple times with various changes
+func TestSyncProxyRulesRepeated(t *testing.T) {
+	ipt := iptablestest.NewFake()
+	fp := NewFakeProxier(ipt)
+
+	// Create initial state
+	var svc2 *v1.Service
+
+	makeServiceMap(fp,
+		makeTestService("ns1", "svc1", func(svc *v1.Service) {
+			svc.Spec.Type = v1.ServiceTypeClusterIP
+			svc.Spec.ClusterIP = "172.30.0.41"
+			svc.Spec.Ports = []v1.ServicePort{{
+				Name: "p80",
+				Port: 80,
+				Protocol: v1.ProtocolTCP,
+			}}
+		}),
+		makeTestService("ns2", "svc2", func(svc *v1.Service) {
+			svc2 = svc
+			svc.Spec.Type = v1.ServiceTypeClusterIP
+			svc.Spec.ClusterIP = "172.30.0.42"
+			svc.Spec.Ports = []v1.ServicePort{{
+				Name: "p8080",
+				Port: 8080,
+				Protocol: v1.ProtocolTCP,
+			}}
+		}),
+	)
+
+	tcpProtocol := v1.ProtocolTCP
+	populateEndpointSlices(fp,
+		makeTestEndpointSlice("ns1", "svc1", 1, func(eps *discovery.EndpointSlice) {
+			eps.AddressType = discovery.AddressTypeIPv4
+			eps.Endpoints = []discovery.Endpoint{{
+				Addresses: []string{"10.0.1.1"},
+			}}
+			eps.Ports = []discovery.EndpointPort{{
+				Name: utilpointer.StringPtr("p80"),
+				Port: utilpointer.Int32(80),
+				Protocol: &tcpProtocol,
+			}}
+		}),
+		makeTestEndpointSlice("ns2", "svc2", 1, func(eps *discovery.EndpointSlice) {
+			eps.AddressType = discovery.AddressTypeIPv4
+			eps.Endpoints = []discovery.Endpoint{{
+				Addresses: []string{"10.0.2.1"},
+			}}
+			eps.Ports = []discovery.EndpointPort{{
+				Name: utilpointer.StringPtr("p8080"),
+				Port: utilpointer.Int32(8080),
+				Protocol: &tcpProtocol,
+			}}
+		}),
+	)
+
+	fp.syncProxyRules()
+
+	expected := dedent.Dedent(`
+		*filter
+		:KUBE-NODEPORTS - [0:0]
+		:KUBE-SERVICES - [0:0]
+		:KUBE-EXTERNAL-SERVICES - [0:0]
+		:KUBE-FORWARD - [0:0]
+		-A KUBE-FORWARD -m conntrack --ctstate INVALID -j DROP
+		-A KUBE-FORWARD -m comment --comment "kubernetes forwarding rules" -m mark --mark 0x4000/0x4000 -j ACCEPT
+		-A KUBE-FORWARD -m comment --comment "kubernetes forwarding conntrack rule" -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
+		COMMIT
+		*nat
+		:KUBE-NODEPORTS - [0:0]
+		:KUBE-SERVICES - [0:0]
+		:KUBE-MARK-MASQ - [0:0]
+		:KUBE-POSTROUTING - [0:0]
+		:KUBE-SEP-SNQ3ZNILQDEJNDQO - [0:0]
+		:KUBE-SEP-UHEGFW77JX3KXTOV - [0:0]
+		:KUBE-SVC-2VJB64SDSIJUP5T6 - [0:0]
+		:KUBE-SVC-XPGD46QRK7WJZT7O - [0:0]
+		-A KUBE-SERVICES -m comment --comment "ns1/svc1:p80 cluster IP" -m tcp -p tcp -d 172.30.0.41 --dport 80 -j KUBE-SVC-XPGD46QRK7WJZT7O
+		-A KUBE-SERVICES -m comment --comment "ns2/svc2:p8080 cluster IP" -m tcp -p tcp -d 172.30.0.42 --dport 8080 -j KUBE-SVC-2VJB64SDSIJUP5T6
+		-A KUBE-SERVICES -m comment --comment "kubernetes service nodeports; NOTE: this must be the last rule in this chain" -m addrtype --dst-type LOCAL -j KUBE-NODEPORTS
+		-A KUBE-MARK-MASQ -j MARK --or-mark 0x4000
+		-A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN
+		-A KUBE-POSTROUTING -j MARK --xor-mark 0x4000
+		-A KUBE-POSTROUTING -m comment --comment "kubernetes service traffic requiring SNAT" -j MASQUERADE
+		-A KUBE-SEP-SNQ3ZNILQDEJNDQO -m comment --comment ns1/svc1:p80 -s 10.0.1.1 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-SNQ3ZNILQDEJNDQO -m comment --comment ns1/svc1:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.1.1:80
+		-A KUBE-SEP-UHEGFW77JX3KXTOV -m comment --comment ns2/svc2:p8080 -s 10.0.2.1 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-UHEGFW77JX3KXTOV -m comment --comment ns2/svc2:p8080 -m tcp -p tcp -j DNAT --to-destination 10.0.2.1:8080
+		-A KUBE-SVC-2VJB64SDSIJUP5T6 -m comment --comment "ns2/svc2:p8080 cluster IP" -m tcp -p tcp -d 172.30.0.42 --dport 8080 ! -s 10.0.0.0/8 -j KUBE-MARK-MASQ
+		-A KUBE-SVC-2VJB64SDSIJUP5T6 -m comment --comment "ns2/svc2:p8080 -> 10.0.2.1:8080" -j KUBE-SEP-UHEGFW77JX3KXTOV
+		-A KUBE-SVC-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 cluster IP" -m tcp -p tcp -d 172.30.0.41 --dport 80 ! -s 10.0.0.0/8 -j KUBE-MARK-MASQ
+		-A KUBE-SVC-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 -> 10.0.1.1:80" -j KUBE-SEP-SNQ3ZNILQDEJNDQO
+		COMMIT
+		`)
+	assertIPTablesRulesEqual(t, getLine(), expected, fp.iptablesData.String())
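+
+	// (The KUBE-SVC-*/KUBE-SEP-* suffixes above are deterministic hashes of
+	// the service port or endpoint, which is why they can be hard-coded in
+	// these expectations. Roughly -- a sketch, assuming the unexported
+	// portProtoHash helper keeps its current shape:
+	//
+	//	hash := sha256.Sum256([]byte(servicePortName + protocol))
+	//	name := "KUBE-SVC-" + base32.StdEncoding.EncodeToString(hash[:])[:16]
+	//
+	// so the same service always maps back to the same chain on every sync.)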
+
+	// Add a new service and its endpoints
+	makeServiceMap(fp,
+		makeTestService("ns3", "svc3", func(svc *v1.Service) {
+			svc.Spec.Type = v1.ServiceTypeClusterIP
+			svc.Spec.ClusterIP = "172.30.0.43"
+			svc.Spec.Ports = []v1.ServicePort{{
+				Name: "p80",
+				Port: 80,
+				Protocol: v1.ProtocolTCP,
+			}}
+		}),
+	)
+	var eps3 *discovery.EndpointSlice
+	populateEndpointSlices(fp,
+		makeTestEndpointSlice("ns3", "svc3", 1, func(eps *discovery.EndpointSlice) {
+			eps3 = eps
+			eps.AddressType = discovery.AddressTypeIPv4
+			eps.Endpoints = []discovery.Endpoint{{
+				Addresses: []string{"10.0.3.1"},
+			}}
+			eps.Ports = []discovery.EndpointPort{{
+				Name: utilpointer.StringPtr("p80"),
+				Port: utilpointer.Int32(80),
+				Protocol: &tcpProtocol,
+			}}
+		}),
+	)
+	fp.syncProxyRules()
+
+	expected = dedent.Dedent(`
+		*filter
+		:KUBE-NODEPORTS - [0:0]
+		:KUBE-SERVICES - [0:0]
+		:KUBE-EXTERNAL-SERVICES - [0:0]
+		:KUBE-FORWARD - [0:0]
+		-A KUBE-FORWARD -m conntrack --ctstate INVALID -j DROP
+		-A KUBE-FORWARD -m comment --comment "kubernetes forwarding rules" -m mark --mark 0x4000/0x4000 -j ACCEPT
+		-A KUBE-FORWARD -m comment --comment "kubernetes forwarding conntrack rule" -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
+		COMMIT
+		*nat
+		:KUBE-NODEPORTS - [0:0]
+		:KUBE-SERVICES - [0:0]
+		:KUBE-MARK-MASQ - [0:0]
+		:KUBE-POSTROUTING - [0:0]
+		:KUBE-SEP-BSWRHOQ77KEXZLNL - [0:0]
+		:KUBE-SEP-SNQ3ZNILQDEJNDQO - [0:0]
+		:KUBE-SEP-UHEGFW77JX3KXTOV - [0:0]
+		:KUBE-SVC-2VJB64SDSIJUP5T6 - [0:0]
+		:KUBE-SVC-X27LE4BHSL4DOUIK - [0:0]
+		:KUBE-SVC-XPGD46QRK7WJZT7O - [0:0]
+		-A KUBE-SERVICES -m comment --comment "ns1/svc1:p80 cluster IP" -m tcp -p tcp -d 172.30.0.41 --dport 80 -j KUBE-SVC-XPGD46QRK7WJZT7O
+		-A KUBE-SERVICES -m comment --comment "ns2/svc2:p8080 cluster IP" -m tcp -p tcp -d 172.30.0.42 --dport 8080 -j KUBE-SVC-2VJB64SDSIJUP5T6
+		-A KUBE-SERVICES -m comment --comment "ns3/svc3:p80 cluster IP" -m tcp -p tcp -d 172.30.0.43 --dport 80 -j KUBE-SVC-X27LE4BHSL4DOUIK
+		-A KUBE-SERVICES -m comment --comment "kubernetes service nodeports; NOTE: this must be the last rule in this chain" -m addrtype --dst-type LOCAL -j KUBE-NODEPORTS
+		-A KUBE-MARK-MASQ -j MARK --or-mark 0x4000
+		-A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN
+		-A KUBE-POSTROUTING -j MARK --xor-mark 0x4000
+		-A KUBE-POSTROUTING -m comment --comment "kubernetes service traffic requiring SNAT" -j MASQUERADE
+		-A KUBE-SEP-BSWRHOQ77KEXZLNL -m comment --comment ns3/svc3:p80 -s 10.0.3.1 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-BSWRHOQ77KEXZLNL -m comment --comment ns3/svc3:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.3.1:80
+		-A KUBE-SEP-SNQ3ZNILQDEJNDQO -m comment --comment ns1/svc1:p80 -s 10.0.1.1 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-SNQ3ZNILQDEJNDQO -m comment --comment ns1/svc1:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.1.1:80
+		-A KUBE-SEP-UHEGFW77JX3KXTOV -m comment --comment ns2/svc2:p8080 -s 10.0.2.1 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-UHEGFW77JX3KXTOV -m comment --comment ns2/svc2:p8080 -m tcp -p tcp -j DNAT --to-destination 10.0.2.1:8080
+		-A KUBE-SVC-2VJB64SDSIJUP5T6 -m comment --comment "ns2/svc2:p8080 cluster IP" -m tcp -p tcp -d 172.30.0.42 --dport 8080 ! -s 10.0.0.0/8 -j KUBE-MARK-MASQ
+		-A KUBE-SVC-2VJB64SDSIJUP5T6 -m comment --comment "ns2/svc2:p8080 -> 10.0.2.1:8080" -j KUBE-SEP-UHEGFW77JX3KXTOV
+		-A KUBE-SVC-X27LE4BHSL4DOUIK -m comment --comment "ns3/svc3:p80 cluster IP" -m tcp -p tcp -d 172.30.0.43 --dport 80 ! -s 10.0.0.0/8 -j KUBE-MARK-MASQ
+		-A KUBE-SVC-X27LE4BHSL4DOUIK -m comment --comment "ns3/svc3:p80 -> 10.0.3.1:80" -j KUBE-SEP-BSWRHOQ77KEXZLNL
+		-A KUBE-SVC-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 cluster IP" -m tcp -p tcp -d 172.30.0.41 --dport 80 ! -s 10.0.0.0/8 -j KUBE-MARK-MASQ
+		-A KUBE-SVC-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 -> 10.0.1.1:80" -j KUBE-SEP-SNQ3ZNILQDEJNDQO
+		COMMIT
+		`)
+	assertIPTablesRulesEqual(t, getLine(), expected, fp.iptablesData.String())
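+
+	// (How the 0x4000 masquerade mark in the rules above flows:
+	// KUBE-MARK-MASQ sets the bit with --or-mark, KUBE-POSTROUTING
+	// RETURNs early for packets without it, clears it with
+	// --xor-mark 0x4000, and then MASQUERADEs whatever remains.)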
+
+	// Delete a service
+	fp.OnServiceDelete(svc2)
+	fp.syncProxyRules()
+
+	expected = dedent.Dedent(`
+		*filter
+		:KUBE-NODEPORTS - [0:0]
+		:KUBE-SERVICES - [0:0]
+		:KUBE-EXTERNAL-SERVICES - [0:0]
+		:KUBE-FORWARD - [0:0]
+		-A KUBE-FORWARD -m conntrack --ctstate INVALID -j DROP
+		-A KUBE-FORWARD -m comment --comment "kubernetes forwarding rules" -m mark --mark 0x4000/0x4000 -j ACCEPT
+		-A KUBE-FORWARD -m comment --comment "kubernetes forwarding conntrack rule" -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
+		COMMIT
+		*nat
+		:KUBE-NODEPORTS - [0:0]
+		:KUBE-SERVICES - [0:0]
+		:KUBE-MARK-MASQ - [0:0]
+		:KUBE-POSTROUTING - [0:0]
+		:KUBE-SEP-BSWRHOQ77KEXZLNL - [0:0]
+		:KUBE-SEP-SNQ3ZNILQDEJNDQO - [0:0]
+		:KUBE-SEP-UHEGFW77JX3KXTOV - [0:0]
+		:KUBE-SVC-2VJB64SDSIJUP5T6 - [0:0]
+		:KUBE-SVC-X27LE4BHSL4DOUIK - [0:0]
+		:KUBE-SVC-XPGD46QRK7WJZT7O - [0:0]
+		-A KUBE-SERVICES -m comment --comment "ns1/svc1:p80 cluster IP" -m tcp -p tcp -d 172.30.0.41 --dport 80 -j KUBE-SVC-XPGD46QRK7WJZT7O
+		-A KUBE-SERVICES -m comment --comment "ns3/svc3:p80 cluster IP" -m tcp -p tcp -d 172.30.0.43 --dport 80 -j KUBE-SVC-X27LE4BHSL4DOUIK
+		-A KUBE-SERVICES -m comment --comment "kubernetes service nodeports; NOTE: this must be the last rule in this chain" -m addrtype --dst-type LOCAL -j KUBE-NODEPORTS
+		-A KUBE-MARK-MASQ -j MARK --or-mark 0x4000
+		-A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN
+		-A KUBE-POSTROUTING -j MARK --xor-mark 0x4000
+		-A KUBE-POSTROUTING -m comment --comment "kubernetes service traffic requiring SNAT" -j MASQUERADE
+		-A KUBE-SEP-BSWRHOQ77KEXZLNL -m comment --comment ns3/svc3:p80 -s 10.0.3.1 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-BSWRHOQ77KEXZLNL -m comment --comment ns3/svc3:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.3.1:80
+		-A KUBE-SEP-SNQ3ZNILQDEJNDQO -m comment --comment ns1/svc1:p80 -s 10.0.1.1 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-SNQ3ZNILQDEJNDQO -m comment --comment ns1/svc1:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.1.1:80
+		-A KUBE-SVC-X27LE4BHSL4DOUIK -m comment --comment "ns3/svc3:p80 cluster IP" -m tcp -p tcp -d 172.30.0.43 --dport 80 ! -s 10.0.0.0/8 -j KUBE-MARK-MASQ
+		-A KUBE-SVC-X27LE4BHSL4DOUIK -m comment --comment "ns3/svc3:p80 -> 10.0.3.1:80" -j KUBE-SEP-BSWRHOQ77KEXZLNL
+		-A KUBE-SVC-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 cluster IP" -m tcp -p tcp -d 172.30.0.41 --dport 80 ! -s 10.0.0.0/8 -j KUBE-MARK-MASQ
+		-A KUBE-SVC-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 -> 10.0.1.1:80" -j KUBE-SEP-SNQ3ZNILQDEJNDQO
+		-X KUBE-SEP-UHEGFW77JX3KXTOV
+		-X KUBE-SVC-2VJB64SDSIJUP5T6
+		COMMIT
+		`)
+	assertIPTablesRulesEqual(t, getLine(), expected, fp.iptablesData.String())
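+
+	// (Note the deletion pattern above: the stale chains are still declared
+	// (":KUBE-SVC-2VJB64SDSIJUP5T6 - [0:0]"), which flushes them, and are
+	// then removed with "-X". iptables-restore requires a chain-line before
+	// a chain can be deleted in the same transaction.)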
+
+	// Add a service, sync, then add its endpoints
+	makeServiceMap(fp,
+		makeTestService("ns4", "svc4", func(svc *v1.Service) {
+			svc.Spec.Type = v1.ServiceTypeClusterIP
+			svc.Spec.ClusterIP = "172.30.0.44"
+			svc.Spec.Ports = []v1.ServicePort{{
+				Name: "p80",
+				Port: 80,
+				Protocol: v1.ProtocolTCP,
+			}}
+		}),
+	)
+	fp.syncProxyRules()
+	expected = dedent.Dedent(`
+		*filter
+		:KUBE-NODEPORTS - [0:0]
+		:KUBE-SERVICES - [0:0]
+		:KUBE-EXTERNAL-SERVICES - [0:0]
+		:KUBE-FORWARD - [0:0]
+		-A KUBE-SERVICES -m comment --comment "ns4/svc4:p80 has no endpoints" -m tcp -p tcp -d 172.30.0.44 --dport 80 -j REJECT
+		-A KUBE-FORWARD -m conntrack --ctstate INVALID -j DROP
+		-A KUBE-FORWARD -m comment --comment "kubernetes forwarding rules" -m mark --mark 0x4000/0x4000 -j ACCEPT
+		-A KUBE-FORWARD -m comment --comment "kubernetes forwarding conntrack rule" -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
+		COMMIT
+		*nat
+		:KUBE-NODEPORTS - [0:0]
+		:KUBE-SERVICES - [0:0]
+		:KUBE-MARK-MASQ - [0:0]
+		:KUBE-POSTROUTING - [0:0]
+		:KUBE-SEP-BSWRHOQ77KEXZLNL - [0:0]
+		:KUBE-SEP-SNQ3ZNILQDEJNDQO - [0:0]
+		:KUBE-SVC-X27LE4BHSL4DOUIK - [0:0]
+		:KUBE-SVC-XPGD46QRK7WJZT7O - [0:0]
+		-A KUBE-SERVICES -m comment --comment "ns1/svc1:p80 cluster IP" -m tcp -p tcp -d 172.30.0.41 --dport 80 -j KUBE-SVC-XPGD46QRK7WJZT7O
+		-A KUBE-SERVICES -m comment --comment "ns3/svc3:p80 cluster IP" -m tcp -p tcp -d 172.30.0.43 --dport 80 -j KUBE-SVC-X27LE4BHSL4DOUIK
+		-A KUBE-SERVICES -m comment --comment "kubernetes service nodeports; NOTE: this must be the last rule in this chain" -m addrtype --dst-type LOCAL -j KUBE-NODEPORTS
+		-A KUBE-MARK-MASQ -j MARK --or-mark 0x4000
+		-A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN
+		-A KUBE-POSTROUTING -j MARK --xor-mark 0x4000
+		-A KUBE-POSTROUTING -m comment --comment "kubernetes service traffic requiring SNAT" -j MASQUERADE
+		-A KUBE-SEP-BSWRHOQ77KEXZLNL -m comment --comment ns3/svc3:p80 -s 10.0.3.1 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-BSWRHOQ77KEXZLNL -m comment --comment ns3/svc3:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.3.1:80
+		-A KUBE-SEP-SNQ3ZNILQDEJNDQO -m comment --comment ns1/svc1:p80 -s 10.0.1.1 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-SNQ3ZNILQDEJNDQO -m comment --comment ns1/svc1:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.1.1:80
+		-A KUBE-SVC-X27LE4BHSL4DOUIK -m comment --comment "ns3/svc3:p80 cluster IP" -m tcp -p tcp -d 172.30.0.43 --dport 80 ! -s 10.0.0.0/8 -j KUBE-MARK-MASQ
+		-A KUBE-SVC-X27LE4BHSL4DOUIK -m comment --comment "ns3/svc3:p80 -> 10.0.3.1:80" -j KUBE-SEP-BSWRHOQ77KEXZLNL
+		-A KUBE-SVC-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 cluster IP" -m tcp -p tcp -d 172.30.0.41 --dport 80 ! -s 10.0.0.0/8 -j KUBE-MARK-MASQ
+		-A KUBE-SVC-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 -> 10.0.1.1:80" -j KUBE-SEP-SNQ3ZNILQDEJNDQO
+		COMMIT
+		`)
+	assertIPTablesRulesEqual(t, getLine(), expected, fp.iptablesData.String())
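+
+	// (A service with no endpoints gets no NAT chains at all; instead a
+	// single REJECT rule is emitted in the *filter* table, which is why the
+	// "has no endpoints" line above appears before COMMIT in *filter*
+	// rather than in *nat*.)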
+
+	populateEndpointSlices(fp,
+		makeTestEndpointSlice("ns4", "svc4", 1, func(eps *discovery.EndpointSlice) {
+			eps.AddressType = discovery.AddressTypeIPv4
+			eps.Endpoints = []discovery.Endpoint{{
+				Addresses: []string{"10.0.4.1"},
+			}}
+			eps.Ports = []discovery.EndpointPort{{
+				Name: utilpointer.StringPtr("p80"),
+				Port: utilpointer.Int32(80),
+				Protocol: &tcpProtocol,
+			}}
+		}),
+	)
+	fp.syncProxyRules()
+	expected = dedent.Dedent(`
+		*filter
+		:KUBE-NODEPORTS - [0:0]
+		:KUBE-SERVICES - [0:0]
+		:KUBE-EXTERNAL-SERVICES - [0:0]
+		:KUBE-FORWARD - [0:0]
+		-A KUBE-FORWARD -m conntrack --ctstate INVALID -j DROP
+		-A KUBE-FORWARD -m comment --comment "kubernetes forwarding rules" -m mark --mark 0x4000/0x4000 -j ACCEPT
+		-A KUBE-FORWARD -m comment --comment "kubernetes forwarding conntrack rule" -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
+		COMMIT
+		*nat
+		:KUBE-NODEPORTS - [0:0]
+		:KUBE-SERVICES - [0:0]
+		:KUBE-MARK-MASQ - [0:0]
+		:KUBE-POSTROUTING - [0:0]
+		:KUBE-SEP-AYCN5HPXMIRJNJXU - [0:0]
+		:KUBE-SEP-BSWRHOQ77KEXZLNL - [0:0]
+		:KUBE-SEP-SNQ3ZNILQDEJNDQO - [0:0]
+		:KUBE-SVC-4SW47YFZTEDKD3PK - [0:0]
+		:KUBE-SVC-X27LE4BHSL4DOUIK - [0:0]
+		:KUBE-SVC-XPGD46QRK7WJZT7O - [0:0]
+		-A KUBE-SERVICES -m comment --comment "ns1/svc1:p80 cluster IP" -m tcp -p tcp -d 172.30.0.41 --dport 80 -j KUBE-SVC-XPGD46QRK7WJZT7O
+		-A KUBE-SERVICES -m comment --comment "ns3/svc3:p80 cluster IP" -m tcp -p tcp -d 172.30.0.43 --dport 80 -j KUBE-SVC-X27LE4BHSL4DOUIK
+		-A KUBE-SERVICES -m comment --comment "ns4/svc4:p80 cluster IP" -m tcp -p tcp -d 172.30.0.44 --dport 80 -j KUBE-SVC-4SW47YFZTEDKD3PK
+		-A KUBE-SERVICES -m comment --comment "kubernetes service nodeports; NOTE: this must be the last rule in this chain" -m addrtype --dst-type LOCAL -j KUBE-NODEPORTS
+		-A KUBE-MARK-MASQ -j MARK --or-mark 0x4000
+		-A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN
+		-A KUBE-POSTROUTING -j MARK --xor-mark 0x4000
+		-A KUBE-POSTROUTING -m comment --comment "kubernetes service traffic requiring SNAT" -j MASQUERADE
+		-A KUBE-SEP-AYCN5HPXMIRJNJXU -m comment --comment ns4/svc4:p80 -s 10.0.4.1 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-AYCN5HPXMIRJNJXU -m comment --comment ns4/svc4:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.4.1:80
+		-A KUBE-SEP-BSWRHOQ77KEXZLNL -m comment --comment ns3/svc3:p80 -s 10.0.3.1 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-BSWRHOQ77KEXZLNL -m comment --comment ns3/svc3:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.3.1:80
+		-A KUBE-SEP-SNQ3ZNILQDEJNDQO -m comment --comment ns1/svc1:p80 -s 10.0.1.1 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-SNQ3ZNILQDEJNDQO -m comment --comment ns1/svc1:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.1.1:80
+		-A KUBE-SVC-4SW47YFZTEDKD3PK -m comment --comment "ns4/svc4:p80 cluster IP" -m tcp -p tcp -d 172.30.0.44 --dport 80 ! -s 10.0.0.0/8 -j KUBE-MARK-MASQ
+		-A KUBE-SVC-4SW47YFZTEDKD3PK -m comment --comment "ns4/svc4:p80 -> 10.0.4.1:80" -j KUBE-SEP-AYCN5HPXMIRJNJXU
+		-A KUBE-SVC-X27LE4BHSL4DOUIK -m comment --comment "ns3/svc3:p80 cluster IP" -m tcp -p tcp -d 172.30.0.43 --dport 80 ! -s 10.0.0.0/8 -j KUBE-MARK-MASQ
+		-A KUBE-SVC-X27LE4BHSL4DOUIK -m comment --comment "ns3/svc3:p80 -> 10.0.3.1:80" -j KUBE-SEP-BSWRHOQ77KEXZLNL
+		-A KUBE-SVC-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 cluster IP" -m tcp -p tcp -d 172.30.0.41 --dport 80 ! -s 10.0.0.0/8 -j KUBE-MARK-MASQ
+		-A KUBE-SVC-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 -> 10.0.1.1:80" -j KUBE-SEP-SNQ3ZNILQDEJNDQO
+		COMMIT
+		`)
+	assertIPTablesRulesEqual(t, getLine(), expected, fp.iptablesData.String())
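+
+	// (Once svc4 has an endpoint, the filter-table REJECT from the previous
+	// sync disappears and is replaced by the usual KUBE-SVC-4SW47YFZTEDKD3PK
+	// and KUBE-SEP-AYCN5HPXMIRJNJXU NAT chains.)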
+
+	// Change an endpoint of an existing service
+	eps3update := eps3.DeepCopy()
+	eps3update.Endpoints[0].Addresses[0] = "10.0.3.2"
+	fp.OnEndpointSliceUpdate(eps3, eps3update)
+	fp.syncProxyRules()
+
+	expected = dedent.Dedent(`
+		*filter
+		:KUBE-NODEPORTS - [0:0]
+		:KUBE-SERVICES - [0:0]
+		:KUBE-EXTERNAL-SERVICES - [0:0]
+		:KUBE-FORWARD - [0:0]
+		-A KUBE-FORWARD -m conntrack --ctstate INVALID -j DROP
+		-A KUBE-FORWARD -m comment --comment "kubernetes forwarding rules" -m mark --mark 0x4000/0x4000 -j ACCEPT
+		-A KUBE-FORWARD -m comment --comment "kubernetes forwarding conntrack rule" -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
+		COMMIT
+		*nat
+		:KUBE-NODEPORTS - [0:0]
+		:KUBE-SERVICES - [0:0]
+		:KUBE-MARK-MASQ - [0:0]
+		:KUBE-POSTROUTING - [0:0]
+		:KUBE-SEP-AYCN5HPXMIRJNJXU - [0:0]
+		:KUBE-SEP-BSWRHOQ77KEXZLNL - [0:0]
+		:KUBE-SEP-DKCFIS26GWF2WLWC - [0:0]
+		:KUBE-SEP-SNQ3ZNILQDEJNDQO - [0:0]
+		:KUBE-SVC-4SW47YFZTEDKD3PK - [0:0]
+		:KUBE-SVC-X27LE4BHSL4DOUIK - [0:0]
+		:KUBE-SVC-XPGD46QRK7WJZT7O - [0:0]
+		-A KUBE-SERVICES -m comment --comment "ns1/svc1:p80 cluster IP" -m tcp -p tcp -d 172.30.0.41 --dport 80 -j KUBE-SVC-XPGD46QRK7WJZT7O
+		-A KUBE-SERVICES -m comment --comment "ns3/svc3:p80 cluster IP" -m tcp -p tcp -d 172.30.0.43 --dport 80 -j KUBE-SVC-X27LE4BHSL4DOUIK
+		-A KUBE-SERVICES -m comment --comment "ns4/svc4:p80 cluster IP" -m tcp -p tcp -d 172.30.0.44 --dport 80 -j KUBE-SVC-4SW47YFZTEDKD3PK
+		-A KUBE-SERVICES -m comment --comment "kubernetes service nodeports; NOTE: this must be the last rule in this chain" -m addrtype --dst-type LOCAL -j KUBE-NODEPORTS
+		-A KUBE-MARK-MASQ -j MARK --or-mark 0x4000
+		-A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN
+		-A KUBE-POSTROUTING -j MARK --xor-mark 0x4000
+		-A KUBE-POSTROUTING -m comment --comment "kubernetes service traffic requiring SNAT" -j MASQUERADE
+		-A KUBE-SEP-AYCN5HPXMIRJNJXU -m comment --comment ns4/svc4:p80 -s 10.0.4.1 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-AYCN5HPXMIRJNJXU -m comment --comment ns4/svc4:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.4.1:80
+		-A KUBE-SEP-DKCFIS26GWF2WLWC -m comment --comment ns3/svc3:p80 -s 10.0.3.2 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-DKCFIS26GWF2WLWC -m comment --comment ns3/svc3:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.3.2:80
+		-A KUBE-SEP-SNQ3ZNILQDEJNDQO -m comment --comment ns1/svc1:p80 -s 10.0.1.1 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-SNQ3ZNILQDEJNDQO -m comment --comment ns1/svc1:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.1.1:80
+		-A KUBE-SVC-4SW47YFZTEDKD3PK -m comment --comment "ns4/svc4:p80 cluster IP" -m tcp -p tcp -d 172.30.0.44 --dport 80 ! -s 10.0.0.0/8 -j KUBE-MARK-MASQ
+		-A KUBE-SVC-4SW47YFZTEDKD3PK -m comment --comment "ns4/svc4:p80 -> 10.0.4.1:80" -j KUBE-SEP-AYCN5HPXMIRJNJXU
+		-A KUBE-SVC-X27LE4BHSL4DOUIK -m comment --comment "ns3/svc3:p80 cluster IP" -m tcp -p tcp -d 172.30.0.43 --dport 80 ! -s 10.0.0.0/8 -j KUBE-MARK-MASQ
+		-A KUBE-SVC-X27LE4BHSL4DOUIK -m comment --comment "ns3/svc3:p80 -> 10.0.3.2:80" -j KUBE-SEP-DKCFIS26GWF2WLWC
+		-A KUBE-SVC-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 cluster IP" -m tcp -p tcp -d 172.30.0.41 --dport 80 ! -s 10.0.0.0/8 -j KUBE-MARK-MASQ
+		-A KUBE-SVC-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 -> 10.0.1.1:80" -j KUBE-SEP-SNQ3ZNILQDEJNDQO
+		-X KUBE-SEP-BSWRHOQ77KEXZLNL
+		COMMIT
+		`)
+	assertIPTablesRulesEqual(t, getLine(), expected, fp.iptablesData.String())
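+
+	// (Changing the endpoint's address yields a brand-new KUBE-SEP chain --
+	// the chain name is a hash that includes the endpoint address -- so
+	// KUBE-SEP-DKCFIS26GWF2WLWC appears above while the old
+	// KUBE-SEP-BSWRHOQ77KEXZLNL chain is flushed and "-X"-deleted.)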
+
+	// Add an endpoint to a service
+	eps3update2 := eps3update.DeepCopy()
+	eps3update2.Endpoints = append(eps3update2.Endpoints, discovery.Endpoint{Addresses: []string{"10.0.3.3"}})
+	fp.OnEndpointSliceUpdate(eps3update, eps3update2)
+	fp.syncProxyRules()
+
+	expected = dedent.Dedent(`
+		*filter
+		:KUBE-NODEPORTS - [0:0]
+		:KUBE-SERVICES - [0:0]
+		:KUBE-EXTERNAL-SERVICES - [0:0]
+		:KUBE-FORWARD - [0:0]
+		-A KUBE-FORWARD -m conntrack --ctstate INVALID -j DROP
+		-A KUBE-FORWARD -m comment --comment "kubernetes forwarding rules" -m mark --mark 0x4000/0x4000 -j ACCEPT
+		-A KUBE-FORWARD -m comment --comment "kubernetes forwarding conntrack rule" -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
+		COMMIT
+		*nat
+		:KUBE-NODEPORTS - [0:0]
+		:KUBE-SERVICES - [0:0]
+		:KUBE-MARK-MASQ - [0:0]
+		:KUBE-POSTROUTING - [0:0]
+		:KUBE-SEP-AYCN5HPXMIRJNJXU - [0:0]
+		:KUBE-SEP-DKCFIS26GWF2WLWC - [0:0]
+		:KUBE-SEP-JVVZVJ7BSEPPRNBS - [0:0]
+		:KUBE-SEP-SNQ3ZNILQDEJNDQO - [0:0]
+		:KUBE-SVC-4SW47YFZTEDKD3PK - [0:0]
+		:KUBE-SVC-X27LE4BHSL4DOUIK - [0:0]
+		:KUBE-SVC-XPGD46QRK7WJZT7O - [0:0]
+		-A KUBE-SERVICES -m comment --comment "ns1/svc1:p80 cluster IP" -m tcp -p tcp -d 172.30.0.41 --dport 80 -j KUBE-SVC-XPGD46QRK7WJZT7O
+		-A KUBE-SERVICES -m comment --comment "ns3/svc3:p80 cluster IP" -m tcp -p tcp -d 172.30.0.43 --dport 80 -j KUBE-SVC-X27LE4BHSL4DOUIK
+		-A KUBE-SERVICES -m comment --comment "ns4/svc4:p80 cluster IP" -m tcp -p tcp -d 172.30.0.44 --dport 80 -j KUBE-SVC-4SW47YFZTEDKD3PK
+		-A KUBE-SERVICES -m comment --comment "kubernetes service nodeports; NOTE: this must be the last rule in this chain" -m addrtype --dst-type LOCAL -j KUBE-NODEPORTS
+		-A KUBE-MARK-MASQ -j MARK --or-mark 0x4000
+		-A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN
+		-A KUBE-POSTROUTING -j MARK --xor-mark 0x4000
+		-A KUBE-POSTROUTING -m comment --comment "kubernetes service traffic requiring SNAT" -j MASQUERADE
+		-A KUBE-SEP-AYCN5HPXMIRJNJXU -m comment --comment ns4/svc4:p80 -s 10.0.4.1 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-AYCN5HPXMIRJNJXU -m comment --comment ns4/svc4:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.4.1:80
+		-A KUBE-SEP-DKCFIS26GWF2WLWC -m comment --comment ns3/svc3:p80 -s 10.0.3.2 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-DKCFIS26GWF2WLWC -m comment --comment ns3/svc3:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.3.2:80
+		-A KUBE-SEP-JVVZVJ7BSEPPRNBS -m comment --comment ns3/svc3:p80 -s 10.0.3.3 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-JVVZVJ7BSEPPRNBS -m comment --comment ns3/svc3:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.3.3:80
+		-A KUBE-SEP-SNQ3ZNILQDEJNDQO -m comment --comment ns1/svc1:p80 -s 10.0.1.1 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-SNQ3ZNILQDEJNDQO -m comment --comment ns1/svc1:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.1.1:80
+		-A KUBE-SVC-4SW47YFZTEDKD3PK -m comment --comment "ns4/svc4:p80 cluster IP" -m tcp -p tcp -d 172.30.0.44 --dport 80 ! -s 10.0.0.0/8 -j KUBE-MARK-MASQ
+		-A KUBE-SVC-4SW47YFZTEDKD3PK -m comment --comment "ns4/svc4:p80 -> 10.0.4.1:80" -j KUBE-SEP-AYCN5HPXMIRJNJXU
+		-A KUBE-SVC-X27LE4BHSL4DOUIK -m comment --comment "ns3/svc3:p80 cluster IP" -m tcp -p tcp -d 172.30.0.43 --dport 80 ! -s 10.0.0.0/8 -j KUBE-MARK-MASQ
+		-A KUBE-SVC-X27LE4BHSL4DOUIK -m comment --comment "ns3/svc3:p80 -> 10.0.3.2:80" -m statistic --mode random --probability 0.5000000000 -j KUBE-SEP-DKCFIS26GWF2WLWC
+		-A KUBE-SVC-X27LE4BHSL4DOUIK -m comment --comment "ns3/svc3:p80 -> 10.0.3.3:80" -j KUBE-SEP-JVVZVJ7BSEPPRNBS
+		-A KUBE-SVC-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 cluster IP" -m tcp -p tcp -d 172.30.0.41 --dport 80 ! -s 10.0.0.0/8 -j KUBE-MARK-MASQ
+		-A KUBE-SVC-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 -> 10.0.1.1:80" -j KUBE-SEP-SNQ3ZNILQDEJNDQO
+		COMMIT
+		`)
+	assertIPTablesRulesEqual(t, getLine(), expected, fp.iptablesData.String())
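+
+	// (With n endpoints, endpoint i of the KUBE-SVC chain is selected with
+	// -m statistic --mode random --probability 1/(n-i), and the last one is
+	// the unconditional fallthrough; for two endpoints that is 0.5 and then
+	// 1.0, giving a uniform split. A sketch of the arithmetic:
+	//
+	//	for i := 0; i < n-1; i++ {
+	//		prob := 1.0 / float64(n-i) // 0.5 when n=2 and i=0
+	//	}
+	// )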
+
+	// Sync with no new changes...
+	fp.syncProxyRules()
+
+	expected = dedent.Dedent(`
+		*filter
+		:KUBE-NODEPORTS - [0:0]
+		:KUBE-SERVICES - [0:0]
+		:KUBE-EXTERNAL-SERVICES - [0:0]
+		:KUBE-FORWARD - [0:0]
+		-A KUBE-FORWARD -m conntrack --ctstate INVALID -j DROP
+		-A KUBE-FORWARD -m comment --comment "kubernetes forwarding rules" -m mark --mark 0x4000/0x4000 -j ACCEPT
+		-A KUBE-FORWARD -m comment --comment "kubernetes forwarding conntrack rule" -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
+		COMMIT
+		*nat
+		:KUBE-NODEPORTS - [0:0]
+		:KUBE-SERVICES - [0:0]
+		:KUBE-MARK-MASQ - [0:0]
+		:KUBE-POSTROUTING - [0:0]
+		:KUBE-SEP-AYCN5HPXMIRJNJXU - [0:0]
+		:KUBE-SEP-DKCFIS26GWF2WLWC - [0:0]
+		:KUBE-SEP-JVVZVJ7BSEPPRNBS - [0:0]
+		:KUBE-SEP-SNQ3ZNILQDEJNDQO - [0:0]
+		:KUBE-SVC-4SW47YFZTEDKD3PK - [0:0]
+		:KUBE-SVC-X27LE4BHSL4DOUIK - [0:0]
+		:KUBE-SVC-XPGD46QRK7WJZT7O - [0:0]
+		-A KUBE-SERVICES -m comment --comment "ns1/svc1:p80 cluster IP" -m tcp -p tcp -d 172.30.0.41 --dport 80 -j KUBE-SVC-XPGD46QRK7WJZT7O
+		-A KUBE-SERVICES -m comment --comment "ns3/svc3:p80 cluster IP" -m tcp -p tcp -d 172.30.0.43 --dport 80 -j KUBE-SVC-X27LE4BHSL4DOUIK
+		-A KUBE-SERVICES -m comment --comment "ns4/svc4:p80 cluster IP" -m tcp -p tcp -d 172.30.0.44 --dport 80 -j KUBE-SVC-4SW47YFZTEDKD3PK
+		-A KUBE-SERVICES -m comment --comment "kubernetes service nodeports; NOTE: this must be the last rule in this chain" -m addrtype --dst-type LOCAL -j KUBE-NODEPORTS
+		-A KUBE-MARK-MASQ -j MARK --or-mark 0x4000
+		-A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN
+		-A KUBE-POSTROUTING -j MARK --xor-mark 0x4000
+		-A KUBE-POSTROUTING -m comment --comment "kubernetes service traffic requiring SNAT" -j MASQUERADE
+		-A KUBE-SEP-AYCN5HPXMIRJNJXU -m comment --comment ns4/svc4:p80 -s 10.0.4.1 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-AYCN5HPXMIRJNJXU -m comment --comment ns4/svc4:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.4.1:80
+		-A KUBE-SEP-DKCFIS26GWF2WLWC -m comment --comment ns3/svc3:p80 -s 10.0.3.2 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-DKCFIS26GWF2WLWC -m comment --comment ns3/svc3:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.3.2:80
+		-A KUBE-SEP-JVVZVJ7BSEPPRNBS -m comment --comment ns3/svc3:p80 -s 10.0.3.3 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-JVVZVJ7BSEPPRNBS -m comment --comment ns3/svc3:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.3.3:80
+		-A KUBE-SEP-SNQ3ZNILQDEJNDQO -m comment --comment ns1/svc1:p80 -s 10.0.1.1 -j KUBE-MARK-MASQ
+		-A KUBE-SEP-SNQ3ZNILQDEJNDQO -m comment --comment ns1/svc1:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.1.1:80
+		-A KUBE-SVC-4SW47YFZTEDKD3PK -m comment --comment "ns4/svc4:p80 cluster IP" -m tcp -p tcp -d 172.30.0.44 --dport 80 ! -s 10.0.0.0/8 -j KUBE-MARK-MASQ
+		-A KUBE-SVC-4SW47YFZTEDKD3PK -m comment --comment "ns4/svc4:p80 -> 10.0.4.1:80" -j KUBE-SEP-AYCN5HPXMIRJNJXU
+		-A KUBE-SVC-X27LE4BHSL4DOUIK -m comment --comment "ns3/svc3:p80 cluster IP" -m tcp -p tcp -d 172.30.0.43 --dport 80 ! -s 10.0.0.0/8 -j KUBE-MARK-MASQ
+		-A KUBE-SVC-X27LE4BHSL4DOUIK -m comment --comment "ns3/svc3:p80 -> 10.0.3.2:80" -m statistic --mode random --probability 0.5000000000 -j KUBE-SEP-DKCFIS26GWF2WLWC
+		-A KUBE-SVC-X27LE4BHSL4DOUIK -m comment --comment "ns3/svc3:p80 -> 10.0.3.3:80" -j KUBE-SEP-JVVZVJ7BSEPPRNBS
+		-A KUBE-SVC-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 cluster IP" -m tcp -p tcp -d 172.30.0.41 --dport 80 ! -s 10.0.0.0/8 -j KUBE-MARK-MASQ
+		-A KUBE-SVC-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 -> 10.0.1.1:80" -j KUBE-SEP-SNQ3ZNILQDEJNDQO
+		COMMIT
+		`)
+	assertIPTablesRulesEqual(t, getLine(), expected, fp.iptablesData.String())
+}
+
 func TestNoEndpointsMetric(t *testing.T) {
 	type endpoint struct {
 		ip string
From 1cd461bd247ae04ebcbafdf64bef8a6099b57d6e Mon Sep 17 00:00:00 2001
From: Dan Winship
Date: Wed, 1 Jun 2022 14:21:58 -0400
Subject: [PATCH 2/3] proxy/iptables: abstract the "endpointChainsNumberThreshold" a bit

Turn this into a generic "large cluster mode" that determines whether
we optimize for performance or debuggability.

---
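Note (below the cut line): with this change the comment-elision decision
becomes a single boolean computed once per sync, rather than a per-rule
comparison against a threshold. Condensed from the diff below:

	totalEndpoints := 0
	for svcName := range proxier.serviceMap {
		totalEndpoints += len(proxier.endpointsMap[svcName])
	}
	proxier.largeClusterMode = (totalEndpoints > largeClusterEndpointsThreshold)

	// and then, per rule:
	if proxier.largeClusterMode {
		return args // skip "-m comment --comment <svcName>"
	}

Anything else that later wants to trade debuggability for performance can
key off the same flag.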
 pkg/proxy/iptables/proxier.go      | 28 ++++++++++++++++------------
 pkg/proxy/iptables/proxier_test.go | 14 +++++++-------
 2 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/pkg/proxy/iptables/proxier.go b/pkg/proxy/iptables/proxier.go
index 54593353a92..cd5f0b5eaec 100644
--- a/pkg/proxy/iptables/proxier.go
+++ b/pkg/proxy/iptables/proxier.go
@@ -80,6 +80,11 @@ const (
 
 	// kube proxy canary chain is used for monitoring rule reload
 	kubeProxyCanaryChain utiliptables.Chain = "KUBE-PROXY-CANARY"
+
+	// largeClusterEndpointsThreshold is the number of endpoints at which
+	// we switch into "large cluster mode" and optimize for iptables
+	// performance over iptables debuggability
+	largeClusterEndpointsThreshold = 1000
 )
 
 // KernelCompatTester tests whether the required kernel capabilities are
@@ -219,11 +224,10 @@ type Proxier struct {
 	natChains utilproxy.LineBuffer
 	natRules utilproxy.LineBuffer
 
-	// endpointChainsNumber is the total amount of endpointChains across all
-	// services that we will generate (it is computed at the beginning of
-	// syncProxyRules method). If that is large enough, comments in some
-	// iptable rules are dropped to improve performance.
-	endpointChainsNumber int
+	// largeClusterMode is set at the beginning of syncProxyRules if we are
+	// going to end up outputting "lots" of iptables rules and so we need to
+	// optimize for performance over debuggability.
+	largeClusterMode bool
 
 	// Values are as a parameter to select the interfaces where nodeport works.
 	nodePortAddresses []string
@@ -787,14 +791,12 @@ func (proxier *Proxier) deleteEndpointConnections(connectionMap []proxy.ServiceE
 	}
 }
 
-const endpointChainsNumberThreshold = 1000
-
 // Assumes proxier.mu is held.
 func (proxier *Proxier) appendServiceCommentLocked(args []string, svcName string) []string {
 	// Not printing these comments, can reduce size of iptables (in case of large
 	// number of endpoints) even by 40%+. So if total number of endpoint chains
 	// is large enough, we simply drop those comments.
-	if proxier.endpointChainsNumber > endpointChainsNumberThreshold {
+	if proxier.largeClusterMode {
 		return args
 	}
 	return append(args, "-m", "comment", "--comment", svcName)
@@ -956,11 +958,13 @@ func (proxier *Proxier) syncProxyRules() {
 	// is just for efficiency, not correctness.
 	args := make([]string, 64)
 
-	// Compute total number of endpoint chains across all services.
-	proxier.endpointChainsNumber = 0
+	// Compute total number of endpoint chains across all services to get
+	// a sense of how big the cluster is.
+	totalEndpoints := 0
 	for svcName := range proxier.serviceMap {
-		proxier.endpointChainsNumber += len(proxier.endpointsMap[svcName])
+		totalEndpoints += len(proxier.endpointsMap[svcName])
 	}
+	proxier.largeClusterMode = (totalEndpoints > largeClusterEndpointsThreshold)
 
 	nodeAddresses, err := utilproxy.GetNodeAddresses(proxier.nodePortAddresses, proxier.networkInterfacer)
 	if err != nil {
@@ -1422,7 +1426,7 @@ func (proxier *Proxier) syncProxyRules() {
 	klog.V(2).InfoS("Reloading service iptables data",
 		"numServices", len(proxier.serviceMap),
-		"numEndpoints", proxier.endpointChainsNumber,
+		"numEndpoints", totalEndpoints,
 		"numFilterChains", proxier.filterChains.Lines(),
 		"numFilterRules", proxier.filterRules.Lines(),
 		"numNATChains", proxier.natChains.Lines(),
diff --git a/pkg/proxy/iptables/proxier_test.go b/pkg/proxy/iptables/proxier_test.go
index 2c98c1edd90..38e8d24910b 100644
--- a/pkg/proxy/iptables/proxier_test.go
+++ b/pkg/proxy/iptables/proxier_test.go
@@ -7434,7 +7434,7 @@ func countEndpointsAndComments(iptablesData string, matchEndpoint string) (strin
 	return matched, numEndpoints, numComments
 }
 
-func TestEndpointCommentElision(t *testing.T) {
+func TestSyncProxyRulesLargeClusterMode(t *testing.T) {
 	ipt := iptablestest.NewFake()
 	fp := NewFakeProxier(ipt)
 	fp.masqueradeAll = true
@@ -7473,7 +7473,7 @@ func TestEndpointCommentElision(t *testing.T) {
 	populateEndpointSlices(fp,
 		makeTestEndpointSlice("ns1", "svc1", 1, func(eps *discovery.EndpointSlice) {
 			eps.AddressType = discovery.AddressTypeIPv4
-			eps.Endpoints = make([]discovery.Endpoint, endpointChainsNumberThreshold/2-1)
+			eps.Endpoints = make([]discovery.Endpoint, largeClusterEndpointsThreshold/2-1)
 			for i := range eps.Endpoints {
 				eps.Endpoints[i].Addresses = []string{fmt.Sprintf("10.0.%d.%d", i%256, i/256)}
 			}
@@ -7485,7 +7485,7 @@ func TestEndpointCommentElision(t *testing.T) {
 		makeTestEndpointSlice("ns2", "svc2", 1, func(eps *discovery.EndpointSlice) {
 			eps.AddressType = discovery.AddressTypeIPv4
-			eps.Endpoints = make([]discovery.Endpoint, endpointChainsNumberThreshold/2-1)
+			eps.Endpoints = make([]discovery.Endpoint, largeClusterEndpointsThreshold/2-1)
 			for i := range eps.Endpoints {
 				eps.Endpoints[i].Addresses = []string{fmt.Sprintf("10.1.%d.%d", i%256, i/256)}
 			}
@@ -7498,15 +7498,15 @@ func TestEndpointCommentElision(t *testing.T) {
 	)
 
 	fp.syncProxyRules()
+	expectedEndpoints := 2 * (largeClusterEndpointsThreshold/2 - 1)
 
-	expectedEndpoints := 2 * (endpointChainsNumberThreshold/2 - 1)
 	firstEndpoint, numEndpoints, numComments := countEndpointsAndComments(fp.iptablesData.String(), "10.0.0.0")
 	assert.Equal(t, "-A KUBE-SEP-DKGQUZGBKLTPAR56 -m comment --comment ns1/svc1:p80 -m tcp -p tcp -j DNAT --to-destination 10.0.0.0:80", firstEndpoint)
 	if numEndpoints != expectedEndpoints {
 		t.Errorf("Found wrong number of endpoints: expected %d, got %d", expectedEndpoints, numEndpoints)
 	}
 	if numComments != numEndpoints {
-		t.Errorf("numComments (%d) != numEndpoints (%d) when numEndpoints < threshold (%d)", numComments, numEndpoints, endpointChainsNumberThreshold)
+		t.Errorf("numComments (%d) != numEndpoints (%d) when numEndpoints < threshold (%d)", numComments, numEndpoints, largeClusterEndpointsThreshold)
 	}
 
 	fp.OnEndpointSliceAdd(makeTestEndpointSlice("ns3", "svc3", 1, func(eps *discovery.EndpointSlice) {
@@ -7525,15 +7525,15 @@ func TestEndpointCommentElision(t *testing.T) {
 		eps.AddressType = discovery.AddressTypeIPv4
 		eps.Endpoints = make([]discovery.Endpoint, 3)
 		for i := range eps.Endpoints {
 			eps.Endpoints[i].Addresses = []string{fmt.Sprintf("10.2.%d.%d", i%256, i/256)}
 		}
 		eps.Ports = []discovery.EndpointPort{{
 			Name: utilpointer.StringPtr("p80"),
 			Port: utilpointer.Int32(80),
 			Protocol: &tcpProtocol,
 		}}
 	}))
 	fp.syncProxyRules()
-
 	expectedEndpoints += 3
+
 	firstEndpoint, numEndpoints, numComments = countEndpointsAndComments(fp.iptablesData.String(), "10.0.0.0")
 	assert.Equal(t, "-A KUBE-SEP-DKGQUZGBKLTPAR56 -m tcp -p tcp -j DNAT --to-destination 10.0.0.0:80", firstEndpoint)
 	if numEndpoints != expectedEndpoints {
 		t.Errorf("Found wrong number of endpoints: expected %d, got %d", expectedEndpoints, numEndpoints)
 	}
 	if numComments != 0 {
-		t.Errorf("numComments (%d) != 0 when numEndpoints (%d) > threshold (%d)", numComments, numEndpoints, endpointChainsNumberThreshold)
+		t.Errorf("numComments (%d) != 0 when numEndpoints (%d) > threshold (%d)", numComments, numEndpoints, largeClusterEndpointsThreshold)
 	}
 }

From 7d3ba837f53b2ff89479d451e374049f15de1729 Mon Sep 17 00:00:00 2001
From: Dan Winship
Date: Wed, 1 Jun 2022 14:10:22 -0400
Subject: [PATCH 3/3] proxy/iptables: only clean up chains periodically in large clusters

"iptables-save" takes several seconds to run on machines with lots of
iptables rules, and we only use its result to figure out which chains
are no longer referenced by any rules. While it makes things less
confusing if we delete unused chains immediately, it's not actually
_necessary_ since they never get called during packet processing. So
in large clusters, make it so we only clean up chains periodically
rather than on every sync.

---
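Note (below the cut line): the cleanup pass is gated on a timestamp, so
small clusters keep the old every-sync behavior and large clusters run
"iptables-save" at most once per syncPeriod. Condensed from the diff
below:

	if !proxier.largeClusterMode || time.Since(proxier.lastIPTablesCleanup) > proxier.syncPeriod {
		// run iptables-save; then, for each stale KUBE-SVC-*/KUBE-SEP-*
		// chain, write ":<chain> - [0:0]" to flush it and "-X <chain>"
		// to delete it
		proxier.lastIPTablesCleanup = time.Now()
	}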
 pkg/proxy/iptables/proxier.go      | 51 +++++++++++++------------
 pkg/proxy/iptables/proxier_test.go | 60 ++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+), 23 deletions(-)

diff --git a/pkg/proxy/iptables/proxier.go b/pkg/proxy/iptables/proxier.go
index cd5f0b5eaec..ac6a28fdafb 100644
--- a/pkg/proxy/iptables/proxier.go
+++ b/pkg/proxy/iptables/proxier.go
@@ -196,6 +196,7 @@ type Proxier struct {
 	initialized int32
 	syncRunner *async.BoundedFrequencyRunner // governs calls to syncProxyRules
 	syncPeriod time.Duration
+	lastIPTablesCleanup time.Time
 
 	// These are effectively const and do not need the mutex to be held.
 	iptables utiliptables.Interface
@@ -890,17 +891,6 @@ func (proxier *Proxier) syncProxyRules() {
 	// Below this point we will not return until we try to write the iptables rules.
 	//
 
-	// Get iptables-save output so we can check for existing chains and rules.
-	// This will be a map of chain name to chain with rules as stored in iptables-save/iptables-restore
-	existingNATChains := make(map[utiliptables.Chain]struct{})
-	proxier.iptablesData.Reset()
-	err := proxier.iptables.SaveInto(utiliptables.TableNAT, proxier.iptablesData)
-	if err != nil {
-		klog.ErrorS(err, "Failed to execute iptables-save: stale chains will not be deleted")
-	} else {
-		existingNATChains = utiliptables.GetChainsFromTable(proxier.iptablesData.Bytes())
-	}
-
 	// Reset all buffers used later.
 	// This is to avoid memory reallocations and thus improve performance.
 	proxier.filterChains.Reset()
@@ -1339,19 +1329,34 @@ func (proxier *Proxier) syncProxyRules() {
 		}
 	}
 
-	// Delete chains no longer in use.
-	for chain := range existingNATChains {
-		if !activeNATChains[chain] {
-			chainString := string(chain)
-			if !isServiceChainName(chainString) {
-				// Ignore chains that aren't ours.
-				continue
-			}
-			// We must (as per iptables) write a chain-line for it, which has
-			// the nice effect of flushing the chain. Then we can remove the
-			// chain.
-			proxier.natChains.Write(utiliptables.MakeChainLine(chain))
-			proxier.natRules.Write("-X", chainString)
-		}
-	}
+	// Delete chains no longer in use. Since "iptables-save" can take several seconds
+	// to run on hosts with lots of iptables rules, we don't bother to do this on
+	// every sync in large clusters. (Stale chains will not be referenced by any
+	// active rules, so they're harmless other than taking up memory.)
+	if !proxier.largeClusterMode || time.Since(proxier.lastIPTablesCleanup) > proxier.syncPeriod {
+		var existingNATChains map[utiliptables.Chain]struct{}
+
+		proxier.iptablesData.Reset()
+		if err := proxier.iptables.SaveInto(utiliptables.TableNAT, proxier.iptablesData); err == nil {
+			existingNATChains = utiliptables.GetChainsFromTable(proxier.iptablesData.Bytes())
+
+			for chain := range existingNATChains {
+				if !activeNATChains[chain] {
+					chainString := string(chain)
+					if !isServiceChainName(chainString) {
+						// Ignore chains that aren't ours.
+						continue
+					}
+					// We must (as per iptables) write a chain-line
+					// for it, which has the nice effect of flushing
+					// the chain. Then we can remove the chain.
+					proxier.natChains.Write(utiliptables.MakeChainLine(chain))
+					proxier.natRules.Write("-X", chainString)
+				}
+			}
+			proxier.lastIPTablesCleanup = time.Now()
+		} else {
+			klog.ErrorS(err, "Failed to execute iptables-save: stale chains will not be deleted")
+		}
+	}
diff --git a/pkg/proxy/iptables/proxier_test.go b/pkg/proxy/iptables/proxier_test.go
index 38e8d24910b..6a885aabd54 100644
--- a/pkg/proxy/iptables/proxier_test.go
+++ b/pkg/proxy/iptables/proxier_test.go
@@ -7438,6 +7438,7 @@ func TestSyncProxyRulesLargeClusterMode(t *testing.T) {
 	ipt := iptablestest.NewFake()
 	fp := NewFakeProxier(ipt)
 	fp.masqueradeAll = true
+	fp.syncPeriod = 30 * time.Second
 
 	makeServiceMap(fp,
 		makeTestService("ns1", "svc1", func(svc *v1.Service) {
@@ -7536,6 +7536,65 @@ func TestSyncProxyRulesLargeClusterMode(t *testing.T) {
 	if numComments != 0 {
 		t.Errorf("numComments (%d) != 0 when numEndpoints (%d) > threshold (%d)", numComments, numEndpoints, largeClusterEndpointsThreshold)
 	}
+
+	// Now test service deletion; we have to create another service to do this though,
+	// because if we deleted any of the existing services, we'd fall back out of large
+	// cluster mode.
+	svc4 := makeTestService("ns4", "svc4", func(svc *v1.Service) {
+		svc.Spec.Type = v1.ServiceTypeClusterIP
+		svc.Spec.ClusterIP = "172.30.0.44"
+		svc.Spec.Ports = []v1.ServicePort{{
+			Name: "p8082",
+			Port: 8082,
+			Protocol: v1.ProtocolTCP,
+		}}
+	})
+	fp.OnServiceAdd(svc4)
+	fp.OnEndpointSliceAdd(makeTestEndpointSlice("ns4", "svc4", 1, func(eps *discovery.EndpointSlice) {
+		eps.AddressType = discovery.AddressTypeIPv4
+		eps.Endpoints = []discovery.Endpoint{{
+			Addresses: []string{"10.4.0.1"},
+		}}
+		eps.Ports = []discovery.EndpointPort{{
+			Name: utilpointer.StringPtr("p8082"),
+			Port: utilpointer.Int32(8082),
+			Protocol: &tcpProtocol,
+		}}
+	}))
+	fp.syncProxyRules()
+	expectedEndpoints += 1
+
+	svc4Endpoint, numEndpoints, _ := countEndpointsAndComments(fp.iptablesData.String(), "10.4.0.1")
+	assert.Equal(t, "-A KUBE-SEP-SU5STNODRYEWJAUF -m tcp -p tcp -j DNAT --to-destination 10.4.0.1:8082", svc4Endpoint, "svc4 endpoint was not created")
+	if numEndpoints != expectedEndpoints {
+		t.Errorf("Found wrong number of endpoints after svc4 creation: expected %d, got %d", expectedEndpoints, numEndpoints)
+	}
+
+	// In large-cluster mode, if we delete a service, the proxier will stop
+	// syncing its chains, but it will not delete them immediately either.
+	fp.lastIPTablesCleanup = time.Now()
+	fp.OnServiceDelete(svc4)
+	fp.syncProxyRules()
+	expectedEndpoints -= 1
+
+	svc4Endpoint, numEndpoints, _ = countEndpointsAndComments(fp.iptablesData.String(), "10.4.0.1")
+	assert.Equal(t, "", svc4Endpoint, "svc4 endpoint was still created!")
+	if numEndpoints != expectedEndpoints {
+		t.Errorf("Found wrong number of endpoints after service deletion: expected %d, got %d", expectedEndpoints, numEndpoints)
+	}
+	assert.NotContains(t, fp.iptablesData.String(), "-X ", "iptables data unexpectedly contains chain deletions")
+
+	// But resyncing after a long-enough delay will delete the stale chains
+	fp.lastIPTablesCleanup = time.Now().Add(-fp.syncPeriod).Add(-1)
+	fp.syncProxyRules()
+
+	svc4Endpoint, numEndpoints, _ = countEndpointsAndComments(fp.iptablesData.String(), "10.4.0.1")
+	assert.Equal(t, "", svc4Endpoint, "svc4 endpoint was still created!")
+	if numEndpoints != expectedEndpoints {
+		t.Errorf("Found wrong number of endpoints after delayed resync: expected %d, got %d", expectedEndpoints, numEndpoints)
+	}
+	assert.Contains(t, fp.iptablesData.String(), "-X KUBE-SVC-EBDQOQU5SJFXRIL3", "iptables data does not contain chain deletion")
+	assert.Contains(t, fp.iptablesData.String(), "-X KUBE-SEP-SU5STNODRYEWJAUF", "iptables data does not contain endpoint deletions")
 }
 
 // Test calling syncProxyRules() multiple times with various changes