kubelet, kube-proxy: unmark packets before masquerading them

It seems that if you set the packet mark on a packet and then route
that packet through a kernel VXLAN interface, the VXLAN-encapsulated
packet will still have the mark from the original packet. Since our
NAT rules are based on the packet mark, this was causing us to
double-NAT some packets, which then triggered a kernel checksumming
bug. But even without the checksum bug, there are reasons to avoid
double-NATting, so fix the rules to unmark the packets before
masquerading them.
This commit is contained in:
Dan Winship 2020-06-11 09:11:03 -04:00
parent 2930723a25
commit c12534d8b4
5 changed files with 56 additions and 19 deletions

View File

@ -62,7 +62,7 @@ func (kl *Kubelet) syncNetworkUtil() {
klog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, KubeMarkDropChain, err)
return
}
if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubeMarkDropChain, "-j", "MARK", "--set-xmark", dropMark); err != nil {
if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubeMarkDropChain, "-j", "MARK", "--or-mark", dropMark); err != nil {
klog.Errorf("Failed to ensure marking rule for %v: %v", KubeMarkDropChain, err)
return
}
@ -72,7 +72,7 @@ func (kl *Kubelet) syncNetworkUtil() {
}
if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableFilter, KubeFirewallChain,
"-m", "comment", "--comment", "kubernetes firewall for dropping marked packets",
"-m", "mark", "--mark", dropMark,
"-m", "mark", "--mark", fmt.Sprintf("%s/%s", dropMark, dropMark),
"-j", "DROP"); err != nil {
klog.Errorf("Failed to ensure rule to drop packet marked by %v in %v chain %v: %v", KubeMarkDropChain, utiliptables.TableFilter, KubeFirewallChain, err)
return
@ -112,7 +112,7 @@ func (kl *Kubelet) syncNetworkUtil() {
klog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, KubePostroutingChain, err)
return
}
if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubeMarkMasqChain, "-j", "MARK", "--set-xmark", masqueradeMark); err != nil {
if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubeMarkMasqChain, "-j", "MARK", "--or-mark", masqueradeMark); err != nil {
klog.Errorf("Failed to ensure marking rule for %v: %v", KubeMarkMasqChain, err)
return
}
@ -121,12 +121,26 @@ func (kl *Kubelet) syncNetworkUtil() {
klog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableNAT, utiliptables.ChainPostrouting, KubePostroutingChain, err)
return
}
// Establish the masquerading rule.
// Set up KUBE-POSTROUTING to unmark and masquerade marked packets
// NB: THIS MUST MATCH the corresponding code in the iptables and ipvs
// modes of kube-proxy
if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubePostroutingChain,
"-m", "mark", "!", "--mark", fmt.Sprintf("%s/%s", masqueradeMark, masqueradeMark),
"-j", "RETURN"); err != nil {
klog.Errorf("Failed to ensure filtering rule for %v: %v", KubePostroutingChain, err)
return
}
// Clear the mark to avoid re-masquerading if the packet re-traverses the network stack.
// We know the mark bit is currently set so we can use --xor-mark to clear it (without needing
// to Sprintf another bitmask).
if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubePostroutingChain,
"-j", "MARK", "--xor-mark", masqueradeMark); err != nil {
klog.Errorf("Failed to ensure unmarking rule for %v: %v", KubePostroutingChain, err)
return
}
masqRule := []string{
"-m", "comment", "--comment", "kubernetes service traffic requiring SNAT",
"-m", "mark", "--mark", masqueradeMark,
"-j", "MASQUERADE",
}
if kl.iptClient.HasRandomFully() {
@ -141,5 +155,5 @@ func (kl *Kubelet) syncNetworkUtil() {
// getIPTablesMark returns the fwmark given the bit
func getIPTablesMark(bit int) string {
value := 1 << uint(bit)
return fmt.Sprintf("%#08x/%#08x", value, value)
return fmt.Sprintf("%#08x", value)
}

View File

@ -31,11 +31,11 @@ func TestGetIPTablesMark(t *testing.T) {
}{
{
14,
"0x00004000/0x00004000",
"0x00004000",
},
{
15,
"0x00008000/0x00008000",
"0x00008000",
},
}
for _, tc := range tests {

View File

@ -282,7 +282,7 @@ func NewProxier(ipt utiliptables.Interface,
// Generate the masquerade mark to use for SNAT rules.
masqueradeValue := 1 << uint(masqueradeBit)
masqueradeMark := fmt.Sprintf("%#08x/%#08x", masqueradeValue, masqueradeValue)
masqueradeMark := fmt.Sprintf("%#08x", masqueradeValue)
klog.V(2).Infof("iptables(%s) masquerade mark: %s", ipt.Protocol(), masqueradeMark)
endpointSlicesEnabled := utilfeature.DefaultFeatureGate.Enabled(features.EndpointSliceProxying)
@ -919,10 +919,20 @@ func (proxier *Proxier) syncProxyRules() {
// this so that it is easier to flush and change, for example if the mark
// value should ever change.
// NB: THIS MUST MATCH the corresponding code in the kubelet
writeLine(proxier.natRules, []string{
"-A", string(kubePostroutingChain),
"-m", "mark", "!", "--mark", fmt.Sprintf("%s/%s", proxier.masqueradeMark, proxier.masqueradeMark),
"-j", "RETURN",
}...)
// Clear the mark to avoid re-masquerading if the packet re-traverses the network stack.
writeLine(proxier.natRules, []string{
"-A", string(kubePostroutingChain),
// XOR proxier.masqueradeMark to unset it
"-j", "MARK", "--xor-mark", proxier.masqueradeMark,
}...)
masqRule := []string{
"-A", string(kubePostroutingChain),
"-m", "comment", "--comment", `"kubernetes service traffic requiring SNAT"`,
"-m", "mark", "--mark", proxier.masqueradeMark,
"-j", "MASQUERADE",
}
if proxier.iptables.HasRandomFully() {
@ -935,7 +945,7 @@ func (proxier *Proxier) syncProxyRules() {
// value should ever change.
writeLine(proxier.natRules, []string{
"-A", string(KubeMarkMasqChain),
"-j", "MARK", "--set-xmark", proxier.masqueradeMark,
"-j", "MARK", "--or-mark", proxier.masqueradeMark,
}...)
// Accumulate NAT chains to keep.
@ -1514,7 +1524,7 @@ func (proxier *Proxier) syncProxyRules() {
writeLine(proxier.filterRules,
"-A", string(kubeForwardChain),
"-m", "comment", "--comment", `"kubernetes forwarding rules"`,
"-m", "mark", "--mark", proxier.masqueradeMark,
"-m", "mark", "--mark", fmt.Sprintf("%s/%s", proxier.masqueradeMark, proxier.masqueradeMark),
"-j", "ACCEPT",
)

View File

@ -357,6 +357,7 @@ func NewFakeProxier(ipt utiliptables.Interface, endpointSlicesEnabled bool) *Pro
endpointsMap: make(proxy.EndpointsMap),
endpointsChanges: proxy.NewEndpointChangeTracker(testHostname, newEndpointInfo, nil, nil, endpointSlicesEnabled),
iptables: ipt,
masqueradeMark: "0x4000",
localDetector: detectLocal,
hostname: testHostname,
portsMap: make(map[utilproxy.LocalPort]utilproxy.Closeable),
@ -2418,7 +2419,7 @@ func TestEndpointSliceE2E(t *testing.T) {
:KUBE-EXTERNAL-SERVICES - [0:0]
:KUBE-FORWARD - [0:0]
-A KUBE-FORWARD -m conntrack --ctstate INVALID -j DROP
-A KUBE-FORWARD -m comment --comment "kubernetes forwarding rules" -m mark --mark -j ACCEPT
-A KUBE-FORWARD -m comment --comment "kubernetes forwarding rules" -m mark --mark 0x4000/0x4000 -j ACCEPT
-A KUBE-FORWARD -m comment --comment "kubernetes forwarding conntrack pod source rule" -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
-A KUBE-FORWARD -m comment --comment "kubernetes forwarding conntrack pod destination rule" -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
COMMIT
@ -2431,8 +2432,10 @@ COMMIT
:KUBE-SEP-3JOIVZTXZZRGORX4 - [0:0]
:KUBE-SEP-IO5XOSKPAXIFQXAJ - [0:0]
:KUBE-SEP-XGJFVO3L2O5SRFNT - [0:0]
-A KUBE-POSTROUTING -m comment --comment "kubernetes service traffic requiring SNAT" -m mark --mark -j MASQUERADE
-A KUBE-MARK-MASQ -j MARK --set-xmark
-A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN
-A KUBE-POSTROUTING -j MARK --xor-mark 0x4000
-A KUBE-POSTROUTING -m comment --comment "kubernetes service traffic requiring SNAT" -j MASQUERADE
-A KUBE-MARK-MASQ -j MARK --or-mark 0x4000
-A KUBE-SERVICES -m comment --comment "ns1/svc1 cluster IP" -m tcp -p tcp -d 172.20.1.1/32 --dport 0 ! -s 10.0.0.0/24 -j KUBE-MARK-MASQ
-A KUBE-SERVICES -m comment --comment "ns1/svc1 cluster IP" -m tcp -p tcp -d 172.20.1.1/32 --dport 0 -j KUBE-SVC-AQI2S6QIMU7PVVRP
-A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m statistic --mode random --probability 0.3333333333 -j KUBE-SEP-3JOIVZTXZZRGORX4

View File

@ -418,7 +418,7 @@ func NewProxier(ipt utiliptables.Interface,
// Generate the masquerade mark to use for SNAT rules.
masqueradeValue := 1 << uint(masqueradeBit)
masqueradeMark := fmt.Sprintf("%#08x/%#08x", masqueradeValue, masqueradeValue)
masqueradeMark := fmt.Sprintf("%#08x", masqueradeValue)
isIPv6 := utilnet.IsIPv6(nodeIP)
@ -1758,7 +1758,7 @@ func (proxier *Proxier) writeIptablesRules() {
writeLine(proxier.filterRules,
"-A", string(KubeForwardChain),
"-m", "comment", "--comment", `"kubernetes forwarding rules"`,
"-m", "mark", "--mark", proxier.masqueradeMark,
"-m", "mark", "--mark", fmt.Sprintf("%s/%s", proxier.masqueradeMark, proxier.masqueradeMark),
"-j", "ACCEPT",
)
@ -1842,10 +1842,20 @@ func (proxier *Proxier) createAndLinkeKubeChain() {
// this so that it is easier to flush and change, for example if the mark
// value should ever change.
// NB: THIS MUST MATCH the corresponding code in the kubelet
writeLine(proxier.natRules, []string{
"-A", string(kubePostroutingChain),
"-m", "mark", "!", "--mark", fmt.Sprintf("%s/%s", proxier.masqueradeMark, proxier.masqueradeMark),
"-j", "RETURN",
}...)
// Clear the mark to avoid re-masquerading if the packet re-traverses the network stack.
writeLine(proxier.natRules, []string{
"-A", string(kubePostroutingChain),
// XOR proxier.masqueradeMark to unset it
"-j", "MARK", "--xor-mark", proxier.masqueradeMark,
}...)
masqRule := []string{
"-A", string(kubePostroutingChain),
"-m", "comment", "--comment", `"kubernetes service traffic requiring SNAT"`,
"-m", "mark", "--mark", proxier.masqueradeMark,
"-j", "MASQUERADE",
}
if proxier.iptables.HasRandomFully() {
@ -1858,7 +1868,7 @@ func (proxier *Proxier) createAndLinkeKubeChain() {
// value should ever change.
writeLine(proxier.natRules, []string{
"-A", string(KubeMarkMasqChain),
"-j", "MARK", "--set-xmark", proxier.masqueradeMark,
"-j", "MARK", "--or-mark", proxier.masqueradeMark,
}...)
}