mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-09-26 12:46:06 +00:00
kubelet, kube-proxy: unmark packets before masquerading them
If you set the packet mark on a packet and then route that packet through a kernel VXLAN interface, the VXLAN-encapsulated packet appears to keep the mark from the original packet. Since our NAT rules are based on the packet mark, this was causing us to double-NAT some packets, which in turn triggered a kernel checksumming bug. Even without the checksum bug there are reasons to avoid double-NATing, so fix the rules to clear the masquerade mark from packets before masquerading them.
This commit is contained in:
@@ -282,7 +282,7 @@ func NewProxier(ipt utiliptables.Interface,
|
||||
|
||||
// Generate the masquerade mark to use for SNAT rules.
|
||||
masqueradeValue := 1 << uint(masqueradeBit)
|
||||
masqueradeMark := fmt.Sprintf("%#08x/%#08x", masqueradeValue, masqueradeValue)
|
||||
masqueradeMark := fmt.Sprintf("%#08x", masqueradeValue)
|
||||
klog.V(2).Infof("iptables(%s) masquerade mark: %s", ipt.Protocol(), masqueradeMark)
|
||||
|
||||
endpointSlicesEnabled := utilfeature.DefaultFeatureGate.Enabled(features.EndpointSliceProxying)
|
||||
@@ -919,10 +919,20 @@ func (proxier *Proxier) syncProxyRules() {
|
||||
// this so that it is easier to flush and change, for example if the mark
|
||||
// value should ever change.
|
||||
// NB: THIS MUST MATCH the corresponding code in the kubelet
|
||||
writeLine(proxier.natRules, []string{
|
||||
"-A", string(kubePostroutingChain),
|
||||
"-m", "mark", "!", "--mark", fmt.Sprintf("%s/%s", proxier.masqueradeMark, proxier.masqueradeMark),
|
||||
"-j", "RETURN",
|
||||
}...)
|
||||
// Clear the mark to avoid re-masquerading if the packet re-traverses the network stack.
|
||||
writeLine(proxier.natRules, []string{
|
||||
"-A", string(kubePostroutingChain),
|
||||
// XOR proxier.masqueradeMark to unset it
|
||||
"-j", "MARK", "--xor-mark", proxier.masqueradeMark,
|
||||
}...)
|
||||
masqRule := []string{
|
||||
"-A", string(kubePostroutingChain),
|
||||
"-m", "comment", "--comment", `"kubernetes service traffic requiring SNAT"`,
|
||||
"-m", "mark", "--mark", proxier.masqueradeMark,
|
||||
"-j", "MASQUERADE",
|
||||
}
|
||||
if proxier.iptables.HasRandomFully() {
|
||||
@@ -935,7 +945,7 @@ func (proxier *Proxier) syncProxyRules() {
|
||||
// value should ever change.
|
||||
writeLine(proxier.natRules, []string{
|
||||
"-A", string(KubeMarkMasqChain),
|
||||
"-j", "MARK", "--set-xmark", proxier.masqueradeMark,
|
||||
"-j", "MARK", "--or-mark", proxier.masqueradeMark,
|
||||
}...)
|
||||
|
||||
// Accumulate NAT chains to keep.
|
||||
@@ -1514,7 +1524,7 @@ func (proxier *Proxier) syncProxyRules() {
|
||||
writeLine(proxier.filterRules,
|
||||
"-A", string(kubeForwardChain),
|
||||
"-m", "comment", "--comment", `"kubernetes forwarding rules"`,
|
||||
"-m", "mark", "--mark", proxier.masqueradeMark,
|
||||
"-m", "mark", "--mark", fmt.Sprintf("%s/%s", proxier.masqueradeMark, proxier.masqueradeMark),
|
||||
"-j", "ACCEPT",
|
||||
)
|
||||
|
||||
|
@@ -357,6 +357,7 @@ func NewFakeProxier(ipt utiliptables.Interface, endpointSlicesEnabled bool) *Pro
|
||||
endpointsMap: make(proxy.EndpointsMap),
|
||||
endpointsChanges: proxy.NewEndpointChangeTracker(testHostname, newEndpointInfo, nil, nil, endpointSlicesEnabled),
|
||||
iptables: ipt,
|
||||
masqueradeMark: "0x4000",
|
||||
localDetector: detectLocal,
|
||||
hostname: testHostname,
|
||||
portsMap: make(map[utilproxy.LocalPort]utilproxy.Closeable),
|
||||
@@ -2418,7 +2419,7 @@ func TestEndpointSliceE2E(t *testing.T) {
|
||||
:KUBE-EXTERNAL-SERVICES - [0:0]
|
||||
:KUBE-FORWARD - [0:0]
|
||||
-A KUBE-FORWARD -m conntrack --ctstate INVALID -j DROP
|
||||
-A KUBE-FORWARD -m comment --comment "kubernetes forwarding rules" -m mark --mark -j ACCEPT
|
||||
-A KUBE-FORWARD -m comment --comment "kubernetes forwarding rules" -m mark --mark 0x4000/0x4000 -j ACCEPT
|
||||
-A KUBE-FORWARD -m comment --comment "kubernetes forwarding conntrack pod source rule" -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
|
||||
-A KUBE-FORWARD -m comment --comment "kubernetes forwarding conntrack pod destination rule" -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
|
||||
COMMIT
|
||||
@@ -2431,8 +2432,10 @@ COMMIT
|
||||
:KUBE-SEP-3JOIVZTXZZRGORX4 - [0:0]
|
||||
:KUBE-SEP-IO5XOSKPAXIFQXAJ - [0:0]
|
||||
:KUBE-SEP-XGJFVO3L2O5SRFNT - [0:0]
|
||||
-A KUBE-POSTROUTING -m comment --comment "kubernetes service traffic requiring SNAT" -m mark --mark -j MASQUERADE
|
||||
-A KUBE-MARK-MASQ -j MARK --set-xmark
|
||||
-A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN
|
||||
-A KUBE-POSTROUTING -j MARK --xor-mark 0x4000
|
||||
-A KUBE-POSTROUTING -m comment --comment "kubernetes service traffic requiring SNAT" -j MASQUERADE
|
||||
-A KUBE-MARK-MASQ -j MARK --or-mark 0x4000
|
||||
-A KUBE-SERVICES -m comment --comment "ns1/svc1 cluster IP" -m tcp -p tcp -d 172.20.1.1/32 --dport 0 ! -s 10.0.0.0/24 -j KUBE-MARK-MASQ
|
||||
-A KUBE-SERVICES -m comment --comment "ns1/svc1 cluster IP" -m tcp -p tcp -d 172.20.1.1/32 --dport 0 -j KUBE-SVC-AQI2S6QIMU7PVVRP
|
||||
-A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m statistic --mode random --probability 0.3333333333 -j KUBE-SEP-3JOIVZTXZZRGORX4
|
||||
|
Reference in New Issue
Block a user