mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-21 02:41:25 +00:00
Merge pull request #109826 from danwinship/multi-load-balancer
fix kube-proxy bug with multiple LB IPs and source ranges
This commit is contained in:
commit
2b3508e0f1
@ -1243,6 +1243,46 @@ func (proxier *Proxier) syncProxyRules() {
|
|||||||
// The firewall chain will jump to the "external destination"
|
// The firewall chain will jump to the "external destination"
|
||||||
// chain.
|
// chain.
|
||||||
nextChain = svcInfo.firewallChainName
|
nextChain = svcInfo.firewallChainName
|
||||||
|
|
||||||
|
// The service firewall rules are created based on the
|
||||||
|
// loadBalancerSourceRanges field. This only works for
|
||||||
|
// VIP-like loadbalancers that preserve source IPs. For
|
||||||
|
// loadbalancers which direct traffic to service NodePort, the
|
||||||
|
// firewall rules will not apply.
|
||||||
|
args = append(args[:0],
|
||||||
|
"-A", string(nextChain),
|
||||||
|
"-m", "comment", "--comment", fmt.Sprintf(`"%s loadbalancer IP"`, svcNameString),
|
||||||
|
)
|
||||||
|
|
||||||
|
// firewall filter based on each source range
|
||||||
|
allowFromNode := false
|
||||||
|
for _, src := range svcInfo.LoadBalancerSourceRanges() {
|
||||||
|
proxier.natRules.Write(args, "-s", src, "-j", string(externalTrafficChain))
|
||||||
|
_, cidr, err := netutils.ParseCIDRSloppy(src)
|
||||||
|
if err != nil {
|
||||||
|
klog.ErrorS(err, "Error parsing CIDR in LoadBalancerSourceRanges, dropping it", "cidr", cidr)
|
||||||
|
} else if cidr.Contains(proxier.nodeIP) {
|
||||||
|
allowFromNode = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// For VIP-like LBs, the VIP is often added as a local
|
||||||
|
// address (via an IP route rule). In that case, a request
|
||||||
|
// from a node to the VIP will not hit the loadbalancer but
|
||||||
|
// will loop back with the source IP set to the VIP. We
|
||||||
|
// need the following rules to allow requests from this node.
|
||||||
|
if allowFromNode {
|
||||||
|
for _, lbip := range svcInfo.LoadBalancerIPStrings() {
|
||||||
|
proxier.natRules.Write(
|
||||||
|
args,
|
||||||
|
"-s", lbip,
|
||||||
|
"-j", string(externalTrafficChain))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the packet was able to reach the end of firewall chain,
|
||||||
|
// then it did not get DNATed. It means the packet cannot go
|
||||||
|
// thru the firewall, then mark it for DROP.
|
||||||
|
proxier.natRules.Write(args, "-j", string(KubeMarkDropChain))
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, lbip := range svcInfo.LoadBalancerIPStrings() {
|
for _, lbip := range svcInfo.LoadBalancerIPStrings() {
|
||||||
@ -1254,44 +1294,6 @@ func (proxier *Proxier) syncProxyRules() {
|
|||||||
"--dport", strconv.Itoa(svcInfo.Port()),
|
"--dport", strconv.Itoa(svcInfo.Port()),
|
||||||
"-j", string(nextChain))
|
"-j", string(nextChain))
|
||||||
|
|
||||||
// The service firewall rules are created based on the
|
|
||||||
// loadBalancerSourceRanges field. This only works for
|
|
||||||
// VIP-like loadbalancers that preserve source IPs. For
|
|
||||||
// loadbalancers which direct traffic to service NodePort, the
|
|
||||||
// firewall rules will not apply.
|
|
||||||
if len(svcInfo.LoadBalancerSourceRanges()) > 0 {
|
|
||||||
args = append(args[:0],
|
|
||||||
"-A", string(nextChain),
|
|
||||||
"-m", "comment", "--comment", fmt.Sprintf(`"%s loadbalancer IP"`, svcNameString),
|
|
||||||
)
|
|
||||||
|
|
||||||
// firewall filter based on each source range
|
|
||||||
allowFromNode := false
|
|
||||||
for _, src := range svcInfo.LoadBalancerSourceRanges() {
|
|
||||||
proxier.natRules.Write(args, "-s", src, "-j", string(externalTrafficChain))
|
|
||||||
_, cidr, err := netutils.ParseCIDRSloppy(src)
|
|
||||||
if err != nil {
|
|
||||||
klog.ErrorS(err, "Error parsing CIDR in LoadBalancerSourceRanges, dropping it", "cidr", cidr)
|
|
||||||
} else if cidr.Contains(proxier.nodeIP) {
|
|
||||||
allowFromNode = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// For VIP-like LBs, the VIP is often added as a local
|
|
||||||
// address (via an IP route rule). In that case, a request
|
|
||||||
// from a node to the VIP will not hit the loadbalancer but
|
|
||||||
// will loop back with the source IP set to the VIP. We
|
|
||||||
// need the following rule to allow requests from this node.
|
|
||||||
if allowFromNode {
|
|
||||||
proxier.natRules.Write(
|
|
||||||
args,
|
|
||||||
"-s", lbip,
|
|
||||||
"-j", string(externalTrafficChain))
|
|
||||||
}
|
|
||||||
// If the packet was able to reach the end of firewall chain,
|
|
||||||
// then it did not get DNATed. It means the packet cannot go
|
|
||||||
// thru the firewall, then mark it for DROP.
|
|
||||||
proxier.natRules.Write(args, "-j", string(KubeMarkDropChain))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// No endpoints.
|
// No endpoints.
|
||||||
|
@ -420,6 +420,7 @@ func NewFakeProxier(ipt utiliptables.Interface) *Proxier {
|
|||||||
filterRules: utilproxy.LineBuffer{},
|
filterRules: utilproxy.LineBuffer{},
|
||||||
natChains: utilproxy.LineBuffer{},
|
natChains: utilproxy.LineBuffer{},
|
||||||
natRules: utilproxy.LineBuffer{},
|
natRules: utilproxy.LineBuffer{},
|
||||||
|
nodeIP: netutils.ParseIPSloppy(testNodeIP),
|
||||||
nodePortAddresses: make([]string, 0),
|
nodePortAddresses: make([]string, 0),
|
||||||
networkInterfacer: networkInterfacer,
|
networkInterfacer: networkInterfacer,
|
||||||
}
|
}
|
||||||
@ -2369,7 +2370,8 @@ func TestLoadBalancer(t *testing.T) {
|
|||||||
svcIP := "172.30.0.41"
|
svcIP := "172.30.0.41"
|
||||||
svcPort := 80
|
svcPort := 80
|
||||||
svcNodePort := 3001
|
svcNodePort := 3001
|
||||||
svcLBIP := "1.2.3.4"
|
svcLBIP1 := "1.2.3.4"
|
||||||
|
svcLBIP2 := "5.6.7.8"
|
||||||
svcPortName := proxy.ServicePortName{
|
svcPortName := proxy.ServicePortName{
|
||||||
NamespacedName: makeNSN("ns1", "svc1"),
|
NamespacedName: makeNSN("ns1", "svc1"),
|
||||||
Port: "p80",
|
Port: "p80",
|
||||||
@ -2386,12 +2388,16 @@ func TestLoadBalancer(t *testing.T) {
|
|||||||
Protocol: v1.ProtocolTCP,
|
Protocol: v1.ProtocolTCP,
|
||||||
NodePort: int32(svcNodePort),
|
NodePort: int32(svcNodePort),
|
||||||
}}
|
}}
|
||||||
svc.Status.LoadBalancer.Ingress = []v1.LoadBalancerIngress{{
|
svc.Status.LoadBalancer.Ingress = []v1.LoadBalancerIngress{
|
||||||
IP: svcLBIP,
|
{IP: svcLBIP1},
|
||||||
}}
|
{IP: svcLBIP2},
|
||||||
// Also ensure that invalid LoadBalancerSourceRanges will not result
|
}
|
||||||
// in a crash.
|
svc.Spec.LoadBalancerSourceRanges = []string{
|
||||||
svc.Spec.LoadBalancerSourceRanges = []string{" 203.0.113.0/25"}
|
"192.168.0.0/24",
|
||||||
|
|
||||||
|
// Regression test that excess whitespace gets ignored
|
||||||
|
" 203.0.113.0/25",
|
||||||
|
}
|
||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -2435,10 +2441,14 @@ func TestLoadBalancer(t *testing.T) {
|
|||||||
-A KUBE-NODEPORTS -m comment --comment ns1/svc1:p80 -m tcp -p tcp --dport 3001 -j KUBE-EXT-XPGD46QRK7WJZT7O
|
-A KUBE-NODEPORTS -m comment --comment ns1/svc1:p80 -m tcp -p tcp --dport 3001 -j KUBE-EXT-XPGD46QRK7WJZT7O
|
||||||
-A KUBE-SERVICES -m comment --comment "ns1/svc1:p80 cluster IP" -m tcp -p tcp -d 172.30.0.41 --dport 80 -j KUBE-SVC-XPGD46QRK7WJZT7O
|
-A KUBE-SERVICES -m comment --comment "ns1/svc1:p80 cluster IP" -m tcp -p tcp -d 172.30.0.41 --dport 80 -j KUBE-SVC-XPGD46QRK7WJZT7O
|
||||||
-A KUBE-SERVICES -m comment --comment "ns1/svc1:p80 loadbalancer IP" -m tcp -p tcp -d 1.2.3.4 --dport 80 -j KUBE-FW-XPGD46QRK7WJZT7O
|
-A KUBE-SERVICES -m comment --comment "ns1/svc1:p80 loadbalancer IP" -m tcp -p tcp -d 1.2.3.4 --dport 80 -j KUBE-FW-XPGD46QRK7WJZT7O
|
||||||
|
-A KUBE-SERVICES -m comment --comment "ns1/svc1:p80 loadbalancer IP" -m tcp -p tcp -d 5.6.7.8 --dport 80 -j KUBE-FW-XPGD46QRK7WJZT7O
|
||||||
-A KUBE-SERVICES -m comment --comment "kubernetes service nodeports; NOTE: this must be the last rule in this chain" -m addrtype --dst-type LOCAL -j KUBE-NODEPORTS
|
-A KUBE-SERVICES -m comment --comment "kubernetes service nodeports; NOTE: this must be the last rule in this chain" -m addrtype --dst-type LOCAL -j KUBE-NODEPORTS
|
||||||
-A KUBE-EXT-XPGD46QRK7WJZT7O -m comment --comment "masquerade traffic for ns1/svc1:p80 external destinations" -j KUBE-MARK-MASQ
|
-A KUBE-EXT-XPGD46QRK7WJZT7O -m comment --comment "masquerade traffic for ns1/svc1:p80 external destinations" -j KUBE-MARK-MASQ
|
||||||
-A KUBE-EXT-XPGD46QRK7WJZT7O -j KUBE-SVC-XPGD46QRK7WJZT7O
|
-A KUBE-EXT-XPGD46QRK7WJZT7O -j KUBE-SVC-XPGD46QRK7WJZT7O
|
||||||
|
-A KUBE-FW-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 loadbalancer IP" -s 192.168.0.0/24 -j KUBE-EXT-XPGD46QRK7WJZT7O
|
||||||
-A KUBE-FW-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 loadbalancer IP" -s 203.0.113.0/25 -j KUBE-EXT-XPGD46QRK7WJZT7O
|
-A KUBE-FW-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 loadbalancer IP" -s 203.0.113.0/25 -j KUBE-EXT-XPGD46QRK7WJZT7O
|
||||||
|
-A KUBE-FW-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 loadbalancer IP" -s 1.2.3.4 -j KUBE-EXT-XPGD46QRK7WJZT7O
|
||||||
|
-A KUBE-FW-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 loadbalancer IP" -s 5.6.7.8 -j KUBE-EXT-XPGD46QRK7WJZT7O
|
||||||
-A KUBE-FW-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 loadbalancer IP" -j KUBE-MARK-DROP
|
-A KUBE-FW-XPGD46QRK7WJZT7O -m comment --comment "ns1/svc1:p80 loadbalancer IP" -j KUBE-MARK-DROP
|
||||||
-A KUBE-MARK-MASQ -j MARK --or-mark 0x4000
|
-A KUBE-MARK-MASQ -j MARK --or-mark 0x4000
|
||||||
-A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN
|
-A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN
|
||||||
@ -2479,34 +2489,86 @@ func TestLoadBalancer(t *testing.T) {
|
|||||||
masq: true,
|
masq: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "accepted external to LB",
|
name: "accepted external to LB1",
|
||||||
sourceIP: testExternalClient,
|
sourceIP: testExternalClient,
|
||||||
destIP: svcLBIP,
|
destIP: svcLBIP1,
|
||||||
destPort: svcPort,
|
destPort: svcPort,
|
||||||
output: fmt.Sprintf("%s:%d", epIP, svcPort),
|
output: fmt.Sprintf("%s:%d", epIP, svcPort),
|
||||||
masq: true,
|
masq: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "blocked external to LB",
|
name: "accepted external to LB2",
|
||||||
|
sourceIP: testExternalClient,
|
||||||
|
destIP: svcLBIP2,
|
||||||
|
destPort: svcPort,
|
||||||
|
output: fmt.Sprintf("%s:%d", epIP, svcPort),
|
||||||
|
masq: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "blocked external to LB1",
|
||||||
sourceIP: testExternalClientBlocked,
|
sourceIP: testExternalClientBlocked,
|
||||||
destIP: svcLBIP,
|
destIP: svcLBIP1,
|
||||||
destPort: svcPort,
|
destPort: svcPort,
|
||||||
output: "DROP",
|
output: "DROP",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "pod to LB (blocked by LoadBalancerSourceRanges)",
|
name: "blocked external to LB2",
|
||||||
|
sourceIP: testExternalClientBlocked,
|
||||||
|
destIP: svcLBIP2,
|
||||||
|
destPort: svcPort,
|
||||||
|
output: "DROP",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "pod to LB1 (blocked by LoadBalancerSourceRanges)",
|
||||||
sourceIP: "10.0.0.2",
|
sourceIP: "10.0.0.2",
|
||||||
destIP: svcLBIP,
|
destIP: svcLBIP1,
|
||||||
destPort: svcPort,
|
destPort: svcPort,
|
||||||
output: "DROP",
|
output: "DROP",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "node to LB (blocked by LoadBalancerSourceRanges)",
|
name: "pod to LB2 (blocked by LoadBalancerSourceRanges)",
|
||||||
sourceIP: testNodeIP,
|
sourceIP: "10.0.0.2",
|
||||||
destIP: svcLBIP,
|
destIP: svcLBIP2,
|
||||||
destPort: svcPort,
|
destPort: svcPort,
|
||||||
output: "DROP",
|
output: "DROP",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "node to LB1 (allowed by LoadBalancerSourceRanges)",
|
||||||
|
sourceIP: testNodeIP,
|
||||||
|
destIP: svcLBIP1,
|
||||||
|
destPort: svcPort,
|
||||||
|
output: fmt.Sprintf("%s:%d", epIP, svcPort),
|
||||||
|
masq: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "node to LB2 (allowed by LoadBalancerSourceRanges)",
|
||||||
|
sourceIP: testNodeIP,
|
||||||
|
destIP: svcLBIP2,
|
||||||
|
destPort: svcPort,
|
||||||
|
output: fmt.Sprintf("%s:%d", epIP, svcPort),
|
||||||
|
masq: true,
|
||||||
|
},
|
||||||
|
|
||||||
|
// The LB rules assume that when you connect from a node to a LB IP, that
|
||||||
|
// something external to kube-proxy will cause the connection to be
|
||||||
|
// SNATted to the LB IP, so if the LoadBalancerSourceRanges include the
|
||||||
|
// node IP, then we add a rule allowing traffic from the LB IP as well...
|
||||||
|
{
|
||||||
|
name: "same node to LB1, SNATted to LB1 (implicitly allowed)",
|
||||||
|
sourceIP: svcLBIP1,
|
||||||
|
destIP: svcLBIP1,
|
||||||
|
destPort: svcPort,
|
||||||
|
output: fmt.Sprintf("%s:%d", epIP, svcPort),
|
||||||
|
masq: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "same node to LB2, SNATted to LB2 (implicitly allowed)",
|
||||||
|
sourceIP: svcLBIP2,
|
||||||
|
destIP: svcLBIP2,
|
||||||
|
destPort: svcPort,
|
||||||
|
output: fmt.Sprintf("%s:%d", epIP, svcPort),
|
||||||
|
masq: true,
|
||||||
|
},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user