mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-24 12:15:52 +00:00
Merge pull request #58769 from pmichali/node-cidr-fix
Automatic merge from submit-queue (batch tested with PRs 59394, 58769, 59423, 59363, 59245). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. IPv6: Ensure calculated node CIDR size for pod subnets is valid With IPv4, the node CIDR prefix is set to /24, which gives 256 pods per node and 256 nodes, when assuming a /16 is used for the pod subnet. For IPv6, the node CIDR prefix is hard-coded to /64. This does not work, because the pod subnet prefix must be /66 or higher and must be a larger subnet (lower value) than the node CIDR prefix. In addition, the bit mask used to track the subnets (implying the number of nodes), can only handle 32K entries, so the difference between pod subnet prefix and node CIDR prefix cannot be more than 16 (bits). To address this, the following algorithm is proposed to provide as many pods per node as possible, and not exceed the number of nodes. If the pod subnet prefix is from /66 to /104, we'll set the node CIDR prefix to 16 more, so that the bit map is not exceeded. The rest of the bits will be for pods per node. If the subnet prefix is from /105 to /112, we'll split the available bits between what is used for the nodes and what is used for the pods per node. This will give a node CIDR prefix from /116 to /120. If the subnet prefix is from /113 to /119, we'll do like IPv4 and ensure that there are 256 pods per node, and the remaining bits will be for the nodes, giving a node CIDR prefix of /120 always. This supports a limited number of nodes, in some cases. If the subnet prefix is /120 to /128, we don't have enough bits and will set the node CIDR prefix to be the same as the pod subnet prefix. This will cause a failure later, when it tests that the pod subnet prefix is larger than the node CIDR prefix.
**What this PR does / why we need it**: **Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*: Fixes #58766 **Special notes for your reviewer**: **Release note**: ```release-note NONE ```
This commit is contained in:
commit
da932c19c8
@ -21,6 +21,7 @@ import (
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
@ -223,6 +224,53 @@ func getAPIServerCommand(cfg *kubeadmapi.MasterConfiguration, k8sVersion *versio
|
||||
return command
|
||||
}
|
||||
|
||||
// calcNodeCidrSize determines the size of the subnets used on each node, based
// on the pod subnet provided. For IPv4, we assume that the pod subnet will
// be /16 and use /24. If the pod subnet cannot be parsed, the IPv4 value will
// be used (/24).
//
// For IPv6, the algorithm does three things. First, the node CIDR is set
// to a multiple of 8, using the available bits for easier readability by user.
// Second, the number of nodes will be 512 to 64K to attempt to maximize the
// number of nodes (see NOTE below). Third, pod networks of /113 and larger will
// be rejected, as the amount of bits available is too small.
//
// A special case is when the pod network size is /112, where /120 will be used,
// only allowing 256 nodes and 256 pods.
//
// If the pod network size is /113 or larger, the node CIDR will be set to the same
// size and this will be rejected later in validation.
//
// NOTE: Currently, the pod network must be /66 or larger. It is not reflected here,
// but a smaller value will fail later validation.
//
// NOTE: Currently, the design allows a maximum of 64K nodes. This algorithm splits
// the available bits to maximize the number used for nodes, but still have the node
// CIDR be a multiple of eight.
//
func calcNodeCidrSize(podSubnet string) string {
	// IPv4 default (also used when the subnet cannot be parsed): /24 node CIDRs.
	ip, cidr, err := net.ParseCIDR(podSubnet)
	if err != nil || ip.To4() != nil {
		return "24"
	}

	prefixLen, addrBits := cidr.Mask.Size()
	var nodePrefix int
	switch {
	case prefixLen == 112:
		// Special case, allows 256 nodes, 256 pods/node.
		nodePrefix = 120
	case prefixLen < 112:
		// Round the node CIDR down to a multiple of 8, giving 512 to 64K nodes.
		nodePrefix = addrBits - ((addrBits-prefixLen-1)/8-1)*8
	default:
		// Not enough bits; reuse the pod prefix so later validation rejects it.
		nodePrefix = prefixLen
	}
	return strconv.Itoa(nodePrefix)
}
|
||||
|
||||
// getControllerManagerCommand builds the right controller manager command from the given config object and version
|
||||
func getControllerManagerCommand(cfg *kubeadmapi.MasterConfiguration, k8sVersion *version.Version) []string {
|
||||
defaultArguments := map[string]string{
|
||||
@ -259,12 +307,7 @@ func getControllerManagerCommand(cfg *kubeadmapi.MasterConfiguration, k8sVersion
|
||||
// Let the controller-manager allocate Node CIDRs for the Pod network.
|
||||
// Each node will get a subspace of the address CIDR provided with --pod-network-cidr.
|
||||
if cfg.Networking.PodSubnet != "" {
|
||||
maskSize := "24"
|
||||
if ip, _, err := net.ParseCIDR(cfg.Networking.PodSubnet); err == nil {
|
||||
if ip.To4() == nil {
|
||||
maskSize = "64"
|
||||
}
|
||||
}
|
||||
maskSize := calcNodeCidrSize(cfg.Networking.PodSubnet)
|
||||
command = append(command, "--allocate-node-cidrs=true", "--cluster-cidr="+cfg.Networking.PodSubnet,
|
||||
"--node-cidr-mask-size="+maskSize)
|
||||
}
|
||||
|
@ -629,7 +629,7 @@ func TestGetControllerManagerCommand(t *testing.T) {
|
||||
},
|
||||
{
|
||||
cfg: &kubeadmapi.MasterConfiguration{
|
||||
Networking: kubeadmapi.Networking{PodSubnet: "2001:101:115::/48"},
|
||||
Networking: kubeadmapi.Networking{PodSubnet: "2001:db8::/64"},
|
||||
CertificatesDir: testCertsDir,
|
||||
KubernetesVersion: "v1.7.0",
|
||||
},
|
||||
@ -645,8 +645,8 @@ func TestGetControllerManagerCommand(t *testing.T) {
|
||||
"--use-service-account-credentials=true",
|
||||
"--controllers=*,bootstrapsigner,tokencleaner",
|
||||
"--allocate-node-cidrs=true",
|
||||
"--cluster-cidr=2001:101:115::/48",
|
||||
"--node-cidr-mask-size=64",
|
||||
"--cluster-cidr=2001:db8::/64",
|
||||
"--node-cidr-mask-size=80",
|
||||
},
|
||||
},
|
||||
}
|
||||
@ -661,6 +661,77 @@ func TestGetControllerManagerCommand(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestCalcNodeCidrSize(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
podSubnet string
|
||||
expectedPrefix string
|
||||
}{
|
||||
{
|
||||
name: "Malformed pod subnet",
|
||||
podSubnet: "10.10.10/160",
|
||||
expectedPrefix: "24",
|
||||
},
|
||||
{
|
||||
name: "V4: Always uses 24",
|
||||
podSubnet: "10.10.10.10/16",
|
||||
expectedPrefix: "24",
|
||||
},
|
||||
{
|
||||
name: "V6: Use pod subnet size, when not enough space",
|
||||
podSubnet: "2001:db8::/128",
|
||||
expectedPrefix: "128",
|
||||
},
|
||||
{
|
||||
name: "V6: Use pod subnet size, when not enough space",
|
||||
podSubnet: "2001:db8::/113",
|
||||
expectedPrefix: "113",
|
||||
},
|
||||
{
|
||||
name: "V6: Special case with 256 nodes",
|
||||
podSubnet: "2001:db8::/112",
|
||||
expectedPrefix: "120",
|
||||
},
|
||||
{
|
||||
name: "V6: Using /120 for node CIDR",
|
||||
podSubnet: "2001:db8::/104",
|
||||
expectedPrefix: "120",
|
||||
},
|
||||
{
|
||||
name: "V6: Using /112 for node CIDR",
|
||||
podSubnet: "2001:db8::/103",
|
||||
expectedPrefix: "112",
|
||||
},
|
||||
{
|
||||
name: "V6: Using /112 for node CIDR",
|
||||
podSubnet: "2001:db8::/96",
|
||||
expectedPrefix: "112",
|
||||
},
|
||||
{
|
||||
name: "V6: Using /104 for node CIDR",
|
||||
podSubnet: "2001:db8::/95",
|
||||
expectedPrefix: "104",
|
||||
},
|
||||
{
|
||||
name: "V6: Largest subnet currently supported",
|
||||
podSubnet: "2001:db8::/66",
|
||||
expectedPrefix: "80",
|
||||
},
|
||||
{
|
||||
name: "V6: For /64 pod net, use /80",
|
||||
podSubnet: "2001:db8::/64",
|
||||
expectedPrefix: "80",
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
actualPrefix := calcNodeCidrSize(test.podSubnet)
|
||||
if actualPrefix != test.expectedPrefix {
|
||||
t.Errorf("Case [%s]\nCalc of node CIDR size for pod subnet %q failed: Expected %q, saw %q",
|
||||
test.name, test.podSubnet, test.expectedPrefix, actualPrefix)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
func TestGetControllerManagerCommandExternalCA(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
|
Loading…
Reference in New Issue
Block a user