IPv6: Ensure calculated node CIDR size for pod subnets is valid

With IPv4, the node CIDR prefix is set to /24, which gives 256 pods per node
and 256 nodes, when assuming a /16 is used for the pod subnet.

For IPv6, the node CIDR prefix, is hard coded to /64. This does not work,
because currently the pod subnet prefix must be /66 or higher and must be a
larger subnet (lower value) than the node CIDR prefix.

In addition, the bit mask used to track the subnets (implying the number of
nodes), can only handle 64K entries, so the difference between pod subnet
prefix and node CIDR prefix cannot be more than 16 (bits). The node CIDR
value needs to support this restriction.

To address this, the following algorithm is proposed...

For pod subnet prefixes of /113 or smaller, the remaining bits will be used
for the node CIDR, in multiples of 8, and 9-16 bits will be reserved for the
nodes, so that there are 512-64K nodes and 256, 512, 768, ... pods/node.

For example, with a pod network of /111, there will be 17 bits available. This
would give 8 bits for pods per node and 9 bits for nodes. The node CIDR would
be /120. For a pod network of /104, there will be 24 bits available. There will
be 8 bits for nodes, and 16 bits for pods/node, using a /112 node CIDR.

If the pod subnet prefix is /112, then the node CIDR will be set to /120, and
256 nodes and 256 pods/node will be available.

If the subnet prefix is /113 to /128, we don't have enough bits and will set
the node CIDR prefix to be the same as the pod subnet prefix. This will cause
a falure later, when it tests that the pod subnet prefix is larger than the
node CIDR prefix.
This commit is contained in:
Paul Michali
2018-01-24 20:04:36 +00:00
parent b13092554c
commit fcd0ab1f6e
2 changed files with 123 additions and 9 deletions

View File

@@ -21,6 +21,7 @@ import (
"net"
"os"
"path/filepath"
"strconv"
"strings"
"k8s.io/api/core/v1"
@@ -223,6 +224,53 @@ func getAPIServerCommand(cfg *kubeadmapi.MasterConfiguration, k8sVersion *versio
return command
}
// calcNodeCidrSize determines the size of the subnets used on each node, based
// on the pod subnet provided. For IPv4, we assume that the pod subnet will
// be /16 and use /24. If the pod subnet cannot be parsed, the IPv4 value will
// be used (/24).
//
// For IPv6, the algorithm will do two three. First, the node CIDR will be set
// to a multiple of 8, using the available bits for easier readability by user.
// Second, the number of nodes will be 512 to 64K to attempt to maximize the
// number of nodes (see NOTE below). Third, pod networks of /113 and larger will
// be rejected, as the amount of bits available is too small.
//
// A special case is when the pod network size is /112, where /120 will be used,
// only allowing 256 nodes and 256 pods.
//
// If the pod network size is /113 or larger, the node CIDR will be set to the same
// size and this will be rejected later in validation.
//
// NOTE: Currently, the pod network must be /66 or larger. It is not reflected here,
// but a smaller value will fail later validation.
//
// NOTE: Currently, the design allows a maximum of 64K nodes. This algorithm splits
// the available bits to maximize the number used for nodes, but still have the node
// CIDR be a multiple of eight.
//
func calcNodeCidrSize(podSubnet string) string {
maskSize := "24"
if ip, podCidr, err := net.ParseCIDR(podSubnet); err == nil {
if ip.To4() == nil {
var nodeCidrSize int
podNetSize, totalBits := podCidr.Mask.Size()
switch {
case podNetSize == 112:
// Special case, allows 256 nodes, 256 pods/node
nodeCidrSize = 120
case podNetSize < 112:
// Use multiple of 8 for node CIDR, with 512 to 64K nodes
nodeCidrSize = totalBits - ((totalBits-podNetSize-1)/8-1)*8
default:
// Not enough bits, will fail later, when validate
nodeCidrSize = podNetSize
}
maskSize = strconv.Itoa(nodeCidrSize)
}
}
return maskSize
}
// getControllerManagerCommand builds the right controller manager command from the given config object and version
func getControllerManagerCommand(cfg *kubeadmapi.MasterConfiguration, k8sVersion *version.Version) []string {
defaultArguments := map[string]string{
@@ -259,12 +307,7 @@ func getControllerManagerCommand(cfg *kubeadmapi.MasterConfiguration, k8sVersion
// Let the controller-manager allocate Node CIDRs for the Pod network.
// Each node will get a subspace of the address CIDR provided with --pod-network-cidr.
if cfg.Networking.PodSubnet != "" {
maskSize := "24"
if ip, _, err := net.ParseCIDR(cfg.Networking.PodSubnet); err == nil {
if ip.To4() == nil {
maskSize = "64"
}
}
maskSize := calcNodeCidrSize(cfg.Networking.PodSubnet)
command = append(command, "--allocate-node-cidrs=true", "--cluster-cidr="+cfg.Networking.PodSubnet,
"--node-cidr-mask-size="+maskSize)
}

View File

@@ -629,7 +629,7 @@ func TestGetControllerManagerCommand(t *testing.T) {
},
{
cfg: &kubeadmapi.MasterConfiguration{
Networking: kubeadmapi.Networking{PodSubnet: "2001:101:115::/48"},
Networking: kubeadmapi.Networking{PodSubnet: "2001:db8::/64"},
CertificatesDir: testCertsDir,
KubernetesVersion: "v1.7.0",
},
@@ -645,8 +645,8 @@ func TestGetControllerManagerCommand(t *testing.T) {
"--use-service-account-credentials=true",
"--controllers=*,bootstrapsigner,tokencleaner",
"--allocate-node-cidrs=true",
"--cluster-cidr=2001:101:115::/48",
"--node-cidr-mask-size=64",
"--cluster-cidr=2001:db8::/64",
"--node-cidr-mask-size=80",
},
},
}
@@ -661,6 +661,77 @@ func TestGetControllerManagerCommand(t *testing.T) {
}
}
func TestCalcNodeCidrSize(t *testing.T) {
tests := []struct {
name string
podSubnet string
expectedPrefix string
}{
{
name: "Malformed pod subnet",
podSubnet: "10.10.10/160",
expectedPrefix: "24",
},
{
name: "V4: Always uses 24",
podSubnet: "10.10.10.10/16",
expectedPrefix: "24",
},
{
name: "V6: Use pod subnet size, when not enough space",
podSubnet: "2001:db8::/128",
expectedPrefix: "128",
},
{
name: "V6: Use pod subnet size, when not enough space",
podSubnet: "2001:db8::/113",
expectedPrefix: "113",
},
{
name: "V6: Special case with 256 nodes",
podSubnet: "2001:db8::/112",
expectedPrefix: "120",
},
{
name: "V6: Using /120 for node CIDR",
podSubnet: "2001:db8::/104",
expectedPrefix: "120",
},
{
name: "V6: Using /112 for node CIDR",
podSubnet: "2001:db8::/103",
expectedPrefix: "112",
},
{
name: "V6: Using /112 for node CIDR",
podSubnet: "2001:db8::/96",
expectedPrefix: "112",
},
{
name: "V6: Using /104 for node CIDR",
podSubnet: "2001:db8::/95",
expectedPrefix: "104",
},
{
name: "V6: Largest subnet currently supported",
podSubnet: "2001:db8::/66",
expectedPrefix: "80",
},
{
name: "V6: For /64 pod net, use /80",
podSubnet: "2001:db8::/64",
expectedPrefix: "80",
},
}
for _, test := range tests {
actualPrefix := calcNodeCidrSize(test.podSubnet)
if actualPrefix != test.expectedPrefix {
t.Errorf("Case [%s]\nCalc of node CIDR size for pod subnet %q failed: Expected %q, saw %q",
test.name, test.podSubnet, test.expectedPrefix, actualPrefix)
}
}
}
func TestGetControllerManagerCommandExternalCA(t *testing.T) {
tests := []struct {