Merge pull request #58769 from pmichali/node-cidr-fix

Automatic merge from submit-queue (batch tested with PRs 59394, 58769, 59423, 59363, 59245). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

IPv6: Ensure calculated node CIDR size for pod subnets is valid

With IPv4, the node CIDR prefix is set to /24, which gives 256 pods per node
and 256 nodes, assuming a /16 is used for the pod subnet.
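
As a quick sanity check of that arithmetic (illustrative only, not code from this PR):

```go
package main

import "fmt"

func main() {
	// A /16 pod subnet carved into /24 node CIDRs:
	fmt.Println("nodes:", 1<<(24-16))            // 2^8 = 256 node subnets
	fmt.Println("pod IPs per node:", 1<<(32-24)) // 2^8 = 256 addresses each
}
```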

For IPv6, the node CIDR prefix is hard-coded to /64. This does not work,
because the pod subnet prefix must be /66 or higher, and the pod subnet must be
a larger subnet (lower prefix value) than the node CIDR.

In addition, the bit mask used to track the subnets (and hence the number of
nodes) can only handle 64K entries, so the difference between the pod subnet
prefix and the node CIDR prefix cannot be more than 16 bits.
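
Concretely, the allocator must track 2^(node CIDR prefix − pod subnet prefix) entries, so capping that difference at 16 bits caps the table at 2^16. A quick illustration (not code from this PR):

```go
package main

import "fmt"

func main() {
	podPrefix, nodePrefix := 64, 80 // example: a /64 pod subnet split into /80 node CIDRs
	nodeEntries := 1 << (nodePrefix - podPrefix)
	fmt.Println("node entries to track:", nodeEntries) // 2^16 = 65536, the bitmap's limit
}
```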

To address this, the following algorithm is proposed to provide as many pods
per node as possible without exceeding the supported number of nodes (a
runnable sketch of the resulting heuristic appears after this description).

If the pod subnet prefix is from /66 to /104, we'll set the node CIDR prefix
to 16 more than the pod subnet prefix, so that the bit map is not exceeded.
The rest of the bits will be used for pods per node.

If the pod subnet prefix is from /105 to /112, we'll split the available bits
between those used for the nodes and those used for the pods per node.
This gives a node CIDR prefix from /116 to /120.

If the pod subnet prefix is from /113 to /119, we'll do as with IPv4 and ensure
that there are 256 pods per node, with the remaining bits used for the nodes,
always giving a node CIDR prefix of /120. In some cases this supports only a
limited number of nodes.

If the pod subnet prefix is from /120 to /128, we don't have enough bits, so
the node CIDR prefix will be set to the same value as the pod subnet prefix.
This will cause a failure later, when validation checks that the pod subnet is
larger than the node CIDR.
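
For reviewers who want to poke at the buckets, here is a minimal standalone sketch mirroring the merged heuristic (the authoritative version is `calcNodeCidrSize` in the diff below; the demo inputs are arbitrary):

```go
package main

import (
	"fmt"
	"net"
	"strconv"
)

// nodeCidrSize mirrors the heuristic of calcNodeCidrSize in this PR.
func nodeCidrSize(podSubnet string) string {
	maskSize := "24" // IPv4 default, also used when parsing fails
	if ip, podCidr, err := net.ParseCIDR(podSubnet); err == nil && ip.To4() == nil {
		podNetSize, totalBits := podCidr.Mask.Size() // e.g. 64, 128
		var size int
		switch {
		case podNetSize == 112:
			size = 120 // special case: 256 nodes, 256 pods per node
		case podNetSize < 112:
			// Choose a node CIDR on a byte boundary, keeping 9-16 bits
			// (512 to 64K entries) for node subnets.
			size = totalBits - ((totalBits-podNetSize-1)/8-1)*8
		default:
			size = podNetSize // too small; rejected later in validation
		}
		maskSize = strconv.Itoa(size)
	}
	return maskSize
}

func main() {
	for _, s := range []string{"10.0.0.0/16", "2001:db8::/64", "2001:db8::/104", "2001:db8::/112"} {
		fmt.Printf("%-16s -> /%s\n", s, nodeCidrSize(s))
	}
}
```

Running it prints, for example, `2001:db8::/64 -> /80`, matching the new unit tests.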



**What this PR does / why we need it**:

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:
Fixes #58766 

**Special notes for your reviewer**:

**Release note**:

```release-note
NONE
```
Kubernetes Submit Queue 2018-02-06 21:34:40 -08:00 committed by GitHub
commit da932c19c8
2 changed files with 123 additions and 9 deletions


@@ -21,6 +21,7 @@ import (
"net"
"os"
"path/filepath"
"strconv"
"strings"
"k8s.io/api/core/v1"
@@ -223,6 +224,53 @@ func getAPIServerCommand(cfg *kubeadmapi.MasterConfiguration, k8sVersion *versio
return command
}
// calcNodeCidrSize determines the size of the subnets used on each node, based
// on the pod subnet provided. For IPv4, we assume that the pod subnet will
// be /16 and use /24. If the pod subnet cannot be parsed, the IPv4 value will
// be used (/24).
//
// For IPv6, the algorithm will do three things. First, the node CIDR will be
// set to a multiple of 8, using the available bits, for easier readability by
// the user. Second, the number of nodes will be 512 to 64K, to attempt to
// maximize the number of nodes (see NOTE below). Third, pod networks of /113
// and larger will be rejected, as the number of bits available is too small.
//
// A special case is when the pod network size is /112, where /120 will be used,
// only allowing 256 nodes and 256 pods per node.
//
// If the pod network size is /113 or larger, the node CIDR will be set to the same
// size and this will be rejected later in validation.
//
// NOTE: Currently, the pod network must be /66 or larger. It is not reflected here,
// but a smaller value will fail later validation.
//
// NOTE: Currently, the design allows a maximum of 64K nodes. This algorithm splits
// the available bits to maximize the number used for nodes, while still having the
// node CIDR be a multiple of eight.
//
func calcNodeCidrSize(podSubnet string) string {
	maskSize := "24"
	if ip, podCidr, err := net.ParseCIDR(podSubnet); err == nil {
		if ip.To4() == nil {
			var nodeCidrSize int
			podNetSize, totalBits := podCidr.Mask.Size()
			switch {
			case podNetSize == 112:
				// Special case, allows 256 nodes, 256 pods/node
				nodeCidrSize = 120
			case podNetSize < 112:
				// Use multiple of 8 for node CIDR, with 512 to 64K nodes:
				// reserve (totalBits-podNetSize-1)/8-1 whole bytes for the
				// pod part, so the node CIDR lands on a byte boundary while
				// keeping 9-16 bits for node subnets.
				nodeCidrSize = totalBits - ((totalBits-podNetSize-1)/8-1)*8
			default:
				// Not enough bits; this will fail later, during validation
				nodeCidrSize = podNetSize
			}
			maskSize = strconv.Itoa(nodeCidrSize)
		}
	}
	return maskSize
}
// getControllerManagerCommand builds the right controller manager command from the given config object and version
func getControllerManagerCommand(cfg *kubeadmapi.MasterConfiguration, k8sVersion *version.Version) []string {
defaultArguments := map[string]string{
@@ -259,12 +307,7 @@ func getControllerManagerCommand(cfg *kubeadmapi.MasterConfiguration, k8sVersion
// Let the controller-manager allocate Node CIDRs for the Pod network.
// Each node will get a subspace of the address CIDR provided with --pod-network-cidr.
if cfg.Networking.PodSubnet != "" {
-	maskSize := "24"
-	if ip, _, err := net.ParseCIDR(cfg.Networking.PodSubnet); err == nil {
-		if ip.To4() == nil {
-			maskSize = "64"
-		}
-	}
+	maskSize := calcNodeCidrSize(cfg.Networking.PodSubnet)
command = append(command, "--allocate-node-cidrs=true", "--cluster-cidr="+cfg.Networking.PodSubnet,
"--node-cidr-mask-size="+maskSize)
}


@@ -629,7 +629,7 @@ func TestGetControllerManagerCommand(t *testing.T) {
},
{
cfg: &kubeadmapi.MasterConfiguration{
-	Networking: kubeadmapi.Networking{PodSubnet: "2001:101:115::/48"},
+	Networking: kubeadmapi.Networking{PodSubnet: "2001:db8::/64"},
CertificatesDir: testCertsDir,
KubernetesVersion: "v1.7.0",
},
@@ -645,8 +645,8 @@ func TestGetControllerManagerCommand(t *testing.T) {
"--use-service-account-credentials=true",
"--controllers=*,bootstrapsigner,tokencleaner",
"--allocate-node-cidrs=true",
"--cluster-cidr=2001:101:115::/48",
"--node-cidr-mask-size=64",
"--cluster-cidr=2001:db8::/64",
"--node-cidr-mask-size=80",
},
},
}
@@ -661,6 +661,77 @@ func TestGetControllerManagerCommand(t *testing.T) {
}
}
func TestCalcNodeCidrSize(t *testing.T) {
	tests := []struct {
		name           string
		podSubnet      string
		expectedPrefix string
	}{
		{
			name:           "Malformed pod subnet",
			podSubnet:      "10.10.10/160",
			expectedPrefix: "24",
		},
		{
			name:           "V4: Always uses 24",
			podSubnet:      "10.10.10.10/16",
			expectedPrefix: "24",
		},
		{
			name:           "V6: Use pod subnet size, when not enough space",
			podSubnet:      "2001:db8::/128",
			expectedPrefix: "128",
		},
		{
			name:           "V6: Use pod subnet size, when not enough space",
			podSubnet:      "2001:db8::/113",
			expectedPrefix: "113",
		},
		{
			name:           "V6: Special case with 256 nodes",
			podSubnet:      "2001:db8::/112",
			expectedPrefix: "120",
		},
		{
			name:           "V6: Using /120 for node CIDR",
			podSubnet:      "2001:db8::/104",
			expectedPrefix: "120",
		},
		{
			name:           "V6: Using /112 for node CIDR",
			podSubnet:      "2001:db8::/103",
			expectedPrefix: "112",
		},
		{
			name:           "V6: Using /112 for node CIDR",
			podSubnet:      "2001:db8::/96",
			expectedPrefix: "112",
		},
		{
			name:           "V6: Using /104 for node CIDR",
			podSubnet:      "2001:db8::/95",
			expectedPrefix: "104",
		},
		{
			name:           "V6: Largest subnet currently supported",
			podSubnet:      "2001:db8::/66",
			expectedPrefix: "80",
		},
		{
			name:           "V6: For /64 pod net, use /80",
			podSubnet:      "2001:db8::/64",
			expectedPrefix: "80",
		},
	}
	for _, test := range tests {
		actualPrefix := calcNodeCidrSize(test.podSubnet)
		if actualPrefix != test.expectedPrefix {
			t.Errorf("Case [%s]\nCalc of node CIDR size for pod subnet %q failed: Expected %q, saw %q",
				test.name, test.podSubnet, test.expectedPrefix, actualPrefix)
		}
	}
}
func TestGetControllerManagerCommandExternalCA(t *testing.T) {
tests := []struct {