Merge pull request #47513 from gmarek/subnet

Automatic merge from submit-queue

Make big clusters work again after introduction of subnets

This PR does two things: 
  - make IP aliases automatically pick Node IP Range based on number of Nodes,
  - fix logic for starting clusters >4095 Nodes that was broken by introduction of subnets,

cc @wojtek-t @shyamjvs 

```release-note
Setting env var ENABLE_BIG_CLUSTER_SUBNETS=true will allow kube-up.sh to start clusters bigger than 4095 Nodes on GCE.
```

Ref https://github.com/kubernetes/kubernetes/issues/47344
This commit is contained in:
Kubernetes Submit Queue 2017-06-27 08:52:50 -07:00 committed by GitHub
commit ede78d9ee7
6 changed files with 83 additions and 19 deletions

View File

@ -36,6 +36,25 @@ function get-master-size {
echo "${suggested_master_size}"
}
# Prints the node IP range (in CIDR notation) on stdout.
#
# Globals read:
#   NODE_IP_RANGE - optional; when non-empty it is echoed back unchanged and a
#                   notice is written to stderr.
#   NUM_NODES     - node count used to pick the suggested range when
#                   NODE_IP_RANGE is not provided.
#
# The suggested range always starts at 10.40.0.0; only the prefix length
# changes: /22 by default, /21 above 1000 nodes, /20 above 2000 nodes and
# /19 above 4000 nodes.
function get-node-ip-range {
  # An explicit user choice always wins. The notice goes to stderr so that
  # callers capturing stdout receive only the range itself.
  if [[ -n "${NODE_IP_RANGE:-}" ]]; then
    echo "Using user provided NODE_IP_RANGE: ${NODE_IP_RANGE}" >&2
    echo "${NODE_IP_RANGE}"
    return
  fi

  # Check the largest threshold first so the first match is the final answer.
  local node_range
  if [[ "${NUM_NODES}" -gt 4000 ]]; then
    node_range="10.40.0.0/19"
  elif [[ "${NUM_NODES}" -gt 2000 ]]; then
    node_range="10.40.0.0/20"
  elif [[ "${NUM_NODES}" -gt 1000 ]]; then
    node_range="10.40.0.0/21"
  else
    node_range="10.40.0.0/22"
  fi
  echo "${node_range}"
}
if [[ "${FEDERATION:-}" == true ]]; then
NODE_SCOPES="${NODE_SCOPES:-compute-rw,monitoring,logging-write,storage-ro,https://www.googleapis.com/auth/ndev.clouddns.readwrite}"
else

View File

@ -218,7 +218,7 @@ if [ ${ENABLE_IP_ALIASES} = true ]; then
SERVICE_CLUSTER_IP_SUBNETWORK=${KUBE_GCE_SERVICE_CLUSTER_IP_SUBNETWORK:-${INSTANCE_PREFIX}-subnet-services}
# NODE_IP_RANGE is used when ENABLE_IP_ALIASES=true. It is the primary range in
# the subnet and is the range used for node instance IPs.
NODE_IP_RANGE="${NODE_IP_RANGE:-10.40.0.0/22}"
NODE_IP_RANGE="$(get-node-ip-range)"
# Add to the provider custom variables.
PROVIDER_VARS="${PROVIDER_VARS} ENABLE_IP_ALIASES"
fi
@ -245,7 +245,7 @@ NETWORK_POLICY_PROVIDER="${NETWORK_POLICY_PROVIDER:-none}" # calico
# How should the kubelet configure hairpin mode?
HAIRPIN_MODE="${HAIRPIN_MODE:-promiscuous-bridge}" # promiscuous-bridge, hairpin-veth, none
# Optional: if set to true, kube-up will configure the cluster to run e2e tests.
E2E_STORAGE_TEST_ENVIRONMENT=${KUBE_E2E_STORAGE_TEST_ENVIRONMENT:-false}
E2E_STORAGE_TEST_ENVIRONMENT="${KUBE_E2E_STORAGE_TEST_ENVIRONMENT:-false}"
# Evict pods whenever compute resource availability on the nodes gets below a threshold.
EVICTION_HARD="${EVICTION_HARD:-memory.available<250Mi,nodefs.available<10%,nodefs.inodesFree<5%}"
@ -266,4 +266,6 @@ SOFTLOCKUP_PANIC="${SOFTLOCKUP_PANIC:-false}" # true, false
# Indicates if the values (i.e. KUBE_USER and KUBE_PASSWORD for basic
# authentication) in metadata should be treated as canonical, and therefore disk
# copies ought to be recreated/clobbered.
METADATA_CLOBBERS_CONFIG=${METADATA_CLOBBERS_CONFIG:-false}
METADATA_CLOBBERS_CONFIG="${METADATA_CLOBBERS_CONFIG:-false}"
ENABLE_BIG_CLUSTER_SUBNETS="${ENABLE_BIG_CLUSTER_SUBNETS:-false}"

View File

@ -91,7 +91,7 @@ CLUSTER_IP_RANGE="${CLUSTER_IP_RANGE:-10.100.0.0/14}"
MASTER_IP_RANGE="${MASTER_IP_RANGE:-10.246.0.0/24}"
# NODE_IP_RANGE is used when ENABLE_IP_ALIASES=true. It is the primary range in
# the subnet and is the range used for node instance IPs.
NODE_IP_RANGE="${NODE_IP_RANGE:-10.40.0.0/22}"
NODE_IP_RANGE="$(get-node-ip-range)"
RUNTIME_CONFIG="${KUBE_RUNTIME_CONFIG:-}"
@ -315,3 +315,5 @@ ENABLE_APISERVER_ADVANCED_AUDIT="${ENABLE_APISERVER_ADVANCED_AUDIT:-true}" # tru
if [[ "${ENABLE_APISERVER_ADVANCED_AUDIT}" == "true" ]]; then
FEATURE_GATES="${FEATURE_GATES},AdvancedAuditing=true"
fi
ENABLE_BIG_CLUSTER_SUBNETS="${ENABLE_BIG_CLUSTER_SUBNETS:-false}"

View File

@ -74,11 +74,11 @@ set-node-image
# Verify cluster autoscaler configuration.
if [[ "${ENABLE_CLUSTER_AUTOSCALER}" == "true" ]]; then
if [ -z $AUTOSCALER_MIN_NODES ]; then
if [[ -z $AUTOSCALER_MIN_NODES ]]; then
echo "AUTOSCALER_MIN_NODES not set."
exit 1
fi
if [ -z $AUTOSCALER_MAX_NODES ]; then
if [[ -z $AUTOSCALER_MAX_NODES ]]; then
echo "AUTOSCALER_MAX_NODES not set."
exit 1
fi
@ -88,6 +88,8 @@ NODE_INSTANCE_PREFIX="${INSTANCE_PREFIX}-minion"
NODE_TAGS="${NODE_TAG}"
ALLOCATE_NODE_CIDRS=true
PREEXISTING_NETWORK=false
PREEXISTING_NETWORK_MODE=""
KUBE_PROMPT_FOR_UPDATE=${KUBE_PROMPT_FOR_UPDATE:-"n"}
# How long (in seconds) to wait for cluster initialization.
@ -432,7 +434,7 @@ function create-static-ip() {
while true; do
now="$(date +%s)"
# Timeout set to 15 minutes
if [ $((now - start)) -gt 900 ]; then
if [[ $((now - start)) -gt 900 ]]; then
echo "Timeout while waiting for master IP visibility"
exit 2
fi
@ -508,7 +510,11 @@ function make-gcloud-network-argument() {
ret="${ret},aliases=pods-default:${alias_size}"
ret="${ret} --no-can-ip-forward"
else
ret="--network ${network}"
if [[ ${PREEXISTING_NETWORK} = "true" && "${PREEXISTING_NETWORK_MODE}" != "custom" ]]; then
ret="--network ${network}"
else
ret="--subnet=${network}"
fi
ret="${ret} --can-ip-forward"
if [[ -n ${address:-} ]]; then
ret="${ret} --address ${address}"
@ -564,7 +570,7 @@ function create-node-template() {
fi
local local_ssds=""
if [ ! -z ${NODE_LOCAL_SSDS+x} ]; then
if [[ ! -z ${NODE_LOCAL_SSDS+x} ]]; then
for i in $(seq ${NODE_LOCAL_SSDS}); do
local_ssds="$local_ssds--local-ssd=interface=SCSI "
done
@ -720,7 +726,7 @@ function kube-up() {
function check-existing() {
local running_in_terminal=false
# May be false if tty is not allocated (for example with ssh -T).
if [ -t 1 ]; then
if [[ -t 1 ]]; then
running_in_terminal=true
fi
@ -746,6 +752,10 @@ function create-network() {
# The network needs to be created synchronously or we have a race. The
# firewalls can be added concurrent with instance creation.
gcloud compute networks create --project "${PROJECT}" "${NETWORK}" --mode=auto
else
PREEXISTING_NETWORK=true
PREEXISTING_NETWORK_MODE="$(gcloud compute networks list ${NETWORK} --format='value(x_gcloud_mode)' || true)"
echo "Found existing network ${NETWORK} in ${PREEXISTING_NETWORK_MODE} mode."
fi
if ! gcloud compute firewall-rules --project "${PROJECT}" describe "${CLUSTER_NAME}-default-internal-master" &>/dev/null; then
@ -775,10 +785,31 @@ function create-network() {
fi
}
# Switches ${NETWORK} to custom mode and then expands the IP range of the
# subnet named ${NETWORK} in ${REGION} to a /19 prefix.
#
# Globals read: NETWORK, PROJECT, REGION
# Returns: the exit status of the expand-ip-range call; a failure of the
#          switch-mode call is deliberately ignored.
function expand-default-subnetwork() {
  # Best-effort mode switch: its failure must not abort the caller.
  # NOTE(review): presumably this tolerates a network that is already in
  # custom mode - confirm.
  local -a switch_mode_cmd=(
    gcloud compute networks switch-mode "${NETWORK}"
    --mode custom
    --project "${PROJECT}"
    --quiet
  )
  "${switch_mode_cmd[@]}" || true

  # The expansion itself is not guarded, so its status is what callers see.
  local -a expand_range_cmd=(
    gcloud compute networks subnets expand-ip-range "${NETWORK}"
    --region="${REGION}"
    --project "${PROJECT}"
    --prefix-length=19
    --quiet
  )
  "${expand_range_cmd[@]}"
}
function create-subnetworks() {
case ${ENABLE_IP_ALIASES} in
true) ;;
false) return;;
true) echo "IP aliases are enabled. Creating subnetworks.";;
false)
echo "IP aliases are disabled."
if [[ "${ENABLE_BIG_CLUSTER_SUBNETS}" = "true" ]]; then
if [[ "${PREEXISTING_NETWORK}" != "true" ]]; then
expand-default-subnetwork
else
echo "${color_yellow}Using pre-existing network ${NETWORK}, subnets won't be expanded to /19!${color_norm}"
fi
fi
return;;
*) echo "${color_red}Invalid argument to ENABLE_IP_ALIASES${color_norm}"
exit 1;;
esac
@ -796,7 +827,7 @@ function create-subnetworks() {
exit 1
fi
if [ -z ${NODE_IP_RANGE:-} ]; then
if [[ -z ${NODE_IP_RANGE:-} ]]; then
echo "${color_red}NODE_IP_RANGE must be specified{color_norm}"
exit 1
fi
@ -867,6 +898,17 @@ function delete-network() {
function delete-subnetworks() {
if [[ ${ENABLE_IP_ALIASES:-} != "true" ]]; then
if [[ "${ENABLE_BIG_CLUSTER_SUBNETS}" = "true" ]]; then
# If running in custom mode network we need to delete subnets
mode="$(gcloud compute networks list ${NETWORK} --format='value(x_gcloud_mode)' || true)"
if [[ "${mode}" == "custom" ]]; then
echo "Deleting default subnets..."
# This value should be kept in sync with number of regions.
local parallelism=9
gcloud compute networks subnets list --network="${NETWORK}" --format='value(region.basename())' | \
xargs -i -P ${parallelism} gcloud --quiet compute networks subnets delete "${NETWORK}" --region="{}" || true
fi
fi
return
fi
@ -1200,7 +1242,7 @@ function create-nodes-template() {
# TODO(zmerlynn): Refactor setting scope flags.
local scope_flags=
if [ -n "${NODE_SCOPES}" ]; then
if [[ -n "${NODE_SCOPES}" ]]; then
scope_flags="--scopes ${NODE_SCOPES}"
else
scope_flags="--no-scopes"
@ -1612,9 +1654,8 @@ function kube-down() {
"${NETWORK}-default-ssh" \
"${NETWORK}-default-internal" # Pre-1.5 clusters
delete-subnetworks
if [[ "${KUBE_DELETE_NETWORK}" == "true" ]]; then
delete-subnetworks || true
delete-network || true # might fail if there are leaked firewall rules
fi
@ -1836,7 +1877,7 @@ function prepare-push() {
# TODO(zmerlynn): Refactor setting scope flags.
local scope_flags=
if [ -n "${NODE_SCOPES}" ]; then
if [[ -n "${NODE_SCOPES}" ]]; then
scope_flags="--scopes ${NODE_SCOPES}"
else
scope_flags="--no-scopes"

View File

@ -39,7 +39,7 @@ NODE_OS_DISTRIBUTION=${KUBE_NODE_OS_DISTRIBUTION:-debian}
MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-cos-stable-59-9460-64-0}
MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-cos-cloud}
NETWORK=${KUBE_GCE_NETWORK:-default}
NETWORK=${KUBE_GCE_NETWORK:-e2e}
INSTANCE_PREFIX="${INSTANCE_PREFIX:-"default"}"
MASTER_NAME="${INSTANCE_PREFIX}-kubemark-master"
AGGREGATOR_MASTER_NAME="${INSTANCE_PREFIX}-kubemark-aggregator"

View File

@ -80,7 +80,7 @@ function create-master-instance-with-resources {
--image-project="${MASTER_IMAGE_PROJECT}" \
--image "${MASTER_IMAGE}" \
--tags "${MASTER_TAG}" \
--network "${NETWORK}" \
--subnet "${NETWORK}" \
--scopes "storage-ro,compute-rw,logging-write" \
--boot-disk-size "${MASTER_ROOT_DISK_SIZE}" \
--disk "name=${MASTER_NAME}-pd,device-name=master-pd,mode=rw,boot=no,auto-delete=no"