From 70bb57a3e11ce568a10962bbe4c159d70ce29920 Mon Sep 17 00:00:00 2001 From: Filip Grzadkowski Date: Wed, 20 Jul 2016 16:37:31 +0200 Subject: [PATCH 1/2] Revert "Merge pull request #29278 from kubernetes/revert-29201-ha_master" This reverts commit ecebdb570739ec0a37368a25d895e5a8e10036db, reversing changes made to 976ca09d714cf114fb7a9e681bc0b170760cbdab. --- cluster/gce/util.sh | 141 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 121 insertions(+), 20 deletions(-) diff --git a/cluster/gce/util.sh b/cluster/gce/util.sh index 936e42da03f..d6fc0284ce3 100755 --- a/cluster/gce/util.sh +++ b/cluster/gce/util.sh @@ -386,8 +386,9 @@ function detect-master () { detect-project KUBE_MASTER=${MASTER_NAME} if [[ -z "${KUBE_MASTER_IP-}" ]]; then - KUBE_MASTER_IP=$(gcloud compute instances describe --project "${PROJECT}" --zone "${ZONE}" \ - "${MASTER_NAME}" --format='value(networkInterfaces[0].accessConfigs[0].natIP)') + local REGION=${ZONE%-*} + KUBE_MASTER_IP=$(gcloud compute addresses describe "${MASTER_NAME}-ip" \ + --project "${PROJECT}" --region "${REGION}" -q --format='value(address)') fi if [[ -z "${KUBE_MASTER_IP-}" ]]; then echo "Could not detect Kubernetes master node. Make sure you've launched a cluster with 'kube-up.sh'" >&2 @@ -609,6 +610,9 @@ function kube-up { if [[ ${KUBE_USE_EXISTING_MASTER:-} == "true" ]]; then parse-master-env create-nodes + elif [[ ${KUBE_REPLICATE_EXISTING_MASTER:-} == "true" ]]; then + create-loadbalancer + # TODO: Add logic for copying an existing master. else check-existing create-network @@ -739,6 +743,74 @@ function create-master() { create-master-instance "${MASTER_RESERVED_IP}" & } +# Detaches old and attaches new external IP to a VM. + +# Arguments: +# $1 - VM name +# $2 - VM zone +# $3 - external static IP; if empty will use an ephemeral IP address. 
+function attach-external-ip() { + local NAME=${1} + local ZONE=${2} + local IP_ADDR=${3:-} + local ACCESS_CONFIG_NAME=$(gcloud compute instances describe "${NAME}" \ + --project "${PROJECT}" --zone "${ZONE}" \ + --format="value(networkInterfaces[0].accessConfigs[0].name)") + gcloud compute instances delete-access-config "${NAME}" \ + --project "${PROJECT}" --zone "${ZONE}" \ + --access-config-name "${ACCESS_CONFIG_NAME}" + if [[ -z ${IP_ADDR} ]]; then + gcloud compute instances add-access-config "${NAME}" \ + --project "${PROJECT}" --zone "${ZONE}" \ + --access-config-name "${ACCESS_CONFIG_NAME}" + else + gcloud compute instances add-access-config "${NAME}" \ + --project "${PROJECT}" --zone "${ZONE}" \ + --access-config-name "${ACCESS_CONFIG_NAME}" \ + --address "${IP_ADDR}" + fi +} + +# Creates load balancer in front of apiserver if it doesn't exist already. Assumes there's only one +# existing master replica. + +# Assumes: +# PROJECT +# MASTER_NAME +# ZONE +function create-loadbalancer() { + detect-master + local REGION=${ZONE%-*} + + # Step 0: Return early if LB is already configured. + if gcloud compute forwarding-rules describe ${MASTER_NAME} \ + --project "${PROJECT}" --region ${REGION} > /dev/null 2>&1; then + echo "Load balancer already exists" + return + fi + local EXISTING_MASTER_ZONE=$(gcloud compute instances list "${MASTER_NAME}" \ + --project "${PROJECT}" --format="value(zone)") + echo "Creating load balancer in front of an already existing master in ${EXISTING_MASTER_ZONE}" + + # Step 1: Detach master IP address and attach ephemeral address to the existing master + attach-external-ip ${MASTER_NAME} ${EXISTING_MASTER_ZONE} + + # Step 2: Create target pool. + gcloud compute target-pools create "${MASTER_NAME}" --region "${REGION}" + # TODO: We should also add master instances with suffixes + gcloud compute target-pools add-instances ${MASTER_NAME} --instances ${MASTER_NAME} --zone ${EXISTING_MASTER_ZONE} + + # Step 3: Create forwarding rule. 
+ # TODO: This step can take up to 20 min. We need to speed this up... + gcloud compute forwarding-rules create ${MASTER_NAME} \ + --project "${PROJECT}" --region ${REGION} \ + --target-pool ${MASTER_NAME} --address=${KUBE_MASTER_IP} --ports=443 + + echo -n "Waiting for the load balancer configuration to propagate..." + until $(curl -k -m1 https://${KUBE_MASTER_IP} > /dev/null 2>&1); do echo -n .; done + echo "DONE" +} + function create-nodes-firewall() { # Create a single firewall rule for all minions. create-firewall-rule "${NODE_TAG}-all" "${CLUSTER_IP_RANGE}" "${NODE_TAG}" & @@ -1029,6 +1101,53 @@ function kube-down { fi fi + # Check if there are any remaining master replicas. + local REMAINING_MASTER_COUNT=$(gcloud compute instances list \ + --project "${PROJECT}" \ + --regexp "${MASTER_NAME}(-...)?" \ + --format "value(zone)" | wc -l) + + # In the replicated scenario, if there's only a single master left, we should also delete load balancer in front of it. + if [[ "${REMAINING_MASTER_COUNT}" == "1" ]]; then + local REGION=${ZONE%-*} + if gcloud compute forwarding-rules describe "${MASTER_NAME}" --region "${REGION}" --project "${PROJECT}" &>/dev/null; then + detect-master + local REGION=${ZONE%-*} + local EXISTING_MASTER_ZONE=$(gcloud compute instances list "${MASTER_NAME}" \ + --project "${PROJECT}" --format="value(zone)") + gcloud compute forwarding-rules delete \ + --project "${PROJECT}" \ + --region "${REGION}" \ + --quiet \ + "${MASTER_NAME}" + attach-external-ip "${MASTER_NAME}" "${EXISTING_MASTER_ZONE}" "${KUBE_MASTER_IP}" + gcloud compute target-pools delete \ + --project "${PROJECT}" \ + --region "${REGION}" \ + --quiet \ + "${MASTER_NAME}" + fi + fi + + # If there are no more remaining master replicas, we should delete all remaining network resources. + if [[ "${REMAINING_MASTER_COUNT}" == "0" ]]; then + # Delete firewall rule for the master. 
+ if gcloud compute firewall-rules describe --project "${PROJECT}" "${MASTER_NAME}-https" &>/dev/null; then + gcloud compute firewall-rules delete \ + --project "${PROJECT}" \ + --quiet \ + "${MASTER_NAME}-https" + fi + # Delete the master's reserved IP + if gcloud compute addresses describe "${MASTER_NAME}-ip" --region "${REGION}" --project "${PROJECT}" &>/dev/null; then + gcloud compute addresses delete \ + --project "${PROJECT}" \ + --region "${REGION}" \ + --quiet \ + "${MASTER_NAME}-ip" + fi + fi + # Find out what minions are running. local -a minions minions=( $(gcloud compute instances list \ @@ -1047,14 +1166,6 @@ function kube-down { minions=( "${minions[@]:${batch}}" ) done - # Delete firewall rule for the master. - if gcloud compute firewall-rules describe --project "${PROJECT}" "${MASTER_NAME}-https" &>/dev/null; then - gcloud compute firewall-rules delete \ - --project "${PROJECT}" \ - --quiet \ - "${MASTER_NAME}-https" - fi - # Delete firewall rule for minions. if gcloud compute firewall-rules describe --project "${PROJECT}" "${NODE_TAG}-all" &>/dev/null; then gcloud compute firewall-rules delete \ @@ -1083,16 +1194,6 @@ function kube-down { routes=( "${routes[@]:${batch}}" ) done - # Delete the master's reserved IP - local REGION=${ZONE%-*} - if gcloud compute addresses describe "${MASTER_NAME}-ip" --region "${REGION}" --project "${PROJECT}" &>/dev/null; then - gcloud compute addresses delete \ - --project "${PROJECT}" \ - --region "${REGION}" \ - --quiet \ - "${MASTER_NAME}-ip" - fi - # Delete persistent disk for influx-db. if gcloud compute disks describe "${INSTANCE_PREFIX}"-influxdb-pd --zone "${ZONE}" --project "${PROJECT}" &>/dev/null; then gcloud compute disks delete \ From e9585fba9134de4796d281497ac89dc42d0467eb Mon Sep 17 00:00:00 2001 From: Filip Grzadkowski Date: Wed, 20 Jul 2016 17:25:25 +0200 Subject: [PATCH 2/2] 1. Precompute REGION variable in config 2. Add timeout for waiting for loadbalancer 3. 
Fix kube-down so that it doesn't delete some resources if there are still masters/nodes in other zones --- cluster/gce/config-default.sh | 1 + cluster/gce/config-test.sh | 1 + cluster/gce/util.sh | 46 ++++++++++++++++++++--------------- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/cluster/gce/config-default.sh b/cluster/gce/config-default.sh index ea1d5862942..1d02e73ffa8 100755 --- a/cluster/gce/config-default.sh +++ b/cluster/gce/config-default.sh @@ -21,6 +21,7 @@ source "${KUBE_ROOT}/cluster/gce/config-common.sh" GCLOUD=gcloud ZONE=${KUBE_GCE_ZONE:-us-central1-b} +REGION=${ZONE%-*} RELEASE_REGION_FALLBACK=${RELEASE_REGION_FALLBACK:-false} REGIONAL_KUBE_ADDONS=${REGIONAL_KUBE_ADDONS:-true} NODE_SIZE=${NODE_SIZE:-n1-standard-2} diff --git a/cluster/gce/config-test.sh b/cluster/gce/config-test.sh index e886655a307..5d81fb7d7f3 100755 --- a/cluster/gce/config-test.sh +++ b/cluster/gce/config-test.sh @@ -21,6 +21,7 @@ source "${KUBE_ROOT}/cluster/gce/config-common.sh" GCLOUD=gcloud ZONE=${KUBE_GCE_ZONE:-us-central1-b} +REGION=${ZONE%-*} RELEASE_REGION_FALLBACK=${RELEASE_REGION_FALLBACK:-false} REGIONAL_KUBE_ADDONS=${REGIONAL_KUBE_ADDONS:-true} NODE_SIZE=${NODE_SIZE:-n1-standard-2} diff --git a/cluster/gce/util.sh b/cluster/gce/util.sh index d6fc0284ce3..ad562c84f3d 100755 --- a/cluster/gce/util.sh +++ b/cluster/gce/util.sh @@ -379,6 +379,7 @@ function detect-nodes () { # Assumed vars: # MASTER_NAME # ZONE +# REGION # Vars set: # KUBE_MASTER # KUBE_MASTER_IP @@ -386,7 +387,6 @@ function detect-master () { detect-project KUBE_MASTER=${MASTER_NAME} if [[ -z "${KUBE_MASTER_IP-}" ]]; then - local REGION=${ZONE%-*} KUBE_MASTER_IP=$(gcloud compute addresses describe "${MASTER_NAME}-ip" \ --project "${PROJECT}" --region "${REGION}" -q --format='value(address)') fi @@ -727,10 +727,7 @@ function create-master() { KUBE_PROXY_TOKEN=$(dd if=/dev/urandom bs=128 count=1 2>/dev/null | base64 | tr -d "=+/" | dd bs=32 count=1 2>/dev/null) # Reserve the master's IP 
so that it can later be transferred to another VM - # without disrupting the kubelets. IPs are associated with regions, not zones, - # so extract the region name, which is the same as the zone but with the final - # dash and characters trailing the dash removed. - local REGION=${ZONE%-*} + # without disrupting the kubelets. create-static-ip "${MASTER_NAME}-ip" "${REGION}" MASTER_RESERVED_IP=$(gcloud compute addresses describe "${MASTER_NAME}-ip" \ --project "${PROJECT}" --region "${REGION}" -q --format='value(address)') @@ -778,9 +775,9 @@ function attach-external-ip() { # PROJECT # MASTER_NAME # ZONE +# REGION function create-loadbalancer() { detect-master - local REGION=${ZONE%-*} # Step 0: Return early if LB is already configured. if gcloud compute forwarding-rules describe ${MASTER_NAME} \ @@ -807,7 +804,16 @@ function create-loadbalancer() { --target-pool ${MASTER_NAME} --address=${KUBE_MASTER_IP} --ports=443 echo -n "Waiting for the load balancer configuration to propagate..." - until $(curl -k -m1 https://${KUBE_MASTER_IP} > /dev/null 2>&1); do echo -n .; done + local counter=0 + until $(curl -k -m1 https://${KUBE_MASTER_IP} &> /dev/null); do + counter=$((counter+1)) + echo -n . + if [[ ${counter} -ge 1800 ]]; then + echo -e "${color_red}TIMEOUT${color_norm}" >&2 + echo -e "${color_red}Load balancer failed to initialize within ${counter} seconds.${color_norm}" >&2 + exit 2 + fi + done echo "DONE" } @@ -1109,10 +1115,8 @@ function kube-down { # In the replicated scenario, if there's only a single master left, we should also delete load balancer in front of it. 
if [[ "${REMAINING_MASTER_COUNT}" == "1" ]]; then - local REGION=${ZONE%-*} if gcloud compute forwarding-rules describe "${MASTER_NAME}" --region "${REGION}" --project "${PROJECT}" &>/dev/null; then detect-master - local REGION=${ZONE%-*} local EXISTING_MASTER_ZONE=$(gcloud compute instances list "${MASTER_NAME}" \ --project "${PROJECT}" --format="value(zone)") gcloud compute forwarding-rules delete \ @@ -1146,6 +1150,13 @@ function kube-down { --quiet \ "${MASTER_NAME}-ip" fi + # Delete firewall rule for minions. + if gcloud compute firewall-rules describe --project "${PROJECT}" "${NODE_TAG}-all" &>/dev/null; then + gcloud compute firewall-rules delete \ + --project "${PROJECT}" \ + --quiet \ + "${NODE_TAG}-all" + fi fi # Find out what minions are running. @@ -1166,14 +1177,6 @@ function kube-down { minions=( "${minions[@]:${batch}}" ) done - # Delete firewall rule for minions. - if gcloud compute firewall-rules describe --project "${PROJECT}" "${NODE_TAG}-all" &>/dev/null; then - gcloud compute firewall-rules delete \ - --project "${PROJECT}" \ - --quiet \ - "${NODE_TAG}-all" - fi - # Delete routes. local -a routes # Clean up all routes w/ names like "-" @@ -1203,8 +1206,11 @@ function kube-down { "${INSTANCE_PREFIX}"-influxdb-pd fi - export CONTEXT="${PROJECT}_${INSTANCE_PREFIX}" - clear-kubeconfig + # If there are no more remaining master replicas, we should update kubeconfig. + if [[ "${REMAINING_MASTER_COUNT}" == "0" ]]; then + export CONTEXT="${PROJECT}_${INSTANCE_PREFIX}" + clear-kubeconfig + fi set -e } @@ -1225,6 +1231,7 @@ function get-template { # MASTER_NAME # NODE_INSTANCE_PREFIX # ZONE +# REGION # Vars set: # KUBE_RESOURCE_FOUND function check-resources { @@ -1293,7 +1300,6 @@ function check-resources { return 1 fi - local REGION=${ZONE%-*} if gcloud compute addresses describe --project "${PROJECT}" "${MASTER_NAME}-ip" --region "${REGION}" &>/dev/null; then KUBE_RESOURCE_FOUND="Master's reserved IP" return 1