From 05a41623b0f641dcfe8d26b9d5ee0f54e7123a52 Mon Sep 17 00:00:00 2001
From: Jerzy Szczepkowski
Date: Tue, 2 Aug 2016 09:08:05 +0200
Subject: [PATCH] Support for creation/removal of master replicas.

HA master: initial support for creation/removal of master replicas by
kube-up/kube-down scripts for GCE on gci.
---
 cluster/gce/gci/master-helper.sh |  40 ++++-
 cluster/gce/util.sh              | 243 +++++++++++++++++++++++++++----
 2 files changed, 248 insertions(+), 35 deletions(-)

diff --git a/cluster/gce/gci/master-helper.sh b/cluster/gce/gci/master-helper.sh
index 61805a629d9..7c15643dd94 100755
--- a/cluster/gce/gci/master-helper.sh
+++ b/cluster/gce/gci/master-helper.sh
@@ -37,8 +37,32 @@ function create-master-instance {
 
   write-master-env
   ensure-gci-metadata-files
-  gcloud compute instances create "${MASTER_NAME}" \
-    ${address_opt} \
+  create-master-instance-internal "${MASTER_NAME}" "${address_opt}"
+}
+
+function replicate-master-instance() {
+  local existing_master_zone="${1}"
+  local existing_master_name="${2}"
+  local existing_master_replicas="${3}"
+
+  local kube_env="$(get-metadata "${existing_master_zone}" "${existing_master_name}" kube-env)"
+  # Substitute INITIAL_ETCD_CLUSTER to enable etcd clustering.
+  kube_env="$(echo "${kube_env}" | grep -v "INITIAL_ETCD_CLUSTER")"
+  kube_env="$(echo -e "${kube_env}\nINITIAL_ETCD_CLUSTER: '${existing_master_replicas},${REPLICA_NAME}'")"
+  echo "${kube_env}" > "${KUBE_TEMP}/master-kube-env.yaml"
+  get-metadata "${existing_master_zone}" "${existing_master_name}" cluster-name > "${KUBE_TEMP}/cluster-name.txt"
+  get-metadata "${existing_master_zone}" "${existing_master_name}" gci-update-strategy > "${KUBE_TEMP}/gci-update.txt"
+  get-metadata "${existing_master_zone}" "${existing_master_name}" gci-ensure-gke-docker > "${KUBE_TEMP}/gci-ensure-gke-docker.txt"
+  get-metadata "${existing_master_zone}" "${existing_master_name}" gci-docker-version > "${KUBE_TEMP}/gci-docker-version.txt"
+
+  create-master-instance-internal "${REPLICA_NAME}"
+}
+
+function create-master-instance-internal() {
+  local -r master_name="${1}"
+  local -r address_option="${2:-}"
+  gcloud compute instances create "${master_name}" \
+    ${address_option} \
     --project "${PROJECT}" \
     --zone "${ZONE}" \
     --machine-type "${MASTER_SIZE}" \
@@ -50,6 +74,16 @@ function create-master-instance {
     --can-ip-forward \
     --metadata-from-file \
       "kube-env=${KUBE_TEMP}/master-kube-env.yaml,user-data=${KUBE_ROOT}/cluster/gce/gci/master.yaml,configure-sh=${KUBE_ROOT}/cluster/gce/gci/configure.sh,cluster-name=${KUBE_TEMP}/cluster-name.txt,gci-update-strategy=${KUBE_TEMP}/gci-update.txt,gci-ensure-gke-docker=${KUBE_TEMP}/gci-ensure-gke-docker.txt,gci-docker-version=${KUBE_TEMP}/gci-docker-version.txt" \
-    --disk "name=${MASTER_NAME}-pd,device-name=master-pd,mode=rw,boot=no,auto-delete=no" \
+    --disk "name=${master_name}-pd,device-name=master-pd,mode=rw,boot=no,auto-delete=no" \
     --boot-disk-size "${MASTER_ROOT_DISK_SIZE:-10}"
 }
+
+function get-metadata() {
+  local zone="${1}"
+  local name="${2}"
+  local key="${3}"
+  gcloud compute ssh "${name}" \
+    --project "${PROJECT}" \
+    --zone "${zone}" \
+    --command "curl \"http://metadata.google.internal/computeMetadata/v1/instance/attributes/${key}\" -H \"Metadata-Flavor: Google\"" 2>/dev/null
+}
diff --git a/cluster/gce/util.sh b/cluster/gce/util.sh
index 8aca898e742..67659f85bcd 100755
--- a/cluster/gce/util.sh
+++ b/cluster/gce/util.sh
@@ -98,17 +98,17 @@ KUBE_SKIP_UPDATE=${KUBE_SKIP_UPDATE-"n"}
 # How long (in seconds) to wait for cluster initialization.
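# ---------------------------------------------------------------------------
# Illustrative sketch only, not part of the patch: the kube-env rewrite that
# replicate-master-instance (above) performs when cloning an existing master.
# The helper name, file path and replica names here are hypothetical; the
# INITIAL_ETCD_CLUSTER key is the one the patch manipulates so that etcd on
# the new VM starts in clustered mode.
function rewrite-initial-etcd-cluster() {
  local -r kube_env_file="${1}"   # e.g. /tmp/master-kube-env.yaml (hypothetical)
  local -r replicas_csv="${2}"    # e.g. "kubernetes-master,kubernetes-master-abc"
  # Drop any existing INITIAL_ETCD_CLUSTER line, then append one listing every
  # replica, mirroring the grep -v / echo -e pair in replicate-master-instance.
  {
    grep -v "INITIAL_ETCD_CLUSTER" "${kube_env_file}"
    echo "INITIAL_ETCD_CLUSTER: '${replicas_csv}'"
  } > "${kube_env_file}.new" && mv "${kube_env_file}.new" "${kube_env_file}"
}
# On a GCE VM the source kube-env can be read straight from the metadata
# server, which is what get-metadata runs over SSH:
#   curl -s -H "Metadata-Flavor: Google" \
#     "http://metadata.google.internal/computeMetadata/v1/instance/attributes/kube-env"
# ---------------------------------------------------------------------------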
KUBE_CLUSTER_INITIALIZATION_TIMEOUT=${KUBE_CLUSTER_INITIALIZATION_TIMEOUT:-300} -function join_csv { +function join_csv() { local IFS=','; echo "$*"; } # This function returns the first string before the comma -function split_csv { +function split_csv() { echo "$*" | cut -d',' -f1 } # Verify prereqs -function verify-prereqs { +function verify-prereqs() { local cmd for cmd in gcloud gsutil; do if ! which "${cmd}" >/dev/null; then @@ -149,7 +149,7 @@ function verify-prereqs { # # Vars set: # KUBE_TEMP -function ensure-temp-dir { +function ensure-temp-dir() { if [[ -z ${KUBE_TEMP-} ]]; then KUBE_TEMP=$(mktemp -d -t kubernetes.XXXXXX) trap 'rm -rf "${KUBE_TEMP}"' EXIT @@ -162,7 +162,7 @@ function ensure-temp-dir { # Vars set: # PROJECT # PROJECT_REPORTED -function detect-project () { +function detect-project() { if [[ -z "${PROJECT-}" ]]; then PROJECT=$(gcloud config list project --format 'value(core.project)') fi @@ -328,7 +328,7 @@ function upload-server-tars() { # Vars set: # NODE_NAMES # INSTANCE_GROUPS -function detect-node-names { +function detect-node-names() { detect-project INSTANCE_GROUPS=() INSTANCE_GROUPS+=($(gcloud compute instance-groups managed list \ @@ -354,7 +354,7 @@ function detect-node-names { # Vars set: # NODE_NAMES # KUBE_NODE_IP_ADDRESSES (array) -function detect-nodes () { +function detect-nodes() { detect-project detect-node-names KUBE_NODE_IP_ADDRESSES=() @@ -383,7 +383,7 @@ function detect-nodes () { # Vars set: # KUBE_MASTER # KUBE_MASTER_IP -function detect-master () { +function detect-master() { detect-project KUBE_MASTER=${MASTER_NAME} if [[ -z "${KUBE_MASTER_IP-}" ]]; then @@ -413,7 +413,7 @@ function get-master-env() { # Robustly try to create a static ip. # $1: The name of the ip to create # $2: The name of the region to create the ip in. -function create-static-ip { +function create-static-ip() { detect-project local attempt=0 local REGION="$2" @@ -446,7 +446,7 @@ function create-static-ip { # $1: The name of firewall rule. # $2: IP ranges. # $3: Target tags for this firewall rule. -function create-firewall-rule { +function create-firewall-rule() { detect-project local attempt=0 while true; do @@ -470,7 +470,7 @@ function create-firewall-rule { } # $1: version (required) -function get-template-name-from-version { +function get-template-name-from-version() { # trim template name to pass gce name validation echo "${NODE_INSTANCE_PREFIX}-template-${1}" | cut -c 1-63 | sed 's/[\.\+]/-/g;s/-*$//g' } @@ -479,7 +479,7 @@ function get-template-name-from-version { # $1: The name of the instance template. # $2: The scopes flag. # $3 and others: Metadata entries (must all be from a file). -function create-node-template { +function create-node-template() { detect-project local template_name="$1" @@ -536,7 +536,7 @@ function create-node-template { # Robustly try to add metadata on an instance. # $1: The name of the instance. # $2...$n: The metadata key=value pairs to add. -function add-instance-metadata { +function add-instance-metadata() { local -r instance=$1 shift 1 local -r kvs=( "$@" ) @@ -563,7 +563,7 @@ function add-instance-metadata { # Robustly try to add metadata on an instance, from a file. # $1: The name of the instance. # $2...$n: The metadata key=file pairs to add. 
-function add-instance-metadata-from-file { +function add-instance-metadata-from-file() { local -r instance=$1 shift 1 local -r kvs=( "$@" ) @@ -593,7 +593,7 @@ function add-instance-metadata-from-file { # Assumed vars # KUBE_ROOT # -function kube-up { +function kube-up() { ensure-temp-dir detect-project @@ -611,8 +611,17 @@ function kube-up { parse-master-env create-nodes elif [[ ${KUBE_REPLICATE_EXISTING_MASTER:-} == "true" ]]; then + # TODO(jsz): implement adding replica for other distributions. + if [[ "${MASTER_OS_DISTRIBUTION}" != "gci" ]]; then + echo "Master replication supported only for gci" + return 1 + fi create-loadbalancer - # TODO: Add logic for copying an existing master. + # If replication of master fails, we need to ensure that the replica is removed from etcd clusters. + if ! replicate-master; then + remove-replica-from-etcd 4001 || true + remove-replica-from-etcd 4002 || true + fi else check-existing create-network @@ -732,6 +741,80 @@ function create-master() { create-master-instance "${MASTER_RESERVED_IP}" & } +# Adds master replica to etcd cluster. +# +# Assumed vars: +# REPLICA_NAME +# PROJECT +# EXISTING_MASTER_NAME +# EXISTING_MASTER_ZONE +# +# $1: etcd client port +# $2: etcd internal port +# returns the result of ssh command which adds replica +function add-replica-to-etcd() { + local -r client_port="${1}" + local -r internal_port="${2}" + gcloud compute ssh "${EXISTING_MASTER_NAME}" \ + --project "${PROJECT}" \ + --zone "${EXISTING_MASTER_ZONE}" \ + --command \ + "curl localhost:${client_port}/v2/members -XPOST -H \"Content-Type: application/json\" -d '{\"peerURLs\":[\"http://${REPLICA_NAME}:${internal_port}\"]}'" + return $? +} + +# Sets EXISTING_MASTER_NAME and EXISTING_MASTER_ZONE variables. +# +# Assumed vars: +# PROJECT +# +# NOTE: Must be in sync with get-replica-name-regexp +function set-existing-master() { + local existing_master=$(gcloud compute instances list \ + --project "${PROJECT}" \ + --regexp "$(get-replica-name-regexp)" \ + --format "value(name,zone)" | head -n1) + EXISTING_MASTER_NAME="$(echo "${existing_master}" | cut -f1)" + EXISTING_MASTER_ZONE="$(echo "${existing_master}" | cut -f2)" +} + +function replicate-master() { + set-replica-name + set-existing-master + + echo "Replicating existing master ${EXISTING_MASTER_ZONE}/${EXISTING_MASTER_NAME} as ${ZONE}/${REPLICA_NAME}" + + # Before we do anything else, we should configure etcd to expect more replicas. + if ! add-replica-to-etcd 4001 2380; then + echo "Failed to add master replica to etcd cluster." + return 1 + fi + if ! add-replica-to-etcd 4002 2381; then + echo "Failed to add master replica to etcd events cluster." + return 1 + fi + + # We have to make sure the disk is created before creating the master VM, so + # run this in the foreground. + gcloud compute disks create "${REPLICA_NAME}-pd" \ + --project "${PROJECT}" \ + --zone "${ZONE}" \ + --type "${MASTER_DISK_TYPE}" \ + --size "${MASTER_DISK_SIZE}" + + # Sets MASTER_ROOT_DISK_SIZE that is used by create-master-instance + get-master-root-disk-size + + local existing_master_replicas="$(get-all-replica-names)" + replicate-master-instance "${EXISTING_MASTER_ZONE}" "${EXISTING_MASTER_NAME}" "${existing_master_replicas}" + + # Add new replica to the load balancer. + gcloud compute target-pools add-instances "${MASTER_NAME}" \ + --project "${PROJECT}" \ + --zone "${ZONE}" \ + --instances "${REPLICA_NAME}" +} + # Detaches old and ataches new external IP to a VM. 
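# ---------------------------------------------------------------------------
# A minimal sketch (not part of the patch) of the etcd call that
# add-replica-to-etcd above issues on the existing master: the new replica's
# peer URL is registered through the etcd v2 members API before the replica VM
# is even created. The replica name "kubernetes-master-abc" is hypothetical;
# 4001/2380 are the client/peer ports the patch uses for the main etcd cluster
# (4002/2381 for the events cluster).
curl "http://127.0.0.1:4001/v2/members" \
  -X POST \
  -H "Content-Type: application/json" \
  -d '{"peerURLs": ["http://kubernetes-master-abc:2380"]}'

# When debugging a failed replication, the current membership can be listed
# from the same endpoint:
curl -s "http://127.0.0.1:4001/v2/members"
# ---------------------------------------------------------------------------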
# # Arguments: @@ -1022,6 +1105,26 @@ function check-cluster() { } +# Removes master replica from etcd cluster. +# +# Assumed vars: +# REPLICA_NAME +# PROJECT +# EXISTING_MASTER_NAME +# EXISTING_MASTER_ZONE +# +# $1: etcd client port +# returns the result of ssh command which removes replica +function remove-replica-from-etcd() { + local -r port="${1}" + gcloud compute ssh "${EXISTING_MASTER_NAME}" \ + --project "${PROJECT}" \ + --zone "${EXISTING_MASTER_ZONE}" \ + --command \ + "curl -s localhost:${port}/v2/members/\$(curl -s localhost:${port}/v2/members -XGET | sed 's/{\\\"id/\n/g' | grep ${REPLICA_NAME} | cut -f 3 -d \\\") -XDELETE -L 2>/dev/null" + return $? +} + # Delete a kubernetes cluster. This is called from test-teardown. # # Assumed vars: @@ -1031,7 +1134,7 @@ function check-cluster() { # This function tears down cluster resources 10 at a time to avoid issuing too many # API calls and exceeding API quota. It is important to bring down the instances before bringing # down the firewall rules and routes. -function kube-down { +function kube-down() { local -r batch=200 detect-project @@ -1069,23 +1172,39 @@ function kube-down { fi done - # First delete the master (if it exists). - if gcloud compute instances describe "${MASTER_NAME}" --zone "${ZONE}" --project "${PROJECT}" &>/dev/null; then + local -r REPLICA_NAME="$(get-replica-name)" + + set-existing-master + + # Un-register the master replica from etcd and events etcd. + remove-replica-from-etcd 4001 + remove-replica-from-etcd 4002 + + # Delete the master replica (if it exists). + if gcloud compute instances describe "${REPLICA_NAME}" --zone "${ZONE}" --project "${PROJECT}" &>/dev/null; then + # If there is a load balancer in front of apiservers we need to first update its configuration. + if gcloud compute target-pools describe "${MASTER_NAME}" --region "${REGION}" --project "${PROJECT}" &>/dev/null; then + gcloud compute target-pools remove-instances "${MASTER_NAME}" \ + --project "${PROJECT}" \ + --zone "${ZONE}" \ + --instances "${REPLICA_NAME}" + fi + # Now we can safely delete the VM. gcloud compute instances delete \ --project "${PROJECT}" \ --quiet \ --delete-disks all \ --zone "${ZONE}" \ - "${MASTER_NAME}" + "${REPLICA_NAME}" fi - # Delete the master pd (possibly leaked by kube-up if master create failed). - if gcloud compute disks describe "${MASTER_NAME}"-pd --zone "${ZONE}" --project "${PROJECT}" &>/dev/null; then + # Delete the master replica pd (possibly leaked by kube-up if master create failed). + if gcloud compute disks describe "${REPLICA_NAME}"-pd --zone "${ZONE}" --project "${PROJECT}" &>/dev/null; then gcloud compute disks delete \ --project "${PROJECT}" \ --quiet \ --zone "${ZONE}" \ - "${MASTER_NAME}"-pd + "${REPLICA_NAME}"-pd fi # Delete disk for cluster registry if enabled @@ -1102,7 +1221,7 @@ function kube-down { # Check if this are any remaining master replicas. local REMAINING_MASTER_COUNT=$(gcloud compute instances list \ --project "${PROJECT}" \ - --regexp "${MASTER_NAME}(-...)?" \ + --regexp "$(get-replica-name-regexp)" \ --format "value(zone)" | wc -l) # In the replicated scenario, if there's only a single master left, we should also delete load balancer in front of it. @@ -1206,13 +1325,73 @@ function kube-down { set -e } +# Prints name of one of the master replicas in the current zone. It will be either +# just MASTER_NAME or MASTER_NAME with a suffix for a replica (see get-replica-name-regexp). 
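# ---------------------------------------------------------------------------
# The remove-replica-from-etcd one-liner above packs three steps into a single
# curl/sed pipeline. The same flow spelled out, using jq purely for
# readability (jq is an assumption of this sketch, not something the patch
# relies on; the replica name and port are illustrative):
replica_name="kubernetes-master-abc"
etcd_port=4001

# 1. List the members and pick the ID of the entry whose peer URL mentions the
#    replica being removed.
member_id="$(curl -s "http://127.0.0.1:${etcd_port}/v2/members" \
  | jq -r --arg name "${replica_name}" \
      '.members[] | select(.peerURLs | join(" ") | contains($name)) | .id')"

# 2. Unregister that member so etcd no longer expects it in the quorum.
curl -s -X DELETE "http://127.0.0.1:${etcd_port}/v2/members/${member_id}"
# ---------------------------------------------------------------------------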
+# +# Assumed vars: +# PROJECT +# ZONE +# MASTER_NAME +# +# NOTE: Must be in sync with get-replica-name-regexp and set-replica-name. +function get-replica-name() { + echo $(gcloud compute instances list \ + --project "${PROJECT}" \ + --zone "${ZONE}" \ + --regexp "$(get-replica-name-regexp)" \ + --format "value(name)" | head -n1) +} + +# Prints comma-separated names of all of the master replicas in all zones. +# +# Assumed vars: +# PROJECT +# MASTER_NAME +# +# NOTE: Must be in sync with get-replica-name-regexp and set-replica-name. +function get-all-replica-names() { + echo $(gcloud compute instances list \ + --project "${PROJECT}" \ + --regexp "$(get-replica-name-regexp)" \ + --format "value(name)" | tr "\n" "," | sed 's/,$//') +} + +# Prints regexp for full master machine name. In a cluster with replicated master, +# VM names may either be MASTER_NAME or MASTER_NAME with a suffix for a replica. +function get-replica-name-regexp() { + echo "${MASTER_NAME}(-...)?" +} + +# Sets REPLICA_NAME to a unique name for a master replica that will match +# expected regexp (see get-replica-name-regexp). +# +# Assumed vars: +# PROJECT +# ZONE +# MASTER_NAME +# +# Sets: +# REPLICA_NAME +function set-replica-name() { + local instances=$(gcloud compute instances list \ + --project "${PROJECT}" \ + --regexp "$(get-replica-name-regexp)" \ + --format "value(name)") + + suffix="" + while echo "${instances}" | grep "${suffix}" &>/dev/null; do + suffix="$(date | md5sum | head -c3)" + done + REPLICA_NAME="${MASTER_NAME}-${suffix}" +} + # Gets the instance template for given NODE_INSTANCE_PREFIX. It echos the template name so that the function # output can be used. # Assumed vars: # NODE_INSTANCE_PREFIX # # $1: project -function get-template { +function get-template() { gcloud compute instance-templates list -r "${NODE_INSTANCE_PREFIX}-template(-(${KUBE_RELEASE_VERSION_DASHED_REGEX}|${KUBE_CI_VERSION_DASHED_REGEX}))?" \ --project="${1}" --format='value(name)' } @@ -1226,7 +1405,7 @@ function get-template { # REGION # Vars set: # KUBE_RESOURCE_FOUND -function check-resources { +function check-resources() { detect-project detect-node-names @@ -1372,7 +1551,7 @@ function prepare-push() { } # Push binaries to kubernetes master -function push-master { +function push-master() { echo "Updating master metadata ..." write-master-env prepare-startup-script @@ -1395,7 +1574,7 @@ function push-node() { } # Push binaries to kubernetes cluster -function kube-push { +function kube-push() { # Disable this until it's fixed. # See https://github.com/kubernetes/kubernetes/issues/17397 echo "./cluster/kube-push.sh is currently not supported in GCE." @@ -1438,7 +1617,7 @@ function kube-push { # # Assumed Vars: # KUBE_ROOT -function test-build-release { +function test-build-release() { # Make a release "${KUBE_ROOT}/build/release.sh" } @@ -1448,7 +1627,7 @@ function test-build-release { # # Assumed vars: # Variables from config.sh -function test-setup { +function test-setup() { # Detect the project into $PROJECT if it isn't set detect-project @@ -1503,7 +1682,7 @@ function test-setup { # Execute after running tests to perform any required clean-up. This is called # from hack/e2e.go -function test-teardown { +function test-teardown() { detect-project echo "Shutting down test cluster in background." gcloud compute firewall-rules delete \ @@ -1528,7 +1707,7 @@ function test-teardown { } # SSH to a node by name ($1) and run a command ($2). 
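# ---------------------------------------------------------------------------
# A short illustration (names hypothetical) of the replica-naming scheme used
# by the helpers added earlier in this hunk. With MASTER_NAME=kubernetes-master,
# the pattern returned by get-replica-name-regexp, "kubernetes-master(-...)?",
# covers both
#   kubernetes-master        # the original master
#   kubernetes-master-5gf    # a replica with a three-character suffix
# and set-replica-name draws such a suffix the same way as this snippet,
# retrying until no existing instance name contains it:
suffix="$(date | md5sum | head -c3)"
echo "kubernetes-master-${suffix}"
# ---------------------------------------------------------------------------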
-function ssh-to-node { +function ssh-to-node() { local node="$1" local cmd="$2" # Loop until we can successfully ssh into the box