From 5b5d3b9410ff4eaf10a36f792a099c227ca658e2 Mon Sep 17 00:00:00 2001
From: Shyam Jeedigunta
Date: Mon, 3 Jul 2017 14:33:28 +0200
Subject: [PATCH] Add parallelism to GCE cluster upgrade

---
 cluster/gce/upgrade.sh | 213 ++++++++++++++++++++++++-----------------
 1 file changed, 124 insertions(+), 89 deletions(-)

diff --git a/cluster/gce/upgrade.sh b/cluster/gce/upgrade.sh
index ad84f1ff090..f6a1299564c 100755
--- a/cluster/gce/upgrade.sh
+++ b/cluster/gce/upgrade.sh
@@ -39,6 +39,7 @@ function usage() {
   echo "  -M:  Upgrade master only"
   echo "  -N:  Upgrade nodes only"
   echo "  -P:  Node upgrade prerequisites only (create a new instance template)"
+  echo "  -c:  Upgrade NODE_UPGRADE_PARALLELISM nodes in parallel (default=1) within a single instance group. The MIGs themselves are dealt with serially."
   echo "  -o:  Use os distro specified in KUBE_NODE_OS_DISTRIBUTION for new nodes. Options include 'debian' or 'gci'"
   echo "  -l:  Use local(dev) binaries. This is only supported for master upgrades."
   echo ""
@@ -254,7 +255,7 @@ function setup-base-image() {
     source "${KUBE_ROOT}/cluster/gce/${NODE_OS_DISTRIBUTION}/node-helper.sh"
     # Reset the node image based on current os distro
     set-node-image
-fi
+  fi
 }
 
 # prepare-node-upgrade creates a new instance template suitable for upgrading
@@ -327,10 +328,105 @@ function upgrade-node-env() {
   fi
 }
 
+# Upgrades a single node.
+# $1: The name of the node
+#
+# Note: This is called multiple times from do-node-upgrade() in parallel, so it
+# must be thread-safe.
+function do-single-node-upgrade() {
+  local -r instance="$1"
+  instance_id=$(gcloud compute instances describe "${instance}" \
+    --format='get(id)' \
+    --project="${PROJECT}" \
+    --zone="${ZONE}" 2>&1) && describe_rc=$? || describe_rc=$?
+  if [[ "${describe_rc}" != 0 ]]; then
+    echo "== FAILED to describe ${instance} =="
+    echo "${instance_id}"
+    return ${describe_rc}
+  fi
+
+  # Drain node
+  echo "== Draining ${instance}. == " >&2
+  "${KUBE_ROOT}/cluster/kubectl.sh" drain --delete-local-data --force --ignore-daemonsets "${instance}" \
+    && drain_rc=$? || drain_rc=$?
+  if [[ "${drain_rc}" != 0 ]]; then
+    echo "== FAILED to drain ${instance} =="
+    return ${drain_rc}
+  fi
+
+  # Recreate instance
+  echo "== Recreating instance ${instance}. ==" >&2
+  recreate=$(gcloud compute instance-groups managed recreate-instances "${group}" \
+    --project="${PROJECT}" \
+    --zone="${ZONE}" \
+    --instances="${instance}" 2>&1) && recreate_rc=$? || recreate_rc=$?
+  if [[ "${recreate_rc}" != 0 ]]; then
+    echo "== FAILED to recreate ${instance} =="
+    echo "${recreate}"
+    return ${recreate_rc}
+  fi
+
+  # Wait for instance to be recreated
+  echo "== Waiting for instance ${instance} to be recreated. ==" >&2
+  while true; do
+    new_instance_id=$(gcloud compute instances describe "${instance}" \
+      --format='get(id)' \
+      --project="${PROJECT}" \
+      --zone="${ZONE}" 2>&1) && describe_rc=$? || describe_rc=$?
+    if [[ "${describe_rc}" != 0 ]]; then
+      echo "== FAILED to describe ${instance} =="
+      echo "${new_instance_id}"
+      echo "  (Will retry.)"
+    elif [[ "${new_instance_id}" == "${instance_id}" ]]; then
+      echo -n .
+    else
+      echo "Instance ${instance} recreated."
+      break
+    fi
+    sleep 1
+  done
+
+  # Wait for k8s node object to reflect new instance id
+  echo "== Waiting for new node to be added to k8s. ==" >&2
+  while true; do
+    external_id=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output=jsonpath='{.spec.externalID}' 2>&1) && kubectl_rc=$? || kubectl_rc=$?
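+    # .spec.externalID holds the numeric GCE instance id. Comparing it against
+    # the cached pre-upgrade id tells us whether the apiserver has seen the
+    # recreated VM yet: the old id means not yet, the new id means it is done.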
+    if [[ "${kubectl_rc}" != 0 ]]; then
+      echo "== FAILED to get node ${instance} =="
+      echo "${external_id}"
+      echo "  (Will retry.)"
+    elif [[ "${external_id}" == "${new_instance_id}" ]]; then
+      echo "Node ${instance} recreated."
+      break
+    elif [[ "${external_id}" == "${instance_id}" ]]; then
+      echo -n .
+    else
+      echo "Unexpected external_id '${external_id}' matches neither old ('${instance_id}') nor new ('${new_instance_id}')."
+      echo "  (Will retry.)"
+    fi
+    sleep 1
+  done
+
+  # Wait for the node to not have SchedulingDisabled=True and also to have
+  # Ready=True.
+  echo "== Waiting for ${instance} to become ready. ==" >&2
+  while true; do
+    cordoned=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output='jsonpath={.status.conditions[?(@.type == "SchedulingDisabled")].status}')
+    ready=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output='jsonpath={.status.conditions[?(@.type == "Ready")].status}')
+    if [[ "${cordoned}" == 'True' ]]; then
+      echo "Node ${instance} is still not ready: SchedulingDisabled=${cordoned}"
+    elif [[ "${ready}" != 'True' ]]; then
+      echo "Node ${instance} is still not ready: Ready=${ready}"
+    else
+      echo "Node ${instance} Ready=${ready}"
+      break
+    fi
+    sleep 1
+  done
+}
+
 # Prereqs:
 # - prepare-node-upgrade should have been called successfully
 function do-node-upgrade() {
-  echo "== Upgrading nodes to ${KUBE_VERSION}. ==" >&2
+  echo "== Upgrading nodes to ${KUBE_VERSION} with max parallelism of ${node_upgrade_parallelism}. ==" >&2
   # Do the actual upgrade.
   # NOTE(zmerlynn): If you are changing this gcloud command, update
   #                 test/e2e/cluster_upgrade.go to match this EXACTLY.
@@ -362,95 +458,30 @@ function do-node-upgrade() {
       echo "${instances}"
       return ${list_instances_rc}
     fi
+
+    process_count_left=${node_upgrade_parallelism}
+    pids=()
+    ret_code_sum=0  # Should stay 0 in the loop iff all parallel node upgrades succeed.
     for instance in ${instances[@]}; do
-      # Cache instance id for later
-      instance_id=$(gcloud compute instances describe "${instance}" \
-        --format='get(id)' \
-        --project="${PROJECT}" \
-        --zone="${ZONE}" 2>&1) && describe_rc=$? || describe_rc=$?
-      if [[ "${describe_rc}" != 0 ]]; then
-        echo "== FAILED to describe ${instance} =="
-        echo "${instance_id}"
-        return ${describe_rc}
-      fi
+      do-single-node-upgrade "${instance}" & pids+=("$!")
 
-      # Drain node
-      echo "== Draining ${instance}. == " >&2
-      "${KUBE_ROOT}/cluster/kubectl.sh" drain --delete-local-data --force --ignore-daemonsets "${instance}" \
-        && drain_rc=$? || drain_rc=$?
-      if [[ "${drain_rc}" != 0 ]]; then
-        echo "== FAILED to drain ${instance} =="
-        return ${drain_rc}
-      fi
-
-      # Recreate instance
-      echo "== Recreating instance ${instance}. ==" >&2
-      recreate=$(gcloud compute instance-groups managed recreate-instances "${group}" \
-        --project="${PROJECT}" \
-        --zone="${ZONE}" \
-        --instances="${instance}" 2>&1) && recreate_rc=$? || recreate_rc=$?
-      if [[ "${recreate_rc}" != 0 ]]; then
-        echo "== FAILED to recreate ${instance} =="
-        echo "${recreate}"
-        return ${recreate_rc}
-      fi
-
-      # Wait for instance to be recreated
-      echo "== Waiting for instance ${instance} to be recreated. ==" >&2
-      while true; do
-        new_instance_id=$(gcloud compute instances describe "${instance}" \
-          --format='get(id)' \
-          --project="${PROJECT}" \
-          --zone="${ZONE}" 2>&1) && describe_rc=$? || describe_rc=$?
-        if [[ "${describe_rc}" != 0 ]]; then
-          echo "== FAILED to describe ${instance} =="
-          echo "${new_instance_id}"
-          echo "  (Will retry.)"
-        elif [[ "${new_instance_id}" == "${instance_id}" ]]; then
-          echo -n .
-        else
-          echo "Instance ${instance} recreated."
-          break
+      # We don't want to run more than ${node_upgrade_parallelism} upgrades at a time,
+      # so wait once we hit that many nodes. This isn't ideal, since one might take much
+      # longer than the others, but it should help.
+      process_count_left=$((process_count_left - 1))
+      if [[ ${process_count_left} -eq 0 || "${instance}" == "${instances[-1]}" ]]; then
+        # Wait for each of the parallel node upgrades to finish.
+        for pid in "${pids[@]}"; do
+          wait $pid
+          ret_code_sum=$(( ret_code_sum + $? ))
+        done
+        # Return if any of the parallel node upgrades failed.
+        if [[ ${ret_code_sum} != 0 ]]; then
+          echo "== Some of the ${node_upgrade_parallelism} parallel node upgrades failed. =="
+          return ${ret_code_sum}
         fi
-        sleep 1
-      done
-
-      # Wait for k8s node object to reflect new instance id
-      echo "== Waiting for new node to be added to k8s. ==" >&2
-      while true; do
-        external_id=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output=jsonpath='{.spec.externalID}' 2>&1) && kubectl_rc=$? || kubectl_rc=$?
-        if [[ "${kubectl_rc}" != 0 ]]; then
-          echo "== FAILED to get node ${instance} =="
-          echo "${external_id}"
-          echo "  (Will retry.)"
-        elif [[ "${external_id}" == "${new_instance_id}" ]]; then
-          echo "Node ${instance} recreated."
-          break
-        elif [[ "${external_id}" == "${instance_id}" ]]; then
-          echo -n .
-        else
-          echo "Unexpected external_id '${external_id}' matches neither old ('${instance_id}') nor new ('${new_instance_id}')."
-          echo "  (Will retry.)"
-        fi
-        sleep 1
-      done
-
-      # Wait for the node to not have SchedulingDisabled=True and also to have
-      # Ready=True.
-      echo "== Waiting for ${instance} to become ready. ==" >&2
-      while true; do
-        cordoned=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output='jsonpath={.status.conditions[?(@.type == "SchedulingDisabled")].status}')
-        ready=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output='jsonpath={.status.conditions[?(@.type == "Ready")].status}')
-        if [[ "${cordoned}" == 'True' ]]; then
-          echo "Node ${instance} is still not ready: SchedulingDisabled=${ready}"
-        elif [[ "${ready}" != 'True' ]]; then
-          echo "Node ${instance} is still not ready: Ready=${ready}"
-        else
-          echo "Node ${instance} Ready=${ready}"
-          break
-        fi
-        sleep 1
-      done
+        process_count_left=${node_upgrade_parallelism}
+        pids=()
+      fi
     done
   done
@@ -471,8 +502,9 @@ node_upgrade=true
 node_prereqs=false
 local_binaries=false
 env_os_distro=false
+node_upgrade_parallelism=1
 
-while getopts ":MNPlho" opt; do
+while getopts ":MNPlcho" opt; do
   case ${opt} in
     M)
      node_upgrade=false
@@ -486,6 +518,9 @@ while getopts ":MNPlho" opt; do
     l)
       local_binaries=true
       ;;
+    c)
+      node_upgrade_parallelism=${NODE_UPGRADE_PARALLELISM:-1}
+      ;;
     o)
       env_os_distro=true
      ;;
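
---

Note for reviewers: the batching logic added to do-node-upgrade() above boils
down to the following standalone sketch. It is only an illustration; the names
upgrade_one, items, and parallelism are stand-ins and not part of the patch.

    #!/usr/bin/env bash
    # Batch-and-wait parallelism: start at most ${parallelism} background jobs,
    # reap the whole batch, and abort if any job in the batch failed.
    # (${items[-1]} needs bash 4.3+.)
    parallelism=2
    items=(node-a node-b node-c node-d node-e)   # stand-in for ${instances[@]}

    upgrade_one() {   # stand-in for do-single-node-upgrade()
      echo "upgrading $1"
      sleep 1
    }

    pids=()
    count_left=${parallelism}
    ret_code_sum=0
    for item in "${items[@]}"; do
      upgrade_one "${item}" & pids+=("$!")
      count_left=$((count_left - 1))
      # Reap once a full batch is in flight, or on the last item.
      if [[ ${count_left} -eq 0 || "${item}" == "${items[-1]}" ]]; then
        for pid in "${pids[@]}"; do
          wait "${pid}"
          ret_code_sum=$((ret_code_sum + $?))
        done
        if [[ ${ret_code_sum} != 0 ]]; then
          echo "a job in this batch failed" >&2
          exit 1
        fi
        pids=()   # reset, or the next batch would re-wait already-reaped pids
        count_left=${parallelism}
      fi
    done

The trade-off, as the in-tree comment says, is that one slow node stalls its
entire batch; a work queue (e.g. xargs -P) would avoid that, at the cost of the
simple per-batch failure barrier used here.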