mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-22 19:31:44 +00:00
Add parallelism to GCE cluster upgrade
This commit is contained in:
parent
a92123c530
commit
5b5d3b9410
@ -39,6 +39,7 @@ function usage() {
|
||||
echo " -M: Upgrade master only"
|
||||
echo " -N: Upgrade nodes only"
|
||||
echo " -P: Node upgrade prerequisites only (create a new instance template)"
|
||||
echo " -c: Upgrade NODE_UPGRADE_PARALLELISM nodes in parallel (default=1) within a single instance group. The MIGs themselves are dealt serially."
|
||||
echo " -o: Use os distro sepcified in KUBE_NODE_OS_DISTRIBUTION for new nodes. Options include 'debian' or 'gci'"
|
||||
echo " -l: Use local(dev) binaries. This is only supported for master upgrades."
|
||||
echo ""
|
||||
@ -254,7 +255,7 @@ function setup-base-image() {
|
||||
source "${KUBE_ROOT}/cluster/gce/${NODE_OS_DISTRIBUTION}/node-helper.sh"
|
||||
# Reset the node image based on current os distro
|
||||
set-node-image
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
# prepare-node-upgrade creates a new instance template suitable for upgrading
|
||||
@ -327,43 +328,12 @@ function upgrade-node-env() {
|
||||
fi
|
||||
}
|
||||
|
||||
# Prereqs:
|
||||
# - prepare-node-upgrade should have been called successfully
|
||||
function do-node-upgrade() {
|
||||
echo "== Upgrading nodes to ${KUBE_VERSION}. ==" >&2
|
||||
# Do the actual upgrade.
|
||||
# NOTE(zmerlynn): If you are changing this gcloud command, update
|
||||
# test/e2e/cluster_upgrade.go to match this EXACTLY.
|
||||
local template_name=$(get-template-name-from-version ${SANITIZED_VERSION})
|
||||
local old_templates=()
|
||||
local updates=()
|
||||
for group in ${INSTANCE_GROUPS[@]}; do
|
||||
old_templates+=($(gcloud compute instance-groups managed list \
|
||||
--project="${PROJECT}" \
|
||||
--zones="${ZONE}" \
|
||||
--regexp="${group}" \
|
||||
--format='value(instanceTemplate)' || true))
|
||||
set_instance_template_out=$(gcloud compute instance-groups managed set-instance-template "${group}" \
|
||||
--template="${template_name}" \
|
||||
--project="${PROJECT}" \
|
||||
--zone="${ZONE}" 2>&1) && set_instance_template_rc=$? || set_instance_template_rc=$?
|
||||
if [[ "${set_instance_template_rc}" != 0 ]]; then
|
||||
echo "== FAILED to set-instance-template for ${group} to ${template_name} =="
|
||||
echo "${set_instance_template_out}"
|
||||
return ${set_instance_template_rc}
|
||||
fi
|
||||
instances=()
|
||||
instances+=($(gcloud compute instance-groups managed list-instances "${group}" \
|
||||
--format='value(instance)' \
|
||||
--project="${PROJECT}" \
|
||||
--zone="${ZONE}" 2>&1)) && list_instances_rc=$? || list_instances_rc=$?
|
||||
if [[ "${list_instances_rc}" != 0 ]]; then
|
||||
echo "== FAILED to list instances in group ${group} =="
|
||||
echo "${instances}"
|
||||
return ${list_instances_rc}
|
||||
fi
|
||||
for instance in ${instances[@]}; do
|
||||
# Cache instance id for later
|
||||
# Upgrades a single node.
|
||||
# $1: The name of the node
|
||||
#
|
||||
# Note: do-node-upgrade() runs several of these at once as background jobs, so this function must be safe to execute concurrently.
|
||||
function do-single-node-upgrade() {
|
||||
local -r instance="$1"
|
||||
instance_id=$(gcloud compute instances describe "${instance}" \
|
||||
--format='get(id)' \
|
||||
--project="${PROJECT}" \
|
||||
@ -451,6 +421,67 @@ function do-node-upgrade() {
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
}
|
||||
|
||||
# Prereqs:
|
||||
# - prepare-node-upgrade should have been called successfully
|
||||
function do-node-upgrade() {
|
||||
echo "== Upgrading nodes to ${KUBE_VERSION} with max parallelism of ${node_upgrade_parallelism}. ==" >&2
|
||||
# Do the actual upgrade.
|
||||
# NOTE(zmerlynn): If you are changing this gcloud command, update
|
||||
# test/e2e/cluster_upgrade.go to match this EXACTLY.
|
||||
local template_name=$(get-template-name-from-version ${SANITIZED_VERSION})
|
||||
local old_templates=()
|
||||
local updates=()
|
||||
for group in ${INSTANCE_GROUPS[@]}; do
|
||||
old_templates+=($(gcloud compute instance-groups managed list \
|
||||
--project="${PROJECT}" \
|
||||
--zones="${ZONE}" \
|
||||
--regexp="${group}" \
|
||||
--format='value(instanceTemplate)' || true))
|
||||
set_instance_template_out=$(gcloud compute instance-groups managed set-instance-template "${group}" \
|
||||
--template="${template_name}" \
|
||||
--project="${PROJECT}" \
|
||||
--zone="${ZONE}" 2>&1) && set_instance_template_rc=$? || set_instance_template_rc=$?
|
||||
if [[ "${set_instance_template_rc}" != 0 ]]; then
|
||||
echo "== FAILED to set-instance-template for ${group} to ${template_name} =="
|
||||
echo "${set_instance_template_out}"
|
||||
return ${set_instance_template_rc}
|
||||
fi
|
||||
instances=()
|
||||
instances+=($(gcloud compute instance-groups managed list-instances "${group}" \
|
||||
--format='value(instance)' \
|
||||
--project="${PROJECT}" \
|
||||
--zone="${ZONE}" 2>&1)) && list_instances_rc=$? || list_instances_rc=$?
|
||||
if [[ "${list_instances_rc}" != 0 ]]; then
|
||||
echo "== FAILED to list instances in group ${group} =="
|
||||
echo "${instances}"
|
||||
return ${list_instances_rc}
|
||||
fi
|
||||
|
||||
process_count_left=${node_upgrade_parallelism}
|
||||
pids=()
|
||||
ret_code_sum=0 # Should stay 0 in the loop iff all parallel node upgrades succeed.
|
||||
for instance in ${instances[@]}; do
|
||||
do-single-node-upgrade "${instance}" & pids+=("$!")
|
||||
|
||||
# We don't want to run more than ${node_upgrade_parallelism} upgrades at a time,
|
||||
# so wait once we hit that many nodes. This isn't ideal, since one might take much
|
||||
# longer than the others, but it should help.
|
||||
process_count_left=$((process_count_left - 1))
|
||||
if [[ process_count_left -eq 0 || "${instance}" == "${instances[-1]}" ]]; then
|
||||
# Wait for each of the parallel node upgrades to finish.
|
||||
for pid in "${pids[@]}"; do
|
||||
wait $pid
|
||||
ret_code_sum=$(( ret_code_sum + $? ))
|
||||
done
|
||||
# Stop and return a non-zero status if even one of the node upgrades in this batch failed.
|
||||
if [[ ${ret_code_sum} != 0 ]]; then
|
||||
echo "== Some of the ${node_upgrade_parallelism} parallel node upgrades failed. =="
|
||||
return ${ret_code_sum}
|
||||
fi
|
||||
process_count_left=${node_upgrade_parallelism}
|
||||
fi
|
||||
done
|
||||
done
|
||||
|
||||
@ -471,8 +502,9 @@ node_upgrade=true
|
||||
node_prereqs=false
|
||||
local_binaries=false
|
||||
env_os_distro=false
|
||||
node_upgrade_parallelism=1
|
||||
|
||||
while getopts ":MNPlho" opt; do
|
||||
while getopts ":MNPlcho" opt; do
|
||||
case ${opt} in
|
||||
M)
|
||||
node_upgrade=false
|
||||
@ -486,6 +518,9 @@ while getopts ":MNPlho" opt; do
|
||||
l)
|
||||
local_binaries=true
|
||||
;;
|
||||
c)
|
||||
node_upgrade_parallelism=${NODE_UPGRADE_PARALLELISM:-1}
|
||||
;;
|
||||
o)
|
||||
env_os_distro=true
|
||||
;;
|
||||
|
Loading…
Reference in New Issue
Block a user