	Merge pull request #48429 from shyamjvs/parallel-upgrade
Automatic merge from submit-queue

Add parallelism to GCE cluster upgrade

Fixes https://github.com/kubernetes/kubernetes/issues/48373

Should allow upgrading a 500-node cluster (1.6 -> 1.7) in under 1 hour; it currently takes ~1.5 days. It remains the upgrader's duty, though, to choose a parallelism that avoids disrupting too many pods at once.

/cc @kubernetes/sig-cluster-lifecycle-pr-reviews @kubernetes/sig-scalability-misc @mikedanese @gmarek
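For reference, a parallel node upgrade might be invoked as sketched below. The version and parallelism values are illustrative; per the diff, -c itself takes no argument and only tells the script to honor NODE_UPGRADE_PARALLELISM (default 1):

    # Illustrative invocation: upgrade nodes only (-N), recreating up to 10
    # nodes of each instance group in parallel; the MIGs are still done serially.
    NODE_UPGRADE_PARALLELISM=10 cluster/gce/upgrade.sh -N -c v1.7.2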
@@ -39,6 +39,7 @@ function usage() {
   echo "  -M:  Upgrade master only"
   echo "  -N:  Upgrade nodes only"
   echo "  -P:  Node upgrade prerequisites only (create a new instance template)"
+  echo "  -c:  Upgrade NODE_UPGRADE_PARALLELISM nodes in parallel (default=1) within a single instance group. The MIGs themselves are dealt serially."
   echo "  -o:  Use os distro specified in KUBE_NODE_OS_DISTRIBUTION for new nodes. Options include 'debian' or 'gci'"
   echo "  -l:  Use local(dev) binaries. This is only supported for master upgrades."
   echo ""
@@ -254,7 +255,7 @@ function setup-base-image() {
     source "${KUBE_ROOT}/cluster/gce/${NODE_OS_DISTRIBUTION}/node-helper.sh"
     # Reset the node image based on current os distro
     set-node-image
-fi
+  fi
 }
 
 # prepare-node-upgrade creates a new instance template suitable for upgrading
@@ -327,10 +328,105 @@ function upgrade-node-env() {
   fi
 }
 
+# Upgrades a single node.
+# $1: The name of the node
+#
+# Note: This is called multiple times from do-node-upgrade() in parallel, so should be thread-safe.
+function do-single-node-upgrade() {
+  local -r instance="$1"
+  instance_id=$(gcloud compute instances describe "${instance}" \
+    --format='get(id)' \
+    --project="${PROJECT}" \
+    --zone="${ZONE}" 2>&1) && describe_rc=$? || describe_rc=$?
+  if [[ "${describe_rc}" != 0 ]]; then
+    echo "== FAILED to describe ${instance} =="
+    echo "${instance_id}"
+    return ${describe_rc}
+  fi
+
+  # Drain node
+  echo "== Draining ${instance}. == " >&2
+  "${KUBE_ROOT}/cluster/kubectl.sh" drain --delete-local-data --force --ignore-daemonsets "${instance}" \
+    && drain_rc=$? || drain_rc=$?
+  if [[ "${drain_rc}" != 0 ]]; then
+    echo "== FAILED to drain ${instance} =="
+    return ${drain_rc}
+  fi
+
+  # Recreate instance
+  echo "== Recreating instance ${instance}. ==" >&2
+  recreate=$(gcloud compute instance-groups managed recreate-instances "${group}" \
+    --project="${PROJECT}" \
+    --zone="${ZONE}" \
+    --instances="${instance}" 2>&1) && recreate_rc=$? || recreate_rc=$?
+  if [[ "${recreate_rc}" != 0 ]]; then
+    echo "== FAILED to recreate ${instance} =="
+    echo "${recreate}"
+    return ${recreate_rc}
+  fi
+
+  # Wait for instance to be recreated
+  echo "== Waiting for instance ${instance} to be recreated. ==" >&2
+  while true; do
+    new_instance_id=$(gcloud compute instances describe "${instance}" \
+      --format='get(id)' \
+      --project="${PROJECT}" \
+      --zone="${ZONE}" 2>&1) && describe_rc=$? || describe_rc=$?
+    if [[ "${describe_rc}" != 0 ]]; then
+      echo "== FAILED to describe ${instance} =="
+      echo "${new_instance_id}"
+      echo "  (Will retry.)"
+    elif [[ "${new_instance_id}" == "${instance_id}" ]]; then
+      echo -n .
+    else
+      echo "Instance ${instance} recreated."
+      break
+    fi
+    sleep 1
+  done
+
+  # Wait for k8s node object to reflect new instance id
+  echo "== Waiting for new node to be added to k8s.  ==" >&2
+  while true; do
+    external_id=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output=jsonpath='{.spec.externalID}' 2>&1) && kubectl_rc=$? || kubectl_rc=$?
+    if [[ "${kubectl_rc}" != 0 ]]; then
+      echo "== FAILED to get node ${instance} =="
+      echo "${external_id}"
+      echo "  (Will retry.)"
+    elif [[ "${external_id}" == "${new_instance_id}" ]]; then
+      echo "Node ${instance} recreated."
+      break
+    elif [[ "${external_id}" == "${instance_id}" ]]; then
+      echo -n .
+    else
+      echo "Unexpected external_id '${external_id}' matches neither old ('${instance_id}') nor new ('${new_instance_id}')."
+      echo "  (Will retry.)"
+    fi
+    sleep 1
+  done
+
+  # Wait for the node to not have SchedulingDisabled=True and also to have
+  # Ready=True.
+  echo "== Waiting for ${instance} to become ready. ==" >&2
+  while true; do
+    cordoned=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output='jsonpath={.status.conditions[?(@.type == "SchedulingDisabled")].status}')
+    ready=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output='jsonpath={.status.conditions[?(@.type == "Ready")].status}')
+    if [[ "${cordoned}" == 'True' ]]; then
+      echo "Node ${instance} is still not ready: SchedulingDisabled=${ready}"
+    elif [[ "${ready}" != 'True' ]]; then
+      echo "Node ${instance} is still not ready: Ready=${ready}"
+    else
+      echo "Node ${instance} Ready=${ready}"
+      break
+    fi
+    sleep 1
+  done
+}
+
 # Prereqs:
 # - prepare-node-upgrade should have been called successfully
 function do-node-upgrade() {
-  echo "== Upgrading nodes to ${KUBE_VERSION}. ==" >&2
+  echo "== Upgrading nodes to ${KUBE_VERSION} with max parallelism of ${node_upgrade_parallelism}. ==" >&2
   # Do the actual upgrade.
   # NOTE(zmerlynn): If you are changing this gcloud command, update
   #                 test/e2e/cluster_upgrade.go to match this EXACTLY.
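A note on the error handling repeated throughout do-single-node-upgrade above: scripts in this family typically run with errexit enabled, so the code captures an exit status with the "cmd && rc=$? || rc=$?" idiom rather than inspecting $? on a following line, which would abort the script on failure. A minimal standalone sketch of the idiom, assuming errexit and with illustrative names:

    #!/usr/bin/env bash
    # Sketch only: capture a command's exit code without tripping errexit.
    set -o errexit
    output=$(false 2>&1) && rc=$? || rc=$?  # rc becomes 1; the script keeps running
    echo "exit code: ${rc}"
    output=$(true 2>&1) && rc=$? || rc=$?   # rc becomes 0
    echo "exit code: ${rc}"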
@@ -362,95 +458,30 @@ function do-node-upgrade() {
       echo "${instances}"
       return ${list_instances_rc}
     fi
 
+    process_count_left=${node_upgrade_parallelism}
+    pids=()
+    ret_code_sum=0  # Should stay 0 in the loop iff all parallel node upgrades succeed.
     for instance in ${instances[@]}; do
-      # Cache instance id for later
-      instance_id=$(gcloud compute instances describe "${instance}" \
-        --format='get(id)' \
-        --project="${PROJECT}" \
-        --zone="${ZONE}" 2>&1) && describe_rc=$? || describe_rc=$?
-      if [[ "${describe_rc}" != 0 ]]; then
-        echo "== FAILED to describe ${instance} =="
-        echo "${instance_id}"
-        return ${describe_rc}
-      fi
+      do-single-node-upgrade "${instance}" & pids+=("$!")
 
-      # Drain node
-      echo "== Draining ${instance}. == " >&2
-      "${KUBE_ROOT}/cluster/kubectl.sh" drain --delete-local-data --force --ignore-daemonsets "${instance}" \
-        && drain_rc=$? || drain_rc=$?
-      if [[ "${drain_rc}" != 0 ]]; then
-        echo "== FAILED to drain ${instance} =="
-        return ${drain_rc}
-      fi
-
-      # Recreate instance
-      echo "== Recreating instance ${instance}. ==" >&2
-      recreate=$(gcloud compute instance-groups managed recreate-instances "${group}" \
-        --project="${PROJECT}" \
-        --zone="${ZONE}" \
-        --instances="${instance}" 2>&1) && recreate_rc=$? || recreate_rc=$?
-      if [[ "${recreate_rc}" != 0 ]]; then
-        echo "== FAILED to recreate ${instance} =="
-        echo "${recreate}"
-        return ${recreate_rc}
-      fi
-
-      # Wait for instance to be recreated
-      echo "== Waiting for instance ${instance} to be recreated. ==" >&2
-      while true; do
-        new_instance_id=$(gcloud compute instances describe "${instance}" \
-          --format='get(id)' \
-          --project="${PROJECT}" \
-          --zone="${ZONE}" 2>&1) && describe_rc=$? || describe_rc=$?
-        if [[ "${describe_rc}" != 0 ]]; then
-          echo "== FAILED to describe ${instance} =="
-          echo "${new_instance_id}"
-          echo "  (Will retry.)"
-        elif [[ "${new_instance_id}" == "${instance_id}" ]]; then
-          echo -n .
-        else
-          echo "Instance ${instance} recreated."
-          break
-        fi
-        sleep 1
-      done
-
-      # Wait for k8s node object to reflect new instance id
-      echo "== Waiting for new node to be added to k8s.  ==" >&2
-      while true; do
-        external_id=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output=jsonpath='{.spec.externalID}' 2>&1) && kubectl_rc=$? || kubectl_rc=$?
-        if [[ "${kubectl_rc}" != 0 ]]; then
-          echo "== FAILED to get node ${instance} =="
-          echo "${external_id}"
-          echo "  (Will retry.)"
-        elif [[ "${external_id}" == "${new_instance_id}" ]]; then
-          echo "Node ${instance} recreated."
-          break
-        elif [[ "${external_id}" == "${instance_id}" ]]; then
-          echo -n .
-        else
-          echo "Unexpected external_id '${external_id}' matches neither old ('${instance_id}') nor new ('${new_instance_id}')."
-          echo "  (Will retry.)"
-        fi
-        sleep 1
-      done
-
-      # Wait for the node to not have SchedulingDisabled=True and also to have
-      # Ready=True.
-      echo "== Waiting for ${instance} to become ready. ==" >&2
-      while true; do
-        cordoned=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output='jsonpath={.status.conditions[?(@.type == "SchedulingDisabled")].status}')
-        ready=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output='jsonpath={.status.conditions[?(@.type == "Ready")].status}')
-        if [[ "${cordoned}" == 'True' ]]; then
-          echo "Node ${instance} is still not ready: SchedulingDisabled=${ready}"
-        elif [[ "${ready}" != 'True' ]]; then
-          echo "Node ${instance} is still not ready: Ready=${ready}"
-        else
-          echo "Node ${instance} Ready=${ready}"
-          break
-        fi
-        sleep 1
-      done
+      # We don't want to run more than ${node_upgrade_parallelism} upgrades at a time,
+      # so wait once we hit that many nodes. This isn't ideal, since one might take much
+      # longer than the others, but it should help.
+      process_count_left=$((process_count_left - 1))
+      if [[ process_count_left -eq 0 || "${instance}" == "${instances[-1]}" ]]; then
+        # Wait for each of the parallel node upgrades to finish.
+        for pid in "${pids[@]}"; do
+          wait $pid
+          ret_code_sum=$(( ret_code_sum + $? ))
+        done
+        # Return even if at least one of the node upgrades failed.
+        if [[ ${ret_code_sum} != 0 ]]; then
+          echo "== Some of the ${node_upgrade_parallelism} parallel node upgrades failed. =="
+          return ${ret_code_sum}
+        fi
+        process_count_left=${node_upgrade_parallelism}
+      fi
    done
   done
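The rewritten loop above is a batch-wait pattern: it spawns each node upgrade in the background, and once ${node_upgrade_parallelism} jobs are in flight (or the last instance has been launched) it waits for every pid and sums the exit codes, so a single slow node stalls its whole batch, as the in-code comment concedes. A stripped-down sketch of the same pattern, with hypothetical job names:

    #!/usr/bin/env bash
    # Sketch only: run at most $parallelism background jobs at a time,
    # accumulating exit codes the way ret_code_sum does above.
    parallelism=3
    pids=()
    ret_code_sum=0
    jobs=(job-{1..10})
    for job in "${jobs[@]}"; do
      (sleep $(( RANDOM % 3 )); echo "${job} finished") &  # stand-in for do-single-node-upgrade
      pids+=("$!")
      if [[ ${#pids[@]} -eq ${parallelism} || "${job}" == "${jobs[-1]}" ]]; then
        for pid in "${pids[@]}"; do
          wait "${pid}"
          ret_code_sum=$(( ret_code_sum + $? ))
        done
        pids=()
      fi
    done
    echo "sum of exit codes: ${ret_code_sum}"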
@@ -471,8 +502,9 @@ node_upgrade=true
 node_prereqs=false
 local_binaries=false
 env_os_distro=false
+node_upgrade_parallelism=1
 
-while getopts ":MNPlho" opt; do
+while getopts ":MNPlcho" opt; do
   case ${opt} in
     M)
       node_upgrade=false
@@ -486,6 +518,9 @@ while getopts ":MNPlho" opt; do
     l)
       local_binaries=true
       ;;
+    c)
+      node_upgrade_parallelism=${NODE_UPGRADE_PARALLELISM:-1}
+      ;;
     o)
       env_os_distro=true
       ;;