mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 19:56:01 +00:00
Use BootID instead of ExternalID to check for new instance
PR 60692 changed the way ExternalID is reported on GCE: its value is now the instance name rather than the GCE instance ID, so it can no longer be used to determine VM uniqueness across time. Instead, the upgrade now checks that the node's boot ID has changed.
This commit is contained in:
parent
214ce6572d
commit
1ed1cf6ef3
@ -291,18 +291,17 @@ function upgrade-node-env() {
|
|||||||
# Note: This is called multiple times from do-node-upgrade() in parallel, so should be thread-safe.
|
# Note: This is called multiple times from do-node-upgrade() in parallel, so should be thread-safe.
|
||||||
function do-single-node-upgrade() {
|
function do-single-node-upgrade() {
|
||||||
local -r instance="$1"
|
local -r instance="$1"
|
||||||
instance_id=$(gcloud compute instances describe "${instance}" \
|
local kubectl_rc
|
||||||
--format='get(id)' \
|
local boot_id=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output=jsonpath='{.status.nodeInfo.bootID}' 2>&1) && kubectl_rc=$? || kubectl_rc=$?
|
||||||
--project="${PROJECT}" \
|
if [[ "${kubectl_rc}" != 0 ]]; then
|
||||||
--zone="${ZONE}" 2>&1) && describe_rc=$? || describe_rc=$?
|
echo "== FAILED to get bootID ${instance} =="
|
||||||
if [[ "${describe_rc}" != 0 ]]; then
|
echo "${boot_id}"
|
||||||
echo "== FAILED to describe ${instance} =="
|
return ${kubectl_rc}
|
||||||
echo "${instance_id}"
|
|
||||||
return ${describe_rc}
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Drain node
|
# Drain node
|
||||||
echo "== Draining ${instance}. == " >&2
|
echo "== Draining ${instance}. == " >&2
|
||||||
|
local drain_rc
|
||||||
"${KUBE_ROOT}/cluster/kubectl.sh" drain --delete-local-data --force --ignore-daemonsets "${instance}" \
|
"${KUBE_ROOT}/cluster/kubectl.sh" drain --delete-local-data --force --ignore-daemonsets "${instance}" \
|
||||||
&& drain_rc=$? || drain_rc=$?
|
&& drain_rc=$? || drain_rc=$?
|
||||||
if [[ "${drain_rc}" != 0 ]]; then
|
if [[ "${drain_rc}" != 0 ]]; then
|
||||||
@ -312,7 +311,8 @@ function do-single-node-upgrade() {
|
|||||||
|
|
||||||
# Recreate instance
|
# Recreate instance
|
||||||
echo "== Recreating instance ${instance}. ==" >&2
|
echo "== Recreating instance ${instance}. ==" >&2
|
||||||
recreate=$(gcloud compute instance-groups managed recreate-instances "${group}" \
|
local recreate_rc
|
||||||
|
local recreate=$(gcloud compute instance-groups managed recreate-instances "${group}" \
|
||||||
--project="${PROJECT}" \
|
--project="${PROJECT}" \
|
||||||
--zone="${ZONE}" \
|
--zone="${ZONE}" \
|
||||||
--instances="${instance}" 2>&1) && recreate_rc=$? || recreate_rc=$?
|
--instances="${instance}" 2>&1) && recreate_rc=$? || recreate_rc=$?
|
||||||
@ -322,42 +322,22 @@ function do-single-node-upgrade() {
|
|||||||
return ${recreate_rc}
|
return ${recreate_rc}
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Wait for instance to be recreated
|
# Wait for node status to reflect a new boot ID. This guarantees us
|
||||||
echo "== Waiting for instance ${instance} to be recreated. ==" >&2
|
# that the node status in the API is from a different boot. This
|
||||||
while true; do
|
# does not guarantee that the status is from the upgraded node, but
|
||||||
new_instance_id=$(gcloud compute instances describe "${instance}" \
|
# it is a best effort approximation.
|
||||||
--format='get(id)' \
|
|
||||||
--project="${PROJECT}" \
|
|
||||||
--zone="${ZONE}" 2>&1) && describe_rc=$? || describe_rc=$?
|
|
||||||
if [[ "${describe_rc}" != 0 ]]; then
|
|
||||||
echo "== FAILED to describe ${instance} =="
|
|
||||||
echo "${new_instance_id}"
|
|
||||||
echo " (Will retry.)"
|
|
||||||
elif [[ "${new_instance_id}" == "${instance_id}" ]]; then
|
|
||||||
echo -n .
|
|
||||||
else
|
|
||||||
echo "Instance ${instance} recreated."
|
|
||||||
break
|
|
||||||
fi
|
|
||||||
sleep 1
|
|
||||||
done
|
|
||||||
|
|
||||||
# Wait for k8s node object to reflect new instance id
|
|
||||||
echo "== Waiting for new node to be added to k8s. ==" >&2
|
echo "== Waiting for new node to be added to k8s. ==" >&2
|
||||||
while true; do
|
while true; do
|
||||||
external_id=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output=jsonpath='{.spec.externalID}' 2>&1) && kubectl_rc=$? || kubectl_rc=$?
|
local new_boot_id=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output=jsonpath='{.status.nodeInfo.bootID}' 2>&1) && kubectl_rc=$? || kubectl_rc=$?
|
||||||
if [[ "${kubectl_rc}" != 0 ]]; then
|
if [[ "${kubectl_rc}" != 0 ]]; then
|
||||||
echo "== FAILED to get node ${instance} =="
|
echo "== FAILED to get node ${instance} =="
|
||||||
echo "${external_id}"
|
echo "${boot_id}"
|
||||||
echo " (Will retry.)"
|
echo " (Will retry.)"
|
||||||
elif [[ "${external_id}" == "${new_instance_id}" ]]; then
|
elif [[ "${boot_id}" != "${new_boot_id}" ]]; then
|
||||||
echo "Node ${instance} recreated."
|
echo "Node ${instance} recreated."
|
||||||
break
|
break
|
||||||
elif [[ "${external_id}" == "${instance_id}" ]]; then
|
|
||||||
echo -n .
|
|
||||||
else
|
else
|
||||||
echo "Unexpected external_id '${external_id}' matches neither old ('${instance_id}') nor new ('${new_instance_id}')."
|
echo -n .
|
||||||
echo " (Will retry.)"
|
|
||||||
fi
|
fi
|
||||||
sleep 1
|
sleep 1
|
||||||
done
|
done
|
||||||
@ -366,8 +346,8 @@ function do-single-node-upgrade() {
|
|||||||
# Ready=True.
|
# Ready=True.
|
||||||
echo "== Waiting for ${instance} to become ready. ==" >&2
|
echo "== Waiting for ${instance} to become ready. ==" >&2
|
||||||
while true; do
|
while true; do
|
||||||
cordoned=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output='jsonpath={.status.conditions[?(@.type == "SchedulingDisabled")].status}')
|
local cordoned=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output='jsonpath={.status.conditions[?(@.type == "SchedulingDisabled")].status}')
|
||||||
ready=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output='jsonpath={.status.conditions[?(@.type == "Ready")].status}')
|
local ready=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output='jsonpath={.status.conditions[?(@.type == "Ready")].status}')
|
||||||
if [[ "${cordoned}" == 'True' ]]; then
|
if [[ "${cordoned}" == 'True' ]]; then
|
||||||
echo "Node ${instance} is still not ready: SchedulingDisabled=${ready}"
|
echo "Node ${instance} is still not ready: SchedulingDisabled=${ready}"
|
||||||
elif [[ "${ready}" != 'True' ]]; then
|
elif [[ "${ready}" != 'True' ]]; then
|
||||||
|
Loading…
Reference in New Issue
Block a user