From 231fe79332db8662ecefae84cac58f055d1a997b Mon Sep 17 00:00:00 2001 From: Kris Date: Wed, 9 May 2018 11:11:09 -0700 Subject: [PATCH] Uncordon the node after upgrade Previous logic was relying on the node to recreate the node API object and, as a side-effect, uncordon itself. A change went in that no longer ensures the node recreates itself, so the bug in this logic was exposed. --- cluster/gce/upgrade.sh | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/cluster/gce/upgrade.sh b/cluster/gce/upgrade.sh index cff430bd684..905eabe528e 100755 --- a/cluster/gce/upgrade.sh +++ b/cluster/gce/upgrade.sh @@ -342,15 +342,21 @@ function do-single-node-upgrade() { sleep 1 done - # Wait for the node to not have SchedulingDisabled=True and also to have - # Ready=True. + # Uncordon the node. + echo "== Uncordon ${instance}. == " >&2 + local uncordon_rc + "${KUBE_ROOT}/cluster/kubectl.sh" uncordon "${instance}" \ + && uncordon_rc=$? || uncordon_rc=$? + if [[ "${uncordon_rc}" != 0 ]]; then + echo "== FAILED to uncordon ${instance} ==" + return ${uncordon_rc} + fi + + # Wait for the node to have Ready=True. echo "== Waiting for ${instance} to become ready. ==" >&2 while true; do - local cordoned=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output='jsonpath={.status.conditions[?(@.type == "SchedulingDisabled")].status}') local ready=$("${KUBE_ROOT}/cluster/kubectl.sh" get node "${instance}" --output='jsonpath={.status.conditions[?(@.type == "Ready")].status}') - if [[ "${cordoned}" == 'True' ]]; then - echo "Node ${instance} is still not ready: SchedulingDisabled=${ready}" - elif [[ "${ready}" != 'True' ]]; then + if [[ "${ready}" != 'True' ]]; then echo "Node ${instance} is still not ready: Ready=${ready}" else echo "Node ${instance} Ready=${ready}"