From e63d227bdff7a1a25bc9f45191b9a6745697ec2a Mon Sep 17 00:00:00 2001
From: hurf
Date: Fri, 19 Jun 2015 13:46:43 +0800
Subject: [PATCH] Use -o template to validate cluster

Instead of using human-oriented output, use -o template to validate
cluster in order to avoid errors caused by column changes.

The node query stays non-fatal ("2> /dev/null || true"), exactly like the
command it replaces: while the cluster bootstraps, the apiserver can go
away and come back, and the retry loop must survive a failing kubectl.
---
 cluster/validate-cluster.sh | 49 ++++++++++++++-----------------------
 1 file changed, 18 insertions(+), 31 deletions(-)

diff --git a/cluster/validate-cluster.sh b/cluster/validate-cluster.sh
index 4b4360600ad..1aa1407cd5d 100755
--- a/cluster/validate-cluster.sh
+++ b/cluster/validate-cluster.sh
@@ -24,9 +24,6 @@ KUBE_ROOT=$(dirname "${BASH_SOURCE}")/..
 source "${KUBE_ROOT}/cluster/kube-env.sh"
 source "${KUBE_ROOT}/cluster/kube-util.sh"
 
-MINIONS_FILE=/tmp/minions-$$
-trap 'rm -rf "${MINIONS_FILE}"' EXIT
-
 EXPECTED_NUM_NODES="${NUM_MINIONS}"
 if [[ "${REGISTER_MASTER_KUBELET:-}" == "true" ]]; then
   EXPECTED_NUM_NODES=$((EXPECTED_NUM_NODES+1))
@@ -34,21 +31,18 @@ fi
 # Make several attempts to deal with slow cluster birth.
 attempt=0
 while true; do
-  # The "kubectl get nodes" output is three columns like this:
+  # The "kubectl get nodes -o template" exports node information.
   #
-  #     NAME                     LABELS    STATUS
-  #     kubernetes-minion-03nb             Ready
-  #
-  # Echo the output, strip the first line, then gather 2 counts:
+  # Echo the output and gather 2 counts:
   #  - Total number of nodes.
   #  - Number of "ready" nodes.
   #
   # Suppress errors from kubectl output because during cluster bootstrapping
   # for clusters where the master node is registered, the apiserver will become
   # available and then get restarted as the kubelet configures the docker bridge.
-  "${KUBE_ROOT}/cluster/kubectl.sh" get nodes > "${MINIONS_FILE}" 2> /dev/null || true
-  found=$(cat "${MINIONS_FILE}" | sed '1d' | grep -c .) || true
-  ready=$(cat "${MINIONS_FILE}" | sed '1d' | awk '{print $NF}' | grep -c '^Ready') || true
+  nodes_status=$("${KUBE_ROOT}/cluster/kubectl.sh" get nodes -o template --template='{{range .items}}{{with index .status.conditions 0}}{{.type}}:{{.status}},{{end}}{{end}}' --api-version=v1 2> /dev/null) || true
+  found=$(echo "${nodes_status}" | tr "," "\n" | grep -c 'Ready:') || true
+  ready=$(echo "${nodes_status}" | tr "," "\n" | grep -c 'Ready:True') || true
 
   if (( "${found}" == "${EXPECTED_NUM_NODES}" )) && (( "${ready}" == "${EXPECTED_NUM_NODES}")); then
     break
@@ -56,7 +50,7 @@ while true; do
     # Set the timeout to ~10minutes (40 x 15 second) to avoid timeouts for 100-node clusters.
     if (( attempt > 40 )); then
       echo -e "${color_red}Detected ${ready} ready nodes, found ${found} nodes out of expected ${EXPECTED_NUM_NODES}. Your cluster may not be working.${color_norm}"
-      cat -n "${MINIONS_FILE}"
+      "${KUBE_ROOT}/cluster/kubectl.sh" get nodes
       exit 2
     else
       echo -e "${color_yellow}Waiting for ${EXPECTED_NUM_NODES} ready nodes. ${ready} ready nodes, ${found} registered. Retrying.${color_norm}"
@@ -65,35 +59,28 @@ while true; do
     sleep 15
   fi
 done
-echo "Found ${found} nodes."
-echo -n " "
-head -n 1 "${MINIONS_FILE}"
-tail -n +2 "${MINIONS_FILE}" | cat -n
+echo "Found ${found} node(s)."
+"${KUBE_ROOT}/cluster/kubectl.sh" get nodes
 
 attempt=0
 while true; do
-  kubectl_output=$("${KUBE_ROOT}/cluster/kubectl.sh" get cs) || true
-
-  # The "kubectl componentstatuses" output is four columns like this:
+  # The "kubectl componentstatuses -o template" exports components health information.
   #
-  #     COMPONENT            HEALTH    MSG       ERR
-  #     controller-manager   Healthy   ok        nil
-  #
-  # Parse the output to capture the value of the second column("HEALTH"), then use grep to
-  # count the number of times it doesn't match "Healthy".
-  non_success_count=$(echo "${kubectl_output}" | \
-    sed '1d' |
-    sed -n 's/^[[:alnum:][:punct:]]/&/p' | \
-    grep --invert-match -c '^[[:alnum:][:punct:]]\{1,\}[[:space:]]\{1,\}Healthy') || true
+  # Echo the output and gather 2 counts:
+  #  - Total number of componentstatuses.
+  #  - Number of "healthy" components.
+  cs_status=$("${KUBE_ROOT}/cluster/kubectl.sh" get componentstatuses -o template --template='{{range .items}}{{with index .conditions 0}}{{.type}}:{{.status}},{{end}}{{end}}' --api-version=v1) || true
+  componentstatuses=$(echo "${cs_status}" | tr "," "\n" | grep -c 'Healthy:') || true
+  healthy=$(echo "${cs_status}" | tr "," "\n" | grep -c 'Healthy:True') || true
 
-  if ((non_success_count > 0)); then
+  if ((componentstatuses > healthy)); then
     if ((attempt < 5)); then
       echo -e "${color_yellow}Cluster not working yet.${color_norm}"
       attempt=$((attempt+1))
       sleep 30
     else
       echo -e " ${color_yellow}Validate output:${color_norm}"
-      echo "${kubectl_output}"
+      "${KUBE_ROOT}/cluster/kubectl.sh" get cs
       echo -e "${color_red}Validation returned one or more failed components. Cluster is probably broken.${color_norm}"
       exit 1
     fi
@@ -103,5 +90,5 @@ while true; do
 done
 
 echo "Validate output:"
-echo "${kubectl_output}"
+"${KUBE_ROOT}/cluster/kubectl.sh" get cs
 echo -e "${color_green}Cluster validation succeeded${color_norm}"