refactor cluster verify logic, use validate-cluster.sh

When validate-cluster.sh returns an error, check service status on the master and nodes.
Kevin 2015-09-29 16:40:05 +00:00
parent 67cb65d906
commit dd82721764


@@ -96,7 +96,16 @@ function trap-add {
 function validate-cluster() {
   # by default call the generic validate-cluster.sh script, customizable by
   # any cluster provider if this does not fit.
+  set +e
   "${KUBE_ROOT}/cluster/validate-cluster.sh"
+  if [[ "$?" -ne "0" ]]; then
+    troubleshoot-master
+    for minion in ${MINIONS}; do
+      troubleshoot-minion ${minion}
+    done
+    exit 1
+  fi
+  set -e
 }
 
 # Instantiate a kubernetes cluster
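A side note on the `set +e` / `set -e` bracket in the hunk above (a minimal standalone sketch, not part of the commit; check_example.sh is a hypothetical stand-in for validate-cluster.sh): with errexit enabled, a non-zero exit from the child script would terminate the caller before `$?` could be inspected, so errexit is disabled just around the call.

  set +e
  ./check_example.sh            # hypothetical stand-in for validate-cluster.sh
  if [[ "$?" -ne "0" ]]; then
    # the commit runs troubleshoot-master / troubleshoot-minion at this point
    echo "validation failed, collecting diagnostics" >&2
  fi
  set -e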
@@ -107,11 +116,6 @@ function kube-up() {
     provision-minion ${minion}
   done
 
-  verify-master
-  for minion in ${MINIONS}; do
-    verify-minion ${minion}
-  done
-
   detect-master
 
   # set CONTEXT and KUBE_SERVER values for create-kubeconfig() and get-password()
@@ -132,58 +136,42 @@ function kube-down() {
   done
 }
 
-function verify-master() {
-  # verify master has all required daemons
-  printf "[INFO] Validating master ${MASTER}"
+function troubleshoot-master() {
+  # Troubleshooting on master if all required daemons are active.
+  echo "[INFO] Troubleshooting on master ${MASTER}"
   local -a required_daemon=("kube-apiserver" "kube-controller-manager" "kube-scheduler")
-  local validated="1"
-  local try_count=0
-  until [[ "$validated" == "0" ]]; do
-    validated="0"
-    local daemon
-    for daemon in "${required_daemon[@]}"; do
-      local rc=0
-      kube-ssh "${MASTER}" "sudo pgrep -f ${daemon}" >/dev/null 2>&1 || rc="$?"
-      if [[ "${rc}" -ne "0" ]]; then
-        printf "."
-        validated="1"
-        ((try_count=try_count+2))
-        if [[ ${try_count} -gt ${PROCESS_CHECK_TIMEOUT} ]]; then
-          printf "\nWarning: Process \"${daemon}\" failed to run on ${MASTER}, please check.\n"
-          exit 1
-        fi
-        sleep 2
-      fi
-    done
+  local daemon
+  local daemon_status
+  printf "%-24s %-10s \n" "PROCESS" "STATUS"
+  for daemon in "${required_daemon[@]}"; do
+    local rc=0
+    kube-ssh "${MASTER}" "sudo systemctl is-active ${daemon}" >/dev/null 2>&1 || rc="$?"
+    if [[ "${rc}" -ne "0" ]]; then
+      daemon_status="inactive"
+    else
+      daemon_status="active"
+    fi
+    printf "%-24s %s\n" ${daemon} ${daemon_status}
   done
   printf "\n"
 }
 
-function verify-minion() {
-  # verify minion has all required daemons
-  printf "[INFO] Validating minion ${1}"
-  local -a required_daemon=("kube-proxy" "kubelet" "docker")
-  local validated="1"
-  local try_count=0
-  until [[ "$validated" == "0" ]]; do
-    validated="0"
-    local daemon
-    for daemon in "${required_daemon[@]}"; do
-      local rc=0
-      kube-ssh "${1}" "sudo pgrep -f ${daemon}" >/dev/null 2>&1 || rc="$?"
-      if [[ "${rc}" -ne "0" ]]; then
-        printf "."
-        validated="1"
-        ((try_count=try_count+2))
-        if [[ ${try_count} -gt ${PROCESS_CHECK_TIMEOUT} ]] ; then
-          printf "\nWarning: Process \"${daemon}\" failed to run on ${1}, please check.\n"
-          exit 1
-        fi
-        sleep 2
-      fi
-    done
+function troubleshoot-minion() {
+  # Troubleshooting on minion if all required daemons are active.
+  echo "[INFO] Troubleshooting on minion ${1}"
+  local -a required_daemon=("kube-proxy" "kubelet" "docker" "flannel")
+  local daemon
+  local daemon_status
+  printf "%-24s %-10s \n" "PROCESS" "STATUS"
+  for daemon in "${required_daemon[@]}"; do
+    local rc=0
+    kube-ssh "${1}" "sudo systemctl is-active ${daemon}" >/dev/null 2>&1 || rc="$?"
+    if [[ "${rc}" -ne "0" ]]; then
+      daemon_status="inactive"
+    else
+      daemon_status="active"
+    fi
+    printf "%-24s %s\n" ${daemon} ${daemon_status}
+  done
   printf "\n"
 }