Merge pull request #15300 from kevin-wangzefeng/centos_baremetal_auto_troubleshooting

Auto commit by PR queue bot
This commit is contained in:
k8s-merge-robot 2015-11-23 13:41:22 -08:00
commit 46924670d9
2 changed files with 69 additions and 82 deletions

View File

@ -22,13 +22,13 @@ export MASTER_IP=${MASTER#*@}
# Define all your minion nodes, # Define all your minion nodes,
# And separated with blank space like <user_1@ip_1> <user_2@ip_2> <user_3@ip_3>. # And separated with blank space like <user_1@ip_1> <user_2@ip_2> <user_3@ip_3>.
# The user should have sudo privilege # The user should have sudo privilege
export MINIONS=${MINIONS:-"centos@172.10.0.12 centos@172.10.0.13"} export NODES=${NODES:-"centos@172.10.0.12 centos@172.10.0.13"}
# It is practically impossible to set an array as an environment variable
# from a script, so assume the variable is a string and then convert it to an array
export MINIONS_ARRAY=($MINIONS)
# Number of nodes in your cluster. # Number of nodes in your cluster.
export NUM_MINIONS=${NUM_MINIONS:-2} export NUM_NODES=${NUM_NODES:-2}
# Should be removed when NUM_MINIONS is deprecated in validate-cluster.sh
export NUM_MINIONS=${NUM_NODES}
# By default, the cluster will use the etcd installed on master. # By default, the cluster will use the etcd installed on master.
export ETCD_SERVERS=${ETCD_SERVERS:-"http://$MASTER_IP:4001"} export ETCD_SERVERS=${ETCD_SERVERS:-"http://$MASTER_IP:4001"}

View File

@ -31,7 +31,7 @@ source "$KUBE_ROOT/cluster/common.sh"
KUBECTL_PATH=${KUBE_ROOT}/cluster/centos/binaries/kubectl KUBECTL_PATH=${KUBE_ROOT}/cluster/centos/binaries/kubectl
# Directory to be used for master and minion provisioning. # Directory to be used for master and node provisioning.
KUBE_TEMP="~/kube_temp" KUBE_TEMP="~/kube_temp"
@ -43,13 +43,13 @@ function detect-master() {
echo "KUBE_MASTER: ${MASTER}" 1>&2 echo "KUBE_MASTER: ${MASTER}" 1>&2
} }
# Get minion IP addresses and store in KUBE_MINION_IP_ADDRESSES[] # Get node IP addresses and store in KUBE_NODE_IP_ADDRESSES[]
function detect-minions() { function detect-nodes() {
KUBE_MINION_IP_ADDRESSES=() KUBE_NODE_IP_ADDRESSES=()
for minion in ${MINIONS}; do for node in ${NODES}; do
KUBE_MINION_IP_ADDRESSES+=("${minion#*@}") KUBE_NODE_IP_ADDRESSES+=("${node#*@}")
done done
echo "KUBE_MINION_IP_ADDRESSES: [${KUBE_MINION_IP_ADDRESSES[*]}]" 1>&2 echo "KUBE_NODE_IP_ADDRESSES: [${KUBE_NODE_IP_ADDRESSES[*]}]" 1>&2
} }
# Verify prereqs on host machine # Verify prereqs on host machine
@ -96,20 +96,24 @@ function trap-add {
function validate-cluster() { function validate-cluster() {
# by default call the generic validate-cluster.sh script, customizable by # by default call the generic validate-cluster.sh script, customizable by
# any cluster provider if this does not fit. # any cluster provider if this does not fit.
set +e
"${KUBE_ROOT}/cluster/validate-cluster.sh" "${KUBE_ROOT}/cluster/validate-cluster.sh"
if [[ "$?" -ne "0" ]]; then
troubleshoot-master
for node in ${NODES}; do
troubleshoot-node ${node}
done
exit 1
fi
set -e
} }
# Instantiate a kubernetes cluster # Instantiate a kubernetes cluster
function kube-up() { function kube-up() {
provision-master provision-master
for minion in ${MINIONS}; do for node in ${NODES}; do
provision-minion ${minion} provision-node ${node}
done
verify-master
for minion in ${MINIONS}; do
verify-minion ${minion}
done done
detect-master detect-master
@ -127,63 +131,47 @@ function kube-up() {
# Delete a kubernetes cluster # Delete a kubernetes cluster
function kube-down() { function kube-down() {
tear-down-master tear-down-master
for minion in ${MINIONS}; do for node in ${NODES}; do
tear-down-minion ${minion} tear-down-node ${node}
done done
} }
function troubleshoot-master() {
function verify-master() { # Troubleshooting on master if all required daemons are active.
# verify master has all required daemons echo "[INFO] Troubleshooting on master ${MASTER}"
printf "[INFO] Validating master ${MASTER}"
local -a required_daemon=("kube-apiserver" "kube-controller-manager" "kube-scheduler") local -a required_daemon=("kube-apiserver" "kube-controller-manager" "kube-scheduler")
local validated="1" local daemon
local try_count=0 local daemon_status
until [[ "$validated" == "0" ]]; do printf "%-24s %-10s \n" "PROCESS" "STATUS"
validated="0" for daemon in "${required_daemon[@]}"; do
local daemon local rc=0
for daemon in "${required_daemon[@]}"; do kube-ssh "${MASTER}" "sudo systemctl is-active ${daemon}" >/dev/null 2>&1 || rc="$?"
local rc=0 if [[ "${rc}" -ne "0" ]]; then
kube-ssh "${MASTER}" "sudo pgrep -f ${daemon}" >/dev/null 2>&1 || rc="$?" daemon_status="inactive"
if [[ "${rc}" -ne "0" ]]; then else
printf "." daemon_status="active"
validated="1" fi
((try_count=try_count+2)) printf "%-24s %s\n" ${daemon} ${daemon_status}
if [[ ${try_count} -gt ${PROCESS_CHECK_TIMEOUT} ]]; then
printf "\nWarning: Process \"${daemon}\" failed to run on ${MASTER}, please check.\n"
exit 1
fi
sleep 2
fi
done
done done
printf "\n" printf "\n"
} }
function verify-minion() { function troubleshoot-node() {
# verify minion has all required daemons # Troubleshooting on node if all required daemons are active.
printf "[INFO] Validating minion ${1}" echo "[INFO] Troubleshooting on node ${1}"
local -a required_daemon=("kube-proxy" "kubelet" "docker") local -a required_daemon=("kube-proxy" "kubelet" "docker" "flannel")
local validated="1" local daemon
local try_count=0 local daemon_status
until [[ "$validated" == "0" ]]; do printf "%-24s %-10s \n" "PROCESS" "STATUS"
validated="0" for daemon in "${required_daemon[@]}"; do
local daemon local rc=0
for daemon in "${required_daemon[@]}"; do kube-ssh "${1}" "sudo systemctl is-active ${daemon}" >/dev/null 2>&1 || rc="$?"
local rc=0 if [[ "${rc}" -ne "0" ]]; then
kube-ssh "${1}" "sudo pgrep -f ${daemon}" >/dev/null 2>&1 || rc="$?" daemon_status="inactive"
if [[ "${rc}" -ne "0" ]]; then else
printf "." daemon_status="active"
validated="1" fi
((try_count=try_count+2)) printf "%-24s %s\n" ${daemon} ${daemon_status}
if [[ ${try_count} -gt ${PROCESS_CHECK_TIMEOUT} ]] ; then
printf "\nWarning: Process \"${daemon}\" failed to run on ${1}, please check.\n"
exit 1
fi
sleep 2
fi
done
done done
printf "\n" printf "\n"
} }
@ -205,9 +193,9 @@ echo "[INFO] tear-down-master on ${MASTER}"
kube-ssh "${MASTER}" "sudo rm -rf /var/lib/etcd" kube-ssh "${MASTER}" "sudo rm -rf /var/lib/etcd"
} }
# Clean up on minion # Clean up on node
function tear-down-minion() { function tear-down-node() {
echo "[INFO] tear-down-minion on $1" echo "[INFO] tear-down-node on $1"
for service_name in kube-proxy kubelet docker flannel ; do for service_name in kube-proxy kubelet docker flannel ; do
service_file="/usr/lib/systemd/system/${service_name}.service" service_file="/usr/lib/systemd/system/${service_name}.service"
kube-ssh "$1" " \ kube-ssh "$1" " \
@ -247,30 +235,29 @@ function provision-master() {
} }
# Provision minion # Provision node
# #
# Assumed vars: # Assumed vars:
# $1 (minion) # $1 (node)
# MASTER # MASTER
# KUBE_TEMP # KUBE_TEMP
# ETCD_SERVERS # ETCD_SERVERS
# FLANNEL_NET # FLANNEL_NET
# DOCKER_OPTS # DOCKER_OPTS
function provision-minion() { function provision-node() {
echo "[INFO] Provision minion on $1" echo "[INFO] Provision node on $1"
local master_ip=${MASTER#*@} local master_ip=${MASTER#*@}
local minion=$1 local node=$1
local minion_ip=${minion#*@} local node_ip=${node#*@}
ensure-setup-dir ${minion} ensure-setup-dir ${node}
# scp -r ${SSH_OPTS} minion config-default.sh copy-files.sh util.sh "${minion_ip}:${KUBE_TEMP}" kube-scp ${node} "${ROOT}/binaries/node ${ROOT}/node ${ROOT}/config-default.sh ${ROOT}/util.sh" ${KUBE_TEMP}
kube-scp ${minion} "${ROOT}/binaries/node ${ROOT}/node ${ROOT}/config-default.sh ${ROOT}/util.sh" ${KUBE_TEMP} kube-ssh "${node}" " \
kube-ssh "${minion}" " \
sudo cp -r ${KUBE_TEMP}/node/bin /opt/kubernetes; \ sudo cp -r ${KUBE_TEMP}/node/bin /opt/kubernetes; \
sudo chmod -R +x /opt/kubernetes/bin; \ sudo chmod -R +x /opt/kubernetes/bin; \
sudo bash ${KUBE_TEMP}/node/scripts/flannel.sh ${ETCD_SERVERS} ${FLANNEL_NET}; \ sudo bash ${KUBE_TEMP}/node/scripts/flannel.sh ${ETCD_SERVERS} ${FLANNEL_NET}; \
sudo bash ${KUBE_TEMP}/node/scripts/docker.sh \"${DOCKER_OPTS}\"; \ sudo bash ${KUBE_TEMP}/node/scripts/docker.sh \"${DOCKER_OPTS}\"; \
sudo bash ${KUBE_TEMP}/node/scripts/kubelet.sh ${master_ip} ${minion_ip}; \ sudo bash ${KUBE_TEMP}/node/scripts/kubelet.sh ${master_ip} ${node_ip}; \
sudo bash ${KUBE_TEMP}/node/scripts/proxy.sh ${master_ip}" sudo bash ${KUBE_TEMP}/node/scripts/proxy.sh ${master_ip}"
} }