Bound SSH connection attempts and skip unreachable hosts in log-dump.

Every provider's ssh-to-node helper (aws, gce, gke, libvirt-coreos) now passes
ConnectTimeout=30 to ssh / gcloud compute ssh.

The aws ssh-to-node helper now probes the node with "echo test" (up to 5 tries,
5 seconds apart) and runs the real command once afterwards, instead of
retrying the command itself inside the loop.

log-dump.sh gains a test-ssh helper and skips the master or any node that
cannot be reached over SSH.

diff --git a/cluster/aws/util.sh b/cluster/aws/util.sh
index 96882dbd59c..13639fa56fb 100755
--- a/cluster/aws/util.sh
+++ b/cluster/aws/util.sh
@@ -1511,10 +1511,12 @@ function ssh-to-node {
   fi
 
   for try in $(seq 1 5); do
-    if ssh -oLogLevel=quiet -oStrictHostKeyChecking=no -i "${AWS_SSH_KEY}" ${SSH_USER}@${ip} "${cmd}"; then
+    if ssh -oLogLevel=quiet -oConnectTimeout=30 -oStrictHostKeyChecking=no -i "${AWS_SSH_KEY}" "${SSH_USER}@${ip}" "echo test > /dev/null"; then
       break
     fi
+    sleep 5
   done
+  ssh -oLogLevel=quiet -oConnectTimeout=30 -oStrictHostKeyChecking=no -i "${AWS_SSH_KEY}" "${SSH_USER}@${ip}" "${cmd}"
 }
 
 # Restart the kube-proxy on a node ($1)
diff --git a/cluster/gce/util.sh b/cluster/gce/util.sh
index 724834d9d03..78c8a7aba55 100755
--- a/cluster/gce/util.sh
+++ b/cluster/gce/util.sh
@@ -1319,13 +1319,13 @@ function ssh-to-node {
   local cmd="$2"
   # Loop until we can successfully ssh into the box
   for try in $(seq 1 5); do
-    if gcloud compute ssh --ssh-flag="-o LogLevel=quiet" --project "${PROJECT}" --zone="${ZONE}" "${node}" --command "echo test > /dev/null"; then
+    if gcloud compute ssh --ssh-flag="-o LogLevel=quiet" --ssh-flag="-o ConnectTimeout=30" --project "${PROJECT}" --zone="${ZONE}" "${node}" --command "echo test > /dev/null"; then
       break
     fi
     sleep 5
   done
   # Then actually try the command.
-  gcloud compute ssh --ssh-flag="-o LogLevel=quiet" --project "${PROJECT}" --zone="${ZONE}" "${node}" --command "${cmd}"
+  gcloud compute ssh --ssh-flag="-o LogLevel=quiet" --ssh-flag="-o ConnectTimeout=30" --project "${PROJECT}" --zone="${ZONE}" "${node}" --command "${cmd}"
 }
 
 # Restart the kube-proxy on a node ($1)
diff --git a/cluster/gke/util.sh b/cluster/gke/util.sh
index cc1931bffb7..e2a6e112965 100755
--- a/cluster/gke/util.sh
+++ b/cluster/gke/util.sh
@@ -263,13 +263,13 @@ function ssh-to-node() {
   local cmd="$2"
   # Loop until we can successfully ssh into the box
   for try in $(seq 1 5); do
-    if gcloud compute ssh --ssh-flag="-o LogLevel=quiet" --project "${PROJECT}" --zone="${ZONE}" "${node}" --command "echo test > /dev/null"; then
+    if gcloud compute ssh --ssh-flag="-o LogLevel=quiet" --ssh-flag="-o ConnectTimeout=30" --project "${PROJECT}" --zone="${ZONE}" "${node}" --command "echo test > /dev/null"; then
       break
     fi
     sleep 5
   done
   # Then actually try the command.
-  gcloud compute ssh --ssh-flag="-o LogLevel=quiet" --project "${PROJECT}" --zone="${ZONE}" "${node}" --command "${cmd}"
+  gcloud compute ssh --ssh-flag="-o LogLevel=quiet" --ssh-flag="-o ConnectTimeout=30" --project "${PROJECT}" --zone="${ZONE}" "${node}" --command "${cmd}"
 }
 
 # Restart the kube-proxy on a node ($1)
diff --git a/cluster/libvirt-coreos/util.sh b/cluster/libvirt-coreos/util.sh
index f70d202e769..f10ccfddb90 100644
--- a/cluster/libvirt-coreos/util.sh
+++ b/cluster/libvirt-coreos/util.sh
@@ -345,7 +345,7 @@ function ssh-to-node {
   if [[ -z "$machine" ]]; then
     echo "$node is an unknown machine to ssh to" >&2
   fi
-  ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ControlMaster=no "core@$machine" "$cmd"
+  ssh -o ConnectTimeout=30 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ControlMaster=no "core@$machine" "$cmd"
 }
 
 # Restart the kube-proxy on a node ($1)
diff --git a/cluster/log-dump.sh b/cluster/log-dump.sh
index 630bd3f4895..523f71fab46 100755
--- a/cluster/log-dump.sh
+++ b/cluster/log-dump.sh
@@ -30,6 +30,13 @@ source "${KUBE_ROOT}/cluster/kube-util.sh"
 readonly report_dir="${1:-_artifacts}"
 echo "Dumping master and node logs to ${report_dir}"
 
+# Attempts to SSH to a node ($1) and run a simple command. Returns 0 on
+# success and the non-zero exit status of ssh-to-node on error.
+function test-ssh() {
+  local -r node_name="${1}"
+  ssh-to-node "${node_name}" "echo test > /dev/null" &> /dev/null
+}
+
 # Saves a single output of running a given command ($2) on a given node ($1)
 # into a given local file ($3). Does not fail if the ssh command fails for any
 # reason, just prints an error to stderr.
@@ -70,6 +77,8 @@ if [[ ! "${master_ssh_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
   echo "Master SSH not supported for ${KUBERNETES_PROVIDER}"
 elif ! $(detect-master &> /dev/null); then
   echo "Master not detected. Is the cluster up?"
+elif ! test-ssh "${MASTER_NAME}"; then
+  echo "Could not SSH to ${MASTER_NAME}" >&2
 else
   echo "Master Name: ${MASTER_NAME}"
   readonly master_prefix="${report_dir}/${MASTER_NAME}"
@@ -88,6 +97,10 @@ elif [[ "${#NODE_NAMES[@]}" -eq 0 ]]; then
 else
   echo "Node Names: ${NODE_NAMES[*]}"
   for node_name in "${NODE_NAMES[@]}"; do
+    if ! test-ssh "${node_name}"; then
+      echo "Could not SSH to ${node_name}" >&2
+      continue
+    fi
     node_prefix="${report_dir}/${node_name}"
     save-log "${node_name}" "cat /var/log/kube-proxy.log" "${node_prefix}-kube-proxy.log"
     save-common-logs "${node_name}" "${node_prefix}"