From bb68f740cd85f5c14c1122172ff758f59f7b915f Mon Sep 17 00:00:00 2001
From: Justin Santa Barbara
Date: Tue, 15 Mar 2016 20:26:46 -0400
Subject: [PATCH] AWS kube-up: collect logs from e2e

By implementing detect-node-names and a few other helpers, we are able
to collect the logs from nodes.
---
 cluster/aws/util.sh | 37 +++++++++++++++++++++++++++++--------
 cluster/log-dump.sh | 14 ++++++++++----
 2 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/cluster/aws/util.sh b/cluster/aws/util.sh
index 95905abfe35..adb7d7d04a8 100755
--- a/cluster/aws/util.sh
+++ b/cluster/aws/util.sh
@@ -240,7 +240,13 @@ function query-running-minions () {
     --query ${query}
 }
 
-function find-running-minions () {
+function detect-node-names () {
+  # If this is called directly, VPC_ID might not be set
+  # (this is case from cluster/log-dump.sh)
+  if [[ -z "${VPC_ID:-}" ]]; then
+    VPC_ID=$(get_vpc_id)
+  fi
+
   NODE_IDS=()
   NODE_NAMES=()
   for id in $(query-running-minions "Reservations[].Instances[].InstanceId"); do
@@ -251,8 +257,14 @@
   done
 }
 
+# Called to detect the project on GCE
+# Not needed on AWS
+function detect-project() {
+  :
+}
+
 function detect-nodes () {
-  find-running-minions
+  detect-node-names
 
   # This is inefficient, but we want NODE_NAMES / NODE_IDS to be ordered the same as KUBE_NODE_IP_ADDRESSES
   KUBE_NODE_IP_ADDRESSES=()
@@ -1225,7 +1237,7 @@ function wait-minions {
     max_attempts=90
   fi
   while true; do
-    find-running-minions > $LOG
+    detect-node-names > $LOG
     if [[ ${#NODE_IDS[@]} == ${NUM_NODES} ]]; then
       echo -e " ${color_green}${#NODE_IDS[@]} minions started; ready${color_norm}"
       break
@@ -1552,24 +1564,33 @@ function test-teardown {
 
 }
 
-# SSH to a node by name ($1) and run a command ($2).
-function ssh-to-node {
+# Gets the hostname (or IP) that we should SSH to for the given nodename
+# For the master, we use the nodename, for the nodes we use their instanceids
+function get_ssh_hostname {
   local node="$1"
-  local cmd="$2"
 
   if [[ "${node}" == "${MASTER_NAME}" ]]; then
    node=$(get_instanceid_from_name ${MASTER_NAME})
    if [[ -z "${node-}" ]]; then
-     echo "Could not detect Kubernetes master node. Make sure you've launched a cluster with 'kube-up.sh'"
+     echo "Could not detect Kubernetes master node. Make sure you've launched a cluster with 'kube-up.sh'" 1>&2
      exit 1
    fi
   fi
 
  local ip=$(get_instance_public_ip ${node})
  if [[ -z "$ip" ]]; then
-   echo "Could not detect IP for ${node}."
+   echo "Could not detect IP for ${node}." 1>&2
    exit 1
  fi
+  echo ${ip}
+}
+
+# SSH to a node by name ($1) and run a command ($2).
+function ssh-to-node {
+  local node="$1"
+  local cmd="$2"
+
+  local ip=$(get_ssh_hostname ${node})
 
   for try in $(seq 1 5); do
     if ssh -oLogLevel=quiet -oConnectTimeout=30 -oStrictHostKeyChecking=no -i "${AWS_SSH_KEY}" ${SSH_USER}@${ip} "echo test > /dev/null"; then
diff --git a/cluster/log-dump.sh b/cluster/log-dump.sh
index b751021dab2..ea59d2f455d 100755
--- a/cluster/log-dump.sh
+++ b/cluster/log-dump.sh
@@ -45,7 +45,7 @@ function copy-logs-from-node() {
     local -r scp_files="{$(echo ${files[*]} | tr ' ' ',')}"
 
     if [[ "${KUBERNETES_PROVIDER}" == "aws" ]]; then
-      local ip=$(get_instance_public_ip "${node}")
+      local ip=$(get_ssh_hostname "${node}")
       scp -i "${AWS_SSH_KEY}" "${SSH_USER}@${ip}:${scp_files}" "${dir}" > /dev/null || true
     else
       gcloud compute copy-files --project "${PROJECT}" --zone "${ZONE}" "${node}:${scp_files}" "${dir}" > /dev/null || true
@@ -62,10 +62,14 @@ function save-logs() {
     if [[ "${KUBERNETES_PROVIDER}" == "gce" ]]; then
       files="${files} ${gce_logfiles}"
     fi
+    if [[ "${KUBERNETES_PROVIDER}" == "aws" ]]; then
+      files="${files} ${aws_logfiles}"
+    fi
     if ssh-to-node "${node_name}" "sudo systemctl status kubelet.service" &> /dev/null; then
       ssh-to-node "${node_name}" "sudo journalctl --output=cat -u kubelet.service" > "${dir}/kubelet.log" || true
+      ssh-to-node "${node_name}" "sudo journalctl --output=cat -u docker.service" > "${dir}/docker.log" || true
     else
-      files="${files} ${supervisord_logfiles}"
+      files="${files} ${initd_logfiles} ${supervisord_logfiles}"
     fi
     copy-logs-from-node "${node_name}" "${dir}" "${files}"
 }
@@ -75,8 +79,10 @@ readonly node_ssh_supported_providers="gce gke aws"
 
 readonly master_logfiles="kube-apiserver kube-scheduler kube-controller-manager etcd"
 readonly node_logfiles="kube-proxy"
+readonly aws_logfiles="cloud-init-output"
 readonly gce_logfiles="startupscript"
-readonly common_logfiles="kern docker"
+readonly common_logfiles="kern"
+readonly initd_logfiles="docker"
 readonly supervisord_logfiles="kubelet supervisor/supervisord supervisor/kubelet-stdout supervisor/kubelet-stderr"
 
 # Limit the number of concurrent node connections so that we don't run out of
@@ -85,7 +91,7 @@ readonly max_scp_processes=25
 
 if [[ ! "${master_ssh_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
   echo "Master SSH not supported for ${KUBERNETES_PROVIDER}"
-elif ! $(detect-master &> /dev/null); then
+elif ! (detect-master &> /dev/null); then
  echo "Master not detected. Is the cluster up?"
 else
   readonly master_dir="${report_dir}/${MASTER_NAME}"