Merge pull request #23027 from justinsb/e2e_node_logs

Auto commit by PR queue bot
This commit is contained in:
k8s-merge-robot 2016-03-17 00:38:08 -07:00
commit e604efcf0d
2 changed files with 39 additions and 12 deletions

View File

@ -240,7 +240,13 @@ function query-running-minions () {
--query ${query}
}
function find-running-minions () {
function detect-node-names () {
# If this is called directly, VPC_ID might not be set
# (this is case from cluster/log-dump.sh)
if [[ -z "${VPC_ID:-}" ]]; then
VPC_ID=$(get_vpc_id)
fi
NODE_IDS=()
NODE_NAMES=()
for id in $(query-running-minions "Reservations[].Instances[].InstanceId"); do
@ -251,8 +257,14 @@ function find-running-minions () {
done
}
# GCE needs the active project detected before other calls; AWS has no
# project concept, so this is a deliberate no-op kept only so that
# provider-agnostic callers can invoke it unconditionally.
detect-project() {
  :  # no-op; always succeeds
}
function detect-nodes () {
find-running-minions
detect-node-names
# This is inefficient, but we want NODE_NAMES / NODE_IDS to be ordered the same as KUBE_NODE_IP_ADDRESSES
KUBE_NODE_IP_ADDRESSES=()
@ -1225,7 +1237,7 @@ function wait-minions {
max_attempts=90
fi
while true; do
find-running-minions > $LOG
detect-node-names > $LOG
if [[ ${#NODE_IDS[@]} == ${NUM_NODES} ]]; then
echo -e " ${color_green}${#NODE_IDS[@]} minions started; ready${color_norm}"
break
@ -1552,24 +1564,33 @@ function test-teardown {
}
# SSH to a node by name ($1) and run a command ($2).
function ssh-to-node {
# Gets the hostname (or IP) that we should SSH to for the given nodename.
# For the master we first resolve the name to an instance id; nodes are
# already addressed by instance id.
# Arguments: $1 - node name (master name or instance id)
# Outputs:   the public IP to SSH to, on stdout (callers capture it with $(...))
# Exits:     1 if the master instance or the node's public IP cannot be found
function get_ssh_hostname {
  local node="$1"

  if [[ "${node}" == "${MASTER_NAME}" ]]; then
    node=$(get_instanceid_from_name "${MASTER_NAME}")
    if [[ -z "${node-}" ]]; then
      # Diagnostics must go to stderr: stdout is this function's return value.
      echo "Could not detect Kubernetes master node. Make sure you've launched a cluster with 'kube-up.sh'" 1>&2
      exit 1
    fi
  fi

  local ip
  ip=$(get_instance_public_ip "${node}")
  if [[ -z "${ip}" ]]; then
    echo "Could not detect IP for ${node}." 1>&2
    exit 1
  fi
  echo "${ip}"
}
# SSH to a node by name ($1) and run a command ($2).
function ssh-to-node {
local node="$1"
local cmd="$2"
local ip=$(get_ssh_hostname ${node})
for try in $(seq 1 5); do
if ssh -oLogLevel=quiet -oConnectTimeout=30 -oStrictHostKeyChecking=no -i "${AWS_SSH_KEY}" ${SSH_USER}@${ip} "echo test > /dev/null"; then

View File

@ -45,7 +45,7 @@ function copy-logs-from-node() {
local -r scp_files="{$(echo ${files[*]} | tr ' ' ',')}"
if [[ "${KUBERNETES_PROVIDER}" == "aws" ]]; then
local ip=$(get_instance_public_ip "${node}")
local ip=$(get_ssh_hostname "${node}")
scp -i "${AWS_SSH_KEY}" "${SSH_USER}@${ip}:${scp_files}" "${dir}" > /dev/null || true
else
gcloud compute copy-files --project "${PROJECT}" --zone "${ZONE}" "${node}:${scp_files}" "${dir}" > /dev/null || true
@ -62,10 +62,14 @@ function save-logs() {
if [[ "${KUBERNETES_PROVIDER}" == "gce" ]]; then
files="${files} ${gce_logfiles}"
fi
if [[ "${KUBERNETES_PROVIDER}" == "aws" ]]; then
files="${files} ${aws_logfiles}"
fi
if ssh-to-node "${node_name}" "sudo systemctl status kubelet.service" &> /dev/null; then
ssh-to-node "${node_name}" "sudo journalctl --output=cat -u kubelet.service" > "${dir}/kubelet.log" || true
ssh-to-node "${node_name}" "sudo journalctl --output=cat -u docker.service" > "${dir}/docker.log" || true
else
files="${files} ${supervisord_logfiles}"
files="${files} ${initd_logfiles} ${supervisord_logfiles}"
fi
copy-logs-from-node "${node_name}" "${dir}" "${files}"
}
@ -75,8 +79,10 @@ readonly node_ssh_supported_providers="gce gke aws"
# Log file base names (without .log) to collect from cluster machines,
# grouped by which machines / init setups they apply to.
readonly master_logfiles="kube-apiserver kube-scheduler kube-controller-manager etcd"
readonly node_logfiles="kube-proxy"
readonly aws_logfiles="cloud-init-output"
readonly gce_logfiles="startupscript"
readonly common_logfiles="kern"
# "docker" was split out of common_logfiles: it is only collected when the
# node is not running kubelet under systemd (see the save-logs fallback path).
readonly initd_logfiles="docker"
readonly supervisord_logfiles="kubelet supervisor/supervisord supervisor/kubelet-stdout supervisor/kubelet-stderr"
# Limit the number of concurrent node connections so that we don't run out of
@ -85,7 +91,7 @@ readonly max_scp_processes=25
if [[ ! "${master_ssh_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
echo "Master SSH not supported for ${KUBERNETES_PROVIDER}"
elif ! $(detect-master &> /dev/null); then
elif ! (detect-master &> /dev/null); then
echo "Master not detected. Is the cluster up?"
else
readonly master_dir="${report_dir}/${MASTER_NAME}"