Merge pull request #94762 from joakimr-axis/joakimr-axis_log-dump-array

log-dump.sh: Fix shellcheck issues
Kubernetes Prow Robot authored 2020-10-08 10:10:55 -07:00, committed via GitHub
commit b53ba30c6a
2 changed files with 79 additions and 63 deletions
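Most of the fixes below replace implicit word splitting of unquoted variables with explicit Bash arrays, and quote the remaining expansions. As a rough before/after sketch of the two patterns involved (illustrative only; the variable names here are made up and do not appear in the patch):

    # Before: an unquoted expansion is split on IFS and glob-expanded (shellcheck SC2086/SC2206).
    logfiles="kube-apiserver.log kube-scheduler.log"
    files=( ${logfiles} )

    # After: split deliberately into an array, then always expand it quoted.
    files=()
    IFS=' ' read -r -a files <<< "${logfiles}"
    for f in "${files[@]}"; do
      echo "would copy: ${f}"
    done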

cluster/log-dump/log-dump.sh

@@ -30,7 +30,7 @@ readonly logexporter_namespace="${3:-logexporter}"
 # check for a function named log_dump_custom_get_instances. If it's
 # defined, we assume the function can me called with one argument, the
 # role, which is either "master" or "node".
-echo "Checking for custom logdump instances, if any"
+echo 'Checking for custom logdump instances, if any'
 if [[ $(type -t log_dump_custom_get_instances) == "function" ]]; then
   readonly use_custom_instance_list=yes
 else
@@ -67,10 +67,10 @@ readonly max_dump_processes=25
 function setup() {
   KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/../..
   if [[ -z "${use_custom_instance_list}" ]]; then
-    : ${KUBE_CONFIG_FILE:="config-test.sh"}
-    echo "Sourcing kube-util.sh"
+    : "${KUBE_CONFIG_FILE:='config-test.sh'}"
+    echo 'Sourcing kube-util.sh'
     source "${KUBE_ROOT}/cluster/kube-util.sh"
-    echo "Detecting project"
+    echo 'Detecting project'
     detect-project 2>&1
   elif [[ "${KUBERNETES_PROVIDER}" == "gke" ]]; then
     echo "Using 'use_custom_instance_list' with gke, skipping check for LOG_DUMP_SSH_KEY and LOG_DUMP_SSH_USER"
@@ -80,17 +80,17 @@ function setup() {
     source "${KUBE_ROOT}/cluster/gce/util.sh"
     ZONE="${gke_zone}"
   elif [[ -z "${LOG_DUMP_SSH_KEY:-}" ]]; then
-    echo "LOG_DUMP_SSH_KEY not set, but required when using log_dump_custom_get_instances"
+    echo 'LOG_DUMP_SSH_KEY not set, but required when using log_dump_custom_get_instances'
     exit 1
   elif [[ -z "${LOG_DUMP_SSH_USER:-}" ]]; then
-    echo "LOG_DUMP_SSH_USER not set, but required when using log_dump_custom_get_instances"
+    echo 'LOG_DUMP_SSH_USER not set, but required when using log_dump_custom_get_instances'
     exit 1
   fi
   source "${KUBE_ROOT}/hack/lib/util.sh"
 }
 
 function log-dump-ssh() {
-  if [[ "${gcloud_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
+  if [[ "${gcloud_supported_providers}" =~ ${KUBERNETES_PROVIDER} ]]; then
     ssh-to-node "$@"
     return
   fi
@@ -102,12 +102,14 @@ function log-dump-ssh() {
 }
 
 # Copy all files /var/log/{$3}.log on node $1 into local dir $2.
-# $3 should be a space-separated string of files.
+# $3 should be a string array of file names.
 # This function shouldn't ever trigger errexit, but doesn't block stderr.
 function copy-logs-from-node() {
   local -r node="${1}"
   local -r dir="${2}"
-  local files=( ${3} )
+  shift
+  shift
+  local files=("$@")
   # Append "*"
   # The * at the end is needed to also copy rotated logs (which happens
   # in large clusters and long runs).
@@ -117,12 +119,13 @@ function copy-logs-from-node() {
   # Comma delimit (even the singleton, or scp does the wrong thing), surround by braces.
   local -r scp_files="{$(printf "%s," "${files[@]}")}"
 
-  if [[ "${gcloud_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
+  if [[ "${gcloud_supported_providers}" =~ ${KUBERNETES_PROVIDER} ]]; then
     # get-serial-port-output lets you ask for ports 1-4, but currently (11/21/2016) only port 1 contains useful information
     gcloud compute instances get-serial-port-output --project "${PROJECT}" --zone "${ZONE}" --port 1 "${node}" > "${dir}/serial-1.log" || true
     gcloud compute scp --recurse --project "${PROJECT}" --zone "${ZONE}" "${node}:${scp_files}" "${dir}" > /dev/null || true
   elif [[ "${KUBERNETES_PROVIDER}" == "aws" ]]; then
-    local ip=$(get_ssh_hostname "${node}")
+    local ip
+    ip=$(get_ssh_hostname "${node}")
     scp -oLogLevel=quiet -oConnectTimeout=30 -oStrictHostKeyChecking=no -i "${AWS_SSH_KEY}" "${SSH_USER}@${ip}:${scp_files}" "${dir}" > /dev/null || true
   elif [[ -n "${use_custom_instance_list}" ]]; then
     scp -oLogLevel=quiet -oConnectTimeout=30 -oStrictHostKeyChecking=no -i "${LOG_DUMP_SSH_KEY}" "${LOG_DUMP_SSH_USER}@${node}:${scp_files}" "${dir}" > /dev/null || true
@@ -139,26 +142,34 @@ function copy-logs-from-node() {
 function save-logs() {
   local -r node_name="${1}"
   local -r dir="${2}"
-  local files="${3}"
+  local files=()
+  IFS=' ' read -r -a files <<< "$3"
   local opt_systemd_services="${4:-""}"
   local on_master="${5:-"false"}"
 
-  files="${files} ${extra_log_files}"
+  local extra=()
+  IFS=' ' read -r -a extra <<< "$extra_log_files"
+  files+=("${extra[@]}")
   if [[ -n "${use_custom_instance_list}" ]]; then
     if [[ -n "${LOG_DUMP_SAVE_LOGS:-}" ]]; then
-      files="${files} ${LOG_DUMP_SAVE_LOGS:-}"
+      local dump=()
+      IFS=' ' read -r -a dump <<< "${LOG_DUMP_SAVE_LOGS:-}"
+      files+=("${dump[@]}")
     fi
   else
+    local providerlogs=()
     case "${KUBERNETES_PROVIDER}" in
       gce|gke)
-        files="${files} ${gce_logfiles}"
+        IFS=' ' read -r -a providerlogs <<< "${gce_logfiles}"
         ;;
       aws)
-        files="${files} ${aws_logfiles}"
+        IFS=' ' read -r -a providerlogs <<< "${aws_logfiles}"
        ;;
     esac
+    files+=("${providerlogs[@]}")
   fi
-  local -r services=( ${systemd_services} ${opt_systemd_services} ${extra_systemd_services} )
+  local services
+  read -r -a services <<< "${systemd_services} ${opt_systemd_services} ${extra_systemd_services}"
 
   if log-dump-ssh "${node_name}" "command -v journalctl" &> /dev/null; then
     if [[ "${on_master}" == "true" ]]; then
@@ -178,7 +189,11 @@ function save-logs() {
       log-dump-ssh "${node_name}" "sudo journalctl --output=short-precise" > "${dir}/systemd.log" || true
     fi
   else
-    files="${kern_logfile} ${files} ${initd_logfiles} ${supervisord_logfiles}"
+    local tmpfiles=()
+    for f in "${kern_logfile}" "${initd_logfiles}" "${supervisord_logfiles}"; do
+      IFS=' ' read -r -a tmpfiles <<< "$f"
+      files+=("${tmpfiles[@]}")
+    done
   fi
 
   # Try dumping coverage profiles, if it looks like coverage is enabled in the first place.
@@ -192,15 +207,15 @@ function save-logs() {
         run-in-docker-container "${node_name}" "kube-proxy" "cat /tmp/k8s-kube-proxy.cov" > "${dir}/kube-proxy.cov" || true
       fi
     else
-      echo "Coverage profiles seem to exist, but cannot be retrieved from inside containers."
+      echo 'Coverage profiles seem to exist, but cannot be retrieved from inside containers.'
     fi
   fi
 
-  echo "Changing logfiles to be world-readable for download"
+  echo 'Changing logfiles to be world-readable for download'
   log-dump-ssh "${node_name}" "sudo chmod -R a+r /var/log" || true
 
-  echo "Copying '${files}' from ${node_name}"
-  copy-logs-from-node "${node_name}" "${dir}" "${files}"
+  echo "Copying '${files[*]}' from ${node_name}"
+  copy-logs-from-node "${node_name}" "${dir}" "${files[@]}"
 }
 
 # Saves a copy of the Windows Docker event log to ${WINDOWS_LOGS_DIR}\docker.log
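save-logs still receives its file list (and the *_logfiles globals) as space-separated strings, so the new code splits each one with read -a under an explicit IFS and appends the result, instead of relying on unquoted expansion to do the splitting. A self-contained sketch of that append pattern (the values are placeholders):

    files=()
    gce_logfiles="fluentd.log kubelet.cov"
    extra_log_files="startupscript.log"

    IFS=' ' read -r -a providerlogs <<< "${gce_logfiles}"
    files+=("${providerlogs[@]}")
    IFS=' ' read -r -a extra <<< "${extra_log_files}"
    files+=("${extra[@]}")

    printf 'file: %s\n' "${files[@]}"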
@@ -246,8 +261,9 @@ function save-windows-logs-via-diagnostics-tool() {
   local node="${1}"
   local dest_dir="${2}"
 
-  gcloud compute instances add-metadata ${node} --metadata enable-diagnostics=true --project=${PROJECT} --zone=${ZONE}
-  local logs_archive_in_gcs=$(gcloud alpha compute diagnose export-logs ${node} --zone=${ZONE} --project=${PROJECT} | tail -n 1)
+  gcloud compute instances add-metadata "${node}" --metadata enable-diagnostics=true --project="${PROJECT}" --zone="${ZONE}"
+  local logs_archive_in_gcs
+  logs_archive_in_gcs=$(gcloud alpha compute diagnose export-logs "${node}" "--zone=${ZONE}" "--project=${PROJECT}" | tail -n 1)
   local temp_local_path="${node}.zip"
   for retry in {1..20}; do
     if gsutil mv "${logs_archive_in_gcs}" "${temp_local_path}" > /dev/null 2>&1; then
@@ -259,8 +275,8 @@ function save-windows-logs-via-diagnostics-tool() {
   done
 
   if [[ -f "${temp_local_path}" ]]; then
-    unzip ${temp_local_path} -d "${dest_dir}" > /dev/null
-    rm -f ${temp_local_path}
+    unzip "${temp_local_path}" -d "${dest_dir}" > /dev/null
+    rm -f "${temp_local_path}"
   fi
 }
@@ -273,14 +289,14 @@ function save-windows-logs-via-ssh() {
   export-windows-docker-images-list "${node}"
 
   local remote_files=()
-  for file in ${windows_node_logfiles[@]}; do
+  for file in "${windows_node_logfiles[@]}"; do
     remote_files+=( "${WINDOWS_LOGS_DIR}\\${file}" )
   done
   remote_files+=( "${windows_node_otherfiles[@]}" )
 
   # TODO(pjh, yujuhong): handle rotated logs and copying multiple files at the
   # same time.
-  for remote_file in ${remote_files[@]}; do
+  for remote_file in "${remote_files[@]}"; do
     # Retry up to 3 times to allow ssh keys to be properly propagated and
     # stored.
     for retry in {1..3}; do
@@ -302,7 +318,7 @@ function save-logs-windows() {
   local -r node="${1}"
   local -r dest_dir="${2}"
 
-  if [[ ! "${gcloud_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
+  if [[ ! "${gcloud_supported_providers}" =~ ${KUBERNETES_PROVIDER} ]]; then
     echo "Not saving logs for ${node}, Windows log dumping requires gcloud support"
     return
   fi
@@ -324,28 +340,28 @@ function run-in-docker-container() {
   local node_name="$1"
   local container="$2"
   shift 2
-  log-dump-ssh "${node_name}" "docker exec \"\$(docker ps -f label=io.kubernetes.container.name=${container} --format \"{{.ID}}\")\" $@"
+  log-dump-ssh "${node_name}" "docker exec \"\$(docker ps -f label=io.kubernetes.container.name=${container} --format \"{{.ID}}\")\" $*"
 }
 
 function dump_masters() {
   local master_names
   if [[ -n "${use_custom_instance_list}" ]]; then
-    master_names=( $(log_dump_custom_get_instances master) )
-  elif [[ ! "${master_ssh_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
+    while IFS='' read -r line; do master_names+=("$line"); done < <(log_dump_custom_get_instances master)
+  elif [[ ! "${master_ssh_supported_providers}" =~ ${KUBERNETES_PROVIDER} ]]; then
     echo "Master SSH not supported for ${KUBERNETES_PROVIDER}"
     return
   elif [[ -n "${KUBEMARK_MASTER_NAME:-}" ]]; then
     master_names=( "${KUBEMARK_MASTER_NAME}" )
   else
     if ! (detect-master); then
-      echo "Master not detected. Is the cluster up?"
+      echo 'Master not detected. Is the cluster up?'
       return
     fi
     master_names=( "${MASTER_NAME}" )
   fi
 
   if [[ "${#master_names[@]}" == 0 ]]; then
-    echo "No masters found?"
+    echo 'No masters found?'
     return
   fi
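Filling an array from a command's output now uses a while read loop over process substitution instead of arr=( $(cmd) ), which shellcheck flags as SC2207 because the unquoted command substitution is word-split and glob-expanded. A minimal sketch with a stand-in command in place of log_dump_custom_get_instances:

    list_masters() {                 # stand-in; prints one instance name per line
      printf '%s\n' master-a master-b
    }

    master_names=()
    while IFS='' read -r line; do
      master_names+=("$line")
    done < <(list_masters)

    echo "found ${#master_names[@]} masters: ${master_names[*]}"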
@@ -378,16 +394,16 @@ function dump_nodes() {
   local node_names=()
   local windows_node_names=()
   if [[ -n "${1:-}" ]]; then
-    echo "Dumping logs for nodes provided as args to dump_nodes() function"
+    echo 'Dumping logs for nodes provided as args to dump_nodes() function'
     node_names=( "$@" )
   elif [[ -n "${use_custom_instance_list}" ]]; then
-    echo "Dumping logs for nodes provided by log_dump_custom_get_instances() function"
-    node_names=( $(log_dump_custom_get_instances node) )
-  elif [[ ! "${node_ssh_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
+    echo 'Dumping logs for nodes provided by log_dump_custom_get_instances() function'
+    while IFS='' read -r line; do node_names+=("$line"); done < <(log_dump_custom_get_instances node)
+  elif [[ ! "${node_ssh_supported_providers}" =~ ${KUBERNETES_PROVIDER} ]]; then
     echo "Node SSH not supported for ${KUBERNETES_PROVIDER}"
     return
   else
-    echo "Detecting nodes in the cluster"
+    echo 'Detecting nodes in the cluster'
     detect-node-names &> /dev/null
     if [[ -n "${NODE_NAMES:-}" ]]; then
       node_names=( "${NODE_NAMES[@]}" )
@@ -398,7 +414,7 @@ function dump_nodes() {
   fi
 
   if [[ "${#node_names[@]}" == 0 && "${#windows_node_names[@]}" == 0 ]]; then
-    echo "No nodes found!"
+    echo 'No nodes found!'
     return
   fi
@@ -410,7 +426,7 @@ function dump_nodes() {
   linux_nodes_selected_for_logs=()
   if [[ -n "${LOGDUMP_ONLY_N_RANDOM_NODES:-}" ]]; then
     # We randomly choose 'LOGDUMP_ONLY_N_RANDOM_NODES' many nodes for fetching logs.
-    for index in `shuf -i 0-$(( ${#node_names[*]} - 1 )) -n ${LOGDUMP_ONLY_N_RANDOM_NODES}`
+    for index in $(shuf -i 0-$(( ${#node_names[*]} - 1 )) -n "${LOGDUMP_ONLY_N_RANDOM_NODES}")
     do
       linux_nodes_selected_for_logs+=("${node_names[$index]}")
     done
@@ -473,10 +489,10 @@ function find_non_logexported_nodes() {
   local file="${gcs_artifacts_dir}/logexported-nodes-registry"
   echo "Listing marker files ($file) for successful nodes..."
   succeeded_nodes=$(gsutil ls "${file}") || return 1
-  echo "Successfully listed marker files for successful nodes"
+  echo 'Successfully listed marker files for successful nodes'
   NON_LOGEXPORTED_NODES=()
   for node in "${NODE_NAMES[@]}"; do
-    if [[ ! "${succeeded_nodes}" =~ "${node}" ]]; then
+    if [[ ! "${succeeded_nodes}" =~ ${node} ]]; then
       NON_LOGEXPORTED_NODES+=("${node}")
     fi
   done
@@ -486,20 +502,21 @@ function find_non_logexported_nodes() {
 # does not run on Windows nodes.
 function dump_nodes_with_logexporter() {
   if [[ -n "${use_custom_instance_list}" ]]; then
-    echo "Dumping logs for nodes provided by log_dump_custom_get_instances() function"
-    NODE_NAMES=( $(log_dump_custom_get_instances node) )
+    echo 'Dumping logs for nodes provided by log_dump_custom_get_instances() function'
+    NODE_NAMES=()
+    while IFS='' read -r line; do NODE_NAMES+=("$line"); done < <(log_dump_custom_get_instances node)
   else
-    echo "Detecting nodes in the cluster"
+    echo 'Detecting nodes in the cluster'
     detect-node-names &> /dev/null
   fi
 
   if [[ -z "${NODE_NAMES:-}" ]]; then
-    echo "No nodes found!"
+    echo 'No nodes found!'
     return
   fi
 
   # Obtain parameters required by logexporter.
-  local -r service_account_credentials="$(cat ${GOOGLE_APPLICATION_CREDENTIALS} | base64 | tr -d '\n')"
+  local -r service_account_credentials="$(base64 "${GOOGLE_APPLICATION_CREDENTIALS}" | tr -d '\n')"
   local -r cloud_provider="${KUBERNETES_PROVIDER}"
   local -r enable_hollow_node_logs="${ENABLE_HOLLOW_NODE_LOGS:-false}"
   local -r logexport_sleep_seconds="$(( 90 + NUM_NODES / 3 ))"
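The credentials are now read by handing the path straight to base64: piping cat into a command that can open the file itself is the "useless use of cat" pattern (shellcheck SC2002), and the previously unquoted path was also subject to word splitting. A tiny equivalent, using a throwaway file rather than GOOGLE_APPLICATION_CREDENTIALS:

    creds_file="$(mktemp)"
    printf '{"client_email":"example"}' > "${creds_file}"

    # before: cat ${creds_file} | base64 | tr -d '\n'
    # after:  let base64 read the quoted path directly
    encoded="$(base64 "${creds_file}" | tr -d '\n')"
    echo "${encoded}"
    rm -f "${creds_file}"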
@@ -526,7 +543,7 @@ function dump_nodes_with_logexporter() {
   # Create the logexporter namespace, service-account secret and the logexporter daemonset within that namespace.
   KUBECTL="${KUBE_ROOT}/cluster/kubectl.sh"
   if ! "${KUBECTL}" create -f "${manifest_yaml}"; then
-    echo "Failed to create logexporter daemonset.. falling back to logdump through SSH"
+    echo 'Failed to create logexporter daemonset.. falling back to logdump through SSH'
     "${KUBECTL}" delete namespace "${logexporter_namespace}" || true
     dump_nodes "${NODE_NAMES[@]}"
     return
@@ -538,7 +555,7 @@ function dump_nodes_with_logexporter() {
   while true; do
     now="$(date +%s)"
     if [[ $((now - start)) -gt ${logexport_sleep_seconds} ]]; then
-      echo "Waiting for all nodes to be logexported timed out."
+      echo 'Waiting for all nodes to be logexported timed out.'
       break
     fi
     if find_non_logexported_nodes; then
@@ -569,14 +586,13 @@ function dump_nodes_with_logexporter() {
   done; wait)
 
   # List registry of marker files (of nodes whose logexporter succeeded) from GCS.
-  local nodes_succeeded
   for retry in {1..10}; do
     if find_non_logexported_nodes; then
       break
     else
       echo "Attempt ${retry} failed to list marker files for successful nodes"
       if [[ "${retry}" == 10 ]]; then
-        echo "Final attempt to list marker files failed.. falling back to logdump through SSH"
+        echo 'Final attempt to list marker files failed.. falling back to logdump through SSH'
         "${KUBECTL}" delete namespace "${logexporter_namespace}" || true
         dump_nodes "${NODE_NAMES[@]}"
         return
@@ -599,32 +615,33 @@ function dump_nodes_with_logexporter() {
   "${KUBECTL}" get pods --namespace "${logexporter_namespace}" || true
   "${KUBECTL}" delete namespace "${logexporter_namespace}" || true
 
   if [[ "${#failed_nodes[@]}" != 0 ]]; then
-    echo -e "Dumping logs through SSH for the following nodes:\n${failed_nodes[@]}"
+    echo -e "Dumping logs through SSH for the following nodes:\n${failed_nodes[*]}"
     dump_nodes "${failed_nodes[@]}"
   fi
 }
 
 function detect_node_failures() {
-  if ! [[ "${gcloud_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
+  if ! [[ "${gcloud_supported_providers}" =~ ${KUBERNETES_PROVIDER} ]]; then
     return
   fi
 
   detect-node-names
   if [[ "${KUBERNETES_PROVIDER}" == "gce" ]]; then
-    local all_instance_groups=(${INSTANCE_GROUPS[@]} ${WINDOWS_INSTANCE_GROUPS[@]})
+    local all_instance_groups=("${INSTANCE_GROUPS[@]}" "${WINDOWS_INSTANCE_GROUPS[@]}")
   else
-    local all_instance_groups=(${INSTANCE_GROUPS[@]})
+    local all_instance_groups=("${INSTANCE_GROUPS[@]}")
   fi
 
   if [ -z "${all_instance_groups:-}" ]; then
     return
   fi
   for group in "${all_instance_groups[@]}"; do
-    local creation_timestamp=$(gcloud compute instance-groups managed describe \
-      "${group}" \
-      --project "${PROJECT}" \
-      --zone "${ZONE}" \
-      --format='value(creationTimestamp)')
+    local creation_timestamp
+    creation_timestamp=$(gcloud compute instance-groups managed describe \
+      "${group}" \
+      --project "${PROJECT}" \
+      --zone "${ZONE}" \
+      --format='value(creationTimestamp)')
     echo "Failures for ${group} (if any):"
     gcloud logging read --order=asc \
       --format='table(timestamp,jsonPayload.resource.name,jsonPayload.event_subtype)' \
@@ -644,7 +661,7 @@ function main() {
   echo "Dumping logs from master locally to '${report_dir}'"
   dump_masters
   if [[ "${DUMP_ONLY_MASTER_LOGS:-}" == "true" ]]; then
-    echo "Skipping dumping of node logs"
+    echo 'Skipping dumping of node logs'
     return
   fi


@@ -1,4 +1,3 @@
 ./cluster/gce/gci/configure.sh
 ./cluster/gce/gci/master-helper.sh
 ./cluster/gce/util.sh
-./cluster/log-dump/log-dump.sh
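The entry removed above is the script's listing in the checked-in set of known shellcheck failures, so after this change log-dump.sh is linted like any other script in the repository. To lint the single file locally (assuming shellcheck is installed; the repository's own verification wrapper may invoke it differently):

    # exits non-zero if any findings remain
    shellcheck cluster/log-dump/log-dump.sh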