diff --git a/cluster/log-dump/log-dump.sh b/cluster/log-dump/log-dump.sh index c9cfed5aa4d..3d23f2cf1e4 100755 --- a/cluster/log-dump/log-dump.sh +++ b/cluster/log-dump/log-dump.sh @@ -420,6 +420,12 @@ function dump_nodes() { all_selected_nodes+=( "${windows_node_names[@]}" ) proc=${max_dump_processes} + start="$(date +%s)" + # log_dump_ssh_timeout_seconds is the maximum number of seconds that dumping logs over + # SSH may take. Please note that the logic enforcing the timeout + # is only a best effort. The actual time of the operation may be longer + # due to waiting for all the child processes below. + log_dump_ssh_timeout_seconds="${LOG_DUMP_SSH_TIMEOUT_SECONDS:-}" for i in "${!all_selected_nodes[@]}"; do node_name="${all_selected_nodes[$i]}" node_dir="${report_dir}/${node_name}" @@ -439,6 +445,11 @@ function dump_nodes() { if [[ proc -eq 0 ]]; then proc=${max_dump_processes} wait + now="$(date +%s)" + if [[ -n "${log_dump_ssh_timeout_seconds}" && $((now - start)) -gt ${log_dump_ssh_timeout_seconds} ]]; then + echo "WARNING: Hit timeout after ${log_dump_ssh_timeout_seconds} seconds, finishing log dumping over SSH shortly" + break + fi fi done # Wait for any remaining processes.