Fix waiting for logexporter log fetching processes

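Fix a bug found by shellcheck in the logexporter log fetching, where the last wait was not working properly: the while loop runs in a pipeline subshell, so neither its proc counter nor its background kubectl processes were visible to the trailing wait in the parent shell. The loop and the final wait now run inside the same subshell. Fix DumpClusterLogs hanging in 5k-node clusters: https://github.com/kubernetes/kubernetes/issues/85753 Change-Id: Id02bf9048b19e790940c7eac6d45d7fa7a3dfb2b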
2025-07-21 10:51:29 +00:00 · 2019-12-04 18:06:59 +01:00 · 2019-12-04 18:06:59 +01:00 · 2dc3684cf7
commit 2dc3684cf7
parent 7eb6182a63
1 changed files with 4 additions and 7 deletions
--- a/cluster/log-dump/log-dump.sh
+++ b/cluster/log-dump/log-dump.sh
@@ -504,10 +504,10 @@ function dump_nodes_with_logexporter() {
  # Store logs from logexporter pods to allow debugging log exporting process
  # itself.
  proc=${max_dump_processes}
-  "${KUBECTL}" get pods -n "${logexporter_namespace}" -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.nodeName}{"\n"}{end}' | while read pod node; do
+  "${KUBECTL}" get pods -n "${logexporter_namespace}" -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.nodeName}{"\n"}{end}' | (while read -r pod node; do
    echo "Fetching logs from ${pod} running on ${node}"
-    mkdir -p ${report_dir}/${node}
-    "${KUBECTL}" logs -n "${logexporter_namespace}" ${pod} > ${report_dir}/${node}/${pod}.log &
+    mkdir -p "${report_dir}/${node}"
+    "${KUBECTL}" logs -n "${logexporter_namespace}" "${pod}" > "${report_dir}/${node}/${pod}.log" &

    # We don't want to run more than ${max_dump_processes} at a time, so
    # wait once we hit that many nodes. This isn't ideal, since one might
@@ -517,11 +517,8 @@ function dump_nodes_with_logexporter() {
      proc=${max_dump_processes}
      wait
    fi
-  done
  # Wait for any remaining processes.
-  if [[ proc -gt 0 && proc -lt ${max_dump_processes} ]]; then
-    wait
-  fi
+  done; wait)

  # List registry of marker files (of nodes whose logexporter succeeded) from GCS.
  local nodes_succeeded