mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-27 13:37:30 +00:00
Periodically fetch logexported nodes instead of sleeping
This commit is contained in:
parent
0f9dfca8e7
commit
43d217f904
@ -281,6 +281,24 @@ function dump_nodes() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Collect names of nodes which didn't run logexporter successfully.
#
# Lists the marker files under
# "${gcs_artifacts_dir}/logexported-nodes-registry" and treats a node as
# logexported only if some listed object has a final path component equal to
# the node name, optionally followed by a ".<suffix>".
# NOTE(review): the exact marker file naming (bare node name vs. node name plus
# an extension) is not visible here; both forms are accepted - confirm against
# the logexporter that writes the markers.
#
# The name is anchored on the preceding "/" and on the following newline or
# "." so that e.g. "node-1" is not considered done merely because "node-10"
# has a marker file.
#
# Note: This step is O(#nodes^2) as we check if each node is present in the list of succeeded nodes.
# Making it linear would add code complexity without much benefit (as it just takes ~1s for 5k nodes).
#
# Assumes:
#   NODE_NAMES
#   gcs_artifacts_dir
# Sets:
#   NON_LOGEXPORTED_NODES
#   succeeded_nodes (raw 'gsutil ls' output; kept global as before, since
#                    code outside this function may read it)
# Returns:
#   1 if listing the marker files failed (NON_LOGEXPORTED_NODES is then left
#   untouched), 0 otherwise.
function find_non_logexported_nodes() {
  succeeded_nodes=$(gsutil ls "${gcs_artifacts_dir}/logexported-nodes-registry") || return 1
  echo "Successfully listed marker files for successful nodes"
  NON_LOGEXPORTED_NODES=()
  # Wrap the listing in newlines so that every entry, including the first and
  # the last one, is terminated by "\n" and can be matched uniformly.
  local listing=$'\n'"${succeeded_nodes}"$'\n'
  local node
  for node in "${NODE_NAMES[@]}"; do
    # Quoted pattern parts are matched literally, so node names containing
    # glob characters cannot alter the match.
    if [[ "${listing}" != *"/${node}"$'\n'* && "${listing}" != *"/${node}."* ]]; then
      NON_LOGEXPORTED_NODES+=("${node}")
    fi
  done
}
|
||||||
|
|
||||||
function dump_nodes_with_logexporter() {
|
function dump_nodes_with_logexporter() {
|
||||||
echo "Detecting nodes in the cluster"
|
echo "Detecting nodes in the cluster"
|
||||||
detect-node-names &> /dev/null
|
detect-node-names &> /dev/null
|
||||||
@ -312,14 +330,27 @@ function dump_nodes_with_logexporter() {
|
|||||||
return
|
return
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Give some time for the pods to finish uploading logs.
|
# Periodically fetch list of already logexported nodes to verify
|
||||||
sleep "${logexport_sleep_seconds}"
|
# if we aren't already done.
|
||||||
|
start="$(date +%s)"
|
||||||
|
while true; do
|
||||||
|
now="$(date +%s)"
|
||||||
|
if [[ $((now - start)) -gt ${logexport_sleep_seconds} ]]; then
|
||||||
|
echo "Waiting for all nodes to be logexported timed out."
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
if find_non_logexported_nodes; then
|
||||||
|
if [[ -z "${NON_LOGEXPORTED_NODES:-}" ]]; then
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
sleep 15
|
||||||
|
done
|
||||||
|
|
||||||
# List registry of marker files (of nodes whose logexporter succeeded) from GCS.
|
# List registry of marker files (of nodes whose logexporter succeeded) from GCS.
|
||||||
local nodes_succeeded
|
local nodes_succeeded
|
||||||
for retry in {1..10}; do
|
for retry in {1..10}; do
|
||||||
if nodes_succeeded=$(gsutil ls ${gcs_artifacts_dir}/logexported-nodes-registry); then
|
if find_non_logexported_nodes; then
|
||||||
echo "Successfully listed marker files for successful nodes"
|
|
||||||
break
|
break
|
||||||
else
|
else
|
||||||
echo "Attempt ${retry} failed to list marker files for succeessful nodes"
|
echo "Attempt ${retry} failed to list marker files for succeessful nodes"
|
||||||
@ -333,15 +364,10 @@ function dump_nodes_with_logexporter() {
|
|||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
# Collect names of nodes which didn't run logexporter successfully.
|
|
||||||
# Note: This step is O(#nodes^2) as we check if each node is present in the list of succeeded nodes.
|
|
||||||
# Making it linear would add code complexity without much benefit (as it just takes ~1s for 5k nodes).
|
|
||||||
failed_nodes=()
|
failed_nodes=()
|
||||||
for node in "${NODE_NAMES[@]}"; do
|
for node in "${NON_LOGEXPORTED_NODES[@]:-}"; do
|
||||||
if [[ ! "${nodes_succeeded}" =~ "${node}" ]]; then
|
|
||||||
echo "Logexporter didn't succeed on node ${node}. Queuing it for logdump through SSH."
|
echo "Logexporter didn't succeed on node ${node}. Queuing it for logdump through SSH."
|
||||||
failed_nodes+=("${node}")
|
failed_nodes+=("${node}")
|
||||||
fi
|
|
||||||
done
|
done
|
||||||
|
|
||||||
# Delete the logexporter resources and dump logs for the failed nodes (if any) through SSH.
|
# Delete the logexporter resources and dump logs for the failed nodes (if any) through SSH.
|
||||||
|
Loading…
Reference in New Issue
Block a user