mirror of
https://github.com/k3s-io/kubernetes.git
synced 2025-07-23 03:41:45 +00:00
Fixed infinite loop in cluster validation.
Fixed infinite loop in cluster validation when getting nodes failed.
This commit is contained in:
parent
59ad9a30ca
commit
3dbfa94798
@ -73,7 +73,9 @@ REQUIRED_NUM_NODES=$((EXPECTED_NUM_NODES - ALLOWED_NOTREADY_NODES))
|
|||||||
# Make several attempts to deal with slow cluster birth.
|
# Make several attempts to deal with slow cluster birth.
|
||||||
return_value=0
|
return_value=0
|
||||||
attempt=0
|
attempt=0
|
||||||
|
# Set the timeout to ~25minutes (100 x 15 second) to avoid timeouts for 1000-node clusters.
|
||||||
PAUSE_BETWEEN_ITERATIONS_SECONDS=15
|
PAUSE_BETWEEN_ITERATIONS_SECONDS=15
|
||||||
|
MAX_ATTEMPTS=100
|
||||||
ADDITIONAL_ITERATIONS=$(((CLUSTER_READY_ADDITIONAL_TIME_SECONDS + PAUSE_BETWEEN_ITERATIONS_SECONDS - 1)/PAUSE_BETWEEN_ITERATIONS_SECONDS))
|
ADDITIONAL_ITERATIONS=$(((CLUSTER_READY_ADDITIONAL_TIME_SECONDS + PAUSE_BETWEEN_ITERATIONS_SECONDS - 1)/PAUSE_BETWEEN_ITERATIONS_SECONDS))
|
||||||
while true; do
|
while true; do
|
||||||
# Pause between iterations of this large outer loop.
|
# Pause between iterations of this large outer loop.
|
||||||
@ -91,7 +93,18 @@ while true; do
|
|||||||
# Suppress errors from kubectl output because during cluster bootstrapping
|
# Suppress errors from kubectl output because during cluster bootstrapping
|
||||||
# for clusters where the master node is registered, the apiserver will become
|
# for clusters where the master node is registered, the apiserver will become
|
||||||
# available and then get restarted as the kubelet configures the docker bridge.
|
# available and then get restarted as the kubelet configures the docker bridge.
|
||||||
node=$(kubectl_retry get nodes) || continue
|
#
|
||||||
|
# We are assigning the result of kubectl_retry get nodes operation to the res
|
||||||
|
# varaible in that way, to prevent stopping the whole script on an error.
|
||||||
|
node=$(kubectl_retry get nodes) && res="$?" || res="$?"
|
||||||
|
if [ "${res}" -ne "0" ]; then
|
||||||
|
if [[ "${attempt}" -gt "${last_run:-$MAX_ATTEMPTS}" ]]; then
|
||||||
|
echo -e "${color_red} Failed to get nodes.${color_norm}"
|
||||||
|
exit 1
|
||||||
|
else
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
fi
|
||||||
found=$(($(echo "${node}" | wc -l) - 1))
|
found=$(($(echo "${node}" | wc -l) - 1))
|
||||||
ready=$(($(echo "${node}" | grep -v "NotReady" | wc -l ) - 1))
|
ready=$(($(echo "${node}" | grep -v "NotReady" | wc -l ) - 1))
|
||||||
|
|
||||||
@ -110,8 +123,7 @@ while true; do
|
|||||||
echo -e "${color_green}Found ${REQUIRED_NUM_NODES} Nodes, allowing additional ${ADDITIONAL_ITERATIONS} iterations for other Nodes to join.${color_norm}"
|
echo -e "${color_green}Found ${REQUIRED_NUM_NODES} Nodes, allowing additional ${ADDITIONAL_ITERATIONS} iterations for other Nodes to join.${color_norm}"
|
||||||
last_run="${last_run:-$((attempt + ADDITIONAL_ITERATIONS - 1))}"
|
last_run="${last_run:-$((attempt + ADDITIONAL_ITERATIONS - 1))}"
|
||||||
fi
|
fi
|
||||||
# Set the timeout to ~25minutes (100 x 15 second) to avoid timeouts for 1000-node clusters.
|
if [[ "${attempt}" -gt "${last_run:-$MAX_ATTEMPTS}" ]]; then
|
||||||
if [[ "${attempt}" -gt "${last_run:-100}" ]]; then
|
|
||||||
echo -e "${color_yellow}Detected ${ready} ready nodes, found ${found} nodes out of expected ${EXPECTED_NUM_NODES}. Your cluster may not be fully functional.${color_norm}"
|
echo -e "${color_yellow}Detected ${ready} ready nodes, found ${found} nodes out of expected ${EXPECTED_NUM_NODES}. Your cluster may not be fully functional.${color_norm}"
|
||||||
kubectl_retry get nodes
|
kubectl_retry get nodes
|
||||||
if [[ "${REQUIRED_NUM_NODES}" -gt "${ready}" ]]; then
|
if [[ "${REQUIRED_NUM_NODES}" -gt "${ready}" ]]; then
|
||||||
|
Loading…
Reference in New Issue
Block a user