From f7816e9206055299fa87c10c9dd5c54cb22d5f55 Mon Sep 17 00:00:00 2001 From: Hyounggyu Choi Date: Tue, 14 Jan 2025 14:32:06 +0100 Subject: [PATCH] tests: Introduce retry_kubectl_apply() for trusted storage On s390x, some tests for trusted storage occasionally failed due to: ```bash etcdserver: request timed out ``` or ```bash Internal error occurred: resource quota evaluation timed out ``` These timeouts were not observed previously on k3s but occur sporadically on kubeadm. Importantly, they appear to be temporary and transient, which means they can be ignored in most cases. To address this, we introduced a new wrapper function, `retry_kubectl_apply()`, around `kubectl apply`, and use it in place of the direct `kubectl create -f` and `kubectl apply -f` calls. This function retries applying a given manifest up to 5 times if it fails due to a timeout. However, it will still catch and handle any other errors during pod creation. Fixes: #10651 Signed-off-by: Hyounggyu Choi --- .../kubernetes/k8s-guest-pull-image.bats | 6 +-- tests/integration/kubernetes/lib.sh | 45 ++++++++++++++++++- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/tests/integration/kubernetes/k8s-guest-pull-image.bats b/tests/integration/kubernetes/k8s-guest-pull-image.bats index bae5499109..dff148785e 100644 --- a/tests/integration/kubernetes/k8s-guest-pull-image.bats +++ b/tests/integration/kubernetes/k8s-guest-pull-image.bats @@ -101,7 +101,7 @@ setup() { cat $storage_config # Create persistent volume and persistent volume claim - kubectl create -f $storage_config + retry_kubectl_apply $storage_config pod_config=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${pod_config_template}").XXX") IMAGE="$image_pulled_time_less_than_default_time" NODE_NAME="$node" envsubst < "$pod_config_template" > "$pod_config" @@ -146,7 +146,7 @@ setup() { cat $storage_config # Create persistent volume and persistent volume claim - kubectl create -f $storage_config + retry_kubectl_apply $storage_config pod_config=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${pod_config_template}").XXX") 
# Apply a manifest with `kubectl apply -f`, retrying when the failure looks
# like a transient timeout (kubectl exits non-zero and its combined
# stdout/stderr contains "timed out", matched case-insensitively).
#
# Parameters:
#	$1 - the pod (or other resource) configuration file.
#	$2 - optional: maximum number of attempts (default: 5).
#	$3 - optional: delay in seconds between attempts (default: 5).
#
# Outputs:
#	Progress messages and the captured kubectl output on stdout.
#
# Returns:
#	0 if `kubectl apply` exited successfully.
#	1 if it failed for a reason other than a timeout, or if it was still
#	  timing out on the last allowed attempt.
#
retry_kubectl_apply() {
	local file_path=$1
	local retries=${2:-5}
	local delay=${3:-5}
	local attempt=1
	local func_name="${FUNCNAME[0]}"
	local output
	local rc

	while true; do
		# Record the exit status instead of discarding it: kubectl's own
		# status is the reliable success signal, whereas scanning the output
		# for the word "error" misfires on resource names and misses
		# failures phrased differently. The `|| rc=$?` form also keeps an
		# active errexit from aborting the caller on a failed attempt.
		rc=0
		output=$(kubectl apply -f "$file_path" 2>&1) || rc=$?
		echo ""
		echo "$func_name: Attempt $attempt/$retries"
		printf '%s\n' "$output"

		if [ "$rc" -eq 0 ]; then
			echo "$func_name: Resource created successfully."
			return 0
		fi

		# Only timeouts are treated as transient; anything else is final.
		if ! grep -iq "timed out" <<<"$output"; then
			echo "$func_name: kubectl apply failed (exit status $rc). Aborting."
			return 1
		fi

		if [ "$attempt" -ge "$retries" ]; then
			echo "$func_name: Max ${retries} retries reached. Failed due to timeout."
			return 1
		fi

		echo "$func_name: Timeout encountered, retrying in $delay seconds..."
		sleep "$delay"
		attempt=$((attempt + 1))
	done
}
pod_name=$(kubectl get pods -o jsonpath='{.items..metadata.name}'); then echo "Failed to create the pod" return 1 @@ -143,7 +184,7 @@ assert_pod_fail() { echo "In assert_pod_fail: $container_config" echo "Attempt to create the container but it should fail" - kubectl apply -f "${container_config}" + retry_kubectl_apply "${container_config}" if ! pod_name=$(kubectl get pods -o jsonpath='{.items..metadata.name}'); then echo "Failed to create the pod" return 1