From eec6c8b0c4e51a8785529a728cc486af66734648 Mon Sep 17 00:00:00 2001 From: Dan Mihai Date: Thu, 18 Sep 2025 15:33:31 +0000 Subject: [PATCH 1/4] tests: k8s: retry after kubectl exec error Some of the k8s tests were already retrying if `kubectl exec` succeeded but produced empty output. Perform the same retries on `kubectl exec` error exit code too, instead of aborting the test immediately. Example of recent exec error: https://github.com/kata-containers/kata-containers/actions/runs/17813996758/job/50644372056 not ok 1 Check number of cpus ... error: Internal error occurred: error sending request: Post "https://10.224.0.4:10250/exec/kata-containers-k8s-tests/cpu-test/c1?command=sh&command=-c&command= cat+%!F(MISSING)proc%!F(MISSING)cpuinfo+%!C(MISSING)grep+processor%!C(MISSING)wc+-l&error=1&output=1": EOF Signed-off-by: Dan Mihai --- tests/integration/kubernetes/tests_common.sh | 43 +++++++++++++++----- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/tests/integration/kubernetes/tests_common.sh b/tests/integration/kubernetes/tests_common.sh index 48ece4e583..5b79908690 100644 --- a/tests/integration/kubernetes/tests_common.sh +++ b/tests/integration/kubernetes/tests_common.sh @@ -416,35 +416,56 @@ teardown_common() { fi } -# Invoke "kubectl exec", log its output, and check that a grep pattern is present in the output. +# Execute a command in a pod and grep kubectl's output. # -# Retry "kubectl exec" several times in case it unexpectedly returns an empty output string, -# in an attempt to work around issues similar to https://github.com/kubernetes/kubernetes/issues/124571. +# This function retries "kubectl exec" several times, if: +# - kubectl returns a failure exit code, or +# - kubectl exits successfully but produces empty console output. +# These retries are an attempt to work around issues similar to https://github.com/kubernetes/kubernetes/issues/124571. 
# # Parameters: # $1 - pod name # $2 - the grep pattern # $3+ - the command to execute using "kubectl exec" # +# Exit code: +# Equal to grep's exit code grep_pod_exec_output() { local -r pod_name="$1" shift local -r grep_arg="$1" shift - local grep_out="" + pod_exec_with_retries "${pod_name}" "$@" | grep "${grep_arg}" +} + +# Execute a command in a pod and echo kubectl's output to stdout. +# +# This function retries "kubectl exec" several times, if: +# - kubectl returns a failure exit code, or +# - kubectl exits successfully but produces empty console output. +# These retries are an attempt to work around issues similar to https://github.com/kubernetes/kubernetes/issues/124571. +# +# Parameters: +# $1 - pod name +# $2+ - the command to execute using "kubectl exec" +# +# Exit code: +# 0 +pod_exec_with_retries() { + local -r pod_name="$1" + shift local cmd_out="" for _ in {1..10}; do - info "Executing in pod ${pod_name}: $*" - cmd_out=$(kubectl exec "${pod_name}" -- "$@") + bats_unbuffered_info "Executing in pod ${pod_name}: $*" + cmd_out=$(kubectl exec "${pod_name}" -- "$@") || (bats_unbuffered_info "kubectl exec failed" ; cmd_out="") if [[ -n "${cmd_out}" ]]; then - info "command output: ${cmd_out}" - grep_out=$(echo "${cmd_out}" | grep "${grep_arg}") - info "grep output: ${grep_out}" + bats_unbuffered_info "command output: ${cmd_out}" break fi - warn "Empty output from kubectl exec" + bats_unbuffered_info "Warning: empty output from kubectl exec" sleep 1 done - [[ -n "${grep_out}" ]] + + echo "${cmd_out}" } From 91c3804959e75c8425b865b09f7d5f216d99200f Mon Sep 17 00:00:00 2001 From: Dan Mihai Date: Thu, 18 Sep 2025 22:20:14 +0000 Subject: [PATCH 2/4] tests: k8s: add container_exec_with_retries() Add container_exec_with_retries(), useful for retrying, if needed, commands similar to: kubectl exec POD -c CONTAINER -- COMMAND Signed-off-by: Dan Mihai --- tests/integration/kubernetes/tests_common.sh | 42 ++++++++++++++++++-- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git
a/tests/integration/kubernetes/tests_common.sh b/tests/integration/kubernetes/tests_common.sh index 5b79908690..7d254d7d75 100644 --- a/tests/integration/kubernetes/tests_common.sh +++ b/tests/integration/kubernetes/tests_common.sh @@ -454,17 +454,51 @@ grep_pod_exec_output() { pod_exec_with_retries() { local -r pod_name="$1" shift + local -r container_name="" + + container_exec_with_retries "${pod_name}" "${container_name}" "$@" +} + +# Execute a command in a pod's container and echo kubectl's output to stdout. +# +# If the caller specifies an empty container name as parameter, the command is executed in pod's default container, +# or in pod's first container if there is no default. +# +# This function retries "kubectl exec" several times, if: +# - kubectl returns a failure exit code, or +# - kubectl exits successfully but produces empty console output. +# These retries are an attempt to work around issues similar to https://github.com/kubernetes/kubernetes/issues/124571. +# +# Parameters: +# $1 - pod name +# $2 - container name +# $3+ - the command to execute using "kubectl exec" +# +# Exit code: +# 0 +container_exec_with_retries() { + local -r pod_name="$1" + shift + local -r container_name="$1" + shift local cmd_out="" for _ in {1..10}; do - bats_unbuffered_info "Executing in pod ${pod_name}: $*" - cmd_out=$(kubectl exec "${pod_name}" -- "$@") || (bats_unbuffered_info "kubectl exec failed" ; cmd_out="") + if [[ -n "${container_name}" ]]; then + bats_unbuffered_info "Executing in pod ${pod_name}, container ${container_name}: $*" + cmd_out=$(kubectl exec "${pod_name}" -c "${container_name}" -- "$@") || (bats_unbuffered_info "kubectl exec failed" ; cmd_out="") + else + bats_unbuffered_info "Executing in pod ${pod_name}: $*" + cmd_out=$(kubectl exec "${pod_name}" -- "$@") || (bats_unbuffered_info "kubectl exec failed" ; cmd_out="") + fi + if [[ -n "${cmd_out}" ]]; then bats_unbuffered_info "command output: ${cmd_out}" break + else + bats_unbuffered_info "Warning: 
empty output from kubectl exec" + sleep 1 fi - bats_unbuffered_info "Warning: empty output from kubectl exec" - sleep 1 done echo "${cmd_out}" From 01c7949bfdbddfa74481f2ddd9eb2ef528122462 Mon Sep 17 00:00:00 2001 From: Dan Mihai Date: Thu, 18 Sep 2025 21:33:04 +0000 Subject: [PATCH 3/4] tests: k8s-number-cpus: retry on kubectl exec error Retry after "kubectl exec" failure, instead of aborting the test immediately. Example of recent error: https://github.com/kata-containers/kata-containers/actions/runs/17813996758/job/50644372056 not ok 1 Check number of cpus ... error: Internal error occurred: error sending request: Post "https://10.224.0.4:10250/exec/kata-containers-k8s-tests/cpu-test/c1?command=sh&command=-c&command= cat+%!F(MISSING)proc%!F(MISSING)cpuinfo+%!C(MISSING)grep+processor%!C(MISSING)wc+-l&error=1&output=1": EOF Signed-off-by: Dan Mihai --- tests/integration/kubernetes/k8s-number-cpus.bats | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/integration/kubernetes/k8s-number-cpus.bats b/tests/integration/kubernetes/k8s-number-cpus.bats index adc1d4fd82..bbcaf469fe 100644 --- a/tests/integration/kubernetes/k8s-number-cpus.bats +++ b/tests/integration/kubernetes/k8s-number-cpus.bats @@ -29,19 +29,19 @@ setup() { # Skip on aarch64 due to missing cpu hotplug related functionality. 
@test "Check number of cpus" { + local -r retries="10" + local -r max_number_cpus="3" + local number_cpus="" + # Create pod kubectl create -f "${yaml_file}" # Check pod creation kubectl wait --for=condition=Ready --timeout=$timeout pod "$pod_name" - retries="10" - max_number_cpus="3" - for _ in $(seq 1 "$retries"); do # Get number of cpus - number_cpus=$(kubectl exec pod/"$pod_name" -c "$container_name" \ - -- "${exec_command[@]}") + number_cpus=$(container_exec_with_retries "$pod_name" "$container_name" "${exec_command[@]}") if [[ "$number_cpus" =~ ^[0-9]+$ ]]; then # Verify number of cpus [ "$number_cpus" -le "$max_number_cpus" ] From 524bf66cbc8c9c69e7bdd769d0ee16e1709ff529 Mon Sep 17 00:00:00 2001 From: Dan Mihai Date: Thu, 18 Sep 2025 21:52:51 +0000 Subject: [PATCH 4/4] tests: k8s-credentials-secrets: retry on exec error Retry after "kubectl exec" failure, instead of aborting the test immediately. Example of recent error: https://github.com/kata-containers/kata-containers/actions/runs/17828061309/job/50693999052?pr=11822 not ok 1 Credentials using secrets (in test file k8s-credentials-secrets.bats, line 59) `kubectl exec $pod_name -- "${pod_exec_command[@]}" | grep -w "username"' failed Signed-off-by: Dan Mihai --- tests/integration/kubernetes/k8s-credentials-secrets.bats | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/kubernetes/k8s-credentials-secrets.bats b/tests/integration/kubernetes/k8s-credentials-secrets.bats index 7035c70fb9..dcc52093e8 100644 --- a/tests/integration/kubernetes/k8s-credentials-secrets.bats +++ b/tests/integration/kubernetes/k8s-credentials-secrets.bats @@ -55,8 +55,8 @@ setup() { kubectl wait --for=condition=Ready --timeout=$timeout pod "$pod_name" # List the files - kubectl exec $pod_name -- "${pod_exec_command[@]}" | grep -w "password" - kubectl exec $pod_name -- "${pod_exec_command[@]}" | grep -w "username" + pod_exec_with_retries "${pod_name}" "${pod_exec_command[@]}" | grep -w 
"password" + pod_exec_with_retries "${pod_name}" "${pod_exec_command[@]}" | grep -w "username" # Create a pod that has access to the secret data through environment variables kubectl create -f "${pod_env_yaml_file}" @@ -65,8 +65,8 @@ setup() { kubectl wait --for=condition=Ready --timeout=$timeout pod "$second_pod_name" # Display environment variables - kubectl exec $second_pod_name -- "${pod_env_exec_command[@]}" | grep -w "SECRET_USERNAME" - kubectl exec $second_pod_name -- "${pod_env_exec_command[@]}" | grep -w "SECRET_PASSWORD" + pod_exec_with_retries "${second_pod_name}" "${pod_env_exec_command[@]}" | grep -w "SECRET_USERNAME" + pod_exec_with_retries "${second_pod_name}" "${pod_env_exec_command[@]}" | grep -w "SECRET_PASSWORD" } teardown() {