tests: k8s: retry after kubectl exec error

Some of the k8s tests were already retrying if `kubectl exec` succeeded
but produced empty output. Perform the same retries on `kubectl exec`
error exit code too, instead of aborting the test immediately.

Example of recent exec error:

https://github.com/kata-containers/kata-containers/actions/runs/17813996758/job/50644372056

not ok 1 Check number of cpus
...
error: Internal error occurred: error sending request: Post
"https://10.224.0.4:10250/exec/kata-containers-k8s-tests/cpu-test/c1?command=sh&command=-c&command=
cat+%!F(MISSING)proc%!F(MISSING)cpuinfo+%!C(MISSING)grep+processor%!C(MISSING)wc+-l&error=1&output=1": EOF

Signed-off-by: Dan Mihai <dmihai@microsoft.com>
This commit is contained in:
Dan Mihai
2025-09-18 15:33:31 +00:00
parent 0fb40eda12
commit eec6c8b0c4

View File

@@ -416,35 +416,56 @@ teardown_common() {
fi
}
# Execute a command in a pod and grep kubectl's output.
#
# The command is run through pod_exec_with_retries, which retries "kubectl exec"
# several times, if:
# - kubectl returns a failure exit code, or
# - kubectl exits successfully but produces empty console output.
# These retries are an attempt to work around issues similar to https://github.com/kubernetes/kubernetes/issues/124571.
#
# Parameters:
#   $1  - pod name
#   $2  - the grep pattern
#   $3+ - the command to execute using "kubectl exec"
#
# Outputs:
#   The lines of the command output that match the grep pattern, on stdout.
#
# Exit code:
#   Equal to grep's exit code
grep_pod_exec_output() {
  local -r pod_name="$1"
  shift
  local -r grep_arg="$1"
  shift

  # grep is the last stage of the pipeline, so its status becomes the
  # function's exit code.
  pod_exec_with_retries "${pod_name}" "$@" | grep "${grep_arg}"
}
# Execute a command in a pod and echo kubectl's output to stdout.
#
# This function retries "kubectl exec" several times, if:
# - kubectl returns a failure exit code, or
# - kubectl exits successfully but produces empty console output.
# These retries are an attempt to work around issues similar to https://github.com/kubernetes/kubernetes/issues/124571.
#
# At most 10 attempts are made, with a 1 second pause after each
# unsuccessful attempt.
#
# Parameters:
#   $1  - pod name
#   $2+ - the command to execute using "kubectl exec"
#
# Outputs:
#   The output of the first successful, non-empty "kubectl exec" on stdout,
#   or an empty line if every attempt failed or produced no output.
#   NOTE(review): assumes bats_unbuffered_info does not write to stdout,
#   otherwise its log lines would be mixed into the returned output - confirm.
#
# Exit code:
#   0
pod_exec_with_retries() {
  local -r pod_name="$1"
  shift
  local cmd_out=""

  for _ in {1..10}; do
    bats_unbuffered_info "Executing in pod ${pod_name}: $*"

    # Handle the failure in the current shell. Resetting cmd_out inside a
    # "( ... )" subshell would be lost, so partial output from a failed
    # "kubectl exec" would be treated as success and never retried.
    if ! cmd_out=$(kubectl exec "${pod_name}" -- "$@"); then
      bats_unbuffered_info "kubectl exec failed"
      cmd_out=""
    fi

    if [[ -n "${cmd_out}" ]]; then
      bats_unbuffered_info "command output: ${cmd_out}"
      break
    fi

    bats_unbuffered_info "Warning: empty output from kubectl exec"
    sleep 1
  done

  # printf rather than echo, so output such as "-n" is emitted verbatim.
  printf '%s\n' "${cmd_out}"
}