tests: k8s-number-cpus: retry on kubectl exec error

Retry after a "kubectl exec" failure instead of aborting the test
immediately.

Example of a recent error:

https://github.com/kata-containers/kata-containers/actions/runs/17813996758/job/50644372056

not ok 1 Check number of cpus
...
error: Internal error occurred: error sending request: Post
"https://10.224.0.4:10250/exec/kata-containers-k8s-tests/cpu-test/c1?command=sh&command=-c&command=
cat+%!F(MISSING)proc%!F(MISSING)cpuinfo+%!C(MISSING)grep+processor%!C(MISSING)wc+-l&error=1&output=1": EOF

Signed-off-by: Dan Mihai <dmihai@microsoft.com>
This commit is contained in:
Dan Mihai
2025-09-18 21:33:04 +00:00
parent 91c3804959
commit 01c7949bfd

View File

@@ -29,19 +29,19 @@ setup() {
# Skip on aarch64 due to missing cpu hotplug related functionality. # Skip on aarch64 due to missing cpu hotplug related functionality.
@test "Check number of cpus" { @test "Check number of cpus" {
local -r retries="10"
local -r max_number_cpus="3"
local number_cpus=""
# Create pod # Create pod
kubectl create -f "${yaml_file}" kubectl create -f "${yaml_file}"
# Check pod creation # Check pod creation
kubectl wait --for=condition=Ready --timeout=$timeout pod "$pod_name" kubectl wait --for=condition=Ready --timeout=$timeout pod "$pod_name"
retries="10"
max_number_cpus="3"
for _ in $(seq 1 "$retries"); do for _ in $(seq 1 "$retries"); do
# Get number of cpus # Get number of cpus
number_cpus=$(kubectl exec pod/"$pod_name" -c "$container_name" \ number_cpus=$(container_exec_with_retries "$pod_name" "$container_name" "${exec_command[@]}")
-- "${exec_command[@]}")
if [[ "$number_cpus" =~ ^[0-9]+$ ]]; then if [[ "$number_cpus" =~ ^[0-9]+$ ]]; then
# Verify number of cpus # Verify number of cpus
[ "$number_cpus" -le "$max_number_cpus" ] [ "$number_cpus" -le "$max_number_cpus" ]