Merge pull request #11827 from microsoft/danmihai1/exec-retries

tests: k8s: retry kubectl exec
This commit is contained in:
Dan Mihai
2025-09-22 17:14:50 -07:00
committed by GitHub
3 changed files with 78 additions and 23 deletions

View File

@@ -55,8 +55,8 @@ setup() {
kubectl wait --for=condition=Ready --timeout=$timeout pod "$pod_name"
# List the files
kubectl exec $pod_name -- "${pod_exec_command[@]}" | grep -w "password"
kubectl exec $pod_name -- "${pod_exec_command[@]}" | grep -w "username"
pod_exec_with_retries "${pod_name}" "${pod_exec_command[@]}" | grep -w "password"
pod_exec_with_retries "${pod_name}" "${pod_exec_command[@]}" | grep -w "username"
# Create a pod that has access to the secret data through environment variables
kubectl create -f "${pod_env_yaml_file}"
@@ -65,8 +65,8 @@ setup() {
kubectl wait --for=condition=Ready --timeout=$timeout pod "$second_pod_name"
# Display environment variables
kubectl exec $second_pod_name -- "${pod_env_exec_command[@]}" | grep -w "SECRET_USERNAME"
kubectl exec $second_pod_name -- "${pod_env_exec_command[@]}" | grep -w "SECRET_PASSWORD"
pod_exec_with_retries "${second_pod_name}" "${pod_env_exec_command[@]}" | grep -w "SECRET_USERNAME"
pod_exec_with_retries "${second_pod_name}" "${pod_env_exec_command[@]}" | grep -w "SECRET_PASSWORD"
}
teardown() {

View File

@@ -29,19 +29,19 @@ setup() {
# Skip on aarch64 due to missing cpu hotplug related functionality.
@test "Check number of cpus" {
local -r retries="10"
local -r max_number_cpus="3"
local number_cpus=""
# Create pod
kubectl create -f "${yaml_file}"
# Check pod creation
kubectl wait --for=condition=Ready --timeout=$timeout pod "$pod_name"
retries="10"
max_number_cpus="3"
for _ in $(seq 1 "$retries"); do
# Get number of cpus
number_cpus=$(kubectl exec pod/"$pod_name" -c "$container_name" \
-- "${exec_command[@]}")
number_cpus=$(container_exec_with_retries "$pod_name" "$container_name" "${exec_command[@]}")
if [[ "$number_cpus" =~ ^[0-9]+$ ]]; then
# Verify number of cpus
[ "$number_cpus" -le "$max_number_cpus" ]

View File

@@ -416,35 +416,90 @@ teardown_common() {
fi
}
# Execute a command in a pod and check that a grep pattern is present in kubectl's output.
#
# The command is run through pod_exec_with_retries, so "kubectl exec" is retried several times if:
# - kubectl returns a failure exit code, or
# - kubectl exits successfully but produces empty console output.
# These retries are an attempt to work around issues similar to https://github.com/kubernetes/kubernetes/issues/124571.
#
# Parameters:
#	$1	- pod name
#	$2	- the grep pattern
#	$3+	- the command to execute using "kubectl exec"
#
# Output:
#	The lines of the command output that match the grep pattern, on stdout.
#
# Exit code:
#	Equal to grep's exit code (the last command of the pipeline; this assumes
#	the caller has not enabled "pipefail").
grep_pod_exec_output() {
  local -r pod_name="$1"
  shift
  local -r grep_arg="$1"
  shift

  # Everything left in "$@" is the command to run inside the pod.
  pod_exec_with_retries "${pod_name}" "$@" | grep "${grep_arg}"
}
# Run a command inside a pod and print kubectl's output on stdout.
#
# This is a convenience wrapper: it forwards to container_exec_with_retries with an
# empty container name, so no "-c <container>" is passed to "kubectl exec".
#
# The retry policy is the one implemented by container_exec_with_retries, i.e.
# "kubectl exec" is retried several times if:
# - kubectl returns a failure exit code, or
# - kubectl exits successfully but produces empty console output.
# These retries are an attempt to work around issues similar to https://github.com/kubernetes/kubernetes/issues/124571.
#
# Parameters:
#	$1	- pod name
#	$2+	- the command to execute using "kubectl exec"
#
# Exit code:
#	The exit code of container_exec_with_retries (documented as 0).
pod_exec_with_retries() {
  local -r target_pod="$1"
  shift

  # The empty second argument means "no explicit container".
  container_exec_with_retries "${target_pod}" "" "$@"
}
# Execute a command in a pod's container and echo kubectl's output to stdout.
#
# If the caller specifies an empty container name as parameter, no "-c" option is passed
# to kubectl, so the command is executed in pod's default container, or in pod's first
# container if there is no default.
#
# This function retries "kubectl exec" (up to 10 attempts, one second apart), if:
# - kubectl returns a failure exit code, or
# - kubectl exits successfully but produces empty console output.
# These retries are an attempt to work around issues similar to https://github.com/kubernetes/kubernetes/issues/124571.
#
# Parameters:
#	$1	- pod name
#	$2	- container name (may be empty)
#	$3+	- the command to execute using "kubectl exec"
#
# Output:
#	The output of the first successful, non-empty attempt; an empty line if every
#	attempt failed or produced no output.
#
# Exit code:
#	0
container_exec_with_retries() {
  local -r pod_name="$1"
  shift
  local -r container_name="$1"
  shift

  # Build the "kubectl exec" target once; only add "-c" for a named container.
  local -a exec_target=("${pod_name}")
  local where="pod ${pod_name}"
  if [[ -n "${container_name}" ]]; then
    exec_target+=(-c "${container_name}")
    where+=", container ${container_name}"
  fi

  local cmd_out=""
  for _ in {1..10}; do
    bats_unbuffered_info "Executing in ${where}: $*"

    # Discard any partial output of a failed attempt. The reset must happen in the
    # current shell: inside a "( ... )" subshell the assignment would be lost and the
    # failed attempt would be mistaken for a success.
    if ! cmd_out=$(kubectl exec "${exec_target[@]}" -- "$@"); then
      bats_unbuffered_info "kubectl exec failed"
      cmd_out=""
    fi

    if [[ -n "${cmd_out}" ]]; then
      bats_unbuffered_info "command output: ${cmd_out}"
      break
    fi

    bats_unbuffered_info "Warning: empty output from kubectl exec"
    sleep 1
  done

  # printf instead of echo so output such as "-n" or "-e" is printed verbatim.
  printf '%s\n' "${cmd_out}"
}