tests: Use custom-node-debugger pod

With #10232 merged, we now have a persistent node debugger pod throughout the test.
As a result, there’s no need to spawn another debugger pod using `kubectl debug`,
which could lead to false negatives due to premature pod termination, as reported
in #10081.

This commit removes the `print_node_journal()` call that uses `kubectl debug` and
instead uses `exec_host()` to capture the host journal. The `exec_host()` function
is relocated to `tests/integration/kubernetes/lib.sh` to prevent cyclical dependencies
between `tests_common.sh` and `lib.sh`.

Signed-off-by: Hyounggyu Choi <Hyounggyu.Choi@ibm.com>
This commit is contained in:
Hyounggyu Choi 2024-09-24 16:11:17 +02:00
parent 2c2941122c
commit c70588fafe
10 changed files with 49 additions and 68 deletions

View File

@ -95,6 +95,6 @@ teardown() {
if [[ -n "${node_start_time:-}" && -z "$BATS_TEST_COMPLETED" ]]; then
echo "DEBUG: system logs of node '$node' since test start time ($node_start_time)"
print_node_journal "$node" "kata" --since "$node_start_time" || true
exec_host "${node}" journalctl -x -t "kata" --since '"'$node_start_time'"' || true
fi
}

View File

@ -5,6 +5,7 @@
# SPDX-License-Identifier: Apache-2.0
#
load "${BATS_TEST_DIRNAME}/lib.sh"
load "${BATS_TEST_DIRNAME}/../../common.bash"
load "${BATS_TEST_DIRNAME}/tests_common.sh"
TEST_INITRD="${TEST_INITRD:-no}"

View File

@ -117,6 +117,6 @@ teardown() {
if [[ -n "${node_start_time:-}" && -z "$BATS_TEST_COMPLETED" ]]; then
echo "DEBUG: system logs of node '$node' since test start time ($node_start_time)"
print_node_journal "$node" "kata" --since "$node_start_time" || true
exec_host "${node}" journalctl -x -t "kata" --since '"'$node_start_time'"' || true
fi
}

View File

@ -100,6 +100,6 @@ teardown() {
if [[ -n "${node_start_time:-}" && -z "$BATS_TEST_COMPLETED" ]]; then
echo "DEBUG: system logs of node '$node' since test start time ($node_start_time)"
print_node_journal "$node" "kata" --since "$node_start_time" || true
exec_host "${node}" journalctl -x -t "kata" --since '"'$node_start_time'"' || true
fi
}

View File

@ -151,6 +151,6 @@ teardown() {
if [[ -n "${node_start_time:-}" && -z "$BATS_TEST_COMPLETED" ]]; then
echo "DEBUG: system logs of node '$node' since test start time ($node_start_time)"
print_node_journal "$node" "kata" --since "$node_start_time" || true
exec_host "${node}" journalctl -x -t "kata" --since '"'$node_start_time'"' || true
fi
}

View File

@ -101,6 +101,6 @@ teardown() {
if [[ -n "${node_start_time:-}" && -z "$BATS_TEST_COMPLETED" ]]; then
echo "DEBUG: system logs of node '$node' since test start time ($node_start_time)"
print_node_journal "$node" "kata" --since "$node_start_time" || true
exec_host "${node}" journalctl -x -t "kata" --since '"'$node_start_time'"' || true
fi
}

View File

@ -115,6 +115,6 @@ teardown() {
if [[ -n "${node_start_time:-}" && -z "$BATS_TEST_COMPLETED" ]]; then
echo "DEBUG: system logs of node '$node' since test start time ($node_start_time)"
print_node_journal "$node" "kata" --since "$node_start_time" || true
exec_host "${node}" journalctl -x -t "kata" --since '"'$node_start_time'"' || true
fi
}

View File

@ -5,6 +5,7 @@
# SPDX-License-Identifier: Apache-2.0
#
load "${BATS_TEST_DIRNAME}/lib.sh"
load "${BATS_TEST_DIRNAME}/../../common.bash"
load "${BATS_TEST_DIRNAME}/tests_common.sh"
TEST_INITRD="${TEST_INITRD:-no}"

View File

@ -64,6 +64,46 @@ k8s_create_pod() {
fi
}
# Runs a command in the host filesystem of a node, through a node debugger pod.
#
# The debugger pod lives in the kube-system namespace and is named
# "custom-node-debugger-<first 7 characters of the sha1 of the node name>".
# If no such pod exists it is created from
# runtimeclass_workloads/custom-node-debugger.yaml (path relative to the
# current working directory); otherwise the existing pod is reused.
#
# Parameters:
#	$1   - the node name
#	$2.. - the command to run; the words are joined with single spaces and
#	       handed to `bash -c` inside `chroot /host`
#
# Outputs:
#	the command's stdout, with every `\r` character removed
#
# Returns:
#	the exit status of `kubectl wait` when a newly created debugger pod does
#	not become ready within 30s; otherwise the status of the final pipeline
#	(that is the status of `tr`, unless the caller enabled `pipefail`).
#
exec_host() {
    local node="$1"
    # Everything after the node name forms the command line. The assignment
    # context joins the words with single spaces.
    local command="${@:2}"
    # Make 7 character hash from the node name
    local node_hash
    node_hash="$(printf '%s' "${node}" | sha1sum | cut -c1-7)"
    local pod_name="custom-node-debugger-${node_hash}"

    # Check if there is an existing node debugger pod and reuse it
    # Otherwise, create a new one
    if ! kubectl get pod -n kube-system "${pod_name}" > /dev/null 2>&1; then
        POD_NAME="${pod_name}" NODE_NAME="${node}" envsubst < runtimeclass_workloads/custom-node-debugger.yaml | \
            kubectl apply -n kube-system -f - > /dev/null
        # Wait for the newly created pod to be ready and bail out with the
        # status of `kubectl wait` itself if it never gets there. The status
        # must be consumed right here: testing `$?` in a separate `[ ... ]`
        # first would overwrite it with the (successful) status of the test.
        kubectl wait pod -n kube-system --timeout="30s" --for=condition=ready "${pod_name}" > /dev/null || return $?
    fi

    # Execute the command and capture the output
    # We're stripping the `\r` here due to: https://github.com/kata-containers/kata-containers/issues/8051
    # tl;dr: When testing with CRI-O we're facing the following error:
    # ```
    # (from function `exec_host' in file tests_common.sh, line 51,
    # in test file k8s-file-volume.bats, line 25)
    # `exec_host "echo "$file_body" > $tmp_file"' failed with status 127
    # [bats-exec-test:38] INFO: k8s configured to use runtimeclass
    # bash: line 1: $'\r': command not found
    # ```
    kubectl exec -qi -n kube-system "${pod_name}" -- chroot /host bash -c "${command}" | tr -d '\r'
}
# Check the logged messages on host have a given message.
#
# Parameters:
@ -79,7 +119,7 @@ assert_logs_contain() {
local message="$4"
# Note: with image-rs we get more than the default 1000 lines of logs
print_node_journal "$node" "$log_id" --since "$datetime" | grep "$message"
exec_host "${node}" journalctl -x -t $log_id --since '"'$datetime'"' | grep "$message"
}
# Create a pod then assert it fails to run. Use in tests that you expect the
@ -262,27 +302,6 @@ set_node() {
"${yaml}"
}
# Print the systemd journal of a worker node.
#
# A throw-away debugger pod is started on the node with `kubectl debug`, and
# journalctl is run from it inside `chroot /host`. Afterwards every pod whose
# name matches "node-debugger-<node>" is deleted.
#
# Parameters:
#	$1 - the k8s worker node name
#	$2 - the syslog identifier as in journalctl's -t option
#	$N - (optional) any extra parameters to journalctl
#
# Returns:
#	the status of the clean-up pipeline (the last command), not the status
#	of journalctl.
#
print_node_journal() {
    local node_name="${1}"
    local syslog_id="${2}"
    shift 2
    local debug_image="quay.io/prometheus/busybox"
    # The command executed by the debugger pod; the remaining arguments are
    # forwarded to journalctl untouched.
    local -a journal_cmd=(chroot /host journalctl -x -t "${syslog_id}" --no-pager "$@")

    kubectl debug --image "${debug_image}" -q -i "node/${node_name}" \
        -- "${journal_cmd[@]}"

    # Delete the debugger pod
    kubectl get pods -o name \
        | grep "node-debugger-${node_name}" \
        | xargs kubectl delete > /dev/null
}
# Get the sandbox id for kata container from a worker node
#
# Parameters:

View File

@ -76,46 +76,6 @@ get_one_kata_node() {
echo "${resource_name/"node/"}"
}
# Runs a command in the host filesystem of a node, through a node debugger pod.
#
# The debugger pod lives in the kube-system namespace and is named
# "custom-node-debugger-<first 7 characters of the sha1 of the node name>".
# If no such pod exists it is created from
# runtimeclass_workloads/custom-node-debugger.yaml (path relative to the
# current working directory); otherwise the existing pod is reused.
#
# Parameters:
#	$1   - the node name
#	$2.. - the command to run; the words are joined with single spaces and
#	       handed to `bash -c` inside `chroot /host`
#
# Outputs:
#	the command's stdout, with every `\r` character removed
#
# Returns:
#	the exit status of `kubectl wait` when a newly created debugger pod does
#	not become ready within 30s; otherwise the status of the final pipeline
#	(that is the status of `tr`, unless the caller enabled `pipefail`).
#
exec_host() {
    local node="$1"
    # Everything after the node name forms the command line. The assignment
    # context joins the words with single spaces.
    local command="${@:2}"
    # Make 7 character hash from the node name
    local node_hash
    node_hash="$(printf '%s' "${node}" | sha1sum | cut -c1-7)"
    local pod_name="custom-node-debugger-${node_hash}"

    # Check if there is an existing node debugger pod and reuse it
    # Otherwise, create a new one
    if ! kubectl get pod -n kube-system "${pod_name}" > /dev/null 2>&1; then
        POD_NAME="${pod_name}" NODE_NAME="${node}" envsubst < runtimeclass_workloads/custom-node-debugger.yaml | \
            kubectl apply -n kube-system -f - > /dev/null
        # Wait for the newly created pod to be ready and bail out with the
        # status of `kubectl wait` itself if it never gets there. The status
        # must be consumed right here: testing `$?` in a separate `[ ... ]`
        # first would overwrite it with the (successful) status of the test.
        kubectl wait pod -n kube-system --timeout="30s" --for=condition=ready "${pod_name}" > /dev/null || return $?
    fi

    # Execute the command and capture the output
    # We're stripping the `\r` here due to: https://github.com/kata-containers/kata-containers/issues/8051
    # tl;dr: When testing with CRI-O we're facing the following error:
    # ```
    # (from function `exec_host' in file tests_common.sh, line 51,
    # in test file k8s-file-volume.bats, line 25)
    # `exec_host "echo "$file_body" > $tmp_file"' failed with status 127
    # [bats-exec-test:38] INFO: k8s configured to use runtimeclass
    # bash: line 1: $'\r': command not found
    # ```
    kubectl exec -qi -n kube-system "${pod_name}" -- chroot /host bash -c "${command}" | tr -d '\r'
}
# Tells whether automatic policy generation was requested.
#
# Returns:
#	0 when AUTO_GENERATE_POLICY is exactly "yes", 1 otherwise.
auto_generate_policy_enabled() {
    [[ "${AUTO_GENERATE_POLICY}" == "yes" ]]
}