mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-07-31 23:36:12 +00:00
tests: Use custom-node-debugger pod
With #10232 merged, we now have a persistent node debugger pod throughout the test. As a result, there’s no need to spawn another debugger pod using `kubectl debug`, which could lead to false negatives due to premature pod termination, as reported in #10081. This commit removes the `print_node_journal()` call that uses `kubectl debug` and instead uses `exec_host()` to capture the host journal. The `exec_host()` function is relocated to `tests/integration/kubernetes/lib.sh` to prevent cyclical dependencies between `tests_common.sh` and `lib.sh`. Signed-off-by: Hyounggyu Choi <Hyounggyu.Choi@ibm.com>
This commit is contained in:
parent
2c2941122c
commit
c70588fafe
@ -95,6 +95,6 @@ teardown() {
|
||||
|
||||
if [[ -n "${node_start_time:-}" && -z "$BATS_TEST_COMPLETED" ]]; then
|
||||
echo "DEBUG: system logs of node '$node' since test start time ($node_start_time)"
|
||||
print_node_journal "$node" "kata" --since "$node_start_time" || true
|
||||
exec_host "${node}" journalctl -x -t "kata" --since '"'$node_start_time'"' || true
|
||||
fi
|
||||
}
|
||||
|
@ -5,6 +5,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
load "${BATS_TEST_DIRNAME}/lib.sh"
|
||||
load "${BATS_TEST_DIRNAME}/../../common.bash"
|
||||
load "${BATS_TEST_DIRNAME}/tests_common.sh"
|
||||
TEST_INITRD="${TEST_INITRD:-no}"
|
||||
|
@ -117,6 +117,6 @@ teardown() {
|
||||
|
||||
if [[ -n "${node_start_time:-}" && -z "$BATS_TEST_COMPLETED" ]]; then
|
||||
echo "DEBUG: system logs of node '$node' since test start time ($node_start_time)"
|
||||
print_node_journal "$node" "kata" --since "$node_start_time" || true
|
||||
exec_host "${node}" journalctl -x -t "kata" --since '"'$node_start_time'"' || true
|
||||
fi
|
||||
}
|
||||
|
@ -100,6 +100,6 @@ teardown() {
|
||||
|
||||
if [[ -n "${node_start_time:-}" && -z "$BATS_TEST_COMPLETED" ]]; then
|
||||
echo "DEBUG: system logs of node '$node' since test start time ($node_start_time)"
|
||||
print_node_journal "$node" "kata" --since "$node_start_time" || true
|
||||
exec_host "${node}" journalctl -x -t "kata" --since '"'$node_start_time'"' || true
|
||||
fi
|
||||
}
|
||||
|
@ -151,6 +151,6 @@ teardown() {
|
||||
|
||||
if [[ -n "${node_start_time:-}" && -z "$BATS_TEST_COMPLETED" ]]; then
|
||||
echo "DEBUG: system logs of node '$node' since test start time ($node_start_time)"
|
||||
print_node_journal "$node" "kata" --since "$node_start_time" || true
|
||||
exec_host "${node}" journalctl -x -t "kata" --since '"'$node_start_time'"' || true
|
||||
fi
|
||||
}
|
||||
|
@ -101,6 +101,6 @@ teardown() {
|
||||
|
||||
if [[ -n "${node_start_time:-}" && -z "$BATS_TEST_COMPLETED" ]]; then
|
||||
echo "DEBUG: system logs of node '$node' since test start time ($node_start_time)"
|
||||
print_node_journal "$node" "kata" --since "$node_start_time" || true
|
||||
exec_host "${node}" journalctl -x -t "kata" --since '"'$node_start_time'"' || true
|
||||
fi
|
||||
}
|
||||
|
@ -115,6 +115,6 @@ teardown() {
|
||||
|
||||
if [[ -n "${node_start_time:-}" && -z "$BATS_TEST_COMPLETED" ]]; then
|
||||
echo "DEBUG: system logs of node '$node' since test start time ($node_start_time)"
|
||||
print_node_journal "$node" "kata" --since "$node_start_time" || true
|
||||
exec_host "${node}" journalctl -x -t "kata" --since '"'$node_start_time'"' || true
|
||||
fi
|
||||
}
|
||||
|
@ -5,6 +5,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
load "${BATS_TEST_DIRNAME}/lib.sh"
|
||||
load "${BATS_TEST_DIRNAME}/../../common.bash"
|
||||
load "${BATS_TEST_DIRNAME}/tests_common.sh"
|
||||
TEST_INITRD="${TEST_INITRD:-no}"
|
||||
|
@ -64,6 +64,46 @@ k8s_create_pod() {
|
||||
fi
|
||||
}
|
||||
|
||||
# Runs a command in the host filesystem of a node, going through that node's
# "custom-node-debugger" pod in the kube-system namespace. The pod is reused
# when it already exists, and created otherwise.
#
# Parameters:
#	$1     - the node name
#	$2..$N - the command line; the words are joined with single spaces and the
#	         resulting string is run by `bash -c` inside `chroot /host`
#
# Outputs:
#	the stdout of the command, with every carriage return removed
#
# Returns:
#	the status of `kubectl wait` when a debugger pod had to be created and did
#	not become ready within 30s; otherwise the status of the final pipeline,
#	which is the one of `tr` unless the caller enabled `pipefail`
#
exec_host() {
	local node="$1"
	# Everything after the node name is flattened into one string, because
	# `bash -c` takes the whole command line as a single argument. In an
	# assignment "$@" is joined with spaces.
	local command="${@:2}"
	# Declared separately so that the assignment does not hide a failure of
	# the command substitution behind the status of `local`.
	local pod_name

	# Make 7 character hash from the node name
	pod_name="custom-node-debugger-$(echo -n "$node" | sha1sum | cut -c1-7)"

	# Check if there is an existing node debugger pod and reuse it
	# Otherwise, create a new one
	if ! kubectl get pod -n kube-system "${pod_name}" > /dev/null 2>&1; then
		# The manifest path is relative to the current working directory.
		POD_NAME="${pod_name}" NODE_NAME="${node}" envsubst < runtimeclass_workloads/custom-node-debugger.yaml | \
			kubectl apply -n kube-system -f - > /dev/null
		# Wait for the newly created pod to be ready and give up when it is not.
		# The status is propagated with `|| return`: going through
		# `if [ $? -ne 0 ]; then return $?` would return the status of the `[`
		# test itself, which is always 0 there.
		kubectl wait pod -n kube-system --timeout="30s" --for=condition=ready "${pod_name}" > /dev/null || return $?
	fi

	# Execute the command and capture the output
	# We're trimming the `\r` here due to: https://github.com/kata-containers/kata-containers/issues/8051
	# tl;dr: When testing with CRI-O we're facing the following error:
	# ```
	# (from function `exec_host' in file tests_common.sh, line 51,
	# in test file k8s-file-volume.bats, line 25)
	# `exec_host "echo "$file_body" > $tmp_file"' failed with status 127
	# [bats-exec-test:38] INFO: k8s configured to use runtimeclass
	# bash: line 1: $'\r': command not found
	# ```
	kubectl exec -qi -n kube-system "${pod_name}" -- chroot /host bash -c "${command}" | tr -d '\r'
}
|
||||
|
||||
# Check the logged messages on host have a given message.
|
||||
#
|
||||
# Parameters:
|
||||
@ -79,7 +119,7 @@ assert_logs_contain() {
|
||||
local message="$4"
|
||||
|
||||
# Note: with image-rs we get more than the default 1000 lines of logs
|
||||
print_node_journal "$node" "$log_id" --since "$datetime" | grep "$message"
|
||||
exec_host "${node}" journalctl -x -t $log_id --since '"'$datetime'"' | grep "$message"
|
||||
}
|
||||
|
||||
# Create a pod then assert it fails to run. Use in tests that you expect the
|
||||
@ -262,27 +302,6 @@ set_node() {
|
||||
"${yaml}"
|
||||
}
|
||||
|
||||
# Get the systemd's journal from a worker node
#
# The journal is read with `kubectl debug`, which spawns a temporary
# "node-debugger-<node>-*" pod; such pods are deleted before returning.
#
# Parameters:
#	$1 - the k8s worker node name
#	$2 - the syslog identifier as in journalctl's -t option
#	$N - (optional) any extra parameters to journalctl
#
# Outputs:
#	the journal entries, as printed by journalctl
#
# Returns:
#	the status of the `kubectl debug` invocation; the cleanup of the debugger
#	pod is best effort and does not change the result
#
print_node_journal() {
	local node="$1"
	local id="$2"
	shift 2
	local img="quay.io/prometheus/busybox"
	local rc=0
	local -a debugger_pods=()

	kubectl debug --image "$img" -q -i "node/${node}" \
		-- chroot /host journalctl -x -t "$id" --no-pager "$@" || rc=$?

	# Delete the debugger pod. The node name is matched as a fixed string (a
	# node name may contain dots) and `kubectl delete` is only invoked when
	# there is something to delete.
	mapfile -t debugger_pods < <(kubectl get pods -o name | grep -F -- "node-debugger-${node}")
	if (( ${#debugger_pods[@]} > 0 )); then
		kubectl delete "${debugger_pods[@]}" > /dev/null || true
	fi

	# Report the outcome of the journal query, not the one of the cleanup.
	return "${rc}"
}
|
||||
|
||||
|
||||
# Get the sandbox id for kata container from a worker node
|
||||
#
|
||||
# Parameters:
|
||||
|
@ -76,46 +76,6 @@ get_one_kata_node() {
|
||||
echo "${resource_name/"node/"}"
|
||||
}
|
||||
|
||||
# Runs a command in the host filesystem of a node, going through that node's
# "custom-node-debugger" pod in the kube-system namespace. The pod is reused
# when it already exists, and created otherwise.
#
# Parameters:
#	$1     - the node name
#	$2..$N - the command line; the words are joined with single spaces and the
#	         resulting string is run by `bash -c` inside `chroot /host`
#
# Outputs:
#	the stdout of the command, with every carriage return removed
#
# Returns:
#	the status of `kubectl wait` when a debugger pod had to be created and did
#	not become ready within 30s; otherwise the status of the final pipeline,
#	which is the one of `tr` unless the caller enabled `pipefail`
#
exec_host() {
	local node="$1"
	# Everything after the node name is flattened into one string, because
	# `bash -c` takes the whole command line as a single argument. In an
	# assignment "$@" is joined with spaces.
	local command="${@:2}"
	# Declared separately so that the assignment does not hide a failure of
	# the command substitution behind the status of `local`.
	local pod_name

	# Make 7 character hash from the node name
	pod_name="custom-node-debugger-$(echo -n "$node" | sha1sum | cut -c1-7)"

	# Check if there is an existing node debugger pod and reuse it
	# Otherwise, create a new one
	if ! kubectl get pod -n kube-system "${pod_name}" > /dev/null 2>&1; then
		# The manifest path is relative to the current working directory.
		POD_NAME="${pod_name}" NODE_NAME="${node}" envsubst < runtimeclass_workloads/custom-node-debugger.yaml | \
			kubectl apply -n kube-system -f - > /dev/null
		# Wait for the newly created pod to be ready and give up when it is not.
		# The status is propagated with `|| return`: going through
		# `if [ $? -ne 0 ]; then return $?` would return the status of the `[`
		# test itself, which is always 0 there.
		kubectl wait pod -n kube-system --timeout="30s" --for=condition=ready "${pod_name}" > /dev/null || return $?
	fi

	# Execute the command and capture the output
	# We're trimming the `\r` here due to: https://github.com/kata-containers/kata-containers/issues/8051
	# tl;dr: When testing with CRI-O we're facing the following error:
	# ```
	# (from function `exec_host' in file tests_common.sh, line 51,
	# in test file k8s-file-volume.bats, line 25)
	# `exec_host "echo "$file_body" > $tmp_file"' failed with status 127
	# [bats-exec-test:38] INFO: k8s configured to use runtimeclass
	# bash: line 1: $'\r': command not found
	# ```
	kubectl exec -qi -n kube-system "${pod_name}" -- chroot /host bash -c "${command}" | tr -d '\r'
}
|
||||
|
||||
# Tells whether the automatic policy generation is requested.
#
# Returns:
#	0 when AUTO_GENERATE_POLICY is exactly "yes", 1 otherwise (including
#	when the variable is unset or empty)
#
auto_generate_policy_enabled() {
	# The empty default keeps the check usable under `set -u` when the
	# variable was never defined.
	[ "${AUTO_GENERATE_POLICY:-}" == "yes" ]
}
|
||||
|
Loading…
Reference in New Issue
Block a user