Merge pull request #10232 from BbolroC/fix-loop-device-for-exec_host

tests: Fix loop device handling for exec_host()
Hyounggyu Choi
2024-09-23 08:15:03 +02:00
committed by GitHub
7 changed files with 89 additions and 53 deletions

View File

@@ -87,27 +87,30 @@ function is_confidential_hardware() {
 function create_loop_device(){
     local loop_file="${1:-/tmp/trusted-image-storage.img}"
+    local node="$(get_one_kata_node)"
     cleanup_loop_device "$loop_file"
-    sudo dd if=/dev/zero of=$loop_file bs=1M count=2500
-    sudo losetup -fP $loop_file >/dev/null 2>&1
-    local device=$(sudo losetup -j $loop_file | awk -F'[: ]' '{print $1}')
+    exec_host "$node" "dd if=/dev/zero of=$loop_file bs=1M count=2500"
+    exec_host "$node" "losetup -fP $loop_file >/dev/null 2>&1"
+    local device=$(exec_host "$node" losetup -j $loop_file | awk -F'[: ]' '{print $1}')
     echo $device
 }

 function cleanup_loop_device(){
     local loop_file="${1:-/tmp/trusted-image-storage.img}"
+    local node="$(get_one_kata_node)"
     # Find all loop devices associated with $loop_file
-    local existed_devices=$(sudo losetup -j $loop_file | awk -F'[: ]' '{print $1}')
+    local existed_devices=$(exec_host "$node" losetup -j $loop_file | awk -F'[: ]' '{print $1}')
     if [ -n "$existed_devices" ]; then
         # Iterate over each found loop device and detach it
         for d in $existed_devices; do
-            sudo losetup -d "$d" >/dev/null 2>&1
+            exec_host "$node" "losetup -d "$d" >/dev/null 2>&1"
         done
     fi
-    sudo rm -f "$loop_file" >/dev/null 2>&1 || true
+    exec_host "$node" "rm -f "$loop_file" >/dev/null 2>&1 || true"
 }

 # This function creates pod yaml. Parameters
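Both helpers now perform their privileged steps on the Kubernetes node through exec_host() rather than on the CI runner via sudo. A minimal usage sketch, assuming confidential_common.sh and the helpers it relies on (get_one_kata_node, exec_host) are already sourced in a test:

source confidential_common.sh  # provides create_loop_device/cleanup_loop_device

# Create a 2500 MiB backing file on the node, attach it to a free loop
# device there, and capture the device path the helper prints (e.g. /dev/loop3).
device=$(create_loop_device "/tmp/trusted-image-storage.img")

# ... exercise "$device" as trusted image storage ...

# Detach every loop device bound to the backing file, then remove the file.
cleanup_loop_device "/tmp/trusted-image-storage.img"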

View File

@@ -16,7 +16,8 @@ setup() {
     pod_name="test-file-volume"
     container_name="busybox-file-volume-container"
     node="$(get_one_kata_node)"
-    tmp_file=$(exec_host "$node" mktemp /tmp/file-volume-test-foo.XXXXX)
+    tmp_file=$(mktemp -u /tmp/file-volume-test-foo.XXXXX)
+    exec_host "$node" touch $tmp_file
     mount_path="/tmp/foo.txt"
     file_body="test"
     get_pod_config_dir
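The one-line call becomes two because command substitution runs on the CI runner: mktemp -u only generates a unique name locally (no file is created), and the separate exec_host call performs the actual side effect on the node, so the test no longer needs to capture the filename from exec_host output. A sketch of the pattern:

tmp_file=$(mktemp -u /tmp/file-volume-test-foo.XXXXX)  # runner side: name only
exec_host "$node" touch "$tmp_file"                    # node side: create the file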

View File

@@ -92,10 +92,6 @@ setup() {
     # The image pulled in the guest will be downloaded and unpacked in the `/run/kata-containers/image` directory.
     # The tests will use `cryptsetup` to encrypt a block device and mount it at `/run/kata-containers/image`.
-    if [ "${KATA_HYPERVISOR}" = "qemu-coco-dev" ]; then
-        skip "skip this specific one due to issue https://github.com/kata-containers/kata-containers/issues/10133"
-    fi
     storage_config=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${storage_config_template}").XXX")
     local_device=$(create_loop_device)
     LOCAL_DEVICE="$local_device" NODE_NAME="$node" envsubst < "$storage_config_template" > "$storage_config"
@@ -110,6 +106,15 @@ setup() {
     pod_config=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${pod_config_template}").XXX")
     IMAGE="$image_pulled_time_less_than_default_time" NODE_NAME="$node" envsubst < "$pod_config_template" > "$pod_config"
+    # Set CreateContainerRequest timeout for qemu-coco-dev
+    if [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ]; then
+        create_container_timeout=300
+        set_metadata_annotation "$pod_config" \
+            "io.katacontainers.config.runtime.create_container_timeout" \
+            "${create_container_timeout}"
+    fi
     # Enable dm-integrity in guest
     set_metadata_annotation "${pod_config}" \
         "io.katacontainers.config.hypervisor.kernel_params" \
@@ -125,15 +130,13 @@ setup() {
     cat $pod_config
     add_allow_all_policy_to_yaml "$pod_config"
-    k8s_create_pod "$pod_config"
+    local wait_time=120
+    [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ] && wait_time=300
+    k8s_create_pod "$pod_config" "$wait_time"
 }

 @test "Test we cannot pull a large image that pull time exceeds createcontainer timeout inside the guest" {
-    if [ "${KATA_HYPERVISOR}" = "qemu-coco-dev" ]; then
-        skip "skip this specific one due to issue https://github.com/kata-containers/kata-containers/issues/10133"
-    fi
     storage_config=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${storage_config_template}").XXX")
     local_device=$(create_loop_device)
     LOCAL_DEVICE="$local_device" NODE_NAME="$node" envsubst < "$storage_config_template" > "$storage_config"
@@ -176,8 +179,8 @@ setup() {
 @test "Test we can pull a large image inside the guest with large createcontainer timeout" {
-    if [ "${KATA_HYPERVISOR}" = "qemu-coco-dev" ]; then
-        skip "skip this specific one due to issue https://github.com/kata-containers/kata-containers/issues/10133"
+    if [ "${KATA_HYPERVISOR}" = "qemu-coco-dev" ] && [ "${KBS_INGRESS}" = "aks" ]; then
+        skip "skip this specific one due to issue https://github.com/kata-containers/kata-containers/issues/10299"
     fi
     storage_config=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${storage_config_template}").XXX")
     local_device=$(create_loop_device)
@@ -195,6 +198,7 @@ setup() {
     # Set CreateContainerRequest timeout in the annotation to pull large image in guest
     create_container_timeout=120
+    [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ] && create_container_timeout=600
     set_metadata_annotation "$pod_config" \
         "io.katacontainers.config.runtime.create_container_timeout" \
         "${create_container_timeout}"
@@ -214,7 +218,9 @@ setup() {
     cat $pod_config
     add_allow_all_policy_to_yaml "$pod_config"
-    k8s_create_pod "$pod_config"
+    local wait_time=120
+    [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ] && wait_time=600
+    k8s_create_pod "$pod_config" "$wait_time"
 }

 teardown() {
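The two qemu-coco-dev knobs move together: the create_container_timeout annotation gives the guest up to 600 seconds to pull the image, and the k8s_create_pod wait is raised to the same value so the test does not give up before the runtime does. A sketch of the pairing, using the values from the diff above:

create_container_timeout=600                       # guest-side pull budget
set_metadata_annotation "$pod_config" \
    "io.katacontainers.config.runtime.create_container_timeout" \
    "${create_container_timeout}"
k8s_create_pod "$pod_config" "600"                 # test-side wait, at least as long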

View File

@@ -16,7 +16,8 @@ setup() {
     get_pod_config_dir
     node=$(get_one_kata_node)
-    tmp_file=$(exec_host "$node" mktemp -d /tmp/data.XXXX)
+    tmp_file=$(mktemp -u /tmp/data.XXXX)
+    exec_host "$node" mkdir $tmp_file
     pv_yaml=$(mktemp --tmpdir pv_config.XXXXXX.yaml)
     pod_yaml=$(mktemp --tmpdir pod_config.XXXXXX.yaml)
     msg="Hello from Kubernetes"

View File

@@ -129,6 +129,11 @@ do
     fi
 done

+# Clean up all node debugger pods whose name starts with `custom-node-debugger` if pods exist
+pods_to_be_deleted=$(kubectl get pods -n kube-system --no-headers -o custom-columns=:metadata.name \
+    | grep '^custom-node-debugger' || true)
+[ -n "$pods_to_be_deleted" ] && kubectl delete pod -n kube-system $pods_to_be_deleted || true

 [ ${#tests_fail[@]} -ne 0 ] && die "Tests FAILED from suites: ${tests_fail[*]}"
 info "All tests SUCCEEDED"
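The flags keep the listing grep-friendly: --no-headers drops the header row and -o custom-columns=:metadata.name prints bare pod names, one per line, so the ^custom-node-debugger anchor matches only the reusable debugger pods introduced below. Illustrative output (pod names are placeholders):

kubectl get pods -n kube-system --no-headers -o custom-columns=:metadata.name
# custom-node-debugger-1a2b3c4   <- matched and deleted
# coredns-5dd5756b68-abcde       <- left alone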

View File

@@ -0,0 +1,37 @@
+#
+# Copyright (c) IBM Corp. 2024
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+apiVersion: v1
+kind: Pod
+metadata:
+  name: ${POD_NAME}
+spec:
+  affinity:
+    nodeAffinity:
+      requiredDuringSchedulingIgnoredDuringExecution:
+        nodeSelectorTerms:
+        - matchExpressions:
+          - key: kubernetes.io/hostname
+            operator: In
+            values:
+            - ${NODE_NAME}
+  containers:
+  - name: node-debugger-container
+    image: quay.io/bedrock/ubuntu:latest
+    command: ["/bin/sh", "-c", "sleep infinity"]
+    stdin: true
+    tty: true
+    securityContext:
+      privileged: true
+      runAsUser: 0
+      allowPrivilegeEscalation: true
+    volumeMounts:
+    - name: host-root
+      mountPath: /host
+  volumes:
+  - name: host-root
+    hostPath:
+      path: /
+      type: Directory
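The ${POD_NAME} and ${NODE_NAME} placeholders are shell-style, not Kubernetes syntax, so the manifest must be rendered with envsubst before being applied, which is exactly what the lib.sh change below does. For orientation (the hash suffix and node name here are placeholder values):

POD_NAME="custom-node-debugger-1a2b3c4" NODE_NAME="worker-0" \
    envsubst < runtimeclass_workloads/custom-node-debugger.yaml | \
    kubectl apply -n kube-system -f -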

View File

@@ -76,19 +76,6 @@ get_one_kata_node() {
     echo "${resource_name/"node/"}"
 }

-# Get the new debugger pod that wasn't present in the old_pods array.
-get_new_debugger_pod() {
-    local old_pods=("$@")
-    local new_pod_list=($(kubectl get pods -o name | grep node-debugger))
-    for new_pod in "${new_pod_list[@]}"; do
-        if [[ ! " ${old_pods[*]} " =~ " ${new_pod} " ]]; then
-            echo "${new_pod}"
-            return
-        fi
-    done
-}

 # Runs a command in the host filesystem.
 #
 # Parameters:
@@ -98,19 +85,23 @@ exec_host() {
     local node="$1"
     # `kubectl debug` always returns 0, so we hack it to return the right exit code.
     local command="${@:2}"
-    command+='; echo -en \\n$?'
-    # Get the already existing debugger pods
-    local old_debugger_pods=($(kubectl get pods -o name | grep node-debugger))
+    # Make 7 character hash from the node name
+    local pod_name="custom-node-debugger-$(echo -n "$node" | sha1sum | cut -c1-7)"
-    # Run a debug pod
-    kubectl debug -q "node/${node}" --image=quay.io/bedrock/ubuntu:latest -- chroot /host bash -c "sleep infinity" >&2
-    # Identify the new debugger pod
-    local new_debugger_pod=$(get_new_debugger_pod "${old_debugger_pods[@]}")
-    # Wait for the newly created pod to be ready
-    kubectl wait --timeout="30s" --for=condition=ready "${new_debugger_pod}" > /dev/null
+    # Check if there is an existing node debugger pod and reuse it
+    # Otherwise, create a new one
+    if ! kubectl get pod -n kube-system "${pod_name}" > /dev/null 2>&1; then
+        POD_NAME="${pod_name}" NODE_NAME="${node}" envsubst < runtimeclass_workloads/custom-node-debugger.yaml | \
+            kubectl apply -n kube-system -f - > /dev/null
+        # Wait for the newly created pod to be ready
+        kubectl wait pod -n kube-system --timeout="30s" --for=condition=ready "${pod_name}" > /dev/null
+        # Manually check the exit status of the previous command to handle errors explicitly
+        # since `set -e` is not enabled, allowing subsequent commands to run if needed.
+        if [ $? -ne 0 ]; then
+            return $?
+        fi
+    fi
     # Execute the command and capture the output
     # We're trailing the `\r` here due to: https://github.com/kata-containers/kata-containers/issues/8051
@@ -122,15 +113,7 @@ exec_host() {
     # [bats-exec-test:38] INFO: k8s configured to use runtimeclass
     # bash: line 1: $'\r': command not found
     # ```
-    local output="$(kubectl exec -qi "${new_debugger_pod}" -- chroot /host bash -c "${command}" | tr -d '\r')"
-    # Delete the newly created pod
-    kubectl delete "${new_debugger_pod}" >&2
-    # Output the command result
-    local exit_code="$(echo "${output}" | tail -1)"
-    echo "$(echo "${output}" | head -n -1)"
-    return ${exit_code}
+    kubectl exec -qi -n kube-system "${pod_name}" -- chroot /host bash -c "${command}" | tr -d '\r'
 }

 auto_generate_policy_enabled() {
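The rewritten exec_host() replaces the throwaway kubectl debug pod with one long-lived debugger pod per node: the pod name is a pure function of the node name, so repeated calls land on the same pod and creation happens at most once. A usage sketch (the node name is a placeholder):

node="worker-0"
echo -n "$node" | sha1sum | cut -c1-7  # 7-char hash embedded in the pod name

# First call renders custom-node-debugger.yaml, applies it in kube-system and
# waits for readiness; later calls skip straight to kubectl exec.
exec_host "$node" "losetup -j /tmp/trusted-image-storage.img"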