From aedf14b244b739775a27841ae67a65cb6152ed9a Mon Sep 17 00:00:00 2001
From: Hyounggyu Choi
Date: Mon, 2 Sep 2024 09:44:47 +0200
Subject: [PATCH 1/5] tests: Mimic node debugger with full privileges

This commit addresses an issue where loop devices could not be handled
via a node debugger because of its restricted privileges. It runs a pod
with full privileges, allowing it to mount the host root at `/host`,
similar to the node debugger. This change enables us to run the trusted
image storage tests using the `qemu-coco-dev` runtime class.

Fixes: #10133

Signed-off-by: Hyounggyu Choi
---
 .../custom-node-debugger.yaml                 | 37 +++++++++++++++++++
 tests/integration/kubernetes/tests_common.sh  |  2 +-
 2 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 tests/integration/kubernetes/runtimeclass_workloads/custom-node-debugger.yaml

diff --git a/tests/integration/kubernetes/runtimeclass_workloads/custom-node-debugger.yaml b/tests/integration/kubernetes/runtimeclass_workloads/custom-node-debugger.yaml
new file mode 100644
index 0000000000..58a6a8cfae
--- /dev/null
+++ b/tests/integration/kubernetes/runtimeclass_workloads/custom-node-debugger.yaml
@@ -0,0 +1,37 @@
+#
+# Copyright (c) IBM Corp. 2024
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+apiVersion: v1
+kind: Pod
+metadata:
+  name: custom-node-debugger
+spec:
+  affinity:
+    nodeAffinity:
+      requiredDuringSchedulingIgnoredDuringExecution:
+        nodeSelectorTerms:
+        - matchExpressions:
+          - key: kubernetes.io/hostname
+            operator: In
+            values:
+            - ${NODE_NAME}
+  containers:
+  - name: node-debugger-container
+    image: quay.io/bedrock/ubuntu:latest
+    command: ["/bin/sh", "-c", "sleep infinity"]
+    stdin: true
+    tty: true
+    securityContext:
+      privileged: true
+      runAsUser: 0
+      allowPrivilegeEscalation: true
+    volumeMounts:
+    - name: host-root
+      mountPath: /host
+  volumes:
+  - name: host-root
+    hostPath:
+      path: /
+      type: Directory
diff --git a/tests/integration/kubernetes/tests_common.sh b/tests/integration/kubernetes/tests_common.sh
index c552f5bf2c..98a3c60c9e 100644
--- a/tests/integration/kubernetes/tests_common.sh
+++ b/tests/integration/kubernetes/tests_common.sh
@@ -104,7 +104,7 @@ exec_host() {
     local old_debugger_pods=($(kubectl get pods -o name | grep node-debugger))
 
     # Run a debug pod
-    kubectl debug -q "node/${node}" --image=quay.io/bedrock/ubuntu:latest -- chroot /host bash -c "sleep infinity" >&2
+    NODE_NAME="${node}" envsubst < runtimeclass_workloads/custom-node-debugger.yaml | kubectl apply -f - > /dev/null
 
     # Identify the new debugger pod
     local new_debugger_pod=$(get_new_debugger_pod "${old_debugger_pods[@]}")

From 374b8d2534c02834eaf92afc7cccc83ad8f97cd1 Mon Sep 17 00:00:00 2001
From: Hyounggyu Choi
Date: Mon, 2 Sep 2024 10:11:20 +0200
Subject: [PATCH 2/5] tests: Create and delete node debugger pod only once

Creating and deleting a node debugger pod for every `exec_host()` call
is inefficient. This commit changes the test suite to create and delete
the pod only once, globally.
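
For illustration, repeated `exec_host()` calls against the same node now
share a single pod (an indicative sketch, not part of the diff;
`get_one_kata_node` and `exec_host` are the existing helpers):

    node="$(get_one_kata_node)"
    exec_host "$node" touch /tmp/foo.txt  # first call creates the debugger pod
    exec_host "$node" rm /tmp/foo.txt     # later calls reuse the same pod
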
Signed-off-by: Hyounggyu Choi
---
 .../kubernetes/k8s-file-volume.bats           |  3 +-
 tests/integration/kubernetes/k8s-volume.bats  |  3 +-
 .../kubernetes/run_kubernetes_tests.sh        |  5 +++
 .../custom-node-debugger.yaml                 |  2 +-
 tests/integration/kubernetes/tests_common.sh  | 38 ++++++-------------
 5 files changed, 21 insertions(+), 30 deletions(-)

diff --git a/tests/integration/kubernetes/k8s-file-volume.bats b/tests/integration/kubernetes/k8s-file-volume.bats
index f35ab1decc..35891d1dc0 100644
--- a/tests/integration/kubernetes/k8s-file-volume.bats
+++ b/tests/integration/kubernetes/k8s-file-volume.bats
@@ -16,7 +16,8 @@ setup() {
     pod_name="test-file-volume"
     container_name="busybox-file-volume-container"
     node="$(get_one_kata_node)"
-    tmp_file=$(exec_host "$node" mktemp /tmp/file-volume-test-foo.XXXXX)
+    tmp_file=$(mktemp -u /tmp/file-volume-test-foo.XXXXX)
+    exec_host "$node" touch $tmp_file
     mount_path="/tmp/foo.txt"
     file_body="test"
     get_pod_config_dir
diff --git a/tests/integration/kubernetes/k8s-volume.bats b/tests/integration/kubernetes/k8s-volume.bats
index 4178f8b1e1..58c2b51c3a 100644
--- a/tests/integration/kubernetes/k8s-volume.bats
+++ b/tests/integration/kubernetes/k8s-volume.bats
@@ -16,7 +16,8 @@ setup() {
     get_pod_config_dir
 
     node=$(get_one_kata_node)
-    tmp_file=$(exec_host "$node" mktemp -d /tmp/data.XXXX)
+    tmp_file=$(mktemp -u /tmp/data.XXXX)
+    exec_host "$node" mkdir $tmp_file
     pv_yaml=$(mktemp --tmpdir pv_config.XXXXXX.yaml)
     pod_yaml=$(mktemp --tmpdir pod_config.XXXXXX.yaml)
     msg="Hello from Kubernetes"
diff --git a/tests/integration/kubernetes/run_kubernetes_tests.sh b/tests/integration/kubernetes/run_kubernetes_tests.sh
index 67753849d1..ab12babc29 100755
--- a/tests/integration/kubernetes/run_kubernetes_tests.sh
+++ b/tests/integration/kubernetes/run_kubernetes_tests.sh
@@ -129,6 +129,11 @@ do
     fi
 done
 
+# Clean up all node debugger pods whose name starts with `custom-node-debugger` if pods exist
+pods_to_be_deleted=$(kubectl get pods -n kube-system --no-headers -o custom-columns=:metadata.name \
+    | grep '^custom-node-debugger' || true)
+[ -n "$pods_to_be_deleted" ] && kubectl delete pod -n kube-system $pods_to_be_deleted || true
+
 [ ${#tests_fail[@]} -ne 0 ] && die "Tests FAILED from suites: ${tests_fail[*]}"
 
 info "All tests SUCCEEDED"
diff --git a/tests/integration/kubernetes/runtimeclass_workloads/custom-node-debugger.yaml b/tests/integration/kubernetes/runtimeclass_workloads/custom-node-debugger.yaml
index 58a6a8cfae..cb77fdfc7e 100644
--- a/tests/integration/kubernetes/runtimeclass_workloads/custom-node-debugger.yaml
+++ b/tests/integration/kubernetes/runtimeclass_workloads/custom-node-debugger.yaml
@@ -6,7 +6,7 @@
 apiVersion: v1
 kind: Pod
 metadata:
-  name: custom-node-debugger
+  name: ${POD_NAME}
 spec:
   affinity:
     nodeAffinity:
diff --git a/tests/integration/kubernetes/tests_common.sh b/tests/integration/kubernetes/tests_common.sh
index 98a3c60c9e..501a0fc42e 100644
--- a/tests/integration/kubernetes/tests_common.sh
+++ b/tests/integration/kubernetes/tests_common.sh
@@ -76,19 +76,6 @@ get_one_kata_node() {
     echo "${resource_name/"node/"}"
 }
 
-# Get the new debugger pod that wasn't present in the old_pods array.
-get_new_debugger_pod() {
-    local old_pods=("$@")
-    local new_pod_list=($(kubectl get pods -o name | grep node-debugger))
-
-    for new_pod in "${new_pod_list[@]}"; do
-        if [[ ! " ${old_pods[*]} " =~ " ${new_pod} " ]]; then
-            echo "${new_pod}"
-            return
-        fi
-    done
-}
-
 # Runs a command in the host filesystem.
 #
 # Parameters:
@@ -99,18 +86,18 @@ exec_host() {
     # `kubectl debug` always returns 0, so we hack it to return the right exit code.
     local command="${@:2}"
     command+='; echo -en \\n$?'
-
-    # Get the already existing debugger pods
-    local old_debugger_pods=($(kubectl get pods -o name | grep node-debugger))
+    # Make 7 character hash from the node name
+    local pod_name="custom-node-debugger-$(echo -n "$node" | sha1sum | cut -c1-7)"
 
     # Run a debug pod
-    NODE_NAME="${node}" envsubst < runtimeclass_workloads/custom-node-debugger.yaml | kubectl apply -f - > /dev/null
-
-    # Identify the new debugger pod
-    local new_debugger_pod=$(get_new_debugger_pod "${old_debugger_pods[@]}")
-
-    # Wait for the newly created pod to be ready
-    kubectl wait --timeout="30s" --for=condition=ready "${new_debugger_pod}" > /dev/null
+    # Check if there is an existing node debugger pod and reuse it
+    # Otherwise, create a new one
+    if ! kubectl get pod -n kube-system "${pod_name}" > /dev/null 2>&1; then
+        POD_NAME="${pod_name}" NODE_NAME="${node}" envsubst < runtimeclass_workloads/custom-node-debugger.yaml | \
+            kubectl apply -n kube-system -f - > /dev/null
+        # Wait for the newly created pod to be ready
+        kubectl wait pod -n kube-system --timeout="30s" --for=condition=ready "${pod_name}" > /dev/null
+    fi
 
     # Execute the command and capture the output
     # We're trailing the `\r` here due to: https://github.com/kata-containers/kata-containers/issues/8051
@@ -118,10 +105,7 @@ exec_host() {
     # [bats-exec-test:38] INFO: k8s configured to use runtimeclass
     # bash: line 1: $'\r': command not found
     # ```
-    local output="$(kubectl exec -qi "${new_debugger_pod}" -- chroot /host bash -c "${command}" | tr -d '\r')"
-
-    # Delete the newly created pod
-    kubectl delete "${new_debugger_pod}" >&2
+    local output="$(kubectl exec -qi -n kube-system "${pod_name}" -- chroot /host bash -c "${command}" | tr -d '\r')"
 
     # Output the command result
     local exit_code="$(echo "${output}" | tail -1)"

From 9cff9271bc8e9cf001c2706c56f5747c14d29e2b Mon Sep 17 00:00:00 2001
From: Hyounggyu Choi
Date: Fri, 6 Sep 2024 14:12:30 +0200
Subject: [PATCH 3/5] tests: Run all commands in *_loop_device() using exec_host()

If the host running the tests is different from the host where the
cluster is running, the *_loop_device() functions do not work as
expected, because the device is created on the test host while the
cluster expects the device to be local.

This commit ensures that all commands in the relevant functions are
executed via exec_host(), so that the device is handled on a cluster
node.

Additionally, it modifies exec_host() to return the exit code of the
last executed command, because the output captured by the existing
`kubectl debug` logic sometimes includes unexpected characters that are
difficult to handle. `kubectl exec` appears to properly return the exit
code of the command given to it.
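
For example, a caller can now branch on the propagated exit status (an
indicative sketch, not part of the diff; it assumes the caller's shell
runs with `set -o pipefail`, since the status of `kubectl exec` would
otherwise be masked by the trailing `tr`):

    node="$(get_one_kata_node)"
    if ! exec_host "$node" "losetup -fP /tmp/trusted-image-storage.img"; then
        echo "losetup failed on ${node}" >&2
    fi
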
Signed-off-by: Hyounggyu Choi
---
 .../integration/kubernetes/confidential_common.sh | 15 +++++++------
 tests/integration/kubernetes/tests_common.sh      | 13 ++++++-------
 2 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/tests/integration/kubernetes/confidential_common.sh b/tests/integration/kubernetes/confidential_common.sh
index 5e5fb019ae..5b3e59ba71 100644
--- a/tests/integration/kubernetes/confidential_common.sh
+++ b/tests/integration/kubernetes/confidential_common.sh
@@ -87,27 +87,30 @@ function is_confidential_hardware() {
 
 function create_loop_device(){
     local loop_file="${1:-/tmp/trusted-image-storage.img}"
+    local node="$(get_one_kata_node)"
     cleanup_loop_device "$loop_file"
-    sudo dd if=/dev/zero of=$loop_file bs=1M count=2500
-    sudo losetup -fP $loop_file >/dev/null 2>&1
-    local device=$(sudo losetup -j $loop_file | awk -F'[: ]' '{print $1}')
+    exec_host "$node" "dd if=/dev/zero of=$loop_file bs=1M count=2500"
+    exec_host "$node" "losetup -fP $loop_file >/dev/null 2>&1"
+    local device=$(exec_host "$node" losetup -j $loop_file | awk -F'[: ]' '{print $1}')
+    echo $device
 }
 
 function cleanup_loop_device(){
     local loop_file="${1:-/tmp/trusted-image-storage.img}"
+    local node="$(get_one_kata_node)"
 
     # Find all loop devices associated with $loop_file
-    local existed_devices=$(sudo losetup -j $loop_file | awk -F'[: ]' '{print $1}')
+    local existed_devices=$(exec_host "$node" losetup -j $loop_file | awk -F'[: ]' '{print $1}')
 
     if [ -n "$existed_devices" ]; then
        # Iterate over each found loop device and detach it
        for d in $existed_devices; do
-           sudo losetup -d "$d" >/dev/null 2>&1
+           exec_host "$node" "losetup -d "$d" >/dev/null 2>&1"
        done
     fi
 
-    sudo rm -f "$loop_file" >/dev/null 2>&1 || true
+    exec_host "$node" "rm -f "$loop_file" >/dev/null 2>&1 || true"
 }
 

 # This function creates pod yaml. Parameters
diff --git a/tests/integration/kubernetes/tests_common.sh b/tests/integration/kubernetes/tests_common.sh
index 501a0fc42e..d21e40d0ec 100644
--- a/tests/integration/kubernetes/tests_common.sh
+++ b/tests/integration/kubernetes/tests_common.sh
@@ -85,7 +85,6 @@ exec_host() {
     local node="$1"
     # `kubectl debug` always returns 0, so we hack it to return the right exit code.
     local command="${@:2}"
-    command+='; echo -en \\n$?'
     # Make 7 character hash from the node name
     local pod_name="custom-node-debugger-$(echo -n "$node" | sha1sum | cut -c1-7)"
 
@@ -97,6 +96,11 @@ exec_host() {
             kubectl apply -n kube-system -f - > /dev/null
         # Wait for the newly created pod to be ready
         kubectl wait pod -n kube-system --timeout="30s" --for=condition=ready "${pod_name}" > /dev/null
+        # Manually check the exit status of the previous command to handle errors explicitly
+        # since `set -e` is not enabled, allowing subsequent commands to run if needed.
+        if [ $? -ne 0 ]; then
+            return $?
+        fi
     fi
 
     # Execute the command and capture the output
     # We're trailing the `\r` here due to: https://github.com/kata-containers/kata-containers/issues/8051
@@ -105,12 +109,7 @@ exec_host() {
     # [bats-exec-test:38] INFO: k8s configured to use runtimeclass
     # bash: line 1: $'\r': command not found
     # ```
-    local output="$(kubectl exec -qi -n kube-system "${pod_name}" -- chroot /host bash -c "${command}" | tr -d '\r')"
-
-    # Output the command result
-    local exit_code="$(echo "${output}" | tail -1)"
-    echo "$(echo "${output}" | head -n -1)"
-    return ${exit_code}
+    kubectl exec -qi -n kube-system "${pod_name}" -- chroot /host bash -c "${command}" | tr -d '\r'
 }
 
 auto_generate_policy_enabled() {

From c6b86e88e4e89672e333ffb48c415903edbfe526 Mon Sep 17 00:00:00 2001
From: Hyounggyu Choi
Date: Tue, 3 Sep 2024 13:22:20 +0200
Subject: [PATCH 4/5] tests: Increase timeouts for qemu-coco-dev in trusted
 image storage tests

Timeouts (e.g. `create_container_timeout` and `wait_time`) occur when
using `qemu-coco-dev`. This commit increases these timeouts for the
trusted image storage test cases.

Signed-off-by: Hyounggyu Choi
---
 .../kubernetes/k8s-guest-pull-image.bats      | 18 ++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/tests/integration/kubernetes/k8s-guest-pull-image.bats b/tests/integration/kubernetes/k8s-guest-pull-image.bats
index 2de58bd36e..46381810a9 100644
--- a/tests/integration/kubernetes/k8s-guest-pull-image.bats
+++ b/tests/integration/kubernetes/k8s-guest-pull-image.bats
@@ -110,6 +110,15 @@ setup() {
 
     pod_config=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${pod_config_template}").XXX")
     IMAGE="$image_pulled_time_less_than_default_time" NODE_NAME="$node" envsubst < "$pod_config_template" > "$pod_config"
+
+    # Set CreateContainerRequest timeout for qemu-coco-dev
+    if [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ]; then
+        create_container_timeout=300
+        set_metadata_annotation "$pod_config" \
+            "io.katacontainers.config.runtime.create_container_timeout" \
+            "${create_container_timeout}"
+    fi
+
     # Enable dm-integrity in guest
     set_metadata_annotation "${pod_config}" \
         "io.katacontainers.config.hypervisor.kernel_params" \
@@ -125,7 +134,9 @@ setup() {
 
     cat $pod_config
     add_allow_all_policy_to_yaml "$pod_config"
-    k8s_create_pod "$pod_config"
+    local wait_time=120
+    [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ] && wait_time=300
+    k8s_create_pod "$pod_config" "$wait_time"
 }
 
 @test "Test we cannot pull a large image that pull time exceeds createcontainer timeout inside the guest" {
@@ -195,6 +206,7 @@ setup() {
 
     # Set CreateContainerRequest timeout in the annotation to pull large image in guest
     create_container_timeout=120
+    [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ] && create_container_timeout=600
     set_metadata_annotation "$pod_config" \
         "io.katacontainers.config.runtime.create_container_timeout" \
         "${create_container_timeout}"
@@ -214,7 +226,9 @@ setup() {
 
     cat $pod_config
     add_allow_all_policy_to_yaml "$pod_config"
-    k8s_create_pod "$pod_config"
+    local wait_time=120
+    [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ] && wait_time=600
+    k8s_create_pod "$pod_config" "$wait_time"
 }
 
 teardown() {

From 2d6ac3d85d04778b5afbda7a30fa663bdf388ed7 Mon Sep 17 00:00:00 2001
From: Hyounggyu Choi
Date: Mon, 2 Sep 2024 10:48:36 +0200
Subject: [PATCH 5/5] tests: Re-enable guest-pull-image tests for qemu-coco-dev

Now that the issue with handling loop devices has been resolved, this
commit re-enables the guest-pull-image tests for `qemu-coco-dev`.
Signed-off-by: Hyounggyu Choi
---
 .../integration/kubernetes/k8s-guest-pull-image.bats | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/tests/integration/kubernetes/k8s-guest-pull-image.bats b/tests/integration/kubernetes/k8s-guest-pull-image.bats
index 46381810a9..4b2a896523 100644
--- a/tests/integration/kubernetes/k8s-guest-pull-image.bats
+++ b/tests/integration/kubernetes/k8s-guest-pull-image.bats
@@ -92,10 +92,6 @@ setup() {
     # The image pulled in the guest will be downloaded and unpacked in the `/run/kata-containers/image` directory.
     # The tests will use `cryptsetup` to encrypt a block device and mount it at `/run/kata-containers/image`.
 
-    if [ "${KATA_HYPERVISOR}" = "qemu-coco-dev" ]; then
-        skip "skip this specific one due to issue https://github.com/kata-containers/kata-containers/issues/10133"
-    fi
-
     storage_config=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${storage_config_template}").XXX")
     local_device=$(create_loop_device)
     LOCAL_DEVICE="$local_device" NODE_NAME="$node" envsubst < "$storage_config_template" > "$storage_config"
@@ -141,10 +137,6 @@ setup() {
 }
 
 @test "Test we cannot pull a large image that pull time exceeds createcontainer timeout inside the guest" {
-    if [ "${KATA_HYPERVISOR}" = "qemu-coco-dev" ]; then
-        skip "skip this specific one due to issue https://github.com/kata-containers/kata-containers/issues/10133"
-    fi
-
     storage_config=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${storage_config_template}").XXX")
     local_device=$(create_loop_device)
     LOCAL_DEVICE="$local_device" NODE_NAME="$node" envsubst < "$storage_config_template" > "$storage_config"
@@ -187,8 +179,8 @@ setup() {
 }
 
 @test "Test we can pull a large image inside the guest with large createcontainer timeout" {
-    if [ "${KATA_HYPERVISOR}" = "qemu-coco-dev" ]; then
-        skip "skip this specific one due to issue https://github.com/kata-containers/kata-containers/issues/10133"
+    if [ "${KATA_HYPERVISOR}" = "qemu-coco-dev" ] && [ "${KBS_INGRESS}" = "aks" ]; then
+        skip "skip this specific one due to issue https://github.com/kata-containers/kata-containers/issues/10299"
     fi
     storage_config=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${storage_config_template}").XXX")
     local_device=$(create_loop_device)