diff --git a/tests/integration/kubernetes/confidential_common.sh b/tests/integration/kubernetes/confidential_common.sh index 5e5fb019ae..5b3e59ba71 100644 --- a/tests/integration/kubernetes/confidential_common.sh +++ b/tests/integration/kubernetes/confidential_common.sh @@ -87,27 +87,30 @@ function is_confidential_hardware() { function create_loop_device(){ local loop_file="${1:-/tmp/trusted-image-storage.img}" + local node="$(get_one_kata_node)" cleanup_loop_device "$loop_file" - sudo dd if=/dev/zero of=$loop_file bs=1M count=2500 - sudo losetup -fP $loop_file >/dev/null 2>&1 - local device=$(sudo losetup -j $loop_file | awk -F'[: ]' '{print $1}') + exec_host "$node" "dd if=/dev/zero of=$loop_file bs=1M count=2500" + exec_host "$node" "losetup -fP $loop_file >/dev/null 2>&1" + local device=$(exec_host "$node" losetup -j $loop_file | awk -F'[: ]' '{print $1}') + echo $device } function cleanup_loop_device(){ local loop_file="${1:-/tmp/trusted-image-storage.img}" + local node="$(get_one_kata_node)" # Find all loop devices associated with $loop_file - local existed_devices=$(sudo losetup -j $loop_file | awk -F'[: ]' '{print $1}') + local existed_devices=$(exec_host "$node" losetup -j $loop_file | awk -F'[: ]' '{print $1}') if [ -n "$existed_devices" ]; then # Iterate over each found loop device and detach it for d in $existed_devices; do - sudo losetup -d "$d" >/dev/null 2>&1 + exec_host "$node" "losetup -d $d >/dev/null 2>&1" done fi - sudo rm -f "$loop_file" >/dev/null 2>&1 || true + exec_host "$node" "rm -f $loop_file >/dev/null 2>&1 || true" } # This function creates pod yaml. 
Parameters diff --git a/tests/integration/kubernetes/k8s-file-volume.bats b/tests/integration/kubernetes/k8s-file-volume.bats index f35ab1decc..35891d1dc0 100644 --- a/tests/integration/kubernetes/k8s-file-volume.bats +++ b/tests/integration/kubernetes/k8s-file-volume.bats @@ -16,7 +16,8 @@ setup() { pod_name="test-file-volume" container_name="busybox-file-volume-container" node="$(get_one_kata_node)" - tmp_file=$(exec_host "$node" mktemp /tmp/file-volume-test-foo.XXXXX) + tmp_file=$(mktemp -u /tmp/file-volume-test-foo.XXXXX) + exec_host "$node" touch $tmp_file mount_path="/tmp/foo.txt" file_body="test" get_pod_config_dir diff --git a/tests/integration/kubernetes/k8s-guest-pull-image.bats b/tests/integration/kubernetes/k8s-guest-pull-image.bats index 2de58bd36e..4b2a896523 100644 --- a/tests/integration/kubernetes/k8s-guest-pull-image.bats +++ b/tests/integration/kubernetes/k8s-guest-pull-image.bats @@ -92,10 +92,6 @@ setup() { # The image pulled in the guest will be downloaded and unpacked in the `/run/kata-containers/image` directory. # The tests will use `cryptsetup` to encrypt a block device and mount it at `/run/kata-containers/image`. 
- if [ "${KATA_HYPERVISOR}" = "qemu-coco-dev" ]; then - skip "skip this specific one due to issue https://github.com/kata-containers/kata-containers/issues/10133" - fi - storage_config=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${storage_config_template}").XXX") local_device=$(create_loop_device) LOCAL_DEVICE="$local_device" NODE_NAME="$node" envsubst < "$storage_config_template" > "$storage_config" @@ -110,6 +106,15 @@ setup() { pod_config=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${pod_config_template}").XXX") IMAGE="$image_pulled_time_less_than_default_time" NODE_NAME="$node" envsubst < "$pod_config_template" > "$pod_config" + + # Set CreateContainerRequest timeout for qemu-coco-dev + if [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ]; then + create_container_timeout=300 + set_metadata_annotation "$pod_config" \ + "io.katacontainers.config.runtime.create_container_timeout" \ + "${create_container_timeout}" + fi + # Enable dm-integrity in guest set_metadata_annotation "${pod_config}" \ "io.katacontainers.config.hypervisor.kernel_params" \ @@ -125,15 +130,13 @@ setup() { cat $pod_config add_allow_all_policy_to_yaml "$pod_config" - k8s_create_pod "$pod_config" + local wait_time=120 + [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ] && wait_time=300 + k8s_create_pod "$pod_config" "$wait_time" } @test "Test we cannot pull a large image that pull time exceeds createcontainer timeout inside the guest" { - if [ "${KATA_HYPERVISOR}" = "qemu-coco-dev" ]; then - skip "skip this specific one due to issue https://github.com/kata-containers/kata-containers/issues/10133" - fi - storage_config=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${storage_config_template}").XXX") local_device=$(create_loop_device) LOCAL_DEVICE="$local_device" NODE_NAME="$node" envsubst < "$storage_config_template" > "$storage_config" @@ -176,8 +179,8 @@ setup() { @test "Test we can pull a large image inside the guest with large createcontainer timeout" { - if [ "${KATA_HYPERVISOR}" = "qemu-coco-dev" ]; then - skip 
"skip this specific one due to issue https://github.com/kata-containers/kata-containers/issues/10133" + if [ "${KATA_HYPERVISOR}" = "qemu-coco-dev" ] && [ "${KBS_INGRESS}" = "aks" ]; then + skip "skip this specific one due to issue https://github.com/kata-containers/kata-containers/issues/10299" fi storage_config=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${storage_config_template}").XXX") local_device=$(create_loop_device) @@ -195,6 +198,7 @@ setup() { # Set CreateContainerRequest timeout in the annotation to pull large image in guest create_container_timeout=120 + [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ] && create_container_timeout=600 set_metadata_annotation "$pod_config" \ "io.katacontainers.config.runtime.create_container_timeout" \ "${create_container_timeout}" @@ -214,7 +218,9 @@ setup() { cat $pod_config add_allow_all_policy_to_yaml "$pod_config" - k8s_create_pod "$pod_config" + local wait_time=120 + [ "${KATA_HYPERVISOR}" == "qemu-coco-dev" ] && wait_time=600 + k8s_create_pod "$pod_config" "$wait_time" } teardown() { diff --git a/tests/integration/kubernetes/k8s-volume.bats b/tests/integration/kubernetes/k8s-volume.bats index 4178f8b1e1..58c2b51c3a 100644 --- a/tests/integration/kubernetes/k8s-volume.bats +++ b/tests/integration/kubernetes/k8s-volume.bats @@ -16,7 +16,8 @@ setup() { get_pod_config_dir node=$(get_one_kata_node) - tmp_file=$(exec_host "$node" mktemp -d /tmp/data.XXXX) + tmp_file=$(mktemp -u /tmp/data.XXXX) + exec_host "$node" mkdir $tmp_file pv_yaml=$(mktemp --tmpdir pv_config.XXXXXX.yaml) pod_yaml=$(mktemp --tmpdir pod_config.XXXXXX.yaml) msg="Hello from Kubernetes" diff --git a/tests/integration/kubernetes/run_kubernetes_tests.sh b/tests/integration/kubernetes/run_kubernetes_tests.sh index 67753849d1..ab12babc29 100755 --- a/tests/integration/kubernetes/run_kubernetes_tests.sh +++ b/tests/integration/kubernetes/run_kubernetes_tests.sh @@ -129,6 +129,11 @@ do fi done +# Clean up all node debugger pods whose name starts with 
`custom-node-debugger` if pods exist +pods_to_be_deleted=$(kubectl get pods -n kube-system --no-headers -o custom-columns=:metadata.name \ + | grep '^custom-node-debugger' || true) +[ -n "$pods_to_be_deleted" ] && kubectl delete pod -n kube-system $pods_to_be_deleted || true + [ ${#tests_fail[@]} -ne 0 ] && die "Tests FAILED from suites: ${tests_fail[*]}" info "All tests SUCCEEDED" diff --git a/tests/integration/kubernetes/runtimeclass_workloads/custom-node-debugger.yaml b/tests/integration/kubernetes/runtimeclass_workloads/custom-node-debugger.yaml new file mode 100644 index 0000000000..cb77fdfc7e --- /dev/null +++ b/tests/integration/kubernetes/runtimeclass_workloads/custom-node-debugger.yaml @@ -0,0 +1,37 @@ +# +# Copyright (c) IBM Corp. 2024 +# +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: v1 +kind: Pod +metadata: + name: ${POD_NAME} +spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - ${NODE_NAME} + containers: + - name: node-debugger-container + image: quay.io/bedrock/ubuntu:latest + command: ["/bin/sh", "-c", "sleep infinity"] + stdin: true + tty: true + securityContext: + privileged: true + runAsUser: 0 + allowPrivilegeEscalation: true + volumeMounts: + - name: host-root + mountPath: /host + volumes: + - name: host-root + hostPath: + path: / + type: Directory diff --git a/tests/integration/kubernetes/tests_common.sh b/tests/integration/kubernetes/tests_common.sh index c552f5bf2c..d21e40d0ec 100644 --- a/tests/integration/kubernetes/tests_common.sh +++ b/tests/integration/kubernetes/tests_common.sh @@ -76,19 +76,6 @@ get_one_kata_node() { echo "${resource_name/"node/"}" } -# Get the new debugger pod that wasn't present in the old_pods array. -get_new_debugger_pod() { - local old_pods=("$@") - local new_pod_list=($(kubectl get pods -o name | grep node-debugger)) - - for new_pod in "${new_pod_list[@]}"; do - if [[ ! 
 " ${old_pods[*]} " =~ " ${new_pod} " ]]; then - echo "${new_pod}" - return - fi - done -} - # Runs a command in the host filesystem. # # Parameters: @@ -98,19 +85,23 @@ exec_host() { local node="$1" # `kubectl debug` always returns 0, so we hack it to return the right exit code. local command="${@:2}" - command+='; echo -en \\n$?' - - # Get the already existing debugger pods - local old_debugger_pods=($(kubectl get pods -o name | grep node-debugger)) + # Make 7 character hash from the node name + local pod_name="custom-node-debugger-$(echo -n "$node" | sha1sum | cut -c1-7)" # Run a debug pod - kubectl debug -q "node/${node}" --image=quay.io/bedrock/ubuntu:latest -- chroot /host bash -c "sleep infinity" >&2 - - # Identify the new debugger pod - local new_debugger_pod=$(get_new_debugger_pod "${old_debugger_pods[@]}") - - # Wait for the newly created pod to be ready - kubectl wait --timeout="30s" --for=condition=ready "${new_debugger_pod}" > /dev/null + # Check if there is an existing node debugger pod and reuse it + # Otherwise, create a new one + if ! kubectl get pod -n kube-system "${pod_name}" > /dev/null 2>&1; then + POD_NAME="${pod_name}" NODE_NAME="${node}" envsubst < runtimeclass_workloads/custom-node-debugger.yaml | \ + kubectl apply -n kube-system -f - > /dev/null + # Wait for the newly created pod to be ready
 kubectl wait pod -n kube-system --timeout="30s" --for=condition=ready "${pod_name}" > /dev/null + # Manually check the exit status of the previous command to handle errors explicitly + # since `set -e` is not enabled, allowing subsequent commands to run if needed. + if [ $? -ne 0 ]; then + return 1 
 + fi + fi # Execute the command and capture the output # We're trailing the `\r` here due to: https://github.com/kata-containers/kata-containers/issues/8051 @@ -122,15 +113,7 @@ exec_host() { # [bats-exec-test:38] INFO: k8s configured to use runtimeclass # bash: line 1: $'\r': command not found # ``` - local output="$(kubectl exec -qi "${new_debugger_pod}" -- chroot /host bash -c "${command}" | tr -d '\r')" - - # Delete the newly created pod - kubectl delete "${new_debugger_pod}" >&2 - - # Output the command result - local exit_code="$(echo "${output}" | tail -1)" - echo "$(echo "${output}" | head -n -1)" - return ${exit_code} + kubectl exec -qi -n kube-system "${pod_name}" -- chroot /host bash -c "${command}" | tr -d '\r'; return "${PIPESTATUS[0]}" } auto_generate_policy_enabled() {