From 923162cecb606bb61035ccc49c441889bf879f8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Sun, 5 Apr 2026 23:46:48 +0200 Subject: [PATCH] ci: Add runtime-rs GPU shims to NVIDIA GPU CI workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add qemu-nvidia-gpu-runtime-rs and qemu-nvidia-gpu-snp-runtime-rs to the NVIDIA GPU test matrix so CI covers the new runtime-rs shims. Introduce a `coco` boolean field in each matrix entry and use it for all CoCo-related conditionals (KBS, snapshotter, KBS deploy/cleanup steps). This replaces fragile name-string comparisons that were already broken for the runtime-rs variants: `nvidia-gpu (runtime-rs)` was incorrectly getting KBS steps, and `nvidia-gpu-snp (runtime-rs)` was not getting the right env vars. Signed-off-by: Fabiano Fidêncio --- .../run-k8s-tests-on-nvidia-gpu.yaml | 20 ++++++++++--------- tests/gha-run-k8s-common.sh | 6 +++--- .../kubernetes/confidential_common.sh | 2 +- tests/integration/kubernetes/gha-run.sh | 2 +- .../kubernetes/run_kubernetes_nv_tests.sh | 11 +++++++--- tests/integration/kubernetes/tests_common.sh | 2 +- 6 files changed, 25 insertions(+), 18 deletions(-) diff --git a/.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml b/.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml index 5a6da266b4..6be76646ea 100644 --- a/.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml +++ b/.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml @@ -37,8 +37,10 @@ jobs: fail-fast: false matrix: environment: [ - { name: nvidia-gpu, vmm: qemu-nvidia-gpu, runner: amd64-nvidia-a100 }, - { name: nvidia-gpu-snp, vmm: qemu-nvidia-gpu-snp, runner: amd64-nvidia-h100-snp }, + { name: nvidia-gpu, vmm: qemu-nvidia-gpu, runner: amd64-nvidia-a100, coco: false }, + { name: nvidia-gpu-runtime-rs, vmm: qemu-nvidia-gpu-runtime-rs, runner: amd64-nvidia-a100, coco: false }, + { name: nvidia-gpu-snp, vmm: qemu-nvidia-gpu-snp, runner: amd64-nvidia-h100-snp, coco: true }, + { 
name: nvidia-gpu-snp-runtime-rs, vmm: qemu-nvidia-gpu-snp-runtime-rs, runner: amd64-nvidia-h100-snp, coco: true }, ] runs-on: ${{ matrix.environment.runner }} env: @@ -48,9 +50,9 @@ jobs: GH_PR_NUMBER: ${{ inputs.pr-number }} KATA_HYPERVISOR: ${{ matrix.environment.vmm }} KUBERNETES: kubeadm - KBS: ${{ matrix.environment.name == 'nvidia-gpu-snp' && 'true' || 'false' }} - SNAPSHOTTER: ${{ matrix.environment.name == 'nvidia-gpu-snp' && 'nydus' || '' }} - USE_EXPERIMENTAL_SNAPSHOTTER_SETUP: ${{ matrix.environment.name == 'nvidia-gpu-snp' && 'true' || 'false' }} + KBS: ${{ matrix.environment.coco && 'true' || 'false' }} + SNAPSHOTTER: ${{ matrix.environment.coco && 'nydus' || '' }} + USE_EXPERIMENTAL_SNAPSHOTTER_SETUP: ${{ matrix.environment.coco && 'true' || 'false' }} K8S_TEST_HOST_TYPE: baremetal steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -75,12 +77,12 @@ jobs: run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts - name: Uninstall previous `kbs-client` - if: matrix.environment.name != 'nvidia-gpu' + if: matrix.environment.coco timeout-minutes: 10 run: bash tests/integration/kubernetes/gha-run.sh uninstall-kbs-client - name: Deploy CoCo KBS - if: matrix.environment.name != 'nvidia-gpu' + if: matrix.environment.coco timeout-minutes: 10 run: bash tests/integration/kubernetes/gha-run.sh deploy-coco-kbs env: @@ -88,7 +90,7 @@ jobs: KBS_INGRESS: nodeport - name: Install `kbs-client` - if: matrix.environment.name != 'nvidia-gpu' + if: matrix.environment.coco timeout-minutes: 10 run: bash tests/integration/kubernetes/gha-run.sh install-kbs-client @@ -127,7 +129,7 @@ jobs: run: bash tests/integration/kubernetes/gha-run.sh cleanup - name: Delete CoCo KBS - if: always() && matrix.environment.name != 'nvidia-gpu' + if: always() && matrix.environment.coco timeout-minutes: 10 run: | bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs diff --git a/tests/gha-run-k8s-common.sh 
b/tests/gha-run-k8s-common.sh index 6b6aa8a738..c0e3f90b09 100644 --- a/tests/gha-run-k8s-common.sh +++ b/tests/gha-run-k8s-common.sh @@ -691,7 +691,7 @@ function helm_helper() { # HELM_SHIMS is a space-separated list of shim names # Enable each shim and set supported architectures # TEE shims that need defaults unset (will be set based on env vars) - tee_shims="qemu-se qemu-se-runtime-rs qemu-cca qemu-snp qemu-snp-runtime-rs qemu-tdx qemu-tdx-runtime-rs qemu-coco-dev qemu-coco-dev-runtime-rs qemu-nvidia-gpu-snp qemu-nvidia-gpu-tdx" + tee_shims="qemu-se qemu-se-runtime-rs qemu-cca qemu-snp qemu-snp-runtime-rs qemu-tdx qemu-tdx-runtime-rs qemu-coco-dev qemu-coco-dev-runtime-rs qemu-nvidia-gpu-snp qemu-nvidia-gpu-tdx qemu-nvidia-gpu-snp-runtime-rs qemu-nvidia-gpu-tdx-runtime-rs" for shim in ${HELM_SHIMS}; do # Determine supported architectures based on shim name @@ -705,7 +705,7 @@ function helm_helper() { yq -i ".shims.${shim}.enabled = true" "${values_yaml}" yq -i ".shims.${shim}.supportedArches = [\"arm64\"]" "${values_yaml}" ;; - qemu-snp|qemu-snp-runtime-rs|qemu-tdx|qemu-tdx-runtime-rs|qemu-nvidia-gpu-snp|qemu-nvidia-gpu-tdx) + qemu-snp|qemu-snp-runtime-rs|qemu-tdx|qemu-tdx-runtime-rs|qemu-nvidia-gpu-snp|qemu-nvidia-gpu-tdx|qemu-nvidia-gpu-snp-runtime-rs|qemu-nvidia-gpu-tdx-runtime-rs) yq -i ".shims.${shim}.enabled = true" "${values_yaml}" yq -i ".shims.${shim}.supportedArches = [\"amd64\"]" "${values_yaml}" ;; @@ -717,7 +717,7 @@ function helm_helper() { yq -i ".shims.${shim}.enabled = true" "${values_yaml}" yq -i ".shims.${shim}.supportedArches = [\"amd64\", \"s390x\"]" "${values_yaml}" ;; - qemu-nvidia-gpu) + qemu-nvidia-gpu|qemu-nvidia-gpu-runtime-rs) yq -i ".shims.${shim}.enabled = true" "${values_yaml}" yq -i ".shims.${shim}.supportedArches = [\"amd64\", \"arm64\"]" "${values_yaml}" ;; diff --git a/tests/integration/kubernetes/confidential_common.sh b/tests/integration/kubernetes/confidential_common.sh index 46a07524f7..047a32b27d 100644 --- 
a/tests/integration/kubernetes/confidential_common.sh +++ b/tests/integration/kubernetes/confidential_common.sh @@ -10,7 +10,7 @@ source "${BATS_TEST_DIRNAME}/../../common.bash" load "${BATS_TEST_DIRNAME}/confidential_kbs.sh" -SUPPORTED_GPU_TEE_HYPERVISORS=("qemu-nvidia-gpu-snp" "qemu-nvidia-gpu-tdx") +SUPPORTED_GPU_TEE_HYPERVISORS=("qemu-nvidia-gpu-snp" "qemu-nvidia-gpu-tdx" "qemu-nvidia-gpu-snp-runtime-rs" "qemu-nvidia-gpu-tdx-runtime-rs") SUPPORTED_TEE_HYPERVISORS=("qemu-snp" "qemu-snp-runtime-rs" "qemu-tdx" "qemu-se" "qemu-se-runtime-rs" "${SUPPORTED_GPU_TEE_HYPERVISORS[@]}") SUPPORTED_NON_TEE_HYPERVISORS=("qemu-coco-dev" "qemu-coco-dev-runtime-rs") diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index b60a9e861f..8a46d32bb4 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -187,7 +187,7 @@ function deploy_kata() { # Workaround to avoid modifying the workflow yaml files case "${KATA_HYPERVISOR}" in - qemu-tdx|qemu-snp|qemu-snp-runtime-rs|qemu-nvidia-gpu-*) + qemu-tdx|qemu-snp|qemu-snp-runtime-rs|qemu-nvidia-gpu-tdx*|qemu-nvidia-gpu-snp*) USE_EXPERIMENTAL_SETUP_SNAPSHOTTER=true SNAPSHOTTER="nydus" EXPERIMENTAL_FORCE_GUEST_PULL=false diff --git a/tests/integration/kubernetes/run_kubernetes_nv_tests.sh b/tests/integration/kubernetes/run_kubernetes_nv_tests.sh index 5fe8981c28..82a3dd4907 100644 --- a/tests/integration/kubernetes/run_kubernetes_nv_tests.sh +++ b/tests/integration/kubernetes/run_kubernetes_nv_tests.sh @@ -14,7 +14,12 @@ source "${kubernetes_dir}/../../common.bash" # Enable NVRC trace logging for NVIDIA GPU runtime via drop-in config enable_nvrc_trace() { - local config_dir="/opt/kata/share/defaults/kata-containers/runtimes/${KATA_HYPERVISOR}/config.d" + local kata_config_base="/opt/kata/share/defaults/kata-containers" + case "${KATA_HYPERVISOR}" in + *-runtime-rs) kata_config_base="${kata_config_base}/runtime-rs" ;; + esac + + local 
config_dir="${kata_config_base}/runtimes/${KATA_HYPERVISOR}/config.d" local drop_in_file="${config_dir}/90-nvrc-trace.toml" local kernel_params_drop_in="${config_dir}/30-kernel-params.toml" @@ -30,7 +35,7 @@ enable_nvrc_trace() { if [[ -f "${kernel_params_drop_in}" ]]; then base_params=$(grep -E '^kernel_params\s*=' "${kernel_params_drop_in}" | sed 's/^kernel_params\s*=\s*"\(.*\)"/\1/' || true) else - local runtime_config="/opt/kata/share/defaults/kata-containers/runtimes/${KATA_HYPERVISOR}/configuration-${KATA_HYPERVISOR}.toml" + local runtime_config="${kata_config_base}/runtimes/${KATA_HYPERVISOR}/configuration-${KATA_HYPERVISOR}.toml" if [[ -f "${runtime_config}" ]]; then base_params=$(grep -E '^kernel_params\s*=' "${runtime_config}" | sed 's/^kernel_params\s*=\s*"\(.*\)"/\1/' || true) fi @@ -93,7 +98,7 @@ else "k8s-nvidia-nim-service.bats") fi -SUPPORTED_HYPERVISORS=("qemu-nvidia-gpu" "qemu-nvidia-gpu-snp" "qemu-nvidia-gpu-tdx") +SUPPORTED_HYPERVISORS=("qemu-nvidia-gpu" "qemu-nvidia-gpu-snp" "qemu-nvidia-gpu-tdx" "qemu-nvidia-gpu-runtime-rs" "qemu-nvidia-gpu-snp-runtime-rs" "qemu-nvidia-gpu-tdx-runtime-rs") export KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu-nvidia-gpu}" # shellcheck disable=SC2076 # intentionally use literal string matching if [[ ! 
" ${SUPPORTED_HYPERVISORS[*]} " =~ " ${KATA_HYPERVISOR} " ]]; then diff --git a/tests/integration/kubernetes/tests_common.sh b/tests/integration/kubernetes/tests_common.sh index f1027fdb4b..6d8817f220 100644 --- a/tests/integration/kubernetes/tests_common.sh +++ b/tests/integration/kubernetes/tests_common.sh @@ -82,7 +82,7 @@ auto_generate_policy_enabled() { is_coco_platform() { case "${KATA_HYPERVISOR}" in - "qemu-tdx"|"qemu-snp"|"qemu-snp-runtime-rs"|"qemu-coco-dev"|"qemu-coco-dev-runtime-rs"|"qemu-nvidia-gpu-tdx"|"qemu-nvidia-gpu-snp") + "qemu-tdx"|"qemu-snp"|"qemu-snp-runtime-rs"|"qemu-coco-dev"|"qemu-coco-dev-runtime-rs"|"qemu-nvidia-gpu-tdx"|"qemu-nvidia-gpu-snp"|"qemu-nvidia-gpu-tdx-runtime-rs"|"qemu-nvidia-gpu-snp-runtime-rs") return 0 ;; *)