mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-05-12 18:17:05 +00:00
ci: Add runtime-rs GPU shims to NVIDIA GPU CI workflow
Add qemu-nvidia-gpu-runtime-rs and qemu-nvidia-gpu-snp-runtime-rs to the NVIDIA GPU test matrix so CI covers the new runtime-rs shims.

Introduce a `coco` boolean field in each matrix entry and use it for all CoCo-related conditionals (KBS, snapshotter, KBS deploy/cleanup steps). This replaces fragile name-string comparisons that were already broken for the runtime-rs variants: `nvidia-gpu (runtime-rs)` was incorrectly getting KBS steps, and `nvidia-gpu-snp (runtime-rs)` was not getting the right env vars.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
This commit is contained in:
@@ -37,8 +37,10 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
environment: [
|
||||
{ name: nvidia-gpu, vmm: qemu-nvidia-gpu, runner: amd64-nvidia-a100 },
|
||||
{ name: nvidia-gpu-snp, vmm: qemu-nvidia-gpu-snp, runner: amd64-nvidia-h100-snp },
|
||||
{ name: nvidia-gpu, vmm: qemu-nvidia-gpu, runner: amd64-nvidia-a100, coco: false },
|
||||
{ name: nvidia-gpu-runtime-rs, vmm: qemu-nvidia-gpu-runtime-rs, runner: amd64-nvidia-a100, coco: false },
|
||||
{ name: nvidia-gpu-snp, vmm: qemu-nvidia-gpu-snp, runner: amd64-nvidia-h100-snp, coco: true },
|
||||
{ name: nvidia-gpu-snp-runtime-rs, vmm: qemu-nvidia-gpu-snp-runtime-rs, runner: amd64-nvidia-h100-snp, coco: true },
|
||||
]
|
||||
runs-on: ${{ matrix.environment.runner }}
|
||||
env:
|
||||
@@ -48,9 +50,9 @@ jobs:
|
||||
GH_PR_NUMBER: ${{ inputs.pr-number }}
|
||||
KATA_HYPERVISOR: ${{ matrix.environment.vmm }}
|
||||
KUBERNETES: kubeadm
|
||||
KBS: ${{ matrix.environment.name == 'nvidia-gpu-snp' && 'true' || 'false' }}
|
||||
SNAPSHOTTER: ${{ matrix.environment.name == 'nvidia-gpu-snp' && 'nydus' || '' }}
|
||||
USE_EXPERIMENTAL_SNAPSHOTTER_SETUP: ${{ matrix.environment.name == 'nvidia-gpu-snp' && 'true' || 'false' }}
|
||||
KBS: ${{ matrix.environment.coco && 'true' || 'false' }}
|
||||
SNAPSHOTTER: ${{ matrix.environment.coco && 'nydus' || '' }}
|
||||
USE_EXPERIMENTAL_SNAPSHOTTER_SETUP: ${{ matrix.environment.coco && 'true' || 'false' }}
|
||||
K8S_TEST_HOST_TYPE: baremetal
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
@@ -75,12 +77,12 @@ jobs:
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-kata-tools kata-tools-artifacts
|
||||
|
||||
- name: Uninstall previous `kbs-client`
|
||||
if: matrix.environment.name != 'nvidia-gpu'
|
||||
if: matrix.environment.coco
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh uninstall-kbs-client
|
||||
|
||||
- name: Deploy CoCo KBS
|
||||
if: matrix.environment.name != 'nvidia-gpu'
|
||||
if: matrix.environment.coco
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh deploy-coco-kbs
|
||||
env:
|
||||
@@ -88,7 +90,7 @@ jobs:
|
||||
KBS_INGRESS: nodeport
|
||||
|
||||
- name: Install `kbs-client`
|
||||
if: matrix.environment.name != 'nvidia-gpu'
|
||||
if: matrix.environment.coco
|
||||
timeout-minutes: 10
|
||||
run: bash tests/integration/kubernetes/gha-run.sh install-kbs-client
|
||||
|
||||
@@ -127,7 +129,7 @@ jobs:
|
||||
run: bash tests/integration/kubernetes/gha-run.sh cleanup
|
||||
|
||||
- name: Delete CoCo KBS
|
||||
if: always() && matrix.environment.name != 'nvidia-gpu'
|
||||
if: always() && matrix.environment.coco
|
||||
timeout-minutes: 10
|
||||
run: |
|
||||
bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs
|
||||
|
||||
@@ -691,7 +691,7 @@ function helm_helper() {
|
||||
# HELM_SHIMS is a space-separated list of shim names
|
||||
# Enable each shim and set supported architectures
|
||||
# TEE shims that need defaults unset (will be set based on env vars)
|
||||
tee_shims="qemu-se qemu-se-runtime-rs qemu-cca qemu-snp qemu-snp-runtime-rs qemu-tdx qemu-tdx-runtime-rs qemu-coco-dev qemu-coco-dev-runtime-rs qemu-nvidia-gpu-snp qemu-nvidia-gpu-tdx"
|
||||
tee_shims="qemu-se qemu-se-runtime-rs qemu-cca qemu-snp qemu-snp-runtime-rs qemu-tdx qemu-tdx-runtime-rs qemu-coco-dev qemu-coco-dev-runtime-rs qemu-nvidia-gpu-snp qemu-nvidia-gpu-tdx qemu-nvidia-gpu-snp-runtime-rs qemu-nvidia-gpu-tdx-runtime-rs"
|
||||
|
||||
for shim in ${HELM_SHIMS}; do
|
||||
# Determine supported architectures based on shim name
|
||||
@@ -705,7 +705,7 @@ function helm_helper() {
|
||||
yq -i ".shims.${shim}.enabled = true" "${values_yaml}"
|
||||
yq -i ".shims.${shim}.supportedArches = [\"arm64\"]" "${values_yaml}"
|
||||
;;
|
||||
qemu-snp|qemu-snp-runtime-rs|qemu-tdx|qemu-tdx-runtime-rs|qemu-nvidia-gpu-snp|qemu-nvidia-gpu-tdx)
|
||||
qemu-snp|qemu-snp-runtime-rs|qemu-tdx|qemu-tdx-runtime-rs|qemu-nvidia-gpu-snp|qemu-nvidia-gpu-tdx|qemu-nvidia-gpu-snp-runtime-rs|qemu-nvidia-gpu-tdx-runtime-rs)
|
||||
yq -i ".shims.${shim}.enabled = true" "${values_yaml}"
|
||||
yq -i ".shims.${shim}.supportedArches = [\"amd64\"]" "${values_yaml}"
|
||||
;;
|
||||
@@ -717,7 +717,7 @@ function helm_helper() {
|
||||
yq -i ".shims.${shim}.enabled = true" "${values_yaml}"
|
||||
yq -i ".shims.${shim}.supportedArches = [\"amd64\", \"s390x\"]" "${values_yaml}"
|
||||
;;
|
||||
qemu-nvidia-gpu)
|
||||
qemu-nvidia-gpu|qemu-nvidia-gpu-runtime-rs)
|
||||
yq -i ".shims.${shim}.enabled = true" "${values_yaml}"
|
||||
yq -i ".shims.${shim}.supportedArches = [\"amd64\", \"arm64\"]" "${values_yaml}"
|
||||
;;
|
||||
|
||||
@@ -10,7 +10,7 @@ source "${BATS_TEST_DIRNAME}/../../common.bash"
|
||||
|
||||
load "${BATS_TEST_DIRNAME}/confidential_kbs.sh"
|
||||
|
||||
SUPPORTED_GPU_TEE_HYPERVISORS=("qemu-nvidia-gpu-snp" "qemu-nvidia-gpu-tdx")
|
||||
SUPPORTED_GPU_TEE_HYPERVISORS=("qemu-nvidia-gpu-snp" "qemu-nvidia-gpu-tdx" "qemu-nvidia-gpu-snp-runtime-rs" "qemu-nvidia-gpu-tdx-runtime-rs")
|
||||
SUPPORTED_TEE_HYPERVISORS=("qemu-snp" "qemu-snp-runtime-rs" "qemu-tdx" "qemu-se" "qemu-se-runtime-rs" "${SUPPORTED_GPU_TEE_HYPERVISORS[@]}")
|
||||
SUPPORTED_NON_TEE_HYPERVISORS=("qemu-coco-dev" "qemu-coco-dev-runtime-rs")
|
||||
|
||||
|
||||
@@ -187,7 +187,7 @@ function deploy_kata() {
|
||||
|
||||
# Workaround to avoid modifying the workflow yaml files
|
||||
case "${KATA_HYPERVISOR}" in
|
||||
qemu-tdx|qemu-snp|qemu-snp-runtime-rs|qemu-nvidia-gpu-*)
|
||||
qemu-tdx|qemu-snp|qemu-snp-runtime-rs|qemu-nvidia-gpu-tdx*|qemu-nvidia-gpu-snp*)
|
||||
USE_EXPERIMENTAL_SETUP_SNAPSHOTTER=true
|
||||
SNAPSHOTTER="nydus"
|
||||
EXPERIMENTAL_FORCE_GUEST_PULL=false
|
||||
|
||||
@@ -14,7 +14,12 @@ source "${kubernetes_dir}/../../common.bash"
|
||||
|
||||
# Enable NVRC trace logging for NVIDIA GPU runtime via drop-in config
|
||||
enable_nvrc_trace() {
|
||||
local config_dir="/opt/kata/share/defaults/kata-containers/runtimes/${KATA_HYPERVISOR}/config.d"
|
||||
local kata_config_base="/opt/kata/share/defaults/kata-containers"
|
||||
case "${KATA_HYPERVISOR}" in
|
||||
*-runtime-rs) kata_config_base="${kata_config_base}/runtime-rs" ;;
|
||||
esac
|
||||
|
||||
local config_dir="${kata_config_base}/runtimes/${KATA_HYPERVISOR}/config.d"
|
||||
local drop_in_file="${config_dir}/90-nvrc-trace.toml"
|
||||
local kernel_params_drop_in="${config_dir}/30-kernel-params.toml"
|
||||
|
||||
@@ -30,7 +35,7 @@ enable_nvrc_trace() {
|
||||
if [[ -f "${kernel_params_drop_in}" ]]; then
|
||||
base_params=$(grep -E '^kernel_params\s*=' "${kernel_params_drop_in}" | sed 's/^kernel_params\s*=\s*"\(.*\)"/\1/' || true)
|
||||
else
|
||||
local runtime_config="/opt/kata/share/defaults/kata-containers/runtimes/${KATA_HYPERVISOR}/configuration-${KATA_HYPERVISOR}.toml"
|
||||
local runtime_config="${kata_config_base}/runtimes/${KATA_HYPERVISOR}/configuration-${KATA_HYPERVISOR}.toml"
|
||||
if [[ -f "${runtime_config}" ]]; then
|
||||
base_params=$(grep -E '^kernel_params\s*=' "${runtime_config}" | sed 's/^kernel_params\s*=\s*"\(.*\)"/\1/' || true)
|
||||
fi
|
||||
@@ -93,7 +98,7 @@ else
|
||||
"k8s-nvidia-nim-service.bats")
|
||||
fi
|
||||
|
||||
SUPPORTED_HYPERVISORS=("qemu-nvidia-gpu" "qemu-nvidia-gpu-snp" "qemu-nvidia-gpu-tdx")
|
||||
SUPPORTED_HYPERVISORS=("qemu-nvidia-gpu" "qemu-nvidia-gpu-snp" "qemu-nvidia-gpu-tdx" "qemu-nvidia-gpu-runtime-rs" "qemu-nvidia-gpu-snp-runtime-rs" "qemu-nvidia-gpu-tdx-runtime-rs")
|
||||
export KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu-nvidia-gpu}"
|
||||
# shellcheck disable=SC2076 # intentionally use literal string matching
|
||||
if [[ ! " ${SUPPORTED_HYPERVISORS[*]} " =~ " ${KATA_HYPERVISOR} " ]]; then
|
||||
|
||||
@@ -82,7 +82,7 @@ auto_generate_policy_enabled() {
|
||||
|
||||
is_coco_platform() {
|
||||
case "${KATA_HYPERVISOR}" in
|
||||
"qemu-tdx"|"qemu-snp"|"qemu-snp-runtime-rs"|"qemu-coco-dev"|"qemu-coco-dev-runtime-rs"|"qemu-nvidia-gpu-tdx"|"qemu-nvidia-gpu-snp")
|
||||
"qemu-tdx"|"qemu-snp"|"qemu-snp-runtime-rs"|"qemu-coco-dev"|"qemu-coco-dev-runtime-rs"|"qemu-nvidia-gpu-tdx"|"qemu-nvidia-gpu-snp"|"qemu-nvidia-gpu-tdx-runtime-rs"|"qemu-nvidia-gpu-snp-runtime-rs")
|
||||
return 0
|
||||
;;
|
||||
*)
|
||||
|
||||
Reference in New Issue
Block a user