From ed4996722fc36c4d6b431c26ffa79aecf0a4f5eb Mon Sep 17 00:00:00 2001 From: Manuel Huber Date: Thu, 19 Mar 2026 16:39:07 -0700 Subject: [PATCH] tests: nvidia: Do not use elevated privileges Do not run the NIM containers with elevated privileges. Note that using hostPath requires proper host folder permissions, and that using emptyDir requires a proper fsGroup ID. Once issue 11162 is resolved, we can further refine the securityContext fields for the TEE manifests. Signed-off-by: Manuel Huber --- tests/integration/kubernetes/k8s-nvidia-nim.bats | 1 + .../nvidia-nim-llama-3-1-8b-instruct-tee.yaml.in | 12 ++++++++---- .../nvidia-nim-llama-3-1-8b-instruct.yaml.in | 4 ---- ...nvidia-nim-llama-3-2-nv-embedqa-1b-v2-tee.yaml.in | 11 +++++++---- .../nvidia-nim-llama-3-2-nv-embedqa-1b-v2.yaml.in | 9 +++++---- 5 files changed, 21 insertions(+), 16 deletions(-) diff --git a/tests/integration/kubernetes/k8s-nvidia-nim.bats b/tests/integration/kubernetes/k8s-nvidia-nim.bats index 6c1b0c5572..fca591ab72 100644 --- a/tests/integration/kubernetes/k8s-nvidia-nim.bats +++ b/tests/integration/kubernetes/k8s-nvidia-nim.bats @@ -10,6 +10,7 @@ load "${BATS_TEST_DIRNAME}/confidential_common.sh" export KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu-nvidia-gpu}" +# when using hostPath, ensure directory is writable by container user export LOCAL_NIM_CACHE="/opt/nim/.cache" SKIP_MULTI_GPU_TESTS=${SKIP_MULTI_GPU_TESTS:-false} diff --git a/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-1-8b-instruct-tee.yaml.in b/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-1-8b-instruct-tee.yaml.in index 8207604c7e..b48717084d 100644 --- a/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-1-8b-instruct-tee.yaml.in +++ b/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-1-8b-instruct-tee.yaml.in @@ -16,14 +16,18 @@ metadata: # cc_init_data annotation will be added by genpolicy with CDH configuration # from the custom 
default-initdata.toml created by create_nim_initdata_file() spec: + # Explicit user/group/supplementary groups to support nydus guest-pull. + # See issue https://github.com/kata-containers/kata-containers/issues/11162 and + # other references to this issue in the genpolicy source folder. + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + supplementalGroups: [4, 20, 24, 25, 27, 29, 30, 44, 46] restartPolicy: Never runtimeClassName: kata imagePullSecrets: - name: ngc-secret-instruct - securityContext: - runAsUser: 0 - runAsGroup: 0 - fsGroup: 0 containers: - name: ${POD_NAME_INSTRUCT} image: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.13.1 diff --git a/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-1-8b-instruct.yaml.in b/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-1-8b-instruct.yaml.in index 283f591295..ce8d21f53a 100644 --- a/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-1-8b-instruct.yaml.in +++ b/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-1-8b-instruct.yaml.in @@ -14,10 +14,6 @@ spec: runtimeClassName: kata imagePullSecrets: - name: ngc-secret-instruct - securityContext: - runAsUser: 0 - runAsGroup: 0 - fsGroup: 0 containers: - name: ${POD_NAME_INSTRUCT} image: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.13.1 diff --git a/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-2-nv-embedqa-1b-v2-tee.yaml.in b/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-2-nv-embedqa-1b-v2-tee.yaml.in index 7bc15daf97..06fea52257 100644 --- a/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-2-nv-embedqa-1b-v2-tee.yaml.in +++ b/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-2-nv-embedqa-1b-v2-tee.yaml.in @@ -16,15 +16,18 @@ metadata: # cc_init_data annotation will be added by genpolicy with CDH configuration # from the custom default-initdata.toml created by 
create_nim_initdata_file() spec: + # Explicit user/group/fsGroup IDs to support nydus guest-pull. + # See issue https://github.com/kata-containers/kata-containers/issues/11162 and + # other references to this issue in the genpolicy source folder. + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 restartPolicy: Always runtimeClassName: kata serviceAccountName: default imagePullSecrets: - name: ngc-secret-embedqa - securityContext: - fsGroup: 0 - runAsGroup: 0 - runAsUser: 0 containers: - name: ${POD_NAME_EMBEDQA} image: nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2:1.10.1 diff --git a/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-2-nv-embedqa-1b-v2.yaml.in b/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-2-nv-embedqa-1b-v2.yaml.in index a7d4506d2b..d26b55b1e2 100644 --- a/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-2-nv-embedqa-1b-v2.yaml.in +++ b/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-2-nv-embedqa-1b-v2.yaml.in @@ -10,15 +10,16 @@ metadata: labels: app: ${POD_NAME_EMBEDQA} spec: + # unlike the instruct manifest, this image needs securityContext to + # avoid NVML/GPU permission failures + securityContext: + runAsUser: 1000 + runAsGroup: 1000 restartPolicy: Always runtimeClassName: kata serviceAccountName: default imagePullSecrets: - name: ngc-secret-embedqa - securityContext: - fsGroup: 0 - runAsGroup: 0 - runAsUser: 0 containers: - name: ${POD_NAME_EMBEDQA} image: nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2:1.10.1