tests: nvidia-nim: use trusted storage templates for runtime-rs

Now that runtime-rs supports block-encrypted emptyDir volumes, remove
the no-trusted-storage workaround templates and the is_runtime_rs
branching in the NIM test. Runtime-rs now uses the same TEE templates as
the Go runtime with emptyDir + PVC at 48Gi memory, instead of the 128Gi
workaround that compensated for lacking trusted storage.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
Assisted-by: Cursor <cursoragent@cursor.com>
2026-07-01 22:50:54 +00:00 · 2026-05-08 16:27:54 +02:00
parent 54aaa1ea2a
commit c19bdbf23b
3 changed files with 20 additions and 234 deletions
--- a/tests/integration/kubernetes/k8s-nvidia-nim.bats
+++ b/tests/integration/kubernetes/k8s-nvidia-nim.bats
@@ -179,13 +179,6 @@ setup_file() {
    export POD_EMBEDQA_YAML_IN="${pod_config_dir}/${POD_NAME_EMBEDQA}.yaml.in"
    export POD_EMBEDQA_YAML="${pod_config_dir}/${POD_NAME_EMBEDQA}.yaml"

-    # runtime-rs does not support trusted storage yet, so use alternative
-    # TEE templates without emptyDir/PVC volumes and higher memory.
-    if is_runtime_rs && [[ "${TEE}" = "true" ]]; then
-        export POD_INSTRUCT_YAML_IN="${pod_config_dir}/${POD_NAME_INSTRUCT}-no-trusted-storage.yaml.in"
-        export POD_EMBEDQA_YAML_IN="${pod_config_dir}/${POD_NAME_EMBEDQA}-no-trusted-storage.yaml.in"
-    fi
-
    dpkg -s jq >/dev/null 2>&1 || sudo apt -y install jq

    setup_langchain_flow
@@ -202,29 +195,27 @@ setup_file() {
        # file and BEFORE auto_generate_policy() runs.
        create_nim_initdata_file "${policy_settings_dir}/default-initdata.toml"

-        if ! is_runtime_rs; then
-            # Container image layer storage: one block device and PV/PVC per pod.
-            storage_config_template="${pod_config_dir}/confidential/trusted-storage.yaml.in"
+        # Container image layer storage: one block device and PV/PVC per pod.
+        storage_config_template="${pod_config_dir}/confidential/trusted-storage.yaml.in"

-            instruct_storage_mib=57344
-            local_device_instruct=$(create_loop_device /tmp/trusted-image-storage-instruct.img "$instruct_storage_mib")
-            storage_config_instruct=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${storage_config_template}").instruct.XXX")
-            PV_NAME=trusted-block-pv-instruct PVC_NAME=trusted-pvc-instruct \
-                PV_STORAGE_CAPACITY="${instruct_storage_mib}Mi" PVC_STORAGE_REQUEST="${instruct_storage_mib}Mi" \
-                LOCAL_DEVICE="$local_device_instruct" NODE_NAME="$node" \
-                envsubst < "$storage_config_template" > "$storage_config_instruct"
-            retry_kubectl_apply "$storage_config_instruct"
+        instruct_storage_mib=57344
+        local_device_instruct=$(create_loop_device /tmp/trusted-image-storage-instruct.img "$instruct_storage_mib")
+        storage_config_instruct=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${storage_config_template}").instruct.XXX")
+        PV_NAME=trusted-block-pv-instruct PVC_NAME=trusted-pvc-instruct \
+            PV_STORAGE_CAPACITY="${instruct_storage_mib}Mi" PVC_STORAGE_REQUEST="${instruct_storage_mib}Mi" \
+            LOCAL_DEVICE="$local_device_instruct" NODE_NAME="$node" \
+            envsubst < "$storage_config_template" > "$storage_config_instruct"
+        retry_kubectl_apply "$storage_config_instruct"

-            if [ "${SKIP_MULTI_GPU_TESTS}" != "true" ]; then
-                embedqa_storage_mib=8192
-                local_device_embedqa=$(create_loop_device /tmp/trusted-image-storage-embedqa.img "$embedqa_storage_mib")
-                storage_config_embedqa=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${storage_config_template}").embedqa.XXX")
-                PV_NAME=trusted-block-pv-embedqa PVC_NAME=trusted-pvc-embedqa \
-                    PV_STORAGE_CAPACITY="${embedqa_storage_mib}Mi" PVC_STORAGE_REQUEST="${embedqa_storage_mib}Mi" \
-                    LOCAL_DEVICE="$local_device_embedqa" NODE_NAME="$node" \
-                    envsubst < "$storage_config_template" > "$storage_config_embedqa"
-                retry_kubectl_apply "$storage_config_embedqa"
-            fi
+        if [ "${SKIP_MULTI_GPU_TESTS}" != "true" ]; then
+            embedqa_storage_mib=8192
+            local_device_embedqa=$(create_loop_device /tmp/trusted-image-storage-embedqa.img "$embedqa_storage_mib")
+            storage_config_embedqa=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${storage_config_template}").embedqa.XXX")
+            PV_NAME=trusted-block-pv-embedqa PVC_NAME=trusted-pvc-embedqa \
+                PV_STORAGE_CAPACITY="${embedqa_storage_mib}Mi" PVC_STORAGE_REQUEST="${embedqa_storage_mib}Mi" \
+                LOCAL_DEVICE="$local_device_embedqa" NODE_NAME="$node" \
+                envsubst < "$storage_config_template" > "$storage_config_embedqa"
+            retry_kubectl_apply "$storage_config_embedqa"
        fi
    fi

@@ -506,7 +497,7 @@ teardown_file() {
        [ -f "${POD_EMBEDQA_YAML}" ] && kubectl delete -f "${POD_EMBEDQA_YAML}" --ignore-not-found=true
    fi

-    if [[ "${TEE}" = "true" ]] && ! is_runtime_rs; then
+    if [[ "${TEE}" = "true" ]]; then
        kubectl delete --ignore-not-found pvc trusted-pvc-instruct trusted-pvc-embedqa
        kubectl delete --ignore-not-found pv trusted-block-pv-instruct trusted-block-pv-embedqa
        kubectl delete --ignore-not-found storageclass local-storage
--- a/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-1-8b-instruct-tee-no-trusted-storage.yaml.in
+++ b/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-1-8b-instruct-tee-no-trusted-storage.yaml.in
@@ -1,98 +0,0 @@
-# Copyright (c) 2026 NVIDIA Corporation
-#
-# SPDX-License-Identifier: Apache-2.0
-#
-# TEE variant without trusted storage support (e.g. for runtime-rs which
-# does not yet implement block-encrypted emptyDir or trusted PVC).
-# Uses higher memory to compensate for the lack of offloaded storage.
-#
---
-apiVersion: v1
-kind: Pod
-metadata:
-  name: ${POD_NAME_INSTRUCT}
-  labels:
-    app: ${POD_NAME_INSTRUCT}
-  annotations:
-    # Start CDH process and configure AA for KBS communication
-    # aa_kbc_params tells the Attestation Agent where KBS is located
-    io.katacontainers.config.hypervisor.kernel_params: "agent.guest_components_procs=confidential-data-hub agent.aa_kbc_params=cc_kbc::${CC_KBS_ADDR}"
-    # cc_init_data annotation will be added by genpolicy with CDH configuration
-    # from the custom default-initdata.toml created by create_nim_initdata_file()
-spec:
-  # Explicit user/group/supplementary groups to support nydus guest-pull.
-  # See issue https://github.com/kata-containers/kata-containers/issues/11162 and
-  # other references to this issue in the genpolicy source folder.
-  securityContext:
-    runAsUser: 1000
-    runAsGroup: 1000
-    fsGroup: 1000
-    supplementalGroups: [4, 20, 24, 25, 27, 29, 30, 44, 46]
-  restartPolicy: Never
-  runtimeClassName: kata
-  imagePullSecrets:
-    - name: ngc-secret-instruct
-  containers:
-  - name: ${POD_NAME_INSTRUCT}
-    image: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.13.1
-    # Ports exposed by the container:
-    ports:
-      - containerPort: 8000
-        name: http-openai
-    livenessProbe:
-      httpGet:
-        path: /v1/health/live
-        port: http-openai
-      initialDelaySeconds: 15
-      periodSeconds: 10
-      timeoutSeconds: 1
-      successThreshold: 1
-      failureThreshold: 3
-    readinessProbe:
-      httpGet:
-        path: /v1/health/ready
-        port: http-openai
-      initialDelaySeconds: 15
-      periodSeconds: 10
-      timeoutSeconds: 1
-      successThreshold: 1
-      failureThreshold: 3
-    startupProbe:
-      httpGet:
-        path: /v1/health/ready
-        port: http-openai
-      initialDelaySeconds: 360
-      periodSeconds: 10
-      timeoutSeconds: 1
-      successThreshold: 1
-      failureThreshold: 30
-    env:
-      - name: NGC_API_KEY
-        valueFrom:
-          secretKeyRef:
-            name: ngc-api-key-sealed-instruct
-            key: api-key
-    # GPU resource limit (for NVIDIA GPU)
-    resources:
-      limits:
-        nvidia.com/pgpu: "1"
-        cpu: "16"
-        memory: "128Gi"
---
-apiVersion: v1
-kind: Secret
-metadata:
-  name: ngc-secret-instruct
-type: kubernetes.io/dockerconfigjson
-data:
-  .dockerconfigjson: ${DOCKER_CONFIG_JSON}
---
-apiVersion: v1
-kind: Secret
-metadata:
-  name: ngc-api-key-sealed-instruct
-type: Opaque
-data:
-  # Sealed secret pointing to kbs:///default/ngc-api-key/instruct
-  # CDH will unseal this by fetching the actual key from KBS
-  api-key: "${NGC_API_KEY_SEALED_SECRET_INSTRUCT_BASE64}"
--- a/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-2-nv-embedqa-1b-v2-tee-no-trusted-storage.yaml.in
+++ b/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-2-nv-embedqa-1b-v2-tee-no-trusted-storage.yaml.in
@@ -1,107 +0,0 @@
-# Copyright (c) 2026 NVIDIA Corporation
-#
-# SPDX-License-Identifier: Apache-2.0
-#
-# TEE variant without trusted storage support (e.g. for runtime-rs which
-# does not yet implement block-encrypted emptyDir or trusted PVC).
-# Uses higher memory to compensate for the lack of offloaded storage.
-#
---
-apiVersion: v1
-kind: Pod
-metadata:
-  name: ${POD_NAME_EMBEDQA}
-  labels:
-    app: ${POD_NAME_EMBEDQA}
-  annotations:
-    # Start CDH process and configure AA for KBS communication
-    # aa_kbc_params tells the Attestation Agent where KBS is located
-    io.katacontainers.config.hypervisor.kernel_params: "agent.guest_components_procs=confidential-data-hub agent.aa_kbc_params=cc_kbc::${CC_KBS_ADDR}"
-    # cc_init_data annotation will be added by genpolicy with CDH configuration
-    # from the custom default-initdata.toml created by create_nim_initdata_file()
-spec:
-  # Explicit user/group/supplementary groups to support nydus guest-pull.
-  # See issue https://github.com/kata-containers/kata-containers/issues/11162 and
-  # other references to this issue in the genpolicy source folder.
-  securityContext:
-    runAsUser: 1000
-    runAsGroup: 1000
-    fsGroup: 1000
-  restartPolicy: Always
-  runtimeClassName: kata
-  serviceAccountName: default
-  imagePullSecrets:
-    - name: ngc-secret-embedqa
-  containers:
-  - name: ${POD_NAME_EMBEDQA}
-    image: nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2:1.10.1
-    imagePullPolicy: IfNotPresent
-    env:
-      - name: NGC_API_KEY
-        valueFrom:
-          secretKeyRef:
-            name: ngc-api-key-sealed-embedqa
-            key: api-key
-      - name: NIM_HTTP_API_PORT
-        value: "8000"
-      - name: NIM_JSONL_LOGGING
-        value: "1"
-      - name: NIM_LOG_LEVEL
-        value: "INFO"
-    ports:
-      - containerPort: 8000
-        name: http
-
-    livenessProbe:
-      httpGet:
-        path: /v1/health/live
-        port: 8000
-      initialDelaySeconds: 15
-      periodSeconds: 10
-      timeoutSeconds: 1
-      successThreshold: 1
-      failureThreshold: 3
-
-    readinessProbe:
-      httpGet:
-        path: /v1/health/ready
-        port: 8000
-      initialDelaySeconds: 15
-      periodSeconds: 10
-      timeoutSeconds: 10
-      successThreshold: 1
-      failureThreshold: 3
-
-    startupProbe:
-      httpGet:
-        path: /v1/health/ready
-        port: 8000
-      initialDelaySeconds: 60
-      periodSeconds: 10
-      timeoutSeconds: 1
-      successThreshold: 1
-      failureThreshold: 180
-
-    resources:
-      limits:
-        nvidia.com/pgpu: "1"
-        cpu: "16"
-        memory: "48Gi"
---
-apiVersion: v1
-kind: Secret
-metadata:
-  name: ngc-secret-embedqa
-type: kubernetes.io/dockerconfigjson
-data:
-  .dockerconfigjson: ${DOCKER_CONFIG_JSON}
---
-apiVersion: v1
-kind: Secret
-metadata:
-  name: ngc-api-key-sealed-embedqa
-type: Opaque
-data:
-  # Sealed secret pointing to kbs:///default/ngc-api-key/embedqa
-  # CDH will unseal this by fetching the actual key from KBS
-  api-key: "${NGC_API_KEY_SEALED_SECRET_EMBEDQA_BASE64}"