mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-07-01 22:50:54 +00:00
tests: nvidia-nim: use trusted storage templates for runtime-rs
Now that runtime-rs supports block-encrypted emptyDir volumes, remove the no-trusted-storage workaround templates and the is_runtime_rs branching in the NIM test. Runtime-rs now uses the same TEE templates as the Go runtime with emptyDir + PVC at 48Gi memory, instead of the 128Gi workaround that compensated for lacking trusted storage.

Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
Assisted-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -179,13 +179,6 @@ setup_file() {
|
||||
export POD_EMBEDQA_YAML_IN="${pod_config_dir}/${POD_NAME_EMBEDQA}.yaml.in"
|
||||
export POD_EMBEDQA_YAML="${pod_config_dir}/${POD_NAME_EMBEDQA}.yaml"
|
||||
|
||||
# runtime-rs does not support trusted storage yet, so use alternative
|
||||
# TEE templates without emptyDir/PVC volumes and higher memory.
|
||||
if is_runtime_rs && [[ "${TEE}" = "true" ]]; then
|
||||
export POD_INSTRUCT_YAML_IN="${pod_config_dir}/${POD_NAME_INSTRUCT}-no-trusted-storage.yaml.in"
|
||||
export POD_EMBEDQA_YAML_IN="${pod_config_dir}/${POD_NAME_EMBEDQA}-no-trusted-storage.yaml.in"
|
||||
fi
|
||||
|
||||
dpkg -s jq >/dev/null 2>&1 || sudo apt -y install jq
|
||||
|
||||
setup_langchain_flow
|
||||
@@ -202,29 +195,27 @@ setup_file() {
|
||||
# file and BEFORE auto_generate_policy() runs.
|
||||
create_nim_initdata_file "${policy_settings_dir}/default-initdata.toml"
|
||||
|
||||
if ! is_runtime_rs; then
|
||||
# Container image layer storage: one block device and PV/PVC per pod.
|
||||
storage_config_template="${pod_config_dir}/confidential/trusted-storage.yaml.in"
|
||||
# Container image layer storage: one block device and PV/PVC per pod.
|
||||
storage_config_template="${pod_config_dir}/confidential/trusted-storage.yaml.in"
|
||||
|
||||
instruct_storage_mib=57344
|
||||
local_device_instruct=$(create_loop_device /tmp/trusted-image-storage-instruct.img "$instruct_storage_mib")
|
||||
storage_config_instruct=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${storage_config_template}").instruct.XXX")
|
||||
PV_NAME=trusted-block-pv-instruct PVC_NAME=trusted-pvc-instruct \
|
||||
PV_STORAGE_CAPACITY="${instruct_storage_mib}Mi" PVC_STORAGE_REQUEST="${instruct_storage_mib}Mi" \
|
||||
LOCAL_DEVICE="$local_device_instruct" NODE_NAME="$node" \
|
||||
envsubst < "$storage_config_template" > "$storage_config_instruct"
|
||||
retry_kubectl_apply "$storage_config_instruct"
|
||||
instruct_storage_mib=57344
|
||||
local_device_instruct=$(create_loop_device /tmp/trusted-image-storage-instruct.img "$instruct_storage_mib")
|
||||
storage_config_instruct=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${storage_config_template}").instruct.XXX")
|
||||
PV_NAME=trusted-block-pv-instruct PVC_NAME=trusted-pvc-instruct \
|
||||
PV_STORAGE_CAPACITY="${instruct_storage_mib}Mi" PVC_STORAGE_REQUEST="${instruct_storage_mib}Mi" \
|
||||
LOCAL_DEVICE="$local_device_instruct" NODE_NAME="$node" \
|
||||
envsubst < "$storage_config_template" > "$storage_config_instruct"
|
||||
retry_kubectl_apply "$storage_config_instruct"
|
||||
|
||||
if [ "${SKIP_MULTI_GPU_TESTS}" != "true" ]; then
|
||||
embedqa_storage_mib=8192
|
||||
local_device_embedqa=$(create_loop_device /tmp/trusted-image-storage-embedqa.img "$embedqa_storage_mib")
|
||||
storage_config_embedqa=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${storage_config_template}").embedqa.XXX")
|
||||
PV_NAME=trusted-block-pv-embedqa PVC_NAME=trusted-pvc-embedqa \
|
||||
PV_STORAGE_CAPACITY="${embedqa_storage_mib}Mi" PVC_STORAGE_REQUEST="${embedqa_storage_mib}Mi" \
|
||||
LOCAL_DEVICE="$local_device_embedqa" NODE_NAME="$node" \
|
||||
envsubst < "$storage_config_template" > "$storage_config_embedqa"
|
||||
retry_kubectl_apply "$storage_config_embedqa"
|
||||
fi
|
||||
if [ "${SKIP_MULTI_GPU_TESTS}" != "true" ]; then
|
||||
embedqa_storage_mib=8192
|
||||
local_device_embedqa=$(create_loop_device /tmp/trusted-image-storage-embedqa.img "$embedqa_storage_mib")
|
||||
storage_config_embedqa=$(mktemp "${BATS_FILE_TMPDIR}/$(basename "${storage_config_template}").embedqa.XXX")
|
||||
PV_NAME=trusted-block-pv-embedqa PVC_NAME=trusted-pvc-embedqa \
|
||||
PV_STORAGE_CAPACITY="${embedqa_storage_mib}Mi" PVC_STORAGE_REQUEST="${embedqa_storage_mib}Mi" \
|
||||
LOCAL_DEVICE="$local_device_embedqa" NODE_NAME="$node" \
|
||||
envsubst < "$storage_config_template" > "$storage_config_embedqa"
|
||||
retry_kubectl_apply "$storage_config_embedqa"
|
||||
fi
|
||||
fi
|
||||
|
||||
@@ -506,7 +497,7 @@ teardown_file() {
|
||||
[ -f "${POD_EMBEDQA_YAML}" ] && kubectl delete -f "${POD_EMBEDQA_YAML}" --ignore-not-found=true
|
||||
fi
|
||||
|
||||
if [[ "${TEE}" = "true" ]] && ! is_runtime_rs; then
|
||||
if [[ "${TEE}" = "true" ]]; then
|
||||
kubectl delete --ignore-not-found pvc trusted-pvc-instruct trusted-pvc-embedqa
|
||||
kubectl delete --ignore-not-found pv trusted-block-pv-instruct trusted-block-pv-embedqa
|
||||
kubectl delete --ignore-not-found storageclass local-storage
|
||||
|
||||
@@ -1,98 +0,0 @@
|
||||
# Copyright (c) 2026 NVIDIA Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# TEE variant without trusted storage support (e.g. for runtime-rs which
|
||||
# does not yet implement block-encrypted emptyDir or trusted PVC).
|
||||
# Uses higher memory to compensate for the lack of offloaded storage.
|
||||
#
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: ${POD_NAME_INSTRUCT}
|
||||
labels:
|
||||
app: ${POD_NAME_INSTRUCT}
|
||||
annotations:
|
||||
# Start CDH process and configure AA for KBS communication
|
||||
# aa_kbc_params tells the Attestation Agent where KBS is located
|
||||
io.katacontainers.config.hypervisor.kernel_params: "agent.guest_components_procs=confidential-data-hub agent.aa_kbc_params=cc_kbc::${CC_KBS_ADDR}"
|
||||
# cc_init_data annotation will be added by genpolicy with CDH configuration
|
||||
# from the custom default-initdata.toml created by create_nim_initdata_file()
|
||||
spec:
|
||||
# Explicit user/group/supplementary groups to support nydus guest-pull.
|
||||
# See issue https://github.com/kata-containers/kata-containers/issues/11162 and
|
||||
# other references to this issue in the genpolicy source folder.
|
||||
securityContext:
|
||||
runAsUser: 1000
|
||||
runAsGroup: 1000
|
||||
fsGroup: 1000
|
||||
supplementalGroups: [4, 20, 24, 25, 27, 29, 30, 44, 46]
|
||||
restartPolicy: Never
|
||||
runtimeClassName: kata
|
||||
imagePullSecrets:
|
||||
- name: ngc-secret-instruct
|
||||
containers:
|
||||
- name: ${POD_NAME_INSTRUCT}
|
||||
image: nvcr.io/nim/meta/llama-3.1-8b-instruct:1.13.1
|
||||
# Ports exposed by the container:
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
name: http-openai
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /v1/health/live
|
||||
port: http-openai
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 1
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /v1/health/ready
|
||||
port: http-openai
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 1
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
startupProbe:
|
||||
httpGet:
|
||||
path: /v1/health/ready
|
||||
port: http-openai
|
||||
initialDelaySeconds: 360
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 1
|
||||
successThreshold: 1
|
||||
failureThreshold: 30
|
||||
env:
|
||||
- name: NGC_API_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: ngc-api-key-sealed-instruct
|
||||
key: api-key
|
||||
# GPU resource limit (for NVIDIA GPU)
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/pgpu: "1"
|
||||
cpu: "16"
|
||||
memory: "128Gi"
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: ngc-secret-instruct
|
||||
type: kubernetes.io/dockerconfigjson
|
||||
data:
|
||||
.dockerconfigjson: ${DOCKER_CONFIG_JSON}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: ngc-api-key-sealed-instruct
|
||||
type: Opaque
|
||||
data:
|
||||
# Sealed secret pointing to kbs:///default/ngc-api-key/instruct
|
||||
# CDH will unseal this by fetching the actual key from KBS
|
||||
api-key: "${NGC_API_KEY_SEALED_SECRET_INSTRUCT_BASE64}"
|
||||
@@ -1,107 +0,0 @@
|
||||
# Copyright (c) 2026 NVIDIA Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# TEE variant without trusted storage support (e.g. for runtime-rs which
|
||||
# does not yet implement block-encrypted emptyDir or trusted PVC).
|
||||
# Uses higher memory to compensate for the lack of offloaded storage.
|
||||
#
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: ${POD_NAME_EMBEDQA}
|
||||
labels:
|
||||
app: ${POD_NAME_EMBEDQA}
|
||||
annotations:
|
||||
# Start CDH process and configure AA for KBS communication
|
||||
# aa_kbc_params tells the Attestation Agent where KBS is located
|
||||
io.katacontainers.config.hypervisor.kernel_params: "agent.guest_components_procs=confidential-data-hub agent.aa_kbc_params=cc_kbc::${CC_KBS_ADDR}"
|
||||
# cc_init_data annotation will be added by genpolicy with CDH configuration
|
||||
# from the custom default-initdata.toml created by create_nim_initdata_file()
|
||||
spec:
|
||||
# Explicit user/group/supplementary groups to support nydus guest-pull.
|
||||
# See issue https://github.com/kata-containers/kata-containers/issues/11162 and
|
||||
# other references to this issue in the genpolicy source folder.
|
||||
securityContext:
|
||||
runAsUser: 1000
|
||||
runAsGroup: 1000
|
||||
fsGroup: 1000
|
||||
restartPolicy: Always
|
||||
runtimeClassName: kata
|
||||
serviceAccountName: default
|
||||
imagePullSecrets:
|
||||
- name: ngc-secret-embedqa
|
||||
containers:
|
||||
- name: ${POD_NAME_EMBEDQA}
|
||||
image: nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2:1.10.1
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: NGC_API_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: ngc-api-key-sealed-embedqa
|
||||
key: api-key
|
||||
- name: NIM_HTTP_API_PORT
|
||||
value: "8000"
|
||||
- name: NIM_JSONL_LOGGING
|
||||
value: "1"
|
||||
- name: NIM_LOG_LEVEL
|
||||
value: "INFO"
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
name: http
|
||||
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /v1/health/live
|
||||
port: 8000
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 1
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /v1/health/ready
|
||||
port: 8000
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 10
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
|
||||
startupProbe:
|
||||
httpGet:
|
||||
path: /v1/health/ready
|
||||
port: 8000
|
||||
initialDelaySeconds: 60
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 1
|
||||
successThreshold: 1
|
||||
failureThreshold: 180
|
||||
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/pgpu: "1"
|
||||
cpu: "16"
|
||||
memory: "48Gi"
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: ngc-secret-embedqa
|
||||
type: kubernetes.io/dockerconfigjson
|
||||
data:
|
||||
.dockerconfigjson: ${DOCKER_CONFIG_JSON}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: ngc-api-key-sealed-embedqa
|
||||
type: Opaque
|
||||
data:
|
||||
# Sealed secret pointing to kbs:///default/ngc-api-key/embedqa
|
||||
# CDH will unseal this by fetching the actual key from KBS
|
||||
api-key: "${NGC_API_KEY_SEALED_SECRET_EMBEDQA_BASE64}"
|
||||
Reference in New Issue
Block a user