kata-containers/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-1-8b-instruct.yaml.in
Zvonko Kaiser 8be41a4e80 gpu: Add embedding service
For a simple RAG pipeline, add an embedding service

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
2025-08-14 16:34:21 +00:00

89 lines
2.1 KiB
YAML

# Copyright (c) 2025 NVIDIA Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
---
# Image pull secret referenced by the Pod's imagePullSecrets in this file.
# DOCKER_CONFIG_JSON is substituted when the template is rendered; as a
# value under `data` it must already be base64-encoded.
apiVersion: v1
kind: Secret
metadata:
  name: ngc-secret-instruct
type: kubernetes.io/dockerconfigjson
data:
  # Quoted so an empty or unusual expansion stays a string, not null.
  .dockerconfigjson: "${DOCKER_CONFIG_JSON}"
---
# Pod running the NIM LLM container under the runtime class selected by
# RUNTIME_CLASS_NAME. All ${...} placeholders are substituted when the
# template is rendered.
apiVersion: v1
kind: Pod
metadata:
  name: "${POD_NAME_INSTRUCT}"
  labels:
    app: "${POD_NAME_INSTRUCT}"
spec:
  # Containers are never restarted after they exit.
  restartPolicy: Never
  runtimeClassName: "${RUNTIME_CLASS_NAME}"
  # Registry credentials; refers to the Secret named ngc-secret-instruct.
  imagePullSecrets:
    - name: ngc-secret-instruct
  # Run as root (uid/gid 0) with fsGroup 0.
  securityContext:
    runAsUser: 0
    runAsGroup: 0
    fsGroup: 0
  containers:
    - name: "${POD_NAME_INSTRUCT}"
      image: nvcr.io/nim/meta/llama3-8b-instruct:1.0.0
      # Ports exposed by the container; the probes below refer to this
      # port by its name.
      ports:
        - containerPort: 8000
          name: http-openai
      livenessProbe:
        httpGet:
          path: /v1/health/live
          port: http-openai
        initialDelaySeconds: 15
        periodSeconds: 10
        timeoutSeconds: 1
        successThreshold: 1
        failureThreshold: 3
      readinessProbe:
        httpGet:
          path: /v1/health/ready
          port: http-openai
        initialDelaySeconds: 15
        periodSeconds: 10
        timeoutSeconds: 1
        successThreshold: 1
        failureThreshold: 3
      # Startup budget: 40s initial delay plus up to 180 failed checks at a
      # 10s period (roughly 30 minutes). Liveness and readiness checks only
      # begin once this probe has succeeded.
      # NOTE(review): presumably sized for model download/load - confirm.
      startupProbe:
        httpGet:
          path: /v1/health/ready
          port: http-openai
        initialDelaySeconds: 40
        periodSeconds: 10
        timeoutSeconds: 1
        successThreshold: 1
        failureThreshold: 180
      # NGC_API_KEY is passed as a plain environment value.
      # In production, source it from a Secret instead.
      env:
        - name: NGC_API_KEY
          value: "${NGC_API_KEY}"
      # CPU and memory requests equal their limits; one NVIDIA GPU
      # (nvidia.com/pgpu) is requested through limits.
      resources:
        requests:
          cpu: "16"
          memory: "32Gi"
        limits:
          nvidia.com/pgpu: "1"
          cpu: "16"
          memory: "32Gi"
      # Mount the local .cache directory into the container.
      volumeMounts:
        - name: nim-cache
          mountPath: /opt/nim/.cache
  # Host path volume for the local .cache directory.
  # Adjust 'path' to match your $LOCAL_NIM_CACHE location.
  volumes:
    - name: nim-cache
      hostPath:
        path: "${LOCAL_NIM_CACHE}"
        # The directory is created on the node if it does not exist.
        type: DirectoryOrCreate