mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-07-16 08:26:16 +00:00
gpu: Add Pod spec for NIM llama
Pod spec for the NIM inferencing service.

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
This commit is contained in:
parent
66cd18ae67
commit
06466a53c5
@ -0,0 +1,84 @@
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: ngc-secret-instruct
|
||||
type: kubernetes.io/dockerconfigjson
|
||||
data:
|
||||
.dockerconfigjson: ${DOCKER_CONFIG_JSON}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: ${POD_NAME_INSTRUCT}
|
||||
labels:
|
||||
app: ${POD_NAME_INSTRUCT}
|
||||
spec:
|
||||
restartPolicy: Never
|
||||
runtimeClassName: kata-qemu-nvidia-gpu
|
||||
imagePullSecrets:
|
||||
- name: ngc-secret-instruct
|
||||
securityContext:
|
||||
runAsUser: 0
|
||||
runAsGroup: 0
|
||||
fsGroup: 0
|
||||
containers:
|
||||
- name: ${POD_NAME_INSTRUCT}
|
||||
image: nvcr.io/nim/meta/llama3-8b-instruct:1.0.0
|
||||
# Ports exposed by the container:
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
name: http-openai
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /v1/health/live
|
||||
port: http-openai
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 1
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /v1/health/ready
|
||||
port: http-openai
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 1
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
startupProbe:
|
||||
httpGet:
|
||||
path: /v1/health/ready
|
||||
port: http-openai
|
||||
initialDelaySeconds: 40
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 1
|
||||
successThreshold: 1
|
||||
failureThreshold: 180
|
||||
# Environment variable for NGC_API_KEY. In production, use a Secret.
|
||||
env:
|
||||
- name: NGC_API_KEY
|
||||
value: "${NGC_API_KEY}"
|
||||
# GPU resource request/limit (for NVIDIA GPU)
|
||||
resources:
|
||||
requests:
|
||||
cpu: "16"
|
||||
memory: "32Gi"
|
||||
limits:
|
||||
nvidia.com/pgpu: "1"
|
||||
cpu: "16"
|
||||
memory: "32Gi"
|
||||
# Mount the local .cache directory into the container
|
||||
volumeMounts:
|
||||
- name: nim-cache
|
||||
mountPath: /opt/nim/.cache
|
||||
|
||||
# Host path volume for the local .cache directory.
|
||||
# Adjust 'path' to match your $LOCAL_NIM_CACHE location.
|
||||
volumes:
|
||||
- name: nim-cache
|
||||
hostPath:
|
||||
path: "/opr/nim/.cache"
|
||||
type: DirectoryOrCreate
|
||||
|
Loading…
Reference in New Issue
Block a user