# Copyright (c) 2025 NVIDIA Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# Template manifest: an image-pull Secret plus a single Pod that runs the
# llama3-8b-instruct NIM container. The ${...} placeholders must be
# substituted (presumably by envsubst or a similar tool - confirm against the
# deployment script) before this file is applied. Every placeholder is quoted
# so that an empty or number-/boolean-looking expansion still parses as a
# string.
---
# Registry credentials referenced by the Pod's imagePullSecrets below.
apiVersion: v1
kind: Secret
metadata:
  name: ngc-secret-instruct
type: kubernetes.io/dockerconfigjson
data:
  # Values under `data` must be base64-encoded.
  .dockerconfigjson: "${DOCKER_CONFIG_JSON}"
---
apiVersion: v1
kind: Pod
metadata:
  name: "${POD_NAME_INSTRUCT}"
  labels:
    app: "${POD_NAME_INSTRUCT}"
spec:
  # One-shot Pod: the container is not restarted after it exits.
  restartPolicy: Never
  runtimeClassName: "${RUNTIME_CLASS_NAME}"
  imagePullSecrets:
    - name: ngc-secret-instruct
  # Pod-level security context: the container runs as root (uid/gid 0).
  securityContext:
    runAsUser: 0
    runAsGroup: 0
    fsGroup: 0
  containers:
    - name: "${POD_NAME_INSTRUCT}"
      image: nvcr.io/nim/meta/llama3-8b-instruct:1.0.0
      # Ports exposed by the container:
      ports:
        - containerPort: 8000
          name: http-openai
      livenessProbe:
        httpGet:
          path: /v1/health/live
          port: http-openai
        initialDelaySeconds: 15
        periodSeconds: 10
        timeoutSeconds: 1
        successThreshold: 1
        failureThreshold: 3
      readinessProbe:
        httpGet:
          path: /v1/health/ready
          port: http-openai
        initialDelaySeconds: 15
        periodSeconds: 10
        timeoutSeconds: 1
        successThreshold: 1
        failureThreshold: 3
      # Liveness and readiness checks are held off until this probe succeeds.
      # 180 failures x 10 s period allows up to 30 minutes (after the initial
      # 40 s delay) for the container to become ready.
      startupProbe:
        httpGet:
          path: /v1/health/ready
          port: http-openai
        initialDelaySeconds: 40
        periodSeconds: 10
        timeoutSeconds: 1
        successThreshold: 1
        failureThreshold: 180
      # Environment variable for NGC_API_KEY. In production, use a Secret
      # (valueFrom.secretKeyRef) instead of an inline value.
      env:
        - name: NGC_API_KEY
          value: "${NGC_API_KEY}"
      # GPU resource request/limit (for NVIDIA GPU)
      resources:
        requests:
          cpu: "16"
          memory: "32Gi"
        limits:
          nvidia.com/pgpu: "1"
          cpu: "16"
          memory: "32Gi"
      # Mount the local .cache directory into the container
      volumeMounts:
        - name: nim-cache
          mountPath: /opt/nim/.cache
  # Host path volume for the local .cache directory.
  # Adjust 'path' to match your $LOCAL_NIM_CACHE location.
  volumes:
    - name: nim-cache
      hostPath:
        path: "${LOCAL_NIM_CACHE}"
        # Created on the node if it does not already exist.
        type: DirectoryOrCreate