---
# Image pull Secret; the Pod in this file references it by name under
# imagePullSecrets. ${DOCKER_CONFIG_JSON} is a placeholder expected to be
# substituted (e.g. with envsubst) before the manifest is applied. Because
# it is placed under `data:`, the substituted value must already be
# base64-encoded.
apiVersion: v1
kind: Secret
metadata:
  name: ngc-secret-instruct
  namespace: default
type: kubernetes.io/dockerconfigjson
data:
  # Quoted so the substituted value is always parsed as a string (an empty
  # expansion stays "" instead of becoming null).
  .dockerconfigjson: "${DOCKER_CONFIG_JSON}"
---
# Single-container Pod that runs the llama3-8b-instruct NIM image under the
# kata-qemu-nvidia-gpu runtime class. ${POD_NAME} and ${NGC_API_KEY} are
# placeholders expected to be substituted (e.g. with envsubst) before the
# manifest is applied.
apiVersion: v1
kind: Pod
metadata:
  # Placeholders are quoted so that a digits-only or boolean-looking name is
  # still parsed as a string.
  name: "${POD_NAME}"
  namespace: default
  labels:
    app: "${POD_NAME}"
spec:
  runtimeClassName: kata-qemu-nvidia-gpu
  imagePullSecrets:
    - name: ngc-secret-instruct
  # Pod-level security context: processes run as UID/GID 0 (root).
  securityContext:
    runAsUser: 0
    runAsGroup: 0
    fsGroup: 0
  containers:
    - name: "${POD_NAME}"
      image: nvcr.io/nim/meta/llama3-8b-instruct:1.0.0
      # Ports exposed by the container. The probes below refer to this port
      # by its name.
      ports:
        - containerPort: 8000
          name: http-openai
      livenessProbe:
        httpGet:
          path: /v1/health/live
          port: http-openai
        initialDelaySeconds: 15
        periodSeconds: 10
        timeoutSeconds: 1
        successThreshold: 1
        failureThreshold: 3
      readinessProbe:
        httpGet:
          path: /v1/health/ready
          port: http-openai
        initialDelaySeconds: 15
        periodSeconds: 10
        timeoutSeconds: 1
        successThreshold: 1
        failureThreshold: 3
      # Startup budget: 40s initial delay, then up to 180 failed checks at
      # 10s intervals (about 30 minutes) before the container is restarted.
      startupProbe:
        httpGet:
          path: /v1/health/ready
          port: http-openai
        initialDelaySeconds: 40
        periodSeconds: 10
        timeoutSeconds: 1
        successThreshold: 1
        failureThreshold: 180
      # NGC_API_KEY is passed as a plain value, so it is stored in the Pod
      # spec itself. In production, source it from a Secret instead.
      env:
        - name: NGC_API_KEY
          value: "${NGC_API_KEY}"
      # CPU and memory requests equal their limits; one nvidia.com/pgpu
      # device is requested through the limits.
      resources:
        requests:
          cpu: "16"
          memory: "64Gi"
        limits:
          nvidia.com/pgpu: "1"
          cpu: "16"
          memory: "64Gi"
      # Mount the nim-cache volume (the host's cache directory) into the
      # container at /opt/nim/.cache.
      volumeMounts:
        - name: nim-cache
          mountPath: /opt/nim/.cache

  # Host path volume for the local .cache directory.
  # Adjust 'path' to match your $LOCAL_NIM_CACHE location.
  # NOTE(review): "/opr" may be a typo for "/opt" (the container mount path
  # is /opt/nim/.cache) - confirm the intended host directory.
  volumes:
    - name: nim-cache
      hostPath:
        path: "/opr/nim/.cache"
        type: DirectoryOrCreate