# Copyright (c) 2025 NVIDIA Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
---
# Registry credential Secret, referenced by name from the Pod's
# imagePullSecrets in this file.
# ${DOCKER_CONFIG_JSON} is a placeholder, presumably substituted (e.g. by
# envsubst) before the manifest is applied — confirm against the deploy tooling.
# Because it sits under `data` (not `stringData`), the substituted value must
# be the base64-encoded Docker config JSON.
apiVersion: v1
kind: Secret
metadata:
  name: ngc-secret-embedqa
type: kubernetes.io/dockerconfigjson
data:
  # Quoted like the other templated values in this file, so the substituted
  # text is always read as a string.
  .dockerconfigjson: "${DOCKER_CONFIG_JSON}"
---
# Single-container Pod running the llama-3.2-nv-embedqa-1b-v2 NIM image.
# ${...} placeholders are presumably substituted (e.g. by envsubst) before
# this manifest is applied — confirm against the deployment tooling.
apiVersion: v1
kind: Pod
metadata:
  name: nvidia-nim-llama-3-2-nv-embedqa-1b-v2
  labels:
    app: nvidia-nim-llama-3-2-nv-embedqa-1b-v2
spec:
  restartPolicy: Always
  runtimeClassName: "${RUNTIME_CLASS_NAME}"
  serviceAccountName: default
  # Credentials used to pull the container image.
  imagePullSecrets:
    - name: ngc-secret-embedqa
  # Pod-level security context: processes run as uid/gid 0 (root), and
  # fsGroup 0 is applied to mounted volumes.
  securityContext:
    runAsUser: 0
    runAsGroup: 0
    fsGroup: 0
  containers:
    - name: nvidia-nim-llama-3-2-nv-embedqa-1b-v2
      image: nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2:1.5.0
      imagePullPolicy: IfNotPresent
      env:
        # Same path as the nim-cache volume mount below.
        - name: NIM_CACHE_PATH
          value: "/opt/nim/.cache"
        # NOTE(review): the key is injected as a literal env value and is
        # therefore visible in the Pod spec; consider a secretKeyRef.
        - name: NGC_API_KEY
          value: "${NGC_API_KEY}"
        # Same port as containerPort and the probe targets below.
        - name: NIM_HTTP_API_PORT
          value: "8000"
        - name: NIM_JSONL_LOGGING
          value: "1"
        - name: NIM_LOG_LEVEL
          value: "INFO"
      ports:
        - name: http
          containerPort: 8000

      # Startup probe: after a 40s initial delay, tolerates 180 failures at a
      # 10s period (roughly 30 minutes) before the container is restarted.
      # Kubernetes holds off the liveness and readiness probes until this
      # probe has succeeded.
      startupProbe:
        httpGet:
          path: /v1/health/ready
          port: 8000
        initialDelaySeconds: 40
        periodSeconds: 10
        timeoutSeconds: 1
        successThreshold: 1
        failureThreshold: 180

      # Liveness probe: three consecutive failures (10s apart) restart the
      # container.
      livenessProbe:
        httpGet:
          path: /v1/health/live
          port: 8000
        initialDelaySeconds: 15
        periodSeconds: 10
        timeoutSeconds: 1
        successThreshold: 1
        failureThreshold: 3

      # Readiness probe: three consecutive failures (10s apart) mark the Pod
      # as not ready.
      readinessProbe:
        httpGet:
          path: /v1/health/ready
          port: 8000
        initialDelaySeconds: 15
        periodSeconds: 10
        timeoutSeconds: 1
        successThreshold: 1
        failureThreshold: 3

      # Only limits are given; Kubernetes defaults the requests to the same
      # values when requests are omitted.
      resources:
        limits:
          # One unit of the nvidia.com/pgpu extended resource — presumably a
          # GPU exposed for the configured runtime class; verify on the cluster.
          nvidia.com/pgpu: 1
          cpu: "16"
          memory: "32Gi"

      volumeMounts:
        - name: nim-cache
          mountPath: /opt/nim/.cache

  volumes:
    # Host directory backing the NIM cache; DirectoryOrCreate creates it on
    # the node if it does not exist yet.
    - name: nim-cache
      hostPath:
        path: "${LOCAL_NIM_CACHE}"
        type: DirectoryOrCreate