diff --git a/.github/actionlint.yaml b/.github/actionlint.yaml index 1dfc12017c..7f8904d0da 100644 --- a/.github/actionlint.yaml +++ b/.github/actionlint.yaml @@ -23,3 +23,4 @@ self-hosted-runner: - s390x - s390x-large - tdx + - amd64-nvidia-a100 diff --git a/.github/workflows/build-kata-static-tarball-amd64.yaml b/.github/workflows/build-kata-static-tarball-amd64.yaml index f5b25ed978..bacba5068a 100644 --- a/.github/workflows/build-kata-static-tarball-amd64.yaml +++ b/.github/workflows/build-kata-static-tarball-amd64.yaml @@ -23,6 +23,8 @@ on: secrets: QUAY_DEPLOYER_PASSWORD: required: false + KBUILD_SIGN_PIN: + required: true permissions: contents: read @@ -108,6 +110,7 @@ jobs: ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }} TARGET_BRANCH: ${{ inputs.target-branch }} RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }} + KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }} - name: Parse OCI image name and digest id: parse-oci-segments @@ -215,6 +218,7 @@ jobs: ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }} TARGET_BRANCH: ${{ inputs.target-branch }} RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }} + KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }} - name: store-artifact ${{ matrix.asset }} uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 diff --git a/.github/workflows/ci-coco-stability.yaml b/.github/workflows/ci-coco-stability.yaml index f6936c1603..accb9cf9d2 100644 --- a/.github/workflows/ci-coco-stability.yaml +++ b/.github/workflows/ci-coco-stability.yaml @@ -31,3 +31,4 @@ jobs: AZ_TENANT_ID: ${{ secrets.AZ_TENANT_ID }} AZ_SUBSCRIPTION_ID: ${{ secrets.AZ_SUBSCRIPTION_ID }} QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} + KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }} diff --git a/.github/workflows/ci-devel.yaml b/.github/workflows/ci-devel.yaml index 333ea99660..96ad37d1e7 100644 --- a/.github/workflows/ci-devel.yaml +++ b/.github/workflows/ci-devel.yaml @@ -27,6 +27,8 @@ jobs: 
CI_HKD_PATH: ${{ secrets.CI_HKD_PATH }} ITA_KEY: ${{ secrets.ITA_KEY }} QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} + NGC_API_KEY: ${{ secrets.NGC_API_KEY }} + KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }} build-checks: uses: ./.github/workflows/build-checks.yaml diff --git a/.github/workflows/ci-nightly.yaml b/.github/workflows/ci-nightly.yaml index 0ea61d6eae..990d1a7c8f 100644 --- a/.github/workflows/ci-nightly.yaml +++ b/.github/workflows/ci-nightly.yaml @@ -31,3 +31,5 @@ jobs: CI_HKD_PATH: ${{ secrets.CI_HKD_PATH }} ITA_KEY: ${{ secrets.ITA_KEY }} QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} + NGC_API_KEY: ${{ secrets.NGC_API_KEY }} + KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }} diff --git a/.github/workflows/ci-on-push.yaml b/.github/workflows/ci-on-push.yaml index dcbeb36167..7d58efc13c 100644 --- a/.github/workflows/ci-on-push.yaml +++ b/.github/workflows/ci-on-push.yaml @@ -52,3 +52,5 @@ jobs: CI_HKD_PATH: ${{ secrets.CI_HKD_PATH }} ITA_KEY: ${{ secrets.ITA_KEY }} QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} + NGC_API_KEY: ${{ secrets.NGC_API_KEY }} + KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }} diff --git a/.github/workflows/ci-weekly.yaml b/.github/workflows/ci-weekly.yaml index 7f8744bc36..4b14c6645f 100644 --- a/.github/workflows/ci-weekly.yaml +++ b/.github/workflows/ci-weekly.yaml @@ -27,6 +27,8 @@ on: required: true QUAY_DEPLOYER_PASSWORD: required: true + KBUILD_SIGN_PIN: + required: true permissions: contents: read @@ -43,6 +45,8 @@ jobs: tarball-suffix: -${{ inputs.tag }} commit-hash: ${{ inputs.commit-hash }} target-branch: ${{ inputs.target-branch }} + secrets: + KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }} publish-kata-deploy-payload-amd64: needs: build-kata-static-tarball-amd64 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 07e7691cc9..e157f9fbd4 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -35,6 +35,10 @@ on: required: true 
QUAY_DEPLOYER_PASSWORD: required: true + NGC_API_KEY: + required: true + KBUILD_SIGN_PIN: + required: true permissions: contents: read @@ -52,6 +56,8 @@ jobs: tarball-suffix: -${{ inputs.tag }} commit-hash: ${{ inputs.commit-hash }} target-branch: ${{ inputs.target-branch }} + secrets: + KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }} publish-kata-deploy-payload-amd64: needs: build-kata-static-tarball-amd64 @@ -323,6 +329,21 @@ jobs: pr-number: ${{ inputs.pr-number }} target-branch: ${{ inputs.target-branch }} + run-k8s-tests-on-nvidia-gpu: + if: ${{ inputs.skip-test != 'yes' }} + needs: publish-kata-deploy-payload-amd64 + uses: ./.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml + with: + registry: ghcr.io + repo: ${{ github.repository_owner }}/kata-deploy-ci + tag: ${{ inputs.tag }}-amd64 + commit-hash: ${{ inputs.commit-hash }} + pr-number: ${{ inputs.pr-number }} + target-branch: ${{ inputs.target-branch }} + secrets: + NGC_API_KEY: ${{ secrets.NGC_API_KEY }} + + run-kata-coco-tests: if: ${{ inputs.skip-test != 'yes' }} needs: @@ -383,20 +404,6 @@ jobs: pr-number: ${{ inputs.pr-number }} target-branch: ${{ inputs.target-branch }} - run-metrics-tests: - # Skip metrics tests whilst runner is broken - if: false - # if: ${{ inputs.skip-test != 'yes' }} - needs: build-kata-static-tarball-amd64 - uses: ./.github/workflows/run-metrics.yaml - with: - registry: ghcr.io - repo: ${{ github.repository_owner }}/kata-deploy-ci - tag: ${{ inputs.tag }}-amd64 - commit-hash: ${{ inputs.commit-hash }} - pr-number: ${{ inputs.pr-number }} - target-branch: ${{ inputs.target-branch }} - run-basic-amd64-tests: if: ${{ inputs.skip-test != 'yes' }} needs: build-kata-static-tarball-amd64 diff --git a/.github/workflows/payload-after-push.yaml b/.github/workflows/payload-after-push.yaml index 1ef9d55d54..566bcf14bd 100644 --- a/.github/workflows/payload-after-push.yaml +++ b/.github/workflows/payload-after-push.yaml @@ -25,6 +25,7 @@ jobs: target-branch: ${{ github.ref_name }} secrets: 
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} + KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }} build-assets-arm64: permissions: diff --git a/.github/workflows/release-amd64.yaml b/.github/workflows/release-amd64.yaml index 88af72c9fc..129b8f3054 100644 --- a/.github/workflows/release-amd64.yaml +++ b/.github/workflows/release-amd64.yaml @@ -8,6 +8,8 @@ on: secrets: QUAY_DEPLOYER_PASSWORD: required: true + KBUILD_SIGN_PIN: + required: true permissions: contents: read @@ -20,6 +22,7 @@ jobs: stage: release secrets: QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} + KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }} permissions: contents: read packages: write diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index c0723c8196..48171cac7b 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -35,6 +35,7 @@ jobs: target-arch: amd64 secrets: QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }} + KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }} build-and-push-assets-arm64: needs: release diff --git a/.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml b/.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml new file mode 100644 index 0000000000..c6e285a41b --- /dev/null +++ b/.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml @@ -0,0 +1,89 @@ +name: CI | Run NVIDIA GPU kubernetes tests on amd64 +on: + workflow_call: + inputs: + registry: + required: true + type: string + repo: + required: true + type: string + tag: + required: true + type: string + pr-number: + required: true + type: string + commit-hash: + required: false + type: string + target-branch: + required: false + type: string + default: "" + secrets: + NGC_API_KEY: + required: true + +permissions: {} + +jobs: + run-nvidia-gpu-tests-on-amd64: + strategy: + fail-fast: false + matrix: + vmm: + - qemu-nvidia-gpu + k8s: + - kubeadm + runs-on: amd64-nvidia-a100 + env: + DOCKER_REGISTRY: ${{ inputs.registry }} + DOCKER_REPO: ${{
inputs.repo }} + DOCKER_TAG: ${{ inputs.tag }} + GH_PR_NUMBER: ${{ inputs.pr-number }} + KATA_HYPERVISOR: ${{ matrix.vmm }} + KUBERNETES: ${{ matrix.k8s }} + USING_NFD: "false" + K8S_TEST_HOST_TYPE: all + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + ref: ${{ inputs.commit-hash }} + fetch-depth: 0 + persist-credentials: false + + - name: Rebase atop of the latest target branch + run: | + ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch" + env: + TARGET_BRANCH: ${{ inputs.target-branch }} + + - name: Deploy Kata + timeout-minutes: 10 + run: bash tests/integration/kubernetes/gha-run.sh deploy-kata + + - name: Install `bats` + run: bash tests/integration/kubernetes/gha-run.sh install-bats + + - name: Run tests + timeout-minutes: 30 + run: bash tests/integration/kubernetes/gha-run.sh run-nv-tests + env: + NGC_API_KEY: ${{ secrets.NGC_API_KEY }} + - name: Collect artifacts ${{ matrix.vmm }} + if: always() + run: bash tests/integration/kubernetes/gha-run.sh collect-artifacts + continue-on-error: true + + - name: Archive artifacts ${{ matrix.vmm }} + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: k8s-tests-${{ matrix.vmm }}-${{ matrix.k8s }}-${{ inputs.tag }} + path: /tmp/artifacts + retention-days: 1 + + - name: Delete kata-deploy + if: always() + timeout-minutes: 5 + run: bash tests/integration/kubernetes/gha-run.sh cleanup diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index cb0f760298..1efb75c1d6 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -289,7 +289,7 @@ function run_tests() { if [[ "${KATA_HYPERVISOR}" = "dragonball" ]] && [[ "${SNAPSHOTTER}" = "devmapper" ]]; then echo "Skipping tests for ${KATA_HYPERVISOR} using devmapper" else - bash run_kubernetes_tests.sh + bash "${K8STESTS:-run_kubernetes_tests.sh}" fi popd } @@ -589,7 +589,14 @@ function main() { deploy-kata-zvsi)
deploy_kata "zvsi" ;; deploy-snapshotter) deploy_snapshotter ;; report-tests) report_tests ;; - run-tests) run_tests ;; + run-tests) + K8STESTS=run_kubernetes_tests.sh + run_tests + ;; + run-nv-tests) + K8STESTS=run_kubernetes_nv_tests.sh + run_tests + ;; run-tests-kcli) run_tests "kcli" ;; collect-artifacts) collect_artifacts ;; cleanup) cleanup ;; diff --git a/tests/integration/kubernetes/k8s-nvidia-nim.bats b/tests/integration/kubernetes/k8s-nvidia-nim.bats new file mode 100644 index 0000000000..3181f68d2a --- /dev/null +++ b/tests/integration/kubernetes/k8s-nvidia-nim.bats @@ -0,0 +1,99 @@ +#!/usr/bin/env bats +# +# Copyright (c) 2025 NVIDIA Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +# shellcheck disable=SC2154 # BATS variables are not assigned in this file +load "${BATS_TEST_DIRNAME}/../../common.bash" +# shellcheck disable=SC1091 +load "${BATS_TEST_DIRNAME}/tests_common.sh" + +export POD_NAME_INSTRUCT="nvidia-nim-llama-3-1-8b-instruct" +export POD_NAME_EMBEDQA="nvidia-nim-llama-3-2-nv-embedqa-1b-v2" + +export POD_SECRET_INSTRUCT="ngc-secret-instruct" + +DOCKER_CONFIG_JSON=$( + echo -n "{\"auths\":{\"nvcr.io\":{\"username\":\"\$oauthtoken\",\"password\":\"${NGC_API_KEY}\",\"auth\":\"$(echo -n "\$oauthtoken:${NGC_API_KEY}" | base64 -w0)\"}}}" | + base64 -w0 +) +export DOCKER_CONFIG_JSON + +setup_file() { + dpkg -s jq >/dev/null 2>&1 || sudo apt -y install jq + + export PYENV_ROOT="${HOME}/.pyenv" + [[ -d ${PYENV_ROOT}/bin ]] && export PATH="${PYENV_ROOT}/bin:${PATH}" + eval "$(pyenv init - bash)" + + python3 -m venv "${HOME}"/.cicd/venv + + get_pod_config_dir + + pod_instruct_yaml_in="${pod_config_dir}/${POD_NAME_INSTRUCT}.yaml.in" + pod_instruct_yaml="${pod_config_dir}/${POD_NAME_INSTRUCT}.yaml" + + envsubst <"${pod_instruct_yaml_in}" >"${pod_instruct_yaml}" + + export POD_INSTRUCT_YAML="${pod_instruct_yaml}" +} + +@test "NVIDIA NIM Llama 3.1-8b Instruct" { + kubectl apply -f "${POD_INSTRUCT_YAML}" + kubectl wait --for=condition=Ready 
--timeout=500s pod "${POD_NAME_INSTRUCT}" + # shellcheck disable=SC2030 # Variable is shared via file between BATS tests + POD_IP_INSTRUCT=$(kubectl get pod "${POD_NAME_INSTRUCT}" -o jsonpath='{.status.podIP}') + [[ -n "${POD_IP_INSTRUCT}" ]] + + echo "POD_IP_INSTRUCT=${POD_IP_INSTRUCT}" >"${BATS_SUITE_TMPDIR}/env" + echo "# POD_IP_INSTRUCT=${POD_IP_INSTRUCT}" >&3 +} + +@test "List of models available for inference" { + # shellcheck disable=SC1091 # File is created by previous test + source "${BATS_SUITE_TMPDIR}/env" + # shellcheck disable=SC2031 # Variable is shared via file between BATS tests + [[ -n "${POD_IP_INSTRUCT}" ]] + + # shellcheck disable=SC2031 # Variable is shared via file between BATS tests + run curl -sX GET "http://${POD_IP_INSTRUCT}:8000/v1/models" + [[ "${status}" -eq 0 ]] + + # shellcheck disable=SC2030 # Variable is shared via file between BATS tests + MODEL_NAME=$(echo "${output}" | jq -r '.data[0].id // empty') + export MODEL_NAME + [[ -n "${MODEL_NAME}" ]] + echo "MODEL_NAME=${MODEL_NAME}" >>"${BATS_SUITE_TMPDIR}/env" + echo "# MODEL_NAME=${MODEL_NAME}" >&3 + +} + +@test "Simple OpenAI completion request" { + # shellcheck disable=SC1091 # File is created by previous test + source "${BATS_SUITE_TMPDIR}/env" + # shellcheck disable=SC2031 # Variables are shared via file between BATS tests + [[ -n "${POD_IP_INSTRUCT}" ]] + # shellcheck disable=SC2031 # Variables are shared via file between BATS tests + [[ -n "${MODEL_NAME}" ]] + + QUESTION="What are Kata Containers?"
+ + # shellcheck disable=SC2031 # Variables are shared via file between BATS tests + run curl -sX 'POST' \ + "http://${POD_IP_INSTRUCT}:8000/v1/completions" \ + -H "accept: application/json" \ + -H "Content-Type: application/json" \ + -d "{\"model\": \"${MODEL_NAME}\", \"prompt\": \"${QUESTION}\", \"max_tokens\": 64}" + + ANSWER=$(echo "${output}" | jq '.choices[0].text // empty') + [[ -n "${ANSWER}" ]] + + echo "# QUESTION: ${QUESTION}" >&3 + echo "# ANSWER: ${ANSWER}" >&3 +} + +teardown_file() { + kubectl delete -f "${POD_INSTRUCT_YAML}" +} diff --git a/tests/integration/kubernetes/run_kubernetes_nv_tests.sh b/tests/integration/kubernetes/run_kubernetes_nv_tests.sh new file mode 100644 index 0000000000..9b3681bdbc --- /dev/null +++ b/tests/integration/kubernetes/run_kubernetes_nv_tests.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# +# Copyright (c) 2025 NVIDIA Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +set -e + +kubernetes_dir=$(dirname "$(readlink -f "$0")") +# shellcheck disable=SC1091 # import based on variable +source "${kubernetes_dir}/../../common.bash" + +cleanup() { + true +} + +trap cleanup EXIT + +# Setting to "yes" enables fail fast, stopping execution at the first failed test. +K8S_TEST_FAIL_FAST="${K8S_TEST_FAIL_FAST:-no}" +K8S_TEST_NV=("k8s-nvidia-nim.bats") + +ensure_yq + +info "Running tests with bats version: $(bats --version)" + +tests_fail=() +for K8S_TEST_ENTRY in "${K8S_TEST_NV[@]}" +do + K8S_TEST_ENTRY=$(echo "${K8S_TEST_ENTRY}" | tr -d '[:space:][:cntrl:]') + info "$(kubectl get pods --all-namespaces 2>&1)" + info "Executing ${K8S_TEST_ENTRY}" + if !
bats --show-output-of-passing-tests "${K8S_TEST_ENTRY}"; then + tests_fail+=("${K8S_TEST_ENTRY}") + [[ "${K8S_TEST_FAIL_FAST}" = "yes" ]] && break + fi +done + +[[ ${#tests_fail[@]} -ne 0 ]] && die "Tests FAILED from suites: ${tests_fail[*]}" + +info "All tests SUCCEEDED" diff --git a/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-1-8b-instruct.yaml.in b/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-1-8b-instruct.yaml.in new file mode 100644 index 0000000000..761d46158e --- /dev/null +++ b/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-1-8b-instruct.yaml.in @@ -0,0 +1,89 @@ + +# Copyright (c) 2025 NVIDIA Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +--- +apiVersion: v1 +kind: Secret +metadata: + name: ngc-secret-instruct +type: kubernetes.io/dockerconfigjson +data: + .dockerconfigjson: ${DOCKER_CONFIG_JSON} +--- +apiVersion: v1 +kind: Pod +metadata: + name: ${POD_NAME_INSTRUCT} + labels: + app: ${POD_NAME_INSTRUCT} +spec: + restartPolicy: Never + runtimeClassName: kata-qemu-nvidia-gpu + imagePullSecrets: + - name: ngc-secret-instruct + securityContext: + runAsUser: 0 + runAsGroup: 0 + fsGroup: 0 + containers: + - name: ${POD_NAME_INSTRUCT} + image: nvcr.io/nim/meta/llama3-8b-instruct:1.0.0 + # Ports exposed by the container: + ports: + - containerPort: 8000 + name: http-openai + livenessProbe: + httpGet: + path: /v1/health/live + port: http-openai + initialDelaySeconds: 15 + periodSeconds: 10 + timeoutSeconds: 1 + successThreshold: 1 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /v1/health/ready + port: http-openai + initialDelaySeconds: 15 + periodSeconds: 10 + timeoutSeconds: 1 + successThreshold: 1 + failureThreshold: 3 + startupProbe: + httpGet: + path: /v1/health/ready + port: http-openai + initialDelaySeconds: 40 + periodSeconds: 10 + timeoutSeconds: 1 + successThreshold: 1 + failureThreshold: 180 + # Environment variable for NGC_API_KEY. 
In production, use a Secret. + env: + - name: NGC_API_KEY + value: "${NGC_API_KEY}" + # GPU resource request/limit (for NVIDIA GPU) + resources: + requests: + cpu: "16" + memory: "32Gi" + limits: + nvidia.com/pgpu: "1" + cpu: "16" + memory: "32Gi" + # Mount the local .cache directory into the container + volumeMounts: + - name: nim-cache + mountPath: /opt/nim/.cache + + # Host path volume for the local .cache directory. + # Adjust 'path' to match your $LOCAL_NIM_CACHE location. + volumes: + - name: nim-cache + hostPath: + path: "/opt/nim/.cache" + type: DirectoryOrCreate + diff --git a/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-2-nv-embedqa-1b-v2.yaml.in b/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-2-nv-embedqa-1b-v2.yaml.in new file mode 100644 index 0000000000..890564da9f --- /dev/null +++ b/tests/integration/kubernetes/runtimeclass_workloads/nvidia-nim-llama-3-2-nv-embedqa-1b-v2.yaml.in @@ -0,0 +1,95 @@ +# Copyright (c) 2025 NVIDIA Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +--- +apiVersion: v1 +kind: Secret +metadata: + name: ngc-secret-embedqa + namespace: nim-embedqa +type: kubernetes.io/dockerconfigjson +data: + .dockerconfigjson: ${DOCKER_CONFIG_JSON} +--- +apiVersion: v1 +kind: Pod +metadata: + name: nvidia-nim-llama-3-2-nv-embedqa-1b-v2 + namespace: nim-embedqa + labels: + app: nvidia-nim-llama-3-2-nv-embedqa-1b-v2 +spec: + restartPolicy: Always + runtimeClassName: "${RUNTIME_CLASS_NAME}" + serviceAccountName: default + imagePullSecrets: + - name: ngc-secret-embedqa + securityContext: + fsGroup: 0 + runAsGroup: 0 + runAsUser: 0 + containers: + - name: nvidia-nim-llama-3-2-nv-embedqa-1b-v2 + image: nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2:1.5.0 + imagePullPolicy: IfNotPresent + env: + - name: NIM_CACHE_PATH + value: "/opt/nim/.cache" + - name: NGC_API_KEY + value: "${NGC_API_KEY}" + - name: NIM_HTTP_API_PORT + value: "8000" + - name: NIM_JSONL_LOGGING + value: "1" + - name:
NIM_LOG_LEVEL + value: "INFO" + ports: + - containerPort: 8000 + name: http + + livenessProbe: + httpGet: + path: /v1/health/live + port: 8000 + initialDelaySeconds: 15 + periodSeconds: 10 + timeoutSeconds: 1 + successThreshold: 1 + failureThreshold: 3 + + readinessProbe: + httpGet: + path: /v1/health/ready + port: 8000 + initialDelaySeconds: 15 + periodSeconds: 10 + timeoutSeconds: 1 + successThreshold: 1 + failureThreshold: 3 + + startupProbe: + httpGet: + path: /v1/health/ready + port: 8000 + initialDelaySeconds: 40 + periodSeconds: 10 + timeoutSeconds: 1 + successThreshold: 1 + failureThreshold: 180 + + resources: + limits: + nvidia.com/pgpu: 1 + cpu: "16" + memory: "32Gi" + + volumeMounts: + - name: nim-cache + mountPath: /opt/nim/.cache + + volumes: + - name: nim-cache + hostPath: + path: "/opt/nim/.cache" + type: DirectoryOrCreate