mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-08-31 16:36:38 +00:00
Merge pull request #11236 from kata-containers/amd64-nvidia-gpu-cicd
gpu: AMD64 NVIDIA GPU CI/CD
This commit is contained in:
1
.github/actionlint.yaml
vendored
1
.github/actionlint.yaml
vendored
@@ -23,3 +23,4 @@ self-hosted-runner:
|
||||
- s390x
|
||||
- s390x-large
|
||||
- tdx
|
||||
- amd64-nvidia-a100
|
||||
|
@@ -23,6 +23,8 @@ on:
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD:
|
||||
required: false
|
||||
KBUILD_SIGN_PIN:
|
||||
required: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -108,6 +110,7 @@ jobs:
|
||||
ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
|
||||
- name: Parse OCI image name and digest
|
||||
id: parse-oci-segments
|
||||
@@ -215,6 +218,7 @@ jobs:
|
||||
ARTEFACT_REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
|
||||
TARGET_BRANCH: ${{ inputs.target-branch }}
|
||||
RELEASE: ${{ inputs.stage == 'release' && 'yes' || 'no' }}
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
|
||||
- name: store-artifact ${{ matrix.asset }}
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
|
1
.github/workflows/ci-coco-stability.yaml
vendored
1
.github/workflows/ci-coco-stability.yaml
vendored
@@ -31,3 +31,4 @@ jobs:
|
||||
AZ_TENANT_ID: ${{ secrets.AZ_TENANT_ID }}
|
||||
AZ_SUBSCRIPTION_ID: ${{ secrets.AZ_SUBSCRIPTION_ID }}
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
|
2
.github/workflows/ci-devel.yaml
vendored
2
.github/workflows/ci-devel.yaml
vendored
@@ -27,6 +27,8 @@ jobs:
|
||||
CI_HKD_PATH: ${{ secrets.CI_HKD_PATH }}
|
||||
ITA_KEY: ${{ secrets.ITA_KEY }}
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
|
||||
build-checks:
|
||||
uses: ./.github/workflows/build-checks.yaml
|
||||
|
2
.github/workflows/ci-nightly.yaml
vendored
2
.github/workflows/ci-nightly.yaml
vendored
@@ -31,3 +31,5 @@ jobs:
|
||||
CI_HKD_PATH: ${{ secrets.CI_HKD_PATH }}
|
||||
ITA_KEY: ${{ secrets.ITA_KEY }}
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
|
2
.github/workflows/ci-on-push.yaml
vendored
2
.github/workflows/ci-on-push.yaml
vendored
@@ -52,3 +52,5 @@ jobs:
|
||||
CI_HKD_PATH: ${{ secrets.CI_HKD_PATH }}
|
||||
ITA_KEY: ${{ secrets.ITA_KEY }}
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
|
4
.github/workflows/ci-weekly.yaml
vendored
4
.github/workflows/ci-weekly.yaml
vendored
@@ -27,6 +27,8 @@ on:
|
||||
required: true
|
||||
QUAY_DEPLOYER_PASSWORD:
|
||||
required: true
|
||||
KBUILD_SIGN_PIN:
|
||||
required: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -43,6 +45,8 @@ jobs:
|
||||
tarball-suffix: -${{ inputs.tag }}
|
||||
commit-hash: ${{ inputs.commit-hash }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
secrets:
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
|
||||
publish-kata-deploy-payload-amd64:
|
||||
needs: build-kata-static-tarball-amd64
|
||||
|
35
.github/workflows/ci.yaml
vendored
35
.github/workflows/ci.yaml
vendored
@@ -35,6 +35,10 @@ on:
|
||||
required: true
|
||||
QUAY_DEPLOYER_PASSWORD:
|
||||
required: true
|
||||
NGC_API_KEY:
|
||||
required: true
|
||||
KBUILD_SIGN_PIN:
|
||||
required: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -52,6 +56,8 @@ jobs:
|
||||
tarball-suffix: -${{ inputs.tag }}
|
||||
commit-hash: ${{ inputs.commit-hash }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
secrets:
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
|
||||
publish-kata-deploy-payload-amd64:
|
||||
needs: build-kata-static-tarball-amd64
|
||||
@@ -323,6 +329,21 @@ jobs:
|
||||
pr-number: ${{ inputs.pr-number }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
|
||||
run-k8s-tests-on-nvidia-gpu:
|
||||
if: ${{ inputs.skip-test != 'yes' }}
|
||||
needs: publish-kata-deploy-payload-amd64
|
||||
uses: ./.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml
|
||||
with:
|
||||
registry: ghcr.io
|
||||
repo: ${{ github.repository_owner }}/kata-deploy-ci
|
||||
tag: ${{ inputs.tag }}-amd64
|
||||
commit-hash: ${{ inputs.commit-hash }}
|
||||
pr-number: ${{ inputs.pr-number }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
secrets:
|
||||
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
|
||||
|
||||
|
||||
run-kata-coco-tests:
|
||||
if: ${{ inputs.skip-test != 'yes' }}
|
||||
needs:
|
||||
@@ -383,20 +404,6 @@ jobs:
|
||||
pr-number: ${{ inputs.pr-number }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
|
||||
run-metrics-tests:
|
||||
# Skip metrics tests whilst runner is broken
|
||||
if: false
|
||||
# if: ${{ inputs.skip-test != 'yes' }}
|
||||
needs: build-kata-static-tarball-amd64
|
||||
uses: ./.github/workflows/run-metrics.yaml
|
||||
with:
|
||||
registry: ghcr.io
|
||||
repo: ${{ github.repository_owner }}/kata-deploy-ci
|
||||
tag: ${{ inputs.tag }}-amd64
|
||||
commit-hash: ${{ inputs.commit-hash }}
|
||||
pr-number: ${{ inputs.pr-number }}
|
||||
target-branch: ${{ inputs.target-branch }}
|
||||
|
||||
run-basic-amd64-tests:
|
||||
if: ${{ inputs.skip-test != 'yes' }}
|
||||
needs: build-kata-static-tarball-amd64
|
||||
|
1
.github/workflows/payload-after-push.yaml
vendored
1
.github/workflows/payload-after-push.yaml
vendored
@@ -25,6 +25,7 @@ jobs:
|
||||
target-branch: ${{ github.ref_name }}
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
|
||||
build-assets-arm64:
|
||||
permissions:
|
||||
|
3
.github/workflows/release-amd64.yaml
vendored
3
.github/workflows/release-amd64.yaml
vendored
@@ -8,6 +8,8 @@ on:
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD:
|
||||
required: true
|
||||
KBUILD_SIGN_PIN:
|
||||
required: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -20,6 +22,7 @@ jobs:
|
||||
stage: release
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
1
.github/workflows/release.yaml
vendored
1
.github/workflows/release.yaml
vendored
@@ -35,6 +35,7 @@ jobs:
|
||||
target-arch: amd64
|
||||
secrets:
|
||||
QUAY_DEPLOYER_PASSWORD: ${{ secrets.QUAY_DEPLOYER_PASSWORD }}
|
||||
KBUILD_SIGN_PIN: ${{ secrets.KBUILD_SIGN_PIN }}
|
||||
|
||||
build-and-push-assets-arm64:
|
||||
needs: release
|
||||
|
89
.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml
vendored
Normal file
89
.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml
vendored
Normal file
@@ -0,0 +1,89 @@
|
||||
# Reusable workflow (workflow_call): deploys Kata on the self-hosted
# `amd64-nvidia-a100` runner and runs the NVIDIA GPU kubernetes tests
# through tests/integration/kubernetes/gha-run.sh.
name: CI | Run NVIDIA GPU kubernetes tests on amd64
on:
  workflow_call:
    inputs:
      registry:
        required: true
        type: string
      repo:
        required: true
        type: string
      tag:
        required: true
        type: string
      pr-number:
        required: true
        type: string
      commit-hash:
        required: false
        type: string
      target-branch:
        required: false
        type: string
        default: ""
    secrets:
      # Only handed to the "Run tests" step below.
      NGC_API_KEY:
        required: true

# No GITHUB_TOKEN permissions are granted to this workflow.
permissions: {}

jobs:
  run-nvidia-gpu-tests-on-amd64:
    strategy:
      fail-fast: false
      matrix:
        vmm:
          - qemu-nvidia-gpu
        k8s:
          - kubeadm
    runs-on: amd64-nvidia-a100
    env:
      DOCKER_REGISTRY: ${{ inputs.registry }}
      DOCKER_REPO: ${{ inputs.repo }}
      DOCKER_TAG: ${{ inputs.tag }}
      GH_PR_NUMBER: ${{ inputs.pr-number }}
      KATA_HYPERVISOR: ${{ matrix.vmm }}
      KUBERNETES: ${{ matrix.k8s }}
      USING_NFD: "false"
      K8S_TEST_HOST_TYPE: all
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          ref: ${{ inputs.commit-hash }}
          fetch-depth: 0
          persist-credentials: false

      - name: Rebase atop of the latest target branch
        run: |
          ./tests/git-helper.sh "rebase-atop-of-the-latest-target-branch"
        env:
          TARGET_BRANCH: ${{ inputs.target-branch }}

      - name: Deploy Kata
        timeout-minutes: 10
        run: bash tests/integration/kubernetes/gha-run.sh deploy-kata

      - name: Install `bats`
        run: bash tests/integration/kubernetes/gha-run.sh install-bats

      - name: Run tests
        timeout-minutes: 30
        run: bash tests/integration/kubernetes/gha-run.sh run-nv-tests
        env:
          NGC_API_KEY: ${{ secrets.NGC_API_KEY }}

      - name: Collect artifacts ${{ matrix.vmm }}
        if: always()
        run: bash tests/integration/kubernetes/gha-run.sh collect-artifacts
        continue-on-error: true

      # Must also run after a failed test step: without `always()` this step
      # is skipped on failure and the artifacts collected above are lost.
      - name: Archive artifacts ${{ matrix.vmm }}
        if: always()
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        with:
          name: k8s-tests-${{ matrix.vmm }}-${{ matrix.k8s }}-${{ inputs.tag }}
          path: /tmp/artifacts
          retention-days: 1

      - name: Delete kata-deploy
        if: always()
        timeout-minutes: 5
        run: bash tests/integration/kubernetes/gha-run.sh cleanup
|
@@ -289,7 +289,7 @@ function run_tests() {
|
||||
if [[ "${KATA_HYPERVISOR}" = "dragonball" ]] && [[ "${SNAPSHOTTER}" = "devmapper" ]]; then
|
||||
echo "Skipping tests for ${KATA_HYPERVISOR} using devmapper"
|
||||
else
|
||||
bash run_kubernetes_tests.sh
|
||||
bash "${K8STESTS}"
|
||||
fi
|
||||
popd
|
||||
}
|
||||
@@ -589,7 +589,14 @@ function main() {
|
||||
deploy-kata-zvsi) deploy_kata "zvsi" ;;
|
||||
deploy-snapshotter) deploy_snapshotter ;;
|
||||
report-tests) report_tests ;;
|
||||
run-tests) run_tests ;;
|
||||
run-tests)
|
||||
K8STESTS=run_kubernetes_tests.sh
|
||||
run_tests
|
||||
;;
|
||||
run-nv-tests)
|
||||
K8STESTS=run_kubernetes_nv_tests.sh
|
||||
run_tests
|
||||
;;
|
||||
run-tests-kcli) run_tests "kcli" ;;
|
||||
collect-artifacts) collect_artifacts ;;
|
||||
cleanup) cleanup ;;
|
||||
|
99
tests/integration/kubernetes/k8s-nvidia-nim.bats
Normal file
99
tests/integration/kubernetes/k8s-nvidia-nim.bats
Normal file
@@ -0,0 +1,99 @@
|
||||
#!/usr/bin/env bats
|
||||
#
|
||||
# Copyright (c) 2025 NVIDIA Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
# shellcheck disable=SC2154 # BATS variables are not assigned in this file
|
||||
load "${BATS_TEST_DIRNAME}/../../common.bash"
|
||||
# shellcheck disable=SC1091
|
||||
load "${BATS_TEST_DIRNAME}/tests_common.sh"
|
||||
|
||||
# Names exported for the tests below and for envsubst when the pod template
# is rendered in setup_file.
export POD_NAME_INSTRUCT="nvidia-nim-llama-3-1-8b-instruct"
export POD_NAME_EMBEDQA="nvidia-nim-llama-3-2-nv-embedqa-1b-v2"

export POD_SECRET_INSTRUCT="ngc-secret-instruct"

# Base64 of "<user>:<password>" for the "auth" field; the user name is the
# literal string $oauthtoken and the password is NGC_API_KEY.
ngc_basic_auth=$(printf '%s' "\$oauthtoken:${NGC_API_KEY}" | base64 -w0)

# Base64-encoded docker config JSON for nvcr.io; substituted into the
# `.dockerconfigjson` field of the pull secret in the pod templates.
# shellcheck disable=SC2016 # "$oauthtoken" must stay literal in the format string
DOCKER_CONFIG_JSON=$(
    printf '{"auths":{"nvcr.io":{"username":"$oauthtoken","password":"%s","auth":"%s"}}}' \
        "${NGC_API_KEY}" "${ngc_basic_auth}" |
        base64 -w0
)
export DOCKER_CONFIG_JSON
|
||||
|
||||
# Runs once before the tests in this file: installs jq when missing,
# initialises pyenv, creates a Python venv and renders the instruct pod
# template with envsubst. Exports POD_INSTRUCT_YAML for the tests and
# teardown_file.
setup_file() {
    # The tests parse the HTTP responses with jq.
    if ! dpkg -s jq >/dev/null 2>&1; then
        sudo apt -y install jq
    fi

    export PYENV_ROOT="${HOME}/.pyenv"
    if [[ -d ${PYENV_ROOT}/bin ]]; then
        export PATH="${PYENV_ROOT}/bin:${PATH}"
    fi
    eval "$(pyenv init - bash)"

    python3 -m venv "${HOME}/.cicd/venv"

    # Sets pod_config_dir (helper presumably provided by tests_common.sh).
    get_pod_config_dir

    pod_instruct_yaml_in="${pod_config_dir}/${POD_NAME_INSTRUCT}.yaml.in"
    # Rendered manifest: same path as the template minus the ".in" suffix.
    pod_instruct_yaml="${pod_instruct_yaml_in%.in}"

    envsubst <"${pod_instruct_yaml_in}" >"${pod_instruct_yaml}"

    POD_INSTRUCT_YAML="${pod_instruct_yaml}"
    export POD_INSTRUCT_YAML
}
|
||||
|
||||
@test "NVIDIA NIM Llama 3.1-8b Instruct" {
|
||||
kubectl apply -f "${POD_INSTRUCT_YAML}"
|
||||
kubectl wait --for=condition=Ready --timeout=500s pod "${POD_NAME_INSTRUCT}"
|
||||
# shellcheck disable=SC2030 # Variable is shared via file between BATS tests
|
||||
POD_IP_INSTRUCT=$(kubectl get pod "${POD_NAME_INSTRUCT}" -o jsonpath='{.status.podIP}')
|
||||
[[ -n "${POD_IP_INSTRUCT}" ]]
|
||||
|
||||
echo "POD_IP_INSTRUCT=${POD_IP_INSTRUCT}" >"${BATS_SUITE_TMPDIR}/env"
|
||||
echo "# POD_IP_INSTRUCT=${POD_IP_INSTRUCT}" >&3
|
||||
}
|
||||
|
||||
@test "List of models available for inference" {
|
||||
# shellcheck disable=SC1091 # File is created by previous test
|
||||
source "${BATS_SUITE_TMPDIR}/env"
|
||||
# shellcheck disable=SC2031 # Variable is shared via file between BATS tests
|
||||
[[ -n "${POD_IP_INSTRUCT}" ]]
|
||||
|
||||
# shellcheck disable=SC2031 # Variable is shared via file between BATS tests
|
||||
run curl -sX GET "http://${POD_IP_INSTRUCT}:8000/v1/models"
|
||||
[[ "${status}" -eq 0 ]]
|
||||
|
||||
# shellcheck disable=SC2030 # Variable is shared via file between BATS tests
|
||||
MODEL_NAME=$(echo "${output}" | jq '.data[0].id' | tr -d '"')
|
||||
export MODEL_NAME
|
||||
[[ -n "${MODEL_NAME}" ]]
|
||||
echo "MODEL_NAME=${MODEL_NAME}" >>"${BATS_SUITE_TMPDIR}/env"
|
||||
echo "# MODEL_NAME=${MODEL_NAME}" >&3
|
||||
|
||||
}
|
||||
|
||||
@test "Simple OpenAI completion request" {
|
||||
# shellcheck disable=SC1091 # File is created by previous test
|
||||
source "${BATS_SUITE_TMPDIR}/env"
|
||||
# shellcheck disable=SC2031 # Variables are shared via file between BATS tests
|
||||
[[ -n "${POD_IP_INSTRUCT}" ]]
|
||||
# shellcheck disable=SC2031 # Variables are shared via file between BATS tests
|
||||
[[ -n "${MODEL_NAME}" ]]
|
||||
|
||||
QUESTION="What are Kata Containers?"
|
||||
|
||||
# shellcheck disable=SC2031 # Variables are shared via file between BATS tests
|
||||
run curl -sX 'POST' \
|
||||
"http://${POD_IP_INSTRUCT}:8000/v1/completions" \
|
||||
-H "accept: application/json" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"model\": \"${MODEL_NAME}\", \"prompt\": \"${QUESTION}\", \"max_tokens\": 64}"
|
||||
|
||||
ANSWER=$(echo "${output}" | jq '.choices[0].text')
|
||||
[[ -n "${ANSWER}" ]]
|
||||
|
||||
echo "# QUESTION: ${QUESTION}" >&3
|
||||
echo "# ANSWER: ${ANSWER}" >&3
|
||||
}
|
||||
|
||||
# Runs once after all tests in this file: deletes every resource created
# from the rendered instruct manifest.
teardown_file() {
    kubectl delete --filename="${POD_INSTRUCT_YAML}"
}
|
42
tests/integration/kubernetes/run_kubernetes_nv_tests.sh
Normal file
42
tests/integration/kubernetes/run_kubernetes_nv_tests.sh
Normal file
@@ -0,0 +1,42 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2025 NVIDIA Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
set -e

# Directory holding this script; used to locate common.bash.
kubernetes_dir=$(dirname "$(readlink -f "$0")")
# shellcheck disable=SC1091 # import based on variable
source "${kubernetes_dir}/../../common.bash"

# EXIT handler; currently a no-op placeholder.
cleanup() {
    true
}

trap cleanup EXIT

# Setting to "yes" enables fail fast, stopping execution at the first failed test.
K8S_TEST_FAIL_FAST="${K8S_TEST_FAIL_FAST:-no}"
# BATS suites to run, in order.
K8S_TEST_NV=("k8s-nvidia-nim.bats")

# ensure_yq, info and die are not defined here; presumably from common.bash.
ensure_yq

info "Running tests with bats version: $(bats --version)"

# Names of the suites that failed.
tests_fail=()
for K8S_TEST_ENTRY in "${K8S_TEST_NV[@]}"; do
    # Strip whitespace and control characters from the entry.
    K8S_TEST_ENTRY=$(echo "${K8S_TEST_ENTRY}" | tr -d '[:space:][:cntrl:]')
    info "$(kubectl get pods --all-namespaces 2>&1)"
    info "Executing ${K8S_TEST_ENTRY}"
    if bats --show-output-of-passing-tests "${K8S_TEST_ENTRY}"; then
        continue
    fi

    tests_fail+=("${K8S_TEST_ENTRY}")
    if [[ "${K8S_TEST_FAIL_FAST}" = "yes" ]]; then
        break
    fi
done

if [[ ${#tests_fail[@]} -ne 0 ]]; then
    die "Tests FAILED from suites: ${tests_fail[*]}"
fi

info "All tests SUCCEEDED"
|
@@ -0,0 +1,89 @@
|
||||
|
||||
# Copyright (c) 2025 NVIDIA Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
---
# Image pull secret for nvcr.io. DOCKER_CONFIG_JSON is substituted by
# envsubst and is already base64-encoded by the test.
apiVersion: v1
kind: Secret
metadata:
  name: ngc-secret-instruct
type: kubernetes.io/dockerconfigjson
data:
  .dockerconfigjson: "${DOCKER_CONFIG_JSON}"
---
# NIM instruct pod, run under the kata-qemu-nvidia-gpu runtime class.
apiVersion: v1
kind: Pod
metadata:
  name: "${POD_NAME_INSTRUCT}"
  labels:
    app: "${POD_NAME_INSTRUCT}"
spec:
  restartPolicy: Never
  runtimeClassName: kata-qemu-nvidia-gpu
  imagePullSecrets:
    - name: ngc-secret-instruct
  securityContext:
    runAsUser: 0
    runAsGroup: 0
    fsGroup: 0
  containers:
    - name: "${POD_NAME_INSTRUCT}"
      image: nvcr.io/nim/meta/llama3-8b-instruct:1.0.0
      # Ports exposed by the container:
      ports:
        - containerPort: 8000
          name: http-openai
      livenessProbe:
        httpGet:
          path: /v1/health/live
          port: http-openai
        initialDelaySeconds: 15
        periodSeconds: 10
        timeoutSeconds: 1
        successThreshold: 1
        failureThreshold: 3
      readinessProbe:
        httpGet:
          path: /v1/health/ready
          port: http-openai
        initialDelaySeconds: 15
        periodSeconds: 10
        timeoutSeconds: 1
        successThreshold: 1
        failureThreshold: 3
      # Startup may take long: up to 180 probes, 10 seconds apart.
      startupProbe:
        httpGet:
          path: /v1/health/ready
          port: http-openai
        initialDelaySeconds: 40
        periodSeconds: 10
        timeoutSeconds: 1
        successThreshold: 1
        failureThreshold: 180
      # Environment variable for NGC_API_KEY. In production, use a Secret.
      env:
        - name: NGC_API_KEY
          value: "${NGC_API_KEY}"
      # GPU resource request/limit (for NVIDIA GPU)
      resources:
        requests:
          cpu: "16"
          memory: "32Gi"
        limits:
          nvidia.com/pgpu: "1"
          cpu: "16"
          memory: "32Gi"
      # Mount the local .cache directory into the container
      volumeMounts:
        - name: nim-cache
          mountPath: /opt/nim/.cache

  # Host path volume for the local .cache directory.
  # Adjust 'path' to match the local NIM cache location on the host.
  # NOTE(review): was "/opr/nim/.cache", which looks like a typo of the
  # mountPath above - confirm against the runner's existing cache directory.
  volumes:
    - name: nim-cache
      hostPath:
        path: "/opt/nim/.cache"
        type: DirectoryOrCreate
|
||||
|
@@ -0,0 +1,95 @@
|
||||
# Copyright (c) 2025 NVIDIA Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
---
# Image pull secret for nvcr.io. DOCKER_CONFIG_JSON is a template variable
# (presumably rendered with envsubst like the instruct template - confirm).
apiVersion: v1
kind: Secret
metadata:
  name: ngc-secret-embedqa
  namespace: nim-embedqa
type: kubernetes.io/dockerconfigjson
data:
  .dockerconfigjson: "${DOCKER_CONFIG_JSON}"
---
# NIM embedqa pod. NOTE(review): the nim-embedqa namespace is not created by
# this manifest and must already exist.
apiVersion: v1
kind: Pod
metadata:
  name: nvidia-nim-llama-3-2-nv-embedqa-1b-v2
  namespace: nim-embedqa
  labels:
    app: nvidia-nim-llama-3-2-nv-embedqa-1b-v2
spec:
  restartPolicy: Always
  runtimeClassName: "${RUNTIME_CLASS_NAME}"
  serviceAccountName: default
  imagePullSecrets:
    - name: ngc-secret-embedqa
  securityContext:
    fsGroup: 0
    runAsGroup: 0
    runAsUser: 0
  containers:
    - name: nvidia-nim-llama-3-2-nv-embedqa-1b-v2
      image: nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2:1.5.0
      imagePullPolicy: IfNotPresent
      env:
        - name: NIM_CACHE_PATH
          value: "/opt/nim/.cache"
        - name: NGC_API_KEY
          value: "${NGC_API_KEY}"
        - name: NIM_HTTP_API_PORT
          value: "8000"
        - name: NIM_JSONL_LOGGING
          value: "1"
        - name: NIM_LOG_LEVEL
          value: "INFO"
      ports:
        - containerPort: 8000
          name: http

      livenessProbe:
        httpGet:
          path: /v1/health/live
          port: 8000
        initialDelaySeconds: 15
        periodSeconds: 10
        timeoutSeconds: 1
        successThreshold: 1
        failureThreshold: 3

      readinessProbe:
        httpGet:
          path: /v1/health/ready
          port: 8000
        initialDelaySeconds: 15
        periodSeconds: 10
        timeoutSeconds: 1
        successThreshold: 1
        failureThreshold: 3

      # Startup may take long: up to 180 probes, 10 seconds apart.
      startupProbe:
        httpGet:
          path: /v1/health/ready
          port: 8000
        initialDelaySeconds: 40
        periodSeconds: 10
        timeoutSeconds: 1
        successThreshold: 1
        failureThreshold: 180

      resources:
        limits:
          nvidia.com/pgpu: "1"
          cpu: "16"
          memory: "32Gi"

      # Mounted at the same path NIM_CACHE_PATH points to.
      volumeMounts:
        - name: nim-cache
          mountPath: /opt/nim/.cache

  # NOTE(review): path was "/opr/nim/.cache", which looks like a typo of the
  # mountPath above - confirm against the runner's existing cache directory.
  volumes:
    - name: nim-cache
      hostPath:
        path: "/opt/nim/.cache"
        type: DirectoryOrCreate
|
Reference in New Issue
Block a user