gpu: Add embedding service

For a simple RAG pipeline, add an embedding service

Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
This commit is contained in:
Zvonko Kaiser
2025-05-09 14:57:47 +00:00
parent 65a9fe0063
commit 8be41a4e80
3 changed files with 243 additions and 18 deletions

View File

@@ -1,4 +1,3 @@
# Copyright (c) 2025 NVIDIA Corporation
#
# SPDX-License-Identifier: Apache-2.0
@@ -20,7 +19,7 @@ metadata:
app: ${POD_NAME_INSTRUCT}
spec:
restartPolicy: Never
runtimeClassName: kata-qemu-nvidia-gpu
runtimeClassName: "${RUNTIME_CLASS_NAME}"
imagePullSecrets:
- name: ngc-secret-instruct
securityContext:
@@ -84,6 +83,6 @@ spec:
volumes:
- name: nim-cache
hostPath:
path: "/opr/nim/.cache"
path: "${LOCAL_NIM_CACHE}"
type: DirectoryOrCreate

View File

@@ -7,7 +7,6 @@ apiVersion: v1
kind: Secret
metadata:
name: ngc-secret-embedqa
namespace: nim-embedqa
type: kubernetes.io/dockerconfigjson
data:
.dockerconfigjson: ${DOCKER_CONFIG_JSON}
@@ -16,7 +15,6 @@ apiVersion: v1
kind: Pod
metadata:
name: nvidia-nim-llama-3-2-nv-embedqa-1b-v2
namespace: nim-embedqa
labels:
app: nvidia-nim-llama-3-2-nv-embedqa-1b-v2
spec:
@@ -91,5 +89,5 @@ spec:
volumes:
- name: nim-cache
hostPath:
path: "/opr/nim/.cache"
path: "${LOCAL_NIM_CACHE}"
type: DirectoryOrCreate