mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-08-14 06:06:12 +00:00
Merge a06727a44d
into 9379a18c8a
This commit is contained in:
commit
a023838ff6
@ -37,6 +37,7 @@ jobs:
|
||||
k8s:
|
||||
- kubeadm
|
||||
runs-on: amd64-nvidia-a100
|
||||
environment: nvidia
|
||||
env:
|
||||
DOCKER_REGISTRY: ${{ inputs.registry }}
|
||||
DOCKER_REPO: ${{ inputs.repo }}
|
||||
|
@ -10,10 +10,13 @@ load "${BATS_TEST_DIRNAME}/../../common.bash"
|
||||
# shellcheck disable=SC1091
|
||||
load "${BATS_TEST_DIRNAME}/tests_common.sh"
|
||||
|
||||
# RuntimeClass for the NIM pods: honour a value supplied by the caller's
# environment, otherwise fall back to kata-qemu-nvidia-gpu. Exported so that
# envsubst can substitute it into the pod templates.
RUNTIME_CLASS_NAME=${RUNTIME_CLASS_NAME:-kata-qemu-nvidia-gpu}
|
||||
export RUNTIME_CLASS_NAME
|
||||
|
||||
# Pod names; setup_file also uses them to locate the "<name>.yaml.in" templates.
export POD_NAME_INSTRUCT="nvidia-nim-llama-3-1-8b-instruct"
|
||||
export POD_NAME_EMBEDQA="nvidia-nim-llama-3-2-nv-embedqa-1b-v2"
|
||||
|
||||
# Image pull secret name for the instruct pod.
# NOTE(review): presumably consumed by the pod template via envsubst - confirm.
export POD_SECRET_INSTRUCT="ngc-secret-instruct"
|
||||
# Host directory the pod templates mount as the "nim-cache" hostPath volume.
export LOCAL_NIM_CACHE="/opt/nim/.cache"
|
||||
|
||||
DOCKER_CONFIG_JSON=$(
|
||||
echo -n "{\"auths\":{\"nvcr.io\":{\"username\":\"\$oauthtoken\",\"password\":\"${NGC_API_KEY}\",\"auth\":\"$(echo -n "\$oauthtoken:${NGC_API_KEY}" | base64 -w0)\"}}}" |
|
||||
@ -28,6 +31,7 @@ setup_file() {
|
||||
[[ -d ${PYENV_ROOT}/bin ]] && export PATH="${PYENV_ROOT}/bin:${PATH}"
|
||||
eval "$(pyenv init - bash)"
|
||||
|
||||
# shellcheck disable=SC1091 # Virtual environment will be created during test execution
|
||||
python3 -m venv "${HOME}"/.cicd/venv
|
||||
|
||||
get_pod_config_dir
|
||||
@ -35,20 +39,36 @@ setup_file() {
|
||||
pod_instruct_yaml_in="${pod_config_dir}/${POD_NAME_INSTRUCT}.yaml.in"
|
||||
pod_instruct_yaml="${pod_config_dir}/${POD_NAME_INSTRUCT}.yaml"
|
||||
|
||||
pod_embedqa_yaml_in="${pod_config_dir}/${POD_NAME_EMBEDQA}.yaml.in"
|
||||
pod_embedqa_yaml="${pod_config_dir}/${POD_NAME_EMBEDQA}.yaml"
|
||||
|
||||
envsubst <"${pod_instruct_yaml_in}" >"${pod_instruct_yaml}"
|
||||
envsubst <"${pod_embedqa_yaml_in}" >"${pod_embedqa_yaml}"
|
||||
|
||||
export POD_INSTRUCT_YAML="${pod_instruct_yaml}"
|
||||
export POD_EMBEDQA_YAML="${pod_embedqa_yaml}"
|
||||
}
|
||||
|
||||
@test "NVIDIA NIM Llama 3.1-8b Instruct" {
|
||||
@test "NVIDIA NIM Llama 3.1-8b Instruct & NVIDIA NIM Llama 3.2 EmbedQA-1b-v2" {
|
||||
kubectl apply -f "${POD_INSTRUCT_YAML}"
|
||||
kubectl apply -f "${POD_EMBEDQA_YAML}"
|
||||
|
||||
kubectl wait --for=condition=Ready --timeout=500s pod "${POD_NAME_INSTRUCT}"
|
||||
kubectl wait --for=condition=Ready --timeout=500s pod "${POD_NAME_EMBEDQA}"
|
||||
|
||||
# shellcheck disable=SC2030 # Variable is shared via file between BATS tests
|
||||
POD_IP_INSTRUCT=$(kubectl get pod "${POD_NAME_INSTRUCT}" -o jsonpath='{.status.podIP}')
|
||||
[[ -n "${POD_IP_INSTRUCT}" ]]
|
||||
|
||||
# shellcheck disable=SC2030 # Variable is shared via file between BATS tests
|
||||
POD_IP_EMBEDQA=$(kubectl get pod "${POD_NAME_EMBEDQA}" -o jsonpath='{.status.podIP}')
|
||||
[[ -n "${POD_IP_EMBEDQA}" ]]
|
||||
|
||||
echo "POD_IP_INSTRUCT=${POD_IP_INSTRUCT}" >"${BATS_SUITE_TMPDIR}/env"
|
||||
echo "# POD_IP_INSTRUCT=${POD_IP_INSTRUCT}" >&3
|
||||
|
||||
echo "POD_IP_EMBEDQA=${POD_IP_EMBEDQA}" >>"${BATS_SUITE_TMPDIR}/env"
|
||||
echo "# POD_IP_EMBEDQA=${POD_IP_EMBEDQA}" >&3
|
||||
}
|
||||
|
||||
@test "List of models available for inference" {
|
||||
@ -94,6 +114,209 @@ setup_file() {
|
||||
echo "# ANSWER: ${ANSWER}" >&3
|
||||
}
|
||||
|
||||
@test "Setup the LangChain flow" {
|
||||
# shellcheck disable=SC1091 # Sourcing virtual environment activation script
|
||||
source "${HOME}"/.cicd/venv/bin/activate
|
||||
|
||||
pip install --upgrade pip
|
||||
[[ "$(pip show langchain 2>/dev/null | awk '/^Version:/{print $2}')" = "0.2.5" ]] || pip install langchain==0.2.5
|
||||
[[ "$(pip show langchain-nvidia-ai-endpoints 2>/dev/null | awk '/^Version:/{print $2}')" = "0.1.2" ]] || pip install langchain-nvidia-ai-endpoints==0.1.2
|
||||
[[ "$(pip show faiss-gpu 2>/dev/null | awk '/^Version:/{print $2}')" = "1.7.2" ]] || pip install faiss-gpu==1.7.2
|
||||
[[ "$(pip show langchain-community 2>/dev/null | awk '/^Version:/{print $2}')" = "0.2.5" ]] || pip install langchain-community==0.2.5
|
||||
[[ "$(pip show beautifulsoup4 2>/dev/null | awk '/^Version:/{print $2}')" = "4.13.4" ]] || pip install beautifulsoup4==4.13.4
|
||||
}
|
||||
|
||||
@test "LangChain NVIDIA AI Endpoints" {
|
||||
# shellcheck disable=SC1091 # File is created by previous test
|
||||
source "${BATS_SUITE_TMPDIR}/env"
|
||||
# shellcheck disable=SC2031 # Variables are shared via file between BATS tests
|
||||
[[ -n "${POD_IP_INSTRUCT}" ]]
|
||||
# shellcheck disable=SC2031 # Variables are shared via file between BATS tests
|
||||
[[ -n "${MODEL_NAME}" ]]
|
||||
|
||||
QUESTION="What is the capital of France?"
|
||||
ANSWER="The capital of France is Paris."
|
||||
|
||||
# shellcheck disable=SC1091 # Sourcing virtual environment activation script
|
||||
source "${HOME}"/.cicd/venv/bin/activate
|
||||
# shellcheck disable=SC2031 # Variables are used in heredoc, not subshell
|
||||
cat <<EOF >"${HOME}"/.cicd/venv/langchain_nim.py
|
||||
from langchain_nvidia_ai_endpoints import ChatNVIDIA
|
||||
|
||||
llm = ChatNVIDIA(base_url="http://${POD_IP_INSTRUCT}:8000/v1", model="${MODEL_NAME}", temperature=0.1, max_tokens=1000, top_p=1.0)
|
||||
|
||||
result = llm.invoke("${QUESTION}")
|
||||
print(result.content)
|
||||
EOF
|
||||
|
||||
run python3 "${HOME}"/.cicd/venv/langchain_nim.py
|
||||
|
||||
[[ "${status}" -eq 0 ]]
|
||||
[[ "${output}" = "${ANSWER}" ]]
|
||||
|
||||
echo "# QUESTION: ${QUESTION}" >&3
|
||||
echo "# ANSWER: ${ANSWER}" >&3
|
||||
}
|
||||
|
||||
@test "Kata Documentation RAG" {
|
||||
# shellcheck disable=SC1091 # File is created by previous test
|
||||
source "${BATS_SUITE_TMPDIR}/env"
|
||||
# shellcheck disable=SC2031 # Variables are shared via file between BATS tests
|
||||
[[ -n "${POD_IP_EMBEDQA}" ]]
|
||||
# shellcheck disable=SC2031 # Variables are shared via file between BATS tests
|
||||
[[ -n "${POD_IP_INSTRUCT}" ]]
|
||||
|
||||
# shellcheck disable=SC1091 # Sourcing virtual environment activation script
|
||||
source "${HOME}"/.cicd/venv/bin/activate
|
||||
cat <<EOF >"${HOME}"/.cicd/venv/langchain_nim_kata_rag.py
|
||||
import os
|
||||
from langchain.chains import ConversationalRetrievalChain, LLMChain
|
||||
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT
|
||||
from langchain.chains.question_answering import load_qa_chain
|
||||
from langchain.memory import ConversationBufferMemory
|
||||
from langchain_community.vectorstores import FAISS
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
from langchain_nvidia_ai_endpoints import ChatNVIDIA
|
||||
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
|
||||
EOF
|
||||
|
||||
# shellcheck disable=SC2129 # Multiple heredocs are intentional for building the Python script
|
||||
cat <<EOF >>"${HOME}"/.cicd/venv/langchain_nim_kata_rag.py
|
||||
import re
|
||||
from typing import List, Union
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
def html_document_loader(url: Union[str, bytes]) -> str:
|
||||
try:
|
||||
response = requests.get(url)
|
||||
html_content = response.text
|
||||
except Exception as e:
|
||||
print(f"Failed to load {url} due to exception {e}")
|
||||
return ""
|
||||
|
||||
try:
|
||||
# Create a Beautiful Soup object to parse html
|
||||
soup = BeautifulSoup(html_content, "html.parser")
|
||||
|
||||
# Remove script and style tags
|
||||
for script in soup(["script", "style"]):
|
||||
script.extract()
|
||||
|
||||
# Get the plain text from the HTML document
|
||||
text = soup.get_text()
|
||||
|
||||
# Remove excess whitespace and newlines
|
||||
text = re.sub("\s+", " ", text).strip()
|
||||
|
||||
return text
|
||||
except Exception as e:
|
||||
print(f"Exception {e} while loading document")
|
||||
return ""
|
||||
|
||||
EOF
|
||||
|
||||
cat <<EOF >>"${HOME}"/.cicd/venv/langchain_nim_kata_rag.py
|
||||
def create_embeddings(embedding_path: str = "./data/nv_embedding"):
|
||||
|
||||
embedding_path = "./data/nv_embedding"
|
||||
print(f"Storing embeddings to {embedding_path}")
|
||||
|
||||
# List of web pages containing Kata technical documentation
|
||||
urls = [
|
||||
"https://github.com/kata-containers/kata-containers/releases",
|
||||
]
|
||||
|
||||
documents = []
|
||||
for url in urls:
|
||||
document = html_document_loader(url)
|
||||
documents.append(document)
|
||||
|
||||
|
||||
text_splitter = RecursiveCharacterTextSplitter(
|
||||
chunk_size=1000,
|
||||
chunk_overlap=0,
|
||||
length_function=len,
|
||||
)
|
||||
texts = text_splitter.create_documents(documents)
|
||||
index_docs(url, text_splitter, texts, embedding_path)
|
||||
print("Generated embedding successfully")
|
||||
EOF
|
||||
|
||||
# shellcheck disable=SC2031 # POD_IP_EMBEDQA is shared via file between BATS tests
|
||||
cat <<EOF >>"${HOME}"/.cicd/venv/langchain_nim_kata_rag.py
|
||||
def index_docs(url: Union[str, bytes], splitter, documents: List[str], dest_embed_dir) -> None:
|
||||
embeddings = NVIDIAEmbeddings(base_url="http://${POD_IP_EMBEDQA}:8000/v1", model="nvidia/llama-3.2-nv-embedqa-1b-v2")
|
||||
|
||||
for document in documents:
|
||||
texts = splitter.split_text(document.page_content)
|
||||
|
||||
# metadata to attach to document
|
||||
metadatas = [document.metadata]
|
||||
|
||||
# create embeddings and add to vector store
|
||||
if os.path.exists(dest_embed_dir):
|
||||
update = FAISS.load_local(folder_path=dest_embed_dir, embeddings=embeddings, allow_dangerous_deserialization=True)
|
||||
update.add_texts(texts, metadatas=metadatas)
|
||||
update.save_local(folder_path=dest_embed_dir)
|
||||
else:
|
||||
docsearch = FAISS.from_texts(texts, embedding=embeddings, metadatas=metadatas)
|
||||
docsearch.save_local(folder_path=dest_embed_dir)
|
||||
EOF
|
||||
|
||||
# shellcheck disable=SC2031 # POD_IP_EMBEDQA is shared via file between BATS tests
|
||||
cat <<EOF >>"${HOME}"/.cicd/venv/langchain_nim_kata_rag.py
|
||||
create_embeddings()
|
||||
|
||||
embedding_model = NVIDIAEmbeddings(base_url="http://${POD_IP_EMBEDQA}:8000/v1", model="nvidia/llama-3.2-nv-embedqa-1b-v2")
|
||||
EOF
|
||||
|
||||
cat <<EOF >>"${HOME}"/.cicd/venv/langchain_nim_kata_rag.py
|
||||
# Embed documents
|
||||
embedding_path = "./data/nv_embedding"
|
||||
docsearch = FAISS.load_local(folder_path=embedding_path, embeddings=embedding_model, allow_dangerous_deserialization=True)
|
||||
EOF
|
||||
|
||||
# shellcheck disable=SC2031 # Variables are used in heredoc, not subshell
|
||||
cat <<EOF >>"${HOME}"/.cicd/venv/langchain_nim_kata_rag.py
|
||||
llm = ChatNVIDIA(base_url="http://${POD_IP_INSTRUCT}:8000/v1", model="meta/llama3-8b-instruct", temperature=0.1, max_tokens=1000, top_p=1.0)
|
||||
|
||||
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|
||||
|
||||
qa_prompt=QA_PROMPT
|
||||
|
||||
doc_chain = load_qa_chain(llm, chain_type="stuff", prompt=QA_PROMPT)
|
||||
|
||||
qa = ConversationalRetrievalChain.from_llm(
|
||||
llm=llm,
|
||||
retriever=docsearch.as_retriever(),
|
||||
chain_type="stuff",
|
||||
memory=memory,
|
||||
combine_docs_chain_kwargs={'prompt': qa_prompt},
|
||||
)
|
||||
|
||||
EOF
|
||||
|
||||
QUESTION="What is the latest Kata Containers release?"
|
||||
|
||||
cat <<EOF >>"${HOME}"/.cicd/venv/langchain_nim_kata_rag.py
|
||||
query = "${QUESTION}"
|
||||
result = qa.invoke({"question": query})
|
||||
print("#"+ result.get("answer"))
|
||||
|
||||
EOF
|
||||
|
||||
run python3 "${HOME}"/.cicd/venv/langchain_nim_kata_rag.py
|
||||
[[ "${status}" -eq 0 ]]
|
||||
|
||||
ANSWER=$(echo "${output}" | cut -d '#' -f2)
|
||||
[[ -n "${ANSWER}" ]]
|
||||
|
||||
echo "# QUESTION: ${QUESTION}" >&3
|
||||
echo "# ANSWER: ${ANSWER}" >&3
|
||||
}
|
||||
|
||||
teardown_file() {
    # Remove the resources created from both manifests. The EmbedQA manifest is
    # applied by the first test as well, so it must be deleted here too;
    # otherwise its pod stays behind on the GPU node after the suite finishes.
    kubectl delete -f "${POD_INSTRUCT_YAML}"
    kubectl delete -f "${POD_EMBEDQA_YAML}"
}
|
||||
|
@ -1,4 +1,3 @@
|
||||
|
||||
# Copyright (c) 2025 NVIDIA Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
@ -20,13 +19,13 @@ metadata:
|
||||
app: ${POD_NAME_INSTRUCT}
|
||||
spec:
|
||||
restartPolicy: Never
|
||||
runtimeClassName: kata-qemu-nvidia-gpu
|
||||
runtimeClassName: "${RUNTIME_CLASS_NAME}"
|
||||
imagePullSecrets:
|
||||
- name: ngc-secret-instruct
|
||||
securityContext:
|
||||
runAsUser: 0
|
||||
runAsGroup: 0
|
||||
fsGroup: 0
|
||||
runAsUser: 1000
|
||||
runAsGroup: 1000
|
||||
fsGroup: 1000
|
||||
containers:
|
||||
- name: ${POD_NAME_INSTRUCT}
|
||||
image: nvcr.io/nim/meta/llama3-8b-instruct:1.0.0
|
||||
@ -84,6 +83,6 @@ spec:
|
||||
volumes:
|
||||
- name: nim-cache
|
||||
hostPath:
|
||||
path: "/opr/nim/.cache"
|
||||
path: "${LOCAL_NIM_CACHE}"
|
||||
type: DirectoryOrCreate
|
||||
|
||||
|
@ -7,7 +7,6 @@ apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: ngc-secret-embedqa
|
||||
namespace: nim-embedqa
|
||||
type: kubernetes.io/dockerconfigjson
|
||||
data:
|
||||
.dockerconfigjson: ${DOCKER_CONFIG_JSON}
|
||||
@ -16,7 +15,6 @@ apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: nvidia-nim-llama-3-2-nv-embedqa-1b-v2
|
||||
namespace: nim-embedqa
|
||||
labels:
|
||||
app: nvidia-nim-llama-3-2-nv-embedqa-1b-v2
|
||||
spec:
|
||||
@ -26,9 +24,9 @@ spec:
|
||||
imagePullSecrets:
|
||||
- name: ngc-secret-embedqa
|
||||
securityContext:
|
||||
fsGroup: 0
|
||||
runAsGroup: 0
|
||||
runAsUser: 0
|
||||
fsGroup: 1000
|
||||
runAsGroup: 1000
|
||||
runAsUser: 1000
|
||||
containers:
|
||||
- name: nvidia-nim-llama-3-2-nv-embedqa-1b-v2
|
||||
image: nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2:1.5.0
|
||||
@ -91,5 +89,5 @@ spec:
|
||||
volumes:
|
||||
- name: nim-cache
|
||||
hostPath:
|
||||
path: "/opr/nim/.cache"
|
||||
path: "${LOCAL_NIM_CACHE}"
|
||||
type: DirectoryOrCreate
|
||||
|
Loading…
Reference in New Issue
Block a user