mirror of
https://github.com/kata-containers/kata-containers.git
synced 2025-07-08 12:54:05 +00:00
gpu: Add NIM bats test
We're running a simple NIM container to test if the GPUs are working properly Signed-off-by: Zvonko Kaiser <zkaiser@nvidia.com>
This commit is contained in:
parent
79bf86d71d
commit
c023817a8e
@ -64,6 +64,8 @@ jobs:
|
|||||||
- name: Run tests
|
- name: Run tests
|
||||||
timeout-minutes: 30
|
timeout-minutes: 30
|
||||||
run: bash tests/integration/kubernetes/gha-run.sh run-nv-tests
|
run: bash tests/integration/kubernetes/gha-run.sh run-nv-tests
|
||||||
|
env:
|
||||||
|
NGC_API_KEY: ${{ secrets.NGC_API_KEY }}
|
||||||
|
|
||||||
- name: Collect artifacts ${{ matrix.vmm }}
|
- name: Collect artifacts ${{ matrix.vmm }}
|
||||||
if: always()
|
if: always()
|
||||||
|
241
tests/integration/kubernetes/k8s-nvidia-nim.bats
Normal file
241
tests/integration/kubernetes/k8s-nvidia-nim.bats
Normal file
@ -0,0 +1,241 @@
|
|||||||
|
#!/usr/bin/env bats
|
||||||
|
#
|
||||||
|
# Copyright (c) 2025 NVIDIA Corporation
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
#
|
||||||
|
|
||||||
|
load "${BATS_TEST_DIRNAME}/../../common.bash"
|
||||||
|
load "${BATS_TEST_DIRNAME}/tests_common.sh"
|
||||||
|
|
||||||
|
export POD_NAME="nvidia-nim-llama-3-1-8b-instruct"
|
||||||
|
export DOCKER_CONFIG_JSON=$(
|
||||||
|
echo -n "{\"auths\":{\"nvcr.io\":{\"username\":\"\$oauthtoken\",\"password\":\"${NGC_API_KEY}\",\"auth\":\"$(echo -n "\$oauthtoken:${NGC_API_KEY}" | base64 -w0)\"}}}" \
|
||||||
|
| base64 -w0
|
||||||
|
)
|
||||||
|
|
||||||
|
setup() {
|
||||||
|
dpkg -s python3-pip 2>&1 >/dev/null || sudo apt -y install python3-pip
|
||||||
|
dpkg -s python3-venv 2>&1 >/dev/null || sudo apt -y install python3-venv
|
||||||
|
|
||||||
|
python3 -m venv ${HOME}/.cicd/venv
|
||||||
|
|
||||||
|
get_pod_config_dir
|
||||||
|
|
||||||
|
pod_yaml_in="${pod_config_dir}/pod-nvidia-nim-llama-3.1-8b-instruct.yaml.in"
|
||||||
|
pod_yaml="${pod_config_dir}/pod-nvidia-nim-llama-3.1-8b-instruct.yaml"
|
||||||
|
|
||||||
|
envsubst < "${pod_yaml_in}" > "${pod_yaml}"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@test "NVIDIA NIM Llama 3.1-8b Instruct" {
|
||||||
|
kubectl apply -f "${pod_yaml}"
|
||||||
|
kubectl wait --for=condition=Ready --timeout=500s pod "${POD_NAME}"
|
||||||
|
export POD_IP=$(kubectl get pod "${POD_NAME}" -o jsonpath='{.status.podIP}')
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "List of models available for inference" {
|
||||||
|
export MODEL_NAME=$(curl -sX GET "http://${POD_IP}:8000/v1/models" | jq .data[0].id | tr -d '"')
|
||||||
|
echo $MODEL_NAME
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "Simple OpenAI completion request" {
|
||||||
|
curl -X 'POST' \
|
||||||
|
"http://${POD_IP}:8000/v1/completions" \
|
||||||
|
-H "accept: application/json" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d "{\"model\": \"${MODEL_NAME}\", \"prompt\": \"Once upon a time\", \"max_tokens\": 64}" | jq .choices[0].text
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@test "Setup the LangChain flow" {
|
||||||
|
source ${HOME}/.cicd/venv/bin/activate
|
||||||
|
pip install --upgrade pip
|
||||||
|
pip install langchain=="0.2.5"
|
||||||
|
pip install langchain-nvidia-ai-endpoints=="0.1.2"
|
||||||
|
pip install faiss-cpu=="1.10.0"
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "LangChain NVIDIA AI Endpoints" {
|
||||||
|
source ${HOME}/.cicd/venv/bin/activate
|
||||||
|
cat <<-EOF > ${HOME}/.cicd/venv/langchain_nim.py
|
||||||
|
from langchain_nvidia_ai_endpoints import ChatNVIDIA
|
||||||
|
|
||||||
|
llm = ChatNVIDIA(base_url="http://${POD_IP}:8000/v1", model="${MODEL_NAME}", temperature=0.1, max_tokens=1000, top_p=1.0)
|
||||||
|
|
||||||
|
result = llm.invoke("What is the capital of France?")
|
||||||
|
print(result.content)
|
||||||
|
EOF
|
||||||
|
run python3.10 ${HOME}/.cicd/venv/langchain_nim.py
|
||||||
|
|
||||||
|
[ "$status" -eq 0 ]
|
||||||
|
[ "$output" = "The capital of France is Paris." ]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "Kata Documentation RAG" {
|
||||||
|
source ${HOME}/.cicd/venv/bin/activate
|
||||||
|
cat <<EOF > ${HOME}/.cicd/venv/langchain_nim_kata_rag.py
|
||||||
|
import os
|
||||||
|
from langchain.chains import ConversationalRetrievalChain, LLMChain
|
||||||
|
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT
|
||||||
|
from langchain.chains.question_answering import load_qa_chain
|
||||||
|
from langchain.memory import ConversationBufferMemory
|
||||||
|
from langchain_community.vectorstores import FAISS
|
||||||
|
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
||||||
|
from langchain_nvidia_ai_endpoints import ChatNVIDIA
|
||||||
|
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
|
||||||
|
EOF
|
||||||
|
|
||||||
|
cat <<EOF >> ${HOME}/.cicd/venv/langchain_nim_kata_rag.py
|
||||||
|
import re
|
||||||
|
from typing import List, Union
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
def html_document_loader(url: Union[str, bytes]) -> str:
|
||||||
|
"""
|
||||||
|
Loads the HTML content of a document from a given URL and return it's content.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: The URL of the document.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The content of the document.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
Exception: If there is an error while making the HTTP request.
|
||||||
|
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
response = requests.get(url)
|
||||||
|
html_content = response.text
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Failed to load {url} due to exception {e}")
|
||||||
|
return ""
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Create a Beautiful Soup object to parse html
|
||||||
|
soup = BeautifulSoup(html_content, "html.parser")
|
||||||
|
|
||||||
|
# Remove script and style tags
|
||||||
|
for script in soup(["script", "style"]):
|
||||||
|
script.extract()
|
||||||
|
|
||||||
|
# Get the plain text from the HTML document
|
||||||
|
text = soup.get_text()
|
||||||
|
|
||||||
|
# Remove excess whitespace and newlines
|
||||||
|
text = re.sub("\s+", " ", text).strip()
|
||||||
|
|
||||||
|
return text
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Exception {e} while loading document")
|
||||||
|
return ""
|
||||||
|
EOF
|
||||||
|
|
||||||
|
cat <<EOF >> ${HOME}/.cicd/venv/langchain_nim_kata_rag.py
|
||||||
|
def create_embeddings(embedding_path: str = "./data/nv_embedding"):
|
||||||
|
|
||||||
|
embedding_path = "./data/nv_embedding"
|
||||||
|
print(f"Storing embeddings to {embedding_path}")
|
||||||
|
|
||||||
|
# List of web pages containing Kata technical documentation
|
||||||
|
urls = [
|
||||||
|
"https://katacontainers.io/",
|
||||||
|
"https://katacontainers.io/learn/",
|
||||||
|
]
|
||||||
|
|
||||||
|
documents = []
|
||||||
|
for url in urls:
|
||||||
|
document = html_document_loader(url)
|
||||||
|
documents.append(document)
|
||||||
|
|
||||||
|
text_splitter = RecursiveCharacterTextSplitter(
|
||||||
|
chunk_size=1000,
|
||||||
|
chunk_overlap=0,
|
||||||
|
length_function=len,
|
||||||
|
)
|
||||||
|
texts = text_splitter.create_documents(documents)
|
||||||
|
index_docs(url, text_splitter, texts, embedding_path)
|
||||||
|
print("Generated embedding successfully")
|
||||||
|
EOF
|
||||||
|
|
||||||
|
cat <<EOF >> ${HOME}/.cicd/venv/langchain_nim_kata_rag.py
|
||||||
|
def index_docs(url: Union[str, bytes], splitter, documents: List[str], dest_embed_dir) -> None:
|
||||||
|
"""
|
||||||
|
Split the document into chunks and create embeddings for the document
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: Source url for the document.
|
||||||
|
splitter: Splitter used to split the document
|
||||||
|
documents: list of documents whose embeddings needs to be created
|
||||||
|
dest_embed_dir: destination directory for embeddings
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
None
|
||||||
|
"""
|
||||||
|
embeddings = NVIDIAEmbeddings(model="NV-Embed-QA", truncate="END")
|
||||||
|
|
||||||
|
for document in documents:
|
||||||
|
texts = splitter.split_text(document.page_content)
|
||||||
|
|
||||||
|
# metadata to attach to document
|
||||||
|
metadatas = [document.metadata]
|
||||||
|
|
||||||
|
# create embeddings and add to vector store
|
||||||
|
if os.path.exists(dest_embed_dir):
|
||||||
|
update = FAISS.load_local(folder_path=dest_embed_dir, embeddings=embeddings, allow_dangerous_deserialization=True)
|
||||||
|
update.add_texts(texts, metadatas=metadatas)
|
||||||
|
update.save_local(folder_path=dest_embed_dir)
|
||||||
|
else:
|
||||||
|
docsearch = FAISS.from_texts(texts, embedding=embeddings, metadatas=metadatas)
|
||||||
|
docsearch.save_local(folder_path=dest_embed_dir)
|
||||||
|
EOF
|
||||||
|
|
||||||
|
cat <<EOF >> ${HOME}/.cicd/venv/langchain_nim_kata_rag.py
|
||||||
|
create_embeddings()
|
||||||
|
|
||||||
|
embedding_model = NVIDIAEmbeddings(model="NV-Embed-QA", truncate="END")
|
||||||
|
EOF
|
||||||
|
|
||||||
|
cat <<EOF >> ${HOME}/.cicd/venv/langchain_nim_kata_rag.py
|
||||||
|
# Embed documents
|
||||||
|
embedding_path = "./data/nv_embedding"
|
||||||
|
docsearch = FAISS.load_local(folder_path=embedding_path, embeddings=embedding_model, allow_dangerous_deserialization=True)
|
||||||
|
EOF
|
||||||
|
|
||||||
|
cat <<EOF >> ${HOME}/.cicd/venv/langchain_nim_kata_rag.py
|
||||||
|
llm = ChatNVIDIA(base_url="http://${POD_IP}:8000/v1", model="${MODEL_NAME}", temperature=0.1, max_tokens=1000, top_p=1.0)
|
||||||
|
|
||||||
|
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|
||||||
|
|
||||||
|
qa_prompt=QA_PROMPT
|
||||||
|
|
||||||
|
doc_chain = load_qa_chain(llm, chain_type="stuff", prompt=QA_PROMPT)
|
||||||
|
|
||||||
|
qa = ConversationalRetrievalChain.from_llm(
|
||||||
|
llm=llm,
|
||||||
|
retriever=docsearch.as_retriever(),
|
||||||
|
chain_type="stuff",
|
||||||
|
memory=memory,
|
||||||
|
combine_docs_chain_kwargs={'prompt': qa_prompt},
|
||||||
|
)
|
||||||
|
EOF
|
||||||
|
|
||||||
|
cat <<EOF >> ${HOME}/.cicd/venv/langchain_nim_kata_rag.py
|
||||||
|
query = "What is Kata Containers?"
|
||||||
|
result = qa({"question": query})
|
||||||
|
print(result.get("answer"))
|
||||||
|
EOF
|
||||||
|
|
||||||
|
run python3.10 ${HOME}/.cicd/venv/langchain_nim_kata_rag.py
|
||||||
|
|
||||||
|
# [ "$status" -eq 0 ]
|
||||||
|
# [ "$output" = "The NVIDIA Jetson Nano Developer Kit is a small, powerful computer designed for AI and robotics applications." ]
|
||||||
|
}
|
||||||
|
teardown() {
|
||||||
|
kubectl describe "pod/$POD_NAME"
|
||||||
|
kubectl delete pod "$POD_NAME"
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user