diff --git a/.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml b/.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml index 64502b4c19..d49a34e537 100644 --- a/.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml +++ b/.github/workflows/run-k8s-tests-on-nvidia-gpu.yaml @@ -64,6 +64,8 @@ jobs: - name: Run tests timeout-minutes: 30 run: bash tests/integration/kubernetes/gha-run.sh run-nv-tests + env: + NGC_API_KEY: ${{ secrets.NGC_API_KEY }} - name: Collect artifacts ${{ matrix.vmm }} if: always() diff --git a/tests/integration/kubernetes/k8s-nvidia-nim.bats b/tests/integration/kubernetes/k8s-nvidia-nim.bats new file mode 100644 index 0000000000..80bf80657d --- /dev/null +++ b/tests/integration/kubernetes/k8s-nvidia-nim.bats @@ -0,0 +1,241 @@ +#!/usr/bin/env bats +# +# Copyright (c) 2025 NVIDIA Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +load "${BATS_TEST_DIRNAME}/../../common.bash" +load "${BATS_TEST_DIRNAME}/tests_common.sh" + +export POD_NAME="nvidia-nim-llama-3-1-8b-instruct" +export DOCKER_CONFIG_JSON=$( + echo -n "{\"auths\":{\"nvcr.io\":{\"username\":\"\$oauthtoken\",\"password\":\"${NGC_API_KEY}\",\"auth\":\"$(echo -n "\$oauthtoken:${NGC_API_KEY}" | base64 -w0)\"}}}" \ + | base64 -w0 + ) + +setup() { + dpkg -s python3-pip 2>&1 >/dev/null || sudo apt -y install python3-pip + dpkg -s python3-venv 2>&1 >/dev/null || sudo apt -y install python3-venv + + python3 -m venv ${HOME}/.cicd/venv + + get_pod_config_dir + + pod_yaml_in="${pod_config_dir}/pod-nvidia-nim-llama-3.1-8b-instruct.yaml.in" + pod_yaml="${pod_config_dir}/pod-nvidia-nim-llama-3.1-8b-instruct.yaml" + + envsubst < "${pod_yaml_in}" > "${pod_yaml}" +} + + +@test "NVIDIA NIM Llama 3.1-8b Instruct" { + kubectl apply -f "${pod_yaml}" + kubectl wait --for=condition=Ready --timeout=500s pod "${POD_NAME}" + export POD_IP=$(kubectl get pod "${POD_NAME}" -o jsonpath='{.status.podIP}') +} + +@test "List of models available for inference" { + export MODEL_NAME=$(curl -sX GET "http://${POD_IP}:8000/v1/models" | jq .data[0].id | tr -d '"') + echo $MODEL_NAME +} + +@test "Simple OpenAI completion request" { + curl -X 'POST' \ + "http://${POD_IP}:8000/v1/completions" \ + -H "accept: application/json" \ + -H "Content-Type: application/json" \ + -d "{\"model\": \"${MODEL_NAME}\", \"prompt\": \"Once upon a time\", \"max_tokens\": 64}" | jq .choices[0].text +} + + +@test "Setup the LangChain flow" { + source ${HOME}/.cicd/venv/bin/activate + pip install --upgrade pip + pip install langchain=="0.2.5" + pip install langchain-nvidia-ai-endpoints=="0.1.2" + pip install faiss-cpu=="1.10.0" +} + +@test "LangChain NVIDIA AI Endpoints" { + source ${HOME}/.cicd/venv/bin/activate + cat <<-EOF > ${HOME}/.cicd/venv/langchain_nim.py + from langchain_nvidia_ai_endpoints import ChatNVIDIA + + llm = ChatNVIDIA(base_url="http://${POD_IP}:8000/v1", model="${MODEL_NAME}", temperature=0.1, max_tokens=1000, top_p=1.0) + + result = llm.invoke("What is the capital of France?") + print(result.content) + EOF + run python3.10 ${HOME}/.cicd/venv/langchain_nim.py + + [ "$status" -eq 0 ] + [ "$output" = "The capital of France is Paris." ] +} + +@test "Kata Documentation RAG" { + source ${HOME}/.cicd/venv/bin/activate + cat < ${HOME}/.cicd/venv/langchain_nim_kata_rag.py +import os +from langchain.chains import ConversationalRetrievalChain, LLMChain +from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT +from langchain.chains.question_answering import load_qa_chain +from langchain.memory import ConversationBufferMemory +from langchain_community.vectorstores import FAISS +from langchain_text_splitters import RecursiveCharacterTextSplitter +from langchain_nvidia_ai_endpoints import ChatNVIDIA +from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings +EOF + + cat <> ${HOME}/.cicd/venv/langchain_nim_kata_rag.py +import re +from typing import List, Union + +import requests +from bs4 import BeautifulSoup + +def html_document_loader(url: Union[str, bytes]) -> str: + """ + Loads the HTML content of a document from a given URL and return it's content. + + Args: + url: The URL of the document. + + Returns: + The content of the document. + + Raises: + Exception: If there is an error while making the HTTP request. + + """ + try: + response = requests.get(url) + html_content = response.text + except Exception as e: + print(f"Failed to load {url} due to exception {e}") + return "" + + try: + # Create a Beautiful Soup object to parse html + soup = BeautifulSoup(html_content, "html.parser") + + # Remove script and style tags + for script in soup(["script", "style"]): + script.extract() + + # Get the plain text from the HTML document + text = soup.get_text() + + # Remove excess whitespace and newlines + text = re.sub("\s+", " ", text).strip() + + return text + except Exception as e: + print(f"Exception {e} while loading document") + return "" +EOF + + cat <> ${HOME}/.cicd/venv/langchain_nim_kata_rag.py +def create_embeddings(embedding_path: str = "./data/nv_embedding"): + + embedding_path = "./data/nv_embedding" + print(f"Storing embeddings to {embedding_path}") + + # List of web pages containing Kata technical documentation + urls = [ + "https://katacontainers.io/", + "https://katacontainers.io/learn/", + ] + + documents = [] + for url in urls: + document = html_document_loader(url) + documents.append(document) + + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=1000, + chunk_overlap=0, + length_function=len, + ) + texts = text_splitter.create_documents(documents) + index_docs(url, text_splitter, texts, embedding_path) + print("Generated embedding successfully") +EOF + + cat <> ${HOME}/.cicd/venv/langchain_nim_kata_rag.py +def index_docs(url: Union[str, bytes], splitter, documents: List[str], dest_embed_dir) -> None: + """ + Split the document into chunks and create embeddings for the document + + Args: + url: Source url for the document. + splitter: Splitter used to split the document + documents: list of documents whose embeddings needs to be created + dest_embed_dir: destination directory for embeddings + + Returns: + None + """ + embeddings = NVIDIAEmbeddings(model="NV-Embed-QA", truncate="END") + + for document in documents: + texts = splitter.split_text(document.page_content) + + # metadata to attach to document + metadatas = [document.metadata] + + # create embeddings and add to vector store + if os.path.exists(dest_embed_dir): + update = FAISS.load_local(folder_path=dest_embed_dir, embeddings=embeddings, allow_dangerous_deserialization=True) + update.add_texts(texts, metadatas=metadatas) + update.save_local(folder_path=dest_embed_dir) + else: + docsearch = FAISS.from_texts(texts, embedding=embeddings, metadatas=metadatas) + docsearch.save_local(folder_path=dest_embed_dir) +EOF + + cat <> ${HOME}/.cicd/venv/langchain_nim_kata_rag.py +create_embeddings() + +embedding_model = NVIDIAEmbeddings(model="NV-Embed-QA", truncate="END") +EOF + + cat <> ${HOME}/.cicd/venv/langchain_nim_kata_rag.py +# Embed documents +embedding_path = "./data/nv_embedding" +docsearch = FAISS.load_local(folder_path=embedding_path, embeddings=embedding_model, allow_dangerous_deserialization=True) +EOF + + cat <> ${HOME}/.cicd/venv/langchain_nim_kata_rag.py +llm = ChatNVIDIA(base_url="http://${POD_IP}:8000/v1", model="${MODEL_NAME}", temperature=0.1, max_tokens=1000, top_p=1.0) + +memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True) + +qa_prompt=QA_PROMPT + +doc_chain = load_qa_chain(llm, chain_type="stuff", prompt=QA_PROMPT) + +qa = ConversationalRetrievalChain.from_llm( + llm=llm, + retriever=docsearch.as_retriever(), + chain_type="stuff", + memory=memory, + combine_docs_chain_kwargs={'prompt': qa_prompt}, +) +EOF + + cat <> ${HOME}/.cicd/venv/langchain_nim_kata_rag.py +query = "What is Kata Containers?" +result = qa({"question": query}) +print(result.get("answer")) +EOF + + run python3.10 ${HOME}/.cicd/venv/langchain_nim_kata_rag.py + +# [ "$status" -eq 0 ] +# [ "$output" = "The NVIDIA Jetson Nano Developer Kit is a small, powerful computer designed for AI and robotics applications." ] +} +teardown() { + kubectl describe "pod/$POD_NAME" + kubectl delete pod "$POD_NAME" +}