mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-02 03:26:17 +00:00
templates: add RAG template for Intel Xeon Scalable Processors (#18424)
**Description:** This template utilizes Chroma and TGI (Text Generation Inference) to execute RAG on the Intel Xeon Scalable Processors. It serves as a demonstration for users, illustrating the deployment of the RAG service on the Intel Xeon Scalable Processors and showcasing the resulting performance enhancements. **Issue:** None **Dependencies:** The template contains the poetry project requirements to run this template. CPU TGI batching is WIP. **Twitter handle:** None --------- Signed-off-by: lvliang-intel <liang1.lv@intel.com> Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
49
templates/intel-rag-xeon/ingest.py
Normal file
49
templates/intel-rag-xeon/ingest.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import os
|
||||
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
from langchain_community.document_loaders import UnstructuredFileLoader
|
||||
from langchain_community.embeddings import HuggingFaceEmbeddings
|
||||
from langchain_community.vectorstores import Chroma
|
||||
from langchain_core.documents import Document
|
||||
|
||||
|
||||
def ingest_documents():
    """
    Ingest the first PDF found in the data/ directory (Edgar 10k filings
    data for Nike) into a local Chroma vector store.

    Splits the document into overlapping chunks, embeds them with a
    HuggingFace sentence-transformers model, and persists the resulting
    collection to /tmp/xeon_rag_db for the serving side to load.

    Raises:
        FileNotFoundError: if data/ contains no files to ingest.
    """
    # Load list of pdfs
    data_path = "data/"
    candidates = [os.path.join(data_path, file) for file in os.listdir(data_path)]
    if not candidates:
        # Fail with a clear message instead of an opaque IndexError.
        raise FileNotFoundError(f"No documents found in {data_path!r} to ingest")
    # Only the first file is ingested; the directory is expected to hold
    # a single 10k filing PDF.
    doc_path = candidates[0]

    print("Parsing 10k filing doc for NIKE", doc_path)

    # Overlapping chunks preserve context across boundaries; start indices
    # let each chunk be traced back to its position in the source document.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1500, chunk_overlap=100, add_start_index=True
    )
    loader = UnstructuredFileLoader(doc_path, mode="single", strategy="fast")
    chunks = loader.load_and_split(text_splitter)

    print("Done preprocessing. Created", len(chunks), "chunks of the original pdf")

    # Create vectorstore
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    documents = [
        Document(page_content=chunk.page_content, metadata=chunk.metadata)
        for chunk in chunks
    ]

    # Add to vectorDB, persisted on disk so a separate process can reuse it.
    _ = Chroma.from_documents(
        documents=documents,
        collection_name="xeon-rag",
        embedding=embedder,
        persist_directory="/tmp/xeon_rag_db",
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the ingestion pipeline once.
    ingest_documents()
|
Reference in New Issue
Block a user