Redis langserve template (#12443)

Add Redis LangServe template! Semantic caching will eventually be added to this too, but I was struggling to get that to work for some reason with the LCEL implementation here. - **Description:** Introduces the Redis LangServe template, a simple RAG-based app built on top of Redis that allows you to chat with a company's public financial data (Edgar 10k filings). - **Issue:** None - **Dependencies:** The template contains the poetry project requirements needed to run it. - **Tag maintainer:** @baskaryan @Spartee - **Twitter handle:** @tchutch94 **Note**: this requires the commit here that deletes the unimplemented `_aget_relevant_documents()` method from the Redis retriever class; that method was breaking the LangServe app. --------- Co-authored-by: Sam Partee <sam.partee@redis.com>
2025-09-01 19:12:42 +00:00 · 2023-10-28 11:31:12 -04:00
parent 9adaa78c65
commit 4209457bdc
13 changed files with 4775 additions and 9 deletions
--- a/templates/rag-redis/ingest.py
+++ b/templates/rag-redis/ingest.py
@@ -0,0 +1,49 @@
+import os
+
+from langchain.document_loaders import UnstructuredFileLoader
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import Redis
+from rag_redis.config import EMBED_MODEL, INDEX_NAME, INDEX_SCHEMA, REDIS_URL
+
+
+def ingest_documents():
+    """
+    Ingest PDF to Redis from the data/ directory that
+    contains Edgar 10k filings data for Nike.
+    """
+    # Load list of pdfs
+    company_name = "Nike"
+    data_path = "data/"
+    doc = [
+        os.path.join(data_path, file) for file in os.listdir(data_path)
+    ][0]
+
+    print("Parsing 10k filing doc for NIKE", doc)
+
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=1500, chunk_overlap=100, add_start_index=True
+    )
+    loader = UnstructuredFileLoader(doc, mode="single", strategy="fast")
+    chunks = loader.load_and_split(text_splitter)
+
+    print("Done preprocessing. Created", len(chunks), "chunks of the original pdf")
+    # Create vectorstore
+    embedder = HuggingFaceEmbeddings(
+        model_name=EMBED_MODEL
+    )
+
+    _ = Redis.from_texts(
+        # appending this little bit can sometimes help with semantic retrieval
+        # especially with multiple companies
+        texts=[f"Company: {company_name}. " + chunk.page_content for chunk in chunks],
+        metadatas=[chunk.metadata for chunk in chunks],
+        embedding=embedder,
+        index_name=INDEX_NAME,
+        index_schema=INDEX_SCHEMA,
+        redis_url=REDIS_URL
+    )
+
+
+if __name__ == "__main__":  # only ingest when executed directly as a script
+    ingest_documents()