Template for Ollama + Multi-query retriever (#14092)

2025-09-06 13:33:37 +00:00 · 2023-12-01 08:53:17 -08:00
parent 75312c3694
commit b07a5a9509
8 changed files with 2025 additions and 0 deletions
--- a/templates/rag-ollama-multi-query/rag_ollama_multi_query/chain.py
+++ b/templates/rag-ollama-multi-query/rag_ollama_multi_query/chain.py
@@ -0,0 +1,93 @@
+from typing import List
+
+from langchain.chains import LLMChain
+from langchain.chat_models import ChatOllama, ChatOpenAI
+from langchain.document_loaders import WebBaseLoader
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.output_parsers import PydanticOutputParser
+from langchain.prompts import ChatPromptTemplate, PromptTemplate
+from langchain.pydantic_v1 import BaseModel, Field
+from langchain.retrievers.multi_query import MultiQueryRetriever
+from langchain.schema.output_parser import StrOutputParser
+from langchain.schema.runnable import RunnableParallel, RunnablePassthrough
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import Chroma
+
+# Load: fetch the page below with WebBaseLoader (module-level, so it runs on import)
+loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
+data = loader.load()
+
+# Split: break the loaded documents into chunks of size 500 with no overlap
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
+all_splits = text_splitter.split_documents(data)
+
+# Add to vectorDB: build a Chroma store from the chunks; the retriever below uses it
+vectorstore = Chroma.from_documents(
+    documents=all_splits,
+    collection_name="rag-private",
+    embedding=OpenAIEmbeddings(),  # presumably needs an OpenAI API key; confirm
+)
+
+
+# Schema for the multi-query output: the LLM text is split into a list of queries
+class LineList(BaseModel):
+    # "lines" is the attribute name that MultiQueryRetriever's parser_key points at
+    lines: List[str] = Field(description="Lines of text")
+
+
+class LineListOutputParser(PydanticOutputParser):
+    def __init__(self) -> None:
+        super().__init__(pydantic_object=LineList)
+
+    def parse(self, text: str) -> LineList:
+        lines = text.strip().split("\n")
+        return LineList(lines=lines)
+
+
+output_parser = LineListOutputParser()  # shared parser instance given to llm_chain
+
+QUERY_PROMPT = PromptTemplate(  # asks the LLM for five rewrites of the question
+    input_variables=["question"],  # filled into {question} in the template
+    template="""You are an AI language model assistant. Your task is to generate five
+    different versions of the given user question to retrieve relevant documents from
+    a vector database. By generating multiple perspectives on the user question, your
+    goal is to help the user overcome some of the limitations of the distance-based
+    similarity search. Provide these alternative questions separated by newlines.
+    Original question: {question}""",
+)  # the prompt requests newline-separated output, which the parser splits on
+
+# LLM for query generation: the "zephyr" model downloaded from Ollama
+ollama_llm = "zephyr"  # model name handed to ChatOllama
+llm = ChatOllama(model=ollama_llm)
+
+# Chain: LLMChain combining QUERY_PROMPT, llm and output_parser
+llm_chain = LLMChain(llm=llm, prompt=QUERY_PROMPT, output_parser=output_parser)
+
+# Retriever (built here, not run): vector store retriever + query-generation chain
+retriever = MultiQueryRetriever(
+    retriever=vectorstore.as_retriever(), llm_chain=llm_chain, parser_key="lines"
+)  # "lines" is the key (attribute name) of the parsed output
+
+# RAG prompt: expects {context} and {question}, both supplied by the chain below
+template = """Answer the question based only on the following context:
+{context}
+Question: {question}
+"""
+prompt = ChatPromptTemplate.from_template(template)
+
+# RAG: answer with ChatOpenAI (a different model from the Ollama query generator)
+model = ChatOpenAI()
+chain = (  # context comes from retriever, question via RunnablePassthrough
+    RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
+    | prompt  # fills the RAG template with both keys
+    | model
+    | StrOutputParser()  # presumably reduces the model message to a str; confirm
+)
+
+
+# Input schema for the chain: a pydantic model whose root value is a plain str
+class Question(BaseModel):
+    __root__: str
+
+
+chain = chain.with_types(input_type=Question)  # rebind chain with the typed input