diff --git a/templates/rag-chroma-dense-retrieval/.gitignore b/templates/propositional-retrieval/.gitignore similarity index 100% rename from templates/rag-chroma-dense-retrieval/.gitignore rename to templates/propositional-retrieval/.gitignore diff --git a/templates/rag-chroma-dense-retrieval/LICENSE b/templates/propositional-retrieval/LICENSE similarity index 100% rename from templates/rag-chroma-dense-retrieval/LICENSE rename to templates/propositional-retrieval/LICENSE diff --git a/templates/rag-chroma-dense-retrieval/README.md b/templates/propositional-retrieval/README.md similarity index 77% rename from templates/rag-chroma-dense-retrieval/README.md rename to templates/propositional-retrieval/README.md index 986d7d782b3..da2d33b3faf 100644 --- a/templates/rag-chroma-dense-retrieval/README.md +++ b/templates/propositional-retrieval/README.md @@ -1,8 +1,8 @@ -# rag-chroma-dense-retrieval +# propositional-retrieval This template demonstrates the multi-vector indexing strategy proposed by Chen, et. al.'s [Dense X Retrieval: What Retrieval Granularity Should We Use?](https://arxiv.org/abs/2312.06648). The prompt, which you can [try out on the hub](https://smith.langchain.com/hub/wfh/proposal-indexing), directs an LLM to generate de-contextualized "propositions" which can be vectorized to increase the retrieval accuracy. You can see the full definition in `proposal_chain.py`. -![Retriever Diagram](./_images/retriever_diagram.png) +![Retriever Diagram](https://github.com/langchain-ai/langchain/raw/master/templates/propositional-retrieval/_images/retriever_diagram.png) ## Storage @@ -18,7 +18,7 @@ Create the index by running the following: ```python poetry install -poetry run python rag_chroma_dense_retrieval/ingest.py +poetry run python propositional_retrieval/ingest.py ``` ## Usage @@ -32,21 +32,21 @@ pip install -U langchain-cli To create a new LangChain project and install this as the only package, you can do: ```shell -langchain app new my-app --package rag-chroma-dense-retrieval +langchain app new my-app --package propositional-retrieval ``` If you want to add this to an existing project, you can just run: ```shell -langchain app add rag-chroma-dense-retrieval +langchain app add propositional-retrieval ``` And add the following code to your `server.py` file: ```python -from rag_chroma_dense_retrieval import chain +from propositional_retrieval import chain -add_routes(app, chain, path="/rag-chroma-dense-retrieval") +add_routes(app, chain, path="/propositional-retrieval") ``` (Optional) Let's now configure LangSmith. @@ -70,12 +70,12 @@ This will start the FastAPI app with a server is running locally at [http://localhost:8000](http://localhost:8000) We can see all templates at [http://127.0.0.1:8000/docs](http://127.0.0.1:8000/docs) -We can access the playground at [http://127.0.0.1:8000/rag-chroma-dense-retrieval/playground](http://127.0.0.1:8000/rag-chroma-dense-retrieval/playground) +We can access the playground at [http://127.0.0.1:8000/propositional-retrieval/playground](http://127.0.0.1:8000/propositional-retrieval/playground) We can access the template from code with: ```python from langserve.client import RemoteRunnable -runnable = RemoteRunnable("http://localhost:8000/rag-chroma-dense-retrieval") +runnable = RemoteRunnable("http://localhost:8000/propositional-retrieval") ``` diff --git a/templates/rag-chroma-dense-retrieval/_images/retriever_diagram.png b/templates/propositional-retrieval/_images/retriever_diagram.png similarity index 100% rename from templates/rag-chroma-dense-retrieval/_images/retriever_diagram.png rename to templates/propositional-retrieval/_images/retriever_diagram.png diff --git a/templates/rag-chroma-dense-retrieval/poetry.lock b/templates/propositional-retrieval/poetry.lock similarity index 100% rename from templates/rag-chroma-dense-retrieval/poetry.lock rename to templates/propositional-retrieval/poetry.lock diff --git a/templates/rag-chroma-dense-retrieval/rag_chroma_dense_retrieval.ipynb b/templates/propositional-retrieval/propositional_retrieval.ipynb similarity index 86% rename from templates/rag-chroma-dense-retrieval/rag_chroma_dense_retrieval.ipynb rename to templates/propositional-retrieval/propositional_retrieval.ipynb index 70319cc6a65..98b37e29d4b 100644 --- a/templates/rag-chroma-dense-retrieval/rag_chroma_dense_retrieval.ipynb +++ b/templates/propositional-retrieval/propositional_retrieval.ipynb @@ -12,7 +12,7 @@ "```\n", "from fastapi import FastAPI\n", "from langserve import add_routes\n", - "from rag_chroma_dense_retrieval import chain\n", + "from propositional_retrieval import chain\n", "\n", "app = FastAPI(\n", " title=\"LangChain Server\",\n", @@ -20,7 +20,7 @@ " description=\"Retriever and Generator for RAG Chroma Dense Retrieval\",\n", ")\n", "\n", - "add_routes(app, chain, path=\"/rag-chroma-dense-retrieval\")\n", + "add_routes(app, chain, path=\"/propositional-retrieval\")\n", "\n", "if __name__ == \"__main__\":\n", " import uvicorn\n", @@ -39,7 +39,7 @@ "source": [ "from langserve.client import RemoteRunnable\n", "\n", - "rag_app = RemoteRunnable(\"http://localhost:8001/rag-chroma-dense-retrieval\")\n", + "rag_app = RemoteRunnable(\"http://localhost:8001/propositional-retrieval\")\n", "rag_app.invoke(\"How are transformers related to convolutional neural networks?\")" ] } diff --git a/templates/propositional-retrieval/propositional_retrieval/__init__.py b/templates/propositional-retrieval/propositional_retrieval/__init__.py new file mode 100644 index 00000000000..e83774b0078 --- /dev/null +++ b/templates/propositional-retrieval/propositional_retrieval/__init__.py @@ -0,0 +1,4 @@ +from propositional_retrieval.chain import chain +from propositional_retrieval.proposal_chain import proposition_chain + +__all__ = ["chain", "proposition_chain"] diff --git a/templates/rag-chroma-dense-retrieval/rag_chroma_dense_retrieval/chain.py b/templates/propositional-retrieval/propositional_retrieval/chain.py similarity index 92% rename from templates/rag-chroma-dense-retrieval/rag_chroma_dense_retrieval/chain.py rename to templates/propositional-retrieval/propositional_retrieval/chain.py index bdb82edd8ee..468e694e900 100644 --- a/templates/rag-chroma-dense-retrieval/rag_chroma_dense_retrieval/chain.py +++ b/templates/propositional-retrieval/propositional_retrieval/chain.py @@ -5,8 +5,8 @@ from langchain_core.prompts import ChatPromptTemplate from langchain_core.pydantic_v1 import BaseModel from langchain_core.runnables import RunnablePassthrough -from rag_chroma_dense_retrieval.constants import DOCSTORE_ID_KEY -from rag_chroma_dense_retrieval.storage import get_multi_vector_retriever +from propositional_retrieval.constants import DOCSTORE_ID_KEY +from propositional_retrieval.storage import get_multi_vector_retriever def format_docs(docs: list) -> str: diff --git a/templates/rag-chroma-dense-retrieval/rag_chroma_dense_retrieval/constants.py b/templates/propositional-retrieval/propositional_retrieval/constants.py similarity index 100% rename from templates/rag-chroma-dense-retrieval/rag_chroma_dense_retrieval/constants.py rename to templates/propositional-retrieval/propositional_retrieval/constants.py diff --git a/templates/rag-chroma-dense-retrieval/rag_chroma_dense_retrieval/ingest.py b/templates/propositional-retrieval/propositional_retrieval/ingest.py similarity index 80% rename from templates/rag-chroma-dense-retrieval/rag_chroma_dense_retrieval/ingest.py rename to templates/propositional-retrieval/propositional_retrieval/ingest.py index b6dfd027e9a..83b233acb40 100644 --- a/templates/rag-chroma-dense-retrieval/rag_chroma_dense_retrieval/ingest.py +++ b/templates/propositional-retrieval/propositional_retrieval/ingest.py @@ -2,12 +2,13 @@ import logging import uuid from typing import Sequence +from bs4 import BeautifulSoup as Soup from langchain_core.documents import Document from langchain_core.runnables import Runnable -from rag_chroma_dense_retrieval.constants import DOCSTORE_ID_KEY -from rag_chroma_dense_retrieval.proposal_chain import proposition_chain -from rag_chroma_dense_retrieval.storage import get_multi_vector_retriever +from propositional_retrieval.constants import DOCSTORE_ID_KEY +from propositional_retrieval.proposal_chain import proposition_chain +from propositional_retrieval.storage import get_multi_vector_retriever logging.basicConfig(level=logging.INFO) @@ -48,7 +49,9 @@ def create_index( """ logger.info("Creating multi-vector retriever") retriever = get_multi_vector_retriever(docstore_id_key) - propositions = indexer.batch([{"input": doc.page_content} for doc in docs]) + propositions = indexer.batch( + [{"input": doc.page_content} for doc in docs], {"max_concurrency": 10} + ) add_documents( retriever, @@ -69,12 +72,15 @@ if __name__ == "__main__": # The attention is all you need paper # Could add more parsing here, as it's very raw. - loader = RecursiveUrlLoader("https://ar5iv.labs.arxiv.org/html/1706.03762") + loader = RecursiveUrlLoader( + "https://ar5iv.labs.arxiv.org/html/1706.03762", + max_depth=2, + extractor=lambda x: Soup(x, "html.parser").text, + ) data = loader.load() logger.info(f"Loaded {len(data)} documents") # Split - text_splitter = RecursiveCharacterTextSplitter(chunk_size=8000, chunk_overlap=0) all_splits = text_splitter.split_documents(data) logger.info(f"Split into {len(all_splits)} documents") diff --git a/templates/rag-chroma-dense-retrieval/rag_chroma_dense_retrieval/proposal_chain.py b/templates/propositional-retrieval/propositional_retrieval/proposal_chain.py similarity index 100% rename from templates/rag-chroma-dense-retrieval/rag_chroma_dense_retrieval/proposal_chain.py rename to templates/propositional-retrieval/propositional_retrieval/proposal_chain.py diff --git a/templates/rag-chroma-dense-retrieval/rag_chroma_dense_retrieval/storage.py b/templates/propositional-retrieval/propositional_retrieval/storage.py similarity index 100% rename from templates/rag-chroma-dense-retrieval/rag_chroma_dense_retrieval/storage.py rename to templates/propositional-retrieval/propositional_retrieval/storage.py diff --git a/templates/rag-chroma-dense-retrieval/pyproject.toml b/templates/propositional-retrieval/pyproject.toml similarity index 86% rename from templates/rag-chroma-dense-retrieval/pyproject.toml rename to templates/propositional-retrieval/pyproject.toml index 7945c63a5b8..169ddc824e8 100644 --- a/templates/rag-chroma-dense-retrieval/pyproject.toml +++ b/templates/propositional-retrieval/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] -name = "rag-chroma-dense-retrieval" +name = "propositional-retrieval" version = "0.1.0" -description = "Dense retrieval using vectorized propositions.s" +description = "Dense retrieval using vectorized propositions." authors = [ "William Fu-Hinthorn ", ] diff --git a/templates/rag-chroma-dense-retrieval/tests/__init__.py b/templates/propositional-retrieval/tests/__init__.py similarity index 100% rename from templates/rag-chroma-dense-retrieval/tests/__init__.py rename to templates/propositional-retrieval/tests/__init__.py diff --git a/templates/rag-chroma-dense-retrieval/rag_chroma_dense_retrieval/__init__.py b/templates/rag-chroma-dense-retrieval/rag_chroma_dense_retrieval/__init__.py deleted file mode 100644 index 11e2b90c0cc..00000000000 --- a/templates/rag-chroma-dense-retrieval/rag_chroma_dense_retrieval/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from rag_chroma_dense_retrieval.chain import chain -from rag_chroma_dense_retrieval.proposal_chain import proposition_chain - -__all__ = ["chain", "proposition_chain"]