mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-05 03:02:35 +00:00
Update propositional-retrieval template (#14766)
More descriptive name. Add parser in ingest. Update image link
This commit is contained in:
parent
4855964332
commit
65091ebe50
@ -1,8 +1,8 @@
|
|||||||
# rag-chroma-dense-retrieval
|
# propositional-retrieval
|
||||||
|
|
||||||
This template demonstrates the multi-vector indexing strategy proposed by Chen et al.'s [Dense X Retrieval: What Retrieval Granularity Should We Use?](https://arxiv.org/abs/2312.06648). The prompt, which you can [try out on the hub](https://smith.langchain.com/hub/wfh/proposal-indexing), directs an LLM to generate de-contextualized "propositions" which can be vectorized to increase the retrieval accuracy. You can see the full definition in `proposal_chain.py`.
|
This template demonstrates the multi-vector indexing strategy proposed by Chen et al.'s [Dense X Retrieval: What Retrieval Granularity Should We Use?](https://arxiv.org/abs/2312.06648). The prompt, which you can [try out on the hub](https://smith.langchain.com/hub/wfh/proposal-indexing), directs an LLM to generate de-contextualized "propositions" which can be vectorized to increase the retrieval accuracy. You can see the full definition in `proposal_chain.py`.
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
## Storage
|
## Storage
|
||||||
|
|
||||||
@ -18,7 +18,7 @@ Create the index by running the following:
|
|||||||
|
|
||||||
```shell
|
```shell
|
||||||
poetry install
|
poetry install
|
||||||
poetry run python rag_chroma_dense_retrieval/ingest.py
|
poetry run python propositional_retrieval/ingest.py
|
||||||
```
|
```
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
@ -32,21 +32,21 @@ pip install -U langchain-cli
|
|||||||
To create a new LangChain project and install this as the only package, you can do:
|
To create a new LangChain project and install this as the only package, you can do:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
langchain app new my-app --package rag-chroma-dense-retrieval
|
langchain app new my-app --package propositional-retrieval
|
||||||
```
|
```
|
||||||
|
|
||||||
If you want to add this to an existing project, you can just run:
|
If you want to add this to an existing project, you can just run:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
langchain app add rag-chroma-dense-retrieval
|
langchain app add propositional-retrieval
|
||||||
```
|
```
|
||||||
|
|
||||||
And add the following code to your `server.py` file:
|
And add the following code to your `server.py` file:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from rag_chroma_dense_retrieval import chain
|
from propositional_retrieval import chain
|
||||||
|
|
||||||
add_routes(app, chain, path="/rag-chroma-dense-retrieval")
|
add_routes(app, chain, path="/propositional-retrieval")
|
||||||
```
|
```
|
||||||
|
|
||||||
(Optional) Let's now configure LangSmith.
|
(Optional) Let's now configure LangSmith.
|
||||||
@ -70,12 +70,12 @@ This will start the FastAPI app with a server running locally at
|
|||||||
[http://localhost:8000](http://localhost:8000)
|
[http://localhost:8000](http://localhost:8000)
|
||||||
|
|
||||||
We can see all templates at [http://127.0.0.1:8000/docs](http://127.0.0.1:8000/docs)
|
We can see all templates at [http://127.0.0.1:8000/docs](http://127.0.0.1:8000/docs)
|
||||||
We can access the playground at [http://127.0.0.1:8000/rag-chroma-dense-retrieval/playground](http://127.0.0.1:8000/rag-chroma-dense-retrieval/playground)
|
We can access the playground at [http://127.0.0.1:8000/propositional-retrieval/playground](http://127.0.0.1:8000/propositional-retrieval/playground)
|
||||||
|
|
||||||
We can access the template from code with:
|
We can access the template from code with:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from langserve.client import RemoteRunnable
|
from langserve.client import RemoteRunnable
|
||||||
|
|
||||||
runnable = RemoteRunnable("http://localhost:8000/rag-chroma-dense-retrieval")
|
runnable = RemoteRunnable("http://localhost:8000/propositional-retrieval")
|
||||||
```
|
```
|
Before Width: | Height: | Size: 375 KiB After Width: | Height: | Size: 375 KiB |
@ -12,7 +12,7 @@
|
|||||||
"```\n",
|
"```\n",
|
||||||
"from fastapi import FastAPI\n",
|
"from fastapi import FastAPI\n",
|
||||||
"from langserve import add_routes\n",
|
"from langserve import add_routes\n",
|
||||||
"from rag_chroma_dense_retrieval import chain\n",
|
"from propositional_retrieval import chain\n",
|
||||||
"\n",
|
"\n",
|
||||||
"app = FastAPI(\n",
|
"app = FastAPI(\n",
|
||||||
" title=\"LangChain Server\",\n",
|
" title=\"LangChain Server\",\n",
|
||||||
@ -20,7 +20,7 @@
|
|||||||
" description=\"Retriever and Generator for RAG Chroma Dense Retrieval\",\n",
|
" description=\"Retriever and Generator for RAG Chroma Dense Retrieval\",\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"add_routes(app, chain, path=\"/rag-chroma-dense-retrieval\")\n",
|
"add_routes(app, chain, path=\"/propositional-retrieval\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"if __name__ == \"__main__\":\n",
|
"if __name__ == \"__main__\":\n",
|
||||||
" import uvicorn\n",
|
" import uvicorn\n",
|
||||||
@ -39,7 +39,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from langserve.client import RemoteRunnable\n",
|
"from langserve.client import RemoteRunnable\n",
|
||||||
"\n",
|
"\n",
|
||||||
"rag_app = RemoteRunnable(\"http://localhost:8001/rag-chroma-dense-retrieval\")\n",
|
"rag_app = RemoteRunnable(\"http://localhost:8001/propositional-retrieval\")\n",
|
||||||
"rag_app.invoke(\"How are transformers related to convolutional neural networks?\")"
|
"rag_app.invoke(\"How are transformers related to convolutional neural networks?\")"
|
||||||
]
|
]
|
||||||
}
|
}
|
@ -0,0 +1,4 @@
|
|||||||
|
from propositional_retrieval.chain import chain
|
||||||
|
from propositional_retrieval.proposal_chain import proposition_chain
|
||||||
|
|
||||||
|
__all__ = ["chain", "proposition_chain"]
|
@ -5,8 +5,8 @@ from langchain_core.prompts import ChatPromptTemplate
|
|||||||
from langchain_core.pydantic_v1 import BaseModel
|
from langchain_core.pydantic_v1 import BaseModel
|
||||||
from langchain_core.runnables import RunnablePassthrough
|
from langchain_core.runnables import RunnablePassthrough
|
||||||
|
|
||||||
from rag_chroma_dense_retrieval.constants import DOCSTORE_ID_KEY
|
from propositional_retrieval.constants import DOCSTORE_ID_KEY
|
||||||
from rag_chroma_dense_retrieval.storage import get_multi_vector_retriever
|
from propositional_retrieval.storage import get_multi_vector_retriever
|
||||||
|
|
||||||
|
|
||||||
def format_docs(docs: list) -> str:
|
def format_docs(docs: list) -> str:
|
@ -2,12 +2,13 @@ import logging
|
|||||||
import uuid
|
import uuid
|
||||||
from typing import Sequence
|
from typing import Sequence
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup as Soup
|
||||||
from langchain_core.documents import Document
|
from langchain_core.documents import Document
|
||||||
from langchain_core.runnables import Runnable
|
from langchain_core.runnables import Runnable
|
||||||
|
|
||||||
from rag_chroma_dense_retrieval.constants import DOCSTORE_ID_KEY
|
from propositional_retrieval.constants import DOCSTORE_ID_KEY
|
||||||
from rag_chroma_dense_retrieval.proposal_chain import proposition_chain
|
from propositional_retrieval.proposal_chain import proposition_chain
|
||||||
from rag_chroma_dense_retrieval.storage import get_multi_vector_retriever
|
from propositional_retrieval.storage import get_multi_vector_retriever
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
@ -48,7 +49,9 @@ def create_index(
|
|||||||
"""
|
"""
|
||||||
logger.info("Creating multi-vector retriever")
|
logger.info("Creating multi-vector retriever")
|
||||||
retriever = get_multi_vector_retriever(docstore_id_key)
|
retriever = get_multi_vector_retriever(docstore_id_key)
|
||||||
propositions = indexer.batch([{"input": doc.page_content} for doc in docs])
|
propositions = indexer.batch(
|
||||||
|
[{"input": doc.page_content} for doc in docs], {"max_concurrency": 10}
|
||||||
|
)
|
||||||
|
|
||||||
add_documents(
|
add_documents(
|
||||||
retriever,
|
retriever,
|
||||||
@ -69,12 +72,15 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
# The attention is all you need paper
|
# The attention is all you need paper
|
||||||
# Could add more parsing here, as it's very raw.
|
# Could add more parsing here, as it's very raw.
|
||||||
loader = RecursiveUrlLoader("https://ar5iv.labs.arxiv.org/html/1706.03762")
|
loader = RecursiveUrlLoader(
|
||||||
|
"https://ar5iv.labs.arxiv.org/html/1706.03762",
|
||||||
|
max_depth=2,
|
||||||
|
extractor=lambda x: Soup(x, "html.parser").text,
|
||||||
|
)
|
||||||
data = loader.load()
|
data = loader.load()
|
||||||
logger.info(f"Loaded {len(data)} documents")
|
logger.info(f"Loaded {len(data)} documents")
|
||||||
|
|
||||||
# Split
|
# Split
|
||||||
|
|
||||||
text_splitter = RecursiveCharacterTextSplitter(chunk_size=8000, chunk_overlap=0)
|
text_splitter = RecursiveCharacterTextSplitter(chunk_size=8000, chunk_overlap=0)
|
||||||
all_splits = text_splitter.split_documents(data)
|
all_splits = text_splitter.split_documents(data)
|
||||||
logger.info(f"Split into {len(all_splits)} documents")
|
logger.info(f"Split into {len(all_splits)} documents")
|
@ -1,7 +1,7 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "rag-chroma-dense-retrieval"
|
name = "propositional-retrieval"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
description = "Dense retrieval using vectorized propositions.s"
|
description = "Dense retrieval using vectorized propositions."
|
||||||
authors = [
|
authors = [
|
||||||
"William Fu-Hinthorn <will@langchain.dev>",
|
"William Fu-Hinthorn <will@langchain.dev>",
|
||||||
]
|
]
|
@ -1,4 +0,0 @@
|
|||||||
from rag_chroma_dense_retrieval.chain import chain
|
|
||||||
from rag_chroma_dense_retrieval.proposal_chain import proposition_chain
|
|
||||||
|
|
||||||
__all__ = ["chain", "proposition_chain"]
|
|
Loading…
Reference in New Issue
Block a user