mirror of https://github.com/hwchase17/langchain.git (synced 2025-06-27 00:48:45 +00:00)

DOCS: update rag use case (#13319)

This commit is contained in:
parent a0b39a4325
commit 76c317ed78
@@ -8,6 +8,7 @@ Notebook | Description
|
||||
[Semi_Structured_RAG.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/Semi_Structured_RAG.ipynb) | Perform retrieval-augmented generation (RAG) on documents with semi-structured data, including text and tables, using Unstructured for parsing, the multi-vector retriever for storage, and LCEL for implementing chains.
|
||||
[Semi_structured_and_multi_moda...](https://github.com/langchain-ai/langchain/tree/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb) | Perform retrieval-augmented generation (RAG) on documents with semi-structured data and images, using Unstructured for parsing, the multi-vector retriever for storage and retrieval, and LCEL for implementing chains.
|
||||
[Semi_structured_multi_modal_RA...](https://github.com/langchain-ai/langchain/tree/master/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb) | Perform retrieval-augmented generation (RAG) on documents with semi-structured data and images, using various tools and methods such as Unstructured for parsing, the multi-vector retriever for storage, LCEL for implementing chains, and open-source language models such as LLaMA2, LLaVA, and GPT4All.
|
||||
[analyze_document.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/analyze_document.ipynb) | Analyze a single long document.
|
||||
[autogpt/autogpt.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/autogpt/autogpt.ipynb) | Implement AutoGPT, an autonomous agent, with LangChain primitives such as LLMs, PromptTemplates, VectorStores, Embeddings, and Tools.
|
||||
[autogpt/marathon_times.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/autogpt/marathon_times.ipynb) | Implement AutoGPT for finding winning marathon times.
|
||||
[baby_agi.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/baby_agi.ipynb) | Implement BabyAGI, an AI agent that can generate and execute tasks based on a given objective, with the flexibility to swap out specific vectorstores/model providers.
|
||||
@@ -44,6 +45,7 @@ Notebook | Description
|
||||
[plan_and_execute_agent.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/plan_and_execute_agent.ipynb) | Create plan-and-execute agents that accomplish objectives by planning tasks with a language model (LLM) and executing them with a separate agent.
|
||||
[press_releases.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/press_releases.ipynb) | Retrieve and query company press release data powered by [Kay.ai](https://kay.ai).
|
||||
[program_aided_language_model.i...](https://github.com/langchain-ai/langchain/tree/master/cookbook/program_aided_language_model.ipynb) | Implement program-aided language models as described in the provided research paper.
|
||||
[qa_citations.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/qa_citations.ipynb) | Different ways to get a model to cite its sources.
|
||||
[retrieval_in_sql.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/retrieval_in_sql.ipynb) | Perform retrieval-augmented generation (RAG) on a PostgreSQL database using pgvector.
|
||||
[sales_agent_with_context.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/sales_agent_with_context.ipynb) | Implement a context-aware AI sales agent, SalesGPT, that can have natural sales conversations, interact with other systems, and use a product knowledge base to discuss a company's offerings.
|
||||
[self_query_hotel_search.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/self_query_hotel_search.ipynb) | Build a hotel room search feature with self-querying retrieval, using a specific hotel recommendation dataset.
|
||||
|
cookbook/analyze_document.ipynb (105 lines, new file)
@@ -0,0 +1,105 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f69d4a4c-137d-47e9-bea1-786afce9c1c0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Analyze a single long document\n",
|
||||
"\n",
|
||||
"The AnalyzeDocumentChain takes in a single document, splits it up, and then runs it through a CombineDocumentsChain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "2a0707ce-6d2d-471b-bc33-64da32a7b3f0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open(\"../docs/docs/modules/state_of_the_union.txt\") as f:\n",
|
||||
" state_of_the_union = f.read()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "ca14d161-2d5b-4a6c-a296-77d8ce4b28cd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import AnalyzeDocumentChain\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "9f97406c-85a9-45fb-99ce-9138c0ba3731",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains.question_answering import load_qa_chain\n",
|
||||
"\n",
|
||||
"qa_chain = load_qa_chain(llm, chain_type=\"map_reduce\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "0871a753-f5bb-4b4f-a394-f87f2691f659",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"qa_document_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "e6f86428-3c2c-46a0-a57c-e22826fdbf91",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The President said, \"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service.\"'"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"qa_document_chain.run(\n",
|
||||
" input_document=state_of_the_union,\n",
|
||||
" question=\"what did the president say about justice breyer?\",\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@@ -1,44 +0,0 @@
|
||||
# Analyze a single long document
|
||||
|
||||
The AnalyzeDocumentChain takes in a single document, splits it up, and then runs it through a CombineDocumentsChain.
|
||||
|
||||
```python
|
||||
with open("../../state_of_the_union.txt") as f:
|
||||
state_of_the_union = f.read()
|
||||
```
|
||||
|
||||
```python
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.chains import AnalyzeDocumentChain
|
||||
|
||||
|
||||
llm = OpenAI(temperature=0)
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
from langchain.chains.question_answering import load_qa_chain
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
qa_chain = load_qa_chain(llm, chain_type="map_reduce")
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
qa_document_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
qa_document_chain.run(input_document=state_of_the_union, question="what did the president say about justice breyer?")
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
' The president thanked Justice Breyer for his service.'
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
@@ -1,434 +0,0 @@
|
||||
---
|
||||
sidebar_position: 2
|
||||
---
|
||||
|
||||
# Remembering chat history
|
||||
The ConversationalRetrievalQA chain builds on RetrievalQAChain to provide a chat history component.
|
||||
|
||||
It first combines the chat history (either explicitly passed in or retrieved from the provided memory) and the question into a standalone question, then looks up relevant documents from the retriever, and finally passes those documents and the question to a question-answering chain to return a response.
|
||||
|
||||
To create one, you will need a retriever. In the example below, we will create one from a vector store, which can be created from embeddings.
|
||||
|
||||
```python
|
||||
from langchain.embeddings.openai import OpenAIEmbeddings
|
||||
from langchain.vectorstores import Chroma
|
||||
from langchain.text_splitter import CharacterTextSplitter
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.chains import ConversationalRetrievalChain
|
||||
```
|
||||
|
||||
Load in documents. You can replace this with a loader for whatever type of data you want.
|
||||
|
||||
```python
|
||||
from langchain.document_loaders import TextLoader
|
||||
loader = TextLoader("../../state_of_the_union.txt")
|
||||
documents = loader.load()
|
||||
```
|
||||
|
||||
If you had multiple loaders that you wanted to combine, you could do something like this:
|
||||
|
||||
```python
|
||||
# loaders = [....]
|
||||
# docs = []
|
||||
# for loader in loaders:
|
||||
# docs.extend(loader.load())
|
||||
```
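A runnable version of that pattern might look like the following sketch; the second file path is purely hypothetical, so substitute whichever loaders you actually need.

```python
# Hypothetical example: combine the documents from several loaders into one list
loaders = [
    TextLoader("../../state_of_the_union.txt"),
    TextLoader("../../another_document.txt"),  # hypothetical second file
]
docs = []
for loader in loaders:
    docs.extend(loader.load())
```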
|
||||
|
||||
We now split the documents, create embeddings for them, and put them in a vectorstore. This allows us to do semantic search over them.
|
||||
|
||||
```python
|
||||
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
||||
documents = text_splitter.split_documents(documents)
|
||||
|
||||
embeddings = OpenAIEmbeddings()
|
||||
vectorstore = Chroma.from_documents(documents, embeddings)
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
Using embedded DuckDB without persistence: data will be transient
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
We can now create a memory object, which is necessary to track the inputs/outputs and hold a conversation.
|
||||
|
||||
```python
|
||||
from langchain.memory import ConversationBufferMemory
|
||||
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|
||||
```
|
||||
|
||||
We now initialize the `ConversationalRetrievalChain`
|
||||
|
||||
```python
|
||||
qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore.as_retriever(), memory=memory)
|
||||
```
|
||||
|
||||
```python
|
||||
query = "What did the president say about Ketanji Brown Jackson"
|
||||
result = qa({"question": query})
|
||||
```
|
||||
|
||||
```python
|
||||
result["answer"]
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans."
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
```python
|
||||
query = "Did he mention who she succeeded"
|
||||
result = qa({"question": query})
|
||||
```
|
||||
|
||||
```python
|
||||
result['answer']
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
' Ketanji Brown Jackson succeeded Justice Stephen Breyer on the United States Supreme Court.'
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
## Pass in chat history
|
||||
|
||||
In the above example, we used a Memory object to track chat history. We can also just pass it in explicitly. In order to do this, we need to initialize a chain without any memory object.
|
||||
|
||||
```python
|
||||
qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore.as_retriever())
|
||||
```
|
||||
|
||||
Here's an example of asking a question with no chat history
|
||||
|
||||
```python
|
||||
chat_history = []
|
||||
query = "What did the president say about Ketanji Brown Jackson"
|
||||
result = qa({"question": query, "chat_history": chat_history})
|
||||
```
|
||||
|
||||
```python
|
||||
result["answer"]
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans."
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
Here's an example of asking a question with some chat history
|
||||
|
||||
```python
|
||||
chat_history = [(query, result["answer"])]
|
||||
query = "Did he mention who she succeeded"
|
||||
result = qa({"question": query, "chat_history": chat_history})
|
||||
```
|
||||
|
||||
```python
|
||||
result['answer']
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
' Ketanji Brown Jackson succeeded Justice Stephen Breyer on the United States Supreme Court.'
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
## Using a different model for condensing the question
|
||||
|
||||
This chain has two steps. First, it condenses the current question and the chat history into a standalone question. This is necessary to create a standalone vector to use for retrieval. After that, it does retrieval and then answers the question using retrieval-augmented generation with a separate model. Part of the power of the declarative nature of LangChain is that you can easily use a separate language model for each call. This lets you use a cheaper, faster model for the simpler task of condensing the question and a more expensive model for answering it. Here is an example of doing so.
|
||||
|
||||
```python
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
```
|
||||
|
||||
```python
|
||||
qa = ConversationalRetrievalChain.from_llm(
|
||||
ChatOpenAI(temperature=0, model="gpt-4"),
|
||||
vectorstore.as_retriever(),
|
||||
condense_question_llm=ChatOpenAI(temperature=0, model='gpt-3.5-turbo'),
|
||||
)
|
||||
```
|
||||
|
||||
```python
|
||||
chat_history = []
|
||||
query = "What did the president say about Ketanji Brown Jackson"
|
||||
result = qa({"question": query, "chat_history": chat_history})
|
||||
```
|
||||
|
||||
```python
|
||||
chat_history = [(query, result["answer"])]
|
||||
query = "Did he mention who she succeeded"
|
||||
result = qa({"question": query, "chat_history": chat_history})
|
||||
```
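As in the earlier examples, the generated answer can then be read off the result dict:

```python
result["answer"]
```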
|
||||
|
||||
## Using a custom prompt for condensing the question
|
||||
|
||||
By default, ConversationalRetrievalQA uses CONDENSE_QUESTION_PROMPT to condense the question. Here is its implementation:
|
||||
|
||||
```python
|
||||
from langchain.prompts.prompt import PromptTemplate
|
||||
|
||||
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
|
||||
|
||||
Chat History:
|
||||
{chat_history}
|
||||
Follow Up Input: {question}
|
||||
Standalone question:"""
|
||||
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
|
||||
|
||||
```
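For illustration (the exchange below is made up), formatting this prompt shows the exact text that gets sent to the condensing LLM:

```python
print(
    CONDENSE_QUESTION_PROMPT.format(
        chat_history="Human: What did the president say about Ketanji Brown Jackson?\nAssistant: He said she is one of the nation's top legal minds.",
        question="Did he mention who she succeeded?",
    )
)
```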
|
||||
|
||||
Instead of this, any custom template can be used to further augment information in the question or to instruct the LLM to do something. Here is an example:
|
||||
|
||||
```python
|
||||
from langchain.prompts.prompt import PromptTemplate
|
||||
```
|
||||
|
||||
```python
|
||||
custom_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. At the end of standalone question add this 'Answer the question in German language.' If you do not know the answer reply with 'I am sorry'.
|
||||
Chat History:
|
||||
{chat_history}
|
||||
Follow Up Input: {question}
|
||||
Standalone question:"""
|
||||
```
|
||||
|
||||
```python
|
||||
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)
|
||||
```
|
||||
|
||||
```python
|
||||
model = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.3)
|
||||
embeddings = OpenAIEmbeddings()
|
||||
vectordb = Chroma(embedding_function=embeddings, persist_directory="./chroma_db")  # "./chroma_db" is a placeholder path to an existing persisted Chroma index
|
||||
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|
||||
qa = ConversationalRetrievalChain.from_llm(
|
||||
model,
|
||||
vectordb.as_retriever(),
|
||||
condense_question_prompt=CUSTOM_QUESTION_PROMPT,
|
||||
memory=memory
|
||||
)
|
||||
```
|
||||
|
||||
```python
|
||||
query = "What did the president say about Ketanji Brown Jackson"
|
||||
result = qa({"question": query})
|
||||
```
|
||||
|
||||
```python
|
||||
query = "Did he mention who she succeeded"
|
||||
result = qa({"question": query})
|
||||
```
|
||||
|
||||
## Return Source Documents
|
||||
|
||||
You can also easily return source documents from the ConversationalRetrievalChain. This is useful when you want to inspect which documents were returned.
|
||||
|
||||
```python
|
||||
qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore.as_retriever(), return_source_documents=True)
|
||||
```
|
||||
|
||||
```python
|
||||
chat_history = []
|
||||
query = "What did the president say about Ketanji Brown Jackson"
|
||||
result = qa({"question": query, "chat_history": chat_history})
|
||||
```
|
||||
|
||||
```python
|
||||
result['source_documents'][0]
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../state_of_the_union.txt'})
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
## ConversationalRetrievalChain with `search_distance`
|
||||
|
||||
If you are using a vector store that supports filtering by search distance, you can add a threshold value parameter.
|
||||
|
||||
```python
|
||||
vectordbkwargs = {"search_distance": 0.9}
|
||||
```
|
||||
|
||||
```python
|
||||
qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore.as_retriever(), return_source_documents=True)
|
||||
chat_history = []
|
||||
query = "What did the president say about Ketanji Brown Jackson"
|
||||
result = qa({"question": query, "chat_history": chat_history, "vectordbkwargs": vectordbkwargs})
|
||||
```
|
||||
|
||||
## ConversationalRetrievalChain with `map_reduce`
|
||||
|
||||
We can also use different types of combine-documents chains with the ConversationalRetrievalChain.
|
||||
|
||||
```python
|
||||
from langchain.chains import LLMChain
|
||||
from langchain.chains.question_answering import load_qa_chain
|
||||
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
|
||||
```
|
||||
|
||||
```python
|
||||
llm = OpenAI(temperature=0)
|
||||
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
|
||||
doc_chain = load_qa_chain(llm, chain_type="map_reduce")
|
||||
|
||||
chain = ConversationalRetrievalChain(
|
||||
retriever=vectorstore.as_retriever(),
|
||||
question_generator=question_generator,
|
||||
combine_docs_chain=doc_chain,
|
||||
)
|
||||
```
|
||||
|
||||
```python
|
||||
chat_history = []
|
||||
query = "What did the president say about Ketanji Brown Jackson"
|
||||
result = chain({"question": query, "chat_history": chat_history})
|
||||
```
|
||||
|
||||
```python
|
||||
result['answer']
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, from a family of public school educators and police officers, a consensus builder, and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans."
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
## ConversationalRetrievalChain with Question Answering with sources
|
||||
|
||||
You can also use this chain with the question-answering-with-sources chain.
|
||||
|
||||
```python
|
||||
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
||||
```
|
||||
|
||||
```python
|
||||
llm = OpenAI(temperature=0)
|
||||
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
|
||||
doc_chain = load_qa_with_sources_chain(llm, chain_type="map_reduce")
|
||||
|
||||
chain = ConversationalRetrievalChain(
|
||||
retriever=vectorstore.as_retriever(),
|
||||
question_generator=question_generator,
|
||||
combine_docs_chain=doc_chain,
|
||||
)
|
||||
```
|
||||
|
||||
```python
|
||||
chat_history = []
|
||||
query = "What did the president say about Ketanji Brown Jackson"
|
||||
result = chain({"question": query, "chat_history": chat_history})
|
||||
```
|
||||
|
||||
```python
|
||||
result['answer']
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, from a family of public school educators and police officers, a consensus builder, and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \nSOURCES: ../../state_of_the_union.txt"
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
## ConversationalRetrievalChain with streaming to `stdout`
|
||||
|
||||
Output from the chain will be streamed to `stdout` token by token in this example.
|
||||
|
||||
```python
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
||||
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT
|
||||
from langchain.chains.question_answering import load_qa_chain
|
||||
|
||||
# Construct a ConversationalRetrievalChain with a streaming llm for combine docs
|
||||
# and a separate, non-streaming llm for question generation
|
||||
llm = OpenAI(temperature=0)
|
||||
streaming_llm = OpenAI(streaming=True, callbacks=[StreamingStdOutCallbackHandler()], temperature=0)
|
||||
|
||||
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
|
||||
doc_chain = load_qa_chain(streaming_llm, chain_type="stuff", prompt=QA_PROMPT)
|
||||
|
||||
qa = ConversationalRetrievalChain(
|
||||
retriever=vectorstore.as_retriever(), combine_docs_chain=doc_chain, question_generator=question_generator)
|
||||
```
|
||||
|
||||
```python
|
||||
chat_history = []
|
||||
query = "What did the president say about Ketanji Brown Jackson"
|
||||
result = qa({"question": query, "chat_history": chat_history})
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
```python
|
||||
chat_history = [(query, result["answer"])]
|
||||
query = "Did he mention who she succeeded"
|
||||
result = qa({"question": query, "chat_history": chat_history})
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
Ketanji Brown Jackson succeeded Justice Stephen Breyer on the United States Supreme Court.
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
## get_chat_history Function
|
||||
|
||||
You can also specify a `get_chat_history` function, which can be used to format the chat_history string.
|
||||
|
||||
```python
|
||||
def get_chat_history(inputs) -> str:
|
||||
res = []
|
||||
for human, ai in inputs:
|
||||
res.append(f"Human:{human}\nAI:{ai}")
|
||||
return "\n".join(res)
|
||||
qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore.as_retriever(), get_chat_history=get_chat_history)
|
||||
```
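For illustration (with a made-up exchange), the formatter above turns a list of (human, ai) tuples into a plain-text transcript:

```python
get_chat_history([("Hi", "Hello! How can I help you?")])
# -> 'Human:Hi\nAI:Hello! How can I help you?'
```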
|
||||
|
||||
```python
|
||||
chat_history = []
|
||||
query = "What did the president say about Ketanji Brown Jackson"
|
||||
result = qa({"question": query, "chat_history": chat_history})
|
||||
```
|
||||
|
||||
```python
|
||||
result['answer']
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans."
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
@@ -367,7 +367,7 @@
|
||||
"source": [
|
||||
"from langchain.callbacks.manager import CallbackManager\n",
|
||||
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
|
||||
"from langchain.chains import ConversationalRetrievalChain\n",
|
||||
"from langchain.chains import ConversationalRetrievalChain, LLMChain\n",
|
||||
"from langchain.llms import LlamaCpp\n",
|
||||
"from langchain.memory import ConversationSummaryMemory\n",
|
||||
"from langchain.prompts import PromptTemplate"
|
||||
|
@@ -5,7 +5,7 @@
|
||||
"id": "839f3c76",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Agent with retrieval tool\n",
|
||||
"# RAG with Agents\n",
|
||||
"\n",
|
||||
"This is an agent specifically optimized for doing retrieval when necessary and also holding a conversation.\n",
|
||||
"\n",
|
||||
|
@@ -1,448 +0,0 @@
|
||||
# RAG over in-memory documents
|
||||
|
||||
Here we walk through how to use LangChain for question answering over a list of documents. Under the hood we'll be using our [Document chains](/docs/modules/chains/document/).
|
||||
|
||||
## Prepare Data
|
||||
First we prepare the data. For this example we do similarity search over a vector database, but these documents could be fetched in any manner (the point of this notebook is to highlight what to do AFTER you fetch the documents).
|
||||
|
||||
|
||||
```python
|
||||
from langchain.embeddings.openai import OpenAIEmbeddings
|
||||
from langchain.text_splitter import CharacterTextSplitter
|
||||
from langchain.vectorstores import Chroma
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.prompts import PromptTemplate
|
||||
from langchain.indexes.vectorstore import VectorstoreIndexCreator
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
with open("../../state_of_the_union.txt") as f:
|
||||
state_of_the_union = f.read()
|
||||
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
||||
texts = text_splitter.split_text(state_of_the_union)
|
||||
|
||||
embeddings = OpenAIEmbeddings()
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": str(i)} for i in range(len(texts))]).as_retriever()
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
Running Chroma using direct local API.
|
||||
Using DuckDB in-memory for database. Data will be transient.
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
|
||||
```python
|
||||
query = "What did the president say about Justice Breyer"
|
||||
docs = docsearch.get_relevant_documents(query)
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
from langchain.chains.question_answering import load_qa_chain
|
||||
from langchain.llms import OpenAI
|
||||
```
|
||||
|
||||
## Quickstart
|
||||
If you just want to get started as quickly as possible, this is the recommended way to do it:
|
||||
|
||||
|
||||
```python
|
||||
chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")
|
||||
query = "What did the president say about Justice Breyer"
|
||||
chain.run(input_documents=docs, question=query)
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
' The president said that Justice Breyer has dedicated his life to serve the country and thanked him for his service.'
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
If you want more control and understanding over what is happening, please see the information below.
|
||||
|
||||
## The `stuff` Chain
|
||||
|
||||
This section shows results of using the `stuff` chain to do question answering.
|
||||
|
||||
|
||||
```python
|
||||
chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
query = "What did the president say about Justice Breyer"
|
||||
chain({"input_documents": docs, "question": query}, return_only_outputs=True)
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
{'output_text': ' The president said that Justice Breyer has dedicated his life to serve the country and thanked him for his service.'}
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
**Custom Prompts**
|
||||
|
||||
You can also use your own prompts with this chain. In this example, we will respond in Italian.
|
||||
|
||||
|
||||
```python
|
||||
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
|
||||
|
||||
{context}
|
||||
|
||||
Question: {question}
|
||||
Answer in Italian:"""
|
||||
PROMPT = PromptTemplate(
|
||||
template=prompt_template, input_variables=["context", "question"]
|
||||
)
|
||||
chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff", prompt=PROMPT)
|
||||
chain({"input_documents": docs, "question": query}, return_only_outputs=True)
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
{'output_text': ' Il presidente ha detto che Justice Breyer ha dedicato la sua vita a servire questo paese e ha ricevuto una vasta gamma di supporto.'}
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
## The `map_reduce` Chain
|
||||
|
||||
This section shows results of using the `map_reduce` chain to do question answering.
|
||||
|
||||
|
||||
```python
|
||||
chain = load_qa_chain(OpenAI(temperature=0), chain_type="map_reduce")
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
query = "What did the president say about Justice Breyer"
|
||||
chain({"input_documents": docs, "question": query}, return_only_outputs=True)
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
{'output_text': ' The president said that Justice Breyer is an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court, and thanked him for his service.'}
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
**Intermediate Steps**
|
||||
|
||||
We can also return the intermediate steps for `map_reduce` chains, should we want to inspect them. This is done with the `return_map_steps` variable.
|
||||
|
||||
|
||||
```python
|
||||
chain = load_qa_chain(OpenAI(temperature=0), chain_type="map_reduce", return_map_steps=True)
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
chain({"input_documents": docs, "question": query}, return_only_outputs=True)
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
{'intermediate_steps': [' "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service."',
|
||||
' A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.',
|
||||
' None',
|
||||
' None'],
|
||||
'output_text': ' The president said that Justice Breyer is an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court, and thanked him for his service.'}
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
**Custom Prompts**
|
||||
|
||||
You can also use your own prompts with this chain. In this example, we will respond in Italian.
|
||||
|
||||
|
||||
```python
|
||||
question_prompt_template = """Use the following portion of a long document to see if any of the text is relevant to answer the question.
|
||||
Return any relevant text translated into italian.
|
||||
{context}
|
||||
Question: {question}
|
||||
Relevant text, if any, in Italian:"""
|
||||
QUESTION_PROMPT = PromptTemplate(
|
||||
template=question_prompt_template, input_variables=["context", "question"]
|
||||
)
|
||||
|
||||
combine_prompt_template = """Given the following extracted parts of a long document and a question, create a final answer in Italian.
|
||||
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
|
||||
|
||||
QUESTION: {question}
|
||||
=========
|
||||
{summaries}
|
||||
=========
|
||||
Answer in Italian:"""
|
||||
COMBINE_PROMPT = PromptTemplate(
|
||||
template=combine_prompt_template, input_variables=["summaries", "question"]
|
||||
)
|
||||
chain = load_qa_chain(OpenAI(temperature=0), chain_type="map_reduce", return_map_steps=True, question_prompt=QUESTION_PROMPT, combine_prompt=COMBINE_PROMPT)
|
||||
chain({"input_documents": docs, "question": query}, return_only_outputs=True)
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
{'intermediate_steps': ["\nStasera vorrei onorare qualcuno che ha dedicato la sua vita a servire questo paese: il giustizia Stephen Breyer - un veterano dell'esercito, uno studioso costituzionale e un giustizia in uscita della Corte Suprema degli Stati Uniti. Giustizia Breyer, grazie per il tuo servizio.",
|
||||
'\nNessun testo pertinente.',
|
||||
' Non ha detto nulla riguardo a Justice Breyer.',
|
||||
" Non c'è testo pertinente."],
|
||||
'output_text': ' Non ha detto nulla riguardo a Justice Breyer.'}
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
**Batch Size**
|
||||
|
||||
When using the `map_reduce` chain, one thing to keep in mind is the batch size you are using during the map step. If this is too high, it could cause rate-limiting errors. You can control this by setting the batch size on the LLM used. Note that this only applies to LLMs with this parameter. Below is an example of doing so:
|
||||
|
||||
```python
|
||||
llm = OpenAI(batch_size=5, temperature=0)
|
||||
```
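This LLM can then be passed to `load_qa_chain` exactly as before; a minimal sketch reusing the `docs` and `query` from above:

```python
chain = load_qa_chain(llm, chain_type="map_reduce")
chain({"input_documents": docs, "question": query}, return_only_outputs=True)
```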
|
||||
|
||||
## The `refine` Chain
|
||||
|
||||
This section shows results of using the `refine` chain to do question answering.
|
||||
|
||||
|
||||
```python
|
||||
chain = load_qa_chain(OpenAI(temperature=0), chain_type="refine")
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
query = "What did the president say about Justice Breyer"
|
||||
chain({"input_documents": docs, "question": query}, return_only_outputs=True)
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
{'output_text': '\n\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice, as well as for his support of the Equality Act and his commitment to protecting the rights of LGBTQ+ Americans. He also praised Justice Breyer for his role in helping to pass the Bipartisan Infrastructure Law, which he said would be the most sweeping investment to rebuild America in history and would help the country compete for the jobs of the 21st Century.'}
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
**Intermediate Steps**
|
||||
|
||||
We can also return the intermediate steps for `refine` chains, should we want to inspect them. This is done with the `return_refine_steps` variable.
|
||||
|
||||
|
||||
```python
|
||||
chain = load_qa_chain(OpenAI(temperature=0), chain_type="refine", return_refine_steps=True)
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
chain({"input_documents": docs, "question": query}, return_only_outputs=True)
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
{'intermediate_steps': ['\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country and his legacy of excellence.',
|
||||
'\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice.',
|
||||
'\n\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice, as well as for his support of the Equality Act and his commitment to protecting the rights of LGBTQ+ Americans.',
|
||||
'\n\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice, as well as for his support of the Equality Act and his commitment to protecting the rights of LGBTQ+ Americans. He also praised Justice Breyer for his role in helping to pass the Bipartisan Infrastructure Law, which is the most sweeping investment to rebuild America in history.'],
|
||||
'output_text': '\n\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice, as well as for his support of the Equality Act and his commitment to protecting the rights of LGBTQ+ Americans. He also praised Justice Breyer for his role in helping to pass the Bipartisan Infrastructure Law, which is the most sweeping investment to rebuild America in history.'}
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
**Custom Prompts**
|
||||
|
||||
You can also use your own prompts with this chain. In this example, we will respond in Italian.
|
||||
|
||||
|
||||
```python
|
||||
refine_prompt_template = (
|
||||
"The original question is as follows: {question}\n"
|
||||
"We have provided an existing answer: {existing_answer}\n"
|
||||
"We have the opportunity to refine the existing answer"
|
||||
"(only if needed) with some more context below.\n"
|
||||
"------------\n"
|
||||
"{context_str}\n"
|
||||
"------------\n"
|
||||
"Given the new context, refine the original answer to better "
|
||||
"answer the question. "
|
||||
"If the context isn't useful, return the original answer. Reply in Italian."
|
||||
)
|
||||
refine_prompt = PromptTemplate(
|
||||
input_variables=["question", "existing_answer", "context_str"],
|
||||
template=refine_prompt_template,
|
||||
)
|
||||
|
||||
|
||||
initial_qa_template = (
|
||||
"Context information is below. \n"
|
||||
"---------------------\n"
|
||||
"{context_str}"
|
||||
"\n---------------------\n"
|
||||
"Given the context information and not prior knowledge, "
|
||||
"answer the question: {question}\nYour answer should be in Italian.\n"
|
||||
)
|
||||
initial_qa_prompt = PromptTemplate(
|
||||
input_variables=["context_str", "question"], template=initial_qa_template
|
||||
)
|
||||
chain = load_qa_chain(OpenAI(temperature=0), chain_type="refine", return_refine_steps=True,
|
||||
question_prompt=initial_qa_prompt, refine_prompt=refine_prompt)
|
||||
chain({"input_documents": docs, "question": query}, return_only_outputs=True)
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
{'intermediate_steps': ['\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese e ha reso omaggio al suo servizio.',
|
||||
"\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese, ha reso omaggio al suo servizio e ha sostenuto la nomina di una top litigatrice in pratica privata, un ex difensore pubblico federale e una famiglia di insegnanti e agenti di polizia delle scuole pubbliche. Ha anche sottolineato l'importanza di avanzare la libertà e la giustizia attraverso la sicurezza delle frontiere e la risoluzione del sistema di immigrazione.",
|
||||
"\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese, ha reso omaggio al suo servizio e ha sostenuto la nomina di una top litigatrice in pratica privata, un ex difensore pubblico federale e una famiglia di insegnanti e agenti di polizia delle scuole pubbliche. Ha anche sottolineato l'importanza di avanzare la libertà e la giustizia attraverso la sicurezza delle frontiere, la risoluzione del sistema di immigrazione, la protezione degli americani LGBTQ+ e l'approvazione dell'Equality Act. Ha inoltre sottolineato l'importanza di lavorare insieme per sconfiggere l'epidemia di oppiacei.",
|
||||
"\n\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese, ha reso omaggio al suo servizio e ha sostenuto la nomina di una top litigatrice in pratica privata, un ex difensore pubblico federale e una famiglia di insegnanti e agenti di polizia delle scuole pubbliche. Ha anche sottolineato l'importanza di avanzare la libertà e la giustizia attraverso la sicurezza delle frontiere, la risoluzione del sistema di immigrazione, la protezione degli americani LGBTQ+ e l'approvazione dell'Equality Act. Ha inoltre sottolineato l'importanza di lavorare insieme per sconfiggere l'epidemia di oppiacei e per investire in America, educare gli americani, far crescere la forza lavoro e costruire l'economia dal"],
|
||||
'output_text': "\n\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese, ha reso omaggio al suo servizio e ha sostenuto la nomina di una top litigatrice in pratica privata, un ex difensore pubblico federale e una famiglia di insegnanti e agenti di polizia delle scuole pubbliche. Ha anche sottolineato l'importanza di avanzare la libertà e la giustizia attraverso la sicurezza delle frontiere, la risoluzione del sistema di immigrazione, la protezione degli americani LGBTQ+ e l'approvazione dell'Equality Act. Ha inoltre sottolineato l'importanza di lavorare insieme per sconfiggere l'epidemia di oppiacei e per investire in America, educare gli americani, far crescere la forza lavoro e costruire l'economia dal"}
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
## The `map_rerank` Chain
|
||||
|
||||
This section shows results of using the `map_rerank` chain to do question answering.
|
||||
|
||||
|
||||
```python
|
||||
chain = load_qa_chain(OpenAI(temperature=0), chain_type="map_rerank", return_intermediate_steps=True)
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
query = "What did the president say about Justice Breyer"
|
||||
results = chain({"input_documents": docs, "question": query}, return_only_outputs=True)
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
results["output_text"]
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
' The President thanked Justice Breyer for his service and honored him for dedicating his life to serve the country.'
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
|
||||
```python
|
||||
results["intermediate_steps"]
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
[{'answer': ' The President thanked Justice Breyer for his service and honored him for dedicating his life to serve the country.',
|
||||
'score': '100'},
|
||||
{'answer': ' This document does not answer the question', 'score': '0'},
|
||||
{'answer': ' This document does not answer the question', 'score': '0'},
|
||||
{'answer': ' This document does not answer the question', 'score': '0'}]
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
**Custom Prompts**
|
||||
|
||||
You can also use your own prompts with this chain. In this example, we will respond in Italian.
|
||||
|
||||
|
||||
```python
|
||||
from langchain.output_parsers import RegexParser
|
||||
|
||||
output_parser = RegexParser(
|
||||
regex=r"(.*?)\nScore: (.*)",
|
||||
output_keys=["answer", "score"],
|
||||
)
|
||||
|
||||
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
|
||||
|
||||
In addition to giving an answer, also return a score of how fully it answered the user's question. This should be in the following format:
|
||||
|
||||
Question: [question here]
|
||||
Helpful Answer In Italian: [answer here]
|
||||
Score: [score between 0 and 100]
|
||||
|
||||
Begin!
|
||||
|
||||
Context:
|
||||
---------
|
||||
{context}
|
||||
---------
|
||||
Question: {question}
|
||||
Helpful Answer In Italian:"""
|
||||
PROMPT = PromptTemplate(
|
||||
template=prompt_template,
|
||||
input_variables=["context", "question"],
|
||||
output_parser=output_parser,
|
||||
)
|
||||
|
||||
chain = load_qa_chain(OpenAI(temperature=0), chain_type="map_rerank", return_intermediate_steps=True, prompt=PROMPT)
|
||||
query = "What did the president say about Justice Breyer"
|
||||
chain({"input_documents": docs, "question": query}, return_only_outputs=True)
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
{'intermediate_steps': [{'answer': ' Il presidente ha detto che Justice Breyer ha dedicato la sua vita a servire questo paese.',
|
||||
'score': '100'},
|
||||
{'answer': ' Il presidente non ha detto nulla sulla Giustizia Breyer.',
|
||||
'score': '100'},
|
||||
{'answer': ' Non so.', 'score': '0'},
|
||||
{'answer': ' Non so.', 'score': '0'}],
|
||||
'output_text': ' Il presidente ha detto che Justice Breyer ha dedicato la sua vita a servire questo paese.'}
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
## Document QA with sources
|
||||
|
||||
We can also perform document QA and return the sources that were used to answer the question. To do this we'll just need to make sure each document has a "source" key in the metadata, and we'll use the `load_qa_with_sources_chain` helper to construct our chain:
|
||||
|
||||
```python
|
||||
docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": str(i)} for i in range(len(texts))])
|
||||
query = "What did the president say about Justice Breyer"
|
||||
docs = docsearch.similarity_search(query)
|
||||
```
|
||||
|
||||
```python
|
||||
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
||||
|
||||
chain = load_qa_with_sources_chain(OpenAI(temperature=0), chain_type="stuff")
|
||||
query = "What did the president say about Justice Breyer"
|
||||
chain({"input_documents": docs, "question": query}, return_only_outputs=True)
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
{'output_text': ' The president thanked Justice Breyer for his service.\nSOURCES: 30-pl'}
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
File diff suppressed because it is too large
@@ -1,128 +0,0 @@
|
||||
# Dynamically select from multiple retrievers
|
||||
|
||||
This notebook demonstrates how to use the `RouterChain` paradigm to create a chain that dynamically selects which retrieval system to use. Specifically, we show how to use the `MultiRetrievalQAChain` to create a question-answering chain that selects the retrieval QA chain most relevant to a given question and then answers the question using it.
|
||||
|
||||
```python
|
||||
from langchain.chains.router import MultiRetrievalQAChain
|
||||
from langchain.llms import OpenAI
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
from langchain.document_loaders import TextLoader
|
||||
from langchain.vectorstores import FAISS
|
||||
|
||||
sou_docs = TextLoader('../../state_of_the_union.txt').load_and_split()
|
||||
sou_retriever = FAISS.from_documents(sou_docs, OpenAIEmbeddings()).as_retriever()
|
||||
|
||||
pg_docs = TextLoader('../../paul_graham_essay.txt').load_and_split()
|
||||
pg_retriever = FAISS.from_documents(pg_docs, OpenAIEmbeddings()).as_retriever()
|
||||
|
||||
personal_texts = [
|
||||
"I love apple pie",
|
||||
"My favorite color is fuchsia",
|
||||
"My dream is to become a professional dancer",
|
||||
"I broke my arm when I was 12",
|
||||
"My parents are from Peru",
|
||||
]
|
||||
personal_retriever = FAISS.from_texts(personal_texts, OpenAIEmbeddings()).as_retriever()
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
retriever_infos = [
|
||||
{
|
||||
"name": "state of the union",
|
||||
"description": "Good for answering questions about the 2023 State of the Union address",
|
||||
"retriever": sou_retriever
|
||||
},
|
||||
{
|
||||
"name": "pg essay",
|
||||
"description": "Good for answering questions about Paul Graham's essay on his career",
|
||||
"retriever": pg_retriever
|
||||
},
|
||||
{
|
||||
"name": "personal",
|
||||
"description": "Good for answering questions about me",
|
||||
"retriever": personal_retriever
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
chain = MultiRetrievalQAChain.from_retrievers(OpenAI(), retriever_infos, verbose=True)
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
print(chain.run("What did the president say about the economy?"))
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
|
||||
|
||||
> Entering new MultiRetrievalQAChain chain...
|
||||
state of the union: {'query': 'What did the president say about the economy in the 2023 State of the Union address?'}
|
||||
> Finished chain.
|
||||
The president said that the economy was stronger than it had been a year prior, and that the American Rescue Plan helped create record job growth and fuel economic relief for millions of Americans. He also proposed a plan to fight inflation and lower costs for families, including cutting the cost of prescription drugs and energy, providing investments and tax credits for energy efficiency, and increasing access to child care and Pre-K.
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
|
||||
```python
|
||||
print(chain.run("What is something Paul Graham regrets about his work?"))
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
|
||||
|
||||
> Entering new MultiRetrievalQAChain chain...
|
||||
pg essay: {'query': 'What is something Paul Graham regrets about his work?'}
|
||||
> Finished chain.
|
||||
Paul Graham regrets that he did not take a vacation after selling his company, instead of immediately starting to paint.
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
|
||||
```python
|
||||
print(chain.run("What is my background?"))
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
|
||||
|
||||
> Entering new MultiRetrievalQAChain chain...
|
||||
personal: {'query': 'What is my background?'}
|
||||
> Finished chain.
|
||||
Your background is Peruvian.
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
|
||||
```python
|
||||
print(chain.run("What year was the Internet created in?"))
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
|
||||
|
||||
> Entering new MultiRetrievalQAChain chain...
|
||||
None: {'query': 'What year was the Internet created in?'}
|
||||
> Finished chain.
|
||||
The Internet was created in 1969 through a project called ARPANET, which was funded by the United States Department of Defense. However, the World Wide Web, which is often confused with the Internet, was created in 1989 by British computer scientist Tim Berners-Lee.
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
@@ -1,173 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "66398b75",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Retrieving from multiple sources\n",
|
||||
"\n",
|
||||
"Often times you may want to do retrieval over multiple sources. These can be different vectorstores (where one contains information about topic X and the other contains info about topic Y). They could also be completely different databases altogether!\n",
|
||||
"\n",
|
||||
"A key part is is doing as much of the retrieval in parallel as possible. This will keep the latency as low as possible. Luckily, [LangChain Expression Language](../../) supports parallelism out of the box.\n",
|
||||
"\n",
|
||||
"Let's take a look where we do retrieval over a SQL database and a vectorstore."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "1c5bab6a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "43a6210f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set up SQL query"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "ab3bf8ba",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import create_sql_query_chain\n",
|
||||
"from langchain.utilities import SQLDatabase\n",
|
||||
"\n",
|
||||
"db = SQLDatabase.from_uri(\"sqlite:///../../../../../notebooks/Chinook.db\")\n",
|
||||
"query_chain = create_sql_query_chain(ChatOpenAI(temperature=0), db)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a8585120",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set up vectorstore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "b916b0b0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.indexes import VectorstoreIndexCreator\n",
|
||||
"from langchain.schema.document import Document\n",
|
||||
"\n",
|
||||
"index_creator = VectorstoreIndexCreator()\n",
|
||||
"index = index_creator.from_documents([Document(page_content=\"Foo\")])\n",
|
||||
"retriever = index.vectorstore.as_retriever()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a3b91816",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Combine"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "4423211c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"system_message = \"\"\"Use the information from the below two sources to answer any questions.\n",
|
||||
"\n",
|
||||
"Source 1: a SQL database about employee data\n",
|
||||
"<source1>\n",
|
||||
"{source1}\n",
|
||||
"</source1>\n",
|
||||
"\n",
|
||||
"Source 2: a text database of random information\n",
|
||||
"<source2>\n",
|
||||
"{source2}\n",
|
||||
"</source2>\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [(\"system\", system_message), (\"human\", \"{question}\")]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "7ff87e0c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"full_chain = (\n",
|
||||
" {\n",
|
||||
" \"source1\": {\"question\": lambda x: x[\"question\"]} | query_chain | db.run,\n",
|
||||
" \"source2\": (lambda x: x[\"question\"]) | retriever,\n",
|
||||
" \"question\": lambda x: x[\"question\"],\n",
|
||||
" }\n",
|
||||
" | prompt\n",
|
||||
" | ChatOpenAI()\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "d6706410",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"content='There are 8 employees.' additional_kwargs={} example=False\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"response = full_chain.invoke({\"question\": \"How many Employees are there\"})\n",
|
||||
"print(response)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@ -1,230 +0,0 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
---
|
||||
# Using a Retriever
|
||||
|
||||
This example showcases question answering over an index.
|
||||
|
||||
```python
|
||||
from langchain.chains import RetrievalQA
|
||||
from langchain.document_loaders import TextLoader
|
||||
from langchain.embeddings.openai import OpenAIEmbeddings
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.text_splitter import CharacterTextSplitter
|
||||
from langchain.vectorstores import Chroma
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
loader = TextLoader("../../state_of_the_union.txt")
|
||||
documents = loader.load()
|
||||
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
||||
texts = text_splitter.split_documents(documents)
|
||||
|
||||
embeddings = OpenAIEmbeddings()
|
||||
docsearch = Chroma.from_documents(texts, embeddings)
|
||||
|
||||
qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=docsearch.as_retriever())
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
query = "What did the president say about Ketanji Brown Jackson"
|
||||
qa.run(query)
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
" The president said that she is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support, from the Fraternal Order of Police to former judges appointed by Democrats and Republicans."
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
## Chain Type
|
||||
You can easily specify different chain types to load and use in the RetrievalQA chain. For a more detailed walkthrough of these types, please see [this notebook](/docs/modules/chains/additional/question_answering).
|
||||
|
||||
There are two ways to load different chain types. First, you can specify the chain type argument in the `from_chain_type` method. This allows you to pass in the name of the chain type you want to use. For example, below we change the chain type to `map_reduce`.
|
||||
|
||||
|
||||
```python
|
||||
qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="map_reduce", retriever=docsearch.as_retriever())
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
query = "What did the president say about Ketanji Brown Jackson"
|
||||
qa.run(query)
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
" The president said that Judge Ketanji Brown Jackson is one of our nation's top legal minds, a former top litigator in private practice and a former federal public defender, from a family of public school educators and police officers, a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans."
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
The above approach lets you easily change the chain_type, but it doesn't give you much control over the parameters of that chain type. If you want to control those parameters, you can load the chain directly (as shown in [this notebook](/docs/modules/chains/additional/question_answering)) and then pass it to the RetrievalQA chain with the `combine_documents_chain` parameter. For example:
|
||||
|
||||
|
||||
```python
|
||||
from langchain.chains.question_answering import load_qa_chain
|
||||
qa_chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")
|
||||
qa = RetrievalQA(combine_documents_chain=qa_chain, retriever=docsearch.as_retriever())
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
query = "What did the president say about Ketanji Brown Jackson"
|
||||
qa.run(query)
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans."
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
## Custom Prompts
|
||||
You can pass in custom prompts to do question answering. These prompts are the same prompts that you can pass into the [base question answering chain](/docs/modules/chains/additional/question_answering).
|
||||
|
||||
|
||||
```python
|
||||
from langchain.prompts import PromptTemplate
|
||||
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
|
||||
|
||||
{context}
|
||||
|
||||
Question: {question}
|
||||
Answer in Italian:"""
|
||||
PROMPT = PromptTemplate(
|
||||
template=prompt_template, input_variables=["context", "question"]
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
chain_type_kwargs = {"prompt": PROMPT}
|
||||
qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=docsearch.as_retriever(), chain_type_kwargs=chain_type_kwargs)
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
query = "What did the president say about Ketanji Brown Jackson"
|
||||
qa.run(query)
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
" Il presidente ha detto che Ketanji Brown Jackson è una delle menti legali più importanti del paese, che continuerà l'eccellenza di Justice Breyer e che ha ricevuto un ampio sostegno, da Fraternal Order of Police a ex giudici nominati da democratici e repubblicani."
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
## Vectorstore Retriever Options
|
||||
You can adjust how documents are retrieved from your vectorstore depending on the specific task.
|
||||
|
||||
There are two main ways to retrieve documents relevant to a query: Similarity Search and Maximal Marginal Relevance (MMR) Search. Similarity Search is the default, but you can use MMR by adding the `search_type` parameter:
|
||||
|
||||
```python
|
||||
docsearch.as_retriever(search_type="mmr")
|
||||
```
|
||||
|
||||
You can also modify the search by passing specific search arguments through the retriever to the search function, using the `search_kwargs` keyword argument.
|
||||
|
||||
- `k` defines how many documents are returned; defaults to 4.
|
||||
- `score_threshold` allows you to set a minimum relevance for documents returned by the retriever, if you are using the "similarity_score_threshold" search type.
|
||||
- `fetch_k` determines the number of documents to pass to the MMR algorithm; defaults to 20.
|
||||
- `lambda_mult` controls the diversity of results returned by the MMR algorithm, with 1 being minimum diversity and 0 being maximum. Defaults to 0.5.
|
||||
- `filter` allows you to define a filter on what documents should be retrieved, based on the documents' metadata. This has no effect if the Vectorstore doesn't store any metadata.
|
||||
|
||||
Some examples of how these parameters can be used:
|
||||
```python
|
||||
# Retrieve more documents with higher diversity; useful if your dataset has many similar documents
|
||||
docsearch.as_retriever(search_type="mmr", search_kwargs={'k': 6, 'lambda_mult': 0.25})
|
||||
|
||||
# Fetch more documents for the MMR algorithm to consider, but only return the top 5
|
||||
docsearch.as_retriever(search_type="mmr", search_kwargs={'k': 5, 'fetch_k': 50})
|
||||
|
||||
# Only retrieve documents that have a relevance score above a certain threshold
|
||||
docsearch.as_retriever(search_type="similarity_score_threshold", search_kwargs={'score_threshold': 0.8})
|
||||
|
||||
# Only get the single most similar document from the dataset
|
||||
docsearch.as_retriever(search_kwargs={'k': 1})
|
||||
|
||||
# Use a filter to only retrieve documents from a specific paper
|
||||
docsearch.as_retriever(search_kwargs={'filter': {'paper_title':'GPT-4 Technical Report'}})
|
||||
```
|
||||
|
||||
## Return Source Documents
|
||||
Additionally, we can return the source documents used to answer the question by specifying an optional parameter when constructing the chain.
|
||||
|
||||
|
||||
```python
|
||||
qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=docsearch.as_retriever(search_type="mmr", search_kwargs={'fetch_k': 30}), return_source_documents=True)
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
query = "What did the president say about Ketanji Brown Jackson"
|
||||
result = qa({"query": query})
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
result["result"]
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice and a former federal public defender from a family of public school educators and police officers, and that she has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans."
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
|
||||
```python
|
||||
result["source_documents"]
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', lookup_str='', metadata={'source': '../../state_of_the_union.txt'}, lookup_index=0),
|
||||
Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \n\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \n\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \n\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', lookup_str='', metadata={'source': '../../state_of_the_union.txt'}, lookup_index=0),
|
||||
Document(page_content='And for our LGBTQ+ Americans, let’s finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \n\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \n\nWhile it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \n\nAnd soon, we’ll strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \n\nSo tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together. \n\nFirst, beat the opioid epidemic.', lookup_str='', metadata={'source': '../../state_of_the_union.txt'}, lookup_index=0),
|
||||
Document(page_content='Tonight, I’m announcing a crackdown on these companies overcharging American businesses and consumers. \n\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \n\nThat ends on my watch. \n\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \n\nWe’ll also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \n\nLet’s pass the Paycheck Fairness Act and paid leave. \n\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \n\nLet’s increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls America’s best-kept secret: community colleges.', lookup_str='', metadata={'source': '../../state_of_the_union.txt'}, lookup_index=0)]
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
Alternatively, if our documents have a "source" metadata key, we can use the `RetrievalQAWithSourcesChain` to cite our sources:
|
||||
|
||||
```python
|
||||
docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": f"{i}-pl"} for i in range(len(texts))])
|
||||
```
|
||||
|
||||
```python
|
||||
from langchain.chains import RetrievalQAWithSourcesChain
|
||||
from langchain.llms import OpenAI
|
||||
|
||||
chain = RetrievalQAWithSourcesChain.from_chain_type(OpenAI(temperature=0), chain_type="stuff", retriever=docsearch.as_retriever())
|
||||
```
|
||||
|
||||
```python
|
||||
chain({"question": "What did the president say about Justice Breyer"}, return_only_outputs=True)
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
{'answer': ' The president honored Justice Breyer for his service and mentioned his legacy of excellence.\n',
|
||||
'sources': '31-pl'}
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
@ -1,199 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Retrieve from vector stores directly\n",
|
||||
"\n",
|
||||
"This notebook walks through how to use LangChain for text generation over a vector index. This is useful if we want to generate text that is able to draw from a large body of custom text, for example, generating blog posts that have an understanding of previous blog posts written, or product tutorials that can refer to product documentation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prepare Data\n",
|
||||
"\n",
|
||||
"First, we prepare the data. For this example, we fetch a documentation site that consists of markdown files hosted on Github and split them into small enough Documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pathlib\n",
|
||||
"import subprocess\n",
|
||||
"import tempfile\n",
|
||||
"\n",
|
||||
"from langchain.docstore.document import Document\n",
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import Chroma"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Cloning into '.'...\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def get_github_docs(repo_owner, repo_name):\n",
|
||||
" with tempfile.TemporaryDirectory() as d:\n",
|
||||
" subprocess.check_call(\n",
|
||||
" f\"git clone --depth 1 https://github.com/{repo_owner}/{repo_name}.git .\",\n",
|
||||
" cwd=d,\n",
|
||||
" shell=True,\n",
|
||||
" )\n",
|
||||
" git_sha = (\n",
|
||||
" subprocess.check_output(\"git rev-parse HEAD\", shell=True, cwd=d)\n",
|
||||
" .decode(\"utf-8\")\n",
|
||||
" .strip()\n",
|
||||
" )\n",
|
||||
" repo_path = pathlib.Path(d)\n",
|
||||
" markdown_files = list(repo_path.glob(\"*/*.md\")) + list(\n",
|
||||
" repo_path.glob(\"*/*.mdx\")\n",
|
||||
" )\n",
|
||||
" for markdown_file in markdown_files:\n",
|
||||
" with open(markdown_file, \"r\") as f:\n",
|
||||
" relative_path = markdown_file.relative_to(repo_path)\n",
|
||||
" github_url = f\"https://github.com/{repo_owner}/{repo_name}/blob/{git_sha}/{relative_path}\"\n",
|
||||
" yield Document(page_content=f.read(), metadata={\"source\": github_url})\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"sources = get_github_docs(\"yirenlu92\", \"deno-manual-forked\")\n",
|
||||
"\n",
|
||||
"source_chunks = []\n",
|
||||
"splitter = CharacterTextSplitter(separator=\" \", chunk_size=1024, chunk_overlap=0)\n",
|
||||
"for source in sources:\n",
|
||||
" for chunk in splitter.split_text(source.page_content):\n",
|
||||
" source_chunks.append(Document(page_content=chunk, metadata=source.metadata))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set Up Vector DB\n",
|
||||
"\n",
|
||||
"Now that we have the documentation content in chunks, let's put all this information in a vector index for easy retrieval."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"search_index = Chroma.from_documents(source_chunks, OpenAIEmbeddings())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set Up LLM Chain with Custom Prompt\n",
|
||||
"\n",
|
||||
"Next, let's set up a simple LLM chain but give it a custom prompt for blog post generation. Note that the custom prompt is parameterized and takes two inputs: `context`, which will be the documents fetched from the vector search, and `topic`, which is given by the user."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import LLMChain\n",
|
||||
"\n",
|
||||
"prompt_template = \"\"\"Use the context below to write a 400 word blog post about the topic below:\n",
|
||||
" Context: {context}\n",
|
||||
" Topic: {topic}\n",
|
||||
" Blog post:\"\"\"\n",
|
||||
"\n",
|
||||
"PROMPT = PromptTemplate(template=prompt_template, input_variables=[\"context\", \"topic\"])\n",
|
||||
"\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"\n",
|
||||
"chain = LLMChain(llm=llm, prompt=PROMPT)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Generate Text\n",
|
||||
"\n",
|
||||
"Finally, we write a function to apply our inputs to the chain. The function takes an input parameter `topic`. We find the documents in the vector index that correspond to that `topic`, and use them as additional context in our simple LLM chain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def generate_blog_post(topic):\n",
|
||||
" docs = search_index.similarity_search(topic, k=4)\n",
|
||||
" inputs = [{\"context\": doc.page_content, \"topic\": topic} for doc in docs]\n",
|
||||
" print(chain.apply(inputs))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[{'text': '\\n\\nEnvironment variables are a great way to store and access sensitive information in your Deno applications. Deno offers built-in support for environment variables with `Deno.env`, and you can also use a `.env` file to store and access environment variables.\\n\\nUsing `Deno.env` is simple. It has getter and setter methods, so you can easily set and retrieve environment variables. For example, you can set the `FIREBASE_API_KEY` and `FIREBASE_AUTH_DOMAIN` environment variables like this:\\n\\n```ts\\nDeno.env.set(\"FIREBASE_API_KEY\", \"examplekey123\");\\nDeno.env.set(\"FIREBASE_AUTH_DOMAIN\", \"firebasedomain.com\");\\n\\nconsole.log(Deno.env.get(\"FIREBASE_API_KEY\")); // examplekey123\\nconsole.log(Deno.env.get(\"FIREBASE_AUTH_DOMAIN\")); // firebasedomain.com\\n```\\n\\nYou can also store environment variables in a `.env` file. This is a great'}, {'text': '\\n\\nEnvironment variables are a powerful tool for managing configuration settings in a program. They allow us to set values that can be used by the program, without having to hard-code them into the code. This makes it easier to change settings without having to modify the code.\\n\\nIn Deno, environment variables can be set in a few different ways. The most common way is to use the `VAR=value` syntax. This will set the environment variable `VAR` to the value `value`. This can be used to set any number of environment variables before running a command. For example, if we wanted to set the environment variable `VAR` to `hello` before running a Deno command, we could do so like this:\\n\\n```\\nVAR=hello deno run main.ts\\n```\\n\\nThis will set the environment variable `VAR` to `hello` before running the command. We can then access this variable in our code using the `Deno.env.get()` function. For example, if we ran the following command:\\n\\n```\\nVAR=hello && deno eval \"console.log(\\'Deno: \\' + Deno.env.get(\\'VAR'}, {'text': '\\n\\nEnvironment variables are a powerful tool for developers, allowing them to store and access data without having to hard-code it into their applications. In Deno, you can access environment variables using the `Deno.env.get()` function.\\n\\nFor example, if you wanted to access the `HOME` environment variable, you could do so like this:\\n\\n```js\\n// env.js\\nDeno.env.get(\"HOME\");\\n```\\n\\nWhen running this code, you\\'ll need to grant the Deno process access to environment variables. This can be done by passing the `--allow-env` flag to the `deno run` command. You can also specify which environment variables you want to grant access to, like this:\\n\\n```shell\\n# Allow access to only the HOME env var\\ndeno run --allow-env=HOME env.js\\n```\\n\\nIt\\'s important to note that environment variables are case insensitive on Windows, so Deno also matches them case insensitively (on Windows only).\\n\\nAnother thing to be aware of when using environment variables is subprocess permissions. Subprocesses are powerful and can access system resources regardless of the permissions you granted to the Den'}, {'text': '\\n\\nEnvironment variables are an important part of any programming language, and Deno is no exception. Deno is a secure JavaScript and TypeScript runtime built on the V8 JavaScript engine, and it recently added support for environment variables. This feature was added in Deno version 1.6.0, and it is now available for use in Deno applications.\\n\\nEnvironment variables are used to store information that can be used by programs. 
They are typically used to store configuration information, such as the location of a database or the name of a user. In Deno, environment variables are stored in the `Deno.env` object. This object is similar to the `process.env` object in Node.js, and it allows you to access and set environment variables.\\n\\nThe `Deno.env` object is a read-only object, meaning that you cannot directly modify the environment variables. Instead, you must use the `Deno.env.set()` function to set environment variables. This function takes two arguments: the name of the environment variable and the value to set it to. For example, if you wanted to set the `FOO` environment variable to `bar`, you would use the following code:\\n\\n```'}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"generate_blog_post(\"environment variables\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
@ -1,5 +1,37 @@
|
||||
{
|
||||
"redirects": [
|
||||
{
|
||||
"source": "/docs/use_cases/question_answering/analyze_document",
|
||||
"destination": "/cookbook"
|
||||
},
|
||||
{
|
||||
"source": "/docs/use_cases/question_answering/qa_citations",
|
||||
"destination": "/cookbook"
|
||||
},
|
||||
{
|
||||
"source": "/docs/use_cases/question_answering/chat_vector_db",
|
||||
"destination": "/docs/use_cases/question_answering/"
|
||||
},
|
||||
{
|
||||
"source": "/docs/use_cases/question_answering/in_memory_question_answering",
|
||||
"destination": "/docs/use_cases/question_answering/"
|
||||
},
|
||||
{
|
||||
"source": "/docs/use_cases/question_answering/multi_retrieval_qa_router",
|
||||
"destination": "/docs/use_cases/question_answering/"
|
||||
},
|
||||
{
|
||||
"source": "/docs/use_cases/question_answering/multiple_retrieval",
|
||||
"destination": "/docs/use_cases/question_answering/"
|
||||
},
|
||||
{
|
||||
"source": "/docs/use_cases/question_answering/vector_db_qa",
|
||||
"destination": "/docs/use_cases/question_answering/"
|
||||
},
|
||||
{
|
||||
"source": "/docs/use_cases/question_answering/vector_db_text_generation",
|
||||
"destination": "/docs/use_cases/question_answering/"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/agents/toolkits(/?)",
|
||||
"destination": "/docs/modules/agents/tools/toolkits"
|
||||
@ -170,7 +202,7 @@
|
||||
},
|
||||
{
|
||||
"source": "/docs/use_cases/question_answering/how_to/chat_vector_db",
|
||||
"destination": "/docs/use_cases/question_answering/chat_vector_db"
|
||||
"destination": "/docs/use_cases/question_answering/"
|
||||
},
|
||||
{
|
||||
"source": "/docs/use_cases/code_understanding",
|
||||
@ -202,7 +234,7 @@
|
||||
},
|
||||
{
|
||||
"source": "/docs/use_cases/question_answering/how_to/qa_citations",
|
||||
"destination": "/docs/use_cases/question_answering/qa_citations"
|
||||
"destination": "/cookbook"
|
||||
},
|
||||
{
|
||||
"source": "/docs/use_cases/question_answering/how_to/question_answering",
|
||||
@ -3686,11 +3718,11 @@
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/analyze_document",
|
||||
"destination": "/docs/use_cases/question_answering/analyze_document"
|
||||
"destination": "/cookbook"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/popular/chat_vector_db",
|
||||
"destination": "/docs/use_cases/question_answering/chat_vector_db"
|
||||
"destination": "/docs/use_cases/question_answering/"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/multi_retrieval_qa_router",
|
||||
@ -3830,7 +3862,7 @@
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/qa_citations",
|
||||
"destination": "/docs/use_cases/question_answering/qa_citations"
|
||||
"destination": "/cookbook"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/chains/additional/vector_db_text_generation",
|
||||
|
@ -245,7 +245,7 @@ class WebBaseLoader(BaseLoader):
     def lazy_load(self) -> Iterator[Document]:
         """Lazy load text from the url(s) in web_path."""
         for path in self.web_paths:
-            soup = self._scrape(path)
+            soup = self._scrape(path, bs_kwargs=self.bs_kwargs)
             text = soup.get_text(**self.bs_get_text_kwargs)
             metadata = _build_metadata(soup, path)
             yield Document(page_content=text, metadata=metadata)
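Not part of the diff: a minimal usage sketch of what this change enables, assuming the `WebBaseLoader` constructor accepts `web_paths` and `bs_kwargs` (the URL and tag filter below are hypothetical). With `bs_kwargs` now forwarded to `_scrape`, lazy loading applies the same BeautifulSoup filtering as eager loading.

```python
import bs4
from langchain.document_loaders import WebBaseLoader

# Hypothetical page; parse_only restricts parsing to <article> tags via bs_kwargs.
loader = WebBaseLoader(
    web_paths=("https://example.com/blog/post",),
    bs_kwargs={"parse_only": bs4.SoupStrainer("article")},
)

# With the change above, bs_kwargs are also applied during lazy_load().
for doc in loader.lazy_load():
    print(doc.metadata.get("source"), len(doc.page_content))
```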