From 76c317ed78b70c6f444fb8c2db8a650866de0486 Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Wed, 15 Nov 2023 10:54:15 -0800 Subject: [PATCH] DOCS: update rag use case (#13319) --- cookbook/README.md | 2 + cookbook/analyze_document.ipynb | 105 ++ .../qa_citations.ipynb | 0 .../question_answering/analyze_document.mdx | 44 - .../question_answering/chat_vector_db.mdx | 434 ------- .../code_understanding.ipynb | 2 +- .../conversational_retrieval_agents.ipynb | 2 +- .../in_memory_question_answering.mdx | 448 ------- .../use_cases/question_answering/index.ipynb | 1076 +++++++++++++---- .../multi_retrieval_qa_router.mdx | 128 -- .../multiple_retrieval.ipynb | 173 --- .../question_answering/vector_db_qa.mdx | 230 ---- .../vector_db_text_generation.ipynb | 199 --- docs/vercel.json | 42 +- .../langchain/document_loaders/web_base.py | 2 +- 15 files changed, 968 insertions(+), 1919 deletions(-) create mode 100644 cookbook/analyze_document.ipynb rename {docs/docs/use_cases/question_answering => cookbook}/qa_citations.ipynb (100%) delete mode 100644 docs/docs/use_cases/question_answering/analyze_document.mdx delete mode 100644 docs/docs/use_cases/question_answering/chat_vector_db.mdx delete mode 100644 docs/docs/use_cases/question_answering/in_memory_question_answering.mdx delete mode 100644 docs/docs/use_cases/question_answering/multi_retrieval_qa_router.mdx delete mode 100644 docs/docs/use_cases/question_answering/multiple_retrieval.ipynb delete mode 100644 docs/docs/use_cases/question_answering/vector_db_qa.mdx delete mode 100644 docs/docs/use_cases/question_answering/vector_db_text_generation.ipynb diff --git a/cookbook/README.md b/cookbook/README.md index 9130c629dae..55c3a1cfd8a 100644 --- a/cookbook/README.md +++ b/cookbook/README.md @@ -8,6 +8,7 @@ Notebook | Description [Semi_Structured_RAG.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/Semi_Structured_RAG.ipynb) | Perform retrieval-augmented generation (rag) on documents with semi-structured data, including text and tables, using unstructured for parsing, multi-vector retriever for storing, and lcel for implementing chains. [Semi_structured_and_multi_moda...](https://github.com/langchain-ai/langchain/tree/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb) | Perform retrieval-augmented generation (rag) on documents with semi-structured data and images, using unstructured for parsing, multi-vector retriever for storage and retrieval, and lcel for implementing chains. [Semi_structured_multi_modal_RA...](https://github.com/langchain-ai/langchain/tree/master/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb) | Perform retrieval-augmented generation (rag) on documents with semi-structured data and images, using various tools and methods such as unstructured for parsing, multi-vector retriever for storing, lcel for implementing chains, and open source language models like llama2, llava, and gpt4all. +[analyze_document.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/analyze_document.ipynb) | Analyze a single long document. [autogpt/autogpt.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/autogpt/autogpt.ipynb) | Implement autogpt, a language model, with langchain primitives such as llms, prompttemplates, vectorstores, embeddings, and tools. [autogpt/marathon_times.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/autogpt/marathon_times.ipynb) | Implement autogpt for finding winning marathon times. 
[baby_agi.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/baby_agi.ipynb) | Implement babyagi, an ai agent that can generate and execute tasks based on a given objective, with the flexibility to swap out specific vectorstores/model providers. @@ -44,6 +45,7 @@ Notebook | Description [plan_and_execute_agent.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/plan_and_execute_agent.ipynb) | Create plan-and-execute agents that accomplish objectives by planning tasks with a language model (llm) and executing them with a separate agent. [press_releases.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/press_releases.ipynb) | Retrieve and query company press release data powered by [Kay.ai](https://kay.ai). [program_aided_language_model.i...](https://github.com/langchain-ai/langchain/tree/master/cookbook/program_aided_language_model.ipynb) | Implement program-aided language models as described in the provided research paper. +[qa_citations.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/qa_citations.ipynb) | Different ways to get a model to cite its sources. [retrieval_in_sql.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/retrieval_in_sql.ipynb) | Perform retrieval-augmented-generation (rag) on a PostgreSQL database using pgvector. [sales_agent_with_context.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/sales_agent_with_context.ipynb) | Implement a context-aware ai sales agent, salesgpt, that can have natural sales conversations, interact with other systems, and use a product knowledge base to discuss a company's offerings. [self_query_hotel_search.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/self_query_hotel_search.ipynb) | Build a hotel room search feature with self-querying retrieval, using a specific hotel recommendation dataset. diff --git a/cookbook/analyze_document.ipynb b/cookbook/analyze_document.ipynb new file mode 100644 index 00000000000..9bfc43918a6 --- /dev/null +++ b/cookbook/analyze_document.ipynb @@ -0,0 +1,105 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f69d4a4c-137d-47e9-bea1-786afce9c1c0", + "metadata": {}, + "source": [ + "# Analyze a single long document\n", + "\n", + "The AnalyzeDocumentChain takes in a single document, splits it up, and then runs it through a CombineDocumentsChain." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "2a0707ce-6d2d-471b-bc33-64da32a7b3f0", + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"../docs/docs/modules/state_of_the_union.txt\") as f:\n", + " state_of_the_union = f.read()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "ca14d161-2d5b-4a6c-a296-77d8ce4b28cd", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains import AnalyzeDocumentChain\n", + "from langchain.chat_models import ChatOpenAI\n", + "\n", + "llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "9f97406c-85a9-45fb-99ce-9138c0ba3731", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains.question_answering import load_qa_chain\n", + "\n", + "qa_chain = load_qa_chain(llm, chain_type=\"map_reduce\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "0871a753-f5bb-4b4f-a394-f87f2691f659", + "metadata": {}, + "outputs": [], + "source": [ + "qa_document_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "e6f86428-3c2c-46a0-a57c-e22826fdbf91", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'The President said, \"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service.\"'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "qa_document_chain.run(\n", + " input_document=state_of_the_union,\n", + " question=\"what did the president say about justice breyer?\",\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/use_cases/question_answering/qa_citations.ipynb b/cookbook/qa_citations.ipynb similarity index 100% rename from docs/docs/use_cases/question_answering/qa_citations.ipynb rename to cookbook/qa_citations.ipynb diff --git a/docs/docs/use_cases/question_answering/analyze_document.mdx b/docs/docs/use_cases/question_answering/analyze_document.mdx deleted file mode 100644 index a0715e0881f..00000000000 --- a/docs/docs/use_cases/question_answering/analyze_document.mdx +++ /dev/null @@ -1,44 +0,0 @@ -# Analyze a single long document - -The AnalyzeDocumentChain takes in a single document, splits it up, and then runs it through a CombineDocumentsChain. 
- -```python -with open("../../state_of_the_union.txt") as f: - state_of_the_union = f.read() -``` - -```python -from langchain.llms import OpenAI -from langchain.chains import AnalyzeDocumentChain - - -llm = OpenAI(temperature=0) -``` - - -```python -from langchain.chains.question_answering import load_qa_chain -``` - - -```python -qa_chain = load_qa_chain(llm, chain_type="map_reduce") -``` - - -```python -qa_document_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain) -``` - - -```python -qa_document_chain.run(input_document=state_of_the_union, question="what did the president say about justice breyer?") -``` - - - -``` - ' The president thanked Justice Breyer for his service.' -``` - - diff --git a/docs/docs/use_cases/question_answering/chat_vector_db.mdx b/docs/docs/use_cases/question_answering/chat_vector_db.mdx deleted file mode 100644 index eb078946eca..00000000000 --- a/docs/docs/use_cases/question_answering/chat_vector_db.mdx +++ /dev/null @@ -1,434 +0,0 @@ ---- -sidebar_position: 2 ---- - -# Remembering chat history -The ConversationalRetrievalQA chain builds on RetrievalQAChain to provide a chat history component. - -It first combines the chat history (either explicitly passed in or retrieved from the provided memory) and the question into a standalone question, then looks up relevant documents from the retriever, and finally passes those documents and the question to a question-answering chain to return a response. - -To create one, you will need a retriever. In the below example, we will create one from a vector store, which can be created from embeddings. - -```python -from langchain.embeddings.openai import OpenAIEmbeddings -from langchain.vectorstores import Chroma -from langchain.text_splitter import CharacterTextSplitter -from langchain.llms import OpenAI -from langchain.chains import ConversationalRetrievalChain -``` - -Load in documents. You can replace this with a loader for whatever type of data you want - -```python -from langchain.document_loaders import TextLoader -loader = TextLoader("../../state_of_the_union.txt") -documents = loader.load() -``` - -If you had multiple loaders that you wanted to combine, you do something like: - -```python -# loaders = [....] -# docs = [] -# for loader in loaders: -# docs.extend(loader.load()) -``` - -We now split the documents, create embeddings for them, and put them in a vectorstore. This allows us to do semantic search over them. - -```python -text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) -documents = text_splitter.split_documents(documents) - -embeddings = OpenAIEmbeddings() -vectorstore = Chroma.from_documents(documents, embeddings) -``` - - - -``` - Using embedded DuckDB without persistence: data will be transient -``` - - - -We can now create a memory object, which is necessary to track the inputs/outputs and hold a conversation. 
- -```python -from langchain.memory import ConversationBufferMemory -memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True) -``` - -We now initialize the `ConversationalRetrievalChain` - -```python -qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore.as_retriever(), memory=memory) -``` - -```python -query = "What did the president say about Ketanji Brown Jackson" -result = qa({"question": query}) -``` - -```python -result["answer"] -``` - - - -``` - " The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans." -``` - - - -```python -query = "Did he mention who she succeeded" -result = qa({"question": query}) -``` - -```python -result['answer'] -``` - - - -``` - ' Ketanji Brown Jackson succeeded Justice Stephen Breyer on the United States Supreme Court.' -``` - - - -## Pass in chat history - -In the above example, we used a Memory object to track chat history. We can also just pass it in explicitly. In order to do this, we need to initialize a chain without any memory object. - -```python -qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore.as_retriever()) -``` - -Here's an example of asking a question with no chat history - -```python -chat_history = [] -query = "What did the president say about Ketanji Brown Jackson" -result = qa({"question": query, "chat_history": chat_history}) -``` - -```python -result["answer"] -``` - - - -``` - " The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans." -``` - - - -Here's an example of asking a question with some chat history - -```python -chat_history = [(query, result["answer"])] -query = "Did he mention who she succeeded" -result = qa({"question": query, "chat_history": chat_history}) -``` - -```python -result['answer'] -``` - - - -``` - ' Ketanji Brown Jackson succeeded Justice Stephen Breyer on the United States Supreme Court.' -``` - - - -## Using a different model for condensing the question - -This chain has two steps. First, it condenses the current question and the chat history into a standalone question. This is necessary to create a standanlone vector to use for retrieval. After that, it does retrieval and then answers the question using retrieval augmented generation with a separate model. Part of the power of the declarative nature of LangChain is that you can easily use a separate language model for each call. This can be useful to use a cheaper and faster model for the simpler task of condensing the question, and then a more expensive model for answering the question. Here is an example of doing so. 
- -```python -from langchain.chat_models import ChatOpenAI -``` - -```python -qa = ConversationalRetrievalChain.from_llm( - ChatOpenAI(temperature=0, model="gpt-4"), - vectorstore.as_retriever(), - condense_question_llm = ChatOpenAI(temperature=0, model='gpt-3.5-turbo'), -) -``` - -```python -chat_history = [] -query = "What did the president say about Ketanji Brown Jackson" -result = qa({"question": query, "chat_history": chat_history}) -``` - -```python -chat_history = [(query, result["answer"])] -query = "Did he mention who she succeeded" -result = qa({"question": query, "chat_history": chat_history}) -``` - -## Using a custom prompt for condensing the question - -By default, ConversationalRetrievalQA uses CONDENSE_QUESTION_PROMPT to condense a question. Here is the implementation of this in the docs - -```python -from langchain.prompts.prompt import PromptTemplate - -_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language. - -Chat History: -{chat_history} -Follow Up Input: {question} -Standalone question:""" -CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template) - -``` - -But instead of this any custom template can be used to further augment information in the question or instruct the LLM to do something. Here is an example - -```python -from langchain.prompts.prompt import PromptTemplate -``` - -```python -custom_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. At the end of standalone question add this 'Answer the question in German language.' If you do not know the answer reply with 'I am sorry'. -Chat History: -{chat_history} -Follow Up Input: {question} -Standalone question:""" -``` - -```python -CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template) -``` - -```python -model = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.3) -embeddings = OpenAIEmbeddings() -vectordb = Chroma(embedding_function=embeddings, persist_directory=directory) -memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True) -qa = ConversationalRetrievalChain.from_llm( - model, - vectordb.as_retriever(), - condense_question_prompt=CUSTOM_QUESTION_PROMPT, - memory=memory -) -``` - -```python -query = "What did the president say about Ketanji Brown Jackson" -result = qa({"question": query}) -``` - -```python -query = "Did he mention who she succeeded" -result = qa({"question": query}) -``` - -## Return Source Documents - -You can also easily return source documents from the ConversationalRetrievalChain. This is useful for when you want to inspect what documents were returned. - -```python -qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore.as_retriever(), return_source_documents=True) -``` - -```python -chat_history = [] -query = "What did the president say about Ketanji Brown Jackson" -result = qa({"question": query, "chat_history": chat_history}) -``` - -```python -result['source_documents'][0] -``` - - - -``` - Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. 
Justice Breyer, thank you for your service. \n\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../state_of_the_union.txt'}) -``` - - - -## ConversationalRetrievalChain with `search_distance` - -If you are using a vector store that supports filtering by search distance, you can add a threshold value parameter. - -```python -vectordbkwargs = {"search_distance": 0.9} -``` - -```python -qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore.as_retriever(), return_source_documents=True) -chat_history = [] -query = "What did the president say about Ketanji Brown Jackson" -result = qa({"question": query, "chat_history": chat_history, "vectordbkwargs": vectordbkwargs}) -``` - -## ConversationalRetrievalChain with `map_reduce` - -We can also use different types of combine document chains with the ConversationalRetrievalChain chain. - -```python -from langchain.chains import LLMChain -from langchain.chains.question_answering import load_qa_chain -from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT -``` - -```python -llm = OpenAI(temperature=0) -question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT) -doc_chain = load_qa_chain(llm, chain_type="map_reduce") - -chain = ConversationalRetrievalChain( - retriever=vectorstore.as_retriever(), - question_generator=question_generator, - combine_docs_chain=doc_chain, -) -``` - -```python -chat_history = [] -query = "What did the president say about Ketanji Brown Jackson" -result = chain({"question": query, "chat_history": chat_history}) -``` - -```python -result['answer'] -``` - - - -``` - " The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, from a family of public school educators and police officers, a consensus builder, and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans." -``` - - - -## ConversationalRetrievalChain with Question Answering with sources - -You can also use this chain with the question answering with sources chain. - -```python -from langchain.chains.qa_with_sources import load_qa_with_sources_chain -``` - -```python -llm = OpenAI(temperature=0) -question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT) -doc_chain = load_qa_with_sources_chain(llm, chain_type="map_reduce") - -chain = ConversationalRetrievalChain( - retriever=vectorstore.as_retriever(), - question_generator=question_generator, - combine_docs_chain=doc_chain, -) -``` - -```python -chat_history = [] -query = "What did the president say about Ketanji Brown Jackson" -result = chain({"question": query, "chat_history": chat_history}) -``` - -```python -result['answer'] -``` - - - -``` - " The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, from a family of public school educators and police officers, a consensus builder, and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. 
\nSOURCES: ../../state_of_the_union.txt" -``` - - - -## ConversationalRetrievalChain with streaming to `stdout` - -Output from the chain will be streamed to `stdout` token by token in this example. - -```python -from langchain.chains.llm import LLMChain -from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler -from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT -from langchain.chains.question_answering import load_qa_chain - -# Construct a ConversationalRetrievalChain with a streaming llm for combine docs -# and a separate, non-streaming llm for question generation -llm = OpenAI(temperature=0) -streaming_llm = OpenAI(streaming=True, callbacks=[StreamingStdOutCallbackHandler()], temperature=0) - -question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT) -doc_chain = load_qa_chain(streaming_llm, chain_type="stuff", prompt=QA_PROMPT) - -qa = ConversationalRetrievalChain( - retriever=vectorstore.as_retriever(), combine_docs_chain=doc_chain, question_generator=question_generator) -``` - -```python -chat_history = [] -query = "What did the president say about Ketanji Brown Jackson" -result = qa({"question": query, "chat_history": chat_history}) -``` - - - -``` - The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. -``` - - - -```python -chat_history = [(query, result["answer"])] -query = "Did he mention who she succeeded" -result = qa({"question": query, "chat_history": chat_history}) -``` - - - -``` - Ketanji Brown Jackson succeeded Justice Stephen Breyer on the United States Supreme Court. -``` - - - -## get_chat_history Function - -You can also specify a `get_chat_history` function, which can be used to format the chat_history string. - -```python -def get_chat_history(inputs) -> str: - res = [] - for human, ai in inputs: - res.append(f"Human:{human}\nAI:{ai}") - return "\n".join(res) -qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore.as_retriever(), get_chat_history=get_chat_history) -``` - -```python -chat_history = [] -query = "What did the president say about Ketanji Brown Jackson" -result = qa({"question": query, "chat_history": chat_history}) -``` - -```python -result['answer'] -``` - - - -``` - " The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans." 
-``` - - diff --git a/docs/docs/use_cases/question_answering/code_understanding.ipynb b/docs/docs/use_cases/question_answering/code_understanding.ipynb index 6162a15ca8f..9abe005d81f 100644 --- a/docs/docs/use_cases/question_answering/code_understanding.ipynb +++ b/docs/docs/use_cases/question_answering/code_understanding.ipynb @@ -367,7 +367,7 @@ "source": [ "from langchain.callbacks.manager import CallbackManager\n", "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", - "from langchain.chains import ConversationalRetrievalChain\n", + "from langchain.chains import ConversationalRetrievalChain, LLMChain\n", "from langchain.llms import LlamaCpp\n", "from langchain.memory import ConversationSummaryMemory\n", "from langchain.prompts import PromptTemplate" diff --git a/docs/docs/use_cases/question_answering/conversational_retrieval_agents.ipynb b/docs/docs/use_cases/question_answering/conversational_retrieval_agents.ipynb index 3b0aae632ab..502ea5053f6 100644 --- a/docs/docs/use_cases/question_answering/conversational_retrieval_agents.ipynb +++ b/docs/docs/use_cases/question_answering/conversational_retrieval_agents.ipynb @@ -5,7 +5,7 @@ "id": "839f3c76", "metadata": {}, "source": [ - "# Agent with retrieval tool\n", + "# RAG with Agents\n", "\n", "This is an agent specifically optimized for doing retrieval when necessary and also holding a conversation.\n", "\n", diff --git a/docs/docs/use_cases/question_answering/in_memory_question_answering.mdx b/docs/docs/use_cases/question_answering/in_memory_question_answering.mdx deleted file mode 100644 index a8e7ca43bcd..00000000000 --- a/docs/docs/use_cases/question_answering/in_memory_question_answering.mdx +++ /dev/null @@ -1,448 +0,0 @@ -# RAG over in-memory documents - -Here we walk through how to use LangChain for question answering over a list of documents. Under the hood we'll be using our [Document chains](/docs/modules/chains/document/). - -## Prepare Data -First we prepare the data. For this example we do similarity search over a vector database, but these documents could be fetched in any manner (the point of this notebook to highlight what to do AFTER you fetch the documents). - - -```python -from langchain.embeddings.openai import OpenAIEmbeddings -from langchain.text_splitter import CharacterTextSplitter -from langchain.vectorstores import Chroma -from langchain.docstore.document import Document -from langchain.prompts import PromptTemplate -from langchain.indexes.vectorstore import VectorstoreIndexCreator -``` - - -```python -with open("../../state_of_the_union.txt") as f: - state_of_the_union = f.read() -text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) -texts = text_splitter.split_text(state_of_the_union) - -embeddings = OpenAIEmbeddings() -``` - - -```python -docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": str(i)} for i in range(len(texts))]).as_retriever() -``` - - - -``` - Running Chroma using direct local API. - Using DuckDB in-memory for database. Data will be transient. 
-``` - - - - -```python -query = "What did the president say about Justice Breyer" -docs = docsearch.get_relevant_documents(query) -``` - - -```python -from langchain.chains.question_answering import load_qa_chain -from langchain.llms import OpenAI -``` - -## Quickstart -If you just want to get started as quickly as possible, this is the recommended way to do it: - - -```python -chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff") -query = "What did the president say about Justice Breyer" -chain.run(input_documents=docs, question=query) -``` - - - -``` - ' The president said that Justice Breyer has dedicated his life to serve the country and thanked him for his service.' -``` - - - -If you want more control and understanding over what is happening, please see the information below. - -## The `stuff` Chain - -This sections shows results of using the `stuff` Chain to do question answering. - - -```python -chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff") -``` - - -```python -query = "What did the president say about Justice Breyer" -chain({"input_documents": docs, "question": query}, return_only_outputs=True) -``` - - - -``` - {'output_text': ' The president said that Justice Breyer has dedicated his life to serve the country and thanked him for his service.'} -``` - - - -**Custom Prompts** - -You can also use your own prompts with this chain. In this example, we will respond in Italian. - - -```python -prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. - -{context} - -Question: {question} -Answer in Italian:""" -PROMPT = PromptTemplate( - template=prompt_template, input_variables=["context", "question"] -) -chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff", prompt=PROMPT) -chain({"input_documents": docs, "question": query}, return_only_outputs=True) -``` - - - -``` - {'output_text': ' Il presidente ha detto che Justice Breyer ha dedicato la sua vita a servire questo paese e ha ricevuto una vasta gamma di supporto.'} -``` - - - -## The `map_reduce` Chain - -This sections shows results of using the `map_reduce` Chain to do question answering. - - -```python -chain = load_qa_chain(OpenAI(temperature=0), chain_type="map_reduce") -``` - - -```python -query = "What did the president say about Justice Breyer" -chain({"input_documents": docs, "question": query}, return_only_outputs=True) -``` - - - -``` - {'output_text': ' The president said that Justice Breyer is an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court, and thanked him for his service.'} -``` - - - -**Intermediate Steps** - -We can also return the intermediate steps for `map_reduce` chains, should we want to inspect them. This is done with the `return_map_steps` variable. - - -```python -chain = load_qa_chain(OpenAI(temperature=0), chain_type="map_reduce", return_map_steps=True) -``` - - -```python -chain({"input_documents": docs, "question": query}, return_only_outputs=True) -``` - - - -``` - {'intermediate_steps': [' "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service."', - ' A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. 
A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.', - ' None', - ' None'], - 'output_text': ' The president said that Justice Breyer is an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court, and thanked him for his service.'} -``` - - - -**Custom Prompts** - -You can also use your own prompts with this chain. In this example, we will respond in Italian. - - -```python -question_prompt_template = """Use the following portion of a long document to see if any of the text is relevant to answer the question. -Return any relevant text translated into italian. -{context} -Question: {question} -Relevant text, if any, in Italian:""" -QUESTION_PROMPT = PromptTemplate( - template=question_prompt_template, input_variables=["context", "question"] -) - -combine_prompt_template = """Given the following extracted parts of a long document and a question, create a final answer italian. -If you don't know the answer, just say that you don't know. Don't try to make up an answer. - -QUESTION: {question} -========= -{summaries} -========= -Answer in Italian:""" -COMBINE_PROMPT = PromptTemplate( - template=combine_prompt_template, input_variables=["summaries", "question"] -) -chain = load_qa_chain(OpenAI(temperature=0), chain_type="map_reduce", return_map_steps=True, question_prompt=QUESTION_PROMPT, combine_prompt=COMBINE_PROMPT) -chain({"input_documents": docs, "question": query}, return_only_outputs=True) -``` - - - -``` - {'intermediate_steps': ["\nStasera vorrei onorare qualcuno che ha dedicato la sua vita a servire questo paese: il giustizia Stephen Breyer - un veterano dell'esercito, uno studioso costituzionale e un giustizia in uscita della Corte Suprema degli Stati Uniti. Giustizia Breyer, grazie per il tuo servizio.", - '\nNessun testo pertinente.', - ' Non ha detto nulla riguardo a Justice Breyer.', - " Non c'è testo pertinente."], - 'output_text': ' Non ha detto nulla riguardo a Justice Breyer.'} -``` - - - -**Batch Size** - -When using the `map_reduce` chain, one thing to keep in mind is the batch size you are using during the map step. If this is too high, it could cause rate limiting errors. You can control this by setting the batch size on the LLM used. Note that this only applies for LLMs with this parameter. Below is an example of doing so: - -```python -llm = OpenAI(batch_size=5, temperature=0) -``` - -## The `refine` Chain - -This sections shows results of using the `refine` Chain to do question answering. - - -```python -chain = load_qa_chain(OpenAI(temperature=0), chain_type="refine") -``` - - -```python -query = "What did the president say about Justice Breyer" -chain({"input_documents": docs, "question": query}, return_only_outputs=True) -``` - - - -``` - {'output_text': '\n\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice, as well as for his support of the Equality Act and his commitment to protecting the rights of LGBTQ+ Americans. 
He also praised Justice Breyer for his role in helping to pass the Bipartisan Infrastructure Law, which he said would be the most sweeping investment to rebuild America in history and would help the country compete for the jobs of the 21st Century.'} -``` - - - -**Intermediate Steps** - -We can also return the intermediate steps for `refine` chains, should we want to inspect them. This is done with the `return_refine_steps` variable. - - -```python -chain = load_qa_chain(OpenAI(temperature=0), chain_type="refine", return_refine_steps=True) -``` - - -```python -chain({"input_documents": docs, "question": query}, return_only_outputs=True) -``` - - - -``` - {'intermediate_steps': ['\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country and his legacy of excellence.', - '\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice.', - '\n\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice, as well as for his support of the Equality Act and his commitment to protecting the rights of LGBTQ+ Americans.', - '\n\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice, as well as for his support of the Equality Act and his commitment to protecting the rights of LGBTQ+ Americans. He also praised Justice Breyer for his role in helping to pass the Bipartisan Infrastructure Law, which is the most sweeping investment to rebuild America in history.'], - 'output_text': '\n\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice, as well as for his support of the Equality Act and his commitment to protecting the rights of LGBTQ+ Americans. He also praised Justice Breyer for his role in helping to pass the Bipartisan Infrastructure Law, which is the most sweeping investment to rebuild America in history.'} -``` - - - -**Custom Prompts** - -You can also use your own prompts with this chain. In this example, we will respond in Italian. - - -```python -refine_prompt_template = ( - "The original question is as follows: {question}\n" - "We have provided an existing answer: {existing_answer}\n" - "We have the opportunity to refine the existing answer" - "(only if needed) with some more context below.\n" - "------------\n" - "{context_str}\n" - "------------\n" - "Given the new context, refine the original answer to better " - "answer the question. " - "If the context isn't useful, return the original answer. Reply in Italian." -) -refine_prompt = PromptTemplate( - input_variables=["question", "existing_answer", "context_str"], - template=refine_prompt_template, -) - - -initial_qa_template = ( - "Context information is below. 
\n" - "---------------------\n" - "{context_str}" - "\n---------------------\n" - "Given the context information and not prior knowledge, " - "answer the question: {question}\nYour answer should be in Italian.\n" -) -initial_qa_prompt = PromptTemplate( - input_variables=["context_str", "question"], template=initial_qa_template -) -chain = load_qa_chain(OpenAI(temperature=0), chain_type="refine", return_refine_steps=True, - question_prompt=initial_qa_prompt, refine_prompt=refine_prompt) -chain({"input_documents": docs, "question": query}, return_only_outputs=True) -``` - - - -``` - {'intermediate_steps': ['\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese e ha reso omaggio al suo servizio.', - "\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese, ha reso omaggio al suo servizio e ha sostenuto la nomina di una top litigatrice in pratica privata, un ex difensore pubblico federale e una famiglia di insegnanti e agenti di polizia delle scuole pubbliche. Ha anche sottolineato l'importanza di avanzare la libertà e la giustizia attraverso la sicurezza delle frontiere e la risoluzione del sistema di immigrazione.", - "\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese, ha reso omaggio al suo servizio e ha sostenuto la nomina di una top litigatrice in pratica privata, un ex difensore pubblico federale e una famiglia di insegnanti e agenti di polizia delle scuole pubbliche. Ha anche sottolineato l'importanza di avanzare la libertà e la giustizia attraverso la sicurezza delle frontiere, la risoluzione del sistema di immigrazione, la protezione degli americani LGBTQ+ e l'approvazione dell'Equality Act. Ha inoltre sottolineato l'importanza di lavorare insieme per sconfiggere l'epidemia di oppiacei.", - "\n\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese, ha reso omaggio al suo servizio e ha sostenuto la nomina di una top litigatrice in pratica privata, un ex difensore pubblico federale e una famiglia di insegnanti e agenti di polizia delle scuole pubbliche. Ha anche sottolineato l'importanza di avanzare la libertà e la giustizia attraverso la sicurezza delle frontiere, la risoluzione del sistema di immigrazione, la protezione degli americani LGBTQ+ e l'approvazione dell'Equality Act. Ha inoltre sottolineato l'importanza di lavorare insieme per sconfiggere l'epidemia di oppiacei e per investire in America, educare gli americani, far crescere la forza lavoro e costruire l'economia dal"], - 'output_text': "\n\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese, ha reso omaggio al suo servizio e ha sostenuto la nomina di una top litigatrice in pratica privata, un ex difensore pubblico federale e una famiglia di insegnanti e agenti di polizia delle scuole pubbliche. Ha anche sottolineato l'importanza di avanzare la libertà e la giustizia attraverso la sicurezza delle frontiere, la risoluzione del sistema di immigrazione, la protezione degli americani LGBTQ+ e l'approvazione dell'Equality Act. Ha inoltre sottolineato l'importanza di lavorare insieme per sconfiggere l'epidemia di oppiacei e per investire in America, educare gli americani, far crescere la forza lavoro e costruire l'economia dal"} -``` - - - -## The `map-rerank` Chain - -This sections shows results of using the `map-rerank` Chain to do question answering with sources. 
- - -```python -chain = load_qa_chain(OpenAI(temperature=0), chain_type="map_rerank", return_intermediate_steps=True) -``` - - -```python -query = "What did the president say about Justice Breyer" -results = chain({"input_documents": docs, "question": query}, return_only_outputs=True) -``` - - -```python -results["output_text"] -``` - - - -``` - ' The President thanked Justice Breyer for his service and honored him for dedicating his life to serve the country.' -``` - - - - -```python -results["intermediate_steps"] -``` - - - -``` - [{'answer': ' The President thanked Justice Breyer for his service and honored him for dedicating his life to serve the country.', - 'score': '100'}, - {'answer': ' This document does not answer the question', 'score': '0'}, - {'answer': ' This document does not answer the question', 'score': '0'}, - {'answer': ' This document does not answer the question', 'score': '0'}] -``` - - - -**Custom Prompts** - -You can also use your own prompts with this chain. In this example, we will respond in Italian. - - -```python -from langchain.output_parsers import RegexParser - -output_parser = RegexParser( - regex=r"(.*?)\nScore: (.*)", - output_keys=["answer", "score"], -) - -prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. - -In addition to giving an answer, also return a score of how fully it answered the user's question. This should be in the following format: - -Question: [question here] -Helpful Answer In Italian: [answer here] -Score: [score between 0 and 100] - -Begin! - -Context: ---------- -{context} ---------- -Question: {question} -Helpful Answer In Italian:""" -PROMPT = PromptTemplate( - template=prompt_template, - input_variables=["context", "question"], - output_parser=output_parser, -) - -chain = load_qa_chain(OpenAI(temperature=0), chain_type="map_rerank", return_intermediate_steps=True, prompt=PROMPT) -query = "What did the president say about Justice Breyer" -chain({"input_documents": docs, "question": query}, return_only_outputs=True) -``` - - - -``` - {'intermediate_steps': [{'answer': ' Il presidente ha detto che Justice Breyer ha dedicato la sua vita a servire questo paese.', - 'score': '100'}, - {'answer': ' Il presidente non ha detto nulla sulla Giustizia Breyer.', - 'score': '100'}, - {'answer': ' Non so.', 'score': '0'}, - {'answer': ' Non so.', 'score': '0'}], - 'output_text': ' Il presidente ha detto che Justice Breyer ha dedicato la sua vita a servire questo paese.'} -``` - - - -## Document QA with sources - -We can also perform document QA and return the sources that were used to answer the question. 
To do this we'll just need to make sure each document has a "source" key in the metadata, and we'll use the `load_qa_with_sources` helper to construct our chain: - -```python -docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": str(i)} for i in range(len(texts))]) -query = "What did the president say about Justice Breyer" -docs = docsearch.similarity_search(query) -``` - -```python -from langchain.chains.qa_with_sources import load_qa_with_sources_chain - -chain = load_qa_with_sources_chain(OpenAI(temperature=0), chain_type="stuff") -query = "What did the president say about Justice Breyer" -chain({"input_documents": docs, "question": query}, return_only_outputs=True) -``` - - - -``` - {'output_text': ' The president thanked Justice Breyer for his service.\nSOURCES: 30-pl'} -``` - - - diff --git a/docs/docs/use_cases/question_answering/index.ipynb b/docs/docs/use_cases/question_answering/index.ipynb index 299f76c5a82..40bb8c60d22 100644 --- a/docs/docs/use_cases/question_answering/index.ipynb +++ b/docs/docs/use_cases/question_answering/index.ipynb @@ -1,162 +1,221 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "86fc5bb2-017f-434e-8cd6-53ab214a5604", + "metadata": {}, + "source": [ + "# Retrieval-augmented generation (RAG)" + ] + }, + { + "cell_type": "markdown", + "id": "de913d6d-c57f-4927-82fe-18902a636861", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/use_cases/question_answering/index.ipynb)" + ] + }, { "cell_type": "markdown", "id": "5151afed", "metadata": {}, "source": [ - "# Retrieval-augmented generation (RAG)\n", - "\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/use_cases/question_answering/qa.ipynb)\n", - "\n", - "## Use case\n", - "Suppose you have some text documents (PDF, blog, Notion pages, etc.) and want to ask questions related to the contents of those documents. \n", - "\n", - "LLMs, given their proficiency in understanding text, are a great tool for this.\n", - "\n", - "In this walkthrough we'll go over how to build a question-answering over documents application using LLMs. \n", - "\n", - "Two very related use cases which we cover elsewhere are:\n", - "- [QA over structured data](/docs/use_cases/qa_structured/sql) (e.g., SQL)\n", - "- [QA over code](/docs/use_cases/code_understanding) (e.g., Python)\n", - "\n", - "![intro.png](/img/qa_intro.png)\n", - "\n", "## Overview\n", - "The pipeline for converting raw unstructured data into a QA chain looks like this:\n", - "1. `Loading`: First we need to load our data. Use the [LangChain integration hub](https://integrations.langchain.com/) to browse the full set of loaders. \n", - "2. `Splitting`: [Text splitters](/docs/modules/data_connection/document_transformers/) break `Documents` into splits of specified size\n", - "3. `Storage`: Storage (e.g., often a [vectorstore](/docs/modules/data_connection/vectorstores/)) will house [and often embed](https://www.pinecone.io/learn/vector-embeddings/) the splits\n", - "4. `Retrieval`: The app retrieves splits from storage (e.g., often [with similar embeddings](https://www.pinecone.io/learn/k-nearest-neighbor/) to the input question)\n", - "5. 
`Generation`: An [LLM](/docs/modules/model_io/llms/) produces an answer using a prompt that includes the question and the retrieved data\n", "\n", - "![flow.jpeg](/img/qa_flow.jpeg)\n", + "### What is RAG?\n", "\n", - "## Quickstart\n", + "RAG is a technique for augmenting LLM knowledge with additional, often private or real-time, data.\n", "\n", - "Suppose we want a QA app over this [blog post](https://lilianweng.github.io/posts/2023-06-23-agent/). \n", + "LLMs can reason about wide-ranging topics, but their knowledge is limited to the public data up to a specific point in time that they were trained on. If you want to build AI applications that can reason about private data or data introduced after a model's cutoff date, you need to augment the knowledge of the model with the specific information it needs. The process of bringing the appropriate information and inserting it into the model prompt is known as Retrieval Augmented Generation (RAG).\n", "\n", - "We can create this in a few lines of code. \n", + "### What's in this guide?\n", "\n", - "First set environment variables and install packages:" + "LangChain has a number of components specifically designed to help build RAG applications. To familiarize ourselves with these, we'll build a simple question-answering application over a text data source. Specifically, we'll build a QA bot over the [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/) blog post by Lilian Weng. Along the way we'll go over a typical QA architecture, discuss the relevant LangChain components, and highlight additional resources for more advanced QA techniques. We'll also see how LangSmith can help us trace and understand our application. LangSmith will become increasingly helpful as our application grows in complexity.\n", + "\n", + "**Note**\n", + "Here we focus on RAG for unstructured data. Two RAG use cases which we cover elsewhere are:\n", + "- [QA over structured data](/docs/use_cases/qa_structured/sql) (e.g., SQL)\n", + "- [QA over code](/docs/use_cases/question_answering/code_understanding) (e.g., Python)" + ] + }, + { + "cell_type": "markdown", + "id": "2f25cbbd-0938-4e3d-87e4-17a204a03ffb", + "metadata": {}, + "source": [ + "## Architecture\n", + "A typical RAG application has two main components:\n", + "\n", + "**Indexing**: a pipeline for ingesting data from a source and indexing it. *This usually happen offline.*\n", + "\n", + "**Retrieval and generation**: the actual RAG chain, which takes the user query at run time and retrieves the relevant data from the index, then passes that to the model.\n", + "\n", + "The most common full sequence from raw data to answer looks like:\n", + "\n", + "#### Indexing\n", + "1. **Load**: First we need to load our data. We'll use [DocumentLoaders](/docs/modules/data_connection/document_loaders/) for this.\n", + "2. **Split**: [Text splitters](/docs/modules/data_connection/document_transformers/) break large `Documents` into smaller chunks. This is useful both for indexing data and for passing it in to a model, since large chunks are harder to search over and won't in a model's finite context window.\n", + "3. **Store**: We need somewhere to store and index our splits, so that they can later be searched over. This is often done using a [VectorStore](/docs/modules/data_connection/vectorstores/) and [Embeddings](/docs/modules/data_connection/text_embedding/) model.\n", + "#### Retrieval and generation\n", + "4. 
**Retrieve**: Given a user input, relevant splits are retrieved from storage using a [Retriever](/docs/modules/data_connection/retrievers/).\n", + "5. **Generate**: A [ChatModel](/docs/modules/model_io/chat_models) / [LLM](/docs/modules/model_io/llms/) produces an answer using a prompt that includes the question and the retrieved data\n", + "\n", + "![flow.jpeg](/img/qa_flow.jpeg)" + ] + }, + { + "cell_type": "markdown", + "id": "487d8d79-5ee9-4aa4-9fdf-cd5f4303e099", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "### Dependencies\n", + "\n", + "We'll use an OpenAI chat model and embeddings and a Chroma vector store in this walkthrough, but everything shown here works with any [ChatModel](/docs/integrations/chat/) or [LLM](/docs/integrations/llms/), [Embeddings](/docs/integrations/text_embedding/), and [VectorStore](/docs/integrations/vectorstores/) or [Retriever](/docs/integrations/retrievers). \n", + "\n", + "We'll use the following packages:" ] }, { "cell_type": "code", "execution_count": null, - "id": "e14b744b", + "id": "28d272cd-4e31-40aa-bbb4-0be0a1f49a14", "metadata": {}, "outputs": [], "source": [ - "pip install langchain openai chromadb langchainhub\n", + "!pip install -U langchain openai chromadb langchainhub bs4" + ] + }, + { + "cell_type": "markdown", + "id": "51ef48de-70b6-4f43-8e0b-ab9b84c9c02a", + "metadata": {}, + "source": [ + "We need to set environment variable `OPENAI_API_KEY`, which can be done directly or loaded from a `.env` file like so:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "143787ca-d8e6-4dc9-8281-4374f4d71720", + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()\n", "\n", - "# Set env var OPENAI_API_KEY or load from a .env file\n", "# import dotenv\n", "\n", "# dotenv.load_dotenv()" ] }, + { + "cell_type": "markdown", + "id": "1665e740-ce01-4f09-b9ed-516db0bd326f", + "metadata": {}, + "source": [ + "### LangSmith\n", + "\n", + "Many of the applications you build with LangChain will contain multiple steps with multiple invocations of LLM calls. As these applications get more and more complex, it becomes crucial to be able to inspect what exactly is going on inside your chain or agent. The best way to do this is with [LangSmith](https://smith.langchain.com).\n", + "\n", + "Note that LangSmith is not needed, but it is helpful. If you do want to use LangSmith, after you sign up at the link above, make sure to set your environment variables to start logging traces:" + ] + }, { "cell_type": "code", - "execution_count": 1, - "id": "820244ae-74b4-4593-b392-822979dd91b8", + "execution_count": null, + "id": "07411adb-3722-4f65-ab7f-8f6f57663d11", "metadata": {}, "outputs": [], "source": [ - "# Load documents\n", + "os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", + "os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()" + ] + }, + { + "cell_type": "markdown", + "id": "fa6ba684-26cf-4860-904e-a4d51380c134", + "metadata": {}, + "source": [ + "## Quickstart\n", "\n", - "from langchain.document_loaders import WebBaseLoader\n", - "\n", - "loader = WebBaseLoader(\"https://lilianweng.github.io/posts/2023-06-23-agent/\")" + "Suppose we want to build a QA app over the [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/) blog post by Lilian Weng. 
We can create a simple pipeline for this in ~20 lines of code:" ] }, { "cell_type": "code", "execution_count": 3, - "id": "c89a0aa7-1e7e-4557-90e5-a7ea87db00e7", + "id": "d8a913b1-0eea-442a-8a64-ec73333f104b", "metadata": {}, "outputs": [], "source": [ - "# Split documents\n", - "\n", + "import bs4\n", + "from langchain import hub\n", + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.document_loaders import WebBaseLoader\n", + "from langchain.embeddings import OpenAIEmbeddings\n", + "from langchain.schema import StrOutputParser\n", + "from langchain.schema.runnable import RunnablePassthrough\n", "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "from langchain.vectorstores import Chroma" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "820244ae-74b4-4593-b392-822979dd91b8", + "metadata": {}, + "outputs": [], + "source": [ + "loader = WebBaseLoader(\n", + " web_paths=(\"https://lilianweng.github.io/posts/2023-06-23-agent/\",),\n", + " bs_kwargs=dict(\n", + " parse_only=bs4.SoupStrainer(\n", + " class_=(\"post-content\", \"post-title\", \"post-header\")\n", + " )\n", + " ),\n", + ")\n", + "docs = loader.load()\n", "\n", - "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n", - "splits = text_splitter.split_documents(loader.load())" + "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n", + "splits = text_splitter.split_documents(docs)\n", + "\n", + "vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())\n", + "retriever = vectorstore.as_retriever()\n", + "\n", + "prompt = hub.pull(\"rlm/rag-prompt\")\n", + "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", + "\n", + "\n", + "def format_docs(docs):\n", + " return \"\\n\\n\".join(doc.page_content for doc in docs)\n", + "\n", + "\n", + "rag_chain = (\n", + " {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n", + " | prompt\n", + " | llm\n", + " | StrOutputParser()\n", + ")" ] }, { "cell_type": "code", "execution_count": 5, - "id": "000e46f6-dafc-4a43-8417-463d0614fd30", - "metadata": {}, - "outputs": [], - "source": [ - "# Embed and store splits\n", - "\n", - "from langchain.embeddings import OpenAIEmbeddings\n", - "from langchain.vectorstores import Chroma\n", - "\n", - "vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())\n", - "retriever = vectorstore.as_retriever()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "dacbde0b-7d45-4a2c-931d-81bb094aec94", - "metadata": {}, - "outputs": [], - "source": [ - "# Prompt\n", - "# https://smith.langchain.com/hub/rlm/rag-prompt\n", - "\n", - "from langchain import hub\n", - "\n", - "rag_prompt = hub.pull(\"rlm/rag-prompt\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "79b9fdae-c2bf-4cf6-884f-c19aa07dd975", - "metadata": {}, - "outputs": [], - "source": [ - "# LLM\n", - "\n", - "from langchain.chat_models import ChatOpenAI\n", - "\n", - "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "92c0f3ae-6ab2-4d04-9b22-1963b96b9db5", - "metadata": {}, - "outputs": [], - "source": [ - "# RAG chain\n", - "\n", - "from langchain.schema.runnable import RunnablePassthrough\n", - "\n", - "rag_chain = {\"context\": retriever, \"question\": RunnablePassthrough()} | rag_prompt | llm" - ] - }, - { - "cell_type": "code", - "execution_count": 10, "id": 
"0d3b0f36-7b56-49c0-8e40-a1aa9ebcbf24", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "AIMessage(content='Task decomposition is the process of breaking down a task into smaller subgoals or steps. It can be done using simple prompting, task-specific instructions, or human inputs.')" + "'Task decomposition is a technique used to break down complex tasks into smaller and simpler steps. It can be done through prompting techniques like Chain of Thought or Tree of Thoughts, or by using task-specific instructions or human inputs. Task decomposition helps agents plan ahead and manage complicated tasks more effectively.'" ] }, - "execution_count": 10, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -165,14 +224,34 @@ "rag_chain.invoke(\"What is Task Decomposition?\")" ] }, + { + "cell_type": "code", + "execution_count": 6, + "id": "7cb344e0-c423-400c-a079-964c08e07e32", + "metadata": {}, + "outputs": [], + "source": [ + "# cleanup\n", + "vectorstore.delete_collection()" + ] + }, { "cell_type": "markdown", "id": "639dc31a-7f16-40f6-ba2a-20e7c2ecfe60", "metadata": {}, "source": [ - "[Here](https://smith.langchain.com/public/2270a675-74de-47ac-b111-b232d8340a64/r) is the LangSmith trace for this chain.\n", + ":::tip Check out the [LangSmith trace](https://smith.langchain.com/public/1c6ca97e-445b-4d00-84b4-c7befcbc59fe/r) \n", + ":::" + ] + }, + { + "cell_type": "markdown", + "id": "842cf72d-abbc-468e-a2eb-022470347727", + "metadata": {}, + "source": [ + "## Detailed walkthrough\n", "\n", - "Below we will explain each step in more detail." + "Let's go through the above code step-by-step to really understand what's going on." ] }, { @@ -182,22 +261,88 @@ "source": [ "## Step 1. Load\n", "\n", - "Specify a `DocumentLoader` to load in your unstructured data as `Documents`. \n", + "We need to first load the blog post contents. We can use `DocumentLoader`s for this, which are objects that load in data from a source as `Documents`. A `Document` is an object with `page_content` (str) and `metadata` (dict) attributes. \n", "\n", - "A `Document` is a dict with text (`page_content`) and `metadata`." + "In this case we'll use the `WebBaseLoader`, which uses `urllib` and `BeautifulSoup` to load and parse the passed in web urls, returning one `Document` per url. We can customize the html -> text parsing by passing in parameters to the `BeautifulSoup` parser via `bs_kwargs` (see [BeautifulSoup docs](https://beautiful-soup-4.readthedocs.io/en/latest/#beautifulsoup)). In this case only HTML tags with class \"post-content\", \"post-title\", or \"post-header\" are relevant, so we'll remove all others." 
] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 7, "id": "cf4d5c72", "metadata": {}, "outputs": [], "source": [ "from langchain.document_loaders import WebBaseLoader\n", "\n", - "loader = WebBaseLoader(\"https://lilianweng.github.io/posts/2023-06-23-agent/\")\n", - "data = loader.load()" + "loader = WebBaseLoader(\n", + " web_paths=(\"https://lilianweng.github.io/posts/2023-06-23-agent/\",),\n", + " bs_kwargs={\n", + " \"parse_only\": bs4.SoupStrainer(\n", + " class_=(\"post-content\", \"post-title\", \"post-header\")\n", + " )\n", + " },\n", + ")\n", + "docs = loader.load()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "207f87a3-effa-4457-b013-6d233bc7a088", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "42824" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(docs[0].page_content)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "52469796-5ce4-4c12-bd2a-a903872dac33", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + " LLM Powered Autonomous Agents\n", + " \n", + "Date: June 23, 2023 | Estimated Reading Time: 31 min | Author: Lilian Weng\n", + "\n", + "\n", + "Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\n", + "Agent System Overview#\n", + "In\n" + ] + } + ], + "source": [ + "print(docs[0].page_content[:500])" + ] + }, + { + "cell_type": "markdown", + "id": "ee5c6556-56be-4067-adbc-98b5aa19ef6e", + "metadata": {}, + "source": [ + "### Go deeper\n", + "`DocumentLoader`: Object that load data from a source as `Documents`.\n", + "- [Docs](/docs/modules/data_connection/document_loaders/): Further documentation on how to use `DocumentLoader`s.\n", + "- [Integrations](/docs/integrations/document_loaders/): Find the relevant `DocumentLoader` integration (of the > 160 of them) for your use case." ] }, { @@ -205,26 +350,92 @@ "id": "fd2cc9a7", "metadata": {}, "source": [ - "### Go deeper\n", - "- Browse the > 160 data loader integrations [here](https://integrations.langchain.com/).\n", - "- See further documentation on loaders [here](/docs/modules/data_connection/document_loaders/).\n", - "\n", "## Step 2. Split\n", "\n", - "Split the `Document` into chunks for embedding and vector storage." + "Our loaded document is over 42k characters long. This is too long to fit in the context window of many models. And even for those models that could fit the full post in their context window, empirically models struggle to find the relevant context in very long prompts. \n", + "\n", + "So we'll split the `Document` into chunks for embedding and vector storage. This should help us retrieve only the most relevant bits of the blog post at run time.\n", + "\n", + "In this case we'll split our documents into chunks of 1000 characters with 200 characters of overlap between chunks. The overlap helps mitigate the possibility of separating a statement from important context related to it. We use the `RecursiveCharacterTextSplitter`, which will (recursively) split the document using common separators (like new lines) until each chunk is the appropriate size." 
] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 10, "id": "4b11c01d", "metadata": {}, "outputs": [], "source": [ "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", "\n", - "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n", - "all_splits = text_splitter.split_documents(data)" + "text_splitter = RecursiveCharacterTextSplitter(\n", + " chunk_size=1000, chunk_overlap=200, add_start_index=True\n", + ")\n", + "all_splits = text_splitter.split_documents(docs)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "3741eb67-9caf-40f2-a001-62f49349bff5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "66" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(all_splits)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "f868d0e5-5670-4d54-b562-f50265e907f4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "969" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(all_splits[0].page_content)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "5c9e5f27-c8e3-4ca7-8a8e-45c5de2901cc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',\n", + " 'start_index': 7056}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_splits[10].metadata" ] }, { @@ -234,25 +445,34 @@ "source": [ "### Go deeper\n", "\n", - "- `DocumentSplitters` are just one type of the more generic `DocumentTransformers`.\n", - "- See further documentation on transformers [here](/docs/modules/data_connection/document_transformers/).\n", - "- `Context-aware splitters` keep the location (\"context\") of each split in the original `Document`:\n", + "`DocumentSplitter`: Object that splits a list of `Document`s into smaller chunks. Subclass of `DocumentTransformer`s.\n", + "- Explore `Context-aware splitters`, which keep the location (\"context\") of each split in the original `Document`:\n", " - [Markdown files](/docs/use_cases/question_answering/document-context-aware-QA)\n", " - [Code (py or js)](docs/integrations/document_loaders/source_code)\n", - " - [Documents](/docs/integrations/document_loaders/grobid)\n", + " - [Scientific papers](/docs/integrations/document_loaders/grobid)\n", "\n", + "`DocumentTransformer`: Object that performs a transformation on a list of `Document`s.\n", + "- [Docs](/docs/modules/data_connection/document_transformers/): Further documentation on how to use `DocumentTransformer`s\n", + "- [Integrations](/docs/integrations/document_transformers/)\n" + ] + }, + { + "cell_type": "markdown", + "id": "46547031-2352-4321-9970-d6ea27285c2e", + "metadata": {}, + "source": [ "## Step 3. Store\n", "\n", - "To be able to look up our document splits, we first need to store them where we can later look them up.\n", + "Now that we've got 66 text chunks in memory, we need to store and index them so that we can search them later in our RAG app. The most common way to do this is to embed the contents of each document split and upload those embeddings to a vector store. 
\n", "\n", - "The most common way to do this is to embed the contents of each document split.\n", + "Then, when we want to search over our splits, we take the search query, embed it as well, and perform some sort of \"similarity\" search to identify the stored splits with the most similar embeddings to our query embedding. The simplest similarity measure is cosine similarity — we measure the cosine of the angle between each pair of embeddings (which are just very high dimensional vectors).\n", "\n", - "We store the embedding and splits in a vectorstore." + "We can embed and store all of our document splits in a single command using the `Chroma` vector store and `OpenAIEmbeddings` model." ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 14, "id": "e9c302c8", "metadata": {}, "outputs": [], @@ -269,43 +489,91 @@ "metadata": {}, "source": [ "### Go deeper\n", - "- Browse the > 40 vectorstores integrations [here](https://integrations.langchain.com/).\n", - "- See further documentation on vectorstores [here](/docs/modules/data_connection/vectorstores/).\n", - "- Browse the > 30 text embedding integrations [here](https://integrations.langchain.com/).\n", - "- See further documentation on embedding models [here](/docs/modules/data_connection/text_embedding/).\n", + "`Embeddings`: Wrapper around a text embedding model, used for converting text to embeddings.\n", + "- [Docs](/docs/modules/data_connection/text_embedding): Further documentation on the interface.\n", + "- [Integrations](/docs/integrations/text_embedding/): Browse the > 30 text embedding integrations\n", "\n", - " Here are Steps 1-3:\n", - "\n", - "![lc.png](/img/qa_data_load.png)\n", + "`VectorStore`: Wrapper around a vector database, used for storing and querying embeddings.\n", + "- [Docs](/docs/modules/data_connection/vectorstores/): Further documentation on the interface.\n", + "- [Integrations](/docs/integrations/vectorstores/): Browse the > 40 `VectorStore` integrations.\n", "\n", + "This completes the **Indexing** portion of the pipeline. At this point we have an query-able vector store containing the chunked contents of our blog post. Given a user question, we should ideally be able to return the snippets of the blog post that answer the question:" + ] + }, + { + "cell_type": "markdown", + "id": "70d64d40-e475-43d9-b64c-925922bb5ef7", + "metadata": {}, + "source": [ "## Step 4. Retrieve\n", "\n", - "Retrieve relevant splits for any question using [similarity search](https://www.pinecone.io/learn/what-is-similarity-search/).\n", + "Now let's write the actual application logic. We want to create a simple application that let's the user ask a question, searches for documents relevant to that question, passes the retrieved documents and initial question to a model, and finally returns an answer.\n", "\n", - "This is simply \"top K\" retrieval where we select documents based on embedding similarity to the query." + "LangChain defines a `Retriever` interface which wraps an index that can return relevant documents given a string query. All retrievers implement a common method `get_relevant_documents()` (and its asynchronous variant `aget_relevant_documents()`).\n", + "\n", + "The most common type of `Retriever` is the `VectorStoreRetriever`, which uses the similarity search capabilities of a vector store to facillitate retrieval. 
Any `VectorStore` can easily be turned into a `Retriever`:" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 15, + "id": "4414df0d-5d43-46d0-85a9-5f47be0dd099", + "metadata": {}, + "outputs": [], + "source": [ + "retriever = vectorstore.as_retriever(search_type=\"similarity\", search_kwargs={\"k\": 6})" + ] + }, + { + "cell_type": "code", + "execution_count": 16, "id": "e2c26b7d", "metadata": {}, + "outputs": [], + "source": [ + "retrieved_docs = retriever.get_relevant_documents(\n", + " \"What are the approaches to Task Decomposition?\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "8684291d-0f5e-453a-8d3e-ff9feea765d0", + "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "4" + "6" ] }, - "execution_count": 7, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "question = \"What are the approaches to Task Decomposition?\"\n", - "docs = vectorstore.similarity_search(question)\n", - "len(docs)" + "len(retrieved_docs)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "9a5dc074-816d-409a-b005-ab4eddfd76af", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\n", + "Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.\n" + ] + } + ], + "source": [ + "print(retrieved_docs[0].page_content)" ] }, { @@ -314,84 +582,15 @@ "metadata": {}, "source": [ "### Go deeper\n", + "Vector stores are commonly used for retrieval, but there are plenty of other ways to do retrieval. \n", "\n", - "Vectorstores are commonly used for retrieval, but they are not the only option. For example, SVMs (see thread [here](https://twitter.com/karpathy/status/1647025230546886658?s=20)) can also be used.\n", - "\n", - "LangChain [has many retrievers](/docs/modules/data_connection/retrievers/) including, but not limited to, vectorstores. \n", - "\n", - "All retrievers implement a common method `get_relevant_documents()` (and its asynchronous variant `aget_relevant_documents()`)." 
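The retriever above uses plain similarity search with `k=6`, but the same `as_retriever()` call accepts other search strategies as well. A sketch of a couple of common configurations (the parameter values are illustrative):

```python
# Maximal marginal relevance: trade off relevance against diversity of results
mmr_retriever = vectorstore.as_retriever(
    search_type="mmr", search_kwargs={"k": 6, "lambda_mult": 0.25}
)

# Only return splits whose similarity score clears a threshold
threshold_retriever = vectorstore.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.8},
)
```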
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "c901eaee", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "4" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from langchain.retrievers import SVMRetriever\n", - "\n", - "svm_retriever = SVMRetriever.from_documents(all_splits, OpenAIEmbeddings())\n", - "docs_svm = svm_retriever.get_relevant_documents(question)\n", - "len(docs_svm)" - ] - }, - { - "cell_type": "markdown", - "id": "69de3d54", - "metadata": {}, - "source": [ - "Some common ways to improve on vector similarity search include:\n", - "- `MultiQueryRetriever` [generates variants of the input question](/docs/modules/data_connection/retrievers/MultiQueryRetriever) to improve retrieval.\n", - "- `Max marginal relevance` selects for [relevance and diversity](https://www.cs.cmu.edu/~jgc/publication/The_Use_MMR_Diversity_Based_LTMIR_1998.pdf) among the retrieved documents.\n", - "- Documents can be filtered during retrieval using [`metadata` filters](/docs/use_cases/question_answering/document-context-aware-QA)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9cfe3270-4e89-4c60-a2e5-9026b021bf76", - "metadata": {}, - "outputs": [], - "source": [ - "import logging\n", - "\n", - "from langchain.chat_models import ChatOpenAI\n", - "from langchain.retrievers.multi_query import MultiQueryRetriever\n", - "\n", - "logging.basicConfig()\n", - "logging.getLogger(\"langchain.retrievers.multi_query\").setLevel(logging.INFO)\n", - "\n", - "retriever_from_llm = MultiQueryRetriever.from_llm(\n", - " retriever=vectorstore.as_retriever(), llm=ChatOpenAI(temperature=0)\n", - ")\n", - "unique_docs = retriever_from_llm.get_relevant_documents(query=question)\n", - "len(unique_docs)" - ] - }, - { - "cell_type": "markdown", - "id": "ee8420e6-73a6-411b-a84d-74b096bddad7", - "metadata": {}, - "source": [ - "In addition, a useful concept for improving retrieval is decoupling the documents from the embedded search key.\n", - "\n", - "For example, we can embed a document summary or question that are likely to lead to the document being retrieved.\n", - "\n", - "See details in [here](docs/modules/data_connection/retrievers/multi_vector) on the multi-vector retriever for this purpose.\n", - "\n", - "![mv.png](/img/multi_vector.png)" + "`Retriever`: An object that returns `Document`s given a text query\n", + "- [Docs](/docs/modules/data_connection/retrievers/): Further documentation on the interface and built-in retrieval techniques. Some of which include:\n", + " - `MultiQueryRetriever` [generates variants of the input question](/docs/modules/data_connection/retrievers/MultiQueryRetriever) to improve retrieval hit rate.\n", + " - `MultiVectorRetriever` (diagram below) instead generates [variants of the embeddings](/docs/modules/data_connection/retrievers/multi_vector), also in order to improve retrieval hit rate.\n", + " - `Max marginal relevance` selects for [relevance and diversity](https://www.cs.cmu.edu/~jgc/publication/The_Use_MMR_Diversity_Based_LTMIR_1998.pdf) among the retrieved documents to avoid passing in duplicate context.\n", + " - Documents can be filtered during vector store retrieval using [`metadata` filters](/docs/use_cases/question_answering/document-context-aware-QA).\n", + "- [Integrations](/docs/integrations/retrievers/): Integrations with retrieval services." ] }, { @@ -401,42 +600,128 @@ "source": [ "## Step 5. 
Generate\n", "\n", - "Distill the retrieved documents into an answer using an LLM/Chat model (e.g., `gpt-3.5-turbo`).\n", + "Let's put it all together into a chain that takes a question, retrieves relevant documents, constructs a prompt, passes that to a model, and parses the output.\n", "\n", - "We use the [Runnable](https://python.langchain.com/docs/expression_language/interface) protocol to define the chain.\n", - "\n", - "Runnable protocol pipes together components in a transparent way.\n", - "\n", - "We used a prompt for RAG that is checked into the LangChain prompt hub ([here](https://smith.langchain.com/hub/rlm/rag-prompt))." + "We'll use the gpt-3.5-turbo OpenAI chat model, but any LangChain `LLM` or `ChatModel` could be substituted in." ] }, { "cell_type": "code", - "execution_count": 11, - "id": "99fa1aec", + "execution_count": 19, + "id": "d34d998c-9abf-4e01-a4ad-06dadfcf131c", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "AIMessage(content='Task decomposition is the process of breaking down a task into smaller subgoals or steps. It can be done using simple prompting, task-specific instructions, or human inputs.')" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from langchain.chat_models import ChatOpenAI\n", "\n", - "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", + "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)" + ] + }, + { + "cell_type": "markdown", + "id": "bc826723-36fc-45d1-a3ef-df8c2c8471a8", + "metadata": {}, + "source": [ + "We'll use a prompt for RAG that is checked into the LangChain prompt hub ([here](https://smith.langchain.com/hub/rlm/rag-prompt))." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "bede955b-9aeb-4fd3-964d-8e43f214ce70", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain import hub\n", "\n", + "prompt = hub.pull(\"rlm/rag-prompt\")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "11c35354-f275-47ec-9f72-ebd5c23731eb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Human: You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. 
Use three sentences maximum and keep the answer concise.\n", + "Question: filler question \n", + "Context: filler context \n", + "Answer:\n" + ] + } + ], + "source": [ + "print(\n", + " prompt.invoke(\n", + " {\"context\": \"filler context\", \"question\": \"filler question\"}\n", + " ).to_string()\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "51f9a210-1eee-4054-99d7-9d9ddf7e3593", + "metadata": {}, + "source": [ + "We'll use the [LCEL Runnable](https://python.langchain.com/docs/expression_language/) protocol to define the chain, allowing us to \n", + "- pipe together components and functions in a transparent way\n", + "- automatically trace our chain in LangSmith\n", + "- get streaming, async, and batched calling out of the box" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "99fa1aec", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.schema import StrOutputParser\n", "from langchain.schema.runnable import RunnablePassthrough\n", "\n", - "rag_chain = {\"context\": retriever, \"question\": RunnablePassthrough()} | rag_prompt | llm\n", "\n", - "rag_chain.invoke(\"What is Task Decomposition?\")" + "def format_docs(docs):\n", + " return \"\\n\\n\".join(doc.page_content for doc in docs)\n", + "\n", + "\n", + "rag_chain = (\n", + " {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n", + " | prompt\n", + " | llm\n", + " | StrOutputParser()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "8655a152-d7cf-466f-b1bc-fbff9ae2b889", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Task decomposition is a technique used to break down complex tasks into smaller and simpler steps. It can be done through methods like Chain of Thought (CoT) or Tree of Thoughts, which involve dividing the task into manageable subtasks and exploring multiple reasoning possibilities at each step. Task decomposition can be performed by AI models with prompting, task-specific instructions, or human inputs." + ] + } + ], + "source": [ + "for chunk in rag_chain.stream(\"What is Task Decomposition?\"):\n", + " print(chunk, end=\"\", flush=True)" + ] + }, + { + "cell_type": "markdown", + "id": "2c000e5f-2b7f-4eb9-8876-9f4b186b4a08", + "metadata": {}, + "source": [ + ":::tip Check out the [LangSmith trace](https://smith.langchain.com/public/1799e8db-8a6d-4eb2-84d5-46e8d7d5a99b/r) \n", + ":::" ] }, { @@ -447,9 +732,15 @@ "### Go deeper\n", "\n", "#### Choosing LLMs\n", - "- Browse the > 90 LLM and chat model integrations [here](https://integrations.langchain.com/).\n", - "- See further documentation on LLMs and chat models [here](/docs/modules/model_io/models/).\n", - "- See a guide on local LLMS [here](/docs/modules/use_cases/question_answering/local_retrieval_qa)." + "`ChatModel`: An LLM-backed chat model wrapper. Takes in a sequence of messages and returns a message.\n", + "- [Docs](/docs/modules/model_io/chat/)\n", + "- [Integrations](/docs/integrations/chat/): Explore over 25 `ChatModel` integrations.\n", + "\n", + "`LLM`: A text-in-text-out LLM. Takes in a string and returns a string.\n", + "- [Docs](/docs/modules/model_io/llms)\n", + "- [Integrations](/docs/integrations/llms): Explore over 75 `LLM` integrations.\n", + "\n", + "See a guide on RAG with locally-running models [here](/docs/modules/use_cases/question_answering/local_retrieval_qa)." 
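As a concrete illustration of the earlier note that any LangChain `LLM` or `ChatModel` could be substituted in, here is a sketch that swaps a plain text-completion LLM into the otherwise unchanged chain (the model choice is illustrative):

```python
from langchain.llms import OpenAI

completion_llm = OpenAI(temperature=0)  # text-in / text-out instead of a chat model

llm_rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | completion_llm
    | StrOutputParser()
)
llm_rag_chain.invoke("What is Task Decomposition?")
```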
] }, { @@ -459,24 +750,22 @@ "source": [ "#### Customizing the prompt\n", "\n", - "As shown above, we can load prompts (e.g., [this RAG prompt](https://smith.langchain.com/hub/rlm/rag-prompt)) from the prompt hub.\n", - "\n", - "The prompt can also be easily customized, as shown below." + "As shown above, we can load prompts (e.g., [this RAG prompt](https://smith.langchain.com/hub/rlm/rag-prompt)) from the prompt hub. The prompt can also be easily customized:" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 24, "id": "e4fee704", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "AIMessage(content='Task decomposition is the process of breaking down a complicated task into smaller, more manageable subtasks or steps. It can be done using prompts, task-specific instructions, or human inputs. Thanks for asking!')" + "'Task decomposition is the process of breaking down a complex task into smaller and simpler steps. It can be done through techniques like Chain of Thought (CoT) or Tree of Thoughts, which involve dividing the problem into multiple thought steps and generating multiple thoughts per step. Task decomposition helps in enhancing model performance and understanding the thinking process of the model. Thanks for asking!'" ] }, - "execution_count": 12, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -494,7 +783,10 @@ "rag_prompt_custom = PromptTemplate.from_template(template)\n", "\n", "rag_chain = (\n", - " {\"context\": retriever, \"question\": RunnablePassthrough()} | rag_prompt_custom | llm\n", + " {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n", + " | rag_prompt_custom\n", + " | llm\n", + " | StrOutputParser()\n", ")\n", "\n", "rag_chain.invoke(\"What is Task Decomposition?\")" @@ -502,18 +794,292 @@ }, { "cell_type": "markdown", - "id": "5f5b6297-715a-444e-b3ef-a6d27382b435", + "id": "94b952e6-dc4b-415b-9cf3-1ad333e48366", "metadata": {}, "source": [ - "We can use [LangSmith](https://smith.langchain.com/public/129cac54-44d5-453a-9807-3bd4835e5f96/r) to see the trace." + ":::tip Check out the [LangSmith trace](https://smith.langchain.com/public/da23c4d8-3b33-47fd-84df-a3a582eedf84/r) \n", + ":::" + ] + }, + { + "cell_type": "markdown", + "id": "1c2f99b5-80b4-4178-bf30-c1c0a152638f", + "metadata": {}, + "source": [ + "### Adding sources\n", + "\n", + "With LCEL it's easy to return the retrieved documents or certain source metadata from the documents:" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "ded41680-b749-4e2a-9daa-b1165d74783b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'documents': [{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',\n", + " 'start_index': 1585},\n", + " {'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',\n", + " 'start_index': 2192},\n", + " {'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',\n", + " 'start_index': 17804},\n", + " {'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',\n", + " 'start_index': 17414},\n", + " {'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',\n", + " 'start_index': 29630},\n", + " {'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',\n", + " 'start_index': 19373}],\n", + " 'answer': 'Task decomposition is a technique used to break down complex tasks into smaller and simpler steps. 
It involves transforming big tasks into multiple manageable tasks, allowing for a more systematic and organized approach to problem-solving. Thanks for asking!'}" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from operator import itemgetter\n", + "\n", + "from langchain.schema.runnable import RunnableMap\n", + "\n", + "rag_chain_from_docs = (\n", + " {\n", + " \"context\": lambda input: format_docs(input[\"documents\"]),\n", + " \"question\": itemgetter(\"question\"),\n", + " }\n", + " | rag_prompt_custom\n", + " | llm\n", + " | StrOutputParser()\n", + ")\n", + "rag_chain_with_source = RunnableMap(\n", + " {\"documents\": retriever, \"question\": RunnablePassthrough()}\n", + ") | {\n", + " \"documents\": lambda input: [doc.metadata for doc in input[\"documents\"]],\n", + " \"answer\": rag_chain_from_docs,\n", + "}\n", + "\n", + "rag_chain_with_source.invoke(\"What is Task Decomposition\")" + ] + }, + { + "cell_type": "markdown", + "id": "b437da5d-ca09-4d15-9be2-c35e5a1ace77", + "metadata": {}, + "source": [ + ":::tip Check out the [LangSmith trace](https://smith.langchain.com/public/007d7e01-cb62-4a84-8b71-b24767f953ee/r)\n", + ":::" + ] + }, + { + "cell_type": "markdown", + "id": "776ae958-cbdc-4471-8669-c6087436f0b5", + "metadata": {}, + "source": [ + "### Adding memory\n", + "\n", + "Suppose we want to create a stateful application that remembers past user inputs. There are two main things we need to do to support this.\n", + "1. Add a messages placeholder to our chain which allows us to pass in historical messages\n", + "2. Add a chain that takes the latest user query and reformulates it in the context of the chat history into a standalone question that can be passed to our retriever.\n", + "\n", + "Let's start with 2. We can build a \"condense question\" chain that looks something like this:" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "2b685428-8b82-4af1-be4f-7232c5d55b73", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder\n", + "\n", + "condense_q_system_prompt = \"\"\"Given a chat history and the latest user question \\\n", + "which might reference the chat history, formulate a standalone question \\\n", + "which can be understood without the chat history. 
Do NOT answer the question, \\\n", + "just reformulate it if needed and otherwise return it as is.\"\"\"\n", + "condense_q_prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\"system\", condense_q_system_prompt),\n", + " MessagesPlaceholder(variable_name=\"chat_history\"),\n", + " (\"human\", \"{question}\"),\n", + " ]\n", + ")\n", + "condense_q_chain = condense_q_prompt | llm | StrOutputParser()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "46ee9aa1-16f1-4509-8dae-f8c71f4ad47d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'What is the definition of \"large\" in the context of a language model?'" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain.schema.messages import AIMessage, HumanMessage\n", + "\n", + "condense_q_chain.invoke(\n", + " {\n", + " \"chat_history\": [\n", + " HumanMessage(content=\"What does LLM stand for?\"),\n", + " AIMessage(content=\"Large language model\"),\n", + " ],\n", + " \"question\": \"What is meant by large\",\n", + " }\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "31ee8481-ce37-41ae-8ca5-62196619d4b3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'How do transformer models function?'" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "condense_q_chain.invoke(\n", + " {\n", + " \"chat_history\": [\n", + " HumanMessage(content=\"What does LLM stand for?\"),\n", + " AIMessage(content=\"Large language model\"),\n", + " ],\n", + " \"question\": \"How do transformers work\",\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "42a47168-4a1f-4e39-bd2d-d5b03609a243", + "metadata": {}, + "source": [ + "And now we can build our full QA chain. Notice we add some routing functionality to only run the \"condense question chain\" when our chat history isn't empty." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "66f275f3-ddef-4678-b90d-ee64576878f9", + "metadata": {}, + "outputs": [], + "source": [ + "qa_system_prompt = \"\"\"You are an assistant for question-answering tasks. \\\n", + "Use the following pieces of retrieved context to answer the question. \\\n", + "If you don't know the answer, just say that you don't know. \\\n", + "Use three sentences maximum and keep the answer concise.\\\n", + "\n", + "{context}\"\"\"\n", + "qa_prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\"system\", qa_system_prompt),\n", + " MessagesPlaceholder(variable_name=\"chat_history\"),\n", + " (\"human\", \"{question}\"),\n", + " ]\n", + ")\n", + "\n", + "\n", + "def condense_question(input: dict):\n", + " if input.get(\"chat_history\"):\n", + " return condense_q_chain\n", + " else:\n", + " return input[\"question\"]\n", + "\n", + "\n", + "rag_chain = (\n", + " RunnablePassthrough.assign(context=condense_question | retriever | format_docs)\n", + " | qa_prompt\n", + " | llm\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "51fd0e54-5bb4-4a9a-b012-87a18ebe2bef", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content='Common ways of task decomposition include:\\n\\n1. Using Chain of Thought (CoT): CoT is a prompting technique that instructs the model to \"think step by step\" and decompose complex tasks into smaller and simpler steps. 
It utilizes more test-time computation and sheds light on the model\\'s thinking process.\\n\\n2. Prompting with LLM: Language Model (LLM) can be used to prompt the model with simple instructions like \"Steps for XYZ\" or \"What are the subgoals for achieving XYZ?\" This allows the model to generate a sequence of subtasks or thought steps.\\n\\n3. Task-specific instructions: For certain tasks, task-specific instructions can be provided to guide the model in decomposing the task. For example, for writing a novel, the instruction \"Write a story outline\" can be given to break down the task into manageable steps.\\n\\n4. Human inputs: In some cases, human inputs can be used to assist in task decomposition. Humans can provide their expertise and knowledge to identify and break down complex tasks into smaller subtasks.')" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chat_history = []\n", + "\n", + "question = \"What is Task Decomposition?\"\n", + "ai_msg = rag_chain.invoke({\"question\": question, \"chat_history\": chat_history})\n", + "chat_history.extend([HumanMessage(content=question), ai_msg])\n", + "\n", + "second_question = \"What are common ways of doing it?\"\n", + "rag_chain.invoke({\"question\": second_question, \"chat_history\": chat_history})" + ] + }, + { + "cell_type": "markdown", + "id": "53263a65-4de2-4dd8-9291-6a8169ab6f1d", + "metadata": {}, + "source": [ + ":::tip Check out the [LangSmith trace](https://smith.langchain.com/public/b3001782-bb30-476a-886b-12da17ec258f/r) \n", + ":::" + ] + }, + { + "cell_type": "markdown", + "id": "e6e5191f-43e6-4fa0-9ba5-db002fcaacf3", + "metadata": {}, + "source": [ + "Of course, we've written here the logic for using chat history when it's provided, but we haven't actually added functionality for storing chat history for each user session. This is something that's fairly application specific and is usually best handled outside of LangChain." + ] + }, + { + "cell_type": "markdown", + "id": "580e18de-132d-4009-ba67-4aaf2c7717a2", + "metadata": {}, + "source": [ + "## Next steps\n", + "\n", + "That's a lot of content we've covered in a short amount of time. There's plenty of nuances, features, integrations, etc to explore in each of the above sections. Aside from the sources mentioned above, good next steps include:\n", + "\n", + "- Reading up on more advanced retrieval techniques in the [Retrievers](/docs/modules/data_connection/retrievers/) section.\n", + "- Learning about the LangChain [Indexing API](/docs/modules/data_connection/indexing), which helps repeatedly sync data sources and vector stores without redundant computation or storage.\n", + "- Exploring RAG [LangChain Templates](/docs/templates/#-advanced-retrieval), which are reference applications that can easily be deployed with [LangServe](/docs/langserve).\n", + "- Learning about [evaluating RAG applications with LangSmith](https://github.com/langchain-ai/langsmith-cookbook/blob/main/testing-examples/qa-correctness/qa-correctness.ipynb)." 
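Returning to the note above about per-session chat history: a minimal, hypothetical sketch of an application-level store kept outside of LangChain (the dict and session ids are purely illustrative; a real app might use a database):

```python
# Hypothetical in-memory session store wrapping the chat-history chain above
session_histories = {}


def answer(session_id: str, question: str) -> str:
    history = session_histories.setdefault(session_id, [])
    ai_msg = rag_chain.invoke({"question": question, "chat_history": history})
    history.extend([HumanMessage(content=question), ai_msg])
    return ai_msg.content


answer("user-123", "What is Task Decomposition?")
answer("user-123", "What are common ways of doing it?")
```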
] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "poetry-venv", "language": "python", - "name": "python3" + "name": "poetry-venv" }, "language_info": { "codemirror_mode": { diff --git a/docs/docs/use_cases/question_answering/multi_retrieval_qa_router.mdx b/docs/docs/use_cases/question_answering/multi_retrieval_qa_router.mdx deleted file mode 100644 index eba5dfa013a..00000000000 --- a/docs/docs/use_cases/question_answering/multi_retrieval_qa_router.mdx +++ /dev/null @@ -1,128 +0,0 @@ -# Dynamically select from multiple retrievers - -This notebook demonstrates how to use the `RouterChain` paradigm to create a chain that dynamically selects which Retrieval system to use. Specifically we show how to use the `MultiRetrievalQAChain` to create a question-answering chain that selects the retrieval QA chain which is most relevant for a given question, and then answers the question using it. - -```python -from langchain.chains.router import MultiRetrievalQAChain -from langchain.llms import OpenAI -``` - - -```python -from langchain.embeddings import OpenAIEmbeddings -from langchain.document_loaders import TextLoader -from langchain.vectorstores import FAISS - -sou_docs = TextLoader('../../state_of_the_union.txt').load_and_split() -sou_retriever = FAISS.from_documents(sou_docs, OpenAIEmbeddings()).as_retriever() - -pg_docs = TextLoader('../../paul_graham_essay.txt').load_and_split() -pg_retriever = FAISS.from_documents(pg_docs, OpenAIEmbeddings()).as_retriever() - -personal_texts = [ - "I love apple pie", - "My favorite color is fuchsia", - "My dream is to become a professional dancer", - "I broke my arm when I was 12", - "My parents are from Peru", -] -personal_retriever = FAISS.from_texts(personal_texts, OpenAIEmbeddings()).as_retriever() -``` - - -```python -retriever_infos = [ - { - "name": "state of the union", - "description": "Good for answering questions about the 2023 State of the Union address", - "retriever": sou_retriever - }, - { - "name": "pg essay", - "description": "Good for answering questions about Paul Graham's essay on his career", - "retriever": pg_retriever - }, - { - "name": "personal", - "description": "Good for answering questions about me", - "retriever": personal_retriever - } -] -``` - - -```python -chain = MultiRetrievalQAChain.from_retrievers(OpenAI(), retriever_infos, verbose=True) -``` - - -```python -print(chain.run("What did the president say about the economy?")) -``` - - - -``` - - - > Entering new MultiRetrievalQAChain chain... - state of the union: {'query': 'What did the president say about the economy in the 2023 State of the Union address?'} - > Finished chain. - The president said that the economy was stronger than it had been a year prior, and that the American Rescue Plan helped create record job growth and fuel economic relief for millions of Americans. He also proposed a plan to fight inflation and lower costs for families, including cutting the cost of prescription drugs and energy, providing investments and tax credits for energy efficiency, and increasing access to child care and Pre-K. -``` - - - - -```python -print(chain.run("What is something Paul Graham regrets about his work?")) -``` - - - -``` - - - > Entering new MultiRetrievalQAChain chain... - pg essay: {'query': 'What is something Paul Graham regrets about his work?'} - > Finished chain. - Paul Graham regrets that he did not take a vacation after selling his company, instead of immediately starting to paint. 
-``` - - - - -```python -print(chain.run("What is my background?")) -``` - - - -``` - - - > Entering new MultiRetrievalQAChain chain... - personal: {'query': 'What is my background?'} - > Finished chain. - Your background is Peruvian. -``` - - - - -```python -print(chain.run("What year was the Internet created in?")) -``` - - - -``` - - - > Entering new MultiRetrievalQAChain chain... - None: {'query': 'What year was the Internet created in?'} - > Finished chain. - The Internet was created in 1969 through a project called ARPANET, which was funded by the United States Department of Defense. However, the World Wide Web, which is often confused with the Internet, was created in 1989 by British computer scientist Tim Berners-Lee. -``` - - diff --git a/docs/docs/use_cases/question_answering/multiple_retrieval.ipynb b/docs/docs/use_cases/question_answering/multiple_retrieval.ipynb deleted file mode 100644 index 4733167cf0c..00000000000 --- a/docs/docs/use_cases/question_answering/multiple_retrieval.ipynb +++ /dev/null @@ -1,173 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "66398b75", - "metadata": {}, - "source": [ - "# Retrieving from multiple sources\n", - "\n", - "Often times you may want to do retrieval over multiple sources. These can be different vectorstores (where one contains information about topic X and the other contains info about topic Y). They could also be completely different databases altogether!\n", - "\n", - "A key part is is doing as much of the retrieval in parallel as possible. This will keep the latency as low as possible. Luckily, [LangChain Expression Language](../../) supports parallelism out of the box.\n", - "\n", - "Let's take a look where we do retrieval over a SQL database and a vectorstore." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "1c5bab6a", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.chat_models import ChatOpenAI" - ] - }, - { - "cell_type": "markdown", - "id": "43a6210f", - "metadata": {}, - "source": [ - "## Set up SQL query" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "ab3bf8ba", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.chains import create_sql_query_chain\n", - "from langchain.utilities import SQLDatabase\n", - "\n", - "db = SQLDatabase.from_uri(\"sqlite:///../../../../../notebooks/Chinook.db\")\n", - "query_chain = create_sql_query_chain(ChatOpenAI(temperature=0), db)" - ] - }, - { - "cell_type": "markdown", - "id": "a8585120", - "metadata": {}, - "source": [ - "## Set up vectorstore" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "b916b0b0", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.indexes import VectorstoreIndexCreator\n", - "from langchain.schema.document import Document\n", - "\n", - "index_creator = VectorstoreIndexCreator()\n", - "index = index_creator.from_documents([Document(page_content=\"Foo\")])\n", - "retriever = index.vectorstore.as_retriever()" - ] - }, - { - "cell_type": "markdown", - "id": "a3b91816", - "metadata": {}, - "source": [ - "## Combine" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "4423211c", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.prompts import ChatPromptTemplate\n", - "\n", - "system_message = \"\"\"Use the information from the below two sources to answer any questions.\n", - "\n", - "Source 1: a SQL database about employee data\n", - "\n", - "{source1}\n", - "\n", - "\n", - "Source 2: a text database of random 
information\n", - "\n", - "{source2}\n", - "\n", - "\"\"\"\n", - "\n", - "prompt = ChatPromptTemplate.from_messages(\n", - " [(\"system\", system_message), (\"human\", \"{question}\")]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "7ff87e0c", - "metadata": {}, - "outputs": [], - "source": [ - "full_chain = (\n", - " {\n", - " \"source1\": {\"question\": lambda x: x[\"question\"]} | query_chain | db.run,\n", - " \"source2\": (lambda x: x[\"question\"]) | retriever,\n", - " \"question\": lambda x: x[\"question\"],\n", - " }\n", - " | prompt\n", - " | ChatOpenAI()\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "d6706410", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "content='There are 8 employees.' additional_kwargs={} example=False\n" - ] - } - ], - "source": [ - "response = full_chain.invoke({\"question\": \"How many Employees are there\"})\n", - "print(response)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.1" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docs/docs/use_cases/question_answering/vector_db_qa.mdx b/docs/docs/use_cases/question_answering/vector_db_qa.mdx deleted file mode 100644 index 4ba6b2da0fa..00000000000 --- a/docs/docs/use_cases/question_answering/vector_db_qa.mdx +++ /dev/null @@ -1,230 +0,0 @@ ---- -sidebar_position: 1 ---- -# Using a Retriever - -This example showcases question answering over an index. - -```python -from langchain.chains import RetrievalQA -from langchain.document_loaders import TextLoader -from langchain.embeddings.openai import OpenAIEmbeddings -from langchain.llms import OpenAI -from langchain.text_splitter import CharacterTextSplitter -from langchain.vectorstores import Chroma -``` - - -```python -loader = TextLoader("../../state_of_the_union.txt") -documents = loader.load() -text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) -texts = text_splitter.split_documents(documents) - -embeddings = OpenAIEmbeddings() -docsearch = Chroma.from_documents(texts, embeddings) - -qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=docsearch.as_retriever()) -``` - - -```python -query = "What did the president say about Ketanji Brown Jackson" -qa.run(query) -``` - - - -``` - " The president said that she is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support, from the Fraternal Order of Police to former judges appointed by Democrats and Republicans." -``` - - - -## Chain Type -You can easily specify different chain types to load and use in the RetrievalQA chain. For a more detailed walkthrough of these types, please see [this notebook](/docs/modules/chains/additional/question_answering). - -There are two ways to load different chain types. 
First, you can specify the chain type argument in the `from_chain_type` method. This allows you to pass in the name of the chain type you want to use. For example, in the below we change the chain type to `map_reduce`. - - -```python -qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="map_reduce", retriever=docsearch.as_retriever()) -``` - - -```python -query = "What did the president say about Ketanji Brown Jackson" -qa.run(query) -``` - - - -``` - " The president said that Judge Ketanji Brown Jackson is one of our nation's top legal minds, a former top litigator in private practice and a former federal public defender, from a family of public school educators and police officers, a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans." -``` - - - -The above way allows you to really simply change the chain_type, but it doesn't provide a ton of flexibility over parameters to that chain type. If you want to control those parameters, you can load the chain directly (as you did in [this notebook](/docs/modules/chains/additional/question_answering)) and then pass that directly to the RetrievalQA chain with the `combine_documents_chain` parameter. For example: - - -```python -from langchain.chains.question_answering import load_qa_chain -qa_chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff") -qa = RetrievalQA(combine_documents_chain=qa_chain, retriever=docsearch.as_retriever()) -``` - - -```python -query = "What did the president say about Ketanji Brown Jackson" -qa.run(query) -``` - - - -``` - " The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans." -``` - - - -## Custom Prompts -You can pass in custom prompts to do question answering. These prompts are the same prompts as you can pass into the [base question answering chain](/docs/modules/chains/additional/question_answering) - - -```python -from langchain.prompts import PromptTemplate -prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. - -{context} - -Question: {question} -Answer in Italian:""" -PROMPT = PromptTemplate( - template=prompt_template, input_variables=["context", "question"] -) -``` - - -```python -chain_type_kwargs = {"prompt": PROMPT} -qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=docsearch.as_retriever(), chain_type_kwargs=chain_type_kwargs) -``` - - -```python -query = "What did the president say about Ketanji Brown Jackson" -qa.run(query) -``` - - - -``` - " Il presidente ha detto che Ketanji Brown Jackson è una delle menti legali più importanti del paese, che continuerà l'eccellenza di Justice Breyer e che ha ricevuto un ampio sostegno, da Fraternal Order of Police a ex giudici nominati da democratici e repubblicani." -``` - - - -## Vectorstore Retriever Options -You can adjust how documents are retrieved from your vectorstore depending on the specific task. - -There are two main ways to retrieve documents relevant to a query- Similarity Search and Max Marginal Relevance Search (MMR Search). 
Similarity Search is the default, but you can use MMR by adding the `search_type` parameter: - -```python -docsearch.as_retriever(search_type="mmr") -``` - -You can also modify the search by passing specific search arguments through the retriever to the search function, using the `search_kwargs` keyword argument. - -- `k` defines how many documents are returned; defaults to 4. -- `score_threshold` allows you to set a minimum relevance for documents returned by the retriever, if you are using the "similarity_score_threshold" search type. -- `fetch_k` determines the amount of documents to pass to the MMR algorithm; defaults to 20. -- `lambda_mult` controls the diversity of results returned by the MMR algorithm, with 1 being minimum diversity and 0 being maximum. Defaults to 0.5. -- `filter` allows you to define a filter on what documents should be retrieved, based on the documents' metadata. This has no effect if the Vectorstore doesn't store any metadata. - -Some examples for how these parameters can be used: -```python -# Retrieve more documents with higher diversity- useful if your dataset has many similar documents -docsearch.as_retriever(search_type="mmr", search_kwargs={'k': 6, 'lambda_mult': 0.25}) - -# Fetch more documents for the MMR algorithm to consider, but only return the top 5 -docsearch.as_retriever(search_type="mmr", search_kwargs={'k': 5, 'fetch_k': 50}) - -# Only retrieve documents that have a relevance score above a certain threshold -docsearch.as_retriever(search_type="similarity_score_threshold", search_kwargs={'score_threshold': 0.8}) - -# Only get the single most similar document from the dataset -docsearch.as_retriever(search_kwargs={'k': 1}) - -# Use a filter to only retrieve documents from a specific paper -docsearch.as_retriever(search_kwargs={'filter': {'paper_title':'GPT-4 Technical Report'}}) -``` - -## Return Source Documents -Additionally, we can return the source documents used to answer the question by specifying an optional parameter when constructing the chain. - - -```python -qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=docsearch.as_retriever(search_type="mmr", search_kwargs={'fetch_k': 30}), return_source_documents=True) -``` - - -```python -query = "What did the president say about Ketanji Brown Jackson" -result = qa({"query": query}) -``` - - -```python -result["result"] -``` - - - -``` - " The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice and a former federal public defender from a family of public school educators and police officers, and that she has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans." -``` - - - - -```python -result["source_documents"] -``` - - - -``` - [Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. 
\n\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', lookup_str='', metadata={'source': '../../state_of_the_union.txt'}, lookup_index=0), - Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \n\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \n\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \n\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', lookup_str='', metadata={'source': '../../state_of_the_union.txt'}, lookup_index=0), - Document(page_content='And for our LGBTQ+ Americans, let’s finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \n\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \n\nWhile it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \n\nAnd soon, we’ll strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \n\nSo tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together. \n\nFirst, beat the opioid epidemic.', lookup_str='', metadata={'source': '../../state_of_the_union.txt'}, lookup_index=0), - Document(page_content='Tonight, I’m announcing a crackdown on these companies overcharging American businesses and consumers. \n\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \n\nThat ends on my watch. \n\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \n\nWe’ll also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \n\nLet’s pass the Paycheck Fairness Act and paid leave. \n\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. 
\n\nLet’s increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls America’s best-kept secret: community colleges.', lookup_str='', metadata={'source': '../../state_of_the_union.txt'}, lookup_index=0)] -``` - - - -Alternatively, if our documents have a "source" metadata key, we can use the `RetrievalQAWithSourcesChain` to cite our sources: - -```python -docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{"source": f"{i}-pl"} for i in range(len(texts))]) -``` - -```python -from langchain.chains import RetrievalQAWithSourcesChain -from langchain.llms import OpenAI - -chain = RetrievalQAWithSourcesChain.from_chain_type(OpenAI(temperature=0), chain_type="stuff", retriever=docsearch.as_retriever()) -``` - -```python -chain({"question": "What did the president say about Justice Breyer"}, return_only_outputs=True) -``` - - - -``` - {'answer': ' The president honored Justice Breyer for his service and mentioned his legacy of excellence.\n', - 'sources': '31-pl'} -``` - - diff --git a/docs/docs/use_cases/question_answering/vector_db_text_generation.ipynb b/docs/docs/use_cases/question_answering/vector_db_text_generation.ipynb deleted file mode 100644 index a10321575ce..00000000000 --- a/docs/docs/use_cases/question_answering/vector_db_text_generation.ipynb +++ /dev/null @@ -1,199 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Retrieve from vector stores directly\n", - "\n", - "This notebook walks through how to use LangChain for text generation over a vector index. This is useful if we want to generate text that is able to draw from a large body of custom text, for example, generating blog posts that have an understanding of previous blog posts written, or product tutorials that can refer to product documentation." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Prepare Data\n", - "\n", - "First, we prepare the data. For this example, we fetch a documentation site that consists of markdown files hosted on Github and split them into small enough Documents." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pathlib\n", - "import subprocess\n", - "import tempfile\n", - "\n", - "from langchain.docstore.document import Document\n", - "from langchain.embeddings.openai import OpenAIEmbeddings\n", - "from langchain.llms import OpenAI\n", - "from langchain.prompts import PromptTemplate\n", - "from langchain.text_splitter import CharacterTextSplitter\n", - "from langchain.vectorstores import Chroma" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Cloning into '.'...\n" - ] - } - ], - "source": [ - "def get_github_docs(repo_owner, repo_name):\n", - " with tempfile.TemporaryDirectory() as d:\n", - " subprocess.check_call(\n", - " f\"git clone --depth 1 https://github.com/{repo_owner}/{repo_name}.git .\",\n", - " cwd=d,\n", - " shell=True,\n", - " )\n", - " git_sha = (\n", - " subprocess.check_output(\"git rev-parse HEAD\", shell=True, cwd=d)\n", - " .decode(\"utf-8\")\n", - " .strip()\n", - " )\n", - " repo_path = pathlib.Path(d)\n", - " markdown_files = list(repo_path.glob(\"*/*.md\")) + list(\n", - " repo_path.glob(\"*/*.mdx\")\n", - " )\n", - " for markdown_file in markdown_files:\n", - " with open(markdown_file, \"r\") as f:\n", - " relative_path = markdown_file.relative_to(repo_path)\n", - " github_url = f\"https://github.com/{repo_owner}/{repo_name}/blob/{git_sha}/{relative_path}\"\n", - " yield Document(page_content=f.read(), metadata={\"source\": github_url})\n", - "\n", - "\n", - "sources = get_github_docs(\"yirenlu92\", \"deno-manual-forked\")\n", - "\n", - "source_chunks = []\n", - "splitter = CharacterTextSplitter(separator=\" \", chunk_size=1024, chunk_overlap=0)\n", - "for source in sources:\n", - " for chunk in splitter.split_text(source.page_content):\n", - " source_chunks.append(Document(page_content=chunk, metadata=source.metadata))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Set Up Vector DB\n", - "\n", - "Now that we have the documentation content in chunks, let's put all this information in a vector index for easy retrieval." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "search_index = Chroma.from_documents(source_chunks, OpenAIEmbeddings())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Set Up LLM Chain with Custom Prompt\n", - "\n", - "Next, let's set up a simple LLM chain but give it a custom prompt for blog post generation. Note that the custom prompt is parameterized and takes two inputs: `context`, which will be the documents fetched from the vector search, and `topic`, which is given by the user." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.chains import LLMChain\n", - "\n", - "prompt_template = \"\"\"Use the context below to write a 400 word blog post about the topic below:\n", - " Context: {context}\n", - " Topic: {topic}\n", - " Blog post:\"\"\"\n", - "\n", - "PROMPT = PromptTemplate(template=prompt_template, input_variables=[\"context\", \"topic\"])\n", - "\n", - "llm = OpenAI(temperature=0)\n", - "\n", - "chain = LLMChain(llm=llm, prompt=PROMPT)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Generate Text\n", - "\n", - "Finally, we write a function to apply our inputs to the chain. 
The function takes an input parameter `topic`. We find the documents in the vector index that correspond to that `topic`, and use them as additional context in our simple LLM chain." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "def generate_blog_post(topic):\n", - " docs = search_index.similarity_search(topic, k=4)\n", - " inputs = [{\"context\": doc.page_content, \"topic\": topic} for doc in docs]\n", - " print(chain.apply(inputs))" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[{'text': '\\n\\nEnvironment variables are a great way to store and access sensitive information in your Deno applications. Deno offers built-in support for environment variables with `Deno.env`, and you can also use a `.env` file to store and access environment variables.\\n\\nUsing `Deno.env` is simple. It has getter and setter methods, so you can easily set and retrieve environment variables. For example, you can set the `FIREBASE_API_KEY` and `FIREBASE_AUTH_DOMAIN` environment variables like this:\\n\\n```ts\\nDeno.env.set(\"FIREBASE_API_KEY\", \"examplekey123\");\\nDeno.env.set(\"FIREBASE_AUTH_DOMAIN\", \"firebasedomain.com\");\\n\\nconsole.log(Deno.env.get(\"FIREBASE_API_KEY\")); // examplekey123\\nconsole.log(Deno.env.get(\"FIREBASE_AUTH_DOMAIN\")); // firebasedomain.com\\n```\\n\\nYou can also store environment variables in a `.env` file. This is a great'}, {'text': '\\n\\nEnvironment variables are a powerful tool for managing configuration settings in a program. They allow us to set values that can be used by the program, without having to hard-code them into the code. This makes it easier to change settings without having to modify the code.\\n\\nIn Deno, environment variables can be set in a few different ways. The most common way is to use the `VAR=value` syntax. This will set the environment variable `VAR` to the value `value`. This can be used to set any number of environment variables before running a command. For example, if we wanted to set the environment variable `VAR` to `hello` before running a Deno command, we could do so like this:\\n\\n```\\nVAR=hello deno run main.ts\\n```\\n\\nThis will set the environment variable `VAR` to `hello` before running the command. We can then access this variable in our code using the `Deno.env.get()` function. For example, if we ran the following command:\\n\\n```\\nVAR=hello && deno eval \"console.log(\\'Deno: \\' + Deno.env.get(\\'VAR'}, {'text': '\\n\\nEnvironment variables are a powerful tool for developers, allowing them to store and access data without having to hard-code it into their applications. In Deno, you can access environment variables using the `Deno.env.get()` function.\\n\\nFor example, if you wanted to access the `HOME` environment variable, you could do so like this:\\n\\n```js\\n// env.js\\nDeno.env.get(\"HOME\");\\n```\\n\\nWhen running this code, you\\'ll need to grant the Deno process access to environment variables. This can be done by passing the `--allow-env` flag to the `deno run` command. 
You can also specify which environment variables you want to grant access to, like this:\\n\\n```shell\\n# Allow access to only the HOME env var\\ndeno run --allow-env=HOME env.js\\n```\\n\\nIt\\'s important to note that environment variables are case insensitive on Windows, so Deno also matches them case insensitively (on Windows only).\\n\\nAnother thing to be aware of when using environment variables is subprocess permissions. Subprocesses are powerful and can access system resources regardless of the permissions you granted to the Den'}, {'text': '\\n\\nEnvironment variables are an important part of any programming language, and Deno is no exception. Deno is a secure JavaScript and TypeScript runtime built on the V8 JavaScript engine, and it recently added support for environment variables. This feature was added in Deno version 1.6.0, and it is now available for use in Deno applications.\\n\\nEnvironment variables are used to store information that can be used by programs. They are typically used to store configuration information, such as the location of a database or the name of a user. In Deno, environment variables are stored in the `Deno.env` object. This object is similar to the `process.env` object in Node.js, and it allows you to access and set environment variables.\\n\\nThe `Deno.env` object is a read-only object, meaning that you cannot directly modify the environment variables. Instead, you must use the `Deno.env.set()` function to set environment variables. This function takes two arguments: the name of the environment variable and the value to set it to. For example, if you wanted to set the `FOO` environment variable to `bar`, you would use the following code:\\n\\n```'}]\n" - ] - } - ], - "source": [ - "generate_blog_post(\"environment variables\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.1" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/vercel.json b/docs/vercel.json index e8fd345d6d6..dbdcb0506f7 100644 --- a/docs/vercel.json +++ b/docs/vercel.json @@ -1,5 +1,37 @@ { "redirects": [ + { + "source": "/docs/use_cases/question_answering/analyze_document", + "destination": "/cookbook" + }, + { + "source": "/docs/use_cases/question_answering/qa_citations", + "destination": "/cookbook" + }, + { + "source": "/docs/use_cases/question_answering/chat_vector_db", + "destination": "/docs/use_cases/question_answering/" + }, + { + "source": "/docs/use_cases/question_answering/in_memory_question_answering", + "destination": "/docs/use_cases/question_answering/" + }, + { + "source": "/docs/use_cases/question_answering/multi_retrieval_qa_router", + "destination": "/docs/use_cases/question_answering/" + }, + { + "source": "/docs/use_cases/question_answering/multiple_retrieval", + "destination": "/docs/use_cases/question_answering/" + }, + { + "source": "/docs/use_cases/question_answering/vector_db_qa", + "destination": "/docs/use_cases/question_answering/" + }, + { + "source": "/docs/use_cases/question_answering/vector_db_text_generation", + "destination": "/docs/use_cases/question_answering/" + }, { "source": 
"/docs/modules/agents/toolkits(/?)", "destination": "/docs/modules/agents/tools/toolkits" @@ -170,7 +202,7 @@ }, { "source": "/docs/use_cases/question_answering/how_to/chat_vector_db", - "destination": "/docs/use_cases/question_answering/chat_vector_db" + "destination": "/docs/use_cases/question_answering/" }, { "source": "/docs/use_cases/code_understanding", @@ -202,7 +234,7 @@ }, { "source": "/docs/use_cases/question_answering/how_to/qa_citations", - "destination": "/docs/use_cases/question_answering/qa_citations" + "destination": "/cookbook" }, { "source": "/docs/use_cases/question_answering/how_to/question_answering", @@ -3686,11 +3718,11 @@ }, { "source": "/docs/modules/chains/additional/analyze_document", - "destination": "/docs/use_cases/question_answering/analyze_document" + "destination": "/cookbook" }, { "source": "/docs/modules/chains/popular/chat_vector_db", - "destination": "/docs/use_cases/question_answering/chat_vector_db" + "destination": "/docs/use_cases/question_answering/" }, { "source": "/docs/modules/chains/additional/multi_retrieval_qa_router", @@ -3830,7 +3862,7 @@ }, { "source": "/docs/modules/chains/additional/qa_citations", - "destination": "/docs/use_cases/question_answering/qa_citations" + "destination": "/cookbook" }, { "source": "/docs/modules/chains/additional/vector_db_text_generation", diff --git a/libs/langchain/langchain/document_loaders/web_base.py b/libs/langchain/langchain/document_loaders/web_base.py index 5d1a9daa488..4cbaabd2268 100644 --- a/libs/langchain/langchain/document_loaders/web_base.py +++ b/libs/langchain/langchain/document_loaders/web_base.py @@ -245,7 +245,7 @@ class WebBaseLoader(BaseLoader): def lazy_load(self) -> Iterator[Document]: """Lazy load text from the url(s) in web_path.""" for path in self.web_paths: - soup = self._scrape(path) + soup = self._scrape(path, bs_kwargs=self.bs_kwargs) text = soup.get_text(**self.bs_get_text_kwargs) metadata = _build_metadata(soup, path) yield Document(page_content=text, metadata=metadata)