mirror of
				https://github.com/hwchase17/langchain.git
				synced 2025-10-25 04:30:13 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			251 lines
		
	
	
		
			6.5 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			251 lines
		
	
	
		
			6.5 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| {
 | |
|  "cells": [
 | |
|   {
 | |
|    "cell_type": "markdown",
 | |
|    "id": "74148cee",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "# Question Answering with Sources\n",
 | |
|     "\n",
 | |
|     "This notebook walks through how to use LangChain for question answering with sources over a list of documents. It covers three different chain types: `stuff`, `map_reduce`, and `refine`. For a more in depth explanation of what these chain types are, see [here](../../explanation/combine_docs.md)."
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "markdown",
 | |
|    "id": "ca2f0efc",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "### Prepare Data\n",
 | |
|     "First we prepare the data. For this example we do similarity search over a vector database, but these documents could be fetched in any manner (the point of this notebook to highlight what to do AFTER you fetch the documents)."
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 1,
 | |
|    "id": "78f28130",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "from langchain.embeddings.openai import OpenAIEmbeddings\n",
 | |
|     "from langchain.embeddings.cohere import CohereEmbeddings\n",
 | |
|     "from langchain.text_splitter import CharacterTextSplitter\n",
 | |
|     "from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch\n",
 | |
|     "from langchain.vectorstores.faiss import FAISS\n",
 | |
|     "from langchain.docstore.document import Document"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 2,
 | |
|    "id": "4da195a3",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "with open('../state_of_the_union.txt') as f:\n",
 | |
|     "    state_of_the_union = f.read()\n",
 | |
|     "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
 | |
|     "texts = text_splitter.split_text(state_of_the_union)\n",
 | |
|     "\n",
 | |
|     "embeddings = OpenAIEmbeddings()"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 3,
 | |
|    "id": "5ec2b55b",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "docsearch = FAISS.from_texts(texts, embeddings, metadatas=[{\"source\": i} for i in range(len(texts))])"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 4,
 | |
|    "id": "5286f58f",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "query = \"What did the president say about Justice Breyer\"\n",
 | |
|     "docs = docsearch.similarity_search(query)"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 5,
 | |
|    "id": "005a47e9",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "from langchain.chains.qa_with_sources import load_qa_with_sources_chain\n",
 | |
|     "from langchain.llms import OpenAI"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "markdown",
 | |
|    "id": "d82f899a",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "### The `stuff` Chain\n",
 | |
|     "\n",
 | |
|     "This sections shows results of using the `stuff` Chain to do question answering with sources."
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 6,
 | |
|    "id": "fc1a5ed6",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "chain = load_qa_with_sources_chain(OpenAI(temperature=0), chain_type=\"stuff\")"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 7,
 | |
|    "id": "e239964b",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "docs = [Document(page_content=t, metadata={\"source\": i}) for i, t in enumerate(texts[:3])]"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 8,
 | |
|    "id": "7d766417",
 | |
|    "metadata": {},
 | |
|    "outputs": [
 | |
|     {
 | |
|      "data": {
 | |
|       "text/plain": [
 | |
|        "{'output_text': ' The president did not mention Justice Breyer.\\nSOURCES: 0-pl, 1-pl, 2-pl'}"
 | |
|       ]
 | |
|      },
 | |
|      "execution_count": 8,
 | |
|      "metadata": {},
 | |
|      "output_type": "execute_result"
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "query = \"What did the president say about Justice Breyer\"\n",
 | |
|     "chain({\"input_documents\": docs, \"question\": query}, return_only_outputs=True)"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "markdown",
 | |
|    "id": "c5dbb304",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "### The `map_reduce` Chain\n",
 | |
|     "\n",
 | |
|     "This sections shows results of using the `map_reduce` Chain to do question answering with sources."
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 9,
 | |
|    "id": "921db0a4",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "chain = load_qa_with_sources_chain(OpenAI(temperature=0), chain_type=\"map_reduce\")"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 10,
 | |
|    "id": "e417926a",
 | |
|    "metadata": {},
 | |
|    "outputs": [
 | |
|     {
 | |
|      "data": {
 | |
|       "text/plain": [
 | |
|        "{'output_text': ' The president did not mention Justice Breyer.\\nSOURCES: 0, 1, 2'}"
 | |
|       ]
 | |
|      },
 | |
|      "execution_count": 10,
 | |
|      "metadata": {},
 | |
|      "output_type": "execute_result"
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "query = \"What did the president say about Justice Breyer\"\n",
 | |
|     "chain({\"input_documents\": docs, \"question\": query}, return_only_outputs=True)"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "markdown",
 | |
|    "id": "5bf0e1ab",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "### The `refine` Chain\n",
 | |
|     "\n",
 | |
|     "This sections shows results of using the `refine` Chain to do question answering with sources."
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 11,
 | |
|    "id": "904835c8",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "chain = load_qa_with_sources_chain(OpenAI(temperature=0), chain_type=\"refine\")"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 13,
 | |
|    "id": "f60875c6",
 | |
|    "metadata": {},
 | |
|    "outputs": [
 | |
|     {
 | |
|      "data": {
 | |
|       "text/plain": [
 | |
|        "{'output_text': \"\\n\\nThe president did not mention Justice Breyer in his speech to the European Parliament, which focused on building a coalition of freedom-loving nations to confront Putin, unifying European allies, countering Russia's lies with truth, and enforcing powerful economic sanctions. Source: 2\"}"
 | |
|       ]
 | |
|      },
 | |
|      "execution_count": 13,
 | |
|      "metadata": {},
 | |
|      "output_type": "execute_result"
 | |
|     }
 | |
|    ],
 | |
|    "source": [
 | |
|     "query = \"What did the president say about Justice Breyer\"\n",
 | |
|     "chain({\"input_documents\": docs, \"question\": query}, return_only_outputs=True)"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": null,
 | |
|    "id": "929620d0",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": []
 | |
|   }
 | |
|  ],
 | |
|  "metadata": {
 | |
|   "kernelspec": {
 | |
|    "display_name": "Python 3 (ipykernel)",
 | |
|    "language": "python",
 | |
|    "name": "python3"
 | |
|   },
 | |
|   "language_info": {
 | |
|    "codemirror_mode": {
 | |
|     "name": "ipython",
 | |
|     "version": 3
 | |
|    },
 | |
|    "file_extension": ".py",
 | |
|    "mimetype": "text/x-python",
 | |
|    "name": "python",
 | |
|    "nbconvert_exporter": "python",
 | |
|    "pygments_lexer": "ipython3",
 | |
|    "version": "3.10.8"
 | |
|   }
 | |
|  },
 | |
|  "nbformat": 4,
 | |
|  "nbformat_minor": 5
 | |
| }
 |