diff --git a/docs/extras/use_cases/question_answering/document-context-aware-QA.ipynb b/docs/extras/use_cases/question_answering/document-context-aware-QA.ipynb
index 9bdccb2387f..7969cab51a1 100644
--- a/docs/extras/use_cases/question_answering/document-context-aware-QA.ipynb
+++ b/docs/extras/use_cases/question_answering/document-context-aware-QA.ipynb
@@ -30,14 +30,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
    "id": "2e587f65",
    "metadata": {},
    "outputs": [],
    "source": [
     "# Load Notion page as a markdownfile file\n",
     "from langchain.document_loaders import NotionDirectoryLoader\n",
-    "path='.../Notion_Folder_With_Markdown_File'\n",
+    "path='../Notion_DB/'\n",
     "loader = NotionDirectoryLoader(path)\n",
     "docs = loader.load()\n",
     "md_file=docs[0].page_content"
@@ -45,7 +45,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
    "id": "1cd3fd7e",
    "metadata": {},
    "outputs": [],
@@ -69,7 +69,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 26,
    "id": "7fbff95f",
    "metadata": {},
    "outputs": [],
@@ -110,8 +110,10 @@
    "outputs": [],
    "source": [
     "# Build vectorstore and keep the metadata\n",
+    "from langchain.embeddings import OpenAIEmbeddings\n",
     "from langchain.vectorstores import Chroma\n",
-    "vectorstore = Chroma.from_documents(texts=all_splits,metadatas=all_metadatas,embedding=OpenAIEmbeddings())"
+    "vectorstore = Chroma.from_documents(documents=all_splits,\n",
+    "                                    embedding=OpenAIEmbeddings())"
    ]
   },
   {
@@ -157,6 +159,37 @@
    "We can see that we can query *only for texts* in the `Introduction` of the document!"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "d688db6e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "query='Introduction' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='Section', value='Introduction') limit=None\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[Document(page_content='![Untitled](Auto-Evaluation%20of%20Metadata%20Filtering%2018502448c85240828f33716740f9574b/Untitled.png)', metadata={'Section': 'Introduction'}),\n",
+       " Document(page_content='Q+A systems often use a two-step approach: retrieve relevant text chunks and then synthesize them into an answer. There many ways to approach this. For example, we recently [discussed](https://blog.langchain.dev/auto-evaluation-of-anthropic-100k-context-window/) the Retriever-Less option (at bottom in the below diagram), highlighting the Anthropic 100k context window model. Metadata filtering is an alternative approach that pre-filters chunks based on a user-defined criteria in a VectorDB using', metadata={'Section': 'Introduction'}),\n",
+       " Document(page_content='metadata tags prior to semantic search.', metadata={'Section': 'Introduction'})]"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Test\n",
+    "retriever.get_relevant_documents(\"Summarize the Introduction section of the document\")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 29,
@@ -287,6 +320,11 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.9.16"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
+   }
   }
  },
 "nbformat": 4,
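
For orientation, here is a minimal end-to-end sketch of the workflow this patch corrects: load the Notion export, split on markdown headers so each chunk carries a "Section" metadata tag, build the Chroma vectorstore with the fixed from_documents call, and run the self-query retriever test added above. This is not verbatim notebook code: the headers_to_split_on spec, the chunk sizes, the AttributeInfo description, and the SelfQueryRetriever setup are assumptions reconstructed from the cells the diff context hints at, and it assumes a mid-2023 langchain release where MarkdownHeaderTextSplitter.split_text returns Document objects, with OPENAI_API_KEY set and the lark package installed for the self-query retriever.

# Sketch only: names and parameters below are assumptions, not the notebook's
# exact cells (see note above)
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.document_loaders import NotionDirectoryLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.text_splitter import (
    MarkdownHeaderTextSplitter,
    RecursiveCharacterTextSplitter,
)
from langchain.vectorstores import Chroma

# Load the Notion export; the path matches the one set in the first hunk
docs = NotionDirectoryLoader("../Notion_DB/").load()
md_file = docs[0].page_content

# Header-level split: every resulting Document gets a 'Section' metadata tag
headers_to_split_on = [("###", "Section")]  # assumed header spec
md_header_splits = MarkdownHeaderTextSplitter(
    headers_to_split_on=headers_to_split_on
).split_text(md_file)

# Chunk-level split; split_documents() propagates each input's metadata
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
all_splits = text_splitter.split_documents(md_header_splits)

# The corrected call from the fourth hunk: documents=, with no metadatas= kwarg,
# because each Document already carries its own metadata
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=OpenAIEmbeddings(),
)

# Self-query retriever: an LLM turns "the Introduction section" into a
# metadata filter (Section == 'Introduction') before the similarity search
metadata_field_info = [
    AttributeInfo(
        name="Section",
        description="Part of the document the chunk comes from",  # assumed wording
        type="string",
    ),
]
retriever = SelfQueryRetriever.from_llm(
    OpenAI(temperature=0),
    vectorstore,
    "Major sections of the document",  # assumed document description
    metadata_field_info,
    verbose=True,
)

retriever.get_relevant_documents("Summarize the Introduction section of the document")

Because each split is already a Document carrying its Section tag, Chroma.from_documents needs no separate metadatas= argument, which is the point of the fix in the fourth hunk; with verbose=True, the final call should print a constructed filter like the query='Introduction' filter=Comparison(...) line captured in the new output cell.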