mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-02 01:23:07 +00:00
Updated QA notebook (#6801)
Description: `all_metadatas` was not defined and `OpenAIEmbeddings` was not imported. Issue: #6723. Dependencies: lark. Tag maintainer: @vowelparrot, @dev2049. Co-authored-by: rlm <pexpresss31@gmail.com>
This commit is contained in:
parent
140ba682f1
commit
5861770a53
@ -30,14 +30,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 14,
|
||||
"id": "2e587f65",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load Notion page as a markdown file\n",
|
||||
"from langchain.document_loaders import NotionDirectoryLoader\n",
|
||||
"path='.../Notion_Folder_With_Markdown_File'\n",
|
||||
"path='../Notion_DB/'\n",
|
||||
"loader = NotionDirectoryLoader(path)\n",
|
||||
"docs = loader.load()\n",
|
||||
"md_file=docs[0].page_content"
|
||||
@ -45,7 +45,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 15,
|
||||
"id": "1cd3fd7e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -69,7 +69,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 26,
|
||||
"id": "7fbff95f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -110,8 +110,10 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Build vectorstore and keep the metadata\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.vectorstores import Chroma\n",
|
||||
"vectorstore = Chroma.from_documents(texts=all_splits,metadatas=all_metadatas,embedding=OpenAIEmbeddings())"
|
||||
"vectorstore = Chroma.from_documents(documents=all_splits,\n",
|
||||
" embedding=OpenAIEmbeddings())"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -157,6 +159,37 @@
|
||||
"We can see that we can query *only for texts* in the `Introduction` of the document!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "d688db6e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"query='Introduction' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='Section', value='Introduction') limit=None\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='', metadata={'Section': 'Introduction'}),\n",
|
||||
" Document(page_content='Q+A systems often use a two-step approach: retrieve relevant text chunks and then synthesize them into an answer. There many ways to approach this. For example, we recently [discussed](https://blog.langchain.dev/auto-evaluation-of-anthropic-100k-context-window/) the Retriever-Less option (at bottom in the below diagram), highlighting the Anthropic 100k context window model. Metadata filtering is an alternative approach that pre-filters chunks based on a user-defined criteria in a VectorDB using', metadata={'Section': 'Introduction'}),\n",
|
||||
" Document(page_content='metadata tags prior to semantic search.', metadata={'Section': 'Introduction'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Test\n",
|
||||
"retriever.get_relevant_documents(\"Summarize the Introduction section of the document\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
@ -287,6 +320,11 @@
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
Loading…
Reference in New Issue
Block a user