mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-16 23:13:31 +00:00
langchain: preserve document metadata in FlashrankRerank (#19148)
- **Description:** Preserves document metadata in `FlashrankRerank`
- **Issue:** #19142
- **Dependencies:** None
- **Twitter handle:** n/a

---------

Co-authored-by: Simon Stone <simon.stone@dartmouth.edu>
This commit is contained in:
@@ -53,7 +53,10 @@
|
||||
"def pretty_print_docs(docs):\n",
|
||||
" print(\n",
|
||||
" f\"\\n{'-' * 100}\\n\".join(\n",
|
||||
" [f\"Document {i+1}:\\n\\n\" + d.page_content for i, d in enumerate(docs)]\n",
|
||||
" [\n",
|
||||
" f\"Document {i+1}:\\n\\n{d.page_content}\\nMetadata: {d.metadata}\"\n",
|
||||
" for i, d in enumerate(docs)\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
" )"
|
||||
]
|
||||
@@ -316,6 +319,8 @@
|
||||
").load()\n",
|
||||
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n",
|
||||
"texts = text_splitter.split_documents(documents)\n",
|
||||
"for idx, text in enumerate(texts):\n",
|
||||
" text.metadata[\"id\"] = idx\n",
|
||||
"\n",
|
||||
"embedding = OpenAIEmbeddings(model=\"text-embedding-ada-002\")\n",
|
||||
"retriever = FAISS.from_documents(texts, embedding).as_retriever(search_kwargs={\"k\": 20})\n",
|
||||
|
Reference in New Issue
Block a user