langchain: preserve document metadata in FlashrankRerank (#19148)

**Description:** Preserves document metadata in `FlashrankRerank`

- **Issue:** #19142
- **Dependencies:** None
- **Twitter handle:** n/a

---------

Co-authored-by: Simon Stone <simon.stone@dartmouth.edu>
This commit is contained in:
Simon Stone
2024-03-19 00:15:18 -04:00
committed by GitHub
parent bc648f6cfc
commit 58c7687174
2 changed files with 12 additions and 3 deletions

View File

@@ -53,7 +53,10 @@
"def pretty_print_docs(docs):\n",
" print(\n",
" f\"\\n{'-' * 100}\\n\".join(\n",
" [f\"Document {i+1}:\\n\\n\" + d.page_content for i, d in enumerate(docs)]\n",
" [\n",
" f\"Document {i+1}:\\n\\n{d.page_content}\\nMetadata: {d.metadata}\"\n",
" for i, d in enumerate(docs)\n",
" ]\n",
" )\n",
" )"
]
@@ -316,6 +319,8 @@
").load()\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n",
"texts = text_splitter.split_documents(documents)\n",
"for idx, text in enumerate(texts):\n",
" text.metadata[\"id\"] = idx\n",
"\n",
"embedding = OpenAIEmbeddings(model=\"text-embedding-ada-002\")\n",
"retriever = FAISS.from_documents(texts, embedding).as_retriever(search_kwargs={\"k\": 20})\n",