mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-27 00:48:45 +00:00
langchain: preserve document metadata in FlashrankRerank (#19148)
- **Description:** Preserves document metadata in `FlashrankRerank`
- **Issue:** #19142
- **Dependencies:** None
- **Twitter handle:** n/a

---------

Co-authored-by: Simon Stone <simon.stone@dartmouth.edu>
This commit is contained in:
parent
bc648f6cfc
commit
58c7687174
@ -53,7 +53,10 @@
|
||||
"def pretty_print_docs(docs):\n",
|
||||
" print(\n",
|
||||
" f\"\\n{'-' * 100}\\n\".join(\n",
|
||||
" [f\"Document {i+1}:\\n\\n\" + d.page_content for i, d in enumerate(docs)]\n",
|
||||
" [\n",
|
||||
" f\"Document {i+1}:\\n\\n{d.page_content}\\nMetadata: {d.metadata}\"\n",
|
||||
" for i, d in enumerate(docs)\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
" )"
|
||||
]
|
||||
@ -316,6 +319,8 @@
|
||||
").load()\n",
|
||||
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n",
|
||||
"texts = text_splitter.split_documents(documents)\n",
|
||||
"for idx, text in enumerate(texts):\n",
|
||||
" text.metadata[\"id\"] = idx\n",
|
||||
"\n",
|
||||
"embedding = OpenAIEmbeddings(model=\"text-embedding-ada-002\")\n",
|
||||
"retriever = FAISS.from_documents(texts, embedding).as_retriever(search_kwargs={\"k\": 20})\n",
|
||||
|
@ -59,16 +59,20 @@ class FlashrankRerank(BaseDocumentCompressor):
|
||||
callbacks: Optional[Callbacks] = None,
|
||||
) -> Sequence[Document]:
|
||||
passages = [
|
||||
{"id": i, "text": doc.page_content} for i, doc in enumerate(documents)
|
||||
{"id": i, "text": doc.page_content, "meta": doc.metadata}
|
||||
for i, doc in enumerate(documents)
|
||||
]
|
||||
|
||||
rerank_request = RerankRequest(query=query, passages=passages)
|
||||
rerank_response = self.client.rerank(rerank_request)[: self.top_n]
|
||||
final_results = []
|
||||
|
||||
for r in rerank_response:
|
||||
metadata = r["meta"]
|
||||
metadata["relevance_score"] = r["score"]
|
||||
doc = Document(
|
||||
page_content=r["text"],
|
||||
metadata={"id": r["id"], "relevance_score": r["score"]},
|
||||
metadata=metadata,
|
||||
)
|
||||
final_results.append(doc)
|
||||
return final_results
|
||||
|
Loading…
Reference in New Issue
Block a user