mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-29 07:19:59 +00:00
Add missing `source` field to PyMuPDF output (#2110)
This makes the PyMuPDF loader consistent with the other loaders, so its output can be used with the `Sources` vector workflows.
This commit is contained in:
@@ -156,6 +156,7 @@ class PyMuPDFLoader(BasePDFLoader):
|
|||||||
page_content=page.get_text(**kwargs).encode("utf-8"),
|
page_content=page.get_text(**kwargs).encode("utf-8"),
|
||||||
metadata=dict(
|
metadata=dict(
|
||||||
{
|
{
|
||||||
|
"source": file_path,
|
||||||
"file_path": file_path,
|
"file_path": file_path,
|
||||||
"page_number": page.number + 1,
|
"page_number": page.number + 1,
|
||||||
"total_pages": len(doc),
|
"total_pages": len(doc),
|
||||||
|
Reference in New Issue
Block a user