mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-28 15:00:23 +00:00
Add missing `source` field to PyMuPDF output (#2110)
This makes the PyMuPDF loader's metadata consistent with the other loaders, for use with the `Sources` vector workflows.
This commit is contained in:
@@ -156,6 +156,7 @@ class PyMuPDFLoader(BasePDFLoader):
     page_content=page.get_text(**kwargs).encode("utf-8"),
     metadata=dict(
         {
+            "source": file_path,
             "file_path": file_path,
             "page_number": page.number + 1,
             "total_pages": len(doc),
Reference in New Issue
Block a user