mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-26 08:33:49 +00:00
add missing source field to pymupdf output (#2110)
This makes the PyMuPDF loader's output consistent with other loaders for use with the `Sources` vector workflows.
This commit is contained in:
parent
a554e94a1a
commit
b25dbcb5b3
@ -156,6 +156,7 @@ class PyMuPDFLoader(BasePDFLoader):
|
||||
page_content=page.get_text(**kwargs).encode("utf-8"),
|
||||
metadata=dict(
|
||||
{
|
||||
"source": file_path,
|
||||
"file_path": file_path,
|
||||
"page_number": page.number + 1,
|
||||
"total_pages": len(doc),
|
||||
|
Loading…
Reference in New Issue
Block a user