mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-26 16:43:35 +00:00
add missing `source` field to pymupdf output (#2110)
This makes the PyMuPDF loader's metadata consistent with other loaders for use with the `Sources` vector workflows.
This commit is contained in:
parent
a554e94a1a
commit
b25dbcb5b3
@@ -156,6 +156,7 @@ class PyMuPDFLoader(BasePDFLoader):
|
|||||||
page_content=page.get_text(**kwargs).encode("utf-8"),
|
page_content=page.get_text(**kwargs).encode("utf-8"),
|
||||||
metadata=dict(
|
metadata=dict(
|
||||||
{
|
{
|
||||||
|
"source": file_path,
|
||||||
"file_path": file_path,
|
"file_path": file_path,
|
||||||
"page_number": page.number + 1,
|
"page_number": page.number + 1,
|
||||||
"total_pages": len(doc),
|
"total_pages": len(doc),
|
||||||
|
Loading…
Reference in New Issue
Block a user