add missing source field to pymupdf output (#2110)

Adds a `source` key to the metadata of each PyMuPDF document so that the
loader is consistent with the other loaders when used with the `Sources`
vector workflows.
This commit is contained in:
Tim Asp
2023-03-28 13:22:05 -07:00
committed by GitHub
parent a554e94a1a
commit b25dbcb5b3

View File

@@ -156,6 +156,7 @@ class PyMuPDFLoader(BasePDFLoader):
page_content=page.get_text(**kwargs).encode("utf-8"),
metadata=dict(
{
"source": file_path,
"file_path": file_path,
"page_number": page.number + 1,
"total_pages": len(doc),