[Bugfix langchain_community] Fix PyMuPDFLoader (#29550)

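- **Description:** Restore the legacy `modDate` and `creationDate` metadata properties in `PyMuPDFParser`, copying them from the document metadata when present.
- **Issue:** #29470
- **Twitter handle:** pprados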
This commit is contained in:
Philippe PRADOS
2025-02-04 15:24:40 +01:00
committed by GitHub
parent 65b404a2d1
commit 5771e561fb

View File

@@ -836,9 +836,9 @@ class PyMuPDFParser(BaseBlobParser):
Returns:
dict: The extracted metadata.
"""
return _purge_metadata(
dict(
{
metadata = _purge_metadata(
{
**{
"producer": "PyMuPDF",
"creator": "PyMuPDF",
"creationdate": "",
@@ -851,8 +851,12 @@ class PyMuPDFParser(BaseBlobParser):
for k in doc.metadata
if isinstance(doc.metadata[k], (str, int))
},
)
}
)
for k in ("modDate", "creationDate"):
if k in doc.metadata:
metadata[k] = doc.metadata[k]
return metadata
def _extract_images_from_page(
self, doc: pymupdf.Document, page: pymupdf.Page