[Bugfix langchain_community] Fix PyMuPDFLoader (#29550)

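- **Description:** Add the legacy `modDate` and `creationDate` properties to the metadata extracted by `PyMuPDFParser`, copying them from the document metadata when present.
- **Issue:** #29470
- **Twitter handle:** pprados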
This commit is contained in:
Philippe PRADOS 2025-02-04 15:24:40 +01:00 committed by GitHub
parent 65b404a2d1
commit 5771e561fb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -836,9 +836,9 @@ class PyMuPDFParser(BaseBlobParser):
Returns: Returns:
dict: The extracted metadata. dict: The extracted metadata.
""" """
return _purge_metadata( metadata = _purge_metadata(
dict(
{ {
**{
"producer": "PyMuPDF", "producer": "PyMuPDF",
"creator": "PyMuPDF", "creator": "PyMuPDF",
"creationdate": "", "creationdate": "",
@ -851,8 +851,12 @@ class PyMuPDFParser(BaseBlobParser):
for k in doc.metadata for k in doc.metadata
if isinstance(doc.metadata[k], (str, int)) if isinstance(doc.metadata[k], (str, int))
}, },
}
) )
) for k in ("modDate", "creationDate"):
if k in doc.metadata:
metadata[k] = doc.metadata[k]
return metadata
def _extract_images_from_page( def _extract_images_from_page(
self, doc: pymupdf.Document, page: pymupdf.Page self, doc: pymupdf.Document, page: pymupdf.Page