[Bugfix langchain_community] Fix PyMuPDFLoader (#29550)

- **Description:** Add the legacy `modDate` and `creationDate` metadata properties back to the metadata returned by the PyMuPDF parser.
- **Issue:** #29470
- **Twitter handle:** pprados
This commit is contained in:
Philippe PRADOS 2025-02-04 15:24:40 +01:00 committed by GitHub
parent 65b404a2d1
commit 5771e561fb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -836,9 +836,9 @@ class PyMuPDFParser(BaseBlobParser):
Returns:
dict: The extracted metadata.
"""
-return _purge_metadata(
-dict(
-{
+metadata = _purge_metadata(
+{
+**{
"producer": "PyMuPDF",
"creator": "PyMuPDF",
"creationdate": "",
@@ -851,8 +851,12 @@ class PyMuPDFParser(BaseBlobParser):
for k in doc.metadata
if isinstance(doc.metadata[k], (str, int))
},
-)
+}
)
+for k in ("modDate", "creationDate"):
+if k in doc.metadata:
+metadata[k] = doc.metadata[k]
+return metadata
def _extract_images_from_page(
self, doc: pymupdf.Document, page: pymupdf.Page