mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-29 09:58:44 +00:00
[Bugfix langchain_community] Fix PyMuPDFLoader (#29550)
- **Description:** add legacy properties
- **Issue:** #29470
- **Twitter handle:** pprados
This commit is contained in:
parent
65b404a2d1
commit
5771e561fb
@ -836,9 +836,9 @@ class PyMuPDFParser(BaseBlobParser):
|
||||
Returns:
|
||||
dict: The extracted metadata.
|
||||
"""
|
||||
return _purge_metadata(
|
||||
dict(
|
||||
{
|
||||
metadata = _purge_metadata(
|
||||
{
|
||||
**{
|
||||
"producer": "PyMuPDF",
|
||||
"creator": "PyMuPDF",
|
||||
"creationdate": "",
|
||||
@ -851,8 +851,12 @@ class PyMuPDFParser(BaseBlobParser):
|
||||
for k in doc.metadata
|
||||
if isinstance(doc.metadata[k], (str, int))
|
||||
},
|
||||
)
|
||||
}
|
||||
)
|
||||
for k in ("modDate", "creationDate"):
|
||||
if k in doc.metadata:
|
||||
metadata[k] = doc.metadata[k]
|
||||
return metadata
|
||||
|
||||
def _extract_images_from_page(
|
||||
self, doc: pymupdf.Document, page: pymupdf.Page
|
||||
|
Loading…
Reference in New Issue
Block a user