mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-30 18:33:40 +00:00
[Bugfix langchain_community] Fix PyMuPDFLoader (#29550)
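- **Description:** Add back the legacy `modDate` and `creationDate` metadata properties, copied from the PyMuPDF document metadata when present.
- **Issue:** #29470
- **Twitter handle:** pprados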
This commit is contained in:
parent
65b404a2d1
commit
5771e561fb
@ -836,9 +836,9 @@ class PyMuPDFParser(BaseBlobParser):
|
|||||||
Returns:
|
Returns:
|
||||||
dict: The extracted metadata.
|
dict: The extracted metadata.
|
||||||
"""
|
"""
|
||||||
return _purge_metadata(
|
metadata = _purge_metadata(
|
||||||
dict(
|
|
||||||
{
|
{
|
||||||
|
**{
|
||||||
"producer": "PyMuPDF",
|
"producer": "PyMuPDF",
|
||||||
"creator": "PyMuPDF",
|
"creator": "PyMuPDF",
|
||||||
"creationdate": "",
|
"creationdate": "",
|
||||||
@ -851,8 +851,12 @@ class PyMuPDFParser(BaseBlobParser):
|
|||||||
for k in doc.metadata
|
for k in doc.metadata
|
||||||
if isinstance(doc.metadata[k], (str, int))
|
if isinstance(doc.metadata[k], (str, int))
|
||||||
},
|
},
|
||||||
|
}
|
||||||
)
|
)
|
||||||
)
|
for k in ("modDate", "creationDate"):
|
||||||
|
if k in doc.metadata:
|
||||||
|
metadata[k] = doc.metadata[k]
|
||||||
|
return metadata
|
||||||
|
|
||||||
def _extract_images_from_page(
|
def _extract_images_from_page(
|
||||||
self, doc: pymupdf.Document, page: pymupdf.Page
|
self, doc: pymupdf.Document, page: pymupdf.Page
|
||||||
|
Loading…
Reference in New Issue
Block a user