mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-22 06:39:52 +00:00
community: Include PDF ID in MathPix metadata (#15629)
- **Description:** Includes the PDF ID in the MathPix document metadata. This is useful in case you need to re-request a processed PDF from the MathPix API later.
This commit is contained in:
parent
d2a686b165
commit
f6226d464e
@@ -518,7 +518,7 @@ class MathpixPDFLoader(BasePDFLoader):
         contents = self.get_processed_pdf(pdf_id)
         if self.should_clean_pdf:
             contents = self.clean_pdf(contents)
-        metadata = {"source": self.source, "file_path": self.source}
+        metadata = {"source": self.source, "file_path": self.source, "pdf_id": pdf_id}
         return [Document(page_content=contents, metadata=metadata)]
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user