From f6226d464e4344ccd14e38d374d5bb76e26125a6 Mon Sep 17 00:00:00 2001
From: Chad Norvell
Date: Sun, 7 Jan 2024 08:31:53 -0800
Subject: [PATCH] community: Include PDF ID in MathPix metadata (#15629)

- **Description:** Includes the PDF ID in the MathPix document metadata. This is useful in case you need to re-request a processed PDF from the MathPix API later.
---
 libs/community/langchain_community/document_loaders/pdf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/community/langchain_community/document_loaders/pdf.py b/libs/community/langchain_community/document_loaders/pdf.py
index 30615d3e7a3..600eb26bd0b 100644
--- a/libs/community/langchain_community/document_loaders/pdf.py
+++ b/libs/community/langchain_community/document_loaders/pdf.py
@@ -518,7 +518,7 @@ class MathpixPDFLoader(BasePDFLoader):
         contents = self.get_processed_pdf(pdf_id)
         if self.should_clean_pdf:
             contents = self.clean_pdf(contents)
-        metadata = {"source": self.source, "file_path": self.source}
+        metadata = {"source": self.source, "file_path": self.source, "pdf_id": pdf_id}
         return [Document(page_content=contents, metadata=metadata)]