From b60e6f6efa8e69d7974ea09cea3f17b82dd10331 Mon Sep 17 00:00:00 2001 From: ccurme Date: Mon, 24 Mar 2025 19:02:52 -0400 Subject: [PATCH] community[patch]: update API ref for AmazonTextractPDFParser (#30468) --- .../langchain_community/document_loaders/parsers/pdf.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libs/community/langchain_community/document_loaders/parsers/pdf.py b/libs/community/langchain_community/document_loaders/parsers/pdf.py index 782edddad44..2b53db28736 100644 --- a/libs/community/langchain_community/document_loaders/parsers/pdf.py +++ b/libs/community/langchain_community/document_loaders/parsers/pdf.py @@ -1508,6 +1508,11 @@ class AmazonTextractPDFParser(BaseBlobParser): This helps most LLMs to achieve better accuracy when processing these texts. + ``Document`` objects are returned with metadata that includes the ``source`` and + a 1-based index of the page number in ``page``. Note that ``page`` represents + the index of the result returned from Textract, not necessarily the as-written + page number in the document. + """ def __init__(