From b60e6f6efa8e69d7974ea09cea3f17b82dd10331 Mon Sep 17 00:00:00 2001
From: ccurme <chester.curme@gmail.com>
Date: Mon, 24 Mar 2025 19:02:52 -0400
Subject: [PATCH] community[patch]: update API ref for AmazonTextractPDFParser
 (#30468)

---
 .../langchain_community/document_loaders/parsers/pdf.py      | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libs/community/langchain_community/document_loaders/parsers/pdf.py b/libs/community/langchain_community/document_loaders/parsers/pdf.py
index 782edddad44..2b53db28736 100644
--- a/libs/community/langchain_community/document_loaders/parsers/pdf.py
+++ b/libs/community/langchain_community/document_loaders/parsers/pdf.py
@@ -1508,6 +1508,11 @@ class AmazonTextractPDFParser(BaseBlobParser):
     This helps most LLMs to achieve better accuracy when
     processing these texts.
 
+    ``Document`` objects are returned with metadata that includes the ``source`` and
+    a 1-based page index in ``page``. Note that ``page`` represents
+    the index of the result returned from Textract, not necessarily the as-written
+    page number in the document.
+
     """
 
     def __init__(