Update pdf.py comment for PyPDFLoader (#10495)

To my understanding, PyPDF does not chunk at the character level. Description: PyPDF does not chunk at the character level; instead, it breaks up content by page. Fix up the comment accordingly. --------- Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com> Co-authored-by: Bagatur <baskaryan@gmail.com>
2025-07-10 15:06:18 +00:00 · 2023-10-05 13:22:40 -05:00 · 2023-10-05 13:22:40 -05:00 · 203258b4d6
commit 203258b4d6
parent 4236ae3851
2 changed files with 3 additions and 3 deletions
--- a/libs/langchain/langchain/document_loaders/parsers/pdf.py
+++ b/libs/langchain/langchain/document_loaders/parsers/pdf.py
@@ -13,7 +13,7 @@ if TYPE_CHECKING:
 class PyPDFParser(BaseBlobParser):
-    """Load `PDF` using `pypdf` and chunk at character level."""
+    """Load `PDF` using `pypdf`"""
    def __init__(self, password: Optional[Union[str, bytes]] = None):
        self.password = password
--- a/libs/langchain/langchain/document_loaders/pdf.py
+++ b/libs/langchain/langchain/document_loaders/pdf.py
@@ -135,9 +135,9 @@ class OnlinePDFLoader(BasePDFLoader):
 class PyPDFLoader(BasePDFLoader):
-    """Load `PDF using `pypdf` and chunks at character level.
+    """Load PDF using pypdf into list of documents.
-    Loader also stores page numbers in metadata.
+    Loader chunks by page and stores page numbers in metadata.
    """
    def __init__(