diff --git a/libs/langchain/langchain/document_loaders/parsers/pdf.py b/libs/langchain/langchain/document_loaders/parsers/pdf.py index 2ec7a684be6..22701f2c3e3 100644 --- a/libs/langchain/langchain/document_loaders/parsers/pdf.py +++ b/libs/langchain/langchain/document_loaders/parsers/pdf.py @@ -13,7 +13,7 @@ if TYPE_CHECKING: class PyPDFParser(BaseBlobParser): - """Load `PDF` using `pypdf` and chunk at character level.""" + """Load `PDF` using `pypdf`.""" def __init__(self, password: Optional[Union[str, bytes]] = None): self.password = password diff --git a/libs/langchain/langchain/document_loaders/pdf.py b/libs/langchain/langchain/document_loaders/pdf.py index dfccf9c4bd2..67743effd98 100644 --- a/libs/langchain/langchain/document_loaders/pdf.py +++ b/libs/langchain/langchain/document_loaders/pdf.py @@ -135,9 +135,9 @@ class OnlinePDFLoader(BasePDFLoader): class PyPDFLoader(BasePDFLoader): - """Load `PDF using `pypdf` and chunks at character level. + """Load `PDF` using `pypdf` into a list of documents. - Loader also stores page numbers in metadata. + Loader chunks by page and stores page numbers in metadata. """ def __init__(