mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-10 06:55:09 +00:00
Update pdf.py comment for PyPDFLoader (#10495)
To my understanding, PyPDF does not chunk at the character level. Description: PyPDF does not chunk at the character level; instead, it breaks up content by page. Fix up the comment accordingly. --------- Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com> Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
4236ae3851
commit
203258b4d6
@@ -13,7 +13,7 @@ if TYPE_CHECKING:
|
||||
|
||||
|
||||
class PyPDFParser(BaseBlobParser):
|
||||
"""Load `PDF` using `pypdf` and chunk at character level."""
|
||||
"""Load `PDF` using `pypdf`"""
|
||||
|
||||
def __init__(self, password: Optional[Union[str, bytes]] = None):
|
||||
self.password = password
|
||||
|
@@ -135,9 +135,9 @@ class OnlinePDFLoader(BasePDFLoader):
|
||||
|
||||
|
||||
class PyPDFLoader(BasePDFLoader):
|
||||
"""Load `PDF using `pypdf` and chunks at character level.
|
||||
"""Load PDF using pypdf into list of documents.
|
||||
|
||||
Loader also stores page numbers in metadata.
|
||||
Loader chunks by page and stores page numbers in metadata.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
|
Loading…
Reference in New Issue
Block a user