diff --git a/libs/langchain/langchain/document_loaders/airbyte.py b/libs/langchain/langchain/document_loaders/airbyte.py
index 05f3ca62ce1..aec832248d7 100644
--- a/libs/langchain/langchain/document_loaders/airbyte.py
+++ b/libs/langchain/langchain/document_loaders/airbyte.py
@@ -62,7 +62,7 @@ class AirbyteCDKLoader(BaseLoader):
 
 
 class AirbyteHubspotLoader(AirbyteCDKLoader):
-    """Loads records from Hubspot using an Airbyte source connector."""
+    """Load from `Hubspot` using an `Airbyte` source connector."""
 
     def __init__(
         self,
@@ -94,7 +94,7 @@ class AirbyteHubspotLoader(AirbyteCDKLoader):
 
 
 class AirbyteStripeLoader(AirbyteCDKLoader):
-    """Loads records from Stripe using an Airbyte source connector."""
+    """Load from `Stripe` using an `Airbyte` source connector."""
 
     def __init__(
         self,
@@ -126,7 +126,7 @@ class AirbyteStripeLoader(AirbyteCDKLoader):
 
 
 class AirbyteTypeformLoader(AirbyteCDKLoader):
-    """Loads records from Typeform using an Airbyte source connector."""
+    """Load from `Typeform` using an `Airbyte` source connector."""
 
     def __init__(
         self,
@@ -158,7 +158,7 @@ class AirbyteTypeformLoader(AirbyteCDKLoader):
 
 
 class AirbyteZendeskSupportLoader(AirbyteCDKLoader):
-    """Loads records from Zendesk Support using an Airbyte source connector."""
+    """Load from `Zendesk Support` using an `Airbyte` source connector."""
 
     def __init__(
         self,
@@ -190,7 +190,7 @@ class AirbyteZendeskSupportLoader(AirbyteCDKLoader):
 
 
 class AirbyteShopifyLoader(AirbyteCDKLoader):
-    """Loads records from Shopify using an Airbyte source connector."""
+    """Load from `Shopify` using an `Airbyte` source connector."""
 
     def __init__(
         self,
@@ -222,7 +222,7 @@ class AirbyteShopifyLoader(AirbyteCDKLoader):
 
 
 class AirbyteSalesforceLoader(AirbyteCDKLoader):
-    """Loads records from Salesforce using an Airbyte source connector."""
+    """Load from `Salesforce` using an `Airbyte` source connector."""
 
     def __init__(
         self,
@@ -254,7 +254,7 @@ class AirbyteSalesforceLoader(AirbyteCDKLoader):
 
 
 class AirbyteGongLoader(AirbyteCDKLoader):
-    """Loads records from Gong using an Airbyte source connector."""
+    """Load from `Gong` using an `Airbyte` source connector."""
 
     def __init__(
         self,
diff --git a/libs/langchain/langchain/document_loaders/blob_loaders/file_system.py b/libs/langchain/langchain/document_loaders/blob_loaders/file_system.py
index aecc1d62f19..0ac6ad0d84e 100644
--- a/libs/langchain/langchain/document_loaders/blob_loaders/file_system.py
+++ b/libs/langchain/langchain/document_loaders/blob_loaders/file_system.py
@@ -37,7 +37,7 @@ def _make_iterator(
 
 
 class FileSystemBlobLoader(BlobLoader):
-    """Blob loader for the local file system.
+    """Load blobs in the local file system.
 
     Example:
 
@@ -58,7 +58,7 @@ class FileSystemBlobLoader(BlobLoader):
         suffixes: Optional[Sequence[str]] = None,
         show_progress: bool = False,
     ) -> None:
-        """Initialize with path to directory and how to glob over it.
+        """Initialize with a path to a directory and how to glob over it.
 
         Args:
             path: Path to directory to load from
diff --git a/libs/langchain/langchain/document_loaders/blob_loaders/schema.py b/libs/langchain/langchain/document_loaders/blob_loaders/schema.py
index 4435075d503..4e1978c446d 100644
--- a/libs/langchain/langchain/document_loaders/blob_loaders/schema.py
+++ b/libs/langchain/langchain/document_loaders/blob_loaders/schema.py
@@ -19,7 +19,7 @@ PathLike = Union[str, PurePath]
 
 
 class Blob(BaseModel):
-    """A blob is used to represent raw data by either reference or value.
+    """Blob represents raw data by either reference or value.
 
     Provides an interface to materialize the blob in different representations, and
     help to decouple the development of data loaders from the downstream parsing of
diff --git a/libs/langchain/langchain/document_loaders/chromium.py b/libs/langchain/langchain/document_loaders/chromium.py
index 2c09294698e..fa757f2b001 100644
--- a/libs/langchain/langchain/document_loaders/chromium.py
+++ b/libs/langchain/langchain/document_loaders/chromium.py
@@ -9,8 +9,8 @@ logger = logging.getLogger(__name__)
 
 
 class AsyncChromiumLoader(BaseLoader):
-    """Scrape HTML content from provided URLs using a
-    headless instance of the Chromium browser."""
+    """Scrape HTML pages from URLs using a
+    headless instance of Chromium."""
 
     def __init__(
         self,
diff --git a/libs/langchain/langchain/document_loaders/csv_loader.py b/libs/langchain/langchain/document_loaders/csv_loader.py
index 45133786fa3..f2ab7c4eaa6 100644
--- a/libs/langchain/langchain/document_loaders/csv_loader.py
+++ b/libs/langchain/langchain/document_loaders/csv_loader.py
@@ -78,7 +78,9 @@ class CSVLoader(BaseLoader):
 
 
 class UnstructuredCSVLoader(UnstructuredFileLoader):
-    """Loader that uses unstructured to load CSV files. Like other
+    """Load `CSV` files using `Unstructured`.
+
+    Like other
     Unstructured loaders, UnstructuredCSVLoader can be used in both "single" and
     "elements" mode. If you use the loader in "elements" mode, the CSV file will
     be a single Unstructured Table element.
diff --git a/libs/langchain/langchain/document_loaders/email.py b/libs/langchain/langchain/document_loaders/email.py
index f23ef88d390..f85f95a9a6d 100644
--- a/libs/langchain/langchain/document_loaders/email.py
+++ b/libs/langchain/langchain/document_loaders/email.py
@@ -10,7 +10,7 @@ from langchain.document_loaders.unstructured import (
 
 
 class UnstructuredEmailLoader(UnstructuredFileLoader):
-    """Load email files with `unstructured`.
+    """Load email files using `Unstructured`.
 
     Works with both .eml and .msg files.
     You can process attachments in addition to the
diff --git a/libs/langchain/langchain/document_loaders/embaas.py b/libs/langchain/langchain/document_loaders/embaas.py
index 9efa6c4c5a5..0afe0b71984 100644
--- a/libs/langchain/langchain/document_loaders/embaas.py
+++ b/libs/langchain/langchain/document_loaders/embaas.py
@@ -52,14 +52,14 @@ class EmbaasDocumentExtractionPayload(EmbaasDocumentExtractionParameters):
 
 
 class BaseEmbaasLoader(BaseModel):
-    """Base loader for embedding a model into an `Embaas` document extraction API."""
+    """Base loader for `Embaas` document extraction API."""
 
     embaas_api_key: Optional[str] = None
-    """The API key for the embaas document extraction API."""
+    """The API key for the Embaas document extraction API."""
     api_url: str = EMBAAS_DOC_API_URL
-    """The URL of the embaas document extraction API."""
+    """The URL of the Embaas document extraction API."""
     params: EmbaasDocumentExtractionParameters = EmbaasDocumentExtractionParameters()
-    """Additional parameters to pass to the embaas document extraction API."""
+    """Additional parameters to pass to the Embaas document extraction API."""
 
     @root_validator(pre=True)
     def validate_environment(cls, values: Dict) -> Dict:
@@ -163,13 +163,13 @@ class EmbaasBlobLoader(BaseEmbaasLoader, BaseBlobParser):
         except requests.exceptions.RequestException as e:
             if e.response is None or not e.response.text:
                 raise ValueError(
-                    f"Error raised by embaas document text extraction API: {e}"
+                    f"Error raised by Embaas document text extraction API: {e}"
                 )
 
             parsed_response = e.response.json()
             if "message" in parsed_response:
                 raise ValueError(
-                    f"Validation Error raised by embaas document text extraction API:"
+                    f"Validation Error raised by Embaas document text extraction API:"
                     f" {parsed_response['message']}"
                 )
             raise
diff --git a/libs/langchain/langchain/document_loaders/helpers.py b/libs/langchain/langchain/document_loaders/helpers.py
index c48d0b8eb18..6e0f8b9bfb9 100644
--- a/libs/langchain/langchain/document_loaders/helpers.py
+++ b/libs/langchain/langchain/document_loaders/helpers.py
@@ -5,7 +5,7 @@ from typing import List, NamedTuple, Optional, cast
 
 
 class FileEncoding(NamedTuple):
-    """A file encoding as the NamedTuple."""
+    """File encoding as a NamedTuple."""
 
     encoding: Optional[str]
     """The encoding of the file."""
diff --git a/libs/langchain/langchain/document_loaders/notebook.py b/libs/langchain/langchain/document_loaders/notebook.py
index e9f84666b37..77a01b6c45c 100644
--- a/libs/langchain/langchain/document_loaders/notebook.py
+++ b/libs/langchain/langchain/document_loaders/notebook.py
@@ -56,7 +56,7 @@ def concatenate_cells(
 
 
 def remove_newlines(x: Any) -> Any:
-    """Recursively removes newlines, no matter the data structure they are stored in."""
+    """Recursively remove newlines, no matter the data structure they are stored in."""
     import pandas as pd
 
     if isinstance(x, str):
diff --git a/libs/langchain/langchain/document_loaders/parsers/generic.py b/libs/langchain/langchain/document_loaders/parsers/generic.py
index 80545281c29..3d4c0a5ee0b 100644
--- a/libs/langchain/langchain/document_loaders/parsers/generic.py
+++ b/libs/langchain/langchain/document_loaders/parsers/generic.py
@@ -10,7 +10,7 @@ from langchain.schema import Document
 
 
 class MimeTypeBasedParser(BaseBlobParser):
-    """A parser that uses mime-types to determine how to parse a blob.
+    """Parser that uses `mime`-types to parse a blob.
 
     This parser is useful for simple pipelines where the mime-type is sufficient
     to determine how to parse a blob.
diff --git a/libs/langchain/langchain/document_loaders/parsers/grobid.py b/libs/langchain/langchain/document_loaders/parsers/grobid.py
index c19b21e23da..ea21ac8a49e 100644
--- a/libs/langchain/langchain/document_loaders/parsers/grobid.py
+++ b/libs/langchain/langchain/document_loaders/parsers/grobid.py
@@ -11,13 +11,13 @@ logger = logging.getLogger(__name__)
 
 
 class ServerUnavailableException(Exception):
-    """Exception raised when the GROBID server is unavailable."""
+    """Exception raised when the Grobid server is unavailable."""
 
     pass
 
 
 class GrobidParser(BaseBlobParser):
-    """Loader that uses Grobid to load article PDF files."""
+    """Load article `PDF` files using `Grobid`."""
 
     def __init__(
         self,
diff --git a/libs/langchain/langchain/document_loaders/parsers/html/bs4.py b/libs/langchain/langchain/document_loaders/parsers/html/bs4.py
index e56aec31aba..334c1689e5b 100644
--- a/libs/langchain/langchain/document_loaders/parsers/html/bs4.py
+++ b/libs/langchain/langchain/document_loaders/parsers/html/bs4.py
@@ -11,7 +11,7 @@ logger = logging.getLogger(__name__)
 
 
 class BS4HTMLParser(BaseBlobParser):
-    """Parser that uses beautiful soup to parse HTML files."""
+    """Parse HTML files using `Beautiful Soup`."""
 
     def __init__(
         self,
diff --git a/libs/langchain/langchain/document_loaders/parsers/language/code_segmenter.py b/libs/langchain/langchain/document_loaders/parsers/language/code_segmenter.py
index 100fb78e810..2efb2add448 100644
--- a/libs/langchain/langchain/document_loaders/parsers/language/code_segmenter.py
+++ b/libs/langchain/langchain/document_loaders/parsers/language/code_segmenter.py
@@ -3,7 +3,7 @@ from typing import List
 
 
 class CodeSegmenter(ABC):
-    """The abstract class for the code segmenter."""
+    """Abstract class for the code segmenter."""
 
     def __init__(self, code: str):
         self.code = code
diff --git a/libs/langchain/langchain/document_loaders/parsers/language/javascript.py b/libs/langchain/langchain/document_loaders/parsers/language/javascript.py
index cb53bfb4208..258345f8b9c 100644
--- a/libs/langchain/langchain/document_loaders/parsers/language/javascript.py
+++ b/libs/langchain/langchain/document_loaders/parsers/language/javascript.py
@@ -4,7 +4,7 @@ from langchain.document_loaders.parsers.language.code_segmenter import CodeSegme
 
 
 class JavaScriptSegmenter(CodeSegmenter):
-    """The code segmenter for JavaScript."""
+    """Code segmenter for JavaScript."""
 
     def __init__(self, code: str):
         super().__init__(code)
diff --git a/libs/langchain/langchain/document_loaders/parsers/language/language_parser.py b/libs/langchain/langchain/document_loaders/parsers/language/language_parser.py
index 12a11380c8f..97d26a99e61 100644
--- a/libs/langchain/langchain/document_loaders/parsers/language/language_parser.py
+++ b/libs/langchain/langchain/document_loaders/parsers/language/language_parser.py
@@ -19,8 +19,7 @@ LANGUAGE_SEGMENTERS: Dict[str, Any] = {
 
 
 class LanguageParser(BaseBlobParser):
-    """
-    Language parser that split code using the respective language syntax.
+    """Parse using the respective programming language syntax.
 
     Each top-level function and class in the code is loaded into separate documents.
     Furthermore, an extra document is generated, containing the remaining top-level code
diff --git a/libs/langchain/langchain/document_loaders/parsers/language/python.py b/libs/langchain/langchain/document_loaders/parsers/language/python.py
index 4446b4a21a1..642f6b32c31 100644
--- a/libs/langchain/langchain/document_loaders/parsers/language/python.py
+++ b/libs/langchain/langchain/document_loaders/parsers/language/python.py
@@ -5,7 +5,7 @@ from langchain.document_loaders.parsers.language.code_segmenter import CodeSegme
 
 
 class PythonSegmenter(CodeSegmenter):
-    """The code segmenter for Python."""
+    """Code segmenter for `Python`."""
 
     def __init__(self, code: str):
         super().__init__(code)
diff --git a/libs/langchain/langchain/document_loaders/parsers/pdf.py b/libs/langchain/langchain/document_loaders/parsers/pdf.py
index dde96eb8fb8..00d8d9adea6 100644
--- a/libs/langchain/langchain/document_loaders/parsers/pdf.py
+++ b/libs/langchain/langchain/document_loaders/parsers/pdf.py
@@ -8,7 +8,7 @@ from langchain.schema import Document
 
 
 class PyPDFParser(BaseBlobParser):
-    """Loads a PDF with pypdf and chunks at character level."""
+    """Load `PDF` using `pypdf` and chunk at character level."""
 
     def __init__(self, password: Optional[Union[str, bytes]] = None):
         self.password = password
@@ -29,7 +29,7 @@
 
 
 class PDFMinerParser(BaseBlobParser):
-    """Parse PDFs with PDFMiner."""
+    """Parse `PDF` using `PDFMiner`."""
 
     def lazy_parse(self, blob: Blob) -> Iterator[Document]:
         """Lazily parse the blob."""
@@ -42,7 +42,7 @@
 
 
 class PyMuPDFParser(BaseBlobParser):
-    """Parse PDFs with PyMuPDF."""
+    """Parse `PDF` using `PyMuPDF`."""
 
     def __init__(self, text_kwargs: Optional[Mapping[str, Any]] = None) -> None:
         """Initialize the parser.
@@ -81,7 +81,7 @@
 
 
 class PyPDFium2Parser(BaseBlobParser):
-    """Parse PDFs with PyPDFium2."""
+    """Parse `PDF` with `PyPDFium2`."""
 
     def __init__(self) -> None:
         """Initialize the parser."""
@@ -114,7 +114,7 @@
 
 
 class PDFPlumberParser(BaseBlobParser):
-    """Parse PDFs with PDFPlumber."""
+    """Parse `PDF` with `PDFPlumber`."""
 
     def __init__(self, text_kwargs: Optional[Mapping[str, Any]] = None) -> None:
         """Initialize the parser.
@@ -153,7 +153,7 @@
 
 
 class AmazonTextractPDFParser(BaseBlobParser):
-    """Sends PDF files to Amazon Textract and parses them to generate Documents.
+    """Send `PDF` files to `Amazon Textract` and parse them.
 
     For parsing multi-page PDFs, they have to reside on S3.
     """
diff --git a/libs/langchain/langchain/document_loaders/pdf.py b/libs/langchain/langchain/document_loaders/pdf.py
index b671d90ebcd..301af6953e1 100644
--- a/libs/langchain/langchain/document_loaders/pdf.py
+++ b/libs/langchain/langchain/document_loaders/pdf.py
@@ -459,7 +459,7 @@ class PDFPlumberLoader(BasePDFLoader):
 
 
 class AmazonTextractPDFLoader(BasePDFLoader):
-    """ "Load `PDF` files from a local file system, HTTP or S3.
+    """Load `PDF` files from a local file system, HTTP or S3.
 
     To authenticate, the AWS client uses the following methods to
     automatically load credentials:
diff --git a/libs/langchain/langchain/document_loaders/telegram.py b/libs/langchain/langchain/document_loaders/telegram.py
index 9b4f81f4ad8..86e857cd37b 100644
--- a/libs/langchain/langchain/document_loaders/telegram.py
+++ b/libs/langchain/langchain/document_loaders/telegram.py
@@ -47,7 +47,7 @@ class TelegramChatFileLoader(BaseLoader):
 
 
 def text_to_docs(text: Union[str, List[str]]) -> List[Document]:
-    """Converts a string or list of strings to a list of Documents with metadata."""
+    """Convert a string or list of strings to a list of Documents with metadata."""
     if isinstance(text, str):
         # Take a single string as one page
         text = [text]
@@ -78,7 +78,7 @@ def text_to_docs(text: Union[str, List[str]]) -> List[Document]:
 
 
 class TelegramChatApiLoader(BaseLoader):
-    """Loads Telegram chat json directory dump."""
+    """Load `Telegram` chat json directory dump."""
 
     def __init__(
         self,
diff --git a/libs/langchain/langchain/document_loaders/unstructured.py b/libs/langchain/langchain/document_loaders/unstructured.py
index 748a29d3449..8e55b1dd08f 100644
--- a/libs/langchain/langchain/document_loaders/unstructured.py
+++ b/libs/langchain/langchain/document_loaders/unstructured.py
@@ -8,7 +8,7 @@ from langchain.document_loaders.base import BaseLoader
 
 
 def satisfies_min_unstructured_version(min_version: str) -> bool:
-    """Checks to see if the installed unstructured version exceeds the minimum version
+    """Check if the installed `Unstructured` version exceeds the minimum version
     for the feature in question."""
     from unstructured.__version__ import __version__ as __unstructured_version__
 
@@ -25,7 +25,7 @@
 
 
 def validate_unstructured_version(min_unstructured_version: str) -> None:
-    """Raises an error if the unstructured version does not exceed the
+    """Raise an error if the `Unstructured` version does not exceed the
     specified minimum."""
     if not satisfies_min_unstructured_version(min_unstructured_version):
         raise ValueError(
@@ -34,7 +34,7 @@
 
 
 class UnstructuredBaseLoader(BaseLoader, ABC):
-    """Loader that uses Unstructured to load files."""
+    """Base Loader that uses `Unstructured`."""
 
     def __init__(
         self,
@@ -181,7 +181,7 @@ def get_elements_from_api(
     api_key: str = "",
     **unstructured_kwargs: Any,
 ) -> List:
-    """Retrieves a list of elements from the Unstructured API."""
+    """Retrieve a list of elements from the `Unstructured API`."""
     if isinstance(file, collections.abc.Sequence) or isinstance(file_path, list):
         from unstructured.partition.api import partition_multiple_via_api
 
diff --git a/libs/langchain/langchain/document_loaders/youtube.py b/libs/langchain/langchain/document_loaders/youtube.py
index 9724ffda095..9998435a369 100644
--- a/libs/langchain/langchain/document_loaders/youtube.py
+++ b/libs/langchain/langchain/document_loaders/youtube.py
@@ -19,7 +19,7 @@ SCOPES = ["https://www.googleapis.com/auth/youtube.readonly"]
 
 @dataclass
 class GoogleApiClient:
-    """A Generic Google Api Client.
+    """Generic Google API Client.
 
     To use, you should have the ``google_auth_oauthlib,youtube_transcript_api,google``
     python package installed.