docstrings: document_loaders consistency (#9139)

Formatted docstrings from different formats to consistent format, like:
>Loads processed docs from Docugami.
"Load from `Docugami`."

>Loader that uses Unstructured to load HTML files.
"Load `HTML` files using `Unstructured`."

>Load documents from a directory.
"Load from a directory."
 
- `Load` - no `Loads`
- DocumentLoader always loads Documents, so no more
"documents/docs/texts/ etc"
- integrated systems and APIs enclosed in backticks.
This commit is contained in:
Leonid Ganeline 2023-08-11 13:09:31 -07:00 committed by GitHub
parent 0aabded97f
commit edb585228d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
54 changed files with 66 additions and 94 deletions

View File

@ -1,4 +1,3 @@
"""Loads acreom vault from a directory."""
import re import re
from pathlib import Path from pathlib import Path
from typing import Iterator, List from typing import Iterator, List
@ -8,7 +7,7 @@ from langchain.document_loaders.base import BaseLoader
class AcreomLoader(BaseLoader): class AcreomLoader(BaseLoader):
"""Loader that loads acreom vault from a directory.""" """Load `acreom` vault from a directory."""
FRONT_MATTER_REGEX = re.compile(r"^---\n(.*?)\n---\n", re.MULTILINE | re.DOTALL) FRONT_MATTER_REGEX = re.compile(r"^---\n(.*?)\n---\n", re.MULTILINE | re.DOTALL)
"""Regex to match front matter metadata in markdown files.""" """Regex to match front matter metadata in markdown files."""
@ -16,6 +15,7 @@ class AcreomLoader(BaseLoader):
def __init__( def __init__(
self, path: str, encoding: str = "UTF-8", collect_metadata: bool = True self, path: str, encoding: str = "UTF-8", collect_metadata: bool = True
): ):
"""Initialize the loader."""
self.file_path = path self.file_path = path
"""Path to the directory containing the markdown files.""" """Path to the directory containing the markdown files."""
self.encoding = encoding self.encoding = encoding

View File

@ -1,4 +1,3 @@
"""Loads local airbyte json files."""
from typing import Any, Callable, Iterator, List, Mapping, Optional from typing import Any, Callable, Iterator, List, Mapping, Optional
from langchain.docstore.document import Document from langchain.docstore.document import Document
@ -9,7 +8,7 @@ RecordHandler = Callable[[Any, Optional[str]], Document]
class AirbyteCDKLoader(BaseLoader): class AirbyteCDKLoader(BaseLoader):
"""Loads records using an Airbyte source connector implemented using the CDK.""" """Load with an `Airbyte` source connector implemented using the `CDK`."""
def __init__( def __init__(
self, self,

View File

@ -1,4 +1,3 @@
"""Loads local airbyte json files."""
import json import json
from typing import List from typing import List
@ -8,7 +7,7 @@ from langchain.utils import stringify_dict
class AirbyteJSONLoader(BaseLoader): class AirbyteJSONLoader(BaseLoader):
"""Loads local airbyte json files.""" """Load local `Airbyte` json files."""
def __init__(self, file_path: str): def __init__(self, file_path: str):
"""Initialize with a file path. This should start with '/tmp/airbyte_local/'.""" """Initialize with a file path. This should start with '/tmp/airbyte_local/'."""

View File

@ -5,7 +5,7 @@ from langchain.document_loaders.base import BaseLoader
class AirtableLoader(BaseLoader): class AirtableLoader(BaseLoader):
"""Loader for Airtable tables.""" """Load the `Airtable` tables."""
def __init__(self, api_token: str, table_id: str, base_id: str): def __init__(self, api_token: str, table_id: str, base_id: str):
"""Initialize with API token and the IDs for table and base""" """Initialize with API token and the IDs for table and base"""

View File

@ -7,7 +7,8 @@ from langchain.document_loaders.base import BaseLoader
class ApifyDatasetLoader(BaseLoader, BaseModel): class ApifyDatasetLoader(BaseLoader, BaseModel):
"""Loads datasets from Apify-a web scraping, crawling, and data extraction platform. """Load datasets from `Apify` web scraping, crawling, and data extraction platform.
For details, see https://docs.apify.com/platform/integrations/langchain For details, see https://docs.apify.com/platform/integrations/langchain
Example: Example:

View File

@ -6,7 +6,7 @@ from langchain.utilities.arxiv import ArxivAPIWrapper
class ArxivLoader(BaseLoader): class ArxivLoader(BaseLoader):
"""Loads a query result from arxiv.org into a list of Documents. """Load a query result from `Arxiv`.
The loader converts the original PDF format into the text. The loader converts the original PDF format into the text.
""" """

View File

@ -24,7 +24,7 @@ default_header_template = {
class AsyncHtmlLoader(BaseLoader): class AsyncHtmlLoader(BaseLoader):
"""Loads HTML asynchronously.""" """Load `HTML` asynchronously."""
def __init__( def __init__(
self, self,
@ -36,7 +36,7 @@ class AsyncHtmlLoader(BaseLoader):
requests_kwargs: Dict[str, Any] = {}, requests_kwargs: Dict[str, Any] = {},
raise_for_status: bool = False, raise_for_status: bool = False,
): ):
"""Initialize with webpage path.""" """Initialize with a webpage path."""
# TODO: Deprecate web_path in favor of web_paths, and remove this # TODO: Deprecate web_path in favor of web_paths, and remove this
# left like this because there are a number of loaders that expect single # left like this because there are a number of loaders that expect single

View File

@ -1,4 +1,3 @@
"""Loads AZLyrics."""
from typing import List from typing import List
from langchain.docstore.document import Document from langchain.docstore.document import Document
@ -6,7 +5,7 @@ from langchain.document_loaders.web_base import WebBaseLoader
class AZLyricsLoader(WebBaseLoader): class AZLyricsLoader(WebBaseLoader):
"""Loads AZLyrics webpages.""" """Load `AZLyrics` webpages."""
def load(self) -> List[Document]: def load(self) -> List[Document]:
"""Load webpages into Documents.""" """Load webpages into Documents."""

View File

@ -1,4 +1,3 @@
"""Loading logic for loading documents from an Azure Blob Storage container."""
from typing import List from typing import List
from langchain.docstore.document import Document from langchain.docstore.document import Document
@ -9,7 +8,7 @@ from langchain.document_loaders.base import BaseLoader
class AzureBlobStorageContainerLoader(BaseLoader): class AzureBlobStorageContainerLoader(BaseLoader):
"""Loading Documents from Azure Blob Storage.""" """Load from `Azure Blob Storage` container."""
def __init__(self, conn_str: str, container: str, prefix: str = ""): def __init__(self, conn_str: str, container: str, prefix: str = ""):
"""Initialize with connection string, container and blob prefix.""" """Initialize with connection string, container and blob prefix."""

View File

@ -8,7 +8,7 @@ from langchain.document_loaders.unstructured import UnstructuredFileLoader
class AzureBlobStorageFileLoader(BaseLoader): class AzureBlobStorageFileLoader(BaseLoader):
"""Loading Documents from Azure Blob Storage.""" """Load from `Azure Blob Storage` files."""
def __init__(self, conn_str: str, container: str, blob_name: str): def __init__(self, conn_str: str, container: str, blob_name: str):
"""Initialize with connection string, container and blob name.""" """Initialize with connection string, container and blob name."""

View File

@ -8,7 +8,7 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter
class BaseLoader(ABC): class BaseLoader(ABC):
"""Interface for loading Documents. """Interface for Document Loader.
Implementations should implement the lazy-loading method using generators Implementations should implement the lazy-loading method using generators
to avoid loading all Documents into memory at once. to avoid loading all Documents into memory at once.

View File

@ -11,7 +11,7 @@ logger = logging.getLogger(__name__)
class BibtexLoader(BaseLoader): class BibtexLoader(BaseLoader):
"""Loads a bibtex file into a list of Documents. """Load a `bibtex` file.
Each document represents one entry from the bibtex file. Each document represents one entry from the bibtex file.

View File

@ -10,7 +10,7 @@ if TYPE_CHECKING:
class BigQueryLoader(BaseLoader): class BigQueryLoader(BaseLoader):
"""Loads a query result from BigQuery into a list of documents. """Load from the Google Cloud Platform `BigQuery`.
Each document represents one row of the result. The `page_content_columns` Each document represents one row of the result. The `page_content_columns`
are written into the `page_content` of the document. The `metadata_columns` are written into the `page_content` of the document. The `metadata_columns`

View File

@ -10,7 +10,7 @@ from langchain.document_loaders.base import BaseLoader
class BiliBiliLoader(BaseLoader): class BiliBiliLoader(BaseLoader):
"""Loads bilibili transcripts.""" """Load `BiliBili` video transcripts."""
def __init__(self, video_urls: List[str]): def __init__(self, video_urls: List[str]):
"""Initialize with bilibili url. """Initialize with bilibili url.

View File

@ -1,4 +1,3 @@
"""Loads all documents from a blackboard course."""
import contextlib import contextlib
import re import re
from pathlib import Path from pathlib import Path
@ -12,7 +11,7 @@ from langchain.document_loaders.web_base import WebBaseLoader
class BlackboardLoader(WebBaseLoader): class BlackboardLoader(WebBaseLoader):
"""Loads all documents from a Blackboard course. """Load a `Blackboard` course.
This loader is not compatible with all Blackboard courses. It is only This loader is not compatible with all Blackboard courses. It is only
compatible with courses that use the new Blackboard interface. compatible with courses that use the new Blackboard interface.

View File

@ -20,7 +20,7 @@ class BlockchainType(Enum):
class BlockchainDocumentLoader(BaseLoader): class BlockchainDocumentLoader(BaseLoader):
"""Loads elements from a blockchain smart contract into Langchain documents. """Load elements from a blockchain smart contract.
The supported blockchains are: Ethereum mainnet, Ethereum Goerli testnet, The supported blockchains are: Ethereum mainnet, Ethereum Goerli testnet,
Polygon mainnet, and Polygon Mumbai testnet. Polygon mainnet, and Polygon Mumbai testnet.

View File

@ -6,7 +6,7 @@ from langchain.utilities.brave_search import BraveSearchWrapper
class BraveSearchLoader(BaseLoader): class BraveSearchLoader(BaseLoader):
"""Loads a query result from Brave Search engine into a list of Documents.""" """Load with `Brave Search` engine."""
def __init__(self, query: str, api_key: str, search_kwargs: Optional[dict] = None): def __init__(self, query: str, api_key: str, search_kwargs: Optional[dict] = None):
"""Initializes the BraveLoader. """Initializes the BraveLoader.

View File

@ -7,7 +7,7 @@ from langchain.document_loaders.base import BaseLoader
class BrowserlessLoader(BaseLoader): class BrowserlessLoader(BaseLoader):
"""Loads the content of webpages using Browserless' /content endpoint""" """Load webpages with `Browserless` /content endpoint."""
def __init__( def __init__(
self, api_token: str, urls: Union[str, List[str]], text_content: bool = True self, api_token: str, urls: Union[str, List[str]], text_content: bool = True

View File

@ -1,4 +1,3 @@
"""Load conversations from ChatGPT data export"""
import datetime import datetime
import json import json
from typing import List from typing import List
@ -29,7 +28,7 @@ def concatenate_rows(message: dict, title: str) -> str:
class ChatGPTLoader(BaseLoader): class ChatGPTLoader(BaseLoader):
"""Load conversations from exported ChatGPT data.""" """Load conversations from exported `ChatGPT` data."""
def __init__(self, log_file: str, num_logs: int = -1): def __init__(self, log_file: str, num_logs: int = -1):
"""Initialize a class object. """Initialize a class object.

View File

@ -1,4 +1,3 @@
"""Loads College Confidential."""
from typing import List from typing import List
from langchain.docstore.document import Document from langchain.docstore.document import Document
@ -6,7 +5,7 @@ from langchain.document_loaders.web_base import WebBaseLoader
class CollegeConfidentialLoader(WebBaseLoader): class CollegeConfidentialLoader(WebBaseLoader):
"""Loads College Confidential webpages.""" """Load `College Confidential` webpages."""
def load(self) -> List[Document]: def load(self) -> List[Document]:
"""Load webpages as Documents.""" """Load webpages as Documents."""

View File

@ -16,9 +16,7 @@ DEFAULT = Literal["default"]
class ConcurrentLoader(GenericLoader): class ConcurrentLoader(GenericLoader):
""" """Load and pars Documents concurrently."""
A generic document loader that loads and parses documents concurrently.
"""
def __init__( def __init__(
self, blob_loader: BlobLoader, blob_parser: BaseBlobParser, num_workers: int = 4 self, blob_loader: BlobLoader, blob_parser: BaseBlobParser, num_workers: int = 4

View File

@ -1,4 +1,3 @@
"""Load Data from a Confluence Space"""
import logging import logging
from enum import Enum from enum import Enum
from io import BytesIO from io import BytesIO
@ -33,7 +32,7 @@ class ContentFormat(str, Enum):
class ConfluenceLoader(BaseLoader): class ConfluenceLoader(BaseLoader):
"""Load Confluence pages. """Load `Confluence` pages.
Port of https://llamahub.ai/l/confluence Port of https://llamahub.ai/l/confluence
This currently supports username/api_key, Oauth2 login or personal access token This currently supports username/api_key, Oauth2 login or personal access token

View File

@ -1,4 +1,3 @@
"""Load CoNLL-U files."""
import csv import csv
from typing import List from typing import List
@ -7,7 +6,7 @@ from langchain.document_loaders.base import BaseLoader
class CoNLLULoader(BaseLoader): class CoNLLULoader(BaseLoader):
"""Load CoNLL-U files.""" """Load `CoNLL-U` files."""
def __init__(self, file_path: str): def __init__(self, file_path: str):
"""Initialize with a file path.""" """Initialize with a file path."""

View File

@ -10,7 +10,7 @@ from langchain.document_loaders.unstructured import (
class CSVLoader(BaseLoader): class CSVLoader(BaseLoader):
"""Loads a CSV file into a list of documents. """Load a `CSV` file into a list of Documents.
Each document represents one row of the CSV file. Every row is converted into a Each document represents one row of the CSV file. Every row is converted into a
key/value pair and outputted to a new line in the document's page_content. key/value pair and outputted to a new line in the document's page_content.

View File

@ -12,7 +12,7 @@ logger = logging.getLogger(__name__)
class CubeSemanticLoader(BaseLoader): class CubeSemanticLoader(BaseLoader):
"""Load Cube semantic layer metadata. """Load `Cube semantic layer` metadata.
Args: Args:
cube_api_url: REST API endpoint. cube_api_url: REST API endpoint.

View File

@ -1,4 +1,3 @@
"""Load Datadog logs."""
from datetime import datetime, timedelta from datetime import datetime, timedelta
from typing import List, Optional from typing import List, Optional
@ -7,7 +6,7 @@ from langchain.document_loaders.base import BaseLoader
class DatadogLogsLoader(BaseLoader): class DatadogLogsLoader(BaseLoader):
"""Loads a query result from Datadog into a list of documents. """Load `Datadog` logs.
Logs are written into the `page_content` and into the `metadata`. Logs are written into the `page_content` and into the `metadata`.
""" """

View File

@ -1,4 +1,3 @@
"""Load from a Dataframe object"""
from typing import Any, Iterator, List from typing import Any, Iterator, List
from langchain.docstore.document import Document from langchain.docstore.document import Document
@ -6,7 +5,7 @@ from langchain.document_loaders.base import BaseLoader
class DataFrameLoader(BaseLoader): class DataFrameLoader(BaseLoader):
"""Load Pandas DataFrame.""" """Load `Pandas` DataFrame."""
def __init__(self, data_frame: Any, page_content_column: str = "text"): def __init__(self, data_frame: Any, page_content_column: str = "text"):
"""Initialize with dataframe object. """Initialize with dataframe object.

View File

@ -1,4 +1,3 @@
"""Loader that uses Diffbot to load webpages in text format."""
import logging import logging
from typing import Any, List from typing import Any, List
@ -11,7 +10,7 @@ logger = logging.getLogger(__name__)
class DiffbotLoader(BaseLoader): class DiffbotLoader(BaseLoader):
"""Loads Diffbot file json.""" """Load `Diffbot` json file."""
def __init__( def __init__(
self, api_token: str, urls: List[str], continue_on_failure: bool = True self, api_token: str, urls: List[str], continue_on_failure: bool = True

View File

@ -1,4 +1,3 @@
"""Load documents from a directory."""
import concurrent import concurrent
import logging import logging
import random import random
@ -26,7 +25,7 @@ def _is_visible(p: Path) -> bool:
class DirectoryLoader(BaseLoader): class DirectoryLoader(BaseLoader):
"""Load documents from a directory.""" """Load from a directory."""
def __init__( def __init__(
self, self,

View File

@ -1,4 +1,3 @@
"""Load from Discord chat dump"""
from __future__ import annotations from __future__ import annotations
from typing import TYPE_CHECKING, List from typing import TYPE_CHECKING, List
@ -11,7 +10,7 @@ if TYPE_CHECKING:
class DiscordChatLoader(BaseLoader): class DiscordChatLoader(BaseLoader):
"""Load Discord chat logs.""" """Load `Discord` chat logs."""
def __init__(self, chat_log: pd.DataFrame, user_id_col: str = "ID"): def __init__(self, chat_log: pd.DataFrame, user_id_col: str = "ID"):
"""Initialize with a Pandas DataFrame containing chat logs. """Initialize with a Pandas DataFrame containing chat logs.

View File

@ -1,5 +1,3 @@
"""Loads processed documents from Docugami."""
import io import io
import logging import logging
import os import os
@ -29,7 +27,7 @@ logger = logging.getLogger(__name__)
class DocugamiLoader(BaseLoader, BaseModel): class DocugamiLoader(BaseLoader, BaseModel):
"""Loads processed docs from Docugami. """Load from `Docugami`.
To use, you should have the ``lxml`` python package installed. To use, you should have the ``lxml`` python package installed.
""" """

View File

@ -1,5 +1,3 @@
"""Loads data from Dropbox."""
# Prerequisites: # Prerequisites:
# 1. Create a Dropbox app. # 1. Create a Dropbox app.
# 2. Give the app these scope permissions: `files.metadata.read` # 2. Give the app these scope permissions: `files.metadata.read`
@ -20,7 +18,7 @@ from langchain.document_loaders.base import BaseLoader
class DropboxLoader(BaseLoader, BaseModel): class DropboxLoader(BaseLoader, BaseModel):
"""Loads files from Dropbox. """Load files from `Dropbox`.
In addition to common files such as text and PDF files, it also supports In addition to common files such as text and PDF files, it also supports
*Dropbox Paper* files. *Dropbox Paper* files.

View File

@ -5,7 +5,7 @@ from langchain.document_loaders.base import BaseLoader
class DuckDBLoader(BaseLoader): class DuckDBLoader(BaseLoader):
"""Loads a query result from DuckDB into a list of documents. """Load from `DuckDB`.
Each document represents one row of the result. The `page_content_columns` Each document represents one row of the result. The `page_content_columns`
are written into the `page_content` of the document. The `metadata_columns` are written into the `page_content` of the document. The `metadata_columns`

View File

@ -1,4 +1,3 @@
"""Loads email files."""
import os import os
from typing import Any, List from typing import Any, List
@ -11,7 +10,9 @@ from langchain.document_loaders.unstructured import (
class UnstructuredEmailLoader(UnstructuredFileLoader): class UnstructuredEmailLoader(UnstructuredFileLoader):
"""Loader that uses unstructured to load email files. Works with both """Load email files with `unstructured`.
Works with both
.eml and .msg files. You can process attachments in addition to the .eml and .msg files. You can process attachments in addition to the
e-mail message itself by passing process_attachments=True into the e-mail message itself by passing process_attachments=True into the
constructor for the loader. By default, attachments will be processed constructor for the loader. By default, attachments will be processed

View File

@ -52,7 +52,7 @@ class EmbaasDocumentExtractionPayload(EmbaasDocumentExtractionParameters):
class BaseEmbaasLoader(BaseModel): class BaseEmbaasLoader(BaseModel):
"""Base class for embedding a model into an Embaas document extraction API.""" """Base loader for embedding a model into an `Embaas` document extraction API."""
embaas_api_key: Optional[str] = None embaas_api_key: Optional[str] = None
"""The API key for the embaas document extraction API.""" """The API key for the embaas document extraction API."""
@ -72,7 +72,7 @@ class BaseEmbaasLoader(BaseModel):
class EmbaasBlobLoader(BaseEmbaasLoader, BaseBlobParser): class EmbaasBlobLoader(BaseEmbaasLoader, BaseBlobParser):
"""Embaas's document byte loader. """Load `Embaas` blob.
To use, you should have the To use, you should have the
environment variable ``EMBAAS_API_KEY`` set with your API key, or pass environment variable ``EMBAAS_API_KEY`` set with your API key, or pass
@ -178,7 +178,7 @@ class EmbaasBlobLoader(BaseEmbaasLoader, BaseBlobParser):
class EmbaasLoader(BaseEmbaasLoader, BaseLoader): class EmbaasLoader(BaseEmbaasLoader, BaseLoader):
"""Embaas's document loader. """Load from `Embaas`.
To use, you should have the To use, you should have the
environment variable ``EMBAAS_API_KEY`` set with your API key, or pass environment variable ``EMBAAS_API_KEY`` set with your API key, or pass

View File

@ -1,4 +1,3 @@
"""Loads EPub files."""
from typing import List from typing import List
from langchain.document_loaders.unstructured import ( from langchain.document_loaders.unstructured import (
@ -8,7 +7,7 @@ from langchain.document_loaders.unstructured import (
class UnstructuredEPubLoader(UnstructuredFileLoader): class UnstructuredEPubLoader(UnstructuredFileLoader):
"""Loader that uses Unstructured to load EPUB files. """Load `EPub` files using `Unstructured`.
You can run the loader in one of two modes: "single" and "elements". You can run the loader in one of two modes: "single" and "elements".
If you use "single" mode, the document will be returned as a single If you use "single" mode, the document will be returned as a single

View File

@ -9,8 +9,7 @@ from langchain.document_loaders.base import BaseLoader
class EtherscanLoader(BaseLoader): class EtherscanLoader(BaseLoader):
""" """Load transactions from `Ethereum` mainnet.
Load transactions from an account on Ethereum mainnet.
The Loader uses Etherscan API to interact with Ethereum mainnet. The Loader uses Etherscan API to interact with Ethereum mainnet.

View File

@ -15,7 +15,7 @@ logger = logging.getLogger(__name__)
class EverNoteLoader(BaseLoader): class EverNoteLoader(BaseLoader):
"""EverNote Loader. """Load from `EverNote`.
Loads an EverNote notebook export file e.g. my_notebook.enex into Documents. Loads an EverNote notebook export file e.g. my_notebook.enex into Documents.
Instructions on producing this file can be found at Instructions on producing this file can be found at

View File

@ -8,7 +8,9 @@ from langchain.document_loaders.unstructured import (
class UnstructuredExcelLoader(UnstructuredFileLoader): class UnstructuredExcelLoader(UnstructuredFileLoader):
"""Loader that uses unstructured to load Excel files. Like other """Load Microsoft Excel files using `Unstructured`.
Like other
Unstructured loaders, UnstructuredExcelLoader can be used in both Unstructured loaders, UnstructuredExcelLoader can be used in both
"single" and "elements" mode. If you use the loader in "elements" "single" and "elements" mode. If you use the loader in "elements"
mode, each sheet in the Excel file will be an Unstructured Table mode, each sheet in the Excel file will be an Unstructured Table

View File

@ -1,4 +1,3 @@
"""Loads Facebook chat json dump."""
import datetime import datetime
import json import json
from pathlib import Path from pathlib import Path
@ -23,7 +22,7 @@ def concatenate_rows(row: dict) -> str:
class FacebookChatLoader(BaseLoader): class FacebookChatLoader(BaseLoader):
"""Loads Facebook messages json directory dump.""" """Load `Facebook Chat` messages directory dump."""
def __init__(self, path: str): def __init__(self, path: str):
"""Initialize with a path.""" """Initialize with a path."""

View File

@ -5,7 +5,7 @@ from langchain.document_loaders.base import BaseLoader
class FaunaLoader(BaseLoader): class FaunaLoader(BaseLoader):
"""FaunaDB Loader. """Load from `FaunaDB`.
Attributes: Attributes:
query (str): The FQL query string to execute. query (str): The FQL query string to execute.

View File

@ -1,4 +1,3 @@
"""Loads Figma files json dump."""
import json import json
import urllib.request import urllib.request
from typing import Any, List from typing import Any, List
@ -9,7 +8,7 @@ from langchain.utils import stringify_dict
class FigmaFileLoader(BaseLoader): class FigmaFileLoader(BaseLoader):
"""Loads Figma file json.""" """Load `Figma` file."""
def __init__(self, access_token: str, ids: str, key: str): def __init__(self, access_token: str, ids: str, key: str):
"""Initialize with access token, ids, and key. """Initialize with access token, ids, and key.

View File

@ -1,4 +1,3 @@
"""Loading logic for loading documents from an GCS directory."""
from typing import Callable, List, Optional from typing import Callable, List, Optional
from langchain.docstore.document import Document from langchain.docstore.document import Document
@ -7,7 +6,7 @@ from langchain.document_loaders.gcs_file import GCSFileLoader
class GCSDirectoryLoader(BaseLoader): class GCSDirectoryLoader(BaseLoader):
"""Loads Documents from GCS.""" """Load from GCS directory."""
def __init__( def __init__(
self, self,

View File

@ -1,4 +1,3 @@
"""Load documents from a GCS file."""
import os import os
import tempfile import tempfile
from typing import Callable, List, Optional from typing import Callable, List, Optional
@ -9,7 +8,7 @@ from langchain.document_loaders.unstructured import UnstructuredFileLoader
class GCSFileLoader(BaseLoader): class GCSFileLoader(BaseLoader):
"""Load Documents from a GCS file.""" """Load from GCS file."""
def __init__( def __init__(
self, self,

View File

@ -15,7 +15,7 @@ DEFAULT = Literal["default"]
class GenericLoader(BaseLoader): class GenericLoader(BaseLoader):
"""A generic document loader. """Generic Document Loader.
A generic document loader that allows combining an arbitrary blob loader with A generic document loader that allows combining an arbitrary blob loader with
a blob parser. a blob parser.

View File

@ -1,4 +1,3 @@
"""Load from Dataframe object"""
from typing import Any, Iterator, List from typing import Any, Iterator, List
from langchain.docstore.document import Document from langchain.docstore.document import Document
@ -6,7 +5,7 @@ from langchain.document_loaders.base import BaseLoader
class GeoDataFrameLoader(BaseLoader): class GeoDataFrameLoader(BaseLoader):
"""Load geopandas Dataframe.""" """Load `geopandas` Dataframe."""
def __init__(self, data_frame: Any, page_content_column: str = "geometry"): def __init__(self, data_frame: Any, page_content_column: str = "geometry"):
"""Initialize with geopandas Dataframe. """Initialize with geopandas Dataframe.

View File

@ -6,7 +6,8 @@ from langchain.document_loaders.base import BaseLoader
class GitLoader(BaseLoader): class GitLoader(BaseLoader):
"""Loads files from a Git repository into a list of documents. """Load `Git` repository files.
The Repository can be local on disk available at `repo_path`, The Repository can be local on disk available at `repo_path`,
or remote at `clone_url` that will be cloned to `repo_path`. or remote at `clone_url` that will be cloned to `repo_path`.
Currently, supports only text files. Currently, supports only text files.

View File

@ -1,4 +1,3 @@
"""Loads GitBook."""
from typing import Any, List, Optional from typing import Any, List, Optional
from urllib.parse import urljoin, urlparse from urllib.parse import urljoin, urlparse
@ -7,7 +6,7 @@ from langchain.document_loaders.web_base import WebBaseLoader
class GitbookLoader(WebBaseLoader): class GitbookLoader(WebBaseLoader):
"""Load GitBook data. """Load `GitBook` data.
1. load from either a single page, or 1. load from either a single page, or
2. load all (relative) paths in the navbar. 2. load all (relative) paths in the navbar.

View File

@ -11,7 +11,7 @@ from langchain.utils import get_from_dict_or_env
class BaseGitHubLoader(BaseLoader, BaseModel, ABC): class BaseGitHubLoader(BaseLoader, BaseModel, ABC):
"""Load issues of a GitHub repository.""" """Load `GitHub` repository Issues."""
repo: str repo: str
"""Name of repository""" """Name of repository"""

View File

@ -1,5 +1,3 @@
"""Loads data from Google Drive."""
# Prerequisites: # Prerequisites:
# 1. Create a Google Cloud project # 1. Create a Google Cloud project
# 2. Enable the Google Drive API: # 2. Enable the Google Drive API:
@ -22,7 +20,7 @@ SCOPES = ["https://www.googleapis.com/auth/drive.readonly"]
class GoogleDriveLoader(BaseLoader, BaseModel): class GoogleDriveLoader(BaseLoader, BaseModel):
"""Loads Google Docs from Google Drive.""" """Load Google Docs from `Google Drive`."""
service_account_key: Path = Path.home() / ".credentials" / "keys.json" service_account_key: Path = Path.home() / ".credentials" / "keys.json"
"""Path to the service account key file.""" """Path to the service account key file."""

View File

@ -1,4 +1,3 @@
"""Loads .txt web files."""
from typing import List from typing import List
from langchain.docstore.document import Document from langchain.docstore.document import Document
@ -6,7 +5,7 @@ from langchain.document_loaders.base import BaseLoader
class GutenbergLoader(BaseLoader): class GutenbergLoader(BaseLoader):
"""Loader that uses urllib to load .txt web files.""" """Load from `Gutenberg.org`."""
def __init__(self, file_path: str): def __init__(self, file_path: str):
"""Initialize with a file path.""" """Initialize with a file path."""

View File

@ -1,4 +1,3 @@
"""Loads HN."""
from typing import Any, List from typing import Any, List
from langchain.docstore.document import Document from langchain.docstore.document import Document
@ -6,7 +5,9 @@ from langchain.document_loaders.web_base import WebBaseLoader
class HNLoader(WebBaseLoader): class HNLoader(WebBaseLoader):
"""Load Hacker News data from either main page results or the comments page.""" """Load `Hacker News` data.
It loads data from either main page results or the comments page."""
def load(self) -> List[Document]: def load(self) -> List[Document]:
"""Get important HN webpage information. """Get important HN webpage information.

View File

@ -1,11 +1,10 @@
"""Loader that uses unstructured to load HTML files."""
from typing import List from typing import List
from langchain.document_loaders.unstructured import UnstructuredFileLoader from langchain.document_loaders.unstructured import UnstructuredFileLoader
class UnstructuredHTMLLoader(UnstructuredFileLoader): class UnstructuredHTMLLoader(UnstructuredFileLoader):
"""Loader that uses Unstructured to load HTML files. """Load `HTML` files using `Unstructured`.
You can run the loader in one of two modes: "single" and "elements". You can run the loader in one of two modes: "single" and "elements".
If you use "single" mode, the document will be returned as a single If you use "single" mode, the document will be returned as a single

View File

@ -1,5 +1,3 @@
"""Loader that uses bs4 to load HTML files, enriching metadata with page title."""
import logging import logging
from typing import Dict, List, Union from typing import Dict, List, Union
@ -10,7 +8,7 @@ logger = logging.getLogger(__name__)
class BSHTMLLoader(BaseLoader): class BSHTMLLoader(BaseLoader):
"""Loader that uses beautiful soup to parse HTML files.""" """Load `HTML` files and parse them with `beautiful soup`."""
def __init__( def __init__(
self, self,