From 11195cfa42de7a038ae9cdcc7f179d595b7c8ab8 Mon Sep 17 00:00:00 2001 From: Leonid Ganeline Date: Mon, 11 Mar 2024 13:37:36 -0700 Subject: [PATCH] community[patch]: speed up import times in the community package (#18928) This PR speeds up import times in the community package --- .../langchain_community/callbacks/__init__.py | 95 ++- .../chat_loaders/__init__.py | 26 + .../chat_message_histories/__init__.py | 101 +-- .../chat_models/__init__.py | 145 ++--- .../langchain_community/docstore/__init__.py | 22 +- .../document_compressors/__init__.py | 21 +- .../document_loaders/__init__.py | 598 ++++++------------ .../document_loaders/blob_loaders/__init__.py | 26 +- .../document_loaders/parsers/__init__.py | 56 +- .../document_loaders/pdf.py | 4 + .../document_loaders/telegram.py | 4 + 11 files changed, 430 insertions(+), 668 deletions(-) diff --git a/libs/community/langchain_community/callbacks/__init__.py b/libs/community/langchain_community/callbacks/__init__.py index 79aa23a8886..f4cc9fc2037 100644 --- a/libs/community/langchain_community/callbacks/__init__.py +++ b/libs/community/langchain_community/callbacks/__init__.py @@ -6,63 +6,42 @@ BaseCallbackHandler --> CallbackHandler # Example: AimCallbackHandler """ +import importlib +from typing import Any -from langchain_community.callbacks.aim_callback import AimCallbackHandler -from langchain_community.callbacks.argilla_callback import ArgillaCallbackHandler -from langchain_community.callbacks.arize_callback import ArizeCallbackHandler -from langchain_community.callbacks.arthur_callback import ArthurCallbackHandler -from langchain_community.callbacks.clearml_callback import ClearMLCallbackHandler -from langchain_community.callbacks.comet_ml_callback import CometCallbackHandler -from langchain_community.callbacks.context_callback import ContextCallbackHandler -from langchain_community.callbacks.fiddler_callback import FiddlerCallbackHandler -from langchain_community.callbacks.flyte_callback import FlyteCallbackHandler -from langchain_community.callbacks.human import HumanApprovalCallbackHandler -from langchain_community.callbacks.infino_callback import InfinoCallbackHandler -from langchain_community.callbacks.labelstudio_callback import ( - LabelStudioCallbackHandler, -) -from langchain_community.callbacks.llmonitor_callback import LLMonitorCallbackHandler -from langchain_community.callbacks.manager import ( - get_openai_callback, - wandb_tracing_enabled, -) -from langchain_community.callbacks.mlflow_callback import MlflowCallbackHandler -from langchain_community.callbacks.openai_info import OpenAICallbackHandler -from langchain_community.callbacks.promptlayer_callback import ( - PromptLayerCallbackHandler, -) -from langchain_community.callbacks.sagemaker_callback import SageMakerCallbackHandler -from langchain_community.callbacks.streamlit import ( - LLMThoughtLabeler, - StreamlitCallbackHandler, -) -from langchain_community.callbacks.trubrics_callback import TrubricsCallbackHandler -from langchain_community.callbacks.wandb_callback import WandbCallbackHandler -from langchain_community.callbacks.whylabs_callback import WhyLabsCallbackHandler +_module_lookup = { + "AimCallbackHandler": "langchain_community.callbacks.aim_callback", + "ArgillaCallbackHandler": "langchain_community.callbacks.argilla_callback", + "ArizeCallbackHandler": "langchain_community.callbacks.arize_callback", + "ArthurCallbackHandler": "langchain_community.callbacks.arthur_callback", + "ClearMLCallbackHandler": "langchain_community.callbacks.clearml_callback", + "CometCallbackHandler": "langchain_community.callbacks.comet_ml_callback", + "ContextCallbackHandler": "langchain_community.callbacks.context_callback", + "FiddlerCallbackHandler": "langchain_community.callbacks.fiddler_callback", + "FlyteCallbackHandler": "langchain_community.callbacks.flyte_callback", + "HumanApprovalCallbackHandler": "langchain_community.callbacks.human", + "InfinoCallbackHandler": "langchain_community.callbacks.infino_callback", + "LLMThoughtLabeler": "langchain_community.callbacks.streamlit", + "LLMonitorCallbackHandler": "langchain_community.callbacks.llmonitor_callback", + "LabelStudioCallbackHandler": "langchain_community.callbacks.labelstudio_callback", + "MlflowCallbackHandler": "langchain_community.callbacks.mlflow_callback", + "OpenAICallbackHandler": "langchain_community.callbacks.openai_info", + "PromptLayerCallbackHandler": "langchain_community.callbacks.promptlayer_callback", + "SageMakerCallbackHandler": "langchain_community.callbacks.sagemaker_callback", + "StreamlitCallbackHandler": "langchain_community.callbacks.streamlit", + "TrubricsCallbackHandler": "langchain_community.callbacks.trubrics_callback", + "WandbCallbackHandler": "langchain_community.callbacks.wandb_callback", + "WhyLabsCallbackHandler": "langchain_community.callbacks.whylabs_callback", + "get_openai_callback": "langchain_community.callbacks.manager", + "wandb_tracing_enabled": "langchain_community.callbacks.manager", +} -__all__ = [ - "AimCallbackHandler", - "ArgillaCallbackHandler", - "ArizeCallbackHandler", - "PromptLayerCallbackHandler", - "ArthurCallbackHandler", - "ClearMLCallbackHandler", - "CometCallbackHandler", - "ContextCallbackHandler", - "HumanApprovalCallbackHandler", - "InfinoCallbackHandler", - "MlflowCallbackHandler", - "LLMonitorCallbackHandler", - "OpenAICallbackHandler", - "LLMThoughtLabeler", - "StreamlitCallbackHandler", - "WandbCallbackHandler", - "WhyLabsCallbackHandler", - "get_openai_callback", - "wandb_tracing_enabled", - "FlyteCallbackHandler", - "SageMakerCallbackHandler", - "LabelStudioCallbackHandler", - "TrubricsCallbackHandler", - "FiddlerCallbackHandler", -] + +def __getattr__(name: str) -> Any: + if name in _module_lookup: + module = importlib.import_module(_module_lookup[name]) + return getattr(module, name) + raise AttributeError(f"module {__name__} has no attribute {name}") + + +__all__ = list(_module_lookup.keys()) diff --git a/libs/community/langchain_community/chat_loaders/__init__.py b/libs/community/langchain_community/chat_loaders/__init__.py index 7547ddcecc8..42ddcc4098e 100644 --- a/libs/community/langchain_community/chat_loaders/__init__.py +++ b/libs/community/langchain_community/chat_loaders/__init__.py @@ -17,3 +17,29 @@ WhatsApp. The loaded chat messages can be used for fine-tuning models. ChatSession """ # noqa: E501 + +import importlib +from typing import Any + +_module_lookup = { + "BaseChatLoader": "langchain_community.chat_loaders.base", + "FolderFacebookMessengerChatLoader": "langchain_community.chat_loaders.facebook_messenger", # noqa: E501 + "GMailLoader": "langchain_community.chat_loaders.gmail", + "IMessageChatLoader": "langchain_community.chat_loaders.imessage", + "LangSmithDatasetChatLoader": "langchain_community.chat_loaders.langsmith", + "LangSmithRunChatLoader": "langchain_community.chat_loaders.langsmith", + "SingleFileFacebookMessengerChatLoader": "langchain_community.chat_loaders.facebook_messenger", # noqa: E501 + "SlackChatLoader": "langchain_community.chat_loaders.slack", + "TelegramChatLoader": "langchain_community.chat_loaders.telegram", + "WhatsAppChatLoader": "langchain_community.chat_loaders.whatsapp", +} + + +def __getattr__(name: str) -> Any: + if name in _module_lookup: + module = importlib.import_module(_module_lookup[name]) + return getattr(module, name) + raise AttributeError(f"module {__name__} has no attribute {name}") + + +__all__ = list(_module_lookup.keys()) diff --git a/libs/community/langchain_community/chat_message_histories/__init__.py b/libs/community/langchain_community/chat_message_histories/__init__.py index 592fcc649e9..195007d18f6 100644 --- a/libs/community/langchain_community/chat_message_histories/__init__.py +++ b/libs/community/langchain_community/chat_message_histories/__init__.py @@ -15,70 +15,39 @@ """ # noqa: E501 -from langchain_community.chat_message_histories.astradb import ( - AstraDBChatMessageHistory, -) -from langchain_community.chat_message_histories.cassandra import ( - CassandraChatMessageHistory, -) -from langchain_community.chat_message_histories.cosmos_db import ( - CosmosDBChatMessageHistory, -) -from langchain_community.chat_message_histories.dynamodb import ( - DynamoDBChatMessageHistory, -) -from langchain_community.chat_message_histories.elasticsearch import ( - ElasticsearchChatMessageHistory, -) -from langchain_community.chat_message_histories.file import FileChatMessageHistory -from langchain_community.chat_message_histories.firestore import ( - FirestoreChatMessageHistory, -) -from langchain_community.chat_message_histories.in_memory import ChatMessageHistory -from langchain_community.chat_message_histories.momento import MomentoChatMessageHistory -from langchain_community.chat_message_histories.mongodb import MongoDBChatMessageHistory -from langchain_community.chat_message_histories.neo4j import Neo4jChatMessageHistory -from langchain_community.chat_message_histories.postgres import ( - PostgresChatMessageHistory, -) -from langchain_community.chat_message_histories.redis import RedisChatMessageHistory -from langchain_community.chat_message_histories.rocksetdb import ( - RocksetChatMessageHistory, -) -from langchain_community.chat_message_histories.singlestoredb import ( - SingleStoreDBChatMessageHistory, -) -from langchain_community.chat_message_histories.sql import SQLChatMessageHistory -from langchain_community.chat_message_histories.streamlit import ( - StreamlitChatMessageHistory, -) -from langchain_community.chat_message_histories.tidb import TiDBChatMessageHistory -from langchain_community.chat_message_histories.upstash_redis import ( - UpstashRedisChatMessageHistory, -) -from langchain_community.chat_message_histories.xata import XataChatMessageHistory -from langchain_community.chat_message_histories.zep import ZepChatMessageHistory +import importlib +from typing import Any -__all__ = [ - "AstraDBChatMessageHistory", - "ChatMessageHistory", - "CassandraChatMessageHistory", - "CosmosDBChatMessageHistory", - "DynamoDBChatMessageHistory", - "ElasticsearchChatMessageHistory", - "FileChatMessageHistory", - "FirestoreChatMessageHistory", - "MomentoChatMessageHistory", - "MongoDBChatMessageHistory", - "PostgresChatMessageHistory", - "RedisChatMessageHistory", - "RocksetChatMessageHistory", - "SQLChatMessageHistory", - "StreamlitChatMessageHistory", - "SingleStoreDBChatMessageHistory", - "XataChatMessageHistory", - "ZepChatMessageHistory", - "UpstashRedisChatMessageHistory", - "Neo4jChatMessageHistory", - "TiDBChatMessageHistory", -] +_module_lookup = { + "AstraDBChatMessageHistory": "langchain_community.chat_message_histories.astradb", + "CassandraChatMessageHistory": "langchain_community.chat_message_histories.cassandra", # noqa: E501 + "ChatMessageHistory": "langchain_community.chat_message_histories.in_memory", + "CosmosDBChatMessageHistory": "langchain_community.chat_message_histories.cosmos_db", # noqa: E501 + "DynamoDBChatMessageHistory": "langchain_community.chat_message_histories.dynamodb", + "ElasticsearchChatMessageHistory": "langchain_community.chat_message_histories.elasticsearch", # noqa: E501 + "FileChatMessageHistory": "langchain_community.chat_message_histories.file", + "FirestoreChatMessageHistory": "langchain_community.chat_message_histories.firestore", # noqa: E501 + "MomentoChatMessageHistory": "langchain_community.chat_message_histories.momento", + "MongoDBChatMessageHistory": "langchain_community.chat_message_histories.mongodb", + "Neo4jChatMessageHistory": "langchain_community.chat_message_histories.neo4j", + "PostgresChatMessageHistory": "langchain_community.chat_message_histories.postgres", + "RedisChatMessageHistory": "langchain_community.chat_message_histories.redis", + "RocksetChatMessageHistory": "langchain_community.chat_message_histories.rocksetdb", + "SQLChatMessageHistory": "langchain_community.chat_message_histories.sql", + "SingleStoreDBChatMessageHistory": "langchain_community.chat_message_histories.singlestoredb", # noqa: E501 + "StreamlitChatMessageHistory": "langchain_community.chat_message_histories.streamlit", # noqa: E501 + "TiDBChatMessageHistory": "langchain_community.chat_message_histories.tidb", + "UpstashRedisChatMessageHistory": "langchain_community.chat_message_histories.upstash_redis", # noqa: E501 + "XataChatMessageHistory": "langchain_community.chat_message_histories.xata", + "ZepChatMessageHistory": "langchain_community.chat_message_histories.zep", +} + + +def __getattr__(name: str) -> Any: + if name in _module_lookup: + module = importlib.import_module(_module_lookup[name]) + return getattr(module, name) + raise AttributeError(f"module {__name__} has no attribute {name}") + + +__all__ = list(_module_lookup.keys()) diff --git a/libs/community/langchain_community/chat_models/__init__.py b/libs/community/langchain_community/chat_models/__init__.py index fb3495ef174..76ede64dc1a 100644 --- a/libs/community/langchain_community/chat_models/__init__.py +++ b/libs/community/langchain_community/chat_models/__init__.py @@ -17,92 +17,61 @@ an interface where "chat messages" are the inputs and outputs. AIMessage, BaseMessage, HumanMessage """ # noqa: E501 -from langchain_community.chat_models.anthropic import ChatAnthropic -from langchain_community.chat_models.anyscale import ChatAnyscale -from langchain_community.chat_models.azure_openai import AzureChatOpenAI -from langchain_community.chat_models.baichuan import ChatBaichuan -from langchain_community.chat_models.baidu_qianfan_endpoint import QianfanChatEndpoint -from langchain_community.chat_models.bedrock import BedrockChat -from langchain_community.chat_models.cohere import ChatCohere -from langchain_community.chat_models.databricks import ChatDatabricks -from langchain_community.chat_models.deepinfra import ChatDeepInfra -from langchain_community.chat_models.ernie import ErnieBotChat -from langchain_community.chat_models.everlyai import ChatEverlyAI -from langchain_community.chat_models.fake import FakeListChatModel -from langchain_community.chat_models.fireworks import ChatFireworks -from langchain_community.chat_models.friendli import ChatFriendli -from langchain_community.chat_models.gigachat import GigaChat -from langchain_community.chat_models.google_palm import ChatGooglePalm -from langchain_community.chat_models.gpt_router import GPTRouter -from langchain_community.chat_models.huggingface import ChatHuggingFace -from langchain_community.chat_models.human import HumanInputChatModel -from langchain_community.chat_models.hunyuan import ChatHunyuan -from langchain_community.chat_models.javelin_ai_gateway import ChatJavelinAIGateway -from langchain_community.chat_models.jinachat import JinaChat -from langchain_community.chat_models.kinetica import ChatKinetica -from langchain_community.chat_models.konko import ChatKonko -from langchain_community.chat_models.litellm import ChatLiteLLM -from langchain_community.chat_models.litellm_router import ChatLiteLLMRouter -from langchain_community.chat_models.llama_edge import LlamaEdgeChatService -from langchain_community.chat_models.maritalk import ChatMaritalk -from langchain_community.chat_models.minimax import MiniMaxChat -from langchain_community.chat_models.mlflow import ChatMlflow -from langchain_community.chat_models.mlflow_ai_gateway import ChatMLflowAIGateway -from langchain_community.chat_models.ollama import ChatOllama -from langchain_community.chat_models.openai import ChatOpenAI -from langchain_community.chat_models.pai_eas_endpoint import PaiEasChatEndpoint -from langchain_community.chat_models.perplexity import ChatPerplexity -from langchain_community.chat_models.promptlayer_openai import PromptLayerChatOpenAI -from langchain_community.chat_models.sparkllm import ChatSparkLLM -from langchain_community.chat_models.tongyi import ChatTongyi -from langchain_community.chat_models.vertexai import ChatVertexAI -from langchain_community.chat_models.volcengine_maas import VolcEngineMaasChat -from langchain_community.chat_models.yandex import ChatYandexGPT -from langchain_community.chat_models.yuan2 import ChatYuan2 -from langchain_community.chat_models.zhipuai import ChatZhipuAI +import importlib +from typing import Any -__all__ = [ - "LlamaEdgeChatService", - "ChatOpenAI", - "BedrockChat", - "AzureChatOpenAI", - "FakeListChatModel", - "PromptLayerChatOpenAI", - "ChatDatabricks", - "ChatDeepInfra", - "ChatEverlyAI", - "ChatAnthropic", - "ChatCohere", - "ChatGooglePalm", - "ChatMlflow", - "ChatMLflowAIGateway", - "ChatOllama", - "ChatVertexAI", - "JinaChat", - "ChatHuggingFace", - "HumanInputChatModel", - "MiniMaxChat", - "ChatAnyscale", - "ChatLiteLLM", - "ChatLiteLLMRouter", - "ErnieBotChat", - "ChatJavelinAIGateway", - "ChatKonko", - "PaiEasChatEndpoint", - "QianfanChatEndpoint", - "ChatTongyi", - "ChatFireworks", - "ChatYandexGPT", - "ChatBaichuan", - "ChatHunyuan", - "ChatFriendli", - "GigaChat", - "ChatSparkLLM", - "VolcEngineMaasChat", - "GPTRouter", - "ChatYuan2", - "ChatZhipuAI", - "ChatPerplexity", - "ChatKinetica", - "ChatMaritalk", -] +_module_lookup = { + "AzureChatOpenAI": "langchain_community.chat_models.azure_openai", + "BedrockChat": "langchain_community.chat_models.bedrock", + "ChatAnthropic": "langchain_community.chat_models.anthropic", + "ChatAnyscale": "langchain_community.chat_models.anyscale", + "ChatBaichuan": "langchain_community.chat_models.baichuan", + "ChatCohere": "langchain_community.chat_models.cohere", + "ChatDatabricks": "langchain_community.chat_models.databricks", + "ChatDeepInfra": "langchain_community.chat_models.deepinfra", + "ChatEverlyAI": "langchain_community.chat_models.everlyai", + "ChatFireworks": "langchain_community.chat_models.fireworks", + "ChatFriendli": "langchain_community.chat_models.friendli", + "ChatGooglePalm": "langchain_community.chat_models.google_palm", + "ChatHuggingFace": "langchain_community.chat_models.huggingface", + "ChatHunyuan": "langchain_community.chat_models.hunyuan", + "ChatJavelinAIGateway": "langchain_community.chat_models.javelin_ai_gateway", + "ChatKinetica": "langchain_community.chat_models.kinetica", + "ChatKonko": "langchain_community.chat_models.konko", + "ChatLiteLLM": "langchain_community.chat_models.litellm", + "ChatLiteLLMRouter": "langchain_community.chat_models.litellm_router", + "ChatMLflowAIGateway": "langchain_community.chat_models.mlflow_ai_gateway", + "ChatMaritalk": "langchain_community.chat_models.maritalk", + "ChatMlflow": "langchain_community.chat_models.mlflow", + "ChatOllama": "langchain_community.chat_models.ollama", + "ChatOpenAI": "langchain_community.chat_models.openai", + "ChatPerplexity": "langchain_community.chat_models.perplexity", + "ChatSparkLLM": "langchain_community.chat_models.sparkllm", + "ChatTongyi": "langchain_community.chat_models.tongyi", + "ChatVertexAI": "langchain_community.chat_models.vertexai", + "ChatYandexGPT": "langchain_community.chat_models.yandex", + "ChatYuan2": "langchain_community.chat_models.yuan2", + "ChatZhipuAI": "langchain_community.chat_models.zhipuai", + "ErnieBotChat": "langchain_community.chat_models.ernie", + "FakeListChatModel": "langchain_community.chat_models.fake", + "GPTRouter": "langchain_community.chat_models.gpt_router", + "GigaChat": "langchain_community.chat_models.gigachat", + "HumanInputChatModel": "langchain_community.chat_models.human", + "JinaChat": "langchain_community.chat_models.jinachat", + "LlamaEdgeChatService": "langchain_community.chat_models.llama_edge", + "MiniMaxChat": "langchain_community.chat_models.minimax", + "PaiEasChatEndpoint": "langchain_community.chat_models.pai_eas_endpoint", + "PromptLayerChatOpenAI": "langchain_community.chat_models.promptlayer_openai", + "QianfanChatEndpoint": "langchain_community.chat_models.baidu_qianfan_endpoint", + "VolcEngineMaasChat": "langchain_community.chat_models.volcengine_maas", +} + + +def __getattr__(name: str) -> Any: + if name in _module_lookup: + module = importlib.import_module(_module_lookup[name]) + return getattr(module, name) + raise AttributeError(f"module {__name__} has no attribute {name}") + + +__all__ = list(_module_lookup.keys()) diff --git a/libs/community/langchain_community/docstore/__init__.py b/libs/community/langchain_community/docstore/__init__.py index 1de54381661..a8a2bfab831 100644 --- a/libs/community/langchain_community/docstore/__init__.py +++ b/libs/community/langchain_community/docstore/__init__.py @@ -14,8 +14,22 @@ The **Docstore** is a simplified version of the Document Loader. Document, AddableMixin """ -from langchain_community.docstore.arbitrary_fn import DocstoreFn -from langchain_community.docstore.in_memory import InMemoryDocstore -from langchain_community.docstore.wikipedia import Wikipedia -__all__ = ["DocstoreFn", "InMemoryDocstore", "Wikipedia"] +import importlib +from typing import Any + +_module_lookup = { + "DocstoreFn": "langchain_community.docstore.arbitrary_fn", + "InMemoryDocstore": "langchain_community.docstore.in_memory", + "Wikipedia": "langchain_community.docstore.wikipedia", +} + + +def __getattr__(name: str) -> Any: + if name in _module_lookup: + module = importlib.import_module(_module_lookup[name]) + return getattr(module, name) + raise AttributeError(f"module {__name__} has no attribute {name}") + + +__all__ = list(_module_lookup.keys()) diff --git a/libs/community/langchain_community/document_compressors/__init__.py b/libs/community/langchain_community/document_compressors/__init__.py index 6be1bb544f5..6820dc4b4a5 100644 --- a/libs/community/langchain_community/document_compressors/__init__.py +++ b/libs/community/langchain_community/document_compressors/__init__.py @@ -1,7 +1,16 @@ -from langchain_community.document_compressors.llmlingua_filter import ( - LLMLinguaCompressor, -) +import importlib +from typing import Any -__all__ = [ - "LLMLinguaCompressor", -] +_module_lookup = { + "LLMLinguaCompressor": "langchain_community.document_compressors.llmlingua_filter", +} + + +def __getattr__(name: str) -> Any: + if name in _module_lookup: + module = importlib.import_module(_module_lookup[name]) + return getattr(module, name) + raise AttributeError(f"module {__name__} has no attribute {name}") + + +__all__ = list(_module_lookup.keys()) diff --git a/libs/community/langchain_community/document_loaders/__init__.py b/libs/community/langchain_community/document_loaders/__init__.py index b6abd537a3d..973981c1111 100644 --- a/libs/community/langchain_community/document_loaders/__init__.py +++ b/libs/community/langchain_community/document_loaders/__init__.py @@ -14,415 +14,197 @@ Document, TextSplitter """ +import importlib +from typing import Any -from langchain_community.document_loaders.acreom import AcreomLoader -from langchain_community.document_loaders.airbyte import ( - AirbyteCDKLoader, - AirbyteGongLoader, - AirbyteHubspotLoader, - AirbyteSalesforceLoader, - AirbyteShopifyLoader, - AirbyteStripeLoader, - AirbyteTypeformLoader, - AirbyteZendeskSupportLoader, -) -from langchain_community.document_loaders.airbyte_json import AirbyteJSONLoader -from langchain_community.document_loaders.airtable import AirtableLoader -from langchain_community.document_loaders.apify_dataset import ApifyDatasetLoader -from langchain_community.document_loaders.arcgis_loader import ArcGISLoader -from langchain_community.document_loaders.arxiv import ArxivLoader -from langchain_community.document_loaders.assemblyai import ( - AssemblyAIAudioLoaderById, - AssemblyAIAudioTranscriptLoader, -) -from langchain_community.document_loaders.astradb import AstraDBLoader -from langchain_community.document_loaders.async_html import AsyncHtmlLoader -from langchain_community.document_loaders.athena import AthenaLoader -from langchain_community.document_loaders.azlyrics import AZLyricsLoader -from langchain_community.document_loaders.azure_ai_data import ( - AzureAIDataLoader, -) -from langchain_community.document_loaders.azure_blob_storage_container import ( - AzureBlobStorageContainerLoader, -) -from langchain_community.document_loaders.azure_blob_storage_file import ( - AzureBlobStorageFileLoader, -) -from langchain_community.document_loaders.bibtex import BibtexLoader -from langchain_community.document_loaders.bigquery import BigQueryLoader -from langchain_community.document_loaders.bilibili import BiliBiliLoader -from langchain_community.document_loaders.blackboard import BlackboardLoader -from langchain_community.document_loaders.blob_loaders import ( - Blob, - BlobLoader, - FileSystemBlobLoader, - YoutubeAudioLoader, -) -from langchain_community.document_loaders.blockchain import BlockchainDocumentLoader -from langchain_community.document_loaders.brave_search import BraveSearchLoader -from langchain_community.document_loaders.browserless import BrowserlessLoader -from langchain_community.document_loaders.cassandra import CassandraLoader -from langchain_community.document_loaders.chatgpt import ChatGPTLoader -from langchain_community.document_loaders.chromium import AsyncChromiumLoader -from langchain_community.document_loaders.college_confidential import ( - CollegeConfidentialLoader, -) -from langchain_community.document_loaders.concurrent import ConcurrentLoader -from langchain_community.document_loaders.confluence import ConfluenceLoader -from langchain_community.document_loaders.conllu import CoNLLULoader -from langchain_community.document_loaders.couchbase import CouchbaseLoader -from langchain_community.document_loaders.csv_loader import ( - CSVLoader, - UnstructuredCSVLoader, -) -from langchain_community.document_loaders.cube_semantic import CubeSemanticLoader -from langchain_community.document_loaders.datadog_logs import DatadogLogsLoader -from langchain_community.document_loaders.dataframe import DataFrameLoader -from langchain_community.document_loaders.diffbot import DiffbotLoader -from langchain_community.document_loaders.directory import DirectoryLoader -from langchain_community.document_loaders.discord import DiscordChatLoader -from langchain_community.document_loaders.doc_intelligence import ( - AzureAIDocumentIntelligenceLoader, -) -from langchain_community.document_loaders.docugami import DocugamiLoader -from langchain_community.document_loaders.docusaurus import DocusaurusLoader -from langchain_community.document_loaders.dropbox import DropboxLoader -from langchain_community.document_loaders.duckdb_loader import DuckDBLoader -from langchain_community.document_loaders.email import ( - OutlookMessageLoader, - UnstructuredEmailLoader, -) -from langchain_community.document_loaders.epub import UnstructuredEPubLoader -from langchain_community.document_loaders.etherscan import EtherscanLoader -from langchain_community.document_loaders.evernote import EverNoteLoader -from langchain_community.document_loaders.excel import UnstructuredExcelLoader -from langchain_community.document_loaders.facebook_chat import FacebookChatLoader -from langchain_community.document_loaders.fauna import FaunaLoader -from langchain_community.document_loaders.figma import FigmaFileLoader -from langchain_community.document_loaders.gcs_directory import GCSDirectoryLoader -from langchain_community.document_loaders.gcs_file import GCSFileLoader -from langchain_community.document_loaders.geodataframe import GeoDataFrameLoader -from langchain_community.document_loaders.git import GitLoader -from langchain_community.document_loaders.gitbook import GitbookLoader -from langchain_community.document_loaders.github import ( - GithubFileLoader, - GitHubIssuesLoader, -) -from langchain_community.document_loaders.google_speech_to_text import ( - GoogleSpeechToTextLoader, -) -from langchain_community.document_loaders.googledrive import GoogleDriveLoader -from langchain_community.document_loaders.gutenberg import GutenbergLoader -from langchain_community.document_loaders.hn import HNLoader -from langchain_community.document_loaders.html import UnstructuredHTMLLoader -from langchain_community.document_loaders.html_bs import BSHTMLLoader -from langchain_community.document_loaders.hugging_face_dataset import ( - HuggingFaceDatasetLoader, -) -from langchain_community.document_loaders.hugging_face_model import ( - HuggingFaceModelLoader, -) -from langchain_community.document_loaders.ifixit import IFixitLoader -from langchain_community.document_loaders.image import UnstructuredImageLoader -from langchain_community.document_loaders.image_captions import ImageCaptionLoader -from langchain_community.document_loaders.imsdb import IMSDbLoader -from langchain_community.document_loaders.iugu import IuguLoader -from langchain_community.document_loaders.joplin import JoplinLoader -from langchain_community.document_loaders.json_loader import JSONLoader -from langchain_community.document_loaders.lakefs import LakeFSLoader -from langchain_community.document_loaders.larksuite import LarkSuiteDocLoader -from langchain_community.document_loaders.markdown import UnstructuredMarkdownLoader -from langchain_community.document_loaders.mastodon import MastodonTootsLoader -from langchain_community.document_loaders.max_compute import MaxComputeLoader -from langchain_community.document_loaders.mediawikidump import MWDumpLoader -from langchain_community.document_loaders.merge import MergedDataLoader -from langchain_community.document_loaders.mhtml import MHTMLLoader -from langchain_community.document_loaders.modern_treasury import ModernTreasuryLoader -from langchain_community.document_loaders.mongodb import MongodbLoader -from langchain_community.document_loaders.news import NewsURLLoader -from langchain_community.document_loaders.notebook import NotebookLoader -from langchain_community.document_loaders.notion import NotionDirectoryLoader -from langchain_community.document_loaders.notiondb import NotionDBLoader -from langchain_community.document_loaders.obs_directory import OBSDirectoryLoader -from langchain_community.document_loaders.obs_file import OBSFileLoader -from langchain_community.document_loaders.obsidian import ObsidianLoader -from langchain_community.document_loaders.odt import UnstructuredODTLoader -from langchain_community.document_loaders.onedrive import OneDriveLoader -from langchain_community.document_loaders.onedrive_file import OneDriveFileLoader -from langchain_community.document_loaders.open_city_data import OpenCityDataLoader -from langchain_community.document_loaders.org_mode import UnstructuredOrgModeLoader -from langchain_community.document_loaders.pdf import ( - AmazonTextractPDFLoader, - MathpixPDFLoader, - OnlinePDFLoader, - PDFMinerLoader, - PDFMinerPDFasHTMLLoader, - PDFPlumberLoader, - PyMuPDFLoader, - PyPDFDirectoryLoader, - PyPDFium2Loader, - PyPDFLoader, - UnstructuredPDFLoader, -) -from langchain_community.document_loaders.pebblo import PebbloSafeLoader -from langchain_community.document_loaders.polars_dataframe import PolarsDataFrameLoader -from langchain_community.document_loaders.powerpoint import UnstructuredPowerPointLoader -from langchain_community.document_loaders.psychic import PsychicLoader -from langchain_community.document_loaders.pubmed import PubMedLoader -from langchain_community.document_loaders.pyspark_dataframe import ( - PySparkDataFrameLoader, -) -from langchain_community.document_loaders.python import PythonLoader -from langchain_community.document_loaders.readthedocs import ReadTheDocsLoader -from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader -from langchain_community.document_loaders.reddit import RedditPostsLoader -from langchain_community.document_loaders.roam import RoamLoader -from langchain_community.document_loaders.rocksetdb import RocksetLoader -from langchain_community.document_loaders.rss import RSSFeedLoader -from langchain_community.document_loaders.rst import UnstructuredRSTLoader -from langchain_community.document_loaders.rtf import UnstructuredRTFLoader -from langchain_community.document_loaders.s3_directory import S3DirectoryLoader -from langchain_community.document_loaders.s3_file import S3FileLoader -from langchain_community.document_loaders.sharepoint import SharePointLoader -from langchain_community.document_loaders.sitemap import SitemapLoader -from langchain_community.document_loaders.slack_directory import SlackDirectoryLoader -from langchain_community.document_loaders.snowflake_loader import SnowflakeLoader -from langchain_community.document_loaders.spreedly import SpreedlyLoader -from langchain_community.document_loaders.sql_database import SQLDatabaseLoader -from langchain_community.document_loaders.srt import SRTLoader -from langchain_community.document_loaders.stripe import StripeLoader -from langchain_community.document_loaders.surrealdb import SurrealDBLoader -from langchain_community.document_loaders.telegram import ( - TelegramChatApiLoader, - TelegramChatFileLoader, -) -from langchain_community.document_loaders.tencent_cos_directory import ( - TencentCOSDirectoryLoader, -) -from langchain_community.document_loaders.tencent_cos_file import TencentCOSFileLoader -from langchain_community.document_loaders.tensorflow_datasets import ( - TensorflowDatasetLoader, -) -from langchain_community.document_loaders.text import TextLoader -from langchain_community.document_loaders.tidb import TiDBLoader -from langchain_community.document_loaders.tomarkdown import ToMarkdownLoader -from langchain_community.document_loaders.toml import TomlLoader -from langchain_community.document_loaders.trello import TrelloLoader -from langchain_community.document_loaders.tsv import UnstructuredTSVLoader -from langchain_community.document_loaders.twitter import TwitterTweetLoader -from langchain_community.document_loaders.unstructured import ( - UnstructuredAPIFileIOLoader, - UnstructuredAPIFileLoader, - UnstructuredFileIOLoader, - UnstructuredFileLoader, -) -from langchain_community.document_loaders.url import UnstructuredURLLoader -from langchain_community.document_loaders.url_playwright import PlaywrightURLLoader -from langchain_community.document_loaders.url_selenium import SeleniumURLLoader -from langchain_community.document_loaders.vsdx import VsdxLoader -from langchain_community.document_loaders.weather import WeatherDataLoader -from langchain_community.document_loaders.web_base import WebBaseLoader -from langchain_community.document_loaders.whatsapp_chat import WhatsAppChatLoader -from langchain_community.document_loaders.wikipedia import WikipediaLoader -from langchain_community.document_loaders.word_document import ( - Docx2txtLoader, - UnstructuredWordDocumentLoader, -) -from langchain_community.document_loaders.xml import UnstructuredXMLLoader -from langchain_community.document_loaders.xorbits import XorbitsLoader -from langchain_community.document_loaders.youtube import ( - GoogleApiClient, - GoogleApiYoutubeLoader, - YoutubeLoader, -) -from langchain_community.document_loaders.yuque import YuqueLoader +_module_lookup = { + "AZLyricsLoader": "langchain_community.document_loaders.azlyrics", + "AcreomLoader": "langchain_community.document_loaders.acreom", + "AirbyteCDKLoader": "langchain_community.document_loaders.airbyte", + "AirbyteGongLoader": "langchain_community.document_loaders.airbyte", + "AirbyteHubspotLoader": "langchain_community.document_loaders.airbyte", + "AirbyteJSONLoader": "langchain_community.document_loaders.airbyte_json", + "AirbyteSalesforceLoader": "langchain_community.document_loaders.airbyte", + "AirbyteShopifyLoader": "langchain_community.document_loaders.airbyte", + "AirbyteStripeLoader": "langchain_community.document_loaders.airbyte", + "AirbyteTypeformLoader": "langchain_community.document_loaders.airbyte", + "AirbyteZendeskSupportLoader": "langchain_community.document_loaders.airbyte", + "AirtableLoader": "langchain_community.document_loaders.airtable", + "AmazonTextractPDFLoader": "langchain_community.document_loaders.pdf", + "ApifyDatasetLoader": "langchain_community.document_loaders.apify_dataset", + "ArcGISLoader": "langchain_community.document_loaders.arcgis_loader", + "ArxivLoader": "langchain_community.document_loaders.arxiv", + "AssemblyAIAudioLoaderById": "langchain_community.document_loaders.assemblyai", + "AssemblyAIAudioTranscriptLoader": "langchain_community.document_loaders.assemblyai", # noqa: E501 + "AstraDBLoader": "langchain_community.document_loaders.astradb", + "AsyncChromiumLoader": "langchain_community.document_loaders.chromium", + "AsyncHtmlLoader": "langchain_community.document_loaders.async_html", + "AthenaLoader": "langchain_community.document_loaders.athena", + "AzureAIDataLoader": "langchain_community.document_loaders.azure_ai_data", + "AzureAIDocumentIntelligenceLoader": "langchain_community.document_loaders.doc_intelligence", # noqa: E501 + "AzureBlobStorageContainerLoader": "langchain_community.document_loaders.azure_blob_storage_container", # noqa: E501 + "AzureBlobStorageFileLoader": "langchain_community.document_loaders.azure_blob_storage_file", # noqa: E501 + "BSHTMLLoader": "langchain_community.document_loaders.html_bs", + "BibtexLoader": "langchain_community.document_loaders.bibtex", + "BigQueryLoader": "langchain_community.document_loaders.bigquery", + "BiliBiliLoader": "langchain_community.document_loaders.bilibili", + "BlackboardLoader": "langchain_community.document_loaders.blackboard", + "Blob": "langchain_community.document_loaders.blob_loaders", + "BlobLoader": "langchain_community.document_loaders.blob_loaders", + "BlockchainDocumentLoader": "langchain_community.document_loaders.blockchain", + "BraveSearchLoader": "langchain_community.document_loaders.brave_search", + "BrowserlessLoader": "langchain_community.document_loaders.browserless", + "CSVLoader": "langchain_community.document_loaders.csv_loader", + "CassandraLoader": "langchain_community.document_loaders.cassandra", + "ChatGPTLoader": "langchain_community.document_loaders.chatgpt", + "CoNLLULoader": "langchain_community.document_loaders.conllu", + "CollegeConfidentialLoader": "langchain_community.document_loaders.college_confidential", # noqa: E501 + "ConcurrentLoader": "langchain_community.document_loaders.concurrent", + "ConfluenceLoader": "langchain_community.document_loaders.confluence", + "CouchbaseLoader": "langchain_community.document_loaders.couchbase", + "CubeSemanticLoader": "langchain_community.document_loaders.cube_semantic", + "DataFrameLoader": "langchain_community.document_loaders.dataframe", + "DatadogLogsLoader": "langchain_community.document_loaders.datadog_logs", + "DiffbotLoader": "langchain_community.document_loaders.diffbot", + "DirectoryLoader": "langchain_community.document_loaders.directory", + "DiscordChatLoader": "langchain_community.document_loaders.discord", + "DocugamiLoader": "langchain_community.document_loaders.docugami", + "DocusaurusLoader": "langchain_community.document_loaders.docusaurus", + "Docx2txtLoader": "langchain_community.document_loaders.word_document", + "DropboxLoader": "langchain_community.document_loaders.dropbox", + "DuckDBLoader": "langchain_community.document_loaders.duckdb_loader", + "EtherscanLoader": "langchain_community.document_loaders.etherscan", + "EverNoteLoader": "langchain_community.document_loaders.evernote", + "FacebookChatLoader": "langchain_community.document_loaders.facebook_chat", + "FaunaLoader": "langchain_community.document_loaders.fauna", + "FigmaFileLoader": "langchain_community.document_loaders.figma", + "FileSystemBlobLoader": "langchain_community.document_loaders.blob_loaders", + "GCSDirectoryLoader": "langchain_community.document_loaders.gcs_directory", + "GCSFileLoader": "langchain_community.document_loaders.gcs_file", + "GeoDataFrameLoader": "langchain_community.document_loaders.geodataframe", + "GitHubIssuesLoader": "langchain_community.document_loaders.github", + "GitLoader": "langchain_community.document_loaders.git", + "GitbookLoader": "langchain_community.document_loaders.gitbook", + "GithubFileLoader": "langchain_community.document_loaders.github", + "GoogleApiClient": "langchain_community.document_loaders.youtube", + "GoogleApiYoutubeLoader": "langchain_community.document_loaders.youtube", + "GoogleDriveLoader": "langchain_community.document_loaders.googledrive", + "GoogleSpeechToTextLoader": "langchain_community.document_loaders.google_speech_to_text", # noqa: E501 + "GutenbergLoader": "langchain_community.document_loaders.gutenberg", + "HNLoader": "langchain_community.document_loaders.hn", + "HuggingFaceDatasetLoader": "langchain_community.document_loaders.hugging_face_dataset", # noqa: E501 + "HuggingFaceModelLoader": "langchain_community.document_loaders.hugging_face_model", + "IFixitLoader": "langchain_community.document_loaders.ifixit", + "IMSDbLoader": "langchain_community.document_loaders.imsdb", + "ImageCaptionLoader": "langchain_community.document_loaders.image_captions", + "IuguLoader": "langchain_community.document_loaders.iugu", + "JSONLoader": "langchain_community.document_loaders.json_loader", + "JoplinLoader": "langchain_community.document_loaders.joplin", + "LakeFSLoader": "langchain_community.document_loaders.lakefs", + "LarkSuiteDocLoader": "langchain_community.document_loaders.larksuite", + "MHTMLLoader": "langchain_community.document_loaders.mhtml", + "MWDumpLoader": "langchain_community.document_loaders.mediawikidump", + "MastodonTootsLoader": "langchain_community.document_loaders.mastodon", + "MathpixPDFLoader": "langchain_community.document_loaders.pdf", + "MaxComputeLoader": "langchain_community.document_loaders.max_compute", + "MergedDataLoader": "langchain_community.document_loaders.merge", + "ModernTreasuryLoader": "langchain_community.document_loaders.modern_treasury", + "MongodbLoader": "langchain_community.document_loaders.mongodb", + "NewsURLLoader": "langchain_community.document_loaders.news", + "NotebookLoader": "langchain_community.document_loaders.notebook", + "NotionDBLoader": "langchain_community.document_loaders.notiondb", + "NotionDirectoryLoader": "langchain_community.document_loaders.notion", + "OBSDirectoryLoader": "langchain_community.document_loaders.obs_directory", + "OBSFileLoader": "langchain_community.document_loaders.obs_file", + "ObsidianLoader": "langchain_community.document_loaders.obsidian", + "OneDriveFileLoader": "langchain_community.document_loaders.onedrive_file", + "OneDriveLoader": "langchain_community.document_loaders.onedrive", + "OnlinePDFLoader": "langchain_community.document_loaders.pdf", + "OpenCityDataLoader": "langchain_community.document_loaders.open_city_data", + "OutlookMessageLoader": "langchain_community.document_loaders.email", + "PDFMinerLoader": "langchain_community.document_loaders.pdf", + "PDFMinerPDFasHTMLLoader": "langchain_community.document_loaders.pdf", + "PDFPlumberLoader": "langchain_community.document_loaders.pdf", + "PagedPDFSplitter": "langchain_community.document_loaders.pdf", + "PebbloSafeLoader": "langchain_community.document_loaders.pebblo", + "PlaywrightURLLoader": "langchain_community.document_loaders.url_playwright", + "PolarsDataFrameLoader": "langchain_community.document_loaders.polars_dataframe", + "PsychicLoader": "langchain_community.document_loaders.psychic", + "PubMedLoader": "langchain_community.document_loaders.pubmed", + "PyMuPDFLoader": "langchain_community.document_loaders.pdf", + "PyPDFDirectoryLoader": "langchain_community.document_loaders.pdf", + "PyPDFLoader": "langchain_community.document_loaders.pdf", + "PyPDFium2Loader": "langchain_community.document_loaders.pdf", + "PySparkDataFrameLoader": "langchain_community.document_loaders.pyspark_dataframe", + "PythonLoader": "langchain_community.document_loaders.python", + "RSSFeedLoader": "langchain_community.document_loaders.rss", + "ReadTheDocsLoader": "langchain_community.document_loaders.readthedocs", + "RecursiveUrlLoader": "langchain_community.document_loaders.recursive_url_loader", + "RedditPostsLoader": "langchain_community.document_loaders.reddit", + "RoamLoader": "langchain_community.document_loaders.roam", + "RocksetLoader": "langchain_community.document_loaders.rocksetdb", + "S3DirectoryLoader": "langchain_community.document_loaders.s3_directory", + "S3FileLoader": "langchain_community.document_loaders.s3_file", + "SQLDatabaseLoader": "langchain_community.document_loaders.sql_database", + "SRTLoader": "langchain_community.document_loaders.srt", + "SeleniumURLLoader": "langchain_community.document_loaders.url_selenium", + "SharePointLoader": "langchain_community.document_loaders.sharepoint", + "SitemapLoader": "langchain_community.document_loaders.sitemap", + "SlackDirectoryLoader": "langchain_community.document_loaders.slack_directory", + "SnowflakeLoader": "langchain_community.document_loaders.snowflake_loader", + "SpreedlyLoader": "langchain_community.document_loaders.spreedly", + "StripeLoader": "langchain_community.document_loaders.stripe", + "SurrealDBLoader": "langchain_community.document_loaders.surrealdb", + "TelegramChatApiLoader": "langchain_community.document_loaders.telegram", + "TelegramChatFileLoader": "langchain_community.document_loaders.telegram", + "TelegramChatLoader": "langchain_community.document_loaders.telegram", + "TencentCOSDirectoryLoader": "langchain_community.document_loaders.tencent_cos_directory", # noqa: E501 + "TencentCOSFileLoader": "langchain_community.document_loaders.tencent_cos_file", + "TensorflowDatasetLoader": "langchain_community.document_loaders.tensorflow_datasets", # noqa: E501 + "TextLoader": "langchain_community.document_loaders.text", + "TiDBLoader": "langchain_community.document_loaders.tidb", + "ToMarkdownLoader": "langchain_community.document_loaders.tomarkdown", + "TomlLoader": "langchain_community.document_loaders.toml", + "TrelloLoader": "langchain_community.document_loaders.trello", + "TwitterTweetLoader": "langchain_community.document_loaders.twitter", + "UnstructuredAPIFileIOLoader": "langchain_community.document_loaders.unstructured", + "UnstructuredAPIFileLoader": "langchain_community.document_loaders.unstructured", + "UnstructuredCSVLoader": "langchain_community.document_loaders.csv_loader", + "UnstructuredEPubLoader": "langchain_community.document_loaders.epub", + "UnstructuredEmailLoader": "langchain_community.document_loaders.email", + "UnstructuredExcelLoader": "langchain_community.document_loaders.excel", + "UnstructuredFileIOLoader": "langchain_community.document_loaders.unstructured", + "UnstructuredFileLoader": "langchain_community.document_loaders.unstructured", + "UnstructuredHTMLLoader": "langchain_community.document_loaders.html", + "UnstructuredImageLoader": "langchain_community.document_loaders.image", + "UnstructuredMarkdownLoader": "langchain_community.document_loaders.markdown", + "UnstructuredODTLoader": "langchain_community.document_loaders.odt", + "UnstructuredOrgModeLoader": "langchain_community.document_loaders.org_mode", + "UnstructuredPDFLoader": "langchain_community.document_loaders.pdf", + "UnstructuredPowerPointLoader": "langchain_community.document_loaders.powerpoint", + "UnstructuredRSTLoader": "langchain_community.document_loaders.rst", + "UnstructuredRTFLoader": "langchain_community.document_loaders.rtf", + "UnstructuredTSVLoader": "langchain_community.document_loaders.tsv", + "UnstructuredURLLoader": "langchain_community.document_loaders.url", + "UnstructuredWordDocumentLoader": "langchain_community.document_loaders.word_document", # noqa: E501 + "UnstructuredXMLLoader": "langchain_community.document_loaders.xml", + "VsdxLoader": "langchain_community.document_loaders.vsdx", + "WeatherDataLoader": "langchain_community.document_loaders.weather", + "WebBaseLoader": "langchain_community.document_loaders.web_base", + "WhatsAppChatLoader": "langchain_community.document_loaders.whatsapp_chat", + "WikipediaLoader": "langchain_community.document_loaders.wikipedia", + "XorbitsLoader": "langchain_community.document_loaders.xorbits", + "YoutubeAudioLoader": "langchain_community.document_loaders.blob_loaders", + "YoutubeLoader": "langchain_community.document_loaders.youtube", + "YuqueLoader": "langchain_community.document_loaders.yuque", +} -# Legacy: only for backwards compatibility. Use PyPDFLoader instead -PagedPDFSplitter = PyPDFLoader -# For backwards compatibility -TelegramChatLoader = TelegramChatFileLoader +def __getattr__(name: str) -> Any: + if name in _module_lookup: + module = importlib.import_module(_module_lookup[name]) + return getattr(module, name) + raise AttributeError(f"module {__name__} has no attribute {name}") -__all__ = [ - "AcreomLoader", - "AsyncHtmlLoader", - "AsyncChromiumLoader", - "AZLyricsLoader", - "AcreomLoader", - "AirbyteCDKLoader", - "AirbyteGongLoader", - "AirbyteJSONLoader", - "AirbyteHubspotLoader", - "AirbyteSalesforceLoader", - "AirbyteShopifyLoader", - "AirbyteStripeLoader", - "AirbyteTypeformLoader", - "AirbyteZendeskSupportLoader", - "AirtableLoader", - "AmazonTextractPDFLoader", - "ApifyDatasetLoader", - "ArcGISLoader", - "ArxivLoader", - "AssemblyAIAudioLoaderById", - "AssemblyAIAudioTranscriptLoader", - "AstraDBLoader", - "AsyncHtmlLoader", - "AthenaLoader", - "AzureAIDataLoader", - "AzureAIDocumentIntelligenceLoader", - "AzureBlobStorageContainerLoader", - "AzureBlobStorageFileLoader", - "BSHTMLLoader", - "BibtexLoader", - "BigQueryLoader", - "BiliBiliLoader", - "BlackboardLoader", - "Blob", - "BlobLoader", - "BlockchainDocumentLoader", - "BraveSearchLoader", - "BrowserlessLoader", - "CassandraLoader", - "CSVLoader", - "ChatGPTLoader", - "CoNLLULoader", - "CollegeConfidentialLoader", - "ConcurrentLoader", - "ConfluenceLoader", - "CouchbaseLoader", - "CubeSemanticLoader", - "DataFrameLoader", - "DatadogLogsLoader", - "PebbloSafeLoader", - "DiffbotLoader", - "DirectoryLoader", - "DiscordChatLoader", - "DocugamiLoader", - "DocusaurusLoader", - "Docx2txtLoader", - "DropboxLoader", - "DuckDBLoader", - "EtherscanLoader", - "EverNoteLoader", - "FacebookChatLoader", - "FaunaLoader", - "FigmaFileLoader", - "FileSystemBlobLoader", - "GCSDirectoryLoader", - "GCSFileLoader", - "GeoDataFrameLoader", - "GithubFileLoader", - "GitHubIssuesLoader", - "GitLoader", - "GitbookLoader", - "GoogleApiClient", - "GoogleApiYoutubeLoader", - "GoogleSpeechToTextLoader", - "GoogleDriveLoader", - "GutenbergLoader", - "HNLoader", - "HuggingFaceDatasetLoader", - "HuggingFaceModelLoader", - "IFixitLoader", - "IMSDbLoader", - "ImageCaptionLoader", - "IuguLoader", - "JSONLoader", - "JoplinLoader", - "LarkSuiteDocLoader", - "LakeFSLoader", - "MHTMLLoader", - "MWDumpLoader", - "MastodonTootsLoader", - "MathpixPDFLoader", - "MaxComputeLoader", - "MergedDataLoader", - "ModernTreasuryLoader", - "MongodbLoader", - "NewsURLLoader", - "NotebookLoader", - "NotionDBLoader", - "NotionDirectoryLoader", - "OBSDirectoryLoader", - "OBSFileLoader", - "ObsidianLoader", - "OneDriveFileLoader", - "OneDriveLoader", - "OnlinePDFLoader", - "OpenCityDataLoader", - "OutlookMessageLoader", - "PDFMinerLoader", - "PDFMinerPDFasHTMLLoader", - "PDFPlumberLoader", - "PagedPDFSplitter", - "PlaywrightURLLoader", - "PolarsDataFrameLoader", - "PsychicLoader", - "PubMedLoader", - "PyMuPDFLoader", - "PyPDFDirectoryLoader", - "PyPDFLoader", - "PyPDFium2Loader", - "PySparkDataFrameLoader", - "PythonLoader", - "RSSFeedLoader", - "ReadTheDocsLoader", - "RecursiveUrlLoader", - "RedditPostsLoader", - "RoamLoader", - "RocksetLoader", - "S3DirectoryLoader", - "S3FileLoader", - "SRTLoader", - "SeleniumURLLoader", - "SharePointLoader", - "SitemapLoader", - "SlackDirectoryLoader", - "SnowflakeLoader", - "SpreedlyLoader", - "SQLDatabaseLoader", - "StripeLoader", - "SurrealDBLoader", - "TelegramChatApiLoader", - "TelegramChatFileLoader", - "TelegramChatLoader", - "TensorflowDatasetLoader", - "TencentCOSDirectoryLoader", - "TencentCOSFileLoader", - "TextLoader", - "TiDBLoader", - "ToMarkdownLoader", - "TomlLoader", - "TrelloLoader", - "TwitterTweetLoader", - "UnstructuredAPIFileIOLoader", - "UnstructuredAPIFileLoader", - "UnstructuredCSVLoader", - "UnstructuredEPubLoader", - "UnstructuredEmailLoader", - "UnstructuredExcelLoader", - "UnstructuredFileIOLoader", - "UnstructuredFileLoader", - "UnstructuredHTMLLoader", - "UnstructuredImageLoader", - "UnstructuredMarkdownLoader", - "UnstructuredODTLoader", - "UnstructuredOrgModeLoader", - "UnstructuredPDFLoader", - "UnstructuredPowerPointLoader", - "UnstructuredRSTLoader", - "UnstructuredRTFLoader", - "UnstructuredTSVLoader", - "UnstructuredURLLoader", - "UnstructuredWordDocumentLoader", - "UnstructuredXMLLoader", - "VsdxLoader", - "WeatherDataLoader", - "WebBaseLoader", - "WhatsAppChatLoader", - "WikipediaLoader", - "XorbitsLoader", - "YoutubeAudioLoader", - "YoutubeLoader", - "YuqueLoader", -] + +__all__ = list(_module_lookup.keys()) diff --git a/libs/community/langchain_community/document_loaders/blob_loaders/__init__.py b/libs/community/langchain_community/document_loaders/blob_loaders/__init__.py index 174c71de026..910714da541 100644 --- a/libs/community/langchain_community/document_loaders/blob_loaders/__init__.py +++ b/libs/community/langchain_community/document_loaders/blob_loaders/__init__.py @@ -1,9 +1,19 @@ -from langchain_community.document_loaders.blob_loaders.file_system import ( - FileSystemBlobLoader, -) -from langchain_community.document_loaders.blob_loaders.schema import Blob, BlobLoader -from langchain_community.document_loaders.blob_loaders.youtube_audio import ( - YoutubeAudioLoader, -) +import importlib +from typing import Any -__all__ = ["BlobLoader", "Blob", "FileSystemBlobLoader", "YoutubeAudioLoader"] +_module_lookup = { + "Blob": "langchain_community.document_loaders.blob_loaders.schema", + "BlobLoader": "langchain_community.document_loaders.blob_loaders.schema", + "FileSystemBlobLoader": "langchain_community.document_loaders.blob_loaders.file_system", # noqa: E501 + "YoutubeAudioLoader": "langchain_community.document_loaders.blob_loaders.youtube_audio", # noqa: E501 +} + + +def __getattr__(name: str) -> Any: + if name in _module_lookup: + module = importlib.import_module(_module_lookup[name]) + return getattr(module, name) + raise AttributeError(f"module {__name__} has no attribute {name}") + + +__all__ = list(_module_lookup.keys()) diff --git a/libs/community/langchain_community/document_loaders/parsers/__init__.py b/libs/community/langchain_community/document_loaders/parsers/__init__.py index 8b635e9a68b..c425f8b546c 100644 --- a/libs/community/langchain_community/document_loaders/parsers/__init__.py +++ b/libs/community/langchain_community/document_loaders/parsers/__init__.py @@ -1,31 +1,27 @@ -from langchain_community.document_loaders.parsers.audio import OpenAIWhisperParser -from langchain_community.document_loaders.parsers.doc_intelligence import ( - AzureAIDocumentIntelligenceParser, -) -from langchain_community.document_loaders.parsers.docai import DocAIParser -from langchain_community.document_loaders.parsers.grobid import GrobidParser -from langchain_community.document_loaders.parsers.html import BS4HTMLParser -from langchain_community.document_loaders.parsers.language import LanguageParser -from langchain_community.document_loaders.parsers.pdf import ( - PDFMinerParser, - PDFPlumberParser, - PyMuPDFParser, - PyPDFium2Parser, - PyPDFParser, -) -from langchain_community.document_loaders.parsers.vsdx import VsdxParser +import importlib +from typing import Any -__all__ = [ - "AzureAIDocumentIntelligenceParser", - "BS4HTMLParser", - "DocAIParser", - "GrobidParser", - "LanguageParser", - "OpenAIWhisperParser", - "PDFMinerParser", - "PDFPlumberParser", - "PyMuPDFParser", - "PyPDFium2Parser", - "PyPDFParser", - "VsdxParser", -] +_module_lookup = { + "AzureAIDocumentIntelligenceParser": "langchain_community.document_loaders.parsers.doc_intelligence", # noqa: E501 + "BS4HTMLParser": "langchain_community.document_loaders.parsers.html", + "DocAIParser": "langchain_community.document_loaders.parsers.docai", + "GrobidParser": "langchain_community.document_loaders.parsers.grobid", + "LanguageParser": "langchain_community.document_loaders.parsers.language", + "OpenAIWhisperParser": "langchain_community.document_loaders.parsers.audio", + "PDFMinerParser": "langchain_community.document_loaders.parsers.pdf", + "PDFPlumberParser": "langchain_community.document_loaders.parsers.pdf", + "PyMuPDFParser": "langchain_community.document_loaders.parsers.pdf", + "PyPDFParser": "langchain_community.document_loaders.parsers.pdf", + "PyPDFium2Parser": "langchain_community.document_loaders.parsers.pdf", + "VsdxParser": "langchain_community.document_loaders.parsers.vsdx", +} + + +def __getattr__(name: str) -> Any: + if name in _module_lookup: + module = importlib.import_module(_module_lookup[name]) + return getattr(module, name) + raise AttributeError(f"module {__name__} has no attribute {name}") + + +__all__ = list(_module_lookup.keys()) diff --git a/libs/community/langchain_community/document_loaders/pdf.py b/libs/community/langchain_community/document_loaders/pdf.py index a163c05f617..8522f491a12 100644 --- a/libs/community/langchain_community/document_loaders/pdf.py +++ b/libs/community/langchain_community/document_loaders/pdf.py @@ -780,3 +780,7 @@ class DocumentIntelligenceLoader(BasePDFLoader): """Lazy load given path as pages.""" blob = Blob.from_path(self.file_path) yield from self.parser.parse(blob) + + +# Legacy: only for backwards compatibility. Use PyPDFLoader instead +PagedPDFSplitter = PyPDFLoader diff --git a/libs/community/langchain_community/document_loaders/telegram.py b/libs/community/langchain_community/document_loaders/telegram.py index b4c94b29746..50955b73fc6 100644 --- a/libs/community/langchain_community/document_loaders/telegram.py +++ b/libs/community/langchain_community/document_loaders/telegram.py @@ -262,3 +262,7 @@ class TelegramChatApiLoader(BaseLoader): combined_texts = self._combine_message_texts(message_threads, df) return text_to_docs(combined_texts) + + +# For backwards compatibility +TelegramChatLoader = TelegramChatFileLoader