langchain[patch]: Migrate document loaders to use optional langchain community imports (#21095)

This commit is contained in:
Eugene Yurtsev
2024-05-01 11:26:25 -04:00
committed by GitHub
parent bd38073d76
commit 0e5bf16d00
165 changed files with 4415 additions and 722 deletions

View File

@@ -261,6 +261,7 @@ if TYPE_CHECKING:
from langchain_community.document_loaders.json_loader import (
JSONLoader,
)
from langchain_community.document_loaders.kinetica_loader import KineticaLoader
from langchain_community.document_loaders.lakefs import (
LakeFSLoader,
)
@@ -512,193 +513,6 @@ if TYPE_CHECKING:
YuqueLoader,
)
__all__ = [
"AZLyricsLoader",
"AcreomLoader",
"AirbyteCDKLoader",
"AirbyteGongLoader",
"AirbyteHubspotLoader",
"AirbyteJSONLoader",
"AirbyteSalesforceLoader",
"AirbyteShopifyLoader",
"AirbyteStripeLoader",
"AirbyteTypeformLoader",
"AirbyteZendeskSupportLoader",
"AirtableLoader",
"AmazonTextractPDFLoader",
"ApifyDatasetLoader",
"ArcGISLoader",
"ArxivLoader",
"AssemblyAIAudioLoaderById",
"AssemblyAIAudioTranscriptLoader",
"AstraDBLoader",
"AsyncChromiumLoader",
"AsyncHtmlLoader",
"AthenaLoader",
"AzureAIDataLoader",
"AzureAIDocumentIntelligenceLoader",
"AzureBlobStorageContainerLoader",
"AzureBlobStorageFileLoader",
"BSHTMLLoader",
"BibtexLoader",
"BigQueryLoader",
"BiliBiliLoader",
"BlackboardLoader",
"Blob",
"BlobLoader",
"BlockchainDocumentLoader",
"BraveSearchLoader",
"BrowserbaseLoader",
"BrowserlessLoader",
"CSVLoader",
"CassandraLoader",
"ChatGPTLoader",
"CoNLLULoader",
"CollegeConfidentialLoader",
"ConcurrentLoader",
"ConfluenceLoader",
"CouchbaseLoader",
"CubeSemanticLoader",
"DataFrameLoader",
"DatadogLogsLoader",
"DiffbotLoader",
"DirectoryLoader",
"DiscordChatLoader",
"DocugamiLoader",
"DocusaurusLoader",
"Docx2txtLoader",
"DropboxLoader",
"DuckDBLoader",
"EtherscanLoader",
"EverNoteLoader",
"FacebookChatLoader",
"FaunaLoader",
"FigmaFileLoader",
"FireCrawlLoader",
"FileSystemBlobLoader",
"GCSDirectoryLoader",
"GCSFileLoader",
"GeoDataFrameLoader",
"GitHubIssuesLoader",
"GitLoader",
"GitbookLoader",
"GithubFileLoader",
"GoogleApiClient",
"GoogleApiYoutubeLoader",
"GoogleDriveLoader",
"GoogleSpeechToTextLoader",
"GutenbergLoader",
"HNLoader",
"HuggingFaceDatasetLoader",
"HuggingFaceModelLoader",
"IFixitLoader",
"IMSDbLoader",
"ImageCaptionLoader",
"IuguLoader",
"JSONLoader",
"JoplinLoader",
"LLMSherpaFileLoader",
"LakeFSLoader",
"LarkSuiteDocLoader",
"MHTMLLoader",
"MWDumpLoader",
"MastodonTootsLoader",
"MathpixPDFLoader",
"MaxComputeLoader",
"MergedDataLoader",
"ModernTreasuryLoader",
"MongodbLoader",
"NewsURLLoader",
"NotebookLoader",
"NotionDBLoader",
"NotionDirectoryLoader",
"OBSDirectoryLoader",
"OBSFileLoader",
"ObsidianLoader",
"OneDriveFileLoader",
"OneDriveLoader",
"OnlinePDFLoader",
"OpenCityDataLoader",
"OracleAutonomousDatabaseLoader",
"OutlookMessageLoader",
"PDFMinerLoader",
"PDFMinerPDFasHTMLLoader",
"PDFPlumberLoader",
"PagedPDFSplitter",
"PebbloSafeLoader",
"PlaywrightURLLoader",
"PolarsDataFrameLoader",
"PsychicLoader",
"PubMedLoader",
"PyMuPDFLoader",
"PyPDFDirectoryLoader",
"PyPDFLoader",
"PyPDFium2Loader",
"PySparkDataFrameLoader",
"PythonLoader",
"RSSFeedLoader",
"ReadTheDocsLoader",
"RecursiveUrlLoader",
"RedditPostsLoader",
"RoamLoader",
"RocksetLoader",
"S3DirectoryLoader",
"S3FileLoader",
"SQLDatabaseLoader",
"SRTLoader",
"SeleniumURLLoader",
"SharePointLoader",
"SitemapLoader",
"SlackDirectoryLoader",
"SnowflakeLoader",
"SpiderLoader",
"SpreedlyLoader",
"StripeLoader",
"SurrealDBLoader",
"TelegramChatApiLoader",
"TelegramChatFileLoader",
"TelegramChatLoader",
"TencentCOSDirectoryLoader",
"TencentCOSFileLoader",
"TensorflowDatasetLoader",
"TextLoader",
"TiDBLoader",
"ToMarkdownLoader",
"TomlLoader",
"TrelloLoader",
"TwitterTweetLoader",
"UnstructuredAPIFileIOLoader",
"UnstructuredAPIFileLoader",
"UnstructuredCHMLoader",
"UnstructuredCSVLoader",
"UnstructuredEPubLoader",
"UnstructuredEmailLoader",
"UnstructuredExcelLoader",
"UnstructuredFileIOLoader",
"UnstructuredFileLoader",
"UnstructuredHTMLLoader",
"UnstructuredImageLoader",
"UnstructuredMarkdownLoader",
"UnstructuredODTLoader",
"UnstructuredOrgModeLoader",
"UnstructuredPDFLoader",
"UnstructuredPowerPointLoader",
"UnstructuredRSTLoader",
"UnstructuredRTFLoader",
"UnstructuredTSVLoader",
"UnstructuredURLLoader",
"UnstructuredWordDocumentLoader",
"UnstructuredXMLLoader",
"VsdxLoader",
"WeatherDataLoader",
"WebBaseLoader",
"WhatsAppChatLoader",
"WikipediaLoader",
"XorbitsLoader",
"YoutubeAudioLoader",
"YoutubeLoader",
"YuqueLoader",
]
_module_lookup = {
"AZLyricsLoader": "langchain_community.document_loaders.azlyrics",
@@ -898,4 +712,192 @@ def __getattr__(name: str) -> Any:
raise AttributeError(f"module {__name__} has no attribute {name}")
__all__ = list(_module_lookup.keys())
__all__ = [
"AZLyricsLoader",
"AcreomLoader",
"AirbyteCDKLoader",
"AirbyteGongLoader",
"AirbyteHubspotLoader",
"AirbyteJSONLoader",
"AirbyteSalesforceLoader",
"AirbyteShopifyLoader",
"AirbyteStripeLoader",
"AirbyteTypeformLoader",
"AirbyteZendeskSupportLoader",
"AirtableLoader",
"AmazonTextractPDFLoader",
"ApifyDatasetLoader",
"ArcGISLoader",
"ArxivLoader",
"AssemblyAIAudioLoaderById",
"AssemblyAIAudioTranscriptLoader",
"AstraDBLoader",
"AsyncChromiumLoader",
"AsyncHtmlLoader",
"AthenaLoader",
"AzureAIDataLoader",
"AzureAIDocumentIntelligenceLoader",
"AzureBlobStorageContainerLoader",
"AzureBlobStorageFileLoader",
"BSHTMLLoader",
"BibtexLoader",
"BigQueryLoader",
"BiliBiliLoader",
"BlackboardLoader",
"Blob",
"BlobLoader",
"BlockchainDocumentLoader",
"BraveSearchLoader",
"BrowserbaseLoader",
"BrowserlessLoader",
"CSVLoader",
"CassandraLoader",
"ChatGPTLoader",
"CoNLLULoader",
"CollegeConfidentialLoader",
"ConcurrentLoader",
"ConfluenceLoader",
"CouchbaseLoader",
"CubeSemanticLoader",
"DataFrameLoader",
"DatadogLogsLoader",
"DiffbotLoader",
"DirectoryLoader",
"DiscordChatLoader",
"DocugamiLoader",
"DocusaurusLoader",
"Docx2txtLoader",
"DropboxLoader",
"DuckDBLoader",
"EtherscanLoader",
"EverNoteLoader",
"FacebookChatLoader",
"FaunaLoader",
"FigmaFileLoader",
"FireCrawlLoader",
"FileSystemBlobLoader",
"GCSDirectoryLoader",
"GlueCatalogLoader",
"GCSFileLoader",
"GeoDataFrameLoader",
"GitHubIssuesLoader",
"GitLoader",
"GitbookLoader",
"GithubFileLoader",
"GoogleApiClient",
"GoogleApiYoutubeLoader",
"GoogleDriveLoader",
"GoogleSpeechToTextLoader",
"GutenbergLoader",
"HNLoader",
"HuggingFaceDatasetLoader",
"HuggingFaceModelLoader",
"IFixitLoader",
"ImageCaptionLoader",
"IMSDbLoader",
"IuguLoader",
"JoplinLoader",
"JSONLoader",
"KineticaLoader",
"LakeFSLoader",
"LarkSuiteDocLoader",
"LLMSherpaFileLoader",
"MastodonTootsLoader",
"MHTMLLoader",
"MWDumpLoader",
"MathpixPDFLoader",
"MaxComputeLoader",
"MergedDataLoader",
"ModernTreasuryLoader",
"MongodbLoader",
"NewsURLLoader",
"NotebookLoader",
"NotionDBLoader",
"NotionDirectoryLoader",
"OBSDirectoryLoader",
"OBSFileLoader",
"ObsidianLoader",
"OneDriveFileLoader",
"OneDriveLoader",
"OnlinePDFLoader",
"OpenCityDataLoader",
"OracleAutonomousDatabaseLoader",
"OutlookMessageLoader",
"PDFMinerLoader",
"PDFMinerPDFasHTMLLoader",
"PDFPlumberLoader",
"PagedPDFSplitter",
"PebbloSafeLoader",
"PlaywrightURLLoader",
"PolarsDataFrameLoader",
"PsychicLoader",
"PubMedLoader",
"PyMuPDFLoader",
"PyPDFDirectoryLoader",
"PyPDFLoader",
"PyPDFium2Loader",
"PySparkDataFrameLoader",
"PythonLoader",
"RSSFeedLoader",
"ReadTheDocsLoader",
"RecursiveUrlLoader",
"RedditPostsLoader",
"RoamLoader",
"RocksetLoader",
"S3DirectoryLoader",
"S3FileLoader",
"SQLDatabaseLoader",
"SRTLoader",
"SeleniumURLLoader",
"SharePointLoader",
"SitemapLoader",
"SlackDirectoryLoader",
"SnowflakeLoader",
"SpiderLoader",
"SpreedlyLoader",
"StripeLoader",
"SurrealDBLoader",
"TelegramChatApiLoader",
"TelegramChatFileLoader",
"TelegramChatLoader",
"TencentCOSDirectoryLoader",
"TencentCOSFileLoader",
"TensorflowDatasetLoader",
"TextLoader",
"TiDBLoader",
"ToMarkdownLoader",
"TomlLoader",
"TrelloLoader",
"TwitterTweetLoader",
"UnstructuredAPIFileIOLoader",
"UnstructuredAPIFileLoader",
"UnstructuredCHMLoader",
"UnstructuredCSVLoader",
"UnstructuredEPubLoader",
"UnstructuredEmailLoader",
"UnstructuredExcelLoader",
"UnstructuredFileIOLoader",
"UnstructuredFileLoader",
"UnstructuredHTMLLoader",
"UnstructuredImageLoader",
"UnstructuredMarkdownLoader",
"UnstructuredODTLoader",
"UnstructuredOrgModeLoader",
"UnstructuredPDFLoader",
"UnstructuredPowerPointLoader",
"UnstructuredRSTLoader",
"UnstructuredRTFLoader",
"UnstructuredTSVLoader",
"UnstructuredURLLoader",
"UnstructuredWordDocumentLoader",
"UnstructuredXMLLoader",
"VsdxLoader",
"WeatherDataLoader",
"WebBaseLoader",
"WhatsAppChatLoader",
"WikipediaLoader",
"XorbitsLoader",
"YoutubeAudioLoader",
"YoutubeLoader",
"YuqueLoader",
]

View File

@@ -1,11 +1,24 @@
import importlib
from typing import Any
from typing import TYPE_CHECKING, Any
from langchain_core.document_loaders import Blob, BlobLoader
if TYPE_CHECKING:
from langchain_community.document_loaders.blob_loaders.file_system import (
FileSystemBlobLoader,
)
from langchain_community.document_loaders.blob_loaders.youtube_audio import (
YoutubeAudioLoader,
)
_module_lookup = {
"Blob": "langchain_community.document_loaders.blob_loaders.schema",
"BlobLoader": "langchain_community.document_loaders.blob_loaders.schema",
"FileSystemBlobLoader": "langchain_community.document_loaders.blob_loaders.file_system", # noqa: E501
"YoutubeAudioLoader": "langchain_community.document_loaders.blob_loaders.youtube_audio", # noqa: E501
"FileSystemBlobLoader": (
"langchain_community.document_loaders.blob_loaders.file_system"
),
"YoutubeAudioLoader": (
"langchain_community.document_loaders.blob_loaders.youtube_audio"
),
}
@@ -16,4 +29,9 @@ def __getattr__(name: str) -> Any:
raise AttributeError(f"module {__name__} has no attribute {name}")
__all__ = list(_module_lookup.keys())
__all__ = [
"BlobLoader",
"Blob",
"FileSystemBlobLoader",
"YoutubeAudioLoader",
]