mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-22 14:49:29 +00:00
clean up loaders (#1178)
This commit is contained in:
parent
9962bda70b
commit
4766b20223
@@ -8,10 +8,6 @@ from langchain.document_loaders.web_base import WebBaseLoader
|
||||
class AZLyricsLoader(WebBaseLoader):
|
||||
"""Loader that loads AZLyrics webpages."""
|
||||
|
||||
def __init__(self, web_path: str):
|
||||
"""Initialize with webpage path."""
|
||||
self.web_path = web_path
|
||||
|
||||
def load(self) -> List[Document]:
|
||||
"""Load webpage."""
|
||||
soup = self.scrape()
|
||||
|
@@ -8,10 +8,6 @@ from langchain.document_loaders.web_base import WebBaseLoader
|
||||
class CollegeConfidentialLoader(WebBaseLoader):
|
||||
"""Loader that loads College Confidential webpages."""
|
||||
|
||||
def __init__(self, web_path: str):
|
||||
"""Initialize with webpage path."""
|
||||
self.web_path = web_path
|
||||
|
||||
def load(self) -> List[Document]:
|
||||
"""Load webpage."""
|
||||
soup = self.scrape()
|
||||
|
@@ -8,10 +8,6 @@ from langchain.document_loaders.web_base import WebBaseLoader
|
||||
class IMSDbLoader(WebBaseLoader):
|
||||
"""Loader that loads IMSDb webpages."""
|
||||
|
||||
def __init__(self, web_path: str):
|
||||
"""Initialize with webpage path."""
|
||||
self.web_path = web_path
|
||||
|
||||
def load(self) -> List[Document]:
|
||||
"""Load webpage."""
|
||||
soup = self.scrape()
|
||||
|
@@ -2,18 +2,19 @@
|
||||
import os
|
||||
from typing import List
|
||||
|
||||
from unstructured.__version__ import __version__ as __unstructured_version__
|
||||
from unstructured.file_utils.filetype import FileType, detect_filetype
|
||||
|
||||
from langchain.document_loaders.unstructured import UnstructuredFileLoader
|
||||
|
||||
unstructured_version = tuple([int(x) for x in __unstructured_version__.split(".")])
|
||||
|
||||
|
||||
class UnstructuredPowerPointLoader(UnstructuredFileLoader):
|
||||
"""Loader that uses unstructured to load powerpoint files."""
|
||||
|
||||
def _get_elements(self) -> List:
|
||||
from unstructured.__version__ import __version__ as __unstructured_version__
|
||||
from unstructured.file_utils.filetype import FileType, detect_filetype
|
||||
|
||||
unstructured_version = tuple(
|
||||
[int(x) for x in __unstructured_version__.split(".")]
|
||||
)
|
||||
# NOTE(MthwRobinson) - magic will raise an import error if the libmagic
|
||||
# system dependency isn't installed. If it's not installed, we'll just
|
||||
# check the file extension
|
||||
|
Loading…
Reference in New Issue
Block a user