community: Use default load() implementation in doc loaders (#18385)

Following https://github.com/langchain-ai/langchain/pull/18289
Christophe Bornet 2024-03-01 20:46:52 +01:00 committed by GitHub
parent 42341bc787
commit 177f51c7bd
49 changed files with 22 additions and 253 deletions
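
For context: the default implementation these loaders now inherit lives on BaseLoader (added to langchain-core in #18289). A minimal sketch of the relevant part of the base class, assuming the shape described in that PR:

    from typing import Iterator, List

    from langchain_core.documents import Document


    class BaseLoader:
        """Sketch: the part of the loader interface this commit relies on."""

        def lazy_load(self) -> Iterator[Document]:
            # Concrete loaders override this to yield Documents one at a time.
            raise NotImplementedError

        def load(self) -> List[Document]:
            # Inherited default: materialize the lazy iterator into a list.
            # This is what makes each load() override below redundant.
            return list(self.lazy_load())

Every deleted override was `return list(self.lazy_load())` or an equivalent (a list comprehension, or a list built directly from the same underlying iterator), so inheriting the default preserves behavior.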

@@ -1,6 +1,6 @@
 import re
 from pathlib import Path
-from typing import Iterator, List
+from typing import Iterator
 
 from langchain_core.documents import Document
@@ -74,6 +74,3 @@ class AcreomLoader(BaseLoader):
             }
 
             yield Document(page_content=text, metadata=metadata)
-
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())

@@ -1,4 +1,4 @@
-from typing import Any, Callable, Iterator, List, Mapping, Optional
+from typing import Any, Callable, Iterator, Mapping, Optional
 
 from langchain_core.documents import Document
 from langchain_core.utils.utils import guard_import
@@ -53,9 +53,6 @@ class AirbyteCDKLoader(BaseLoader):
         self._stream_name = stream_name
         self._state = state
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         return self._integration._load_data(
             stream_name=self._stream_name, state=self._state

@@ -1,4 +1,4 @@
-from typing import Iterator, List
+from typing import Iterator
 
 from langchain_core.documents import Document
@@ -34,7 +34,3 @@ class AirtableLoader(BaseLoader):
                     "table_id": self.table_id,
                 },
             )
-
-    def load(self) -> List[Document]:
-        """Load Documents from table."""
-        return list(self.lazy_load())

@@ -148,7 +148,3 @@ class ArcGISLoader(BaseLoader):
             )
 
             yield Document(page_content=page_content, metadata=metadata)
-
-    def load(self) -> List[Document]:
-        """Load all records from FeatureLayer."""
-        return list(self.lazy_load())

@@ -76,9 +76,6 @@ class AstraDBLoader(BaseLoader):
         self.nb_prefetched = nb_prefetched
         self.extraction_function = extraction_function
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         for doc in self.collection.paginated_find(
             filter=self.filter,

@@ -157,7 +157,3 @@ class AthenaLoader(BaseLoader):
             }
             doc = Document(page_content=page_content, metadata=metadata)
             yield doc
-
-    def load(self) -> List[Document]:
-        """Load data into document objects."""
-        return list(self.lazy_load())

@@ -1,4 +1,4 @@
-from typing import Iterator, List, Optional
+from typing import Iterator, Optional
 
 from langchain_community.docstore.document import Document
 from langchain_community.document_loaders.base import BaseLoader
@@ -16,10 +16,6 @@ class AzureAIDataLoader(BaseLoader):
         self.glob_pattern = glob
         """Optional glob pattern to select files. Defaults to None."""
 
-    def load(self) -> List[Document]:
-        """Load documents."""
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
        """A lazy loader for Documents."""
        try:

@@ -1,4 +1,4 @@
-from typing import Any, Iterator, List
+from typing import Any, Iterator
 
 from langchain_core.documents import Document
@@ -18,9 +18,6 @@ class BaiduBOSDirectoryLoader(BaseLoader):
         self.bucket = bucket
         self.prefix = prefix
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         """Load documents."""
         try:

@@ -1,7 +1,7 @@
 import logging
 import os
 import tempfile
-from typing import Any, Iterator, List
+from typing import Any, Iterator
 
 from langchain_core.documents import Document
@@ -24,9 +24,6 @@ class BaiduBOSFileLoader(BaseLoader):
         self.bucket = bucket
         self.key = key
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         """Load documents."""
         try:

@@ -96,16 +96,3 @@ class BibtexLoader(BaseLoader):
             doc = self._load_entry(entry)
             if doc:
                 yield doc
-
-    def load(self) -> List[Document]:
-        """Load bibtex file documents from the given bibtex file path.
-
-        See https://bibtexparser.readthedocs.io/en/master/
-
-        Args:
-            file_path: the path to the bibtex file
-
-        Returns:
-            a list of documents with the document.page_content in text format
-        """
-        return list(self.lazy_load())

@@ -61,7 +61,3 @@ class BrowserlessLoader(BaseLoader):
                     "source": url,
                 },
             )
-
-    def load(self) -> List[Document]:
-        """Load Documents from URLs."""
-        return list(self.lazy_load())

@@ -5,7 +5,6 @@ from typing import (
     Any,
     Callable,
     Iterator,
-    List,
     Optional,
     Sequence,
     Union,
@@ -106,9 +105,6 @@ class CassandraLoader(BaseLoader):
         if query_execution_profile is not _NOT_SET:
             self.query_kwargs["execution_profile"] = query_execution_profile
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         for row in self.session.execute(self.query, **self.query_kwargs):
             metadata = self.metadata.copy()

@@ -78,14 +78,3 @@ class AsyncChromiumLoader(BaseLoader):
             html_content = asyncio.run(self.ascrape_playwright(url))
             metadata = {"source": url}
             yield Document(page_content=html_content, metadata=metadata)
-
-    def load(self) -> List[Document]:
-        """
-        Load and return all Documents from the provided URLs.
-
-        Returns:
-            List[Document]: A list of Document objects
-            containing the scraped content from each URL.
-        """
-        return list(self.lazy_load())

@@ -68,10 +68,6 @@ class CouchbaseLoader(BaseLoader):
         self.page_content_fields = page_content_fields
         self.metadata_fields = metadata_fields
 
-    def load(self) -> List[Document]:
-        """Load Couchbase data into Document objects."""
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         """Load Couchbase data into Document objects lazily."""
         from datetime import timedelta

@@ -1,4 +1,4 @@
-from typing import Any, Iterator, List
+from typing import Any, Iterator
 
 from langchain_core.documents import Document
@@ -26,10 +26,6 @@ class BaseDataFrameLoader(BaseLoader):
             metadata.pop(self.page_content_column)
             yield Document(page_content=text, metadata=metadata)
 
-    def load(self) -> List[Document]:
-        """Load full dataframe."""
-        return list(self.lazy_load())
-
 
 class DataFrameLoader(BaseDataFrameLoader):
     """Load `Pandas` DataFrame."""

@@ -1,4 +1,4 @@
-from typing import Iterator, List, Optional
+from typing import Iterator, Optional
 
 from langchain_core.documents import Document
@@ -77,10 +77,6 @@ class AzureAIDocumentIntelligenceLoader(BaseLoader):
             mode=mode,
         )
 
-    def load(self) -> List[Document]:
-        """Load given path as pages."""
-        return list(self.lazy_load())
-
     def lazy_load(
         self,
     ) -> Iterator[Document]:

@@ -71,10 +71,6 @@ class EtherscanLoader(BaseLoader):
         for doc in result:
             yield doc
 
-    def load(self) -> List[Document]:
-        """Load transactions from specific account by Etherscan."""
-        return list(self.lazy_load())
-
     def getNormTx(self) -> List[Document]:
         url = (
             f"https://api.etherscan.io/api?module=account&action=txlist&address={self.account_address}"

@@ -1,4 +1,4 @@
-from typing import Iterator, List, Optional, Sequence
+from typing import Iterator, Optional, Sequence
 
 from langchain_core.documents import Document
@@ -28,9 +28,6 @@ class FaunaLoader(BaseLoader):
         self.secret = secret
         self.metadata_fields = metadata_fields
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         try:
             from fauna import Page, fql

@@ -115,10 +115,6 @@ class GenericLoader(BaseLoader):
         for blob in self.blob_loader.yield_blobs():
             yield from self.blob_parser.lazy_parse(blob)
 
-    def load(self) -> List[Document]:
-        """Load all documents."""
-        return list(self.lazy_load())
-
     def load_and_split(
         self, text_splitter: Optional[TextSplitter] = None
     ) -> List[Document]:

@@ -1,4 +1,4 @@
-from typing import Any, Iterator, List
+from typing import Any, Iterator
 
 from langchain_core.documents import Document
@@ -67,7 +67,3 @@ class GeoDataFrameLoader(BaseLoader):
             # using WKT instead of str() to help GIS system interoperability
             yield Document(page_content=geom.wkt, metadata=metadata)
-
-    def load(self) -> List[Document]:
-        """Load full dataframe."""
-        return list(self.lazy_load())

@@ -127,32 +127,6 @@ class GitHubIssuesLoader(BaseGitHubLoader):
         else:
             url = None
 
-    def load(self) -> List[Document]:
-        """
-        Get issues of a GitHub repository.
-
-        Returns:
-            A list of Documents with attributes:
-                - page_content
-                - metadata
-                    - url
-                    - title
-                    - creator
-                    - created_at
-                    - last_update_time
-                    - closed_time
-                    - number of comments
-                    - state
-                    - labels
-                    - assignee
-                    - assignees
-                    - milestone
-                    - locked
-                    - number
-                    - is_pull_request
-        """
-        return list(self.lazy_load())
-
     def parse_issue(self, issue: dict) -> Document:
         """Create Document objects from a list of GitHub issues."""
         metadata = {

@@ -1,5 +1,5 @@
 import json
-from typing import Iterator, List, Mapping, Optional, Sequence, Union
+from typing import Iterator, Mapping, Optional, Sequence, Union
 
 from langchain_core.documents import Document
@@ -84,10 +84,6 @@ class HuggingFaceDatasetLoader(BaseLoader):
             for row in dataset[key]
         )
 
-    def load(self) -> List[Document]:
-        """Load documents."""
-        return list(self.lazy_load())
-
     def parse_obj(self, page_content: Union[str, object]) -> str:
         if isinstance(page_content, object):
             return json.dumps(page_content)

@@ -106,7 +106,3 @@ class HuggingFaceModelLoader(BaseLoader):
                 page_content=readme_content,
                 metadata=model,
             )
-
-    def load(self) -> List[Document]:
-        """Load model information, including README content."""
-        return list(self.lazy_load())

@@ -91,6 +91,3 @@ class JoplinLoader(BaseLoader):
     def lazy_load(self) -> Iterator[Document]:
         yield from self._get_notes()
-
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())

@@ -1,6 +1,6 @@
 import json
 import urllib.request
-from typing import Any, Iterator, List
+from typing import Any, Iterator
 
 from langchain_core.documents import Document
@@ -46,7 +46,3 @@ class LarkSuiteDocLoader(BaseLoader):
             "title": metadata_json["data"]["document"]["title"],
         }
         yield Document(page_content=text, metadata=metadata)
-
-    def load(self) -> List[Document]:
-        """Load LarkSuite (FeiShu) document."""
-        return list(self.lazy_load())

@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import Any, Iterator, List, Optional, Sequence
+from typing import Any, Iterator, Optional, Sequence
 
 from langchain_core.documents import Document
@@ -78,6 +78,3 @@ class MaxComputeLoader(BaseLoader):
             else:
                 metadata = {k: v for k, v in row.items() if k not in page_content_data}
             yield Document(page_content=page_content, metadata=metadata)
-
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())

@@ -1,6 +1,6 @@
 import logging
 from pathlib import Path
-from typing import Iterator, List, Optional, Sequence, Union
+from typing import Iterator, Optional, Sequence, Union
 
 from langchain_core.documents import Document
@@ -87,11 +87,6 @@ class MWDumpLoader(BaseLoader):
         metadata = {"source": page.title}
         return Document(page_content=text, metadata=metadata)
 
-    def load(self) -> List[Document]:
-        """Load from a file path."""
-        return [doc for doc in self.lazy_load()]
-
     def lazy_load(
         self,
     ) -> Iterator[Document]:

@@ -23,10 +23,6 @@ class MergedDataLoader(BaseLoader):
             for document in data:
                 yield document
 
-    def load(self) -> List[Document]:
-        """Load docs."""
-        return list(self.lazy_load())
-
     async def alazy_load(self) -> AsyncIterator[Document]:
         """Lazy load docs from each individual loader."""
         for loader in self.loaders:

@@ -91,7 +91,3 @@ class OneDriveLoader(O365BaseLoader):
         if self.object_ids:
             for blob in self._load_from_object_ids(drive, self.object_ids):
                 yield from blob_parser.lazy_parse(blob)
-
-    def load(self) -> List[Document]:
-        """Load all documents."""
-        return list(self.lazy_load())

@@ -109,18 +109,6 @@ class OneNoteLoader(BaseLoader, BaseModel):
         else:
             request_url = ""
 
-    def load(self) -> List[Document]:
-        """
-        Get pages from OneNote notebooks.
-
-        Returns:
-            A list of Documents with attributes:
-                - page_content
-                - metadata
-                    - title
-        """
-        return list(self.lazy_load())
-
     def _get_page_content(self, page_id: str) -> str:
         """Get page content from OneNote API"""
         request_url = self.onenote_api_base_url + f"/pages/{page_id}/content"

@@ -1,4 +1,4 @@
-from typing import Iterator, List
+from typing import Iterator
 
 from langchain_core.documents import Document
@@ -37,8 +37,3 @@ class OpenCityDataLoader(BaseLoader):
                     "source": self.city_id + "_" + self.dataset_id,
                 },
             )
-
-    def load(self) -> List[Document]:
-        """Load records."""
-        return list(self.lazy_load())

@@ -157,10 +157,6 @@ class PyPDFLoader(BasePDFLoader):
         super().__init__(file_path, headers=headers)
         self.parser = PyPDFParser(password=password, extract_images=extract_images)
 
-    def load(self) -> List[Document]:
-        """Load given path as pages."""
-        return list(self.lazy_load())
-
     def lazy_load(
         self,
     ) -> Iterator[Document]:

@@ -1,4 +1,4 @@
-from typing import Iterator, List, Optional
+from typing import Iterator, Optional
 
 from langchain_core.documents import Document
@@ -32,9 +32,6 @@ class PubMedLoader(BaseLoader):
             top_k_results=load_max_docs,
         )
 
-    def load(self) -> List[Document]:
-        return list(self._client.lazy_load_docs(self.query))
-
     def lazy_load(self) -> Iterator[Document]:
         for doc in self._client.lazy_load_docs(self.query):
             yield doc

@@ -84,10 +84,6 @@ class ReadTheDocsLoader(BaseLoader):
                 text = self._clean_data(f.read())
             yield Document(page_content=text, metadata={"source": str(p)})
 
-    def load(self) -> List[Document]:
-        """Load documents."""
-        return list(self.lazy_load())
-
     def _clean_data(self, data: str) -> str:
         from bs4 import BeautifulSoup

@@ -314,7 +314,3 @@ class RecursiveUrlLoader(BaseLoader):
             return iter(results or [])
         else:
            return self._get_child_links_recursive(self.url, visited)
-
-    def load(self) -> List[Document]:
-        """Load web pages."""
-        return list(self.lazy_load())

@@ -100,9 +100,6 @@ class RocksetLoader(BaseLoader):
             # ignore
             pass
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         query_results = self.client.Queries.query(
             sql=self.query

@@ -124,6 +124,3 @@ class RSpaceLoader(BaseLoader):
                 yield d
         else:
             raise ValueError("Unknown global ID type")
-
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())

@@ -54,7 +54,3 @@ class SharePointLoader(O365BaseLoader):
         if self.object_ids:
             for blob in self._load_from_object_ids(drive, self.object_ids):
                 yield from blob_parser.lazy_parse(blob)
-
-    def load(self) -> List[Document]:
-        """Load all documents."""
-        return list(self.lazy_load())

@@ -122,7 +122,3 @@ class SnowflakeLoader(BaseLoader):
             metadata = {k: v for k, v in row.items() if k in metadata_columns}
             doc = Document(page_content=page_content, metadata=metadata)
             yield doc
-
-    def load(self) -> List[Document]:
-        """Load data into document objects."""
-        return list(self.lazy_load())

@@ -104,9 +104,6 @@ class SQLDatabaseLoader(BaseLoader):
             yield Document(page_content=page_content, metadata=metadata)
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     @staticmethod
     def page_content_default_mapper(
         row: sa.RowMapping, column_names: Optional[List[str]] = None

@@ -1,4 +1,4 @@
-from typing import Any, Iterator, List
+from typing import Any, Iterator
 
 from langchain_core.documents import Document
@@ -19,9 +19,6 @@ class TencentCOSDirectoryLoader(BaseLoader):
         self.bucket = bucket
         self.prefix = prefix
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         """Load documents."""
         try:

@@ -1,6 +1,6 @@
 import os
 import tempfile
-from typing import Any, Iterator, List
+from typing import Any, Iterator
 
 from langchain_core.documents import Document
@@ -21,9 +21,6 @@ class TencentCOSFileLoader(BaseLoader):
         self.bucket = bucket
         self.key = key
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         """Load documents."""
         try:

@@ -1,4 +1,4 @@
-from typing import Callable, Dict, Iterator, List, Optional
+from typing import Callable, Dict, Iterator, Optional
 
 from langchain_core.documents import Document
@@ -75,6 +75,3 @@ class TensorflowDatasetLoader(BaseLoader):
     def lazy_load(self) -> Iterator[Document]:
         yield from self._tfds_client.lazy_load()
-
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())

@@ -65,7 +65,3 @@ class TiDBLoader(BaseLoader):
             )
             metadata = {col: row_data[col] for col in self.metadata_columns}
            yield Document(page_content=page_content, metadata=metadata)
-
-    def load(self) -> List[Document]:
-        """Load TiDB data into document objects."""
-        return list(self.lazy_load())

@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import Iterator, List
+from typing import Iterator
 
 import requests
 from langchain_core.documents import Document
@@ -28,7 +28,3 @@ class ToMarkdownLoader(BaseLoader):
         text = response.json()["article"]
         metadata = {"source": self.url}
         yield Document(page_content=text, metadata=metadata)
-
-    def load(self) -> List[Document]:
-        """Load file."""
-        return list(self.lazy_load())

@@ -1,6 +1,6 @@
 import json
 from pathlib import Path
-from typing import Iterator, List, Union
+from typing import Iterator, Union
 
 from langchain_core.documents import Document
@@ -18,10 +18,6 @@ class TomlLoader(BaseLoader):
         """Initialize the TomlLoader with a source file or directory."""
         self.source = Path(source)
 
-    def load(self) -> List[Document]:
-        """Load and return all documents."""
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         """Lazily load the TOML documents from the source file or directory."""
         import tomli

@@ -2,7 +2,7 @@
 from __future__ import annotations
 
 from datetime import datetime
-from typing import Iterator, List, Optional, Sequence
+from typing import Iterator, Optional, Sequence
 
 from langchain_core.documents import Document
@@ -43,9 +43,3 @@ class WeatherDataLoader(BaseLoader):
             metadata = {"queried_at": datetime.now()}
            content = self.client.run(place)
             yield Document(page_content=content, metadata=metadata)
-
-    def load(
-        self,
-    ) -> List[Document]:
-        """Load weather data for the given locations."""
-        return list(self.lazy_load())

@@ -251,10 +251,6 @@ class WebBaseLoader(BaseLoader):
             metadata = _build_metadata(soup, path)
             yield Document(page_content=text, metadata=metadata)
 
-    def load(self) -> List[Document]:
-        """Load text from the url(s) in web_path."""
-        return list(self.lazy_load())
-
     def aload(self) -> List[Document]:
         """Load text from the urls in web_path async into Documents."""

@@ -36,10 +36,6 @@ class ToyLoader(BaseLoader):
     ) -> Iterator[Document]:
         yield from self.documents
 
-    def load(self) -> List[Document]:
-        """Load the documents from the source."""
-        return list(self.lazy_load())
-
     async def alazy_load(
         self,
     ) -> AsyncIterator[Document]:
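
With the inherited default, a new loader only needs to implement lazy_load(); load() comes for free. A hypothetical example (the loader name and data are made up for illustration, not part of this commit), mirroring the ToyLoader pattern above:

    from typing import Iterator, List

    from langchain_core.documents import Document
    from langchain_community.document_loaders.base import BaseLoader


    class InMemoryLoader(BaseLoader):
        """Hypothetical loader: yields one Document per input string."""

        def __init__(self, texts: List[str]) -> None:
            self.texts = texts

        def lazy_load(self) -> Iterator[Document]:
            for text in self.texts:
                yield Document(page_content=text, metadata={"source": "memory"})


    # load() is inherited from the base class: list(self.lazy_load())
    docs = InMemoryLoader(["hello", "world"]).load()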