Mirror of https://github.com/hwchase17/langchain.git (synced 2025-07-05 04:38:26 +00:00)
community: Use default load() implementation in doc loaders (#18385)
Following https://github.com/langchain-ai/langchain/pull/18289
parent 42341bc787
commit 177f51c7bd
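Background: PR #18289 gave BaseLoader in langchain-core a default load() that simply materializes lazy_load(), so the hand-written per-loader overrides removed below had become redundant. A minimal sketch of that base-class logic (paraphrased, assuming the langchain_core behavior at the time; not the verbatim source):

    from abc import ABC
    from typing import Iterator, List

    from langchain_core.documents import Document


    class BaseLoader(ABC):
        def load(self) -> List[Document]:
            """Default implementation: materialize the lazy iterator."""
            return list(self.lazy_load())

        def lazy_load(self) -> Iterator[Document]:
            """Subclasses implement this; load() is then inherited for free."""
            if type(self).load != BaseLoader.load:
                # Legacy loaders that only override load() still work.
                return iter(self.load())
            raise NotImplementedError(
                f"{self.__class__.__name__} does not implement lazy_load()"
            )

Each hunk below deletes a per-class load() that was equivalent to this inherited default.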
@@ -1,6 +1,6 @@
 import re
 from pathlib import Path
-from typing import Iterator, List
+from typing import Iterator
 
 from langchain_core.documents import Document
 
@@ -74,6 +74,3 @@ class AcreomLoader(BaseLoader):
             }
 
             yield Document(page_content=text, metadata=metadata)
-
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
@@ -1,4 +1,4 @@
-from typing import Any, Callable, Iterator, List, Mapping, Optional
+from typing import Any, Callable, Iterator, Mapping, Optional
 
 from langchain_core.documents import Document
 from langchain_core.utils.utils import guard_import
@@ -53,9 +53,6 @@ class AirbyteCDKLoader(BaseLoader):
         self._stream_name = stream_name
         self._state = state
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         return self._integration._load_data(
             stream_name=self._stream_name, state=self._state
@@ -1,4 +1,4 @@
-from typing import Iterator, List
+from typing import Iterator
 
 from langchain_core.documents import Document
 
@@ -34,7 +34,3 @@ class AirtableLoader(BaseLoader):
                     "table_id": self.table_id,
                 },
             )
-
-    def load(self) -> List[Document]:
-        """Load Documents from table."""
-        return list(self.lazy_load())
@@ -148,7 +148,3 @@ class ArcGISLoader(BaseLoader):
             )
 
             yield Document(page_content=page_content, metadata=metadata)
-
-    def load(self) -> List[Document]:
-        """Load all records from FeatureLayer."""
-        return list(self.lazy_load())
@@ -76,9 +76,6 @@ class AstraDBLoader(BaseLoader):
        self.nb_prefetched = nb_prefetched
        self.extraction_function = extraction_function
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
    def lazy_load(self) -> Iterator[Document]:
        for doc in self.collection.paginated_find(
            filter=self.filter,
@@ -157,7 +157,3 @@ class AthenaLoader(BaseLoader):
            }
            doc = Document(page_content=page_content, metadata=metadata)
            yield doc
-
-    def load(self) -> List[Document]:
-        """Load data into document objects."""
-        return list(self.lazy_load())
@@ -1,4 +1,4 @@
-from typing import Iterator, List, Optional
+from typing import Iterator, Optional
 
 from langchain_community.docstore.document import Document
 from langchain_community.document_loaders.base import BaseLoader
@@ -16,10 +16,6 @@ class AzureAIDataLoader(BaseLoader):
        self.glob_pattern = glob
        """Optional glob pattern to select files. Defaults to None."""
 
-    def load(self) -> List[Document]:
-        """Load documents."""
-        return list(self.lazy_load())
-
    def lazy_load(self) -> Iterator[Document]:
        """A lazy loader for Documents."""
        try:
@@ -1,4 +1,4 @@
-from typing import Any, Iterator, List
+from typing import Any, Iterator
 
 from langchain_core.documents import Document
 
@@ -18,9 +18,6 @@ class BaiduBOSDirectoryLoader(BaseLoader):
         self.bucket = bucket
         self.prefix = prefix
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         """Load documents."""
         try:
@@ -1,7 +1,7 @@
 import logging
 import os
 import tempfile
-from typing import Any, Iterator, List
+from typing import Any, Iterator
 
 from langchain_core.documents import Document
 
@@ -24,9 +24,6 @@ class BaiduBOSFileLoader(BaseLoader):
         self.bucket = bucket
         self.key = key
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         """Load documents."""
         try:
@@ -96,16 +96,3 @@ class BibtexLoader(BaseLoader):
             doc = self._load_entry(entry)
             if doc:
                 yield doc
-
-    def load(self) -> List[Document]:
-        """Load bibtex file documents from the given bibtex file path.
-
-        See https://bibtexparser.readthedocs.io/en/master/
-
-        Args:
-            file_path: the path to the bibtex file
-
-        Returns:
-            a list of documents with the document.page_content in text format
-        """
-        return list(self.lazy_load())
@@ -61,7 +61,3 @@ class BrowserlessLoader(BaseLoader):
                     "source": url,
                 },
             )
-
-    def load(self) -> List[Document]:
-        """Load Documents from URLs."""
-        return list(self.lazy_load())
@@ -5,7 +5,6 @@ from typing import (
     Any,
     Callable,
     Iterator,
-    List,
     Optional,
     Sequence,
     Union,
@@ -106,9 +105,6 @@ class CassandraLoader(BaseLoader):
         if query_execution_profile is not _NOT_SET:
             self.query_kwargs["execution_profile"] = query_execution_profile
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         for row in self.session.execute(self.query, **self.query_kwargs):
             metadata = self.metadata.copy()
@@ -78,14 +78,3 @@ class AsyncChromiumLoader(BaseLoader):
             html_content = asyncio.run(self.ascrape_playwright(url))
             metadata = {"source": url}
             yield Document(page_content=html_content, metadata=metadata)
-
-    def load(self) -> List[Document]:
-        """
-        Load and return all Documents from the provided URLs.
-
-        Returns:
-            List[Document]: A list of Document objects
-            containing the scraped content from each URL.
-
-        """
-        return list(self.lazy_load())
@@ -68,10 +68,6 @@ class CouchbaseLoader(BaseLoader):
         self.page_content_fields = page_content_fields
         self.metadata_fields = metadata_fields
 
-    def load(self) -> List[Document]:
-        """Load Couchbase data into Document objects."""
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         """Load Couchbase data into Document objects lazily."""
         from datetime import timedelta
@@ -1,4 +1,4 @@
-from typing import Any, Iterator, List
+from typing import Any, Iterator
 
 from langchain_core.documents import Document
 
@@ -26,10 +26,6 @@ class BaseDataFrameLoader(BaseLoader):
             metadata.pop(self.page_content_column)
             yield Document(page_content=text, metadata=metadata)
 
-    def load(self) -> List[Document]:
-        """Load full dataframe."""
-        return list(self.lazy_load())
-
 
 class DataFrameLoader(BaseDataFrameLoader):
     """Load `Pandas` DataFrame."""
@@ -1,4 +1,4 @@
-from typing import Iterator, List, Optional
+from typing import Iterator, Optional
 
 from langchain_core.documents import Document
 
@@ -77,10 +77,6 @@ class AzureAIDocumentIntelligenceLoader(BaseLoader):
             mode=mode,
         )
 
-    def load(self) -> List[Document]:
-        """Load given path as pages."""
-        return list(self.lazy_load())
-
     def lazy_load(
         self,
     ) -> Iterator[Document]:
@@ -71,10 +71,6 @@ class EtherscanLoader(BaseLoader):
             for doc in result:
                 yield doc
 
-    def load(self) -> List[Document]:
-        """Load transactions from spcifc account by Etherscan."""
-        return list(self.lazy_load())
-
     def getNormTx(self) -> List[Document]:
         url = (
             f"https://api.etherscan.io/api?module=account&action=txlist&address={self.account_address}"
@@ -1,4 +1,4 @@
-from typing import Iterator, List, Optional, Sequence
+from typing import Iterator, Optional, Sequence
 
 from langchain_core.documents import Document
 
@@ -28,9 +28,6 @@ class FaunaLoader(BaseLoader):
         self.secret = secret
         self.metadata_fields = metadata_fields
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         try:
             from fauna import Page, fql
@@ -115,10 +115,6 @@ class GenericLoader(BaseLoader):
         for blob in self.blob_loader.yield_blobs():
             yield from self.blob_parser.lazy_parse(blob)
 
-    def load(self) -> List[Document]:
-        """Load all documents."""
-        return list(self.lazy_load())
-
     def load_and_split(
         self, text_splitter: Optional[TextSplitter] = None
     ) -> List[Document]:
@@ -1,4 +1,4 @@
-from typing import Any, Iterator, List
+from typing import Any, Iterator
 
 from langchain_core.documents import Document
 
@@ -67,7 +67,3 @@ class GeoDataFrameLoader(BaseLoader):
 
             # using WKT instead of str() to help GIS system interoperability
             yield Document(page_content=geom.wkt, metadata=metadata)
-
-    def load(self) -> List[Document]:
-        """Load full dataframe."""
-        return list(self.lazy_load())
@@ -127,32 +127,6 @@ class GitHubIssuesLoader(BaseGitHubLoader):
         else:
             url = None
 
-    def load(self) -> List[Document]:
-        """
-        Get issues of a GitHub repository.
-
-        Returns:
-            A list of Documents with attributes:
-                - page_content
-                - metadata
-                - url
-                - title
-                - creator
-                - created_at
-                - last_update_time
-                - closed_time
-                - number of comments
-                - state
-                - labels
-                - assignee
-                - assignees
-                - milestone
-                - locked
-                - number
-                - is_pull_request
-        """
-        return list(self.lazy_load())
-
     def parse_issue(self, issue: dict) -> Document:
         """Create Document objects from a list of GitHub issues."""
         metadata = {
@@ -1,5 +1,5 @@
 import json
-from typing import Iterator, List, Mapping, Optional, Sequence, Union
+from typing import Iterator, Mapping, Optional, Sequence, Union
 
 from langchain_core.documents import Document
 
@@ -84,10 +84,6 @@ class HuggingFaceDatasetLoader(BaseLoader):
             for row in dataset[key]
         )
 
-    def load(self) -> List[Document]:
-        """Load documents."""
-        return list(self.lazy_load())
-
     def parse_obj(self, page_content: Union[str, object]) -> str:
         if isinstance(page_content, object):
             return json.dumps(page_content)
@@ -106,7 +106,3 @@ class HuggingFaceModelLoader(BaseLoader):
                 page_content=readme_content,
                 metadata=model,
             )
-
-    def load(self) -> List[Document]:
-        """Load model information, including README content."""
-        return list(self.lazy_load())
@@ -91,6 +91,3 @@ class JoplinLoader(BaseLoader):
 
     def lazy_load(self) -> Iterator[Document]:
         yield from self._get_notes()
-
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
@@ -1,6 +1,6 @@
 import json
 import urllib.request
-from typing import Any, Iterator, List
+from typing import Any, Iterator
 
 from langchain_core.documents import Document
 
@@ -46,7 +46,3 @@ class LarkSuiteDocLoader(BaseLoader):
             "title": metadata_json["data"]["document"]["title"],
         }
         yield Document(page_content=text, metadata=metadata)
-
-    def load(self) -> List[Document]:
-        """Load LarkSuite (FeiShu) document."""
-        return list(self.lazy_load())
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import Any, Iterator, List, Optional, Sequence
+from typing import Any, Iterator, Optional, Sequence
 
 from langchain_core.documents import Document
 
@@ -78,6 +78,3 @@ class MaxComputeLoader(BaseLoader):
             else:
                 metadata = {k: v for k, v in row.items() if k not in page_content_data}
             yield Document(page_content=page_content, metadata=metadata)
-
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
@@ -1,6 +1,6 @@
 import logging
 from pathlib import Path
-from typing import Iterator, List, Optional, Sequence, Union
+from typing import Iterator, Optional, Sequence, Union
 
 from langchain_core.documents import Document
 
@@ -87,11 +87,6 @@ class MWDumpLoader(BaseLoader):
         metadata = {"source": page.title}
         return Document(page_content=text, metadata=metadata)
 
-    def load(self) -> List[Document]:
-        """Load from a file path."""
-
-        return [doc for doc in self.lazy_load()]
-
     def lazy_load(
         self,
     ) -> Iterator[Document]:
@@ -23,10 +23,6 @@ class MergedDataLoader(BaseLoader):
             for document in data:
                 yield document
 
-    def load(self) -> List[Document]:
-        """Load docs."""
-        return list(self.lazy_load())
-
     async def alazy_load(self) -> AsyncIterator[Document]:
         """Lazy load docs from each individual loader."""
         for loader in self.loaders:
@@ -91,7 +91,3 @@ class OneDriveLoader(O365BaseLoader):
         if self.object_ids:
             for blob in self._load_from_object_ids(drive, self.object_ids):
                 yield from blob_parser.lazy_parse(blob)
-
-    def load(self) -> List[Document]:
-        """Load all documents."""
-        return list(self.lazy_load())
@@ -109,18 +109,6 @@ class OneNoteLoader(BaseLoader, BaseModel):
         else:
             request_url = ""
 
-    def load(self) -> List[Document]:
-        """
-        Get pages from OneNote notebooks.
-
-        Returns:
-            A list of Documents with attributes:
-                - page_content
-                - metadata
-                - title
-        """
-        return list(self.lazy_load())
-
     def _get_page_content(self, page_id: str) -> str:
         """Get page content from OneNote API"""
         request_url = self.onenote_api_base_url + f"/pages/{page_id}/content"
@@ -1,4 +1,4 @@
-from typing import Iterator, List
+from typing import Iterator
 
 from langchain_core.documents import Document
 
@@ -37,8 +37,3 @@ class OpenCityDataLoader(BaseLoader):
                     "source": self.city_id + "_" + self.dataset_id,
                 },
             )
-
-    def load(self) -> List[Document]:
-        """Load records."""
-
-        return list(self.lazy_load())
@@ -157,10 +157,6 @@ class PyPDFLoader(BasePDFLoader):
         super().__init__(file_path, headers=headers)
         self.parser = PyPDFParser(password=password, extract_images=extract_images)
 
-    def load(self) -> List[Document]:
-        """Load given path as pages."""
-        return list(self.lazy_load())
-
     def lazy_load(
         self,
     ) -> Iterator[Document]:
@@ -1,4 +1,4 @@
-from typing import Iterator, List, Optional
+from typing import Iterator, Optional
 
 from langchain_core.documents import Document
 
@@ -32,9 +32,6 @@ class PubMedLoader(BaseLoader):
             top_k_results=load_max_docs,
         )
 
-    def load(self) -> List[Document]:
-        return list(self._client.lazy_load_docs(self.query))
-
     def lazy_load(self) -> Iterator[Document]:
         for doc in self._client.lazy_load_docs(self.query):
             yield doc
@@ -84,10 +84,6 @@ class ReadTheDocsLoader(BaseLoader):
                 text = self._clean_data(f.read())
                 yield Document(page_content=text, metadata={"source": str(p)})
 
-    def load(self) -> List[Document]:
-        """Load documents."""
-        return list(self.lazy_load())
-
     def _clean_data(self, data: str) -> str:
         from bs4 import BeautifulSoup
 
@@ -314,7 +314,3 @@ class RecursiveUrlLoader(BaseLoader):
             return iter(results or [])
         else:
             return self._get_child_links_recursive(self.url, visited)
-
-    def load(self) -> List[Document]:
-        """Load web pages."""
-        return list(self.lazy_load())
@@ -100,9 +100,6 @@ class RocksetLoader(BaseLoader):
             # ignore
             pass
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         query_results = self.client.Queries.query(
             sql=self.query
@@ -124,6 +124,3 @@ class RSpaceLoader(BaseLoader):
                 yield d
         else:
             raise ValueError("Unknown global ID type")
-
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
@@ -54,7 +54,3 @@ class SharePointLoader(O365BaseLoader):
         if self.object_ids:
             for blob in self._load_from_object_ids(drive, self.object_ids):
                 yield from blob_parser.lazy_parse(blob)
-
-    def load(self) -> List[Document]:
-        """Load all documents."""
-        return list(self.lazy_load())
@@ -122,7 +122,3 @@ class SnowflakeLoader(BaseLoader):
             metadata = {k: v for k, v in row.items() if k in metadata_columns}
             doc = Document(page_content=page_content, metadata=metadata)
             yield doc
-
-    def load(self) -> List[Document]:
-        """Load data into document objects."""
-        return list(self.lazy_load())
@@ -104,9 +104,6 @@ class SQLDatabaseLoader(BaseLoader):
 
             yield Document(page_content=page_content, metadata=metadata)
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     @staticmethod
     def page_content_default_mapper(
         row: sa.RowMapping, column_names: Optional[List[str]] = None
@@ -1,4 +1,4 @@
-from typing import Any, Iterator, List
+from typing import Any, Iterator
 
 from langchain_core.documents import Document
 
@@ -19,9 +19,6 @@ class TencentCOSDirectoryLoader(BaseLoader):
         self.bucket = bucket
         self.prefix = prefix
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         """Load documents."""
         try:
@@ -1,6 +1,6 @@
 import os
 import tempfile
-from typing import Any, Iterator, List
+from typing import Any, Iterator
 
 from langchain_core.documents import Document
 
@@ -21,9 +21,6 @@ class TencentCOSFileLoader(BaseLoader):
         self.bucket = bucket
         self.key = key
 
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         """Load documents."""
         try:
@@ -1,4 +1,4 @@
-from typing import Callable, Dict, Iterator, List, Optional
+from typing import Callable, Dict, Iterator, Optional
 
 from langchain_core.documents import Document
 
@@ -75,6 +75,3 @@ class TensorflowDatasetLoader(BaseLoader):
 
     def lazy_load(self) -> Iterator[Document]:
         yield from self._tfds_client.lazy_load()
-
-    def load(self) -> List[Document]:
-        return list(self.lazy_load())
@@ -65,7 +65,3 @@ class TiDBLoader(BaseLoader):
             )
             metadata = {col: row_data[col] for col in self.metadata_columns}
             yield Document(page_content=page_content, metadata=metadata)
-
-    def load(self) -> List[Document]:
-        """Load TiDB data into document objects."""
-        return list(self.lazy_load())
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import Iterator, List
+from typing import Iterator
 
 import requests
 from langchain_core.documents import Document
@@ -28,7 +28,3 @@ class ToMarkdownLoader(BaseLoader):
         text = response.json()["article"]
         metadata = {"source": self.url}
         yield Document(page_content=text, metadata=metadata)
-
-    def load(self) -> List[Document]:
-        """Load file."""
-        return list(self.lazy_load())
@@ -1,6 +1,6 @@
 import json
 from pathlib import Path
-from typing import Iterator, List, Union
+from typing import Iterator, Union
 
 from langchain_core.documents import Document
 
@@ -18,10 +18,6 @@ class TomlLoader(BaseLoader):
         """Initialize the TomlLoader with a source file or directory."""
         self.source = Path(source)
 
-    def load(self) -> List[Document]:
-        """Load and return all documents."""
-        return list(self.lazy_load())
-
     def lazy_load(self) -> Iterator[Document]:
         """Lazily load the TOML documents from the source file or directory."""
         import tomli
@@ -2,7 +2,7 @@
 from __future__ import annotations
 
 from datetime import datetime
-from typing import Iterator, List, Optional, Sequence
+from typing import Iterator, Optional, Sequence
 
 from langchain_core.documents import Document
 
@@ -43,9 +43,3 @@ class WeatherDataLoader(BaseLoader):
             metadata = {"queried_at": datetime.now()}
             content = self.client.run(place)
             yield Document(page_content=content, metadata=metadata)
-
-    def load(
-        self,
-    ) -> List[Document]:
-        """Load weather data for the given locations."""
-        return list(self.lazy_load())
@@ -251,10 +251,6 @@ class WebBaseLoader(BaseLoader):
             metadata = _build_metadata(soup, path)
             yield Document(page_content=text, metadata=metadata)
 
-    def load(self) -> List[Document]:
-        """Load text from the url(s) in web_path."""
-        return list(self.lazy_load())
-
     def aload(self) -> List[Document]:
         """Load text from the urls in web_path async into Documents."""
 
@@ -36,10 +36,6 @@ class ToyLoader(BaseLoader):
     ) -> Iterator[Document]:
        yield from self.documents
 
-    def load(self) -> List[Document]:
-        """Load the documents from the source."""
-        return list(self.lazy_load())
-
     async def alazy_load(
         self,
     ) -> AsyncIterator[Document]:
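Caller-facing behavior is unchanged: load() is still available on every loader, just inherited instead of redefined. A quick illustrative check (the file path below is a made-up example, not part of the diff):

    from langchain_community.document_loaders import TomlLoader

    loader = TomlLoader("settings.toml")  # hypothetical path
    docs = loader.load()  # inherited default: list(self.lazy_load())
    for doc in loader.lazy_load():  # lazy variant yields the same Documents
        print(doc.page_content[:80])
        break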