docstrings document_loaders 1 (#6847)

- Updated docstrings in `document_loaders` - several code fixes.
- Added `docs/extras/ecosystem/integrations/airtable.md`

@rlancemartin, @eyurtsev

This commit is contained in: parent e41b382e1c, commit 77ae8084a0

28 lines added: docs/extras/ecosystem/integrations/airtable.md (new file)
docs/extras/ecosystem/integrations/airtable.md (new file, @@ -0,0 +1,28 @@):

# Airtable

>[Airtable](https://en.wikipedia.org/wiki/Airtable) is a cloud collaboration service.
>`Airtable` is a spreadsheet-database hybrid, with the features of a database but applied to a spreadsheet.
>The fields in an Airtable table are similar to cells in a spreadsheet, but have types such as 'checkbox',
>'phone number', and 'drop-down list', and can reference file attachments like images.

>Users can create a database, set up column types, add records, link tables to one another, collaborate, sort records
>and publish views to external websites.

## Installation and Setup

```bash
pip install pyairtable
```

* Get your [API key](https://support.airtable.com/docs/creating-and-using-api-keys-and-access-tokens).
* Get the [ID of your base](https://airtable.com/developers/web/api/introduction).
* Get the [table ID from the table url](https://www.highviewapps.com/kb/where-can-i-find-the-airtable-base-id-and-table-id/#:~:text=Both%20the%20Airtable%20Base%20ID,URL%20that%20begins%20with%20tbl).

## Document Loader

```python
from langchain.document_loaders import AirtableLoader
```

See an [example](/docs/modules/data_connection/document_loaders/integrations/airtable.html).
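For quick orientation before the source diffs below, here is a minimal usage sketch of the loader this page introduces. The token, table ID, and base ID are placeholders; the argument order follows the `AirtableLoader.__init__` signature shown later in this commit.

```python
from langchain.document_loaders import AirtableLoader

# Placeholder credentials: substitute your own values.
api_token = "patXXXXXXXXXXXXXX"
table_id = "tblXXXXXXXXXXXXXX"
base_id = "appXXXXXXXXXXXXXX"

loader = AirtableLoader(api_token, table_id, base_id)
docs = loader.load()  # one Document per Airtable record
```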
```diff
@@ -134,7 +134,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.16"
+   "version": "3.10.6"
   }
  },
  "nbformat": 4,
```
```diff
@@ -145,10 +145,10 @@ from langchain.document_loaders.youtube import (
     YoutubeLoader,
 )
 
-# Legacy: only for backwards compat. Use PyPDFLoader instead
+# Legacy: only for backwards compatibility. Use PyPDFLoader instead
 PagedPDFSplitter = PyPDFLoader
 
-# For backwards compatability
+# For backwards compatibility
 TelegramChatLoader = TelegramChatFileLoader
 
 __all__ = [
```
```diff
@@ -8,15 +8,20 @@ from langchain.document_loaders.base import BaseLoader
 
 
 class AcreomLoader(BaseLoader):
+    """Loader that loads acreom vault from a directory."""
+
     FRONT_MATTER_REGEX = re.compile(r"^---\n(.*?)\n---\n", re.MULTILINE | re.DOTALL)
+    """Regex to match front matter metadata in markdown files."""
 
     def __init__(
         self, path: str, encoding: str = "UTF-8", collect_metadata: bool = True
     ):
-        """Initialize with path."""
         self.file_path = path
+        """Path to the directory containing the markdown files."""
         self.encoding = encoding
+        """Encoding to use when reading the files."""
         self.collect_metadata = collect_metadata
+        """Whether to collect metadata from the front matter."""
 
     def _parse_front_matter(self, content: str) -> dict:
         """Parse front matter metadata from the content and return it as a dict."""
```
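To illustrate what the `FRONT_MATTER_REGEX` documented above actually captures, a small standalone sketch (the sample note is invented):

```python
import re

FRONT_MATTER_REGEX = re.compile(r"^---\n(.*?)\n---\n", re.MULTILINE | re.DOTALL)

sample = """---
title: Note
tags: [acreom]
---
Body of the markdown note.
"""

match = FRONT_MATTER_REGEX.search(sample)
if match:
    # Prints the two metadata lines between the --- fences.
    print(match.group(1))
```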
```diff
@@ -11,11 +11,11 @@ class AirbyteJSONLoader(BaseLoader):
     """Loader that loads local airbyte json files."""
 
     def __init__(self, file_path: str):
-        """Initialize with file path. This should start with '/tmp/airbyte_local/'."""
+        """Initialize with a file path. This should start with '/tmp/airbyte_local/'."""
         self.file_path = file_path
+        """Path to the directory containing the json files."""
 
     def load(self) -> List[Document]:
-        """Load file."""
         text = ""
         for line in open(self.file_path, "r"):
             data = json.loads(line)["_airbyte_data"]
```
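A sketch of the file format this loader reads, based on the `json.loads(line)["_airbyte_data"]` access above: one JSON object per line, with the record payload under the `_airbyte_data` key. The sample record is invented.

```python
import json

line = '{"_airbyte_ab_id": "1", "_airbyte_data": {"name": "Ada", "role": "pioneer"}}'
data = json.loads(line)["_airbyte_data"]
print(data)  # {'name': 'Ada', 'role': 'pioneer'}
```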
```diff
@@ -10,11 +10,14 @@ class AirtableLoader(BaseLoader):
     def __init__(self, api_token: str, table_id: str, base_id: str):
         """Initialize with API token and the IDs for table and base"""
         self.api_token = api_token
+        """Airtable API token."""
         self.table_id = table_id
+        """Airtable table ID."""
         self.base_id = base_id
+        """Airtable base ID."""
 
     def lazy_load(self) -> Iterator[Document]:
-        """Lazy load records from table."""
+        """Lazy load Documents from table."""
 
         from pyairtable import Table
```
```diff
@@ -32,5 +35,5 @@ class AirtableLoader(BaseLoader):
         )
 
     def load(self) -> List[Document]:
-        """Load Table."""
+        """Load Documents from table."""
         return list(self.lazy_load())
```
```diff
@@ -1,4 +1,3 @@
-"""Logic for loading documents from Apify datasets."""
 from typing import Any, Callable, Dict, List
 
 from pydantic import BaseModel, root_validator
```
```diff
@@ -8,9 +7,10 @@ from langchain.document_loaders.base import BaseLoader
 
 
 class ApifyDatasetLoader(BaseLoader, BaseModel):
-    """Logic for loading documents from Apify datasets."""
+    """Loading Documents from Apify datasets."""
 
     apify_client: Any
+    """An instance of the ApifyClient class from the apify-client Python package."""
     dataset_id: str
     """The ID of the dataset on the Apify platform."""
     dataset_mapping_function: Callable[[Dict], Document]
```
```diff
@@ -34,7 +34,11 @@ class ApifyDatasetLoader(BaseLoader, BaseModel):
 
     @root_validator()
     def validate_environment(cls, values: Dict) -> Dict:
-        """Validate environment."""
+        """Validate environment.
+
+        Args:
+            values: The values to validate.
+        """
 
         try:
             from apify_client import ApifyClient
```
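A hedged construction sketch for the loader above: the validator builds `apify_client` itself (it needs the `apify-client` package and, typically, an Apify API token in the environment), so callers usually supply only the dataset ID and a mapping function. The item keys `text` and `url` are assumptions about one particular dataset's schema.

```python
from langchain.docstore.document import Document
from langchain.document_loaders import ApifyDatasetLoader

# Assumes the apify-client package is installed and APIFY_API_TOKEN is set.
loader = ApifyDatasetLoader(
    dataset_id="your-dataset-id",  # placeholder
    dataset_mapping_function=lambda item: Document(
        page_content=item["text"],  # assumed item schema
        metadata={"source": item["url"]},
    ),
)
docs = loader.load()
```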
```diff
@@ -19,8 +19,11 @@ class ArxivLoader(BaseLoader):
         load_all_available_meta: Optional[bool] = False,
     ):
         self.query = query
+        """The query to be passed to the arxiv.org API."""
         self.load_max_docs = load_max_docs
+        """The maximum number of documents to load."""
         self.load_all_available_meta = load_all_available_meta
+        """Whether to load all available metadata."""
 
     def load(self) -> List[Document]:
         arxiv_client = ArxivAPIWrapper(
```
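For context, a minimal usage sketch matching the attributes documented above; the query value is just an example.

```python
from langchain.document_loaders import ArxivLoader

# The query is passed to the arxiv.org API; load_max_docs caps the results.
loader = ArxivLoader(query="quantum computing", load_max_docs=2)
docs = loader.load()
print(len(docs), docs[0].metadata)
```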
```diff
@@ -9,7 +9,7 @@ class AZLyricsLoader(WebBaseLoader):
     """Loader that loads AZLyrics webpages."""
 
     def load(self) -> List[Document]:
-        """Load webpage."""
+        """Load webpages into Documents."""
         soup = self.scrape()
         title = soup.title.text
         lyrics = soup.find_all("div", {"class": ""})[2].text
```
```diff
@@ -9,20 +9,23 @@ from langchain.document_loaders.base import BaseLoader
 
 
 class AzureBlobStorageContainerLoader(BaseLoader):
-    """Loading logic for loading documents from Azure Blob Storage."""
+    """Loading Documents from Azure Blob Storage."""
 
     def __init__(self, conn_str: str, container: str, prefix: str = ""):
         """Initialize with connection string, container and blob prefix."""
         self.conn_str = conn_str
+        """Connection string for Azure Blob Storage."""
         self.container = container
+        """Container name."""
         self.prefix = prefix
+        """Prefix for blob names."""
 
     def load(self) -> List[Document]:
         """Load documents."""
         try:
             from azure.storage.blob import ContainerClient
         except ImportError as exc:
-            raise ValueError(
+            raise ImportError(
                 "Could not import azure storage blob python package. "
                 "Please install it with `pip install azure-storage-blob`."
             ) from exc
```
```diff
@@ -1,4 +1,3 @@
-"""Loading logic for loading documents from an Azure Blob Storage file."""
 import os
 import tempfile
 from typing import List
```
```diff
@@ -9,20 +8,23 @@ from langchain.document_loaders.unstructured import UnstructuredFileLoader
 
 
 class AzureBlobStorageFileLoader(BaseLoader):
-    """Loading logic for loading documents from Azure Blob Storage."""
+    """Loading Documents from Azure Blob Storage."""
 
     def __init__(self, conn_str: str, container: str, blob_name: str):
         """Initialize with connection string, container and blob name."""
         self.conn_str = conn_str
+        """Connection string for Azure Blob Storage."""
         self.container = container
+        """Container name."""
         self.blob = blob_name
+        """Blob name."""
 
     def load(self) -> List[Document]:
         """Load documents."""
         try:
             from azure.storage.blob import BlobClient
         except ImportError as exc:
-            raise ValueError(
+            raise ImportError(
                 "Could not import azure storage blob python package. "
                 "Please install it with `pip install azure-storage-blob`."
             ) from exc
```
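A hedged usage sketch for the file loader above; all three arguments are placeholders.

```python
from langchain.document_loaders import AzureBlobStorageFileLoader

loader = AzureBlobStorageFileLoader(
    conn_str="<your_connection_string>",  # placeholder
    container="<container_name>",
    blob_name="<blob_name>",
)
docs = loader.load()
```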
```diff
@@ -8,10 +8,10 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter
 
 
 class BaseLoader(ABC):
-    """Interface for loading documents.
+    """Interface for loading Documents.
 
     Implementations should implement the lazy-loading method using generators
-    to avoid loading all documents into memory at once.
+    to avoid loading all Documents into memory at once.
 
     The `load` method will remain as is for backwards compatibility, but its
     implementation should be just `list(self.lazy_load())`.
```
```diff
@@ -22,12 +22,20 @@ class BaseLoader(ABC):
     # This method returns a List which is materialized in memory.
     @abstractmethod
     def load(self) -> List[Document]:
-        """Load data into document objects."""
+        """Load data into Document objects."""
 
     def load_and_split(
         self, text_splitter: Optional[TextSplitter] = None
     ) -> List[Document]:
-        """Load documents and split into chunks."""
+        """Load Documents and split into chunks. Chunks are returned as Documents.
+
+        Args:
+            text_splitter: TextSplitter instance to use for splitting documents.
+                Defaults to RecursiveCharacterTextSplitter.
+
+        Returns:
+            List of Documents.
+        """
         if text_splitter is None:
             _text_splitter: TextSplitter = RecursiveCharacterTextSplitter()
         else:
```
```diff
@@ -40,7 +48,7 @@ class BaseLoader(ABC):
     def lazy_load(
         self,
     ) -> Iterator[Document]:
-        """A lazy loader for document content."""
+        """A lazy loader for Documents."""
         raise NotImplementedError(
             f"{self.__class__.__name__} does not implement lazy_load()"
        )
```
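The contract these docstrings describe (implement `lazy_load` as a generator, keep `load` as just `list(self.lazy_load())`) in a minimal, hypothetical loader:

```python
from typing import Iterator, List

from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader


class LineLoader(BaseLoader):
    """Hypothetical loader: one Document per line of a text file."""

    def __init__(self, path: str):
        self.path = path

    def lazy_load(self) -> Iterator[Document]:
        # Generator keeps memory flat: lines are yielded one at a time.
        with open(self.path) as f:
            for i, line in enumerate(f):
                yield Document(page_content=line.strip(), metadata={"line": i})

    def load(self) -> List[Document]:
        # Backwards-compatible eager variant, exactly as the interface suggests.
        return list(self.lazy_load())
```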
```diff
@@ -49,7 +57,7 @@ class BaseLoader(ABC):
 class BaseBlobParser(ABC):
     """Abstract interface for blob parsers.
 
-    A blob parser is provides a way to parse raw data stored in a blob into one
+    A blob parser provides a way to parse raw data stored in a blob into one
     or more documents.
 
     The parser can be composed with blob loaders, making it easy to re-use
```
```diff
@@ -34,8 +34,12 @@ class BibtexLoader(BaseLoader):
 
         Args:
             file_path: Path to the bibtex file.
+            parser: The parser to use. If None, a default parser is used.
             max_docs: Max number of associated documents to load. Use -1 means
                 no limit.
+            max_content_chars: Maximum number of characters to load from the PDF.
+            load_extra_metadata: Whether to load extra metadata from the PDF.
+            file_pattern: Regex pattern to match the file name in the bibtex.
         """
         self.file_path = file_path
         self.parser = parser or BibtexparserWrapper()
```
```diff
@@ -70,9 +74,7 @@ class BibtexLoader(BaseLoader):
 
     def lazy_load(self) -> Iterator[Document]:
         """Load bibtex file using bibtexparser and get the article texts plus the
-
         article metadata.
-
         See https://bibtexparser.readthedocs.io/en/master/
 
         Returns:
```
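A short usage sketch consistent with the Args documented above; the `.bib` path is a placeholder.

```python
from langchain.document_loaders import BibtexLoader

# max_docs=-1 means no limit, per the Args block above.
loader = BibtexLoader("./references.bib", max_docs=-1)
docs = loader.load()
```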
```diff
@@ -37,7 +37,7 @@ class BigQueryLoader(BaseLoader):
         metadata_columns: Optional. The columns to write into the `metadata` of the
             document.
         credentials : google.auth.credentials.Credentials, optional
             Credentials for accessing Google APIs. Use this parameter to override
             default credentials, such as to use Compute Engine
             (`google.auth.compute_engine.Credentials`) or Service Account
             (`google.oauth2.service_account.Credentials`) credentials directly.
```
```diff
@@ -52,7 +52,7 @@ class BigQueryLoader(BaseLoader):
         try:
             from google.cloud import bigquery
         except ImportError as ex:
-            raise ValueError(
+            raise ImportError(
                 "Could not import google-cloud-bigquery python package. "
                 "Please install it with `pip install google-cloud-bigquery`."
             ) from ex
```
```diff
@@ -13,11 +13,15 @@ class BiliBiliLoader(BaseLoader):
     """Loader that loads bilibili transcripts."""
 
     def __init__(self, video_urls: List[str]):
-        """Initialize with bilibili url."""
+        """Initialize with bilibili url.
+
+        Args:
+            video_urls: List of bilibili urls.
+        """
         self.video_urls = video_urls
 
     def load(self) -> List[Document]:
-        """Load from bilibili url."""
+        """Load Documents from bilibili url."""
         results = []
         for url in self.video_urls:
             transcript, video_info = self._get_bilibili_subs_and_info(url)
```
```diff
@@ -30,7 +34,7 @@ class BiliBiliLoader(BaseLoader):
         try:
             from bilibili_api import sync, video
         except ImportError:
-            raise ValueError(
+            raise ImportError(
                 "requests package not found, please install it with "
                 "`pip install bilibili-api-python`"
             )
```
```diff
@@ -12,7 +12,7 @@ from langchain.document_loaders.web_base import WebBaseLoader
 
 
 class BlackboardLoader(WebBaseLoader):
-    """Loader that loads all documents from a Blackboard course.
+    """Loads all documents from a Blackboard course.
 
     This loader is not compatible with all Blackboard courses. It is only
     compatible with courses that use the new Blackboard interface.
```
```diff
@@ -34,8 +34,11 @@ class BlackboardLoader(WebBaseLoader):
     """
 
     base_url: str
+    """Base url of the blackboard course."""
     folder_path: str
+    """Path to the folder containing the documents."""
     load_all_recursively: bool
+    """If True, load all documents recursively."""
 
     def __init__(
         self,
```
```diff
@@ -64,7 +67,7 @@ class BlackboardLoader(WebBaseLoader):
         try:
             self.base_url = blackboard_course_url.split("/webapps/blackboard")[0]
         except IndexError:
-            raise ValueError(
+            raise IndexError(
                 "Invalid blackboard course url. "
                 "Please provide a url that starts with "
                 "https://<blackboard_url>/webapps/blackboard"
```
```diff
@@ -94,10 +97,10 @@ class BlackboardLoader(WebBaseLoader):
         )
 
     def load(self) -> List[Document]:
-        """Load data into document objects.
+        """Load data into Document objects.
 
         Returns:
-            List of documents.
+            List of Documents.
         """
         if self.load_all_recursively:
             soup_info = self.scrape()
```
```diff
@@ -118,7 +121,7 @@ class BlackboardLoader(WebBaseLoader):
         return self._get_documents(soup_info)
 
     def _get_folder_path(self, soup: Any) -> str:
-        """Get the folder path to save the documents in.
+        """Get the folder path to save the Documents in.
 
         Args:
             soup: BeautifulSoup4 soup object.
```
```diff
@@ -229,7 +232,7 @@ class BlackboardLoader(WebBaseLoader):
         return relative_paths
 
     def download(self, path: str) -> None:
-        """Download a file from a url.
+        """Download a file from an url.
 
         Args:
             path: Path to the file.
```
```diff
@@ -243,7 +246,7 @@ class BlackboardLoader(WebBaseLoader):
             f.write(response.content)
 
     def parse_filename(self, url: str) -> str:
-        """Parse the filename from a url.
+        """Parse the filename from an url.
 
         Args:
             url: Url to parse the filename from.
```
```diff
@@ -257,7 +260,7 @@ class BlackboardLoader(WebBaseLoader):
         return self._parse_filename_from_url(url)
 
     def _parse_filename_from_url(self, url: str) -> str:
-        """Parse the filename from a url.
+        """Parse the filename from an url.
 
         Args:
             url: Url to parse the filename from.
```
```diff
@@ -55,6 +55,16 @@ class BlockchainDocumentLoader(BaseLoader):
         get_all_tokens: bool = False,
         max_execution_time: Optional[int] = None,
     ):
+        """
+
+        Args:
+            contract_address: The address of the smart contract.
+            blockchainType: The blockchain type.
+            api_key: The Alchemy API key.
+            startToken: The start token for pagination.
+            get_all_tokens: Whether to get all tokens on the contract.
+            max_execution_time: The maximum execution time (sec).
+        """
         self.contract_address = contract_address
         self.blockchainType = blockchainType.value
         self.api_key = os.environ.get("ALCHEMY_API_KEY") or api_key
```
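A hedged usage sketch for the constructor documented above. The contract address is a placeholder, and `BlockchainType.ETH_MAINNET` is an assumption about the enum's members; as the code shows, the Alchemy key can also come from the `ALCHEMY_API_KEY` environment variable.

```python
from langchain.document_loaders.blockchain import (
    BlockchainDocumentLoader,
    BlockchainType,
)

loader = BlockchainDocumentLoader(
    contract_address="0x0000000000000000000000000000000000000000",  # placeholder
    blockchainType=BlockchainType.ETH_MAINNET,  # assumed enum member
    api_key="your-alchemy-api-key",  # or set ALCHEMY_API_KEY
)
nfts = loader.load()
```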
```diff
@@ -1,4 +1,3 @@
-"""Load conversations from ChatGPT data export"""
 import datetime
 import json
 from typing import List
```
```diff
@@ -29,9 +28,15 @@ def concatenate_rows(message: dict, title: str) -> str:
 
 
 class ChatGPTLoader(BaseLoader):
-    """Loader that loads conversations from exported ChatGPT data."""
+    """Load conversations from exported ChatGPT data."""
 
     def __init__(self, log_file: str, num_logs: int = -1):
+        """
+
+        Args:
+            log_file: Path to the log file
+            num_logs: Number of logs to load. If 0, load all logs.
+        """
         self.log_file = log_file
         self.num_logs = num_logs
```
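For orientation, a minimal usage sketch; the path is a placeholder for the `conversations.json` file produced by ChatGPT's data export.

```python
from langchain.document_loaders import ChatGPTLoader

loader = ChatGPTLoader(log_file="./conversations.json", num_logs=1)
docs = loader.load()
```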
```diff
@@ -284,9 +284,7 @@
     "    error=False,  # Only runs that succeed\n",
     ")\n",
     "for run in runs:\n",
-    "    client.create_example(\n",
-    "        inputs=run.inputs, outputs=run.outputs, dataset_id=dataset.id\n",
-    "    )"
+    "    client.create_example(inputs=run.inputs, outputs=run.outputs, dataset_id=dataset.id)"
    ]
   },
   {
```
```diff
@@ -333,7 +331,7 @@
     "eval_llm = ChatOpenAI(model=\"gpt-4\", temperature=0)\n",
     "\n",
     "# Measures accuracy against ground truth\n",
-    "qa_evaluator = get_qa_evaluator(eval_llm) \n",
+    "qa_evaluator = get_qa_evaluator(eval_llm)\n",
     "\n",
     "# Measures how effective and efficient the agent's actions are\n",
     "tools = load_tools([\"serpapi\", \"llm-math\"], llm=llm)\n",
```
```diff
@@ -392,13 +390,13 @@
     "llm = ChatOpenAI(model=\"gpt-3.5-turbo-0613\", temperature=0)\n",
     "tools = load_tools([\"serpapi\", \"llm-math\"], llm=llm)\n",
     "\n",
+    "\n",
     "# Since chains can be stateful (e.g. they can have memory), we need provide\n",
     "# a way to initialize a new chain for each row in the dataset. This is done\n",
     "# by passing in a factory function that returns a new chain for each row.\n",
     "def agent_factory():\n",
-    "    return initialize_agent(\n",
-    "        tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=False\n",
-    ")\n",
+    "    return initialize_agent(tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=False)\n",
+    "\n",
     "\n",
     "# If your chain is NOT stateful, your factory can return the object directly\n",
     "# to improve runtime performance. For example:\n",
```
```diff
@@ -477,7 +475,7 @@
    "source": [
     "from langchain.client import (\n",
     "    arun_on_dataset,\n",
     "    run_on_dataset, # Available if your chain doesn't support async calls.\n",
     ")\n",
     "\n",
     "?arun_on_dataset"
```
```diff
@@ -616,9 +614,7 @@
   },
   "outputs": [],
   "source": [
-    "agent = initialize_agent(\n",
-    "    tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=False\n",
-    ")"
+    "agent = initialize_agent(tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=False)"
    ]
   },
   {
```