From 7c24a6b9d1416345d0430773c2a7c48cc9ffe01e Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Thu, 20 Jul 2023 08:36:01 -0700 Subject: [PATCH] Bagatur/apify (#8008) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --------- Co-authored-by: Jiří Moravčík Co-authored-by: Jan Čurn --- langchain/document_loaders/apify_dataset.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/langchain/document_loaders/apify_dataset.py b/langchain/document_loaders/apify_dataset.py index 7c0268fa4a8..ca3ae6f995c 100644 --- a/langchain/document_loaders/apify_dataset.py +++ b/langchain/document_loaders/apify_dataset.py @@ -7,7 +7,23 @@ from langchain.document_loaders.base import BaseLoader class ApifyDatasetLoader(BaseLoader, BaseModel): - """Loading Documents from Apify datasets.""" + """Loads datasets from Apify, a web scraping, crawling, and data extraction platform. + For details, see https://docs.apify.com/platform/integrations/langchain + + Example: + .. code-block:: python + + from langchain.document_loaders import ApifyDatasetLoader + from langchain.schema import Document + + loader = ApifyDatasetLoader( + dataset_id="YOUR-DATASET-ID", + dataset_mapping_function=lambda dataset_item: Document( + page_content=dataset_item["text"], metadata={"source": dataset_item["url"]} + ), + ) + documents = loader.load() + """ # noqa: E501 apify_client: Any """An instance of the ApifyClient class from the apify-client Python package."""