docs: update apify integration (#29553)

**Description:** Fixed and updated Apify integration documentation to use the new [langchain-apify](https://github.com/apify/langchain-apify) package. **Twitter handle:** @apify
2025-09-27 22:37:46 +00:00 · 2025-02-13 05:02:55 +01:00
parent 16fb1f5371
commit c8cb7c25bf
7 changed files with 399 additions and 34 deletions
--- a/docs/docs/integrations/document_loaders/apify_dataset.ipynb
+++ b/docs/docs/integrations/document_loaders/apify_dataset.ipynb
@@ -2,7 +2,9 @@
 "cells": [
  {
   "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
    "id": "xwiDq5fOuoRn"
   },
   "source": [
    "# Apify Dataset\n",
    "\n",
@@ -20,33 +22,63 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "qRW2-mokuoRp",
    "tags": []
   },
   "outputs": [],
   "source": [
-    "%pip install --upgrade --quiet  apify-client"
+    "%pip install --upgrade --quiet langchain langchain-apify langchain-openai"
   ]
  },
  {
   "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
    "id": "8jRVq16LuoRq"
   },
   "source": [
    "First, import `ApifyDatasetLoader` into your source code:"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
-   "metadata": {},
+   "metadata": {
    "id": "umXQHqIJuoRq"
   },
   "outputs": [],
   "source": [
-    "from langchain_community.document_loaders import ApifyDatasetLoader\n",
+    "from langchain_apify import ApifyDatasetLoader\n",
    "from langchain_core.documents import Document"
   ]
  },
  {
   "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
    "id": "NjGwKy59vz1X"
   },
   "source": [
    "Find your [Apify API token](https://console.apify.com/account/integrations) and [OpenAI API key](https://platform.openai.com/account/api-keys) and set them as environment variables:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "id": "AvzNtyCxwDdr"
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ[\"APIFY_API_TOKEN\"] = \"your-apify-api-token\"\n",
    "os.environ[\"OPENAI_API_KEY\"] = \"your-openai-api-key\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "d1O-KL48uoRr"
   },
   "source": [
    "Then provide a function that maps Apify dataset record fields to LangChain `Document` format.\n",
    "\n",
@@ -64,8 +96,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 8,
-   "metadata": {},
+   "metadata": {
    "id": "m1SpA7XZuoRr"
   },
   "outputs": [],
   "source": [
    "loader = ApifyDatasetLoader(\n",
@@ -78,8 +112,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
-   "metadata": {},
+   "metadata": {
    "id": "0hWX7ABsuoRs"
   },
   "outputs": [],
   "source": [
    "data = loader.load()"
@@ -87,7 +123,9 @@
  },
  {
   "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
    "id": "EJCVFVKNuoRs"
   },
   "source": [
    "## An example with question answering\n",
    "\n",
@@ -96,21 +134,26 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 14,
-   "metadata": {},
+   "metadata": {
    "id": "sNisJKzZuoRt"
   },
   "outputs": [],
   "source": [
    "from langchain.indexes import VectorstoreIndexCreator\n",
-    "from langchain_community.utilities import ApifyWrapper\n",
+    "from langchain_apify import ApifyWrapper\n",
    "from langchain_core.documents import Document\n",
-    "from langchain_openai import OpenAI\n",
+    "from langchain_core.vectorstores import InMemoryVectorStore\n",
    "from langchain_openai import ChatOpenAI\n",
    "from langchain_openai.embeddings import OpenAIEmbeddings"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 15,
-   "metadata": {},
+   "metadata": {
    "id": "qcfmnbdDuoRu"
   },
   "outputs": [],
   "source": [
    "loader = ApifyDatasetLoader(\n",
@@ -123,27 +166,47 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 16,
-   "metadata": {},
+   "metadata": {
    "id": "8b0xzKJxuoRv"
   },
   "outputs": [],
   "source": [
-    "index = VectorstoreIndexCreator(embedding=OpenAIEmbeddings()).from_loaders([loader])"
+    "index = VectorstoreIndexCreator(\n",
    "    vectorstore_cls=InMemoryVectorStore, embedding=OpenAIEmbeddings()\n",
    ").from_loaders([loader])"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 17,
-   "metadata": {},
+   "metadata": {
    "id": "7zPXGsVFwUGA"
   },
   "outputs": [],
   "source": [
    "llm = ChatOpenAI(model=\"gpt-4o-mini\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "id": "ecWrdM4guoRv"
   },
   "outputs": [],
   "source": [
    "query = \"What is Apify?\"\n",
-    "result = index.query_with_sources(query, llm=OpenAI())"
+    "result = index.query_with_sources(query, llm=llm)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
-   "metadata": {},
+   "metadata": {
    "id": "QH8r44e9uoRv",
    "outputId": "361fe050-f75d-4d5a-c327-5e7bd190fba5"
   },
   "outputs": [
    {
     "name": "stdout",
@@ -162,6 +225,9 @@
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
@@ -181,5 +247,5 @@
  }
 },
 "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 0
-}
+}
--- a/docs/docs/integrations/providers/apify.mdx
+++ b/docs/docs/integrations/providers/apify.mdx
@@ -14,20 +14,34 @@ blogs, or knowledge bases.
 ## Installation and Setup
- Install the Apify API client for Python with `pip install apify-client`
+- Install the LangChain Apify package for Python with:
 ```bash
 pip install langchain-apify
 ```
 - Get your [Apify API token](https://console.apify.com/account/integrations) and either set it as
-  an environment variable (`APIFY_API_TOKEN`) or pass it to the `ApifyWrapper` as `apify_api_token` in the constructor.
+  an environment variable (`APIFY_API_TOKEN`) or pass it as `apify_api_token` in the constructor.
 ## Tool
-## Utility
+You can use the `ApifyActorsTool` to use Apify Actors with agents.
 ```python
 from langchain_apify import ApifyActorsTool
 ```
 See [this notebook](/docs/integrations/tools/apify_actors) for example usage.
 For more information on how to use this tool, visit [the Apify integration documentation](https://docs.apify.com/platform/integrations/langgraph).
 ## Wrapper
 You can use the `ApifyWrapper` to run Actors on the Apify platform.
 ```python
-from langchain_community.utilities import ApifyWrapper
+from langchain_apify import ApifyWrapper
 ```
-For more information on this wrapper, see [the API reference](https://python.langchain.com/api_reference/community/utilities/langchain_community.utilities.apify.ApifyWrapper.html).
+For more information on how to use this wrapper, see [the Apify integration documentation](https://docs.apify.com/platform/integrations/langchain).
 ## Document loader
@@ -35,7 +49,10 @@ For more information on this wrapper, see [the API reference](https://python.lan
 You can also use our `ApifyDatasetLoader` to get data from an Apify dataset.
 ```python
-from langchain_community.document_loaders import ApifyDatasetLoader
+from langchain_apify import ApifyDatasetLoader
 ```
 For a more detailed walkthrough of this loader, see [this notebook](/docs/integrations/document_loaders/apify_dataset).
 Source code for this integration can be found in the [LangChain Apify repository](https://github.com/apify/langchain-apify).
--- a/docs/docs/integrations/tools/apify_actors.ipynb
+++ b/docs/docs/integrations/tools/apify_actors.ipynb
@@ -0,0 +1,256 @@
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "_9MNj58sIkGN"
   },
   "source": [
    "# Apify Actor\n",
    "\n",
    "## Overview\n",
    "\n",
    ">[Apify Actors](https://docs.apify.com/platform/actors) are cloud programs designed for a wide range of web scraping, crawling, and data extraction tasks. These Actors facilitate automated data gathering from the web, enabling users to extract, process, and store information efficiently. Actors can be used to perform tasks like scraping e-commerce sites for product details, monitoring price changes, or gathering search engine results. They integrate seamlessly with [Apify Datasets](https://docs.apify.com/platform/storage/dataset), allowing the structured data collected by Actors to be stored, managed, and exported in formats like JSON, CSV, or Excel for further analysis or use.\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "OHLF9t9v9HCb"
   },
   "source": [
    "## Setup\n",
    "\n",
    "This integration lives in the [langchain-apify](https://pypi.org/project/langchain-apify/) package. The package can be installed using pip.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "4DdGmBn5IbXz"
   },
   "outputs": [],
   "source": [
    "%pip install langchain-apify"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "rEAwonXqwggR"
   },
   "source": [
    "### Prerequisites\n",
    "\n",
    "- **Apify account**: Register your free Apify account [here](https://console.apify.com/sign-up).\n",
    "- **Apify API token**: Learn how to get your API token in the [Apify documentation](https://docs.apify.com/platform/integrations/api)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "9nJOl4MBMkcR"
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ[\"APIFY_API_TOKEN\"] = \"your-apify-api-token\"\n",
    "os.environ[\"OPENAI_API_KEY\"] = \"your-openai-api-key\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "UfoQxAlCxR9q"
   },
   "source": [
    "## Instantiation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "qG9KtXtLM8i7"
   },
   "source": [
    "Here we instantiate the `ApifyActorsTool` so that it can call the [RAG Web Browser](https://apify.com/apify/rag-web-browser) Apify Actor. This Actor provides web browsing functionality for AI and LLM applications, similar to the web browsing feature in ChatGPT. Any Actor from the [Apify Store](https://apify.com/store) can be used in this way."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "id": "cyxeTlPnM4Ya"
   },
   "outputs": [],
   "source": [
    "from langchain_apify import ApifyActorsTool\n",
    "\n",
    "tool = ApifyActorsTool(\"apify/rag-web-browser\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "fGDLvDCqyKWO"
   },
   "source": [
    "## Invocation\n",
    "\n",
    "The `ApifyActorsTool` takes a single argument, which is `run_input` - a dictionary that is passed as a run input to the Actor. Run input schema documentation can be found in the input section of the Actor details page. See [RAG Web Browser input schema](https://apify.com/apify/rag-web-browser/input-schema).\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "nTWy6Hx1yk04"
   },
   "outputs": [],
   "source": [
    "tool.invoke({\"run_input\": {\"query\": \"what is apify?\", \"maxResults\": 2}})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "kQsa27hoO58S"
   },
   "source": [
    "## Chaining\n",
    "\n",
    "We can provide the created tool to an [agent](https://python.langchain.com/docs/tutorials/agents/). When asked to search for information, the agent will call the Apify Actor, which will search the web, and then retrieve the search results.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "YySvLskW72Y8"
   },
   "outputs": [],
   "source": [
    "%pip install langgraph langchain-openai"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "id": "QEDz07btO5Gi"
   },
   "outputs": [],
   "source": [
    "from langchain_core.messages import ToolMessage\n",
    "from langchain_openai import ChatOpenAI\n",
    "from langgraph.prebuilt import create_react_agent\n",
    "\n",
    "model = ChatOpenAI(model=\"gpt-4o\")\n",
    "tools = [tool]\n",
    "graph = create_react_agent(model, tools=tools)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "XS1GEyNkQxGu",
    "outputId": "195273d7-034c-425b-f3f9-95c0a9fb0c9e"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "================================\u001b[1m Human Message \u001b[0m=================================\n",
      "\n",
      "search for what is Apify\n",
      "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
      "Tool Calls:\n",
      "  apify_actor_apify_rag-web-browser (call_27mjHLzDzwa5ZaHWCMH510lm)\n",
      " Call ID: call_27mjHLzDzwa5ZaHWCMH510lm\n",
      "  Args:\n",
      "    run_input: {\"run_input\":{\"query\":\"Apify\",\"maxResults\":3,\"outputFormats\":[\"markdown\"]}}\n",
      "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
      "\n",
      "Apify is a comprehensive platform for web scraping, browser automation, and data extraction. It offers a wide array of tools and services that cater to developers and businesses looking to extract data from websites efficiently and effectively. Here's an overview of Apify:\n",
      "\n",
      "1. **Ecosystem and Tools**:\n",
      "   - Apify provides an ecosystem where developers can build, deploy, and publish data extraction and web automation tools called Actors.\n",
      "   - The platform supports various use cases such as extracting data from social media platforms, conducting automated browser-based tasks, and more.\n",
      "\n",
      "2. **Offerings**:\n",
      "   - Apify offers over 3,000 ready-made scraping tools and code templates.\n",
      "   - Users can also build custom solutions or hire Apify's professional services for more tailored data extraction needs.\n",
      "\n",
      "3. **Technology and Integration**:\n",
      "   - The platform supports integration with popular tools and services like Zapier, GitHub, Google Sheets, Pinecone, and more.\n",
      "   - Apify supports open-source tools and technologies such as JavaScript, Python, Puppeteer, Playwright, Selenium, and its own Crawlee library for web crawling and browser automation.\n",
      "\n",
      "4. **Community and Learning**:\n",
      "   - Apify hosts a community on Discord where developers can get help and share expertise.\n",
      "   - It offers educational resources through the Web Scraping Academy to help users become proficient in data scraping and automation.\n",
      "\n",
      "5. **Enterprise Solutions**:\n",
      "   - Apify provides enterprise-grade web data extraction solutions with high reliability, 99.95% uptime, and compliance with SOC2, GDPR, and CCPA standards.\n",
      "\n",
      "For more information, you can visit [Apify's official website](https://apify.com/) or their [GitHub page](https://github.com/apify) which contains their code repositories and further details about their projects.\n"
     ]
    }
   ],
   "source": [
    "inputs = {\"messages\": [(\"user\", \"search for what is Apify\")]}\n",
    "for s in graph.stream(inputs, stream_mode=\"values\"):\n",
    "    message = s[\"messages\"][-1]\n",
    "    # skip tool messages\n",
    "    if isinstance(message, ToolMessage):\n",
    "        continue\n",
    "    message.pretty_print()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "WYXuQIQx8AvG"
   },
   "source": [
    "## API reference\n",
    "\n",
    "For more information on how to use this integration, see the [git repository](https://github.com/apify/langchain-apify) or the [Apify integration documentation](https://docs.apify.com/platform/integrations/langgraph)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "f1NnMik78oib"
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "provenance": [],
   "toc_visible": true
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
 }
--- a/docs/static/img/ApifyActors.png
+++ b/docs/static/img/ApifyActors.png
--- a/libs/community/langchain_community/document_loaders/apify_dataset.py
+++ b/libs/community/langchain_community/document_loaders/apify_dataset.py
@@ -1,11 +1,22 @@
 from typing import Any, Callable, Dict, List
 from langchain_core._api import deprecated
 from langchain_core.documents import Document
 from pydantic import BaseModel, model_validator
 from langchain_community.document_loaders.base import BaseLoader
@deprecated(
    since="0.3.18",
    message=(
        "This class is deprecated and will be removed in a future version. "
        "You can swap to using the `ApifyDatasetLoader`"
        " implementation in `langchain_apify` package. "
        "See <https://github.com/apify/langchain-apify>"
    ),
    alternative_import="langchain_apify.ApifyDatasetLoader",
 )
 class ApifyDatasetLoader(BaseLoader, BaseModel):
    """Load datasets from `Apify` web scraping, crawling, and data extraction platform.
--- a/libs/community/langchain_community/utilities/apify.py
+++ b/libs/community/langchain_community/utilities/apify.py
@@ -1,5 +1,6 @@
 from typing import TYPE_CHECKING, Any, Callable, Dict, Optional
 from langchain_core._api import deprecated
 from langchain_core.documents import Document
 from langchain_core.utils import get_from_dict_or_env
 from pydantic import BaseModel, model_validator
@@ -8,6 +9,16 @@ if TYPE_CHECKING:
    from langchain_community.document_loaders import ApifyDatasetLoader
@deprecated(
    since="0.3.18",
    message=(
        "This class is deprecated and will be removed in a future version. "
        "You can swap to using the `ApifyWrapper`"
        " implementation in `langchain_apify` package. "
        "See <https://github.com/apify/langchain-apify>"
    ),
    alternative_import="langchain_apify.ApifyWrapper",
 )
 class ApifyWrapper(BaseModel):
    """Wrapper around Apify.
    To use, you should have the ``apify-client`` python package installed,
--- a/libs/packages.yml
+++ b/libs/packages.yml
@@ -386,6 +386,10 @@ packages:
  repo: Nimbleway/langchain-nimble
  path: .
  downloads: 0
 - name: langchain-apify
  path: .
  repo: apify/langchain-apify
  downloads: 204
 - name: langfair
  repo: cvs-health/langfair
  path: .