diff --git a/docs/docs/integrations/document_loaders/oxylabs.ipynb b/docs/docs/integrations/document_loaders/oxylabs.ipynb new file mode 100644 index 00000000000..b23102dd43b --- /dev/null +++ b/docs/docs/integrations/document_loaders/oxylabs.ipynb @@ -0,0 +1,334 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Oxylabs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[Oxylabs](https://oxylabs.io/) is a web intelligence collection platform that enables companies worldwide to unlock data-driven insights.\n", + "\n", + "## Overview\n", + "\n", + "The Oxylabs document loader allows you to load data from search engines, e-commerce sites, travel platforms, and any other website. It supports geolocation, browser rendering, data parsing, multiple user agents and many more parameters. Check out [Oxylabs documentation](https://developers.oxylabs.io/scraping-solutions/web-scraper-api) for more information.\n", + "\n", + "\n", + "### Integration details\n", + "\n", + "| Class | Package | Local | Serializable | Pricing |\n", + "|:--------------|:------------------------------------------------------------------|:-----:|:------------:|:-----------------------------:|\n", + "| OxylabsLoader | [langchain-oxylabs](https://github.com/oxylabs/langchain-oxylabs) | ✅ | ❌ | Free 5,000 results for 1 week |\n", + "\n", + "### Loader features\n", + "| Document Lazy Loading |\n", + "|:---------------------:|\n", + "| ✅ |\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Install the required dependencies.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "%pip install -U langchain-oxylabs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Credentials\n" + ] + }, + { + "cell_type": "markdown", + "metadata": 
{}, + "source": [ + "Set up the proper API keys and environment variables.\n", + "Sign up for a free trial or purchase the product\n", + "in the [Oxylabs dashboard](https://dashboard.oxylabs.io/en/registration)\n", + "to create your API user credentials (`OXYLABS_USERNAME` and `OXYLABS_PASSWORD`)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "os.environ[\"OXYLABS_USERNAME\"] = getpass.getpass(\"Enter your Oxylabs username: \")\n", + "os.environ[\"OXYLABS_PASSWORD\"] = getpass.getpass(\"Enter your Oxylabs password: \")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialization" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "ExecuteTime": { + "end_time": "2025-08-06T10:57:51.630011Z", + "start_time": "2025-08-06T10:57:51.623814Z" + } + }, + "outputs": [], + "source": [ + "from langchain_oxylabs import OxylabsLoader" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "ExecuteTime": { + "end_time": "2025-08-06T10:57:53.685413Z", + "start_time": "2025-08-06T10:57:53.628859Z" + } + }, + "outputs": [], + "source": [ + "loader = OxylabsLoader(\n", + " urls=[\n", + " \"https://sandbox.oxylabs.io/products/1\",\n", + " \"https://sandbox.oxylabs.io/products/2\",\n", + " ],\n", + " params={\"markdown\": True},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": "## Load" + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "ExecuteTime": { + "end_time": "2025-08-06T10:59:51.487327Z", + "start_time": "2025-08-06T10:59:48.592743Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2751\n", + "[![](data:image/svg+xml...)![logo](data:image/gif;base64...)![logo](/_next/image?url=%2F_next%2Fstatic%2Fmedia%2FnavLogo.a8764883.png&w=750&q=75)](/)\n", + "\n", + 
"Game platforms:\n", + "\n", + "* **All**\n", + "\n", + "* [Nintendo platform](/products/category/nintendo)\n", + "\n", + "+ wii\n", + "+ wii-u\n", + "+ nintendo-64\n", + "+ switch\n", + "+ gamecube\n", + "+ game-boy-advance\n", + "+ 3ds\n", + "+ ds\n", + "\n", + "* [Xbox platform](/products/category/xbox-platform)\n", + "\n", + "* **Dreamcast**\n", + "\n", + "* [Playstation platform](/products/category/playstation-platform)\n", + "\n", + "* **Pc**\n", + "\n", + "* **Stadia**\n", + "\n", + "Go Back\n", + "\n", + "Note!This is a sandbox website used for web scraping. Information listed in this website does not have any real meaning and should not be associated with the actual products.\n", + "\n", + "![The Legend of Zelda: Ocarina of Time](data:image/gif;base64...)![The Legend of Zelda: Ocarina of Time](/assets/action-adventure.svg)\n", + "\n", + "## The Legend of Zelda: Ocarina of Time\n", + "\n", + "**Developer:** Nintendo**Platform:****Type:** singleplayer\n", + "\n", + "As a young boy, Link is tricked by Ganondorf, the King of the Gerudo Thieves. The evil human uses Link to g\n", + "5542\n", + "[![](data:image/svg+xml...)![logo](data:image/gif;base64...)![logo](/_next/image?url=%2F_next%2Fstatic%2Fmedia%2FnavLogo.a8764883.png&w=750&q=75)](/)\n", + "\n", + "Game platforms:\n", + "\n", + "* **All**\n", + "\n", + "* [Nintendo platform](/products/category/nintendo)\n", + "\n", + "+ wii\n", + "+ wii-u\n", + "+ nintendo-64\n", + "+ switch\n", + "+ gamecube\n", + "+ game-boy-advance\n", + "+ 3ds\n", + "+ ds\n", + "\n", + "* [Xbox platform](/products/category/xbox-platform)\n", + "\n", + "* **Dreamcast**\n", + "\n", + "* [Playstation platform](/products/category/playstation-platform)\n", + "\n", + "* **Pc**\n", + "\n", + "* **Stadia**\n", + "\n", + "Go Back\n", + "\n", + "Note!This is a sandbox website used for web scraping. 
Information listed in this website does not have any real meaning and should not be associated with the actual products.\n", + "\n", + "![Super Mario Galaxy](data:image/gif;base64...)![Super Mario Galaxy](/assets/action.svg)\n", + "\n", + "## Super Mario Galaxy\n", + "\n", + "**Developer:** Nintendo**Platform:****Type:** singleplayer\n", + "\n", + "[Metacritic's 2007 Wii Game of the Year] The ultimate Nintendo hero is taking the ultimate step ... out into space. Join Mario as he ushers in a new era of video games, de\n" + ] + } + ], + "source": [ + "for document in loader.load():\n", + " print(document.page_content[:1000])" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "## Lazy Load" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "for document in loader.lazy_load():\n", + " print(document.page_content[:1000])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Advanced examples\n", + "\n", + "The following examples show the usage of `OxylabsLoader` with geolocation, currency, pagination and user agent parameters for Amazon Search and Google Search sources." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "ExecuteTime": { + "end_time": "2025-08-06T11:04:19.901122Z", + "start_time": "2025-08-06T11:04:19.838933Z" + } + }, + "outputs": [], + "source": [ + "loader = OxylabsLoader(\n", + " queries=[\"gaming headset\", \"gaming chair\", \"computer mouse\"],\n", + " params={\n", + " \"source\": \"amazon_search\",\n", + " \"parse\": True,\n", + " \"geo_location\": \"DE\",\n", + " \"currency\": \"EUR\",\n", + " \"pages\": 3,\n", + " },\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "ExecuteTime": { + "end_time": "2025-08-06T11:07:17.648142Z", + "start_time": "2025-08-06T11:07:17.595629Z" + } + }, + "outputs": [], + "source": [ + "loader = OxylabsLoader(\n", + " queries=[\"europe gdp per capita\", \"us gdp per capita\"],\n", + " params={\n", + " \"source\": \"google_search\",\n", + " \"parse\": True,\n", + " \"geo_location\": \"Paris, France\",\n", + " \"user_agent_type\": \"mobile\",\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "[More information about this package.](https://github.com/oxylabs/langchain-oxylabs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/src/theme/FeatureTables.js b/docs/src/theme/FeatureTables.js index a8e579f56d3..fd2ea36638a 100644 --- a/docs/src/theme/FeatureTables.js +++ b/docs/src/theme/FeatureTables.js @@ -856,6 +856,13 @@ const FEATURE_TABLES = { source: "Web interaction and structured data extraction from any web page using an AgentQL query 
or a Natural Language prompt", api: "API", apiLink: "https://python.langchain.com/docs/integrations/document_loaders/agentql/" + }, + { + name: "Oxylabs", + link: "oxylabs", + source: "Web intelligence platform enabling the access to various data sources.", + api: "API", + apiLink: "https://github.com/oxylabs/langchain-oxylabs" } ] },