From a32798abd7ba5cee02956d4bfc2bf19f8047887a Mon Sep 17 00:00:00 2001 From: Scott Nath Date: Thu, 8 Feb 2024 16:47:50 -0500 Subject: [PATCH] community: Add you.com utility, update you retriever integration docs (#17014) - **Description: changes to you.com files** - general cleanup - adds community/utilities/you.py, moving bulk of code from retriever -> utility - removes `snippet` as endpoint - adds `news` as endpoint - adds more tests **Description: update community MAKE file** - adds `integration_tests` - adds `coverage` - **Issue:** the issue # it fixes if applicable, - [For New Contributors: Update Integration Documentation](https://github.com/langchain-ai/langchain/issues/15664#issuecomment-1920099868) - **Dependencies:** n/a - **Twitter handle:** @scottnath - **Mastodon handle:** scottnath@mastodon.social --------- Co-authored-by: Bagatur --- .../retrievers/you-retriever.ipynb | 385 +++++++++++++++++- .../retrievers/__init__.py | 4 +- .../langchain_community/retrievers/you.py | 65 +-- .../langchain_community/utilities/__init__.py | 9 + .../langchain_community/utilities/you.py | 230 +++++++++++ .../unit_tests/retrievers/test_imports.py | 1 + .../tests/unit_tests/retrievers/test_you.py | 82 +++- .../unit_tests/utilities/test_imports.py | 1 + .../tests/unit_tests/utilities/test_you.py | 190 +++++++++ 9 files changed, 881 insertions(+), 86 deletions(-) create mode 100644 libs/community/langchain_community/utilities/you.py create mode 100644 libs/community/tests/unit_tests/utilities/test_you.py diff --git a/docs/docs/integrations/retrievers/you-retriever.ipynb b/docs/docs/integrations/retrievers/you-retriever.ipynb index 447111de5a5..950c592cda7 100644 --- a/docs/docs/integrations/retrievers/you-retriever.ipynb +++ b/docs/docs/integrations/retrievers/you-retriever.ipynb @@ -2,39 +2,396 @@ "cells": [ { "cell_type": "markdown", - "id": "47828a7a", + "id": "818fc023", "metadata": {}, "source": [ - "## Using the You.com Retriever\n", - "The retriever from You.com is 
good for retrieving lots of text. We return multiple of the best text snippets per URL we find to be relevant.\n", + "# You.com Retriever\n", "\n", - "First you just need to initialize the retriever" + "The [you.com API](https://api.you.com) is a suite of tools designed to help developers ground the output of LLMs in the most recent, most accurate, most relevant information that may not have been included in their training dataset." + ] + }, + { + "cell_type": "markdown", + "id": "02335552", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "id": "c5c53f28", + "metadata": {}, + "source": [ + "The retriever lives in the `langchain-community` package.\n", + "\n", + "You also need to set your you.com API key." ] }, { "cell_type": "code", "execution_count": null, + "id": "6d091ccb", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet langchain-community" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "f43dd867", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"YDC_API_KEY\"] = \"\"\n", + "\n", + "# For use in Chaining section\n", + "os.environ[\"OPENAI_API_KEY\"] = \"\"\n", + "\n", + "## ALTERNATIVE: load YDC_API_KEY from a .env file\n", + "\n", + "# !pip install --quiet -U python-dotenv\n", + "# import dotenv\n", + "# dotenv.load_dotenv()" + ] + }, + { + "cell_type": "markdown", + "id": "17e70216", + "metadata": {}, + "source": [ + "It's also helpful (but not needed) to set up [LangSmith](https://smith.langchain.com/) for best-in-class observability." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ce49fa4c", + "metadata": {}, + "outputs": [], + "source": [ + "# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", + "# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()\n", + "# os.environ[\"LANGCHAIN_PROJECT\"] = 'Experimentz'" + ] + }, + { + "cell_type": "markdown", + "id": "2278de15", + "metadata": {}, + "source": [ 
+ "## Utility Usage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab11b2e0", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.utilities.you import YouSearchAPIWrapper\n", + "\n", + "utility = YouSearchAPIWrapper(num_web_results=1)\n", + "\n", + "utility" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "2656a45d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n", + "[\n", + " {\n", + " \"description\": \"Be prepared with the most accurate 10-day forecast for Manhattan, NY with highs, lows, chance of precipitation from The Weather Channel and Weather.com\",\n", + " \"snippets\": [\n", + " \"10 Day Weather-Manhattan, NY\\nToday43\\u00b0/39\\u00b01%\\nToday\\nSun 31 | Day\\nGenerally cloudy. High 43F. Winds W at 10 to 15 mph.\\n- Humidity54%\\n- UV Index0 of 11\\n- Sunrise7:19 am\\n- Sunset4:38 pm\\nSun 31 | Night\\nCloudy. Low 39F. Winds light and variable.\\n- Humidity70%\\n- UV Index0 of 11\\n- Moonrise9:13 pmWaning Gibbous\\n- Moonset10:28 am\\nMon 0145\\u00b0/33\\u00b07%\\nMon 01\\nMon 01 | Day\\nConsiderable cloudiness. High around 45F. Winds light and variable.\\n- Humidity71%\\n- UV Index1 of 11\\n- Sunrise7:19 am\\n- Sunset4:39 pm\\nMon 01 | Night\\nA few clouds. Low 33F. Winds NNW at 5 to 10 mph.\\n- Humidity64%\\n- UV Index0 of 11\\n- Moonrise10:14 pmWaning Gibbous\\n- Moonset10:49 am\\nTue 0246\\u00b0/35\\u00b04%\\nTue 02\\nTue 02 | Day\\nMainly sunny. High 46F. Winds NW at 5 to 10 mph.\\n- Humidity52%\\n- UV Index2 of 11\\n- Sunrise7:19 am\\n- Sunset4:40 pm\\nTue 02 | Night\\nA few clouds overnight. Low around 35F. Winds W at 5 to 10 mph.\\n- Humidity64%\\n- UV Index0 of 11\\n- Moonrise11:13 pmWaning Gibbous\\n- Moonset11:08 am\\nWed 0346\\u00b0/38\\u00b04%\\nWed 03\\nWed 03 | Day\",\n", + " \"Radar\\nLatest News\\nOur Changing World\\nYour Privacy\\nTo personalize your product experience, we collect data from your device. 
We also may use or disclose to specific data vendors your precise geolocation data to provide the Services. To learn more please refer to our Privacy Policy.\\nChoose how my information is shared\",\n", + " \"- Humidity82%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:34 pm\\nTue 26 | Night\\nCloudy with light rain developing after midnight. Low 47F. Winds light and variable. Chance of rain 80%.\\n- Humidity90%\\n- UV Index0 of 11\\n- Moonrise4:00 pmFull Moon\\n- Moonset7:17 am\\nWed 2754\\u00b0/49\\u00b093%\\nWed 27\\nWed 27 | Day\\nRain. High 54F. Winds E at 5 to 10 mph. Chance of rain 90%. Rainfall near a half an inch.\\n- Humidity93%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:35 pm\\nWed 27 | Night\\nSteady light rain in the evening. Showers continuing late. Low 49F. Winds light and variable. Chance of rain 70%.\\n- Humidity91%\\n- UV Index0 of 11\\n- Moonrise4:59 pmFull Moon\\n- Moonset8:12 am\\nThu 2853\\u00b0/42\\u00b019%\\nThu 28\\nThu 28 | Day\\nCloudy skies early will become partly cloudy later in the day. Slight chance of a rain shower. High 53F. Winds WSW at 5 to 10 mph.\\n- Humidity77%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:36 pm\\nThu 28 | Night\\nPartly cloudy skies. Low 42F. Winds W at 5 to 10 mph.\\n- Humidity71%\\n- UV Index0 of 11\",\n", + " \"- Moonrise2:20 amWaning Crescent\\n- Moonset12:33 pm\\nSun 0740\\u00b0/29\\u00b019%\\nSun 07\\nSun 07 | Day\\nIntervals of clouds and sunshine. High around 40F. Winds NW at 5 to 10 mph.\\n- Humidity57%\\n- UV Index2 of 11\\n- Sunrise7:19 am\\n- Sunset4:44 pm\\nSun 07 | Night\\nA few clouds from time to time. Low 29F. Winds NNW at 5 to 10 mph.\\n- Humidity60%\\n- UV Index0 of 11\\n- Moonrise3:28 amWaning Crescent\\n- Moonset1:04 pm\\nMon 0840\\u00b0/32\\u00b035%\\nMon 08\\nMon 08 | Day\\nPartly cloudy early followed mostly cloudy skies and a few snow showers later in the day. High near 40F. Winds N at 5 to 10 mph. 
Chance of snow 40%.\\n- UV Index1 of 11\\n- Sunrise7:19 am\\n- Sunset4:45 pm\\nMon 08 | Night\\nVariable clouds with snow showers or flurries. Low 32F. Winds NNE at 5 to 10 mph. Chance of snow 60%. Snow accumulations less than one inch.\\n- UV Index0 of 11\\n- Moonrise4:40 amWaning Crescent\\n- Moonset1:43 pm\\nLatest News\\nOur Changing World\\nYour Privacy\",\n", + " \"- Humidity91%\\n- UV Index0 of 11\\n- Moonrise5:50 amWaning Crescent\\n- Moonset2:35 pm\\nWed 1056\\u00b0/39\\u00b034%\\nWed 10\\nWed 10 | Day\\nA shower or two possible early with partly cloudy skies in the afternoon. Morning high of 56F with temps falling to near 45. Winds SW at 15 to 25 mph. Chance of rain 30%.\\n- Humidity66%\\n- UV Index1 of 11\\n- Sunrise7:19 am\\n- Sunset4:47 pm\\nWed 10 | Night\\nA few clouds from time to time. Low 39F. Winds WSW at 10 to 20 mph.\\n- Humidity64%\\n- UV Index0 of 11\\n- Moonrise6:56 amWaning Crescent\\n- Moonset3:38 pm\\nThu 1147\\u00b0/38\\u00b05%\\nThu 11\\nThu 11 | Day\\nPartly cloudy. High 47F. Winds WSW at 5 to 10 mph.\\n- Humidity62%\\n- UV Index2 of 11\\n- Sunrise7:19 am\\n- Sunset4:48 pm\\nThu 11 | Night\\nMostly clear skies. Low 38F. Winds W at 5 to 10 mph.\\n- Humidity66%\\n- UV Index0 of 11\\n- Moonrise7:52 amNew Moon\\n- Moonset4:53 pm\\nFri 1248\\u00b0/42\\u00b019%\\nFri 12\\nFri 12 | Day\\nIntervals of clouds and sunshine. High 48F. Winds WSW at 5 to 10 mph.\\n- Humidity62%\\n- UV Index2 of 11\\n- Sunrise7:18 am\\n- Sunset4:49 pm\",\n", + " \"Sat 1346\\u00b0/36\\u00b053%\\nSat 13\\nSat 13 | Day\\nCloudy with showers. High 46F. Winds WSW at 10 to 15 mph. Chance of rain 50%.\\n- Humidity73%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:50 pm\\nSat 13 | Night\\nRain showers early transitioning to snow showers late. Low 36F. Winds W at 10 to 15 mph. 
Chance of precip 50%.\\n- Humidity70%\\n- UV Index0 of 11\\n- Moonrise9:14 amWaxing Crescent\\n- Moonset7:33 pm\\nSun 1442\\u00b0/34\\u00b037%\\nSun 14\\nSun 14 | Day\\nSnow showers early will transition to a few showers later. High 42F. Winds WSW at 10 to 15 mph. Chance of rain 40%.\\n- Humidity63%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:51 pm\\nSun 14 | Night\\nVariable clouds with snow showers. Low 34F. Winds W at 10 to 15 mph. Chance of snow 60%. Snow accumulations less than one inch.\\n- UV Index0 of 11\\n- Moonrise9:44 amWaxing Crescent\\n- Moonset8:52 pm\\nMon 1540\\u00b0/31\\u00b051%\\nMon 15\\nMon 15 | Day\",\n", + " \"- Humidity70%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:34 pm\\nMon 25 | Night\\nOvercast with showers at times. Low 43F. Winds light and variable. Chance of rain 40%.\\n- Humidity80%\\n- UV Index0 of 11\\n- Moonrise3:08 pmWaxing Gibbous\\n- Moonset6:14 am\\nTue 2653\\u00b0/45\\u00b058%\\nTue 26\\nTue 26 | Day\\nOvercast with rain showers at times. High 53F. Winds E at 5 to 10 mph. Chance of rain 60%.\\n- Humidity79%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:34 pm\\nTue 26 | Night\\nShowers early then scattered thunderstorms developing late. Low near 45F. Winds ESE at 5 to 10 mph. Chance of rain 60%.\\n- Humidity93%\\n- UV Index0 of 11\\n- Moonrise4:00 pmFull Moon\\n- Moonset7:17 am\\nWed 2751\\u00b0/41\\u00b058%\\nWed 27\\nWed 27 | Day\\nCloudy with showers. High 51F. Winds WSW at 5 to 10 mph. Chance of rain 60%.\\n- Humidity79%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:35 pm\\nWed 27 | Night\\nCloudy with showers. Low 41F. Winds NW at 5 to 10 mph. 
Chance of rain 60%.\\n- Humidity72%\\n- UV Index0 of 11\\n- Moonrise4:59 pmFull Moon\\n- Moonset8:13 am\"\n", + " ],\n", + " \"thumbnail_url\": \"https://imgs.search.brave.com/9xHc5-Bh2lvLyRJwQqeegm3gzoF6hawlpF8LZEjFLo8/rs:fit:200:200:1/g:ce/aHR0cHM6Ly9zLnct/eC5jby8yNDB4MTgw/X3R3Y19kZWZhdWx0/LnBuZw\",\n", + " \"title\": \"10-Day Weather Forecast for Manhattan, NY - The Weather Channel ...\",\n", + " \"url\": \"https://weather.com/weather/tenday/l/New+York+NY+USNY0996:1:US\"\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "import json\n", + "\n", + "# .raw_results returns the unaltered response from the API\n", + "response = utility.raw_results(query=\"What is the weather in NY\")\n", + "# API returns an object with a `hits` key containing a list of hits\n", + "hits = response[\"hits\"]\n", + "\n", + "# with `num_web_results=1`, `hits` should be len of 1\n", + "print(len(hits))\n", + "\n", + "print(json.dumps(hits, indent=2))" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "c8f5689f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "7\n", + "[Document(page_content='10 Day Weather-Manhattan, NY\\nToday43°/39°1%\\nToday\\nSun 31 | Day\\nGenerally cloudy. High 43F. Winds W at 10 to 15 mph.\\n- Humidity54%\\n- UV Index0 of 11\\n- Sunrise7:19 am\\n- Sunset4:38 pm\\nSun 31 | Night\\nCloudy. Low 39F. Winds light and variable.\\n- Humidity70%\\n- UV Index0 of 11\\n- Moonrise9:13 pmWaning Gibbous\\n- Moonset10:28 am\\nMon 0145°/33°7%\\nMon 01\\nMon 01 | Day\\nConsiderable cloudiness. High around 45F. Winds light and variable.\\n- Humidity71%\\n- UV Index1 of 11\\n- Sunrise7:19 am\\n- Sunset4:39 pm\\nMon 01 | Night\\nA few clouds. Low 33F. Winds NNW at 5 to 10 mph.\\n- Humidity64%\\n- UV Index0 of 11\\n- Moonrise10:14 pmWaning Gibbous\\n- Moonset10:49 am\\nTue 0246°/35°4%\\nTue 02\\nTue 02 | Day\\nMainly sunny. High 46F. 
Winds NW at 5 to 10 mph.\\n- Humidity52%\\n- UV Index2 of 11\\n- Sunrise7:19 am\\n- Sunset4:40 pm\\nTue 02 | Night\\nA few clouds overnight. Low around 35F. Winds W at 5 to 10 mph.\\n- Humidity64%\\n- UV Index0 of 11\\n- Moonrise11:13 pmWaning Gibbous\\n- Moonset11:08 am\\nWed 0346°/38°4%\\nWed 03\\nWed 03 | Day', metadata={'url': 'https://weather.com/weather/tenday/l/New+York+NY+USNY0996:1:US', 'thumbnail_url': 'https://imgs.search.brave.com/9xHc5-Bh2lvLyRJwQqeegm3gzoF6hawlpF8LZEjFLo8/rs:fit:200:200:1/g:ce/aHR0cHM6Ly9zLnct/eC5jby8yNDB4MTgw/X3R3Y19kZWZhdWx0/LnBuZw', 'title': '10-Day Weather Forecast for Manhattan, NY - The Weather Channel ...', 'description': 'Be prepared with the most accurate 10-day forecast for Manhattan, NY with highs, lows, chance of precipitation from The Weather Channel and Weather.com'}), Document(page_content='Radar\\nLatest News\\nOur Changing World\\nYour Privacy\\nTo personalize your product experience, we collect data from your device. We also may use or disclose to specific data vendors your precise geolocation data to provide the Services. To learn more please refer to our Privacy Policy.\\nChoose how my information is shared', metadata={'url': 'https://weather.com/weather/tenday/l/New+York+NY+USNY0996:1:US', 'thumbnail_url': 'https://imgs.search.brave.com/9xHc5-Bh2lvLyRJwQqeegm3gzoF6hawlpF8LZEjFLo8/rs:fit:200:200:1/g:ce/aHR0cHM6Ly9zLnct/eC5jby8yNDB4MTgw/X3R3Y19kZWZhdWx0/LnBuZw', 'title': '10-Day Weather Forecast for Manhattan, NY - The Weather Channel ...', 'description': 'Be prepared with the most accurate 10-day forecast for Manhattan, NY with highs, lows, chance of precipitation from The Weather Channel and Weather.com'}), Document(page_content='- Humidity82%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:34 pm\\nTue 26 | Night\\nCloudy with light rain developing after midnight. Low 47F. Winds light and variable. 
Chance of rain 80%.\\n- Humidity90%\\n- UV Index0 of 11\\n- Moonrise4:00 pmFull Moon\\n- Moonset7:17 am\\nWed 2754°/49°93%\\nWed 27\\nWed 27 | Day\\nRain. High 54F. Winds E at 5 to 10 mph. Chance of rain 90%. Rainfall near a half an inch.\\n- Humidity93%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:35 pm\\nWed 27 | Night\\nSteady light rain in the evening. Showers continuing late. Low 49F. Winds light and variable. Chance of rain 70%.\\n- Humidity91%\\n- UV Index0 of 11\\n- Moonrise4:59 pmFull Moon\\n- Moonset8:12 am\\nThu 2853°/42°19%\\nThu 28\\nThu 28 | Day\\nCloudy skies early will become partly cloudy later in the day. Slight chance of a rain shower. High 53F. Winds WSW at 5 to 10 mph.\\n- Humidity77%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:36 pm\\nThu 28 | Night\\nPartly cloudy skies. Low 42F. Winds W at 5 to 10 mph.\\n- Humidity71%\\n- UV Index0 of 11', metadata={'url': 'https://weather.com/weather/tenday/l/New+York+NY+USNY0996:1:US', 'thumbnail_url': 'https://imgs.search.brave.com/9xHc5-Bh2lvLyRJwQqeegm3gzoF6hawlpF8LZEjFLo8/rs:fit:200:200:1/g:ce/aHR0cHM6Ly9zLnct/eC5jby8yNDB4MTgw/X3R3Y19kZWZhdWx0/LnBuZw', 'title': '10-Day Weather Forecast for Manhattan, NY - The Weather Channel ...', 'description': 'Be prepared with the most accurate 10-day forecast for Manhattan, NY with highs, lows, chance of precipitation from The Weather Channel and Weather.com'}), Document(page_content='- Moonrise2:20 amWaning Crescent\\n- Moonset12:33 pm\\nSun 0740°/29°19%\\nSun 07\\nSun 07 | Day\\nIntervals of clouds and sunshine. High around 40F. Winds NW at 5 to 10 mph.\\n- Humidity57%\\n- UV Index2 of 11\\n- Sunrise7:19 am\\n- Sunset4:44 pm\\nSun 07 | Night\\nA few clouds from time to time. Low 29F. Winds NNW at 5 to 10 mph.\\n- Humidity60%\\n- UV Index0 of 11\\n- Moonrise3:28 amWaning Crescent\\n- Moonset1:04 pm\\nMon 0840°/32°35%\\nMon 08\\nMon 08 | Day\\nPartly cloudy early followed mostly cloudy skies and a few snow showers later in the day. High near 40F. 
Winds N at 5 to 10 mph. Chance of snow 40%.\\n- UV Index1 of 11\\n- Sunrise7:19 am\\n- Sunset4:45 pm\\nMon 08 | Night\\nVariable clouds with snow showers or flurries. Low 32F. Winds NNE at 5 to 10 mph. Chance of snow 60%. Snow accumulations less than one inch.\\n- UV Index0 of 11\\n- Moonrise4:40 amWaning Crescent\\n- Moonset1:43 pm\\nLatest News\\nOur Changing World\\nYour Privacy', metadata={'url': 'https://weather.com/weather/tenday/l/New+York+NY+USNY0996:1:US', 'thumbnail_url': 'https://imgs.search.brave.com/9xHc5-Bh2lvLyRJwQqeegm3gzoF6hawlpF8LZEjFLo8/rs:fit:200:200:1/g:ce/aHR0cHM6Ly9zLnct/eC5jby8yNDB4MTgw/X3R3Y19kZWZhdWx0/LnBuZw', 'title': '10-Day Weather Forecast for Manhattan, NY - The Weather Channel ...', 'description': 'Be prepared with the most accurate 10-day forecast for Manhattan, NY with highs, lows, chance of precipitation from The Weather Channel and Weather.com'}), Document(page_content='- Humidity91%\\n- UV Index0 of 11\\n- Moonrise5:50 amWaning Crescent\\n- Moonset2:35 pm\\nWed 1056°/39°34%\\nWed 10\\nWed 10 | Day\\nA shower or two possible early with partly cloudy skies in the afternoon. Morning high of 56F with temps falling to near 45. Winds SW at 15 to 25 mph. Chance of rain 30%.\\n- Humidity66%\\n- UV Index1 of 11\\n- Sunrise7:19 am\\n- Sunset4:47 pm\\nWed 10 | Night\\nA few clouds from time to time. Low 39F. Winds WSW at 10 to 20 mph.\\n- Humidity64%\\n- UV Index0 of 11\\n- Moonrise6:56 amWaning Crescent\\n- Moonset3:38 pm\\nThu 1147°/38°5%\\nThu 11\\nThu 11 | Day\\nPartly cloudy. High 47F. Winds WSW at 5 to 10 mph.\\n- Humidity62%\\n- UV Index2 of 11\\n- Sunrise7:19 am\\n- Sunset4:48 pm\\nThu 11 | Night\\nMostly clear skies. Low 38F. Winds W at 5 to 10 mph.\\n- Humidity66%\\n- UV Index0 of 11\\n- Moonrise7:52 amNew Moon\\n- Moonset4:53 pm\\nFri 1248°/42°19%\\nFri 12\\nFri 12 | Day\\nIntervals of clouds and sunshine. High 48F. 
Winds WSW at 5 to 10 mph.\\n- Humidity62%\\n- UV Index2 of 11\\n- Sunrise7:18 am\\n- Sunset4:49 pm', metadata={'url': 'https://weather.com/weather/tenday/l/New+York+NY+USNY0996:1:US', 'thumbnail_url': 'https://imgs.search.brave.com/9xHc5-Bh2lvLyRJwQqeegm3gzoF6hawlpF8LZEjFLo8/rs:fit:200:200:1/g:ce/aHR0cHM6Ly9zLnct/eC5jby8yNDB4MTgw/X3R3Y19kZWZhdWx0/LnBuZw', 'title': '10-Day Weather Forecast for Manhattan, NY - The Weather Channel ...', 'description': 'Be prepared with the most accurate 10-day forecast for Manhattan, NY with highs, lows, chance of precipitation from The Weather Channel and Weather.com'}), Document(page_content='Sat 1346°/36°53%\\nSat 13\\nSat 13 | Day\\nCloudy with showers. High 46F. Winds WSW at 10 to 15 mph. Chance of rain 50%.\\n- Humidity73%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:50 pm\\nSat 13 | Night\\nRain showers early transitioning to snow showers late. Low 36F. Winds W at 10 to 15 mph. Chance of precip 50%.\\n- Humidity70%\\n- UV Index0 of 11\\n- Moonrise9:14 amWaxing Crescent\\n- Moonset7:33 pm\\nSun 1442°/34°37%\\nSun 14\\nSun 14 | Day\\nSnow showers early will transition to a few showers later. High 42F. Winds WSW at 10 to 15 mph. Chance of rain 40%.\\n- Humidity63%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:51 pm\\nSun 14 | Night\\nVariable clouds with snow showers. Low 34F. Winds W at 10 to 15 mph. Chance of snow 60%. 
Snow accumulations less than one inch.\\n- UV Index0 of 11\\n- Moonrise9:44 amWaxing Crescent\\n- Moonset8:52 pm\\nMon 1540°/31°51%\\nMon 15\\nMon 15 | Day', metadata={'url': 'https://weather.com/weather/tenday/l/New+York+NY+USNY0996:1:US', 'thumbnail_url': 'https://imgs.search.brave.com/9xHc5-Bh2lvLyRJwQqeegm3gzoF6hawlpF8LZEjFLo8/rs:fit:200:200:1/g:ce/aHR0cHM6Ly9zLnct/eC5jby8yNDB4MTgw/X3R3Y19kZWZhdWx0/LnBuZw', 'title': '10-Day Weather Forecast for Manhattan, NY - The Weather Channel ...', 'description': 'Be prepared with the most accurate 10-day forecast for Manhattan, NY with highs, lows, chance of precipitation from The Weather Channel and Weather.com'}), Document(page_content='- Humidity70%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:34 pm\\nMon 25 | Night\\nOvercast with showers at times. Low 43F. Winds light and variable. Chance of rain 40%.\\n- Humidity80%\\n- UV Index0 of 11\\n- Moonrise3:08 pmWaxing Gibbous\\n- Moonset6:14 am\\nTue 2653°/45°58%\\nTue 26\\nTue 26 | Day\\nOvercast with rain showers at times. High 53F. Winds E at 5 to 10 mph. Chance of rain 60%.\\n- Humidity79%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:34 pm\\nTue 26 | Night\\nShowers early then scattered thunderstorms developing late. Low near 45F. Winds ESE at 5 to 10 mph. Chance of rain 60%.\\n- Humidity93%\\n- UV Index0 of 11\\n- Moonrise4:00 pmFull Moon\\n- Moonset7:17 am\\nWed 2751°/41°58%\\nWed 27\\nWed 27 | Day\\nCloudy with showers. High 51F. Winds WSW at 5 to 10 mph. Chance of rain 60%.\\n- Humidity79%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:35 pm\\nWed 27 | Night\\nCloudy with showers. Low 41F. Winds NW at 5 to 10 mph. 
Chance of rain 60%.\\n- Humidity72%\\n- UV Index0 of 11\\n- Moonrise4:59 pmFull Moon\\n- Moonset8:13 am', metadata={'url': 'https://weather.com/weather/tenday/l/New+York+NY+USNY0996:1:US', 'thumbnail_url': 'https://imgs.search.brave.com/9xHc5-Bh2lvLyRJwQqeegm3gzoF6hawlpF8LZEjFLo8/rs:fit:200:200:1/g:ce/aHR0cHM6Ly9zLnct/eC5jby8yNDB4MTgw/X3R3Y19kZWZhdWx0/LnBuZw', 'title': '10-Day Weather Forecast for Manhattan, NY - The Weather Channel ...', 'description': 'Be prepared with the most accurate 10-day forecast for Manhattan, NY with highs, lows, chance of precipitation from The Weather Channel and Weather.com'})]\n" + ] + } + ], + "source": [ + "# .results returns parsed results with each snippet in a Document\n", + "response = utility.results(query=\"What is the weather in NY\")\n", + "\n", + "# .results should have a Document for each `snippet`\n", + "print(len(response))\n", + "\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "id": "190ed085", + "metadata": {}, + "source": [ + "## Retriever Usage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1367af5c", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.retrievers.you import YouRetriever\n", + "\n", + "retriever = YouRetriever(num_web_results=1)\n", + "\n", + "retriever" + ] + }, + { + "cell_type": "code", + "execution_count": 95, "id": "a90d61d4", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "7\n", + "[Document(page_content='10 Day Weather-Manhattan, NY\\nToday43°/39°1%\\nToday\\nSun 31 | Day\\nGenerally cloudy. High 43F. Winds W at 10 to 15 mph.\\n- Humidity54%\\n- UV Index0 of 11\\n- Sunrise7:19 am\\n- Sunset4:38 pm\\nSun 31 | Night\\nCloudy. Low 39F. Winds light and variable.\\n- Humidity70%\\n- UV Index0 of 11\\n- Moonrise9:13 pmWaning Gibbous\\n- Moonset10:28 am\\nMon 0145°/33°7%\\nMon 01\\nMon 01 | Day\\nConsiderable cloudiness. High around 45F. 
Winds light and variable.\\n- Humidity71%\\n- UV Index1 of 11\\n- Sunrise7:19 am\\n- Sunset4:39 pm\\nMon 01 | Night\\nA few clouds. Low 33F. Winds NNW at 5 to 10 mph.\\n- Humidity64%\\n- UV Index0 of 11\\n- Moonrise10:14 pmWaning Gibbous\\n- Moonset10:49 am\\nTue 0246°/35°4%\\nTue 02\\nTue 02 | Day\\nMainly sunny. High 46F. Winds NW at 5 to 10 mph.\\n- Humidity52%\\n- UV Index2 of 11\\n- Sunrise7:19 am\\n- Sunset4:40 pm\\nTue 02 | Night\\nA few clouds overnight. Low around 35F. Winds W at 5 to 10 mph.\\n- Humidity64%\\n- UV Index0 of 11\\n- Moonrise11:13 pmWaning Gibbous\\n- Moonset11:08 am\\nWed 0346°/38°4%\\nWed 03\\nWed 03 | Day', metadata={'url': 'https://weather.com/weather/tenday/l/New+York+NY+USNY0996:1:US', 'thumbnail_url': 'https://imgs.search.brave.com/9xHc5-Bh2lvLyRJwQqeegm3gzoF6hawlpF8LZEjFLo8/rs:fit:200:200:1/g:ce/aHR0cHM6Ly9zLnct/eC5jby8yNDB4MTgw/X3R3Y19kZWZhdWx0/LnBuZw', 'title': '10-Day Weather Forecast for Manhattan, NY - The Weather Channel ...', 'description': 'Be prepared with the most accurate 10-day forecast for Manhattan, NY with highs, lows, chance of precipitation from The Weather Channel and Weather.com'}), Document(page_content='Radar\\nLatest News\\nOur Changing World\\nYour Privacy\\nTo personalize your product experience, we collect data from your device. We also may use or disclose to specific data vendors your precise geolocation data to provide the Services. 
To learn more please refer to our Privacy Policy.\\nChoose how my information is shared', metadata={'url': 'https://weather.com/weather/tenday/l/New+York+NY+USNY0996:1:US', 'thumbnail_url': 'https://imgs.search.brave.com/9xHc5-Bh2lvLyRJwQqeegm3gzoF6hawlpF8LZEjFLo8/rs:fit:200:200:1/g:ce/aHR0cHM6Ly9zLnct/eC5jby8yNDB4MTgw/X3R3Y19kZWZhdWx0/LnBuZw', 'title': '10-Day Weather Forecast for Manhattan, NY - The Weather Channel ...', 'description': 'Be prepared with the most accurate 10-day forecast for Manhattan, NY with highs, lows, chance of precipitation from The Weather Channel and Weather.com'}), Document(page_content='- Humidity82%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:34 pm\\nTue 26 | Night\\nCloudy with light rain developing after midnight. Low 47F. Winds light and variable. Chance of rain 80%.\\n- Humidity90%\\n- UV Index0 of 11\\n- Moonrise4:00 pmFull Moon\\n- Moonset7:17 am\\nWed 2754°/49°93%\\nWed 27\\nWed 27 | Day\\nRain. High 54F. Winds E at 5 to 10 mph. Chance of rain 90%. Rainfall near a half an inch.\\n- Humidity93%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:35 pm\\nWed 27 | Night\\nSteady light rain in the evening. Showers continuing late. Low 49F. Winds light and variable. Chance of rain 70%.\\n- Humidity91%\\n- UV Index0 of 11\\n- Moonrise4:59 pmFull Moon\\n- Moonset8:12 am\\nThu 2853°/42°19%\\nThu 28\\nThu 28 | Day\\nCloudy skies early will become partly cloudy later in the day. Slight chance of a rain shower. High 53F. Winds WSW at 5 to 10 mph.\\n- Humidity77%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:36 pm\\nThu 28 | Night\\nPartly cloudy skies. Low 42F. 
Winds W at 5 to 10 mph.\\n- Humidity71%\\n- UV Index0 of 11', metadata={'url': 'https://weather.com/weather/tenday/l/New+York+NY+USNY0996:1:US', 'thumbnail_url': 'https://imgs.search.brave.com/9xHc5-Bh2lvLyRJwQqeegm3gzoF6hawlpF8LZEjFLo8/rs:fit:200:200:1/g:ce/aHR0cHM6Ly9zLnct/eC5jby8yNDB4MTgw/X3R3Y19kZWZhdWx0/LnBuZw', 'title': '10-Day Weather Forecast for Manhattan, NY - The Weather Channel ...', 'description': 'Be prepared with the most accurate 10-day forecast for Manhattan, NY with highs, lows, chance of precipitation from The Weather Channel and Weather.com'}), Document(page_content='- Moonrise2:20 amWaning Crescent\\n- Moonset12:33 pm\\nSun 0740°/29°19%\\nSun 07\\nSun 07 | Day\\nIntervals of clouds and sunshine. High around 40F. Winds NW at 5 to 10 mph.\\n- Humidity57%\\n- UV Index2 of 11\\n- Sunrise7:19 am\\n- Sunset4:44 pm\\nSun 07 | Night\\nA few clouds from time to time. Low 29F. Winds NNW at 5 to 10 mph.\\n- Humidity60%\\n- UV Index0 of 11\\n- Moonrise3:28 amWaning Crescent\\n- Moonset1:04 pm\\nMon 0840°/32°35%\\nMon 08\\nMon 08 | Day\\nPartly cloudy early followed mostly cloudy skies and a few snow showers later in the day. High near 40F. Winds N at 5 to 10 mph. Chance of snow 40%.\\n- UV Index1 of 11\\n- Sunrise7:19 am\\n- Sunset4:45 pm\\nMon 08 | Night\\nVariable clouds with snow showers or flurries. Low 32F. Winds NNE at 5 to 10 mph. Chance of snow 60%. 
Snow accumulations less than one inch.\\n- UV Index0 of 11\\n- Moonrise4:40 amWaning Crescent\\n- Moonset1:43 pm\\nLatest News\\nOur Changing World\\nYour Privacy', metadata={'url': 'https://weather.com/weather/tenday/l/New+York+NY+USNY0996:1:US', 'thumbnail_url': 'https://imgs.search.brave.com/9xHc5-Bh2lvLyRJwQqeegm3gzoF6hawlpF8LZEjFLo8/rs:fit:200:200:1/g:ce/aHR0cHM6Ly9zLnct/eC5jby8yNDB4MTgw/X3R3Y19kZWZhdWx0/LnBuZw', 'title': '10-Day Weather Forecast for Manhattan, NY - The Weather Channel ...', 'description': 'Be prepared with the most accurate 10-day forecast for Manhattan, NY with highs, lows, chance of precipitation from The Weather Channel and Weather.com'}), Document(page_content='- Humidity91%\\n- UV Index0 of 11\\n- Moonrise5:50 amWaning Crescent\\n- Moonset2:35 pm\\nWed 1056°/39°34%\\nWed 10\\nWed 10 | Day\\nA shower or two possible early with partly cloudy skies in the afternoon. Morning high of 56F with temps falling to near 45. Winds SW at 15 to 25 mph. Chance of rain 30%.\\n- Humidity66%\\n- UV Index1 of 11\\n- Sunrise7:19 am\\n- Sunset4:47 pm\\nWed 10 | Night\\nA few clouds from time to time. Low 39F. Winds WSW at 10 to 20 mph.\\n- Humidity64%\\n- UV Index0 of 11\\n- Moonrise6:56 amWaning Crescent\\n- Moonset3:38 pm\\nThu 1147°/38°5%\\nThu 11\\nThu 11 | Day\\nPartly cloudy. High 47F. Winds WSW at 5 to 10 mph.\\n- Humidity62%\\n- UV Index2 of 11\\n- Sunrise7:19 am\\n- Sunset4:48 pm\\nThu 11 | Night\\nMostly clear skies. Low 38F. Winds W at 5 to 10 mph.\\n- Humidity66%\\n- UV Index0 of 11\\n- Moonrise7:52 amNew Moon\\n- Moonset4:53 pm\\nFri 1248°/42°19%\\nFri 12\\nFri 12 | Day\\nIntervals of clouds and sunshine. High 48F. 
Winds WSW at 5 to 10 mph.\\n- Humidity62%\\n- UV Index2 of 11\\n- Sunrise7:18 am\\n- Sunset4:49 pm', metadata={'url': 'https://weather.com/weather/tenday/l/New+York+NY+USNY0996:1:US', 'thumbnail_url': 'https://imgs.search.brave.com/9xHc5-Bh2lvLyRJwQqeegm3gzoF6hawlpF8LZEjFLo8/rs:fit:200:200:1/g:ce/aHR0cHM6Ly9zLnct/eC5jby8yNDB4MTgw/X3R3Y19kZWZhdWx0/LnBuZw', 'title': '10-Day Weather Forecast for Manhattan, NY - The Weather Channel ...', 'description': 'Be prepared with the most accurate 10-day forecast for Manhattan, NY with highs, lows, chance of precipitation from The Weather Channel and Weather.com'}), Document(page_content='Sat 1346°/36°53%\\nSat 13\\nSat 13 | Day\\nCloudy with showers. High 46F. Winds WSW at 10 to 15 mph. Chance of rain 50%.\\n- Humidity73%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:50 pm\\nSat 13 | Night\\nRain showers early transitioning to snow showers late. Low 36F. Winds W at 10 to 15 mph. Chance of precip 50%.\\n- Humidity70%\\n- UV Index0 of 11\\n- Moonrise9:14 amWaxing Crescent\\n- Moonset7:33 pm\\nSun 1442°/34°37%\\nSun 14\\nSun 14 | Day\\nSnow showers early will transition to a few showers later. High 42F. Winds WSW at 10 to 15 mph. Chance of rain 40%.\\n- Humidity63%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:51 pm\\nSun 14 | Night\\nVariable clouds with snow showers. Low 34F. Winds W at 10 to 15 mph. Chance of snow 60%. 
Snow accumulations less than one inch.\\n- UV Index0 of 11\\n- Moonrise9:44 amWaxing Crescent\\n- Moonset8:52 pm\\nMon 1540°/31°51%\\nMon 15\\nMon 15 | Day', metadata={'url': 'https://weather.com/weather/tenday/l/New+York+NY+USNY0996:1:US', 'thumbnail_url': 'https://imgs.search.brave.com/9xHc5-Bh2lvLyRJwQqeegm3gzoF6hawlpF8LZEjFLo8/rs:fit:200:200:1/g:ce/aHR0cHM6Ly9zLnct/eC5jby8yNDB4MTgw/X3R3Y19kZWZhdWx0/LnBuZw', 'title': '10-Day Weather Forecast for Manhattan, NY - The Weather Channel ...', 'description': 'Be prepared with the most accurate 10-day forecast for Manhattan, NY with highs, lows, chance of precipitation from The Weather Channel and Weather.com'}), Document(page_content='- Humidity70%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:34 pm\\nMon 25 | Night\\nOvercast with showers at times. Low 43F. Winds light and variable. Chance of rain 40%.\\n- Humidity80%\\n- UV Index0 of 11\\n- Moonrise3:08 pmWaxing Gibbous\\n- Moonset6:14 am\\nTue 2653°/45°58%\\nTue 26\\nTue 26 | Day\\nOvercast with rain showers at times. High 53F. Winds E at 5 to 10 mph. Chance of rain 60%.\\n- Humidity79%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:34 pm\\nTue 26 | Night\\nShowers early then scattered thunderstorms developing late. Low near 45F. Winds ESE at 5 to 10 mph. Chance of rain 60%.\\n- Humidity93%\\n- UV Index0 of 11\\n- Moonrise4:00 pmFull Moon\\n- Moonset7:17 am\\nWed 2751°/41°58%\\nWed 27\\nWed 27 | Day\\nCloudy with showers. High 51F. Winds WSW at 5 to 10 mph. Chance of rain 60%.\\n- Humidity79%\\n- UV Index1 of 11\\n- Sunrise7:18 am\\n- Sunset4:35 pm\\nWed 27 | Night\\nCloudy with showers. Low 41F. Winds NW at 5 to 10 mph. 
Chance of rain 60%.\\n- Humidity72%\\n- UV Index0 of 11\\n- Moonrise4:59 pmFull Moon\\n- Moonset8:13 am', metadata={'url': 'https://weather.com/weather/tenday/l/New+York+NY+USNY0996:1:US', 'thumbnail_url': 'https://imgs.search.brave.com/9xHc5-Bh2lvLyRJwQqeegm3gzoF6hawlpF8LZEjFLo8/rs:fit:200:200:1/g:ce/aHR0cHM6Ly9zLnct/eC5jby8yNDB4MTgw/X3R3Y19kZWZhdWx0/LnBuZw', 'title': '10-Day Weather Forecast for Manhattan, NY - The Weather Channel ...', 'description': 'Be prepared with the most accurate 10-day forecast for Manhattan, NY with highs, lows, chance of precipitation from The Weather Channel and Weather.com'})]\n" + ] + } + ], "source": [ - "from langchain.chains import RetrievalQA\n", - "from langchain.retrievers.you_retriever import YouRetriever\n", - "from langchain_openai import OpenAI\n", + "# .invoke wraps utility.results\n", + "response = retriever.invoke(\"What is the weather in NY\")\n", "\n", - "yr = YouRetriever()\n", - "qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type=\"map_reduce\", retriever=yr)" + "# .invoke should have a Document for each `snippet`\n", + "print(len(response))\n", + "\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "id": "aac4a1f9", + "metadata": {}, + "source": [ + "## Chaining" ] }, { "cell_type": "code", "execution_count": null, - "id": "4a223f2f", + "id": "21559e10", "metadata": {}, "outputs": [], "source": [ - "query = \"what starting ohio state quarterback most recently went their entire college career without beating Michigan?\"\n", - "qa.run(query)" + "# you need a model to use in the chain\n", + "!pip install --upgrade --quiet langchain-openai" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "id": "655e0fec-c831-4efe-a47b-d3bb5c5a81ce", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.retrievers.you import YouRetriever\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "from 
langchain_core.runnables import RunnablePassthrough\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "# set up runnable\n", + "runnable = RunnablePassthrough\n", + "\n", + "# set up retriever, limit sources to one\n", + "retriever = YouRetriever(num_web_results=1)\n", + "\n", + "# set up model\n", + "model = ChatOpenAI(model=\"gpt-3.5-turbo-16k\")\n", + "\n", + "# set up output parser\n", + "output_parser = StrOutputParser()" + ] + }, + { + "cell_type": "markdown", + "id": "47e1fcca", + "metadata": {}, + "source": [ + "### Invoke" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "id": "6e92557d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The weather in New York City today is 43° with a high/low of --/39°. The wind is 3 mph, humidity is 63%, and the air quality is considered good.\n" + ] + } + ], + "source": [ + "# set up prompt that expects one question\n", + "prompt = ChatPromptTemplate.from_template(\n", + " \"\"\"Answer the question based only on the context provided.\n", + "\n", + "Context: {context}\n", + "\n", + "Question: {question}\"\"\"\n", + ")\n", + "\n", + "# set up chain\n", + "chain = (\n", + " runnable.assign(context=(lambda x: x[\"question\"]) | retriever)\n", + " | prompt\n", + " | model\n", + " | output_parser\n", + ")\n", + "\n", + "output = chain.invoke({\"question\": \"what is the weather in NY today\"})\n", + "\n", + "print(output)" + ] + }, + { + "cell_type": "markdown", + "id": "66b15f94", + "metadata": {}, + "source": [ + "### Stream" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "id": "cfe5af8b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The weather in New York City today is a high of 39°F and a low of 31°F with a feels like temperature of 43°F. The wind speed is 3 mph, humidity is 63%, and the air quality is considered to be good." 
+ ] + } + ], + "source": [ + "# set up prompt that expects one question\n", + "prompt = ChatPromptTemplate.from_template(\n", + " \"\"\"Answer the question based only on the context provided.\n", + "\n", + "Context: {context}\n", + "\n", + "Question: {question}\"\"\"\n", + ")\n", + "\n", + "# set up chain - same as above\n", + "chain = (\n", + " runnable.assign(context=(lambda x: x[\"question\"]) | retriever)\n", + " | prompt\n", + " | model\n", + " | output_parser\n", + ")\n", + "\n", + "for s in chain.stream({\"question\": \"what is the weather in NY today\"}):\n", + " print(s, end=\"\", flush=True)" + ] + }, + { + "cell_type": "markdown", + "id": "28ee9450", + "metadata": {}, + "source": [ + "### Batch" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "id": "a8d8270b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Based on the provided context, the weather in New York City today is 43° with a high/low of --/39°.\n", + "Based on the provided context, the current weather in San Francisco is partly cloudy with a temperature of 61°F and a humidity of 57%.\n" + ] + } + ], + "source": [ + "chain = (\n", + " runnable.assign(context=(lambda x: x[\"question\"]) | retriever)\n", + " | prompt\n", + " | model\n", + " | output_parser\n", + ")\n", + "\n", + "output = chain.batch(\n", + " [\n", + " {\"question\": \"what is the weather in NY today\"},\n", + " {\"question\": \"what is the weather in sf today\"},\n", + " ]\n", + ")\n", + "\n", + "for o in output:\n", + " print(o)" ] } ], @@ -54,7 +411,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.17" + "version": "3.9.6" } }, "nbformat": 4, diff --git a/libs/community/langchain_community/retrievers/__init__.py b/libs/community/langchain_community/retrievers/__init__.py index a28c3ede92e..e7130931c1a 100644 --- a/libs/community/langchain_community/retrievers/__init__.py +++ 
b/libs/community/langchain_community/retrievers/__init__.py @@ -70,6 +70,7 @@ from langchain_community.retrievers.weaviate_hybrid_search import ( WeaviateHybridSearchRetriever, ) from langchain_community.retrievers.wikipedia import WikipediaRetriever +from langchain_community.retrievers.you import YouRetriever from langchain_community.retrievers.zep import ZepRetriever from langchain_community.retrievers.zilliz import ZillizRetriever @@ -79,6 +80,7 @@ __all__ = [ "ArceeRetriever", "ArxivRetriever", "AzureCognitiveSearchRetriever", + "BM25Retriever", "BreebsRetriever", "ChatGPTPluginRetriever", "ChaindeskRetriever", @@ -103,10 +105,10 @@ __all__ = [ "SVMRetriever", "TavilySearchAPIRetriever", "TFIDFRetriever", - "BM25Retriever", "VespaRetriever", "WeaviateHybridSearchRetriever", "WikipediaRetriever", + "YouRetriever", "ZepRetriever", "ZillizRetriever", "DocArrayRetriever", diff --git a/libs/community/langchain_community/retrievers/you.py b/libs/community/langchain_community/retrievers/you.py index b65f2aad78f..9564e7307ea 100644 --- a/libs/community/langchain_community/retrievers/you.py +++ b/libs/community/langchain_community/retrievers/you.py @@ -1,64 +1,23 @@ -from typing import Any, Dict, List, Optional +from typing import Any, List from langchain_core.callbacks import CallbackManagerForRetrieverRun from langchain_core.documents import Document -from langchain_core.pydantic_v1 import root_validator from langchain_core.retrievers import BaseRetriever -from langchain_core.utils import get_from_dict_or_env + +from langchain_community.utilities import YouSearchAPIWrapper -class YouRetriever(BaseRetriever): +class YouRetriever(BaseRetriever, YouSearchAPIWrapper): """`You` retriever that uses You.com's search API. - - To connect to the You.com api requires an API key which - you can get by emailing api@you.com. - You can check out our docs at https://documentation.you.com. - - You need to set the environment variable `YDC_API_KEY` for retriever to operate. 
+ It wraps results() to get_relevant_documents + It uses all YouSearchAPIWrapper arguments without any change. """ - ydc_api_key: str - k: Optional[int] = None - n_hits: Optional[int] = None - n_snippets_per_hit: Optional[int] = None - endpoint_type: str = "web" - - @root_validator(pre=True) - def validate_client( - cls, - values: Dict[str, Any], - ) -> Dict[str, Any]: - values["ydc_api_key"] = get_from_dict_or_env( - values, "ydc_api_key", "YDC_API_KEY" - ) - return values - def _get_relevant_documents( - self, query: str, *, run_manager: CallbackManagerForRetrieverRun + self, + query: str, + *, + run_manager: CallbackManagerForRetrieverRun, + **kwargs: Any, ) -> List[Document]: - import requests - - headers = {"X-API-Key": self.ydc_api_key} - if self.endpoint_type == "web": - results = requests.get( - f"https://api.ydc-index.io/search?query={query}", - headers=headers, - ).json() - - docs = [] - n_hits = self.n_hits or len(results["hits"]) - for hit in results["hits"][:n_hits]: - n_snippets_per_hit = self.n_snippets_per_hit or len(hit["snippets"]) - for snippet in hit["snippets"][:n_snippets_per_hit]: - docs.append(Document(page_content=snippet)) - if self.k is not None and len(docs) >= self.k: - return docs - return docs - elif self.endpoint_type == "snippet": - results = requests.get( - f"https://api.ydc-index.io/snippet_search?query={query}", - headers=headers, - ).json() - return [Document(page_content=snippet) for snippet in results] - else: - raise RuntimeError(f"Invalid endpoint type provided {self.endpoint_type}") + return self.results(query, run_manager=run_manager.get_child(), **kwargs) diff --git a/libs/community/langchain_community/utilities/__init__.py b/libs/community/langchain_community/utilities/__init__.py index 2af2c51ac6c..b03fa2585d7 100644 --- a/libs/community/langchain_community/utilities/__init__.py +++ b/libs/community/langchain_community/utilities/__init__.py @@ -248,6 +248,12 @@ def _import_twilio() -> Any: return TwilioAPIWrapper +def 
_import_you() -> Any: + from langchain_community.utilities.you import YouSearchAPIWrapper + + return YouSearchAPIWrapper + + def _import_wikipedia() -> Any: from langchain_community.utilities.wikipedia import WikipediaAPIWrapper @@ -377,6 +383,8 @@ def __getattr__(name: str) -> Any: return _import_tensorflow_datasets() elif name == "TwilioAPIWrapper": return _import_twilio() + elif name == "YouSearchAPIWrapper": + return _import_you() elif name == "WikipediaAPIWrapper": return _import_wikipedia() elif name == "WolframAlphaAPIWrapper": @@ -434,6 +442,7 @@ __all__ = [ "TensorflowDatasets", "TextRequestsWrapper", "TwilioAPIWrapper", + "YouSearchAPIWrapper", "WikipediaAPIWrapper", "WolframAlphaAPIWrapper", "ZapierNLAWrapper", diff --git a/libs/community/langchain_community/utilities/you.py b/libs/community/langchain_community/utilities/you.py new file mode 100644 index 00000000000..c01a1ed37a1 --- /dev/null +++ b/libs/community/langchain_community/utilities/you.py @@ -0,0 +1,230 @@ +"""Util that calls you.com Search API. 
+ +In order to set this up, follow instructions at: +""" +import json +from typing import Any, Dict, List, Literal, Optional + +import aiohttp +import requests +from langchain_core.documents import Document +from langchain_core.pydantic_v1 import BaseModel, Field, root_validator +from langchain_core.utils import get_from_dict_or_env + +YOU_API_URL = "https://api.ydc-index.io" + + +class YouHitMetadata(BaseModel): + """Metadata on a single hit from you.com""" + + title: str = Field(description="The title of the result") + url: str = Field(description="The url of the result") + thumbnail_url: str = Field(description="Thumbnail associated with the result") + description: str = Field(description="Details about the result") + + +class YouHit(YouHitMetadata): + """A single hit from you.com, which may contain multiple snippets""" + + snippets: List[str] = Field(description="One or snippets of text") + + +class YouAPIOutput(BaseModel): + """The output from you.com api""" + + hits: List[YouHit] = Field( + description="A list of dictionaries containing the results" + ) + + +class YouDocument(BaseModel): + """The output of parsing one snippet""" + + page_content: str = Field(description="One snippet of text") + metadata: YouHitMetadata + + +class YouSearchAPIWrapper(BaseModel): + """Wrapper for you.com Search API. + + To connect to the You.com api requires an API key which + you can get at https://api.you.com. + You can check out the docs at https://documentation.you.com. + + You need to set the environment variable `YDC_API_KEY` for retriever to operate. 
+ + Attributes + ---------- + ydc_api_key: str, optional + you.com api key, if YDC_API_KEY is not set in the environment + num_web_results: int, optional + The max number of web results to return, must be under 20 + safesearch: str, optional + Safesearch settings, one of off, moderate, strict, defaults to moderate + country: str, optional + Country code, ex: 'US' for united states, see api docs for list + k: int, optional + max number of Documents to return using `results()` + n_hits: int, optional, deprecated + Alias for num_web_results + n_snippets_per_hit: int, optional + limit the number of snippets returned per hit + endpoint_type: str, optional + you.com endpoints: search, news, rag; + `web` and `snippet` alias `search` + `rag` returns `{'message': 'Forbidden'}` + @todo `news` endpoint + """ + + ydc_api_key: Optional[str] = None + num_web_results: Optional[int] = None + safesearch: Optional[str] = None + country: Optional[str] = None + k: Optional[int] = None + n_snippets_per_hit: Optional[int] = None + # @todo deprecate `snippet`, not part of API + endpoint_type: Literal["search", "news", "rag", "snippet"] = "search" + # should deprecate n_hits + n_hits: Optional[int] = None + + @root_validator(pre=True) + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key exists in environment.""" + ydc_api_key = get_from_dict_or_env(values, "ydc_api_key", "YDC_API_KEY") + values["ydc_api_key"] = ydc_api_key + + return values + + def _parse_results(self, raw_search_results: Dict) -> List[Document]: + """ + Extracts snippets from each hit and puts them in a Document + Parameters: + raw_search_results: A dict containing list of hits + Returns: + List[YouDocument]: A dictionary of parsed results + """ + + # return news results + if self.endpoint_type == "news": + return [ + Document(page_content=result["description"], metadata=result) + for result in raw_search_results["news"]["results"] + ] + + docs = [] + for hit in raw_search_results["hits"]: + 
n_snippets_per_hit = self.n_snippets_per_hit or len(hit["snippets"]) + for snippet in hit["snippets"][:n_snippets_per_hit]: + docs.append( + Document( + page_content=snippet, + metadata={ + "url": hit["url"], + "thumbnail_url": hit["thumbnail_url"], + "title": hit["title"], + "description": hit["description"], + }, + ) + ) + if self.k is not None and len(docs) >= self.k: + return docs + return docs + + def raw_results( + self, + query: str, + **kwargs: Any, + ) -> Dict: + """Run query through you.com Search and return hits. + + Args: + query: The query to search for. + num_web_results: The maximum number of results to return. + safesearch: Safesearch settings, + one of off, moderate, strict, defaults to moderate + country: Country code + Returns: YouAPIOutput + """ + headers = {"X-API-Key": self.ydc_api_key or ""} + params = { + "query": query, + "num_web_results": self.num_web_results, + "safesearch": self.safesearch, + "country": self.country, + **kwargs, + } + + params = {k: v for k, v in params.items() if v is not None} + # news endpoint expects `q` instead of `query` + if self.endpoint_type == "news": + params["q"] = params["query"] + del params["query"] + + # @todo deprecate `snippet`, not part of API + if self.endpoint_type == "snippet": + self.endpoint_type = "search" + response = requests.get( + # type: ignore + f"{YOU_API_URL}/{self.endpoint_type}", + params=params, + headers=headers, + ) + response.raise_for_status() + return response.json() + + def results( + self, + query: str, + **kwargs: Any, + ) -> List[Document]: + """Run query through you.com Search and parses results into Documents.""" + + raw_search_results = self.raw_results( + query, + **{key: value for key, value in kwargs.items() if value is not None}, + ) + return self._parse_results(raw_search_results) + + async def raw_results_async( + self, + query: str, + num_web_results: Optional[int] = 5, + safesearch: Optional[str] = "moderate", + country: Optional[str] = "US", + ) -> Dict: + """Get 
results from the you.com Search API asynchronously.""" + + # Function to perform the API call + async def fetch() -> str: + params = { + "query": query, + "num_web_results": num_web_results, + "safesearch": safesearch, + "country": country, + } + async with aiohttp.ClientSession() as session: + async with session.post(f"{YOU_API_URL}/search", json=params) as res: + if res.status == 200: + data = await res.text() + return data + else: + raise Exception(f"Error {res.status}: {res.reason}") + + results_json_str = await fetch() + return json.loads(results_json_str) + + async def results_async( + self, + query: str, + num_web_results: Optional[int] = 5, + safesearch: Optional[str] = "moderate", + country: Optional[str] = "US", + ) -> List[Document]: + results_json = await self.raw_results_async( + query=query, + num_web_results=num_web_results, + safesearch=safesearch, + country=country, + ) + + return self._parse_results(results_json["results"]) diff --git a/libs/community/tests/unit_tests/retrievers/test_imports.py b/libs/community/tests/unit_tests/retrievers/test_imports.py index d9acc880387..d13bfe28810 100644 --- a/libs/community/tests/unit_tests/retrievers/test_imports.py +++ b/libs/community/tests/unit_tests/retrievers/test_imports.py @@ -34,6 +34,7 @@ EXPECTED_ALL = [ "VespaRetriever", "WeaviateHybridSearchRetriever", "WikipediaRetriever", + "YouRetriever", "ZepRetriever", "ZillizRetriever", "DocArrayRetriever", diff --git a/libs/community/tests/unit_tests/retrievers/test_you.py b/libs/community/tests/unit_tests/retrievers/test_you.py index 0d5acfeb693..dbc8cc65091 100644 --- a/libs/community/tests/unit_tests/retrievers/test_you.py +++ b/libs/community/tests/unit_tests/retrievers/test_you.py @@ -1,26 +1,72 @@ -import json -import os -from unittest import mock - -from langchain_core.documents import Document -from requests import Response +import responses from langchain_community.retrievers.you import YouRetriever +from ..utilities.test_you import ( + 
LIMITED_PARSED_OUTPUT, + MOCK_PARSED_OUTPUT, + MOCK_RESPONSE_RAW, + NEWS_RESPONSE_PARSED, + NEWS_RESPONSE_RAW, + TEST_ENDPOINT, +) + class TestYouRetriever: + @responses.activate def test_get_relevant_documents(self) -> None: - os.environ["YDC_API_KEY"] = "MOCK KEY!" - retriever = YouRetriever() + responses.add( + responses.GET, f"{TEST_ENDPOINT}/search", json=MOCK_RESPONSE_RAW, status=200 + ) + query = "Test query text" + you_wrapper = YouRetriever(ydc_api_key="test") + results = you_wrapper.get_relevant_documents(query) + expected_result = MOCK_PARSED_OUTPUT + assert results == expected_result - with mock.patch("requests.get") as mock_get: - fixture = {"hits": [{"snippets": ["yo"]}, {"snippets": ["bird up"]}]} - response = Response() - response._content = bytes(json.dumps(fixture).encode("utf-8")) - mock_get.return_value = response + @responses.activate + def test_invoke(self) -> None: + responses.add( + responses.GET, f"{TEST_ENDPOINT}/search", json=MOCK_RESPONSE_RAW, status=200 + ) + query = "Test query text" + you_wrapper = YouRetriever(ydc_api_key="test") + results = you_wrapper.invoke(query) + expected_result = MOCK_PARSED_OUTPUT + assert results == expected_result - actual = retriever.get_relevant_documents("test") - assert actual == [ - Document(page_content="yo"), - Document(page_content="bird up"), - ] + @responses.activate + def test_invoke_max_docs(self) -> None: + responses.add( + responses.GET, f"{TEST_ENDPOINT}/search", json=MOCK_RESPONSE_RAW, status=200 + ) + query = "Test query text" + you_wrapper = YouRetriever(k=2, ydc_api_key="test") + results = you_wrapper.invoke(query) + expected_result = [MOCK_PARSED_OUTPUT[0], MOCK_PARSED_OUTPUT[1]] + assert results == expected_result + + @responses.activate + def test_invoke_limit_snippets(self) -> None: + responses.add( + responses.GET, f"{TEST_ENDPOINT}/search", json=MOCK_RESPONSE_RAW, status=200 + ) + + query = "Test query text" + you_wrapper = YouRetriever(n_snippets_per_hit=1, ydc_api_key="test") + 
results = you_wrapper.results(query) + expected_result = LIMITED_PARSED_OUTPUT + assert results == expected_result + + @responses.activate + def test_invoke_news(self) -> None: + responses.add( + responses.GET, f"{TEST_ENDPOINT}/news", json=NEWS_RESPONSE_RAW, status=200 + ) + + query = "Test news text" + # ensure limit on number of docs returned + you_wrapper = YouRetriever(endpoint_type="news", ydc_api_key="test") + results = you_wrapper.results(query) + expected_result = NEWS_RESPONSE_PARSED + assert results == expected_result diff --git a/libs/community/tests/unit_tests/utilities/test_imports.py b/libs/community/tests/unit_tests/utilities/test_imports.py index a7bd210b4f9..91d1712be3c 100644 --- a/libs/community/tests/unit_tests/utilities/test_imports.py +++ b/libs/community/tests/unit_tests/utilities/test_imports.py @@ -48,6 +48,7 @@ EXPECTED_ALL = [ "TwilioAPIWrapper", "WikipediaAPIWrapper", "WolframAlphaAPIWrapper", + "YouSearchAPIWrapper", "ZapierNLAWrapper", "MerriamWebsterAPIWrapper", ] diff --git a/libs/community/tests/unit_tests/utilities/test_you.py b/libs/community/tests/unit_tests/utilities/test_you.py new file mode 100644 index 00000000000..adc004ad438 --- /dev/null +++ b/libs/community/tests/unit_tests/utilities/test_you.py @@ -0,0 +1,190 @@ +from typing import Any, Dict, List, Optional, Union + +import responses +from langchain_core.documents import Document + +from langchain_community.utilities.you import YouSearchAPIWrapper + +TEST_ENDPOINT = "https://api.ydc-index.io" + +# Mock you.com response for testing +MOCK_RESPONSE_RAW: Dict[str, List[Dict[str, Union[str, List[str]]]]] = { + "hits": [ + { + "description": "Test description", + "snippets": ["yo", "bird up"], + "thumbnail_url": "https://example.com/image.gif", + "title": "Test title 1", + "url": "https://example.com/article.html", + }, + { + "description": "Test description 2", + "snippets": ["worst show", "on tv"], + "thumbnail_url": "https://example.com/image2.gif", + "title": "Test title 
2", + "url": "https://example.com/article2.html", + }, + ] +} + + +def generate_parsed_metadata(num: Optional[int] = 0) -> Dict[Any, Any]: + """generate metadata for testing""" + if num is None: + num = 0 + hit: Dict[str, Union[str, List[str]]] = MOCK_RESPONSE_RAW["hits"][num] + return { + "url": hit["url"], + "thumbnail_url": hit["thumbnail_url"], + "title": hit["title"], + "description": hit["description"], + } + + +def generate_parsed_output(num: Optional[int] = 0) -> List[Document]: + """generate parsed output for testing""" + if num is None: + num = 0 + hit: Dict[str, Union[str, List[str]]] = MOCK_RESPONSE_RAW["hits"][num] + output = [] + for snippit in hit["snippets"]: + doc = Document(page_content=snippit, metadata=generate_parsed_metadata(num)) + output.append(doc) + return output + + +# Mock results after parsing +MOCK_PARSED_OUTPUT = generate_parsed_output() +MOCK_PARSED_OUTPUT.extend(generate_parsed_output(1)) +# Single-snippet +LIMITED_PARSED_OUTPUT = [] +LIMITED_PARSED_OUTPUT.append(generate_parsed_output()[0]) +LIMITED_PARSED_OUTPUT.append(generate_parsed_output(1)[0]) + +# copied from you api docs +NEWS_RESPONSE_RAW = { + "news": { + "results": [ + { + "age": "18 hours ago", + "breaking": True, + "description": "Search on YDC for the news", + "meta_url": { + "hostname": "www.reuters.com", + "netloc": "reuters.com", + "path": "› 2023 › 10 › 18 › politics › inflation › index.html", + "scheme": "https", + }, + "page_age": "2 days", + "page_fetched": "2023-10-12T23:00:00Z", + "thumbnail": {"original": "https://reuters.com/news.jpg"}, + "title": "Breaking News about the World's Greatest Search Engine!", + "type": "news", + "url": "https://news.you.com", + } + ] + } +} + +NEWS_RESPONSE_PARSED = [ + Document(page_content=str(result["description"]), metadata=result) + for result in NEWS_RESPONSE_RAW["news"]["results"] +] + + +@responses.activate +def test_raw_results() -> None: + responses.add( + responses.GET, f"{TEST_ENDPOINT}/search", 
json=MOCK_RESPONSE_RAW, status=200 + ) + + query = "Test query text" + # ensure default endpoint_type + you_wrapper = YouSearchAPIWrapper(endpoint_type="snippet", ydc_api_key="test") + raw_results = you_wrapper.raw_results(query) + expected_result = MOCK_RESPONSE_RAW + assert raw_results == expected_result + + +@responses.activate +def test_raw_results_defaults() -> None: + responses.add( + responses.GET, f"{TEST_ENDPOINT}/search", json=MOCK_RESPONSE_RAW, status=200 + ) + + query = "Test query text" + # ensure limit on number of docs returned + you_wrapper = YouSearchAPIWrapper(ydc_api_key="test") + raw_results = you_wrapper.raw_results(query) + expected_result = MOCK_RESPONSE_RAW + assert raw_results == expected_result + + +@responses.activate +def test_raw_results_news() -> None: + responses.add( + responses.GET, f"{TEST_ENDPOINT}/news", json=NEWS_RESPONSE_RAW, status=200 + ) + + query = "Test news text" + # ensure limit on number of docs returned + you_wrapper = YouSearchAPIWrapper(endpoint_type="news", ydc_api_key="test") + raw_results = you_wrapper.raw_results(query) + expected_result = NEWS_RESPONSE_RAW + assert raw_results == expected_result + + +@responses.activate +def test_results() -> None: + responses.add( + responses.GET, f"{TEST_ENDPOINT}/search", json=MOCK_RESPONSE_RAW, status=200 + ) + + query = "Test query text" + you_wrapper = YouSearchAPIWrapper(ydc_api_key="test") + results = you_wrapper.results(query) + expected_result = MOCK_PARSED_OUTPUT + assert results == expected_result + + +@responses.activate +def test_results_max_docs() -> None: + responses.add( + responses.GET, f"{TEST_ENDPOINT}/search", json=MOCK_RESPONSE_RAW, status=200 + ) + + query = "Test query text" + you_wrapper = YouSearchAPIWrapper(k=2, ydc_api_key="test") + results = you_wrapper.results(query) + expected_result = generate_parsed_output() + assert results == expected_result + + +@responses.activate +def test_results_limit_snippets() -> None: + responses.add( + responses.GET, 
f"{TEST_ENDPOINT}/search", json=MOCK_RESPONSE_RAW, status=200 + ) + + query = "Test query text" + you_wrapper = YouSearchAPIWrapper(n_snippets_per_hit=1, ydc_api_key="test") + results = you_wrapper.results(query) + expected_result = LIMITED_PARSED_OUTPUT + assert results == expected_result + + +@responses.activate +def test_results_news() -> None: + responses.add( + responses.GET, f"{TEST_ENDPOINT}/news", json=NEWS_RESPONSE_RAW, status=200 + ) + + query = "Test news text" + # ensure limit on number of docs returned + you_wrapper = YouSearchAPIWrapper(endpoint_type="news", ydc_api_key="test") + raw_results = you_wrapper.results(query) + expected_result = NEWS_RESPONSE_PARSED + assert raw_results == expected_result + + +# @todo test async methods