mirror of https://github.com/hwchase17/langchain.git
synced 2025-06-25 08:03:39 +00:00

Merge branch 'langchain-ai:master' into master

This commit is contained in:
commit 5de212d907

6  .github/CONTRIBUTING.md  (vendored)
.github/CONTRIBUTING.md:

@@ -44,7 +44,7 @@ If you are adding an issue, please try to keep it focused on a single, modular bug
 If two issues are related, or blocking, please link them rather than combining them.

 We will try to keep these issues as up to date as possible, though
-with the rapid rate of develop in this field some may get out of date.
+with the rapid rate of development in this field some may get out of date.
 If you notice this happening, please let us know.

 ### 🙋Getting Help

@@ -87,7 +87,7 @@ This will install all requirements for running the package, examples, linting, f

 ❗Note: If during installation you receive a `WheelFileValidationError` for `debugpy`, please make sure you are running Poetry v1.5.1. This bug was present in older versions of Poetry (e.g. 1.4.1) and has been resolved in newer releases. If you are still seeing this bug on v1.5.1, you may also try disabling "modern installation" (`poetry config installer.modern-installation false`) and re-installing requirements. See [this `debugpy` issue](https://github.com/microsoft/debugpy/issues/1246) for more details.

-Now, you should be able to run the common tasks in the following section. To double check, run `make test`, all tests should pass. If they don't you may need to pip install additional dependencies, such as `numexpr` and `openapi_schema_pydantic`.
+Now assuming `make` and `pytest` are installed, you should be able to run the common tasks in the following section. To double check, run `make test` under `libs/langchain`, all tests should pass. If they don't, you may need to pip install additional dependencies, such as `numexpr` and `openapi_schema_pydantic`.

 ## ✅ Common Tasks

@@ -134,7 +134,7 @@ We recognize linting can be annoying - if you do not want to do it, please conta
 ### Spellcheck

 Spellchecking for this project is done via [codespell](https://github.com/codespell-project/codespell).
-Note that `codespell` finds common typos, so could have false-positive (correctly spelled but rarely used) and false-negatives (not finding misspelled) words.
+Note that `codespell` finds common typos, so it could have false-positive (correctly spelled but rarely used) and false-negatives (not finding misspelled) words.

 To check spelling for this project:
@@ -30,12 +30,7 @@
    "source": [
     "import boto3\n",
     "\n",
-    "comprehend_client = boto3.client('comprehend', \n",
-    "                                 region_name='us-east-1', \n",
-    "                                 aws_access_key_id=\"ASIA6BR6ZDLNQLMEGWHM\",\n",
-    "                                 aws_secret_access_key=\"Y79nefFoOfvgrog6sojSe55xTuKqDJY53BgfrtlG\",\n",
-    "                                 aws_session_token=\"IQoJb3JpZ2luX2VjEIP//////////wEaCXVzLWVhc3QtMSJGMEQCIBvUl0Wj5Gu5GrHB+i5fHkaVc2V1381M7UNRX8EggHORAiB+dG/uKJ4loHn2oAcXIEy6+lfU7wygl4zw/vUo2VItFiqfAghMEAIaDDk2NTQyNTU2ODQ3NSIMfbh8uyoO1XONSkuEKvwBTMxeDCi//9U9LGIwZZzIiHOudQAqR2wlIGZKcw//abSeHNBE1AoDT8ibcqk7EuIt9fwnj1WYiLGmSIWd9/kSZShiKdYg0UpNWyr1/LdeutV5byFAjT21RnWTgSMr0QeSCU698PFusvO1Coph8C75pcqTVYsxi/HypJT8OfB5iCxKgfzx0qD4X6hScpIAEYZhgQXHFBAeubqMkVPYEqSob6fSm1vEI8LkU8HG1N2M2p8TzGCQWo5uBgtNkipxve++bkR+xjiNLIpAN3P1xF2/W/lYlz+4xGsi90aZqIVh/tOvAjg7Yx1Dd5Ir2C0fZc7wbtabzVFlJZ7GFcpcMOX0o6cGOp4BismuW2CJRBmFFpoparqraQaiQBY/VDbQg9KQc/Y6o0oCxkESLUdY6ino3yrheT3W832eAg0RwrmEaQqT8kKGyJFimUxrAF/otNQhySLKuSXLooguammJiQAtgK1EhmuLBUBoLcngxQ31kDqw13g7Ccwuo68fnI/QzQLj5MX+V5VLCSp9VrOzi9XSjmeF/TJQARdZeL3CSeu2pATQc80=\"\n",
-    "                                 )"
+    "comprehend_client = boto3.client('comprehend', region_name='us-east-1')"
    ]
   },
   {

@@ -389,7 +384,6 @@
    "comp_moderation_with_config = AmazonComprehendModerationChain(\n",
    "    moderation_config=moderation_config,  # specify the configuration\n",
    "    client=comprehend_client,  # optionally pass the Boto3 Client\n",
    "    force_base_exception=True,  # Force BaseModerationError\n",
    "    unique_id='john.doe@email.com',  # A unique ID\n",
    "    moderation_callback=my_callback,  # BaseModerationCallbackHandler\n",
    "    verbose=True\n",
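For context on the change above: the hardcoded keys were removed, so the new cell relies on boto3's standard credential chain. A minimal sketch of that pattern (the profile name is a hypothetical illustration, not from the commit):

```python
# Credentials come from the environment, shared config, or an IAM role,
# never from keys hardcoded in the notebook.
import boto3

session = boto3.Session(profile_name="default")  # hypothetical profile
comprehend_client = session.client("comprehend", region_name="us-east-1")
```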
@@ -1,86 +1,73 @@
 {
  "cells": [
   {
    "attachments": {},
    "cell_type": "markdown",
    "id": "8d10861f-a550-4443-bc63-4ce2ae13b841",
    "metadata": {},
    "source": [
-    "# Infino - LangChain LLM Monitoring Example\n",
+    "# Infino\n",
     "\n",
-    "This example shows how one can track the following while calling OpenAI models via LangChain and [Infino](https://github.com/infinohq/infino):\n",
+    "This example shows how one can track the following while calling OpenAI models via `LangChain` and [Infino](https://github.com/infinohq/infino):\n",
     "\n",
     "* prompt input,\n",
-    "* response from chatgpt or any other LangChain model,\n",
+    "* response from `ChatGPT` or any other `LangChain` model,\n",
     "* latency,\n",
     "* errors,\n",
     "* number of tokens consumed"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "3a5a0976-9953-41d8-880c-eb3f2992e936",
+   "cell_type": "markdown",
+   "id": "64d14c88-b71c-4524-ab1b-4250a7dbb62b",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Requirement already satisfied: matplotlib in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (3.7.1)\n",
-      "Requirement already satisfied: contourpy>=1.0.1 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from matplotlib) (1.0.7)\n",
-      "Requirement already satisfied: cycler>=0.10 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from matplotlib) (0.11.0)\n",
-      "Requirement already satisfied: fonttools>=4.22.0 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from matplotlib) (4.39.4)\n",
-      "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from matplotlib) (1.4.4)\n",
-      "Requirement already satisfied: numpy>=1.20 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from matplotlib) (1.24.3)\n",
-      "Requirement already satisfied: packaging>=20.0 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from matplotlib) (23.1)\n",
-      "Requirement already satisfied: pillow>=6.2.0 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from matplotlib) (9.5.0)\n",
-      "Requirement already satisfied: pyparsing>=2.3.1 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from matplotlib) (3.0.9)\n",
-      "Requirement already satisfied: python-dateutil>=2.7 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from matplotlib) (2.8.2)\n",
-      "Requirement already satisfied: six>=1.5 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)\n",
-      "Requirement already satisfied: infinopy in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (0.0.1)\n",
-      "Requirement already satisfied: docker in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from infinopy) (6.1.3)\n",
-      "Requirement already satisfied: requests in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from infinopy) (2.31.0)\n",
-      "Requirement already satisfied: packaging>=14.0 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from docker->infinopy) (23.1)\n",
-      "Requirement already satisfied: urllib3>=1.26.0 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from docker->infinopy) (2.0.2)\n",
-      "Requirement already satisfied: websocket-client>=0.32.0 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from docker->infinopy) (1.5.2)\n",
-      "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from requests->infinopy) (3.1.0)\n",
-      "Requirement already satisfied: idna<4,>=2.5 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from requests->infinopy) (3.4)\n",
-      "Requirement already satisfied: certifi>=2017.4.17 in /Users/vinaykakade/.pyenv/versions/3.10.11/lib/python3.10/site-packages (from requests->infinopy) (2023.5.7)\n"
-     ]
-    }
-   ],
    "source": [
+    "## Initializing"
    ]
   },
   {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ed46c894-caa6-49b2-85d1-f275374fa308",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "# Install necessary dependencies.\n",
     "!pip install infinopy\n",
-    "!pip install matplotlib\n",
-    "\n",
+    "!pip install matplotlib"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "3a5a0976-9953-41d8-880c-eb3f2992e936",
    "metadata": {},
+   "outputs": [],
    "source": [
-    "# Remove the (1) import sys and sys.path.append(..) and (2) uncomment `!pip install langchain` after merging the PR for Infino/LangChain integration.\n",
-    "import sys\n",
-    "\n",
-    "sys.path.append(\"../../../../../langchain\")\n",
-    "#!pip install langchain\n",
-    "\n",
-    "\n",
     "import datetime as dt\n",
-    "from infinopy import InfinoClient\n",
     "import json\n",
     "from langchain.llms import OpenAI\n",
-    "from langchain.callbacks import InfinoCallbackHandler\n",
     "import matplotlib.pyplot as plt\n",
     "import matplotlib.dates as md\n",
     "import os\n",
     "import time\n",
-    "import sys"
+    "import sys\n",
+    "\n",
+    "from infinopy import InfinoClient\n",
+    "from langchain.callbacks import InfinoCallbackHandler"
    ]
   },
   {
    "attachments": {},
    "cell_type": "markdown",
    "id": "9f90210d-c805-4a0c-81e4-d5298942afc4",
    "metadata": {},
    "source": [
-    "## Start Infino server, initialize the Infino client\n"
+    "## Start Infino server, initialize the Infino client"
    ]
   },
   {

@@ -106,7 +93,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "b6b81cda-b841-43ee-8c5e-b1576555765f",
    "metadata": {},

@@ -148,7 +134,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "dce1b820-3f1a-4b94-b848-4c6032cadc18",
    "metadata": {},

@@ -214,7 +199,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "b68ec697-c922-4fd9-aad1-f49c6ac24e8a",
    "metadata": {},

@@ -326,7 +310,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "c3d61822-1781-4bc6-97a2-2abc5c2b2e75",
    "metadata": {},

@@ -364,12 +347,11 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "id": "4b171074-c775-48e0-a4b3-f550e2c8eccb",
    "metadata": {},
    "source": [
-    "## Step 5: Stop infino server"
+    "## Stop infino server"
    ]
   },
   {

@@ -415,7 +397,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.4"
+   "version": "3.10.12"
   }
  },
  "nbformat": 4,
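The import cleanup above centers on `InfinoCallbackHandler`. A minimal sketch of how the handler is wired into an LLM call, following the notebook's own usage (the constructor arguments are illustrative):

```python
from langchain.callbacks import InfinoCallbackHandler
from langchain.llms import OpenAI

handler = InfinoCallbackHandler(
    model_id="test_openai", model_version="0.1", verbose=False
)
llm = OpenAI(temperature=0.1)
# Prompt input, response, latency, errors, and token counts are reported to Infino.
llm_result = llm.generate(["Tell me a joke."], callbacks=[handler])
```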
420  docs/extras/integrations/chat_loaders/imessage.ipynb  (new file)

@@ -0,0 +1,420 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "01fcfa2f-33a9-48f3-835a-b1956c394d6b",
   "metadata": {},
   "source": [
    "# iMessage\n",
    "\n",
    "This notebook shows how to use the iMessage chat loader. This class helps convert iMessage conversations to LangChain chat messages.\n",
    "\n",
    "On MacOS, iMessage stores conversations in a sqlite database at `~/Library/Messages/chat.db` (at least for macOS Ventura 13.4). \n",
    "The `IMessageChatLoader` loads from this database file. \n",
    "\n",
    "1. Create the `IMessageChatLoader` with the file path pointed to the `chat.db` database you'd like to process.\n",
    "2. Call `loader.load()` (or `loader.lazy_load()`) to perform the conversion. Optionally use `merge_chat_runs` to combine messages from the same sender in sequence, and/or `map_ai_messages` to convert messages from the specified sender to the \"AIMessage\" class.\n",
    "\n",
    "## 1. Access Chat DB\n",
    "\n",
    "It's likely that your terminal is denied access to `~/Library/Messages`. To use this class, you can copy the DB to an accessible directory (e.g., Documents) and load from there. Alternatively (and not recommended), you can grant full disk access to your terminal emulator in System Settings > Security and Privacy > Full Disk Access.\n",
    "\n",
    "We have created an example database you can use at [this linked drive file](https://drive.google.com/file/d/1NebNKqTA2NXApCmeH6mu0unJD2tANZzo/view?usp=sharing)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "036ce7e0-a38f-4cbe-89a6-a205ae7c23be",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "File chat.db downloaded.\n"
     ]
    }
   ],
   "source": [
    "# This uses some example data\n",
    "import requests\n",
    "\n",
    "\n",
    "def download_drive_file(url: str, output_path: str = 'chat.db') -> None:\n",
    "    file_id = url.split('/')[-2]\n",
    "    download_url = f'https://drive.google.com/uc?export=download&id={file_id}'\n",
    "\n",
    "    response = requests.get(download_url)\n",
    "    if response.status_code != 200:\n",
    "        print('Failed to download the file.')\n",
    "        return\n",
    "\n",
    "    with open(output_path, 'wb') as file:\n",
    "        file.write(response.content)\n",
    "    print(f'File {output_path} downloaded.')\n",
    "\n",
    "\n",
    "url = 'https://drive.google.com/file/d/1NebNKqTA2NXApCmeH6mu0unJD2tANZzo/view?usp=sharing'\n",
    "\n",
    "# Download file to chat.db\n",
    "download_drive_file(url)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cf60f703-76f1-4602-a723-02c59535c1af",
   "metadata": {},
   "source": [
    "## 2. Create the Chat Loader\n",
    "\n",
    "Provide the loader with the file path to the `chat.db` database. You can optionally specify the user id that maps to an AI message, as well as configure whether to merge message runs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "4b8b432a-d2bc-49e1-b35f-761730a8fd6d",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.chat_loaders.imessage import IMessageChatLoader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "8ec6661b-0aca-48ae-9e2b-6412856c287b",
   "metadata": {},
   "outputs": [],
   "source": [
    "loader = IMessageChatLoader(\n",
    "    path=\"./chat.db\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8805a7c5-84b4-49f5-8989-0022f2054ace",
   "metadata": {},
   "source": [
    "## 3. Load messages\n",
    "\n",
    "The `load()` (or `lazy_load`) methods return a list of \"ChatSessions\" that currently just contain a list of messages per loaded conversation. All messages are mapped to \"HumanMessage\" objects to start. \n",
    "\n",
    "You can optionally choose to merge message \"runs\" (consecutive messages from the same sender) and select a sender to represent the \"AI\". The fine-tuned LLM will learn to generate these AI messages."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "fcd69b3e-020d-4a15-8a0d-61c2d34e1ee1",
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import List\n",
    "from langchain.chat_loaders.base import ChatSession\n",
    "from langchain.chat_loaders.utils import (\n",
    "    map_ai_messages,\n",
    "    merge_chat_runs,\n",
    ")\n",
    "\n",
    "raw_messages = loader.lazy_load()\n",
    "# Merge consecutive messages from the same sender into a single message\n",
    "merged_messages = merge_chat_runs(raw_messages)\n",
    "# Convert messages from \"Tortoise\" to AI messages. Do you have a guess who these conversations are between?\n",
    "chat_sessions: List[ChatSession] = list(map_ai_messages(merged_messages, sender=\"Tortoise\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "370b8c26-c7a8-434c-a225-45c20ff14a03",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[AIMessage(content=\"Slow and steady, that's my motto.\", additional_kwargs={'message_time': 1693182723, 'sender': 'Tortoise'}, example=False),\n",
       " HumanMessage(content='Speed is key!', additional_kwargs={'message_time': 1693182753, 'sender': 'Hare'}, example=False),\n",
       " AIMessage(content='A balanced approach is more reliable.', additional_kwargs={'message_time': 1693182783, 'sender': 'Tortoise'}, example=False)]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Now all of the Tortoise's messages will take the AI message class\n",
    "# which maps to the 'assistant' role in OpenAI's training format\n",
    "chat_sessions[0]['messages'][:3]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "05208f9d-3193-4a8d-86a5-13df2c8197e5",
   "metadata": {},
   "source": [
    "## 4. Prepare for fine-tuning\n",
    "\n",
    "Now it's time to convert our chat messages to OpenAI dictionaries. We can use the `convert_messages_for_finetuning` utility to do so."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "8834861f-f37f-4c08-96c6-917269bf09b8",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.adapters.openai import convert_messages_for_finetuning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "ce7ab0f9-6e6a-4a1c-8b86-c635251d437e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Prepared 10 dialogues for training\n"
     ]
    }
   ],
   "source": [
    "training_data = convert_messages_for_finetuning(chat_sessions)\n",
    "print(f\"Prepared {len(training_data)} dialogues for training\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b494d64c-8056-42ae-b4c1-a9cfabc002ea",
   "metadata": {},
   "source": [
    "## 5. Fine-tune the model\n",
    "\n",
    "It's time to fine-tune the model. Make sure you have `openai` installed\n",
    "and have set your `OPENAI_API_KEY` appropriately."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "b4b60daa-b899-4291-a09a-412ce9c218fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# %pip install -U openai --quiet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "2cca6c95-c0d6-4826-b4fa-1c403f217f93",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "File file-zHIgf4r8LltZG3RFpkGd4Sjf ready after 10.19 seconds.\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "from io import BytesIO\n",
    "import time\n",
    "\n",
    "import openai\n",
    "\n",
    "# We will write the jsonl file in memory\n",
    "my_file = BytesIO()\n",
    "for m in training_data:\n",
    "    my_file.write((json.dumps({\"messages\": m}) + \"\\n\").encode('utf-8'))\n",
    "\n",
    "my_file.seek(0)\n",
    "training_file = openai.File.create(\n",
    "    file=my_file,\n",
    "    purpose='fine-tune'\n",
    ")\n",
    "\n",
    "# OpenAI audits each training file for compliance reasons.\n",
    "# This may take a few minutes\n",
    "status = openai.File.retrieve(training_file.id).status\n",
    "start_time = time.time()\n",
    "while status != \"processed\":\n",
    "    print(f\"Status=[{status}]... {time.time() - start_time:.2f}s\", end=\"\\r\", flush=True)\n",
    "    time.sleep(5)\n",
    "    status = openai.File.retrieve(training_file.id).status\n",
    "print(f\"File {training_file.id} ready after {time.time() - start_time:.2f} seconds.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "60ee0476-3113-4dc8-a886-bce878c60b07",
   "metadata": {},
   "source": [
    "With the file ready, it's time to kick off a training job."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "c376ddca-5b4f-4e5a-bf4e-6beeb467eacc",
   "metadata": {},
   "outputs": [],
   "source": [
    "job = openai.FineTuningJob.create(\n",
    "    training_file=training_file.id,\n",
    "    model=\"gpt-3.5-turbo\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "09344c60-0bee-4989-b8d1-4a8821553cc3",
   "metadata": {},
   "source": [
    "Grab a cup of tea while your model is being prepared. This may take some time!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "22eae900-04ca-456b-ba51-1dfff1f8e0e1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Status=[running]... 524.95s\r"
     ]
    }
   ],
   "source": [
    "status = openai.FineTuningJob.retrieve(job.id).status\n",
    "start_time = time.time()\n",
    "while status != \"succeeded\":\n",
    "    print(f\"Status=[{status}]... {time.time() - start_time:.2f}s\", end=\"\\r\", flush=True)\n",
    "    time.sleep(5)\n",
    "    job = openai.FineTuningJob.retrieve(job.id)\n",
    "    status = job.status"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "39e72616-a7d9-44b8-a4eb-506611d119f4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ft:gpt-3.5-turbo-0613:personal::7sKoRdlz\n"
     ]
    }
   ],
   "source": [
    "print(job.fine_tuned_model)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0d717749-b1b6-451f-b3c5-3286b82d45b9",
   "metadata": {},
   "source": [
    "## 6. Use in LangChain\n",
    "\n",
    "You can use the resulting model ID directly in the `ChatOpenAI` model class."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "1579dfca-95c6-47b7-8549-1195b9dce5b0",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.chat_models import ChatOpenAI\n",
    "\n",
    "model = ChatOpenAI(\n",
    "    model=job.fine_tuned_model,\n",
    "    temperature=1,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "6f53d1b1-dcbf-4976-a61a-17f74c6f1b0a",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.prompts import ChatPromptTemplate\n",
    "from langchain.schema.output_parser import StrOutputParser\n",
    "\n",
    "prompt = ChatPromptTemplate.from_messages(\n",
    "    [\n",
    "        (\"system\", \"You are speaking to hare.\"),\n",
    "        (\"human\", \"{input}\"),\n",
    "    ]\n",
    ")\n",
    "\n",
    "chain = prompt | model | StrOutputParser()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "6619c9bc-54ea-4136-bd9a-44557f7da724",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "A symbol of interconnectedness."
     ]
    }
   ],
   "source": [
    "for tok in chain.stream({\"input\": \"What's the golden thread?\"}):\n",
    "    print(tok, end=\"\", flush=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "88e0d1a1-48a9-4d9d-9f4e-010cbbb65af8",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
@@ -8,9 +8,9 @@
    "# Etherscan Loader\n",
    "## Overview\n",
    "\n",
-   "The Etherscan loader use etherscan api to load transacactions histories under specific account on Ethereum Mainnet.\n",
+   "The Etherscan loader use etherscan api to load transaction histories under specific account on Ethereum Mainnet.\n",
    "\n",
-   "You will need a Etherscan api key to proceed. The free api key has 5 calls per seconds quota.\n",
+   "You will need a Etherscan api key to proceed. The free api key has 5 calls per second quota.\n",
    "\n",
    "The loader supports the following six functionalities:\n",
    "* Retrieve normal transactions under specific account on Ethereum Mainnet\n",
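For reference, a minimal sketch of the loader this overview describes (the account address is illustrative, and an `ETHERSCAN_API_KEY` must be available in the environment):

```python
import os

from langchain.document_loaders import EtherscanLoader

os.environ["ETHERSCAN_API_KEY"] = "<your-api-key>"  # placeholder

account_address = "0x9dd134d14d1e65f84b706d6f205cd5b1cd03a46b"  # illustrative
loader = EtherscanLoader(account_address, filter="normal_transaction")
docs = loader.load()  # one Document per transaction record
```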
@@ -38,7 +38,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "id": "878928a6-a5ae-4f74-b351-64e3b01733fe",
    "metadata": {
     "tags": []

@@ -50,7 +50,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "2216c83f-68e4-4d2f-8ea2-5878fb18bbe7",
    "metadata": {
     "tags": []

@@ -66,7 +66,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "8f3b6aa0-b45d-4e37-8c50-5bebe70fdb9d",
    "metadata": {
     "tags": []

@@ -93,7 +93,7 @@
    "source": [
     "loader = GoogleDriveLoader(\n",
     "    folder_id=\"1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5\",\n",
-    "    file_types=[\"document\", \"sheet\"]\n",
+    "    file_types=[\"document\", \"sheet\"],\n",
+    "    recursive=False\n",
     ")"
    ]

@@ -110,7 +110,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "id": "94207e39",
    "metadata": {},
    "outputs": [],

@@ -121,7 +121,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "a15fbee0",
    "metadata": {},
    "outputs": [],

@@ -136,7 +136,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "98410bda",
    "metadata": {},
    "outputs": [],

@@ -146,21 +146,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "id": "e3e72221",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Document(page_content='\\n \\n \\n Team\\n Location\\n Stanley Cups\\n \\n \\n Blues\\n STL\\n 1\\n \\n \\n Flyers\\n PHI\\n 2\\n \\n \\n Maple Leafs\\n TOR\\n 13\\n \\n \\n', metadata={'filetype': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'page_number': 1, 'page_name': 'Stanley Cups', 'text_as_html': '<table border=\"1\" class=\"dataframe\">\\n <tbody>\\n <tr>\\n <td>Team</td>\\n <td>Location</td>\\n <td>Stanley Cups</td>\\n </tr>\\n <tr>\\n <td>Blues</td>\\n <td>STL</td>\\n <td>1</td>\\n </tr>\\n <tr>\\n <td>Flyers</td>\\n <td>PHI</td>\\n <td>2</td>\\n </tr>\\n <tr>\\n <td>Maple Leafs</td>\\n <td>TOR</td>\\n <td>13</td>\\n </tr>\\n </tbody>\\n</table>', 'category': 'Table', 'source': 'https://drive.google.com/file/d/1aA6L2AR3g0CR-PW03HEZZo4NaVlKpaP7/view'})"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "docs[0]"
    ]

@@ -175,7 +164,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "id": "0e2d093f",
    "metadata": {},
    "outputs": [],

@@ -190,7 +179,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "id": "b35ddcc6",
    "metadata": {},
    "outputs": [],

@@ -200,21 +189,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "id": "3cc141e0",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Document(page_content='\\n \\n \\n Team\\n Location\\n Stanley Cups\\n \\n \\n Blues\\n STL\\n 1\\n \\n \\n Flyers\\n PHI\\n 2\\n \\n \\n Maple Leafs\\n TOR\\n 13\\n \\n \\n', metadata={'filetype': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'page_number': 1, 'page_name': 'Stanley Cups', 'text_as_html': '<table border=\"1\" class=\"dataframe\">\\n <tbody>\\n <tr>\\n <td>Team</td>\\n <td>Location</td>\\n <td>Stanley Cups</td>\\n </tr>\\n <tr>\\n <td>Blues</td>\\n <td>STL</td>\\n <td>1</td>\\n </tr>\\n <tr>\\n <td>Flyers</td>\\n <td>PHI</td>\\n <td>2</td>\\n </tr>\\n <tr>\\n <td>Maple Leafs</td>\\n <td>TOR</td>\\n <td>13</td>\\n </tr>\\n </tbody>\\n</table>', 'category': 'Table', 'source': 'https://drive.google.com/file/d/1aA6L2AR3g0CR-PW03HEZZo4NaVlKpaP7/view'})"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "docs[0]"
    ]
@@ -226,6 +204,309 @@
    "metadata": {},
    "outputs": [],
    "source": []
   },
   {
    "cell_type": "markdown",
    "id": "83ac576b-48c9-4aad-a35e-e978ea32f746",
    "metadata": {},
    "source": [
     "# Extended usage\n",
     "An external component can manage the complexity of Google Drive: `langchain-googledrive`.\n",
     "It's compatible with the `langchain.document_loaders.GoogleDriveLoader` and can be used\n",
     "in its place.\n",
     "\n",
     "To be compatible with containers, the authentication uses an environment variable `GOOGLE_ACCOUNT_FILE` pointing to the credential file (for user or service)."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "b94f7119-bc1e-4ca3-907f-9d81e837ac59",
    "metadata": {},
    "outputs": [],
    "source": [
     "!pip install langchain-googledrive"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "c4c7474e-49cb-48a1-b3a0-77fba8e2dd70",
    "metadata": {},
    "outputs": [],
    "source": [
     "folder_id='root'\n",
     "#folder_id='1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5'"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "8357f7f1-e2b1-41ef-8e38-48fcc3897dba",
    "metadata": {},
    "outputs": [],
    "source": [
     "# Use the advanced version.\n",
     "from langchain_googledrive.document_loaders import GoogleDriveLoader"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "16ab9d3d-1782-4cb9-ab56-d87edbb25a18",
    "metadata": {},
    "outputs": [],
    "source": [
     "loader = GoogleDriveLoader(\n",
     "    folder_id=folder_id,\n",
     "    recursive=False,\n",
     "    num_results=2,  # Maximum number of files to load\n",
     ")"
    ]
   },
   {
    "cell_type": "markdown",
    "id": "ebac43aa-dd64-4964-802a-a90172415fd1",
    "metadata": {},
    "source": [
     "By default, all files with these mime-types can be converted to `Document`.\n",
     "- text/text\n",
     "- text/plain\n",
     "- text/html\n",
     "- text/csv\n",
     "- text/markdown\n",
     "- image/png\n",
     "- image/jpeg\n",
     "- application/epub+zip\n",
     "- application/pdf\n",
     "- application/rtf\n",
     "- application/vnd.google-apps.document (GDoc)\n",
     "- application/vnd.google-apps.presentation (GSlide)\n",
     "- application/vnd.google-apps.spreadsheet (GSheet)\n",
     "- application/vnd.google.colaboratory (Notebook colab)\n",
     "- application/vnd.openxmlformats-officedocument.presentationml.presentation (PPTX)\n",
     "- application/vnd.openxmlformats-officedocument.wordprocessingml.document (DOCX)\n",
     "\n",
     "It's possible to update or customize this. See the documentation of `GDriveLoader`.\n",
     "\n",
     "But the corresponding packages must be installed."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "b4560f35-a37d-44e2-be0b-adaa245b3b3d",
    "metadata": {},
    "outputs": [],
    "source": [
     "!pip install unstructured"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "6cb08da3-27df-46de-b60e-583bb7e31af4",
    "metadata": {},
    "outputs": [],
    "source": [
     "for doc in loader.load():\n",
     "    print(\"---\")\n",
     "    print(doc.page_content.strip()[:60]+\"...\")"
    ]
   },
   {
    "cell_type": "markdown",
    "id": "cd13d7d1-db7a-498d-ac98-76ccd9ad9019",
    "metadata": {},
    "source": [
     "## Customize the search pattern\n",
     "\n",
     "All parameters compatible with the Google [`list()`](https://developers.google.com/drive/api/v3/reference/files/list)\n",
     "API can be set.\n",
     "\n",
     "To specify the new pattern of the Google request, you can use a `PromptTemplate()`.\n",
     "The variables for the prompt can be set with `kwargs` in the constructor.\n",
     "Some pre-formatted requests are proposed (use `{query}`, `{folder_id}` and/or `{mime_type}`):\n",
     "\n",
     "You can customize the criteria to select the files. A set of predefined filters is proposed:\n",
     "| template                               | description                                                             |\n",
     "| -------------------------------------- | --------------------------------------------------------------------- |\n",
     "| gdrive-all-in-folder                   | Return all compatible files from a `folder_id`                          |\n",
     "| gdrive-query                           | Search `query` in all drives                                            |\n",
     "| gdrive-by-name                         | Search file with name `query`                                           |\n",
     "| gdrive-query-in-folder                 | Search `query` in `folder_id` (and sub-folders if `recursive=true`)     |\n",
     "| gdrive-mime-type                       | Search a specific `mime_type`                                           |\n",
     "| gdrive-mime-type-in-folder             | Search a specific `mime_type` in `folder_id`                            |\n",
     "| gdrive-query-with-mime-type            | Search `query` with a specific `mime_type`                              |\n",
     "| gdrive-query-with-mime-type-and-folder | Search `query` with a specific `mime_type` and in `folder_id`           |\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "81348d59-8fd6-45d4-9de3-5df5cff5c7e2",
    "metadata": {},
    "outputs": [],
    "source": [
     "loader = GoogleDriveLoader(\n",
     "    folder_id=folder_id,\n",
     "    recursive=False,\n",
     "    template=\"gdrive-query\",  # Default template to use\n",
     "    query=\"machine learning\",\n",
     "    num_results=2,  # Maximum number of files to load\n",
     "    supportsAllDrives=False,  # GDrive `list()` parameter\n",
     ")\n",
     "for doc in loader.load():\n",
     "    print(\"---\")\n",
     "    print(doc.page_content.strip()[:60]+\"...\")"
    ]
   },
   {
    "cell_type": "markdown",
    "id": "46c6ba5b-d4b1-4f0f-9801-5c1314021605",
    "metadata": {},
    "source": [
     "You can customize your pattern."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "5a5a323b-8d96-46b7-b46a-fd69bd2c8e04",
    "metadata": {},
    "outputs": [],
    "source": [
     "from langchain.prompts.prompt import PromptTemplate\n",
     "loader = GoogleDriveLoader(\n",
     "    folder_id=folder_id,\n",
     "    recursive=False,\n",
     "    template=PromptTemplate(\n",
     "        input_variables=[\"query\", \"query_name\"],\n",
     "        template=\"fullText contains '{query}' and name contains '{query_name}' and trashed=false\",\n",
     "    ),  # Default template to use\n",
     "    query=\"machine learning\",\n",
     "    query_name=\"ML\",\n",
     "    num_results=2,  # Maximum number of files to load\n",
     ")\n",
     "for doc in loader.load():\n",
     "    print(\"---\")\n",
     "    print(doc.page_content.strip()[:60]+\"...\")"
    ]
   },
   {
    "cell_type": "markdown",
    "id": "375bb465-8f69-407b-94bd-ffa3718ef500",
    "metadata": {},
    "source": [
     "### Modes for GSlide and GSheet\n",
     "The parameter `mode` accepts different values:\n",
     "\n",
     "- \"document\": return the body of each document\n",
     "- \"snippets\": return the description of each file (set in metadata of Google Drive files).\n",
     "\n",
     "\n",
     "The conversion can render, in Markdown format:\n",
     "- bullets\n",
     "- links\n",
     "- tables\n",
     "- titles\n",
     "\n",
     "The parameter `gslide_mode` accepts different values:\n",
     "\n",
     "- \"single\": one document with <PAGE BREAK> tags\n",
     "- \"slide\": one document per slide\n",
     "- \"elements\": one document for each element.\n"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "7493d7b0-0600-49af-8107-7f4597c92de7",
    "metadata": {},
    "outputs": [],
    "source": [
     "loader = GoogleDriveLoader(\n",
     "    template=\"gdrive-mime-type\",\n",
     "    mime_type=\"application/vnd.google-apps.presentation\",  # Only GSlide files\n",
     "    gslide_mode=\"slide\",\n",
     "    num_results=2,  # Maximum number of files to load\n",
     ")\n",
     "for doc in loader.load():\n",
     "    print(\"---\")\n",
     "    print(doc.page_content.strip()[:60]+\"...\")"
    ]
   },
   {
    "cell_type": "markdown",
    "id": "9bf338fb-02d7-452f-8679-c50419b13464",
    "metadata": {},
    "source": [
     "The parameter `gsheet_mode` accepts different values:\n",
     "- `\"single\"`: generate one document per line\n",
     "- `\"elements\"`: one document with a markdown array and <PAGE BREAK> tags."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "469f5af0-67db-4f15-8aee-88cde480729b",
    "metadata": {},
    "outputs": [],
    "source": [
     "loader = GoogleDriveLoader(\n",
     "    template=\"gdrive-mime-type\",\n",
     "    mime_type=\"application/vnd.google-apps.spreadsheet\",  # Only GSheet files\n",
     "    gsheet_mode=\"elements\",\n",
     "    num_results=2,  # Maximum number of files to load\n",
     ")\n",
     "for doc in loader.load():\n",
     "    print(\"---\")\n",
     "    print(doc.page_content.strip()[:60]+\"...\")"
    ]
   },
   {
    "cell_type": "markdown",
    "id": "09acb864-e919-4add-9e06-deba6f7f0cd8",
    "metadata": {},
    "source": [
     "## Advanced usage\n",
     "All Google Drive files have a 'description' in their metadata. This field can be used to memorize a summary of the document or other indexed tags (see method `lazy_update_description_with_summary()`).\n",
     "\n",
     "If you use `mode=\"snippets\"`, only the description will be used for the body. Otherwise, `metadata['summary']` carries the field.\n",
     "\n",
     "Sometimes, a specific filter can be used to extract information from the filename, or to select files matching specific criteria. You can use a filter for this.\n",
     "\n",
     "Sometimes, many documents are returned. It's not necessary to hold all documents in memory at the same time: you can use the lazy versions of the methods to get one document at a time. It's also better to use a complex query in place of a recursive search, since with `recursive=True` a query must be applied to each folder."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "a5e9c8eb-a266-4ae6-a760-d7826a0aa7c5",
    "metadata": {},
    "outputs": [],
    "source": [
     "import os\n",
     "loader = GoogleDriveLoader(\n",
     "    gdrive_api_file=os.environ[\"GOOGLE_ACCOUNT_FILE\"],\n",
     "    num_results=2,\n",
     "    template=\"gdrive-query\",\n",
     "    filter=lambda search, file: \"#test\" not in file.get('description',''),\n",
     "    query='machine learning',\n",
     "    supportsAllDrives=False,\n",
     ")\n",
     "for doc in loader.load():\n",
     "    print(\"---\")\n",
     "    print(doc.page_content.strip()[:60]+\"...\")"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "51efa73a-4e2d-4f9c-abaf-6c9bde2ff69d",
    "metadata": {},
    "outputs": [],
    "source": []
   }
  ],
  "metadata": {

@@ -244,7 +525,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.13"
+   "version": "3.9.1"
   }
  },
  "nbformat": 4,
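Stepping back from the diff above: the "Advanced usage" cell mentions lazy versions of the load methods. A short sketch, assuming the loader exposes the standard `lazy_load()` generator of the base loader interface (same `loader` object as in the cells above):

```python
# Yields one Document at a time instead of materializing the whole list.
for doc in loader.lazy_load():
    print("---")
    print(doc.page_content.strip()[:60] + "...")
```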
@@ -7,9 +7,20 @@
    "# Llama.cpp\n",
    "\n",
    "[llama-cpp-python](https://github.com/abetlen/llama-cpp-python) is a Python binding for [llama.cpp](https://github.com/ggerganov/llama.cpp). \n",
-   "It supports [several LLMs](https://github.com/ggerganov/llama.cpp).\n",
-   "\n",
-   "This notebook goes over how to run `llama-cpp-python` within LangChain."
+   "It supports inference for [many LLMs](https://github.com/ggerganov/llama.cpp), which can be accessed on [HuggingFace](https://huggingface.co/TheBloke).\n",
+   "\n",
+   "This notebook goes over how to run `llama-cpp-python` within LangChain.\n",
+   "\n",
+   "**Note: new versions of `llama-cpp-python` use GGUF model files (see [here](https://github.com/abetlen/llama-cpp-python/pull/633)).**\n",
+   "\n",
+   "This is a breaking change.\n",
+   " \n",
+   "To convert existing GGML models to GGUF you can run the following in [llama.cpp](https://github.com/ggerganov/llama.cpp):\n",
+   "\n",
+   "```\n",
+   "python ./convert-llama-ggmlv3-to-gguf.py --eps 1e-5 --input models/openorca-platypus2-13b.ggmlv3.q4_0.bin --output models/openorca-platypus2-13b.gguf.q4_0.bin\n",
+   "```"
   ]
  },
  {
@@ -19,7 +30,7 @@
   "## Installation\n",
   "\n",
   "There are different options on how to install the llama-cpp package: \n",
-  "- only CPU usage\n",
+  "- CPU usage\n",
   "- CPU + GPU (using one of many BLAS backends)\n",
   "- Metal GPU (MacOS with Apple Silicon Chip) \n",
  "\n",
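For reference, the plain CPU-only install the first option refers to is typically just the base package; the BLAS and Metal builds pass extra `CMAKE_ARGS`, as the notebook's later sections describe. A minimal sketch in the notebook's own style:

```python
# CPU-only build of the binding; GPU variants need build flags instead.
!pip install llama-cpp-python
```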
@@ -171,7 +182,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 4,
+  "execution_count": 3,
   "metadata": {
    "tags": []
   },

@@ -192,7 +203,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 5,
+  "execution_count": 2,
   "metadata": {
    "tags": []
   },

@@ -207,15 +218,14 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 6,
+  "execution_count": 4,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Callbacks support token-wise streaming\n",
-   "callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])\n",
-   "# Verbose is required to pass to the callback manager"
+   "callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])"
   ]
  },
  {

@@ -240,12 +250,12 @@
   "source": [
    "# Make sure the model path is correct for your system!\n",
    "llm = LlamaCpp(\n",
-   "    model_path=\"/Users/rlm/Desktop/Code/llama/llama-2-7b-ggml/llama-2-7b-chat.ggmlv3.q4_0.bin\",\n",
+   "    model_path=\"/Users/rlm/Desktop/Code/llama.cpp/models/openorca-platypus2-13b.gguf.q4_0.bin\",\n",
    "    temperature=0.75,\n",
    "    max_tokens=2000,\n",
    "    top_p=1,\n",
-   "    callback_manager=callback_manager,\n",
-   "    verbose=True,\n",
+   "    callback_manager=callback_manager, \n",
+   "    verbose=True,  # Verbose is required to pass to the callback manager\n",
    ")"
   ]
  },

@@ -375,7 +385,6 @@
   ],
   "source": [
    "question = \"What NFL team won the Super Bowl in the year Justin Bieber was born?\"\n",
-   "\n",
    "llm_chain.run(question)"
   ]
  },

@@ -397,7 +406,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 9,
+  "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [

@@ -406,46 +415,32 @@
    "\n",
    "# Make sure the model path is correct for your system!\n",
    "llm = LlamaCpp(\n",
-   "    model_path=\"./ggml-model-q4_0.bin\",\n",
+   "    model_path=\"/Users/rlm/Desktop/Code/llama.cpp/models/openorca-platypus2-13b.gguf.q4_0.bin\",\n",
    "    n_gpu_layers=n_gpu_layers,\n",
    "    n_batch=n_batch,\n",
    "    callback_manager=callback_manager,\n",
-   "    verbose=True,\n",
+   "    verbose=True,  # Verbose is required to pass to the callback manager\n",
    ")"
   ]
  },
  {
-  "cell_type": "code",
-  "execution_count": 7,
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "llm_chain = LLMChain(prompt=prompt, llm=llm)"
-  ]
- },
- {
   "cell_type": "code",
-  "execution_count": 8,
+  "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     " We are looking for an NFL team that won the Super Bowl when Justin Bieber (born March 1, 1994) was born. \n",
-     "\n",
-     "First, let's look up which year is closest to when Justin Bieber was born:\n",
-     "\n",
-     "* The year before he was born: 1993\n",
-     "* The year of his birth: 1994\n",
-     "* The year after he was born: 1995\n",
+     "1. Identify Justin Bieber's birth date: Justin Bieber was born on March 1, 1994.\n",
      "\n",
-     "We want to know what NFL team won the Super Bowl in the year that is closest to when Justin Bieber was born. Therefore, we should look up the NFL team that won the Super Bowl in either 1993 or 1994.\n",
+     "2. Find the Super Bowl winner of that year: The NFL season of 1993 with the Super Bowl being played in January or of 1994.\n",
      "\n",
-     "Now let's find out which NFL team did win the Super Bowl in either of those years:\n",
+     "3. Determine which team won the game: The Dallas Cowboys faced the Buffalo Bills in Super Bowl XXVII on January 31, 1993 (as the year is mis-labelled due to a error). The Dallas Cowboys won this matchup.\n",
      "\n",
-     "* In 1993, the San Francisco 49ers won the Super Bowl against the Dallas Cowboys by a score of 20-16.\n",
-     "* In 1994, the San Francisco 49ers won the Super Bowl again, this time against the San Diego Chargers by a score of 49-26.\n"
+     "So, Justin Bieber was born when the Dallas Cowboys were the reigning NFL Super Bowl."
     ]
    },
    {

@@ -453,27 +448,27 @@
     "output_type": "stream",
     "text": [
      "\n",
-     "llama_print_timings: load time = 238.10 ms\n",
-     "llama_print_timings: sample time = 84.23 ms / 256 runs ( 0.33 ms per token)\n",
-     "llama_print_timings: prompt eval time = 238.04 ms / 49 tokens ( 4.86 ms per token)\n",
-     "llama_print_timings: eval time = 10391.96 ms / 255 runs ( 40.75 ms per token)\n",
-     "llama_print_timings: total time = 15664.80 ms\n"
+     "llama_print_timings: load time = 427.63 ms\n",
+     "llama_print_timings: sample time = 115.85 ms / 164 runs ( 0.71 ms per token, 1415.67 tokens per second)\n",
+     "llama_print_timings: prompt eval time = 427.53 ms / 45 tokens ( 9.50 ms per token, 105.26 tokens per second)\n",
+     "llama_print_timings: eval time = 4526.53 ms / 163 runs ( 27.77 ms per token, 36.01 tokens per second)\n",
+     "llama_print_timings: total time = 5293.77 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
-      "\" We are looking for an NFL team that won the Super Bowl when Justin Bieber (born March 1, 1994) was born. \\n\\nFirst, let's look up which year is closest to when Justin Bieber was born:\\n\\n* The year before he was born: 1993\\n* The year of his birth: 1994\\n* The year after he was born: 1995\\n\\nWe want to know what NFL team won the Super Bowl in the year that is closest to when Justin Bieber was born. Therefore, we should look up the NFL team that won the Super Bowl in either 1993 or 1994.\\n\\nNow let's find out which NFL team did win the Super Bowl in either of those years:\\n\\n* In 1993, the San Francisco 49ers won the Super Bowl against the Dallas Cowboys by a score of 20-16.\\n* In 1994, the San Francisco 49ers won the Super Bowl again, this time against the San Diego Chargers by a score of 49-26.\\n\""
+      "\"\\n\\n1. Identify Justin Bieber's birth date: Justin Bieber was born on March 1, 1994.\\n\\n2. Find the Super Bowl winner of that year: The NFL season of 1993 with the Super Bowl being played in January or of 1994.\\n\\n3. Determine which team won the game: The Dallas Cowboys faced the Buffalo Bills in Super Bowl XXVII on January 31, 1993 (as the year is mis-labelled due to a error). The Dallas Cowboys won this matchup.\\n\\nSo, Justin Bieber was born when the Dallas Cowboys were the reigning NFL Super Bowl.\""
      ]
     },
-    "execution_count": 8,
+    "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
+   "llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
    "question = \"What NFL team won the Super Bowl in the year Justin Bieber was born?\"\n",
    "\n",
    "llm_chain.run(question)"
   ]
  },
@@ -503,15 +498,14 @@
   "source": [
    "n_gpu_layers = 1  # Metal set to 1 is enough.\n",
    "n_batch = 512  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.\n",
-   "\n",
    "# Make sure the model path is correct for your system!\n",
    "llm = LlamaCpp(\n",
-   "    model_path=\"./ggml-model-q4_0.bin\",\n",
+   "    model_path=\"/Users/rlm/Desktop/Code/llama.cpp/models/openorca-platypus2-13b.gguf.q4_0.bin\",\n",
    "    n_gpu_layers=n_gpu_layers,\n",
    "    n_batch=n_batch,\n",
    "    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls\n",
    "    callback_manager=callback_manager,\n",
-   "    verbose=True,\n",
+   "    verbose=True,  # Verbose is required to pass to the callback manager\n",
    ")"
   ]
  },
@@ -531,6 +525,144 @@
   "\n",
   "For the first call to the LLM, the performance may be slow due to the model compilation in Metal GPU."
  ]
 },
 {
  "cell_type": "markdown",
  "metadata": {},
  "source": [
   "### Grammars\n",
   "\n",
   "\n",
   "We can specify [grammars](https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md) to constrain model outputs.\n",
   "\n",
   "This will sample tokens according to the grammar.\n",
   " \n",
   "For example, supply the path to the specified `json.gbnf` file in order to produce JSON."
  ]
 },
 {
  "cell_type": "code",
  "execution_count": null,
  "metadata": {},
  "outputs": [],
  "source": [
   "n_gpu_layers = 1  # Metal set to 1 is enough.\n",
   "n_batch = 512  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.\n",
   "# Make sure the model path is correct for your system!\n",
   "llm = LlamaCpp(\n",
   "    model_path=\"/Users/rlm/Desktop/Code/llama.cpp/models/openorca-platypus2-13b.gguf.q4_0.bin\",\n",
   "    n_gpu_layers=n_gpu_layers,\n",
   "    n_batch=n_batch,\n",
   "    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls\n",
   "    callback_manager=callback_manager,\n",
   "    verbose=True,  # Verbose is required to pass to the callback manager\n",
   "    grammar_path=\"/Users/rlm/Desktop/Code/langchain-main/langchain/libs/langchain/langchain/llms/grammars/json.gbnf\",\n",
   ")"
  ]
 },
 {
  "cell_type": "code",
  "execution_count": 7,
  "metadata": {},
  "outputs": [
   {
    "name": "stdout",
    "output_type": "stream",
    "text": [
     "{\n",
     "  \"name\": \"John Doe\",\n",
     "  \"age\": 34,\n",
     "  \"\": {\n",
     "    \"title\": \"Software Developer\",\n",
     "    \"company\": \"Google\"\n",
     "  },\n",
     "  \"interests\": [\n",
     "    \"Sports\",\n",
     "    \"Music\",\n",
     "    \"Cooking\"\n",
     "  ],\n",
     "  \"address\": {\n",
     "    \"street_number\": 123,\n",
     "    \"street_name\": \"Oak Street\",\n",
     "    \"city\": \"Mountain View\",\n",
     "    \"state\": \"California\",\n",
     "    \"postal_code\": 94040\n",
     "  }}"
    ]
   },
   {
    "name": "stderr",
    "output_type": "stream",
    "text": [
     "\n",
     "llama_print_timings: load time = 357.51 ms\n",
     "llama_print_timings: sample time = 1213.30 ms / 144 runs ( 8.43 ms per token, 118.68 tokens per second)\n",
     "llama_print_timings: prompt eval time = 356.78 ms / 9 tokens ( 39.64 ms per token, 25.23 tokens per second)\n",
     "llama_print_timings: eval time = 3947.16 ms / 143 runs ( 27.60 ms per token, 36.23 tokens per second)\n",
     "llama_print_timings: total time = 5846.21 ms\n"
    ]
   }
  ],
  "source": [
   "%%capture captured --no-stdout\n",
   "result = llm(\"Describe a person in JSON format:\")"
  ]
 },
 {
  "cell_type": "markdown",
  "metadata": {},
  "source": [
   "We can also supply `list.gbnf` to return a list."
  ]
 },
 {
  "cell_type": "code",
  "execution_count": null,
  "metadata": {},
  "outputs": [],
  "source": [
   "n_gpu_layers = 1\n",
   "n_batch = 512\n",
   "llm = LlamaCpp(\n",
   "    model_path=\"/Users/rlm/Desktop/Code/llama.cpp/models/openorca-platypus2-13b.gguf.q4_0.bin\",\n",
   "    n_gpu_layers=n_gpu_layers,\n",
   "    n_batch=n_batch,\n",
   "    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls\n",
   "    callback_manager=callback_manager,\n",
   "    verbose=True,\n",
   "    grammar_path=\"/Users/rlm/Desktop/Code/langchain-main/langchain/libs/langchain/langchain/llms/grammars/list.gbnf\",\n",
   ")"
  ]
 },
 {
  "cell_type": "code",
  "execution_count": 9,
  "metadata": {},
  "outputs": [
   {
    "name": "stdout",
    "output_type": "stream",
    "text": [
     "[\"The Catcher in the Rye\", \"Wuthering Heights\", \"Anna Karenina\"]\n"
    ]
   },
   {
    "name": "stderr",
    "output_type": "stream",
    "text": [
     "\n",
     "llama_print_timings: load time = 322.34 ms\n",
     "llama_print_timings: sample time = 232.60 ms / 26 runs ( 8.95 ms per token, 111.78 tokens per second)\n",
     "llama_print_timings: prompt eval time = 321.90 ms / 11 tokens ( 29.26 ms per token, 34.17 tokens per second)\n",
     "llama_print_timings: eval time = 680.82 ms / 25 runs ( 27.23 ms per token, 36.72 tokens per second)\n",
     "llama_print_timings: total time = 1295.27 ms\n"
    ]
   }
  ],
  "source": [
   "%%capture captured --no-stdout\n",
   "result = llm(\"List of top-3 my favourite books:\")"
  ]
 }
],
"metadata": {

@@ -549,7 +681,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-  "version": "3.10.9"
+  "version": "3.9.16"
  }
 },
 "nbformat": 4,
@ -5,8 +5,9 @@
"id": "f36d938c",
"metadata": {},
"source": [
"# Caching integrations\n",
"This notebook covers how to cache results of individual LLM calls."
"# LLM Caching integrations\n",
"\n",
"This notebook covers how to cache results of individual LLM calls using different caches."
]
},
{
@ -26,9 +27,12 @@
{
"cell_type": "markdown",
"id": "b50f0598",
"metadata": {},
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
"## In Memory Cache"
"## `In Memory` Cache"
]
},
{
@ -108,9 +112,12 @@
{
"cell_type": "markdown",
"id": "4bf59c12",
"metadata": {},
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
"## SQLite Cache"
"## `SQLite` Cache"
]
},
{
@ -203,9 +210,12 @@
{
"cell_type": "markdown",
"id": "278ad7ae",
"metadata": {},
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
"## Redis Cache"
"## `Redis` Cache"
]
},
{
@ -385,9 +395,12 @@
{
"cell_type": "markdown",
"id": "684eab55",
"metadata": {},
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
"## GPTCache\n",
"## `GPTCache`\n",
"\n",
"We can use [GPTCache](https://github.com/zilliztech/GPTCache) for exact match caching OR to cache results based on semantic similarity.\n",
"\n",
@ -614,9 +627,12 @@
{
"cell_type": "markdown",
"id": "726fe754",
"metadata": {},
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
"## Momento Cache\n",
"## `Momento` Cache\n",
"Use [Momento](/docs/ecosystem/integrations/momento.html) to cache prompts and responses.\n",
"\n",
"Requires the `momento` package; uncomment below to install:"
@ -723,9 +739,14 @@
{
"cell_type": "markdown",
"id": "934943dc",
"metadata": {},
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
"## SQLAlchemy Cache"
"## `SQLAlchemy` Cache\n",
"\n",
"You can use `SQLAlchemyCache` to cache with any SQL database supported by `SQLAlchemy`."
]
},
{
@ -735,8 +756,6 @@
"metadata": {},
"outputs": [],
"source": [
"# You can use SQLAlchemyCache to cache with any SQL database supported by SQLAlchemy.\n",
"\n",
"# from langchain.cache import SQLAlchemyCache\n",
"# from sqlalchemy import create_engine\n",
"\n",
@ -795,7 +814,10 @@
{
"cell_type": "markdown",
"id": "0c69d84d",
"metadata": {},
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
"## Optional Caching\n",
"You can also turn off caching for specific LLMs should you choose. In the example below, even though global caching is enabled, we turn it off for a specific LLM"
@ -874,7 +896,10 @@
{
"cell_type": "markdown",
"id": "5da41b77",
"metadata": {},
"metadata": {
"jp-MarkdownHeadingCollapsed": true,
"tags": []
},
"source": [
"## Optional Caching in Chains\n",
"You can also turn off caching for particular nodes in chains. Note that because of certain interfaces, it's often easier to construct the chain first, and then edit the LLM afterwards.\n",
@ -1022,9 +1047,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "venv"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@ -1036,7 +1061,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
"version": "3.10.12"
}
},
"nbformat": 4,
23
docs/extras/integrations/providers/ainetwork.mdx
Normal file
@ -0,0 +1,23 @@
# AINetwork

>[AI Network](https://www.ainetwork.ai/build-on-ain) is a layer 1 blockchain designed to accommodate
> large-scale AI models, utilizing a decentralized GPU network powered by the
> [$AIN token](https://www.ainetwork.ai/token), enriching AI-driven `NFTs` (`AINFTs`).


## Installation and Setup

You need to install the `ain-py` Python package.

```bash
pip install ain-py
```
You need to set the `AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY` environment variable to your AIN Blockchain Account Private Key.
## Toolkit

See a [usage example](/docs/integrations/toolkits/ainetwork).

```python
from langchain.agents.agent_toolkits.ainetwork.toolkit import AINetworkToolkit
```
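A minimal usage sketch (assuming the private key environment variable from the setup step is already exported; `get_tools()` is the shared toolkit interface for listing a toolkit's tools):

```python
import os

from langchain.agents.agent_toolkits.ainetwork.toolkit import AINetworkToolkit

# Assumes AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY was set as described above.
assert os.environ.get("AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY")

toolkit = AINetworkToolkit()
# Print the names of the tools (transfer, read/write values, app creation, ...).
print([tool.name for tool in toolkit.get_tools()])
```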
@ -2,10 +2,10 @@

>[Infino](https://github.com/infinohq/infino) is an open-source observability platform that stores both metrics and application logs together.

Key features of infino include:
- Metrics Tracking: Capture time taken by LLM model to handle request, errors, number of tokens, and costing indication for the particular LLM.
- Data Tracking: Log and store prompt, request, and response data for each LangChain interaction.
- Graph Visualization: Generate basic graphs over time, depicting metrics such as request duration, error occurrences, token count, and cost.
Key features of `Infino` include:
- **Metrics Tracking**: Capture time taken by LLM model to handle request, errors, number of tokens, and costing indication for the particular LLM.
- **Data Tracking**: Log and store prompt, request, and response data for each LangChain interaction.
- **Graph Visualization**: Generate basic graphs over time, depicting metrics such as request duration, error occurrences, token count, and cost.

## Installation and Setup

@ -15,7 +15,7 @@ First, you'll need to install the `infinopy` Python package as follows:
pip install infinopy
```

If you already have an Infino Server running, then you're good to go; but if
If you already have an `Infino Server` running, then you're good to go; but if
you don't, follow the next steps to start it:

- Make sure you have Docker installed
@ -28,7 +28,7 @@ you don't, follow the next steps to start it:

## Using Infino

See a [usage example of `InfinoCallbackHandler`](/docs/modules/callbacks/integrations/infino.html).
See a [usage example of `InfinoCallbackHandler`](/docs/integrations/callbacks/infino.html).

```python
from langchain.callbacks import InfinoCallbackHandler
279
docs/extras/integrations/retrievers/google_drive.ipynb
Normal file
@ -0,0 +1,279 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "b0ed136e-6983-4893-ae1b-b75753af05f8",
"metadata": {},
"source": [
"# Google Drive Retriever\n",
"This notebook covers how to retrieve documents from Google Drive.\n",
"\n",
"## Prerequisites\n",
"\n",
"1. Create a Google Cloud project or use an existing project\n",
"1. Enable the [Google Drive API](https://console.cloud.google.com/flows/enableapi?apiid=drive.googleapis.com)\n",
"1. [Authorize credentials for desktop app](https://developers.google.com/drive/api/quickstart/python#authorize_credentials_for_a_desktop_application)\n",
"1. `pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib`\n",
"\n",
"## Instructions for retrieving your Google Docs data\n",
"By default, the `GoogleDriveRetriever` expects the `credentials.json` file to be `~/.credentials/credentials.json`, but this is configurable using the `GOOGLE_ACCOUNT_FILE` environment variable. \n",
"The location of `token.json` uses the same directory (or use the parameter `token_path`). Note that `token.json` will be created automatically the first time you use the retriever.\n",
"\n",
"`GoogleDriveRetriever` can retrieve a selection of files with some requests. \n",
"\n",
"By default, if you use a `folder_id`, all the files inside this folder can be retrieved as `Document`s.\n"
]
},
{
"cell_type": "markdown",
"id": "35b94a93-97de-4af8-9cca-de9ffb7930c3",
"metadata": {},
"source": [
"You can obtain your folder and document id from the URL:\n",
"* Folder: https://drive.google.com/drive/u/0/folders/1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5 -> folder id is `\"1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5\"`\n",
"* Document: https://docs.google.com/document/d/1bfaMQ18_i56204VaQDVeAFpqEijJTgvurupdEDiaUQw/edit -> document id is `\"1bfaMQ18_i56204VaQDVeAFpqEijJTgvurupdEDiaUQw\"`\n",
"\n",
"The special value `root` is for your personal home."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9c9665c9-a023-4078-9d95-e43021cecb6f",
"metadata": {},
"outputs": [],
"source": [
"#!pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "878928a6-a5ae-4f74-b351-64e3b01733fe",
"metadata": {
"ExecuteTime": {
"end_time": "2023-05-09T10:45:59.438650905Z",
"start_time": "2023-05-09T10:45:57.955900302Z"
},
"tags": []
},
"outputs": [],
"source": [
"from langchain.retrievers import GoogleDriveRetriever"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "755907c2-145d-4f0f-9b15-07a628a2d2d2",
"metadata": {
"ExecuteTime": {
"end_time": "2023-05-09T10:45:59.442890834Z",
"start_time": "2023-05-09T10:45:59.440941528Z"
},
"tags": []
},
"outputs": [],
"source": [
"folder_id=\"root\"\n",
"#folder_id='1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5'"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2216c83f-68e4-4d2f-8ea2-5878fb18bbe7",
"metadata": {
"ExecuteTime": {
"end_time": "2023-05-09T10:45:59.795842403Z",
"start_time": "2023-05-09T10:45:59.445262457Z"
},
"tags": []
},
"outputs": [],
"source": [
"retriever = GoogleDriveRetriever(\n",
"    num_results=2,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "fa339ca0-f478-440c-ba80-0e5f41a19ce1",
"metadata": {},
"source": [
"By default, all files with these mime-types can be converted to `Document`.\n",
"- text/text\n",
"- text/plain\n",
"- text/html\n",
"- text/csv\n",
"- text/markdown\n",
"- image/png\n",
"- image/jpeg\n",
"- application/epub+zip\n",
"- application/pdf\n",
"- application/rtf\n",
"- application/vnd.google-apps.document (GDoc)\n",
"- application/vnd.google-apps.presentation (GSlide)\n",
"- application/vnd.google-apps.spreadsheet (GSheet)\n",
"- application/vnd.google.colaboratory (Notebook colab)\n",
"- application/vnd.openxmlformats-officedocument.presentationml.presentation (PPTX)\n",
"- application/vnd.openxmlformats-officedocument.wordprocessingml.document (DOCX)\n",
"\n",
"It's possible to update or customize this. See the documentation of `GoogleDriveRetriever`.\n",
"\n",
"But the corresponding packages must be installed."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9dadec48",
"metadata": {},
"outputs": [],
"source": [
"#!pip install unstructured"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8f3b6aa0-b45d-4e37-8c50-5bebe70fdb9d",
"metadata": {
"ExecuteTime": {
"end_time": "2023-05-09T10:46:00.990310466Z",
"start_time": "2023-05-09T10:45:59.798774595Z"
},
"tags": []
},
"outputs": [],
"source": [
"retriever.get_relevant_documents(\"machine learning\")"
]
},
{
"cell_type": "markdown",
"id": "8ff33817-8619-4897-8742-2216b9934d2a",
"metadata": {},
"source": [
"You can customize the criteria used to select the files. A set of predefined filters is available:\n",
"| template                               | description                                                           |\n",
"| -------------------------------------- | --------------------------------------------------------------------- |\n",
"| gdrive-all-in-folder                   | Return all compatible files from a `folder_id`                        |\n",
"| gdrive-query                           | Search `query` in all drives                                          |\n",
"| gdrive-by-name                         | Search file with name `query`                                         |\n",
"| gdrive-query-in-folder                 | Search `query` in `folder_id` (and sub-folders if `_recursive=true`)  |\n",
"| gdrive-mime-type                       | Search a specific `mime_type`                                         |\n",
"| gdrive-mime-type-in-folder             | Search a specific `mime_type` in `folder_id`                          |\n",
"| gdrive-query-with-mime-type            | Search `query` with a specific `mime_type`                            |\n",
"| gdrive-query-with-mime-type-and-folder | Search `query` with a specific `mime_type` and in `folder_id`         |"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9977c712-9659-4959-b508-f59cc7d49d44",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"retriever = GoogleDriveRetriever(\n",
"    template=\"gdrive-query\",  # Search everywhere\n",
"    num_results=2,  # But take only 2 documents\n",
")\n",
"for doc in retriever.get_relevant_documents(\"machine learning\"):\n",
"    print(\"---\")\n",
"    print(doc.page_content.strip()[:60]+\"...\")"
]
},
{
"cell_type": "markdown",
"id": "a5a0f3ef-26fb-4a5c-85f0-5aba90b682b1",
"metadata": {},
"source": [
"Otherwise, you can customize the prompt with a specialized `PromptTemplate`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b0bbebde-0487-4d20-9d77-8070e4f0e0d6",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain import PromptTemplate\n",
"retriever = GoogleDriveRetriever(\n",
"    template=PromptTemplate(input_variables=['query'],\n",
"                            # See https://developers.google.com/drive/api/guides/search-files\n",
"                            template=\"(fullText contains '{query}') \"\n",
"                            \"and mimeType='application/vnd.google-apps.document' \"\n",
"                            \"and modifiedTime > '2000-01-01T00:00:00' \"\n",
"                            \"and trashed=false\"),\n",
"    num_results=2,\n",
"    # See https://developers.google.com/drive/api/v3/reference/files/list\n",
"    includeItemsFromAllDrives=False,\n",
"    supportsAllDrives=False,\n",
")\n",
"for doc in retriever.get_relevant_documents(\"machine learning\"):\n",
"    print(f\"{doc.metadata['name']}:\")\n",
"    print(\"---\")\n",
"    print(doc.page_content.strip()[:60]+\"...\")"
]
},
{
"cell_type": "markdown",
"id": "9b6fed29-1666-452e-b677-401613270388",
"metadata": {},
"source": [
"# Use GDrive 'description' metadata\n",
"Each Google Drive file has a `description` field in its metadata (see the *details of a file*).\n",
"Use the `snippets` mode to return the description of selected files.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "342dbe12-ed83-40f4-8957-0cc8c4609542",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"retriever = GoogleDriveRetriever(\n",
"    template='gdrive-mime-type-in-folder',\n",
"    folder_id=folder_id,\n",
"    mime_type='application/vnd.google-apps.document',  # Only Google Docs\n",
"    num_results=2,\n",
"    mode='snippets',\n",
"    includeItemsFromAllDrives=False,\n",
"    supportsAllDrives=False,\n",
")\n",
"retriever.get_relevant_documents(\"machine learning\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@ -1,17 +1,17 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# AINetwork Toolkit\n",
"# AINetwork\n",
"\n",
"The AINetwork Toolkit is a set of tools for interacting with the AINetwork Blockchain. These tools allow you to transfer AIN, read and write values, create apps, and set permissions for specific paths within the blockchain database."
">[AI Network](https://www.ainetwork.ai/build-on-ain) is a layer 1 blockchain designed to accommodate large-scale AI models, utilizing a decentralized GPU network powered by the [$AIN token](https://www.ainetwork.ai/token), enriching AI-driven `NFTs` (`AINFTs`).\n",
">\n",
">The `AINetwork Toolkit` is a set of tools for interacting with the [AINetwork Blockchain](https://www.ainetwork.ai/public/whitepaper.pdf). These tools allow you to transfer `AIN`, read and write values, create apps, and set permissions for specific paths within the blockchain database."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@ -30,7 +30,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@ -51,7 +50,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@ -96,7 +94,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@ -119,7 +116,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@ -147,7 +143,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@ -157,7 +152,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@ -174,7 +168,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@ -213,7 +206,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@ -250,7 +242,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@ -290,7 +281,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@ -337,7 +327,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@ -362,7 +351,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@ -397,7 +385,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@ -438,7 +425,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@ -453,9 +440,8 @@
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"orig_nbformat": 4
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
215
docs/extras/integrations/toolkits/google_drive.ipynb
Normal file
@ -0,0 +1,215 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Google Drive tool\n",
"\n",
"This notebook walks through connecting LangChain to the Google Drive API.\n",
"\n",
"## Prerequisites\n",
"\n",
"1. Create a Google Cloud project or use an existing project\n",
"1. Enable the [Google Drive API](https://console.cloud.google.com/flows/enableapi?apiid=drive.googleapis.com)\n",
"1. [Authorize credentials for desktop app](https://developers.google.com/drive/api/quickstart/python#authorize_credentials_for_a_desktop_application)\n",
"1. `pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib`\n",
"\n",
"## Instructions for retrieving your Google Docs data\n",
"By default, the `GoogleDriveTools` and `GoogleDriveWrapper` expect the `credentials.json` file to be `~/.credentials/credentials.json`, but this is configurable using the `GOOGLE_ACCOUNT_FILE` environment variable. \n",
"The location of `token.json` uses the same directory (or use the parameter `token_path`). Note that `token.json` will be created automatically the first time you use the tool.\n",
"\n",
"`GoogleDriveSearchTool` can retrieve a selection of files with some requests. \n",
"\n",
"By default, if you use a `folder_id`, all the files inside this folder can be retrieved as `Document`s if the name matches the query.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#!pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can obtain your folder and document id from the URL:\n",
"* Folder: https://drive.google.com/drive/u/0/folders/1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5 -> folder id is `\"1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5\"`\n",
"* Document: https://docs.google.com/document/d/1bfaMQ18_i56204VaQDVeAFpqEijJTgvurupdEDiaUQw/edit -> document id is `\"1bfaMQ18_i56204VaQDVeAFpqEijJTgvurupdEDiaUQw\"`\n",
"\n",
"The special value `root` is for your personal home."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"folder_id=\"root\"\n",
"#folder_id='1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"By default, all files with these mime-types can be converted to `Document`.\n",
"- text/text\n",
"- text/plain\n",
"- text/html\n",
"- text/csv\n",
"- text/markdown\n",
"- image/png\n",
"- image/jpeg\n",
"- application/epub+zip\n",
"- application/pdf\n",
"- application/rtf\n",
"- application/vnd.google-apps.document (GDoc)\n",
"- application/vnd.google-apps.presentation (GSlide)\n",
"- application/vnd.google-apps.spreadsheet (GSheet)\n",
"- application/vnd.google.colaboratory (Notebook colab)\n",
"- application/vnd.openxmlformats-officedocument.presentationml.presentation (PPTX)\n",
"- application/vnd.openxmlformats-officedocument.wordprocessingml.document (DOCX)\n",
"\n",
"It's possible to update or customize this. See the documentation of `GoogleDriveAPIWrapper`.\n",
"\n",
"But the corresponding packages must be installed."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#!pip install unstructured"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.utilities.google_drive import GoogleDriveAPIWrapper\n",
"from langchain.tools.google_drive.tool import GoogleDriveSearchTool\n",
"\n",
"# By default, search only in the filename.\n",
"tool = GoogleDriveSearchTool(\n",
"    api_wrapper=GoogleDriveAPIWrapper(\n",
"        folder_id=folder_id,\n",
"        num_results=2,\n",
"        template=\"gdrive-query-in-folder\",  # Search in the body of documents\n",
"    )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import logging\n",
"logging.basicConfig(level=logging.INFO)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tool.run(\"machine learning\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tool.description"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import load_tools\n",
"tools = load_tools([\"google-drive-search\"],\n",
"                   folder_id=folder_id,\n",
"                   template=\"gdrive-query-in-folder\",\n",
"                   )"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use within an Agent"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain import OpenAI\n",
"from langchain.agents import initialize_agent, AgentType\n",
"llm = OpenAI(temperature=0)\n",
"agent = initialize_agent(\n",
"    tools=tools,\n",
"    llm=llm,\n",
"    agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"agent.run(\n",
"    \"Search in google drive, who is 'Yann LeCun' ?\"\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
@ -143,7 +143,7 @@
"\n",
"Chromium is one of the browsers supported by Playwright, a library used to control browser automation. \n",
"\n",
"Headless mode means that the browser is running without a graphical user interface, which is commonly used for web scrapin."
"Headless mode means that the browser is running without a graphical user interface, which is commonly used for web scraping."
]
},
{
@ -29,6 +29,15 @@ class SQLDatabaseChain(Chain):
            from langchain import OpenAI, SQLDatabase
            db = SQLDatabase(...)
            db_chain = SQLDatabaseChain.from_llm(OpenAI(), db)

    *Security note*: Make sure that the database connection uses credentials
        that are narrowly-scoped to only include the permissions this chain needs.
        Failure to do so may result in data corruption or loss, since this chain may
        attempt commands like `DROP TABLE` or `INSERT` if appropriately prompted.
        The best way to guard against such negative outcomes is to (as appropriate)
        limit the permissions granted to the credentials used with this chain.
        This issue shows an example negative outcome if these steps are not taken:
        https://github.com/langchain-ai/langchain/issues/5923
    """

    llm_chain: LLMChain
@ -49,7 +58,7 @@ class SQLDatabaseChain(Chain):
    return_direct: bool = False
    """Whether or not to return the result of querying the SQL table directly."""
    use_query_checker: bool = False
    """Whether or not the query checker tool should be used to attempt
    """Whether or not the query checker tool should be used to attempt
    to fix the initial SQL from the LLM."""
    query_checker_prompt: Optional[BasePromptTemplate] = None
    """The prompt template that should be used by the query checker"""
@ -197,6 +206,17 @@ class SQLDatabaseChain(Chain):
        prompt: Optional[BasePromptTemplate] = None,
        **kwargs: Any,
    ) -> SQLDatabaseChain:
        """Create a SQLDatabaseChain from an LLM and a database connection.

        *Security note*: Make sure that the database connection uses credentials
            that are narrowly-scoped to only include the permissions this chain needs.
            Failure to do so may result in data corruption or loss, since this chain may
            attempt commands like `DROP TABLE` or `INSERT` if appropriately prompted.
            The best way to guard against such negative outcomes is to (as appropriate)
            limit the permissions granted to the credentials used with this chain.
            This issue shows an example negative outcome if these steps are not taken:
            https://github.com/langchain-ai/langchain/issues/5923
        """
        prompt = prompt or SQL_PROMPTS.get(db.dialect, PROMPT)
        llm_chain = LLMChain(llm=llm, prompt=prompt)
        return cls(llm_chain=llm_chain, database=db, **kwargs)
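The security note above is actionable: a minimal sketch of narrowly-scoped credentials, assuming a PostgreSQL database and a hypothetical read-only role created beforehand with `CREATE ROLE langchain_ro LOGIN PASSWORD '...';` and `GRANT SELECT ON ALL TABLES IN SCHEMA public TO langchain_ro;` (the role name, password, and URI are illustrative):

```python
from langchain import OpenAI, SQLDatabase

# Connect with the read-only role rather than a superuser account, so the
# chain cannot execute DROP TABLE or INSERT regardless of how it is prompted.
db = SQLDatabase.from_uri("postgresql://langchain_ro:<password>@localhost:5432/mydb")
db_chain = SQLDatabaseChain.from_llm(OpenAI(temperature=0), db)
```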
83
libs/experimental/tests/unit_tests/conftest.py
Normal file
@ -0,0 +1,83 @@
"""Configuration for unit tests."""
from importlib import util
from typing import Dict, Sequence

import pytest
from pytest import Config, Function, Parser


def pytest_addoption(parser: Parser) -> None:
    """Add custom command line options to pytest."""
    parser.addoption(
        "--only-extended",
        action="store_true",
        help="Only run extended tests. Does not allow skipping any extended tests.",
    )
    parser.addoption(
        "--only-core",
        action="store_true",
        help="Only run core tests. Never runs any extended tests.",
    )


def pytest_collection_modifyitems(config: Config, items: Sequence[Function]) -> None:
    """Add implementations for handling custom markers.

    At the moment, this adds support for a custom `requires` marker.

    The `requires` marker is used to denote tests that require one or more packages
    to be installed to run. If the package is not installed, the test is skipped.

    The `requires` marker syntax is:

    .. code-block:: python

        @pytest.mark.requires("package1", "package2")
        def test_something():
            ...
    """
    # Mapping from the name of a package to whether it is installed or not.
    # Used to avoid repeated calls to `util.find_spec`
    required_pkgs_info: Dict[str, bool] = {}

    only_extended = config.getoption("--only-extended") or False
    only_core = config.getoption("--only-core") or False

    if only_extended and only_core:
        raise ValueError("Cannot specify both `--only-extended` and `--only-core`.")

    for item in items:
        requires_marker = item.get_closest_marker("requires")
        if requires_marker is not None:
            if only_core:
                item.add_marker(pytest.mark.skip(reason="Skipping not a core test."))
                continue

            # Iterate through the list of required packages
            required_pkgs = requires_marker.args
            for pkg in required_pkgs:
                # If we haven't yet checked whether the pkg is installed
                # let's check it and store the result.
                if pkg not in required_pkgs_info:
                    required_pkgs_info[pkg] = util.find_spec(pkg) is not None

                if not required_pkgs_info[pkg]:
                    if only_extended:
                        pytest.fail(
                            f"Package `{pkg}` is not installed but is required for "
                            f"extended tests. Please install the given package and "
                            f"try again.",
                        )

                    else:
                        # If the package is not installed, we immediately break
                        # and mark the test as skipped.
                        item.add_marker(
                            pytest.mark.skip(reason=f"Requires pkg: `{pkg}`")
                        )
                        break
        else:
            if only_extended:
                item.add_marker(
                    pytest.mark.skip(reason="Skipping not an extended test.")
                )
@ -20,6 +20,7 @@ from langchain.callbacks.human import HumanApprovalCallbackHandler
from langchain.callbacks.infino_callback import InfinoCallbackHandler
from langchain.callbacks.labelstudio_callback import LabelStudioCallbackHandler
from langchain.callbacks.manager import (
    collect_runs,
    get_openai_callback,
    tracing_enabled,
    tracing_v2_enabled,
@ -66,6 +67,7 @@ __all__ = [
    "get_openai_callback",
    "tracing_enabled",
    "tracing_v2_enabled",
    "collect_runs",
    "wandb_tracing_enabled",
    "FlyteCallbackHandler",
    "SageMakerCallbackHandler",
@ -38,6 +38,7 @@ from langchain.callbacks.base import (
)
from langchain.callbacks.openai_info import OpenAICallbackHandler
from langchain.callbacks.stdout import StdOutCallbackHandler
from langchain.callbacks.tracers import run_collector
from langchain.callbacks.tracers.langchain import LangChainTracer
from langchain.callbacks.tracers.langchain_v1 import LangChainTracerV1, TracerSessionV1
from langchain.callbacks.tracers.stdout import ConsoleCallbackHandler
@ -75,6 +76,11 @@ tracing_v2_callback_var: ContextVar[
] = ContextVar(  # noqa: E501
    "tracing_callback_v2", default=None
)
run_collector_var: ContextVar[
    Optional[run_collector.RunCollectorCallbackHandler]
] = ContextVar(  # noqa: E501
    "run_collector", default=None
)


def _get_debug() -> bool:
@ -184,6 +190,24 @@ def tracing_v2_enabled(
        tracing_v2_callback_var.set(None)


@contextmanager
def collect_runs() -> Generator[run_collector.RunCollectorCallbackHandler, None, None]:
    """Collect all run traces in context.

    Returns:
        run_collector.RunCollectorCallbackHandler: The run collector callback handler.

    Example:
        >>> with collect_runs() as runs_cb:
                chain.invoke("foo")
                run_id = runs_cb.traced_runs[0].id
    """
    cb = run_collector.RunCollectorCallbackHandler()
    run_collector_var.set(cb)
    yield cb
    run_collector_var.set(None)


@contextmanager
def trace_as_chain_group(
    group_name: str,
@ -1712,6 +1736,7 @@ def _configure(
    tracer_project = os.environ.get(
        "LANGCHAIN_PROJECT", os.environ.get("LANGCHAIN_SESSION", "default")
    )
    run_collector_ = run_collector_var.get()
    debug = _get_debug()
    if (
        verbose
@ -1774,4 +1799,6 @@ def _configure(
        for handler in callback_manager.handlers
    ):
        callback_manager.add_handler(open_ai, True)
    if run_collector_ is not None:
        callback_manager.add_handler(run_collector_, False)
    return callback_manager
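A minimal standalone sketch of the new `collect_runs` context manager (exported through `langchain.callbacks` by the `__init__` diff above; the fake LLM is a deterministic stand-in so the snippet runs without credentials):

```python
from langchain import LLMChain, PromptTemplate
from langchain.callbacks import collect_runs
from langchain.llms.fake import FakeListLLM

llm = FakeListLLM(responses=["bar"])  # deterministic stand-in model
chain = LLMChain(llm=llm, prompt=PromptTemplate.from_template("{question}"))

# Every run traced inside the context is gathered on the handler.
with collect_runs() as runs_cb:
    chain("foo")
    run_id = runs_cb.traced_runs[0].id
print(run_id)
```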
@ -3,10 +3,11 @@ from __future__ import annotations

import logging
from concurrent.futures import Future, ThreadPoolExecutor, wait
from typing import Any, List, Optional, Sequence, Set, Union
from typing import Any, Dict, List, Optional, Sequence, Set, Union
from uuid import UUID

from langsmith import Client, RunEvaluator
import langsmith
from langsmith import schemas as langsmith_schemas

from langchain.callbacks.manager import tracing_v2_enabled
from langchain.callbacks.tracers.base import BaseTracer
@ -62,13 +63,13 @@ class EvaluatorCallbackHandler(BaseTracer):
        The LangSmith project name to organize eval chain runs under.
    """

    name: str = "evaluator_callback_handler"
    name = "evaluator_callback_handler"

    def __init__(
        self,
        evaluators: Sequence[RunEvaluator],
        evaluators: Sequence[langsmith.RunEvaluator],
        max_workers: Optional[int] = None,
        client: Optional[Client] = None,
        client: Optional[langsmith.Client] = None,
        example_id: Optional[Union[UUID, str]] = None,
        skip_unfinished: bool = True,
        project_name: Optional[str] = "evaluators",
@ -86,10 +87,11 @@ class EvaluatorCallbackHandler(BaseTracer):
        self.futures: Set[Future] = set()
        self.skip_unfinished = skip_unfinished
        self.project_name = project_name
        self.logged_feedback: Dict[str, List[langsmith_schemas.Feedback]] = {}
        global _TRACERS
        _TRACERS.append(self)

    def _evaluate_in_project(self, run: Run, evaluator: RunEvaluator) -> None:
    def _evaluate_in_project(self, run: Run, evaluator: langsmith.RunEvaluator) -> None:
        """Evaluate the run in the project.

        Parameters
@ -102,11 +104,11 @@ class EvaluatorCallbackHandler(BaseTracer):
        """
        try:
            if self.project_name is None:
                self.client.evaluate_run(run, evaluator)
                feedback = self.client.evaluate_run(run, evaluator)
            with tracing_v2_enabled(
                project_name=self.project_name, tags=["eval"], client=self.client
            ):
                self.client.evaluate_run(run, evaluator)
                feedback = self.client.evaluate_run(run, evaluator)
        except Exception as e:
            logger.error(
                f"Error evaluating run {run.id} with "
@ -114,6 +116,8 @@ class EvaluatorCallbackHandler(BaseTracer):
                exc_info=True,
            )
            raise e
        example_id = str(run.reference_example_id)
        self.logged_feedback.setdefault(example_id, []).append(feedback)

    def _persist_run(self, run: Run) -> None:
        """Run the evaluator on the run.
117
libs/langchain/langchain/chat_loaders/imessage.py
Normal file
@ -0,0 +1,117 @@
"""IMessage Chat Loader.

This class is used to load chat sessions from the iMessage chat.db SQLite file.
It only works on macOS when you have iMessage enabled and have the chat.db file.

The chat.db file is likely located at ~/Library/Messages/chat.db. However, your
terminal may not have permission to access this file. To resolve this, you can
copy the file to a different location, change the permissions of the file, or
grant full disk access for your terminal emulator in System Settings > Security
and Privacy > Full Disk Access.
"""
from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING, Iterator, List, Optional, Union

from langchain import schema
from langchain.chat_loaders import base as chat_loaders

if TYPE_CHECKING:
    import sqlite3


class IMessageChatLoader(chat_loaders.BaseChatLoader):
    def __init__(self, path: Optional[Union[str, Path]] = None):
        """
        Initialize the IMessageChatLoader.

        Args:
            path (str or Path, optional): Path to the chat.db SQLite file.
                Defaults to None, in which case the default path
                ~/Library/Messages/chat.db will be used.
        """
        if path is None:
            path = Path.home() / "Library" / "Messages" / "chat.db"
        self.db_path = path if isinstance(path, Path) else Path(path)
        if not self.db_path.exists():
            raise FileNotFoundError(f"File {self.db_path} not found")
        try:
            import sqlite3  # noqa: F401
        except ImportError as e:
            raise ImportError(
                "The sqlite3 module is required to load iMessage chats.\n"
                "Please install it with `pip install pysqlite3`"
            ) from e

    def _load_single_chat_session(
        self, cursor: "sqlite3.Cursor", chat_id: int
    ) -> chat_loaders.ChatSession:
        """
        Load a single chat session from the iMessage chat.db.

        Args:
            cursor: SQLite cursor object.
            chat_id (int): ID of the chat session to load.

        Returns:
            ChatSession: Loaded chat session.
        """
        results: List[schema.HumanMessage] = []

        query = """
        SELECT message.date, handle.id, message.text
        FROM message
        JOIN chat_message_join ON message.ROWID = chat_message_join.message_id
        JOIN handle ON message.handle_id = handle.ROWID
        WHERE chat_message_join.chat_id = ?
        ORDER BY message.date ASC;
        """
        cursor.execute(query, (chat_id,))
        messages = cursor.fetchall()

        for date, sender, text in messages:
            if text:  # Skip empty messages
                results.append(
                    schema.HumanMessage(
                        role=sender,
                        content=text,
                        additional_kwargs={
                            "message_time": date,
                            "sender": sender,
                        },
                    )
                )

        return chat_loaders.ChatSession(messages=results)

    def lazy_load(self) -> Iterator[chat_loaders.ChatSession]:
        """
        Lazy load the chat sessions from the iMessage chat.db
        and yield them in the required format.

        Yields:
            ChatSession: Loaded chat session.
        """
        import sqlite3

        try:
            conn = sqlite3.connect(self.db_path)
        except sqlite3.OperationalError as e:
            raise ValueError(
                f"Could not open iMessage DB file {self.db_path}.\n"
                "Make sure your terminal emulator has disk access to this file.\n"
                "   You can either copy the DB file to an accessible location"
                " or grant full disk access for your terminal emulator."
                "  You can grant full disk access for your terminal emulator"
                " in System Settings > Security and Privacy > Full Disk Access."
            ) from e
        cursor = conn.cursor()

        # Fetch the list of chat IDs
        cursor.execute("SELECT ROWID FROM chat")
        chat_ids = [row[0] for row in cursor.fetchall()]

        for chat_id in chat_ids:
            yield self._load_single_chat_session(cursor, chat_id)

        conn.close()
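A minimal usage sketch of the loader (the copied database path is illustrative):

```python
from langchain.chat_loaders.imessage import IMessageChatLoader

# Point at a copy of chat.db that the terminal is allowed to read.
loader = IMessageChatLoader(path="/tmp/chat.db")

# lazy_load() yields one ChatSession per chat found in the database.
for session in loader.lazy_load():
    print(f"{len(session['messages'])} messages in this chat")
```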
29
libs/langchain/langchain/llms/grammars/json.gbnf
Normal file
@ -0,0 +1,29 @@
# Grammar for subset of JSON - doesn't support full string or number syntax

root   ::= object
value  ::= object | array | string | number | boolean | "null"

object ::=
  "{" ws (
    string ":" ws value
    ("," ws string ":" ws value)*
  )? "}"

array  ::=
  "[" ws (
    value
    ("," ws value)*
  )? "]"

string ::=
  "\"" (
    [^"\\] |
    "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
  )* "\"" ws

# Only plain integers currently
number ::= "-"? [0-9]+ ws
boolean ::= ("true" | "false") ws

# Optional space: by convention, applied in this grammar after literal chars when allowed
ws ::= ([ \t\n] ws)?
14
libs/langchain/langchain/llms/grammars/list.gbnf
Normal file
@ -0,0 +1,14 @@
root ::= "[" items "]" EOF

items ::= item ("," ws* item)*

item ::= string

string ::=
  "\"" word (ws+ word)* "\"" ws*

word ::= [a-zA-Z]+

ws ::= " "

EOF ::= "\n"
@ -1,5 +1,8 @@
from __future__ import annotations

import logging
from typing import Any, Dict, Iterator, List, Optional
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Union

from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM
@ -8,6 +11,9 @@ from langchain.schema.output import GenerationChunk
from langchain.utils import get_pydantic_field_names
from langchain.utils.utils import build_extra_kwargs

if TYPE_CHECKING:
    from llama_cpp import LlamaGrammar

logger = logging.getLogger(__name__)


@ -113,12 +119,35 @@ class LlamaCpp(LLM):
    streaming: bool = True
    """Whether to stream the results, token by token."""

    grammar_path: Optional[Union[str, Path]] = None
    """
    grammar_path: Path to the .gbnf file that defines formal grammars
    for constraining model outputs. For instance, the grammar can be used
    to force the model to generate valid JSON or to speak exclusively in emojis. At most
    one of grammar_path and grammar should be passed in.
    """
    grammar: Optional[Union[str, LlamaGrammar]] = None
    """
    grammar: formal grammar for constraining model outputs. For instance, the grammar
    can be used to force the model to generate valid JSON or to speak exclusively in
    emojis. At most one of grammar_path and grammar should be passed in.
    """

    verbose: bool = True
    """Print verbose output to stderr."""

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that llama-cpp-python library is installed."""
        try:
            from llama_cpp import Llama, LlamaGrammar
        except ImportError:
            raise ImportError(
                "Could not import llama-cpp-python library. "
                "Please install the llama-cpp-python library to "
                "use this embedding model: pip install llama-cpp-python"
            )

        model_path = values["model_path"]
        model_param_names = [
            "rope_freq_scale",
@ -146,21 +175,26 @@ class LlamaCpp(LLM):
        model_params.update(values["model_kwargs"])

        try:
            from llama_cpp import Llama

            values["client"] = Llama(model_path, **model_params)
        except ImportError:
            raise ImportError(
                "Could not import llama-cpp-python library. "
                "Please install the llama-cpp-python library to "
                "use this embedding model: pip install llama-cpp-python"
            )
        except Exception as e:
            raise ValueError(
                f"Could not load Llama model from path: {model_path}. "
                f"Received error {e}"
            )

        if values["grammar"] and values["grammar_path"]:
            grammar = values["grammar"]
            grammar_path = values["grammar_path"]
            raise ValueError(
                "Can only pass in one of grammar and grammar_path. Received "
                f"{grammar=} and {grammar_path=}."
            )
        elif isinstance(values["grammar"], str):
            values["grammar"] = LlamaGrammar.from_string(values["grammar"])
        elif values["grammar_path"]:
            values["grammar"] = LlamaGrammar.from_file(values["grammar_path"])
        else:
            pass
        return values

    @root_validator(pre=True)
@ -176,7 +210,7 @@ class LlamaCpp(LLM):
    @property
    def _default_params(self) -> Dict[str, Any]:
        """Get the default parameters for calling llama_cpp."""
        return {
        params = {
            "suffix": self.suffix,
            "max_tokens": self.max_tokens,
            "temperature": self.temperature,
@ -187,6 +221,9 @@ class LlamaCpp(LLM):
            "repeat_penalty": self.repeat_penalty,
            "top_k": self.top_k,
        }
        if self.grammar:
            params["grammar"] = self.grammar
        return params

    @property
    def _identifying_params(self) -> Dict[str, Any]:
@ -252,7 +289,10 @@ class LlamaCpp(LLM):
            # and return the combined strings from the first choices's text:
            combined_text_output = ""
            for chunk in self._stream(
                prompt=prompt, stop=stop, run_manager=run_manager, **kwargs
                prompt=prompt,
                stop=stop,
                run_manager=run_manager,
                **kwargs,
            ):
                combined_text_output += chunk.text
            return combined_text_output
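Because the validator accepts `grammar` as a raw string and feeds it through `LlamaGrammar.from_string`, a grammar can also be supplied inline instead of via `grammar_path`; a minimal sketch (the model path is illustrative):

```python
from langchain.llms import LlamaCpp

# A tiny gbnf grammar that constrains output to a bare "yes" or "no".
YES_NO_GRAMMAR = 'root ::= ("yes" | "no")'

llm = LlamaCpp(
    model_path="/path/to/model.gguf",  # illustrative local model path
    grammar=YES_NO_GRAMMAR,  # converted by LlamaGrammar.from_string during validation
)
print(llm("Is the sky blue? Answer:"))
```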
@ -2,7 +2,7 @@
import json
import logging
from pathlib import Path
from typing import Union
from typing import Callable, Dict, Union

import yaml

@ -26,10 +26,7 @@ def load_prompt_from_config(config: dict) -> BasePromptTemplate:
        raise ValueError(f"Loading {config_type} prompt not supported")

    prompt_loader = type_to_loader_dict[config_type]
    # Unclear why type error is being thrown here.
    # Incompatible return value type (got "Runnable[Dict[Any, Any], PromptValue]",
    # expected "BasePromptTemplate") [return-value]
    return prompt_loader(config)  # type: ignore[return-value]
    return prompt_loader(config)


def _load_template(var_name: str, config: dict) -> dict:
@ -148,8 +145,7 @@ def _load_prompt_from_file(file: Union[str, Path]) -> BasePromptTemplate:
    return load_prompt_from_config(config)


type_to_loader_dict = {
type_to_loader_dict: Dict[str, Callable[[dict], BasePromptTemplate]] = {
    "prompt": _load_prompt,
    "few_shot": _load_few_shot_prompt,
    # "few_shot_with_templates": _load_few_shot_with_templates_prompt,
}
@ -11,6 +11,7 @@ import uuid
|
||||
import warnings
|
||||
from enum import Enum
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Callable,
|
||||
Coroutine,
|
||||
@ -44,6 +45,9 @@ from langchain.schema.runnable import Runnable, RunnableConfig, RunnableLambda
|
||||
from langchain.smith.evaluation.config import EvalConfig, RunEvalConfig
|
||||
from langchain.smith.evaluation.string_run_evaluator import StringRunEvaluatorChain
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
MODEL_OR_CHAIN_FACTORY = Union[
|
||||
@ -63,6 +67,31 @@ class InputFormatError(Exception):
|
||||
## Shared Utilities
|
||||
|
||||
|
||||
class TestResult(dict):
|
||||
"""A dictionary of the results of a single test run."""
|
||||
|
||||
def to_dataframe(self) -> pd.DataFrame:
|
||||
"""Convert the results to a dataframe."""
|
||||
try:
|
||||
import pandas as pd
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"Pandas is required to convert the results to a dataframe."
|
||||
" to install pandas, run `pip install pandas`."
|
||||
) from e
|
||||
|
||||
indices = []
|
||||
records = []
|
||||
for example_id, result in self["results"].items():
|
||||
feedback = result["feedback"]
|
||||
records.append(
|
||||
{**{f.key: f.score for f in feedback}, "output": result["output"]}
|
||||
)
|
||||
indices.append(example_id)
|
||||
|
||||
return pd.DataFrame(records, index=indices)
|
||||
|
||||
|
||||
def _get_eval_project_url(api_url: str, project_id: str) -> str:
|
||||
"""Get the project url from the api url."""
|
||||
parsed = urlparse(api_url)
|
||||
@ -667,7 +696,7 @@ async def _arun_llm_or_chain(
|
||||
tags: Optional[List[str]] = None,
|
||||
callbacks: Optional[List[BaseCallbackHandler]] = None,
|
||||
input_mapper: Optional[Callable[[Dict], Any]] = None,
|
||||
) -> Union[List[dict], List[str], List[LLMResult], List[ChatResult]]:
|
||||
) -> Union[dict, str, LLMResult, ChatResult]:
|
||||
"""Asynchronously run the Chain or language model.
|
||||
|
||||
Args:
|
||||
@ -689,10 +718,10 @@ async def _arun_llm_or_chain(
|
||||
tracer.example_id = example.id
|
||||
else:
|
||||
previous_example_ids = None
|
||||
outputs = []
|
||||
chain_or_llm = (
|
||||
"LLM" if isinstance(llm_or_chain_factory, BaseLanguageModel) else "Chain"
|
||||
)
|
||||
result = None
|
||||
try:
|
||||
if isinstance(llm_or_chain_factory, BaseLanguageModel):
|
||||
output: Any = await _arun_llm(
|
||||
@ -711,15 +740,15 @@ async def _arun_llm_or_chain(
|
||||
callbacks=callbacks,
|
||||
input_mapper=input_mapper,
|
||||
)
|
||||
outputs.append(output)
|
||||
result = output
|
||||
except Exception as e:
|
||||
logger.warning(f"{chain_or_llm} failed for example {example.id}. Error: {e}")
|
||||
outputs.append({"Error": str(e)})
|
||||
result = {"Error": str(e)}
|
||||
if callbacks and previous_example_ids:
|
||||
for example_id, tracer in zip(previous_example_ids, callbacks):
|
||||
if hasattr(tracer, "example_id"):
|
||||
tracer.example_id = example_id
|
||||
return outputs
|
||||
return result
|
||||
|
||||
|
||||
async def _gather_with_concurrency(
|
||||
@ -856,7 +885,7 @@ async def _arun_on_examples(
|
||||
wrapped_model, examples, evaluation, data_type
|
||||
)
|
||||
examples = _validate_example_inputs(examples, wrapped_model, input_mapper)
|
||||
results: Dict[str, List[Any]] = {}
|
||||
results: Dict[str, dict] = {}
|
||||
|
||||
async def process_example(
|
||||
example: Example, callbacks: List[BaseCallbackHandler], job_state: dict
|
||||
@ -869,7 +898,7 @@ async def _arun_on_examples(
|
||||
callbacks=callbacks,
|
||||
input_mapper=input_mapper,
|
||||
)
|
||||
results[str(example.id)] = result
|
||||
results[str(example.id)] = {"output": result}
|
||||
job_state["num_processed"] += 1
|
||||
if verbose:
|
||||
print(
|
||||
@ -890,8 +919,14 @@ async def _arun_on_examples(
|
||||
),
|
||||
*(functools.partial(process_example, e) for e in examples),
|
||||
)
|
||||
all_feedback = {}
|
||||
for handler in evaluation_handlers:
|
||||
handler.wait_for_futures()
|
||||
all_feedback.update(handler.logged_feedback)
|
||||
# join the results and feedback on the example id
|
||||
for example_id, output_dict in results.items():
|
||||
feedback = all_feedback.get(example_id, [])
|
||||
output_dict["feedback"] = feedback
|
||||
return results
|
||||
|
||||
|
||||
@ -978,7 +1013,7 @@ def _run_llm_or_chain(
    tags: Optional[List[str]] = None,
    callbacks: Optional[List[BaseCallbackHandler]] = None,
    input_mapper: Optional[Callable[[Dict], Any]] = None,
) -> Union[List[dict], List[str], List[LLMResult], List[ChatResult]]:
) -> Union[dict, str, LLMResult, ChatResult]:
    """
    Run the Chain or language model synchronously.

@ -1001,10 +1036,10 @@ def _run_llm_or_chain(
        tracer.example_id = example.id
    else:
        previous_example_ids = None
    outputs = []
    chain_or_llm = (
        "LLM" if isinstance(llm_or_chain_factory, BaseLanguageModel) else "Chain"
    )
    result = None
    try:
        if isinstance(llm_or_chain_factory, BaseLanguageModel):
            output: Any = _run_llm(
@ -1023,18 +1058,18 @@ def _run_llm_or_chain(
            tags=tags,
            input_mapper=input_mapper,
        )
        outputs.append(output)
        result = output
    except Exception as e:
        logger.warning(
            f"{chain_or_llm} failed for example {example.id} with inputs:"
            f" {example.inputs}.\nError: {e}",
        )
        outputs.append({"Error": str(e)})
        result = {"Error": str(e)}
    if callbacks and previous_example_ids:
        for example_id, tracer in zip(previous_example_ids, callbacks):
            if hasattr(tracer, "example_id"):
                tracer.example_id = example_id
    return outputs
    return result

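The sync path mirrors the async one: callers that previously unwrapped `result[0]` now branch on the value itself. A standalone sketch with an illustrative stand-in for the runner:

from typing import Union

def fake_run_llm_or_chain(x: int) -> Union[dict, str]:
    """Stand-in (illustrative only) for the new single-result contract."""
    if x < 0:
        return {"Error": "negative input"}
    return str(x + 1)

result = fake_run_llm_or_chain(41)
if isinstance(result, dict) and "Error" in result:  # no more result[0]
    print(f"run failed: {result['Error']}")
else:
    print(result)  # "42"
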
def _run_on_examples(
@ -1075,7 +1110,7 @@ def _run_on_examples(
    Returns:
        A dictionary mapping example ids to the model outputs.
    """
    results: Dict[str, Any] = {}
    results: Dict[str, dict] = {}
    wrapped_model = _wrap_in_chain_factory(llm_or_chain_factory)
    project_name = _get_project_name(project_name, wrapped_model)
    tracer = LangChainTracer(
@ -1085,11 +1120,11 @@ def _run_on_examples(
        wrapped_model, examples, evaluation, data_type
    )
    examples = _validate_example_inputs(examples, wrapped_model, input_mapper)
    evalution_handler = EvaluatorCallbackHandler(
    evaluation_handler = EvaluatorCallbackHandler(
        evaluators=run_evaluators or [],
        client=client,
    )
    callbacks: List[BaseCallbackHandler] = [tracer, evalution_handler]
    callbacks: List[BaseCallbackHandler] = [tracer, evaluation_handler]
    for i, example in enumerate(examples):
        result = _run_llm_or_chain(
            example,
@ -1100,9 +1135,14 @@ def _run_on_examples(
        )
        if verbose:
            print(f"{i+1} processed", flush=True, end="\r")
        results[str(example.id)] = result
        results[str(example.id)] = {"output": result}
    tracer.wait_for_futures()
    evalution_handler.wait_for_futures()
    evaluation_handler.wait_for_futures()
    all_feedback = evaluation_handler.logged_feedback
    # join the results and feedback on the example id
    for example_id, output_dict in results.items():
        feedback = all_feedback.get(example_id, [])
        output_dict["feedback"] = feedback
    return results

@ -1276,10 +1316,10 @@ async def arun_on_dataset(
        input_mapper=input_mapper,
        data_type=dataset.data_type,
    )
    return {
        "project_name": project_name,
        "results": results,
    }
    return TestResult(
        project_name=project_name,
        results=results,
    )


def _handle_coroutine(coro: Coroutine) -> Any:
@ -1461,7 +1501,7 @@ def run_on_dataset(
        data_type=dataset.data_type,
    )
    results = _handle_coroutine(coro)
    return {
        "project_name": project_name,
        "results": results,
    }
    return TestResult(
        project_name=project_name,
        results=results,
    )

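Both entry points now return a `TestResult` rather than a plain dict; judging from the `to_dataframe` method above, it stays dict-like (it indexes `self["results"]`) while adding the dataframe conversion. A hedged usage sketch, where the client, dataset name, and chain are placeholders and pandas must be installed:

test_results = run_on_dataset(
    client=client,                          # a configured LangSmith client
    dataset_name="my-dataset",              # placeholder dataset
    llm_or_chain_factory=lambda: my_chain,  # placeholder chain factory
)
test_results["results"]           # still indexable like the old dict return
df = test_results.to_dataframe()  # new: per-example feedback scores + outputs
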
@ -19,7 +19,9 @@ def format_tool_to_openai_function(tool: BaseTool) -> FunctionDescription:
    if isinstance(tool, StructuredTool):
        schema_ = tool.args_schema.schema()
        # Bug with required missing for structured tools.
        required = sorted(schema_["properties"])  # BUG WORKAROUND
        required = schema_.get(
            "required", sorted(schema_["properties"])  # Backup is a BUG WORKAROUND
        )
        return {
            "name": tool.name,
            "description": tool.description,

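The fix prefers the schema's own `required` list and only falls back to treating every property as required when pydantic omits the key. A standalone sketch with an illustrative model:

from typing import Optional
from pydantic import BaseModel

class SearchArgs(BaseModel):
    query: str
    limit: Optional[int] = None

schema_ = SearchArgs.schema()
required = schema_.get("required", sorted(schema_["properties"]))
print(required)  # ["query"]; limit is no longer forced to be required
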
@ -1298,7 +1298,7 @@ class Qdrant(VectorStore):
                embeddings = OpenAIEmbeddings()
                qdrant = Qdrant.from_texts(texts, embeddings, "localhost")
        """
        qdrant = cls._construct_instance(
        qdrant = cls.construct_instance(
            texts,
            embedding,
            location,
@ -1474,7 +1474,7 @@ class Qdrant(VectorStore):
                embeddings = OpenAIEmbeddings()
                qdrant = await Qdrant.afrom_texts(texts, embeddings, "localhost")
        """
        qdrant = await cls._aconstruct_instance(
        qdrant = await cls.aconstruct_instance(
            texts,
            embedding,
            location,
@ -1510,7 +1510,7 @@ class Qdrant(VectorStore):
        return qdrant

    @classmethod
    def _construct_instance(
    def construct_instance(
        cls: Type[Qdrant],
        texts: List[str],
        embedding: Embeddings,
@ -1676,7 +1676,7 @@ class Qdrant(VectorStore):
        return qdrant

    @classmethod
    async def _aconstruct_instance(
    async def aconstruct_instance(
        cls: Type[Qdrant],
        texts: List[str],
        embedding: Embeddings,

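Dropping the leading underscore makes the constructor part of the public surface. A hedged sketch of calling it directly; the argument list beyond `(texts, embedding, location)` is assumed from the surrounding diff, and `:memory:` is Qdrant's in-process mode:

from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Qdrant

embeddings = OpenAIEmbeddings()  # requires OPENAI_API_KEY
# Set up the client and collection without going through from_texts:
qdrant = Qdrant.construct_instance(
    ["sample text"],  # texts used to infer the vector size
    embeddings,
    location=":memory:",
)
qdrant.add_texts(["hello qdrant"])
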
@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain"
version = "0.0.274"
version = "0.0.275"
description = "Building applications with LLMs through composability"
authors = []
license = "MIT"

@ -0,0 +1,16 @@
"""Test the run collector."""

import uuid

from langchain.callbacks import collect_runs
from tests.unit_tests.llms.fake_llm import FakeLLM


def test_collect_runs() -> None:
    llm = FakeLLM(queries={"hi": "hello"}, sequential_responses=True)
    with collect_runs() as cb:
        llm.predict("hi")
        assert cb.traced_runs
        assert len(cb.traced_runs) == 1
        assert isinstance(cb.traced_runs[0].id, uuid.UUID)
        assert cb.traced_runs[0].inputs == {"prompts": ["hi"]}

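Beyond the unit test, the same context manager is handy for grabbing the run id of a traced call; a hedged sketch reusing the fake-LLM pattern from the test (the FakeLLM import only resolves inside this repo's test suite):

import uuid

from langchain.callbacks import collect_runs
from tests.unit_tests.llms.fake_llm import FakeLLM

llm = FakeLLM(queries={"hi": "hello"}, sequential_responses=True)
with collect_runs() as cb:
    llm.predict("hi")              # any traced call works here
    run_id = cb.traced_runs[0].id  # e.g. to attach LangSmith feedback later
assert isinstance(run_id, uuid.UUID)
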
@ -182,14 +182,12 @@ def test_run_llm_or_chain_with_input_mapper() -> None:
        return {"the right input": inputs["the wrong input"]}

    result = _run_llm_or_chain(example, lambda: mock_chain, input_mapper=input_mapper)
    assert len(result) == 1
    assert result[0] == {"output": "2", "the right input": "1"}
    assert result == {"output": "2", "the right input": "1"}
    bad_result = _run_llm_or_chain(
        example,
        lambda: mock_chain,
    )
    assert len(bad_result) == 1
    assert "Error" in bad_result[0]
    assert "Error" in bad_result

    # Try with LLM
    def llm_input_mapper(inputs: dict) -> str:
@ -197,9 +195,7 @@ def test_run_llm_or_chain_with_input_mapper() -> None:
        return "the right input"

    mock_llm = FakeLLM(queries={"the right input": "somenumber"})
    result = _run_llm_or_chain(example, mock_llm, input_mapper=llm_input_mapper)
    assert len(result) == 1
    llm_result = result[0]
    llm_result = _run_llm_or_chain(example, mock_llm, input_mapper=llm_input_mapper)
    assert isinstance(llm_result, str)
    assert llm_result == "somenumber"

@ -300,8 +296,8 @@ async def test_arun_on_dataset(monkeypatch: pytest.MonkeyPatch) -> None:
        tags: Optional[List[str]] = None,
        callbacks: Optional[Any] = None,
        **kwargs: Any,
    ) -> List[Dict[str, Any]]:
        return [{"result": f"Result for example {example.id}"}]
    ) -> Dict[str, Any]:
        return {"result": f"Result for example {example.id}"}

    def mock_create_project(*args: Any, **kwargs: Any) -> Any:
        proj = mock.MagicMock()
@ -328,9 +324,10 @@ async def test_arun_on_dataset(monkeypatch: pytest.MonkeyPatch) -> None:
    )

    expected = {
        uuid_: [
            {"result": f"Result for example {uuid.UUID(uuid_)}"} for _ in range(1)
        ]
        uuid_: {
            "output": {"result": f"Result for example {uuid.UUID(uuid_)}"},
            "feedback": [],
        }
        for uuid_ in uuids
    }
    assert results["results"] == expected