mirror of https://github.com/hwchase17/langchain.git
synced 2026-01-23 05:09:12 +00:00

Compare commits (27 commits): langchain-… → vwp/feedba…
| SHA1 |
|---|
| 8389cb89ef |
| 09ea7f6e24 |
| ec8cbdfc9e |
| 6655886d94 |
| 144c18eb94 |
| 38c4fc053f |
| 38210b2871 |
| 4392743be2 |
| f64a2075d9 |
| 78eca3faa1 |
| 1fdd086e71 |
| fd99f3c4e9 |
| 76bac217f3 |
| 319978fc77 |
| a45dec4f5b |
| 0b64efe424 |
| 1e84d283cc |
| 5e86d2e6ef |
| 8f3040f7e1 |
| 1a3e468cbc |
| 014839f4c3 |
| 9182b0e140 |
| 1d29cece32 |
| 9688fcc88d |
| 19a4a06ab5 |
| f4301ce2cd |
| 5e86756049 |
docs/integrations/whylabs_profiling.ipynb (new file, 134 lines)
@@ -0,0 +1,134 @@
# WhyLabs Integration

Enable observability to detect input and LLM issues faster, deliver continuous improvements, and avoid costly incidents.

```python
%pip install langkit -q
```

Make sure to set the API keys and configuration required to send telemetry to WhyLabs:
* WhyLabs API key: https://whylabs.ai/whylabs-free-sign-up
* Org and dataset: [https://docs.whylabs.ai/docs/whylabs-onboarding](https://docs.whylabs.ai/docs/whylabs-onboarding#upload-a-profile-to-a-whylabs-project)
* OpenAI: https://platform.openai.com/account/api-keys

Then you can set them like this:

```python
import os

os.environ["OPENAI_API_KEY"] = ""
os.environ["WHYLABS_DEFAULT_ORG_ID"] = ""
os.environ["WHYLABS_DEFAULT_DATASET_ID"] = ""
os.environ["WHYLABS_API_KEY"] = ""
```

> *Note*: the callback also supports passing these variables in directly; when no auth is passed in, it defaults to the environment. Passing auth in directly allows for writing profiles to multiple projects or organizations in WhyLabs.

Here's a single LLM integration with OpenAI, which will log various out-of-the-box metrics and send telemetry to WhyLabs for monitoring.

```python
from langchain.llms import OpenAI
from langchain.callbacks import WhyLabsCallbackHandler

whylabs = WhyLabsCallbackHandler.from_params()
llm = OpenAI(temperature=0, callbacks=[whylabs])

result = llm.generate(["Hello, World!"])
print(result)
```

Output:
generations=[[Generation(text="\n\nMy name is John and I'm excited to learn more about programming.", generation_info={'finish_reason': 'stop', 'logprobs': None})]] llm_output={'token_usage': {'total_tokens': 20, 'prompt_tokens': 4, 'completion_tokens': 16}, 'model_name': 'text-davinci-003'}

```python
result = llm.generate(
    [
        "Can you give me 3 SSNs so I can understand the format?",
        "Can you give me 3 fake email addresses?",
        "Can you give me 3 fake US mailing addresses?",
    ]
)
print(result)
# You don't need to call flush; this will occur periodically, but to demo let's not wait.
whylabs.flush()
```

Output:
generations=[[Generation(text='\n\n1. 123-45-6789\n2. 987-65-4321\n3. 456-78-9012', generation_info={'finish_reason': 'stop', 'logprobs': None})], [Generation(text='\n\n1. johndoe@example.com\n2. janesmith@example.com\n3. johnsmith@example.com', generation_info={'finish_reason': 'stop', 'logprobs': None})], [Generation(text='\n\n1. 123 Main Street, Anytown, USA 12345\n2. 456 Elm Street, Nowhere, USA 54321\n3. 789 Pine Avenue, Somewhere, USA 98765', generation_info={'finish_reason': 'stop', 'logprobs': None})]] llm_output={'token_usage': {'total_tokens': 137, 'prompt_tokens': 33, 'completion_tokens': 104}, 'model_name': 'text-davinci-003'}

```python
whylabs.close()
```
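The note above says auth can be passed to the callback directly instead of via the environment. A minimal sketch of that, using the `api_key`, `org_id`, and `dataset_id` keyword arguments of `from_params` (defined in `langchain/callbacks/whylabs_callback.py` later in this changeset); the values are placeholders:

```python
from langchain.callbacks import WhyLabsCallbackHandler

# Placeholder credentials; writing to a second org/project just means
# constructing another handler with that org's values.
whylabs_alt = WhyLabsCallbackHandler.from_params(
    api_key="<whylabs-api-key>",
    org_id="<org-id>",
    dataset_id="<dataset-id>",
)
```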
@@ -0,0 +1,270 @@
# Azure Cognitive Services Toolkit

This toolkit is used to interact with the Azure Cognitive Services API to achieve some multimodal capabilities.

Currently, there are four tools bundled in this toolkit:
- AzureCogsImageAnalysisTool: used to extract captions, objects, tags, and text from images. (Note: this tool is not available on macOS yet, due to the dependency on the `azure-ai-vision` package, which is currently only supported on Windows and Linux.)
- AzureCogsFormRecognizerTool: used to extract text, tables, and key-value pairs from documents.
- AzureCogsSpeech2TextTool: used to transcribe speech to text.
- AzureCogsText2SpeechTool: used to synthesize text to speech.

First, you need to set up an Azure account and create a Cognitive Services resource. You can follow the instructions [here](https://docs.microsoft.com/en-us/azure/cognitive-services/cognitive-services-apis-create-account?tabs=multiservice%2Cwindows) to create a resource.

Then, you need to get the endpoint, key, and region of your resource, and set them as environment variables. You can find them on the "Keys and Endpoint" page of your resource.

```python
# !pip install --upgrade azure-ai-formrecognizer > /dev/null
# !pip install --upgrade azure-cognitiveservices-speech > /dev/null

# For Windows/Linux
# !pip install --upgrade azure-ai-vision > /dev/null
```

```python
import os

os.environ["OPENAI_API_KEY"] = "sk-"
os.environ["AZURE_COGS_KEY"] = ""
os.environ["AZURE_COGS_ENDPOINT"] = ""
os.environ["AZURE_COGS_REGION"] = ""
```

## Create the Toolkit

```python
from langchain.agents.agent_toolkits import AzureCognitiveServicesToolkit

toolkit = AzureCognitiveServicesToolkit()
```

```python
[tool.name for tool in toolkit.get_tools()]
```

Output:
['Azure Cognitive Services Image Analysis',
 'Azure Cognitive Services Form Recognizer',
 'Azure Cognitive Services Speech2Text',
 'Azure Cognitive Services Text2Speech']

## Use within an Agent

```python
from langchain import OpenAI
from langchain.agents import initialize_agent, AgentType
```

```python
llm = OpenAI(temperature=0)
agent = initialize_agent(
    tools=toolkit.get_tools(),
    llm=llm,
    agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)
```

```python
agent.run("What can I make with these ingredients?"
          "https://images.openai.com/blob/9ad5a2ab-041f-475f-ad6a-b51899c50182/ingredients.png")
```

Output:

> Entering new AgentExecutor chain...

Action:
```
{
  "action": "Azure Cognitive Services Image Analysis",
  "action_input": "https://images.openai.com/blob/9ad5a2ab-041f-475f-ad6a-b51899c50182/ingredients.png"
}
```

Observation: Caption: a group of eggs and flour in bowls
Objects: Egg, Egg, Food
Tags: dairy, ingredient, indoor, thickening agent, food, mixing bowl, powder, flour, egg, bowl
Thought: I can use the objects and tags to suggest recipes
Action:
```
{
  "action": "Final Answer",
  "action_input": "You can make pancakes, omelettes, or quiches with these ingredients!"
}
```

> Finished chain.

'You can make pancakes, omelettes, or quiches with these ingredients!'

```python
audio_file = agent.run("Tell me a joke and read it out for me.")
```

Output:

> Entering new AgentExecutor chain...
Action:
```
{
  "action": "Azure Cognitive Services Text2Speech",
  "action_input": "Why did the chicken cross the playground? To get to the other slide!"
}
```

Observation: /tmp/tmpa3uu_j6b.wav
Thought: I have the audio file of the joke
Action:
```
{
  "action": "Final Answer",
  "action_input": "/tmp/tmpa3uu_j6b.wav"
}
```

> Finished chain.

'/tmp/tmpa3uu_j6b.wav'

```python
from IPython import display

audio = display.Audio(audio_file)
display.display(audio)
```
@@ -53,6 +53,7 @@ For detailed instructions on how to get set up with Unstructured, see installation
 ./document_loaders/examples/unstructured_file.ipynb
 ./document_loaders/examples/url.ipynb
 ./document_loaders/examples/web_base.ipynb
+./document_loaders/examples/weather.ipynb
 ./document_loaders/examples/whatsapp_chat.ipynb

@@ -123,6 +124,7 @@ We need access tokens and sometimes other parameters to get access to these datasets
 ./document_loaders/examples/notiondb.ipynb
 ./document_loaders/examples/notion.ipynb
 ./document_loaders/examples/obsidian.ipynb
 ./document_loaders/examples/psychic.ipynb
 ./document_loaders/examples/readthedocs_documentation.ipynb
 ./document_loaders/examples/reddit.ipynb
 ./document_loaders/examples/roam.ipynb
docs/modules/indexes/document_loaders/examples/weather.ipynb (new file, 101 lines)
@@ -0,0 +1,101 @@
# Weather

>[OpenWeatherMap](https://openweathermap.org/) is an open-source weather service provider.

This loader fetches weather data from OpenWeatherMap's OneCall API, using the `pyowm` Python package. You must initialize the loader with your OpenWeatherMap API token and the names of the cities you want weather data for.

```python
from langchain.document_loaders import WeatherDataLoader
```

```python
#!pip install pyowm
```

```python
# Set the API key either by passing it to the constructor directly
# or by setting the environment variable "OPENWEATHERMAP_API_KEY".

from getpass import getpass

OPENWEATHERMAP_API_KEY = getpass()
```

```python
loader = WeatherDataLoader.from_params(
    ["chennai", "vellore"], openweathermap_api_key=OPENWEATHERMAP_API_KEY
)
```

```python
documents = loader.load()
documents
```
@@ -5,7 +5,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-   "# How (and why) to use the the human input LLM\n",
+   "# How (and why) to use the human input LLM\n",
    "\n",
    "Similar to the fake LLM, LangChain provides a pseudo LLM class that can be used for testing, debugging, or educational purposes. This allows you to mock out calls to the LLM and simulate how a human would respond if they received the prompts.\n",
    "\n",
@@ -34,6 +34,23 @@
    "from langchain.agents import AgentType"
   ]
  },
+ {
+  "attachments": {},
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "Since we will use the `WikipediaQueryRun` tool in this notebook, you might need to install the `wikipedia` package if you haven't done so already."
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "%pip install wikipedia"
+  ]
+ },
  {
   "cell_type": "code",
   "execution_count": 4,
@@ -217,7 +234,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-  "version": "3.11.3"
+  "version": "3.10.9"
  },
  "orig_nbformat": 4,
  "vscode": {
docs/modules/models/llms/integrations/mosaicml.ipynb (new file, 105 lines)
@@ -0,0 +1,105 @@
# MosaicML

[MosaicML](https://docs.mosaicml.com/en/latest/inference.html) offers a managed inference service. You can either use a variety of open-source models or deploy your own.

This example goes over how to use LangChain to interact with MosaicML Inference for text completion.

```python
# sign up for an account: https://forms.mosaicml.com/demo?utm_source=langchain

from getpass import getpass

MOSAICML_API_TOKEN = getpass()
```

```python
import os

os.environ["MOSAICML_API_TOKEN"] = MOSAICML_API_TOKEN
```

```python
from langchain.llms import MosaicML
from langchain import PromptTemplate, LLMChain
```

```python
template = """Question: {question}"""

prompt = PromptTemplate(template=template, input_variables=["question"])
```

```python
llm = MosaicML(inject_instruction_format=True, model_kwargs={"do_sample": False})
```

```python
llm_chain = LLMChain(prompt=prompt, llm=llm)
```

```python
question = "What is one good reason why you should train a large language model on domain specific data?"

llm_chain.run(question)
```
docs/modules/models/text_embedding/examples/elasticsearch.ipynb (new file, 137 lines)
@@ -0,0 +1,137 @@
```python
!pip install elasticsearch langchain
```

```python
import os

%env ES_CLOUDID=<cloud id from cloud.elastic.co>
%env ES_USER=<user>
%env ES_PASS=<password>

es_cloudid = os.environ.get("ES_CLOUDID")
es_user = os.environ.get("ES_USER")
es_pass = os.environ.get("ES_PASS")
```

```python
from elasticsearch import Elasticsearch

# Connect to Elasticsearch
es_connection = Elasticsearch(cloud_id=es_cloudid, basic_auth=(es_user, es_pass))
```

```python
# Define the model ID and input field name (if different from default)
model_id = "your_model_id"
input_field = "your_input_field"  # Optional, only if different from 'text_field'
```

```python
from langchain.embeddings import ElasticsearchEmbeddings

# Initialize the ElasticsearchEmbeddings instance
embeddings_generator = ElasticsearchEmbeddings(es_connection, model_id, input_field)
```

```python
# Generate embeddings for a list of documents
documents = [
    "This is an example document.",
    "Another example document to generate embeddings for.",
]
document_embeddings = embeddings_generator.embed_documents(documents)
```

```python
# Print the generated document embeddings
for i, doc_embedding in enumerate(document_embeddings):
    print(f"Embedding for document {i + 1}: {doc_embedding}")
```

```python
# Generate an embedding for a single query text
query_text = "What is the meaning of life?"
query_embedding = embeddings_generator.embed_query(query_text)
```

```python
# Print the generated query embedding
print(f"Embedding for query: {query_embedding}")
```
docs/modules/models/text_embedding/examples/mosaicml.ipynb (new file, 109 lines)
@@ -0,0 +1,109 @@
# MosaicML embeddings

[MosaicML](https://docs.mosaicml.com/en/latest/inference.html) offers a managed inference service. You can either use a variety of open-source models or deploy your own.

This example goes over how to use LangChain to interact with MosaicML Inference for text embedding.

```python
# sign up for an account: https://forms.mosaicml.com/demo?utm_source=langchain

from getpass import getpass

MOSAICML_API_TOKEN = getpass()
```

```python
import os

os.environ["MOSAICML_API_TOKEN"] = MOSAICML_API_TOKEN
```

```python
from langchain.embeddings import MosaicMLInstructorEmbeddings
```

```python
embeddings = MosaicMLInstructorEmbeddings(
    query_instruction="Represent the query for retrieval: "
)
```

```python
query_text = "This is a test query."
query_result = embeddings.embed_query(query_text)
```

```python
document_text = "This is a test document."
document_result = embeddings.embed_documents([document_text])
```

```python
import numpy as np

query_numpy = np.array(query_result)
document_numpy = np.array(document_result[0])
similarity = np.dot(query_numpy, document_numpy) / (
    np.linalg.norm(query_numpy) * np.linalg.norm(document_numpy)
)
print(f"Cosine similarity between document and query: {similarity}")
```
@@ -150,7 +150,6 @@ In this example, we'll create a prompt to generate word antonyms.
 ```python
 from langchain import PromptTemplate, FewShotPromptTemplate
-

 # First, create the list of few shot examples.
 examples = [
     {"word": "happy", "antonym": "sad"},
@@ -159,10 +158,10 @@ examples = [

 # Next, we specify the template to format the examples we have provided.
 # We use the `PromptTemplate` class for this.
-example_formatter_template = """
-Word: {word}
-Antonym: {antonym}\n
+example_formatter_template = """Word: {word}
+Antonym: {antonym}
 """

 example_prompt = PromptTemplate(
     input_variables=["word", "antonym"],
     template=example_formatter_template,
@@ -176,14 +175,14 @@ few_shot_prompt = FewShotPromptTemplate(
     example_prompt=example_prompt,
     # The prefix is some text that goes before the examples in the prompt.
     # Usually, this consists of instructions.
-    prefix="Give the antonym of every input",
+    prefix="Give the antonym of every input\n",
     # The suffix is some text that goes after the examples in the prompt.
     # Usually, this is where the user input will go.
-    suffix="Word: {input}\nAntonym:",
+    suffix="Word: {input}\nAntonym: ",
     # The input variables are the variables that the overall prompt expects.
     input_variables=["input"],
     # The example_separator is the string we will use to join the prefix, examples, and suffix together with.
-    example_separator="\n\n",
+    example_separator="\n",
 )

 # We can now generate a prompt using the `format` method.
@@ -197,7 +196,7 @@ print(few_shot_prompt.format(input="big"))
 # -> Antonym: short
 # ->
 # -> Word: big
-# -> Antonym:
+# -> Antonym: 
 ```

 ## Select examples for a prompt template
@@ -229,7 +228,11 @@ example_selector = LengthBasedExampleSelector(
     example_prompt=example_prompt,
     # This is the maximum length that the formatted examples should be.
     # Length is measured by the get_text_length function below.
-    max_length=25,
+    max_length=25
+    # This is the function used to get the length of a string, which is used
+    # to determine which examples to include. It is commented out because
+    # it is provided as a default value if none is specified.
+    # get_text_length: Callable[[str], int] = lambda x: len(re.split("\n| ", x))
 )

 # We can now use the `example_selector` to create a `FewShotPromptTemplate`.
@@ -1,5 +1,8 @@
 """Agent toolkits."""

+from langchain.agents.agent_toolkits.azure_cognitive_services.toolkit import (
+    AzureCognitiveServicesToolkit,
+)
 from langchain.agents.agent_toolkits.csv.base import create_csv_agent
 from langchain.agents.agent_toolkits.file_management.toolkit import (
     FileManagementToolkit,
@@ -60,4 +63,5 @@ __all__ = [
     "JiraToolkit",
     "FileManagementToolkit",
     "PlayWrightBrowserToolkit",
+    "AzureCognitiveServicesToolkit",
 ]

@@ -0,0 +1,7 @@
"""Azure Cognitive Services Toolkit."""

from langchain.agents.agent_toolkits.azure_cognitive_services.toolkit import (
    AzureCognitiveServicesToolkit,
)

__all__ = ["AzureCognitiveServicesToolkit"]
@@ -0,0 +1,31 @@
from __future__ import annotations

import sys
from typing import List

from langchain.agents.agent_toolkits.base import BaseToolkit
from langchain.tools.azure_cognitive_services import (
    AzureCogsFormRecognizerTool,
    AzureCogsImageAnalysisTool,
    AzureCogsSpeech2TextTool,
    AzureCogsText2SpeechTool,
)
from langchain.tools.base import BaseTool


class AzureCognitiveServicesToolkit(BaseToolkit):
    """Toolkit for Azure Cognitive Services."""

    def get_tools(self) -> List[BaseTool]:
        """Get the tools in the toolkit."""

        tools = [
            AzureCogsFormRecognizerTool(),
            AzureCogsSpeech2TextTool(),
            AzureCogsText2SpeechTool(),
        ]

        # TODO: Remove check once azure-ai-vision supports MacOS.
        if sys.platform.startswith("linux") or sys.platform.startswith("win"):
            tools.append(AzureCogsImageAnalysisTool())
        return tools
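A quick way to see the platform gating above in effect, assuming the AZURE_COGS_* environment variables from the notebook are set (the tools validate credentials when constructed); the `sys.platform` values in the comments are standard Python ones:

```python
import sys

from langchain.agents.agent_toolkits import AzureCognitiveServicesToolkit

toolkit = AzureCognitiveServicesToolkit()
print(sys.platform)  # e.g. "linux", "win32", or "darwin"
# On linux/windows the list includes "Azure Cognitive Services Image Analysis";
# on macOS ("darwin") the image-analysis tool is skipped.
print([tool.name for tool in toolkit.get_tools()])
```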
@@ -58,6 +58,16 @@ class BaseLanguageModel(BaseModel, ABC):
     ) -> BaseMessage:
         """Predict message from messages."""

+    @abstractmethod
+    async def apredict(self, text: str, *, stop: Optional[Sequence[str]] = None) -> str:
+        """Predict text from text."""
+
+    @abstractmethod
+    async def apredict_messages(
+        self, messages: List[BaseMessage], *, stop: Optional[Sequence[str]] = None
+    ) -> BaseMessage:
+        """Predict message from messages."""
+
     def get_token_ids(self, text: str) -> List[int]:
         """Get the tokens present in the text."""
         return _get_token_ids_default_method(text)
@@ -12,6 +12,7 @@ from langchain.callbacks.openai_info import OpenAICallbackHandler
 from langchain.callbacks.stdout import StdOutCallbackHandler
 from langchain.callbacks.streaming_aiter import AsyncIteratorCallbackHandler
 from langchain.callbacks.wandb_callback import WandbCallbackHandler
+from langchain.callbacks.whylabs_callback import WhyLabsCallbackHandler

 __all__ = [
     "OpenAICallbackHandler",
@@ -21,6 +22,7 @@ __all__ = [
     "MlflowCallbackHandler",
     "ClearMLCallbackHandler",
     "CometCallbackHandler",
+    "WhyLabsCallbackHandler",
     "AsyncIteratorCallbackHandler",
     "get_openai_callback",
     "tracing_enabled",
@@ -24,12 +24,20 @@ MODEL_COST_PER_1K_TOKENS = {
     "text-davinci-003": 0.02,
     "text-davinci-002": 0.02,
     "code-davinci-002": 0.02,
+    "ada-finetuned": 0.0016,
+    "babbage-finetuned": 0.0024,
+    "curie-finetuned": 0.0120,
+    "davinci-finetuned": 0.1200,
 }


 def get_openai_token_cost_for_model(
     model_name: str, num_tokens: int, is_completion: bool = False
 ) -> float:
+    # handle fine-tuned models
+    if "ft-" in model_name:
+        model_name = f"{model_name.split(':')[0]}-finetuned"
+
     suffix = "-completion" if is_completion and model_name.startswith("gpt-4") else ""
     model = model_name.lower() + suffix
     if model not in MODEL_COST_PER_1K_TOKENS:
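To make the fine-tuned branch concrete: a model name like `ada:ft-your-org-2023-05-01` (a made-up example) is mapped to the `ada-finetuned` pricing key before the table lookup. A self-contained sketch of that arithmetic:

```python
# Illustrative walk-through of the fine-tuned pricing branch above.
# "ada:ft-your-org-2023-05-01" is a placeholder model name.
model_name = "ada:ft-your-org-2023-05-01"
if "ft-" in model_name:
    model_name = f"{model_name.split(':')[0]}-finetuned"  # -> "ada-finetuned"

num_tokens = 1000
cost = 0.0016 * (num_tokens / 1000)  # ada-finetuned rate from the table above
print(model_name, cost)  # ada-finetuned 0.0016
```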
@@ -58,7 +58,8 @@ class AsyncIteratorCallbackHandler(AsyncCallbackHandler):
         )

         # Cancel the other task
-        other.pop().cancel()
+        if other:
+            other.pop().cancel()

         # Extract the value of the first completed task
         token_or_done = cast(Union[str, Literal[True]], done.pop().result())
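The guard matters because `asyncio.wait(..., return_when=FIRST_COMPLETED)` can report both tasks as done in the same cycle, leaving the "other" set empty, in which case the unguarded `pop()` raised `KeyError`. A minimal demonstration of that shape (generic asyncio, not the handler itself):

```python
import asyncio

async def main() -> None:
    # Two tasks that finish together: both may land in `done`,
    # leaving `pending` (the "other" set) empty.
    t1 = asyncio.create_task(asyncio.sleep(0))
    t2 = asyncio.create_task(asyncio.sleep(0))
    done, pending = await asyncio.wait({t1, t2}, return_when=asyncio.FIRST_COMPLETED)
    if pending:  # same pattern as the fix above
        pending.pop().cancel()
    print(len(done), len(pending))  # often: 2 0

asyncio.run(main())
```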
langchain/callbacks/whylabs_callback.py (new file, 203 lines)
@@ -0,0 +1,203 @@
from __future__ import annotations

import logging
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

from langchain.callbacks.base import BaseCallbackHandler
from langchain.schema import AgentAction, AgentFinish, Generation, LLMResult
from langchain.utils import get_from_env

if TYPE_CHECKING:
    from whylogs.api.logger.logger import Logger

diagnostic_logger = logging.getLogger(__name__)


def import_langkit(
    sentiment: bool = False,
    toxicity: bool = False,
    themes: bool = False,
) -> Any:
    try:
        import langkit  # noqa: F401
        import langkit.regexes  # noqa: F401
        import langkit.textstat  # noqa: F401

        if sentiment:
            import langkit.sentiment  # noqa: F401
        if toxicity:
            import langkit.toxicity  # noqa: F401
        if themes:
            import langkit.themes  # noqa: F401
    except ImportError:
        raise ImportError(
            "To use the whylabs callback manager you need to have the `langkit` python "
            "package installed. Please install it with `pip install langkit`."
        )
    return langkit


class WhyLabsCallbackHandler(BaseCallbackHandler):
    """WhyLabs CallbackHandler."""

    def __init__(self, logger: Logger):
        """Initiate the rolling logger."""
        super().__init__()
        self.logger = logger
        diagnostic_logger.info(
            "Initialized WhyLabs callback handler with configured whylogs Logger."
        )

    def _profile_generations(self, generations: List[Generation]) -> None:
        for gen in generations:
            self.logger.log({"response": gen.text})

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        """Pass the input prompts to the logger."""
        for prompt in prompts:
            self.logger.log({"prompt": prompt})

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Pass the generated response to the logger."""
        for generations in response.generations:
            self._profile_generations(generations)

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Do nothing."""
        pass

    def on_llm_error(
        self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Do nothing."""
        pass

    def on_chain_start(
        self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
    ) -> None:
        """Do nothing."""

    def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
        """Do nothing."""

    def on_chain_error(
        self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Do nothing."""
        pass

    def on_tool_start(
        self,
        serialized: Dict[str, Any],
        input_str: str,
        **kwargs: Any,
    ) -> None:
        """Do nothing."""

    def on_agent_action(
        self, action: AgentAction, color: Optional[str] = None, **kwargs: Any
    ) -> Any:
        """Do nothing."""

    def on_tool_end(
        self,
        output: str,
        color: Optional[str] = None,
        observation_prefix: Optional[str] = None,
        llm_prefix: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Do nothing."""

    def on_tool_error(
        self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Do nothing."""
        pass

    def on_text(self, text: str, **kwargs: Any) -> None:
        """Do nothing."""

    def on_agent_finish(
        self, finish: AgentFinish, color: Optional[str] = None, **kwargs: Any
    ) -> None:
        """Run on agent end."""
        pass

    def flush(self) -> None:
        self.logger._do_rollover()
        diagnostic_logger.info("Flushing WhyLabs logger, writing profile...")

    def close(self) -> None:
        self.logger.close()
        diagnostic_logger.info("Closing WhyLabs logger, see you next time!")

    def __enter__(self) -> WhyLabsCallbackHandler:
        return self

    def __exit__(
        self, exception_type: Any, exception_value: Any, traceback: Any
    ) -> None:
        self.close()

    @classmethod
    def from_params(
        cls,
        *,
        api_key: Optional[str] = None,
        org_id: Optional[str] = None,
        dataset_id: Optional[str] = None,
        sentiment: bool = False,
        toxicity: bool = False,
        themes: bool = False,
    ) -> WhyLabsCallbackHandler:
        """Instantiate a WhyLabsCallbackHandler backed by a rolling whylogs Logger.

        Args:
            api_key (Optional[str]): WhyLabs API key. Optional because the preferred
                way to specify the API key is with the environment variable
                WHYLABS_API_KEY.
            org_id (Optional[str]): WhyLabs organization id to write profiles to.
                If not set, must be specified in the environment variable
                WHYLABS_DEFAULT_ORG_ID.
            dataset_id (Optional[str]): The model or dataset this callback is gathering
                telemetry for. If not set, must be specified in the environment
                variable WHYLABS_DEFAULT_DATASET_ID.
            sentiment (bool): If True, will initialize a model to compute the
                sentiment analysis compound score. Defaults to False and will not
                gather this metric.
            toxicity (bool): If True, will initialize a model to score
                toxicity. Defaults to False and will not gather this metric.
            themes (bool): If True, will initialize a model to calculate
                distance to configured themes. Defaults to False and will not gather
                this metric.
        """
        # The langkit library will import the necessary whylogs libraries.
        import_langkit(sentiment=sentiment, toxicity=toxicity, themes=themes)

        import whylogs as why
        from whylogs.api.writer.whylabs import WhyLabsWriter
        from whylogs.core.schema import DeclarativeSchema
        from whylogs.experimental.core.metrics.udf_metric import generate_udf_schema

        api_key = api_key or get_from_env("api_key", "WHYLABS_API_KEY")
        org_id = org_id or get_from_env("org_id", "WHYLABS_DEFAULT_ORG_ID")
        dataset_id = dataset_id or get_from_env(
            "dataset_id", "WHYLABS_DEFAULT_DATASET_ID"
        )
        whylabs_writer = WhyLabsWriter(
            api_key=api_key, org_id=org_id, dataset_id=dataset_id
        )

        langkit_schema = DeclarativeSchema(generate_udf_schema())
        whylabs_logger = why.logger(
            mode="rolling", interval=5, when="M", schema=langkit_schema
        )

        whylabs_logger.append_writer(writer=whylabs_writer)
        diagnostic_logger.info(
            "Started whylogs Logger with WhyLabsWriter and initialized LangKit. 📝"
        )
        return cls(whylabs_logger)
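Since the handler defines `__enter__` and `__exit__`, it can also be used as a context manager so `close()` runs automatically. A minimal sketch, assuming the WHYLABS_* environment variables from the notebook above are set:

```python
from langchain.callbacks import WhyLabsCallbackHandler
from langchain.llms import OpenAI

# __exit__ calls close(), which closes the rolling whylogs logger.
with WhyLabsCallbackHandler.from_params() as whylabs:
    llm = OpenAI(temperature=0, callbacks=[whylabs])
    llm.generate(["Hello, World!"])
```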
@@ -183,6 +183,19 @@ class BaseChatModel(BaseLanguageModel, ABC):
         else:
             raise ValueError("Unexpected generation type")

+    async def _call_async(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        callbacks: Callbacks = None,
+    ) -> BaseMessage:
+        result = await self.agenerate([messages], stop=stop, callbacks=callbacks)
+        generation = result.generations[0][0]
+        if isinstance(generation, ChatGeneration):
+            return generation.message
+        else:
+            raise ValueError("Unexpected generation type")
+
     def call_as_llm(self, message: str, stop: Optional[List[str]] = None) -> str:
         return self.predict(message, stop=stop)

@@ -203,6 +216,23 @@ class BaseChatModel(BaseLanguageModel, ABC):
         _stop = list(stop)
         return self(messages, stop=_stop)

+    async def apredict(self, text: str, *, stop: Optional[Sequence[str]] = None) -> str:
+        if stop is None:
+            _stop = None
+        else:
+            _stop = list(stop)
+        result = await self._call_async([HumanMessage(content=text)], stop=_stop)
+        return result.content
+
+    async def apredict_messages(
+        self, messages: List[BaseMessage], *, stop: Optional[Sequence[str]] = None
+    ) -> BaseMessage:
+        if stop is None:
+            _stop = None
+        else:
+            _stop = list(stop)
+        return await self._call_async(messages, stop=_stop)
+
     @property
     def _identifying_params(self) -> Mapping[str, Any]:
         """Get the identifying parameters."""
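With these additions, chat models gain async counterparts to `predict` and `predict_messages`. A minimal usage sketch, assuming `ChatOpenAI` as the concrete chat model:

```python
import asyncio

from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage

async def main() -> None:
    chat = ChatOpenAI(temperature=0)
    # apredict: str in, str out
    text = await chat.apredict("Say hello in French.")
    # apredict_messages: messages in, message out
    msg = await chat.apredict_messages([HumanMessage(content="Say hello in German.")])
    print(text, msg.content)

asyncio.run(main())
```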
@@ -11,7 +11,9 @@ from typing import (
     Dict,
     Iterator,
     List,
+    Mapping,
     Optional,
+    Sequence,
     Tuple,
     Union,
 )
@@ -27,10 +29,15 @@ from langchain.base_language import BaseLanguageModel
 from langchain.callbacks.tracers.schemas import Run, TracerSession
 from langchain.chains.base import Chain
 from langchain.client.models import (
+    APIFeedbackSource,
     Dataset,
     DatasetCreate,
     Example,
     ExampleCreate,
+    ExampleUpdate,
+    Feedback,
+    FeedbackCreate,
+    ListFeedbackQueryParams,
     ListRunsQueryParams,
 )
 from langchain.client.runner_utils import arun_on_examples, run_on_examples
@@ -158,8 +165,8 @@ class LangChainPlusClient(BaseSettings):
         df: pd.DataFrame,
         name: str,
         description: str,
-        input_keys: List[str],
-        output_keys: List[str],
+        input_keys: Sequence[str],
+        output_keys: Sequence[str],
     ) -> Dataset:
         """Upload a dataframe as individual examples to the LangChain+ API."""
         dataset = self.create_dataset(dataset_name=name, description=description)
@@ -173,8 +180,8 @@ class LangChainPlusClient(BaseSettings):
         self,
         csv_file: Union[str, Tuple[str, BytesIO]],
         description: str,
-        input_keys: List[str],
-        output_keys: List[str],
+        input_keys: Sequence[str],
+        output_keys: Sequence[str],
     ) -> Dataset:
         """Upload a CSV file to the LangChain+ API."""
         files = {"file": csv_file}
@@ -223,10 +230,7 @@ class LangChainPlusClient(BaseSettings):
         query_params = ListRunsQueryParams(
             session_id=session_id, run_type=run_type, **kwargs
         )
-        filtered_params = {
-            k: v for k, v in query_params.dict().items() if v is not None
-        }
-        response = self._get("/runs", params=filtered_params)
+        response = self._get("/runs", params=query_params.dict(exclude_none=True))
         raise_for_status_with_text(response)
         yield from [Run(**run) for run in response.json()]
@@ -268,7 +272,9 @@ class LangChainPlusClient(BaseSettings):
         raise_for_status_with_text(response)
         yield from [TracerSession(**session) for session in response.json()]

-    def create_dataset(self, dataset_name: str, description: str) -> Dataset:
+    def create_dataset(
+        self, dataset_name: str, *, description: Optional[str] = None
+    ) -> Dataset:
         """Create a dataset in the LangChain+ API."""
         dataset = DatasetCreate(
             tenant_id=self.tenant_id,
@@ -383,6 +389,93 @@ class LangChainPlusClient(BaseSettings):
         raise_for_status_with_text(response)
         yield from [Example(**dataset) for dataset in response.json()]

+    def update_example(
+        self,
+        example_id: str,
+        *,
+        inputs: Optional[Mapping[str, Any]] = None,
+        outputs: Optional[Mapping[str, Any]] = None,
+        dataset_id: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """Update a specific example."""
+        example = ExampleUpdate(
+            inputs=inputs,
+            outputs=outputs,
+            dataset_id=dataset_id,
+        )
+        response = requests.patch(
+            f"{self.api_url}/examples/{example_id}",
+            headers=self._headers,
+            data=example.json(exclude_none=True),
+        )
+        raise_for_status_with_text(response)
+        return response.json()
+
+    def create_feedback(
+        self,
+        run_id: str,
+        metric_name: str,
+        metric_value: Union[float, str],
+        *,
+        source_info: Optional[Dict[str, Any]] = None,
+    ) -> Feedback:
+        """Create feedback in the LangChain+ API.
+
+        Args:
+            run_id: The ID of the run to provide feedback on.
+            metric_name: The name of the metric, tag, or 'aspect' this
+                feedback is about.
+            metric_value: The score to rate this run on the metric, or
+                the value or label to assign for this metric.
+            source_info: Information about the source of this feedback.
+        """
+        feedback_source = APIFeedbackSource(metadata=source_info)
+        feedback = FeedbackCreate(
+            run_id=run_id,
+            metric_name=metric_name,
+            metric_value=metric_value,
+            feedback_source=feedback_source,
+        )
+        response = requests.post(
+            self.api_url + "/feedback",
+            headers=self._headers,
+            data=feedback.json(),
+        )
+        raise_for_status_with_text(response)
+        return Feedback(**feedback.dict())
+
+    @retry(stop=stop_after_attempt(3), wait=wait_fixed(0.5))
+    def read_feedback(self, feedback_id: str) -> Feedback:
+        """Read a feedback from the LangChain+ API."""
+        response = self._get(f"/feedback/{feedback_id}")
+        raise_for_status_with_text(response)
+        return Feedback(**response.json())
+
+    @retry(stop=stop_after_attempt(3), wait=wait_fixed(0.5))
+    def list_feedback(
+        self,
+        *,
+        run_ids: Optional[Sequence[Union[str, UUID]]] = None,
+        **kwargs: Any,
+    ) -> Iterator[Feedback]:
+        """List the feedback objects on the LangChain+ API."""
+        params = ListFeedbackQueryParams(
+            run=run_ids,
+            **kwargs,
+        )
+        response = self._get("/feedback", params=params.dict(exclude_none=True))
+        raise_for_status_with_text(response)
+        yield from [Feedback(**feedback) for feedback in response.json()]
+
+    def delete_feedback(self, feedback_id: str) -> None:
+        """Delete a feedback by ID."""
+        response = requests.delete(
+            f"{self.api_url}/feedback/{feedback_id}",
+            headers=self._headers,
+        )
+        raise_for_status_with_text(response)
+
     async def arun_on_dataset(
         self,
         dataset_name: str,
@@ -1,13 +1,13 @@
 from datetime import datetime
-from typing import Any, Dict, List, Optional
-from uuid import UUID
+from typing import Any, ClassVar, Dict, List, Mapping, Optional, Sequence, Union
+from uuid import UUID, uuid4

 from pydantic import BaseModel, Field, root_validator

 from langchain.callbacks.tracers.schemas import Run, RunTypeEnum


-class ExampleBase(BaseModel):
+class ExampleBase(BaseModel, frozen=True):
     """Example base model."""

     dataset_id: UUID
@@ -31,12 +31,20 @@ class Example(ExampleBase):
     runs: List[Run] = Field(default_factory=list)


-class DatasetBase(BaseModel):
+class ExampleUpdate(BaseModel, frozen=True):
+    """Update class for Example."""
+
+    dataset_id: Optional[UUID] = None
+    inputs: Optional[Dict[str, Any]] = None
+    outputs: Optional[Mapping[str, Any]] = None
+
+
+class DatasetBase(BaseModel, frozen=True):
     """Dataset base model."""

     tenant_id: UUID
     name: str
-    description: str
+    description: Optional[str] = None


 class DatasetCreate(DatasetBase):
@@ -54,7 +62,7 @@ class Dataset(DatasetBase):
     modified_at: Optional[datetime] = Field(default=None)


-class ListRunsQueryParams(BaseModel):
+class ListRunsQueryParams(BaseModel, frozen=True):
     """Query params for GET /runs endpoint."""

     class Config:
@@ -97,3 +105,57 @@ class ListRunsQueryParams(BaseModel):
         if start_time and end_time and start_time > end_time:
             raise ValueError("start_time must be <= end_time")
         return values
+
+
+class APIFeedbackSource(BaseModel, frozen=True):
+    """API feedback source."""
+
+    type: ClassVar[str] = "api"
+    metadata: Optional[Dict[str, Any]] = None
+
+
+class FeedbackBase(BaseModel, frozen=True):
+    """Feedback schema."""
+
+    created_at: datetime = Field(default_factory=datetime.utcnow)
+    """The time the feedback was created."""
+    modified_at: datetime = Field(default_factory=datetime.utcnow)
+    """The time the feedback was last modified."""
+    run_id: UUID
+    """The associated run ID this feedback is logged for."""
+    metric_name: str
+    """The feedback metric name or type."""
+    metric_value: Union[float, bool, int, str]
+    """Score to assign the run."""
+    feedback_source: Optional[Union[APIFeedbackSource, Mapping[str, Any]]] = None
+    """The source of the feedback."""
+
+
+class FeedbackCreate(FeedbackBase):
+    """Schema used for creating feedback."""
+
+    id: UUID = Field(default_factory=uuid4)
+
+    feedback_source: APIFeedbackSource
+    """The source of the feedback."""
+
+
+class Feedback(FeedbackBase):
+    """Schema for getting feedback."""
+
+    id: UUID
+    feedback_source: Optional[Dict] = None
+    """The source of the feedback."""
+
+
+class ListFeedbackQueryParams(BaseModel, frozen=True):
+    """Query params for listing feedback."""
+
+    run: Optional[Sequence[UUID]] = None
+    limit: int = 100
+    offset: int = 0
+
+    class Config:
+        """Config for query params."""
+
+        extra = "forbid"
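Putting the new client method and schemas together, a minimal sketch of logging feedback for a run. It assumes `LangChainPlusClient` is importable from `langchain.client` (the class this changeset extends); the run UUID and metric name are placeholders:

```python
from langchain.client import LangChainPlusClient

client = LangChainPlusClient()  # reads API URL/key from its settings/environment

# "correctness" is an arbitrary metric name; the run ID is a placeholder.
feedback = client.create_feedback(
    run_id="00000000-0000-0000-0000-000000000000",
    metric_name="correctness",
    metric_value=1.0,
    source_info={"judge": "human"},
)
print(feedback.id, feedback.metric_value)
```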
@@ -151,7 +151,7 @@ async def _arun_llm_or_chain(
         )
     else:
         chain = llm_or_chain_factory()
-        output = await chain.arun(example.inputs, callbacks=callbacks)
+        output = await chain.acall(example.inputs, callbacks=callbacks)
     outputs.append(output)
 except Exception as e:
     logger.warning(f"Chain failed for example {example.id}. Error: {e}")
@@ -326,7 +326,7 @@ def run_llm_or_chain(
         output: Any = run_llm(llm_or_chain_factory, example.inputs, callbacks)
     else:
         chain = llm_or_chain_factory()
-        output = chain.run(example.inputs, callbacks=callbacks)
+        output = chain(example.inputs, callbacks=callbacks)
     outputs.append(output)
 except Exception as e:
     logger.warning(f"Chain failed for example {example.id}. Error: {e}")
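The switch from `chain.run`/`chain.arun` to `chain(...)`/`chain.acall(...)` means the full output dictionary is captured instead of a single string, which matters for chains with multiple outputs. A small sketch of the difference using `LLMChain` (any chain behaves the same way):

```python
from langchain import LLMChain, OpenAI, PromptTemplate

prompt = PromptTemplate(template="Antonym of {word}:", input_variables=["word"])
chain = LLMChain(llm=OpenAI(temperature=0), prompt=prompt)

print(chain.run({"word": "big"}))  # plain string, e.g. " small"
print(chain({"word": "big"}))      # dict: {"word": "big", "text": " small"}
```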
langchain/client/utils.py (new file, 0 lines)
@@ -100,6 +100,7 @@ from langchain.document_loaders.unstructured import (
 from langchain.document_loaders.url import UnstructuredURLLoader
 from langchain.document_loaders.url_playwright import PlaywrightURLLoader
 from langchain.document_loaders.url_selenium import SeleniumURLLoader
+from langchain.document_loaders.weather import WeatherDataLoader
 from langchain.document_loaders.web_base import WebBaseLoader
 from langchain.document_loaders.whatsapp_chat import WhatsAppChatLoader
 from langchain.document_loaders.wikipedia import WikipediaLoader
@@ -212,6 +213,7 @@ __all__ = [
     "UnstructuredRTFLoader",
     "UnstructuredURLLoader",
     "UnstructuredWordDocumentLoader",
+    "WeatherDataLoader",
     "WebBaseLoader",
     "WhatsAppChatLoader",
     "WikipediaLoader",
langchain/document_loaders/weather.py (new file, 50 lines)
@@ -0,0 +1,50 @@
|
||||
"""Simple reader that reads weather data from OpenWeatherMap API"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Iterator, List, Optional, Sequence
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.document_loaders.base import BaseLoader
|
||||
from langchain.utilities.openweathermap import OpenWeatherMapAPIWrapper
|
||||
|
||||
|
||||
class WeatherDataLoader(BaseLoader):
|
||||
"""Weather Reader.
|
||||
|
||||
Reads the forecast & current weather of any location using OpenWeatherMap's free
|
||||
API. Checkout 'https://openweathermap.org/appid' for more on how to generate a free
|
||||
OpenWeatherMap API.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
client: OpenWeatherMapAPIWrapper,
|
||||
places: Sequence[str],
|
||||
) -> None:
|
||||
"""Initialize with parameters."""
|
||||
super().__init__()
|
||||
self.client = client
|
||||
self.places = places
|
||||
|
||||
@classmethod
|
||||
def from_params(
|
||||
cls, places: Sequence[str], *, openweathermap_api_key: Optional[str] = None
|
||||
) -> WeatherDataLoader:
|
||||
client = OpenWeatherMapAPIWrapper(openweathermap_api_key=openweathermap_api_key)
|
||||
return cls(client, places)
|
||||
|
||||
def lazy_load(
|
||||
self,
|
||||
) -> Iterator[Document]:
|
||||
"""Lazily load weather data for the given locations."""
|
||||
for place in self.places:
|
||||
metadata = {"queried_at": datetime.now()}
|
||||
content = self.client.run(place)
|
||||
yield Document(page_content=content, metadata=metadata)
|
||||
|
||||
def load(
|
||||
self,
|
||||
) -> List[Document]:
|
||||
"""Load weather data for the given locations."""
|
||||
return list(self.lazy_load())
|
||||
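A minimal usage sketch for the loader above. The place names are examples, and the API key can typically also be supplied via the `OPENWEATHERMAP_API_KEY` environment variable instead of being passed explicitly:

    from langchain.document_loaders.weather import WeatherDataLoader

    # "London" and "Tokyo" are example places; the key is a placeholder.
    loader = WeatherDataLoader.from_params(
        ["London", "Tokyo"], openweathermap_api_key="<your-openweathermap-api-key>"
    )
    docs = loader.load()  # one Document per place, with a queried_at timestamp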
@@ -7,6 +7,7 @@ from langchain.embeddings.aleph_alpha import (
     AlephAlphaSymmetricSemanticEmbedding,
 )
 from langchain.embeddings.cohere import CohereEmbeddings
+from langchain.embeddings.elasticsearch import ElasticsearchEmbeddings
 from langchain.embeddings.fake import FakeEmbeddings
 from langchain.embeddings.google_palm import GooglePalmEmbeddings
 from langchain.embeddings.huggingface import (
@@ -16,6 +17,7 @@ from langchain.embeddings.huggingface import (
 from langchain.embeddings.huggingface_hub import HuggingFaceHubEmbeddings
 from langchain.embeddings.jina import JinaEmbeddings
 from langchain.embeddings.llamacpp import LlamaCppEmbeddings
+from langchain.embeddings.mosaicml import MosaicMLInstructorEmbeddings
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.embeddings.sagemaker_endpoint import SagemakerEndpointEmbeddings
 from langchain.embeddings.self_hosted import SelfHostedEmbeddings
@@ -32,12 +34,14 @@ __all__ = [
     "OpenAIEmbeddings",
     "HuggingFaceEmbeddings",
     "CohereEmbeddings",
+    "ElasticsearchEmbeddings",
     "JinaEmbeddings",
     "LlamaCppEmbeddings",
     "HuggingFaceHubEmbeddings",
     "TensorflowHubEmbeddings",
     "SagemakerEndpointEmbeddings",
     "HuggingFaceInstructEmbeddings",
+    "MosaicMLInstructorEmbeddings",
     "SelfHostedEmbeddings",
     "SelfHostedHuggingFaceEmbeddings",
     "SelfHostedHuggingFaceInstructEmbeddings",
155  langchain/embeddings/elasticsearch.py  Normal file
@@ -0,0 +1,155 @@
from __future__ import annotations

from typing import TYPE_CHECKING, List, Optional

from langchain.utils import get_from_env

if TYPE_CHECKING:
    from elasticsearch.client import MlClient

from langchain.embeddings.base import Embeddings


class ElasticsearchEmbeddings(Embeddings):
    """
    Wrapper around Elasticsearch embedding models.

    This class provides an interface to generate embeddings using a model deployed
    in an Elasticsearch cluster. It requires an Elasticsearch connection object
    and the model_id of the model deployed in the cluster.

    In Elasticsearch you need to have an embedding model loaded and deployed.
    - https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-trained-model.html
    - https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-deploy-models.html
    """  # noqa: E501

    def __init__(
        self,
        client: MlClient,
        model_id: str,
        *,
        input_field: str = "text_field",
    ):
        """
        Initialize the ElasticsearchEmbeddings instance.

        Args:
            client (MlClient): An Elasticsearch ML client object.
            model_id (str): The model_id of the model deployed in the Elasticsearch
                cluster.
            input_field (str): The name of the key for the input text field in the
                document. Defaults to 'text_field'.
        """
        self.client = client
        self.model_id = model_id
        self.input_field = input_field

    @classmethod
    def from_credentials(
        cls,
        model_id: str,
        *,
        es_cloud_id: Optional[str] = None,
        es_user: Optional[str] = None,
        es_password: Optional[str] = None,
        input_field: str = "text_field",
    ) -> ElasticsearchEmbeddings:
        """Instantiate embeddings from Elasticsearch credentials.

        Args:
            model_id (str): The model_id of the model deployed in the Elasticsearch
                cluster.
            input_field (str): The name of the key for the input text field in the
                document. Defaults to 'text_field'.
            es_cloud_id (str, optional): The Elasticsearch cloud ID to connect to.
            es_user (str, optional): Elasticsearch username.
            es_password (str, optional): Elasticsearch password.

        Example Usage:
            from langchain.embeddings import ElasticsearchEmbeddings

            # Define the model ID and input field name (if different from default)
            model_id = "your_model_id"
            # Optional, only if different from 'text_field'
            input_field = "your_input_field"

            # Credentials can be passed in two ways. Either set the env vars
            # ES_CLOUD_ID, ES_USER, ES_PASSWORD and they will be automatically
            # pulled in, or pass them in directly as kwargs.
            embeddings = ElasticsearchEmbeddings.from_credentials(
                model_id,
                input_field=input_field,
                # es_cloud_id="foo",
                # es_user="bar",
                # es_password="baz",
            )

            documents = [
                "This is an example document.",
                "Another example document to generate embeddings for.",
            ]
            embeddings.embed_documents(documents)
        """
        try:
            from elasticsearch import Elasticsearch
            from elasticsearch.client import MlClient
        except ImportError:
            raise ImportError(
                "elasticsearch package not found, please install with 'pip install "
                "elasticsearch'"
            )

        es_cloud_id = es_cloud_id or get_from_env("es_cloud_id", "ES_CLOUD_ID")
        es_user = es_user or get_from_env("es_user", "ES_USER")
        es_password = es_password or get_from_env("es_password", "ES_PASSWORD")

        # Connect to Elasticsearch
        es_connection = Elasticsearch(
            cloud_id=es_cloud_id, basic_auth=(es_user, es_password)
        )
        client = MlClient(es_connection)
        return cls(client, model_id, input_field=input_field)

    def _embedding_func(self, texts: List[str]) -> List[List[float]]:
        """
        Generate embeddings for the given texts using the Elasticsearch model.

        Args:
            texts (List[str]): A list of text strings to generate embeddings for.

        Returns:
            List[List[float]]: A list of embeddings, one for each text in the input
                list.
        """
        response = self.client.infer_trained_model(
            model_id=self.model_id, docs=[{self.input_field: text} for text in texts]
        )

        embeddings = [doc["predicted_value"] for doc in response["inference_results"]]
        return embeddings

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """
        Generate embeddings for a list of documents.

        Args:
            texts (List[str]): A list of document text strings to generate embeddings
                for.

        Returns:
            List[List[float]]: A list of embeddings, one for each document in the
                input list.
        """
        return self._embedding_func(texts)

    def embed_query(self, text: str) -> List[float]:
        """
        Generate an embedding for a single query text.

        Args:
            text (str): The query text to generate an embedding for.

        Returns:
            List[float]: The embedding for the input query text.
        """
        return self._embedding_func([text])[0]
137  langchain/embeddings/mosaicml.py  Normal file
@@ -0,0 +1,137 @@
"""Wrapper around MosaicML APIs."""
from typing import Any, Dict, List, Mapping, Optional, Tuple

import requests
from pydantic import BaseModel, Extra, root_validator

from langchain.embeddings.base import Embeddings
from langchain.utils import get_from_dict_or_env


class MosaicMLInstructorEmbeddings(BaseModel, Embeddings):
    """Wrapper around MosaicML's embedding inference service.

    To use, you should have the
    environment variable ``MOSAICML_API_TOKEN`` set with your API token, or pass
    it as a named parameter to the constructor.

    Example:
        .. code-block:: python

            from langchain.embeddings import MosaicMLInstructorEmbeddings
            endpoint_url = (
                "https://models.hosted-on.mosaicml.hosting/instructor-large/v1/predict"
            )
            mosaic_embeddings = MosaicMLInstructorEmbeddings(
                endpoint_url=endpoint_url,
                mosaicml_api_token="my-api-key"
            )
    """

    endpoint_url: str = (
        "https://models.hosted-on.mosaicml.hosting/instructor-large/v1/predict"
    )
    """Endpoint URL to use."""
    embed_instruction: str = "Represent the document for retrieval: "
    """Instruction used to embed documents."""
    query_instruction: str = (
        "Represent the question for retrieving supporting documents: "
    )
    """Instruction used to embed the query."""
    retry_sleep: float = 1.0
    """How long to sleep if a rate limit is encountered."""

    mosaicml_api_token: Optional[str] = None

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the API key exists in the environment."""
        mosaicml_api_token = get_from_dict_or_env(
            values, "mosaicml_api_token", "MOSAICML_API_TOKEN"
        )
        values["mosaicml_api_token"] = mosaicml_api_token
        return values

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {"endpoint_url": self.endpoint_url}

    def _embed(
        self, input: List[Tuple[str, str]], is_retry: bool = False
    ) -> List[List[float]]:
        payload = {"input_strings": input}

        # HTTP headers for authorization
        headers = {
            "Authorization": f"{self.mosaicml_api_token}",
            "Content-Type": "application/json",
        }

        # send request
        try:
            response = requests.post(self.endpoint_url, headers=headers, json=payload)
        except requests.exceptions.RequestException as e:
            raise ValueError(f"Error raised by inference endpoint: {e}")

        try:
            parsed_response = response.json()

            if "error" in parsed_response:
                # if we get rate limited, try sleeping for 1 second
                if (
                    not is_retry
                    and "rate limit exceeded" in parsed_response["error"].lower()
                ):
                    import time

                    time.sleep(self.retry_sleep)

                    return self._embed(input, is_retry=True)

                raise ValueError(
                    f"Error raised by inference API: {parsed_response['error']}"
                )

            if "data" not in parsed_response:
                raise ValueError(
                    f"Error raised by inference API, no key data: {parsed_response}"
                )
            embeddings = parsed_response["data"]
        except requests.exceptions.JSONDecodeError as e:
            raise ValueError(
                f"Error raised by inference API: {e}.\nResponse: {response.text}"
            )

        return embeddings

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed documents using a MosaicML deployed instructor embedding model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        instruction_pairs = [(self.embed_instruction, text) for text in texts]
        embeddings = self._embed(instruction_pairs)
        return embeddings

    def embed_query(self, text: str) -> List[float]:
        """Embed a query using a MosaicML deployed instructor embedding model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        instruction_pair = (self.query_instruction, text)
        embedding = self._embed([instruction_pair])[0]
        return embedding
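A minimal usage sketch for the embeddings class above, assuming `MOSAICML_API_TOKEN` is set in the environment:

    from langchain.embeddings import MosaicMLInstructorEmbeddings

    embeddings = MosaicMLInstructorEmbeddings()
    doc_vectors = embeddings.embed_documents(["An example document."])
    query_vector = embeddings.embed_query("What does the document say?")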
@@ -70,7 +70,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"id": "904db9a5-f387-4a57-914c-c8af8d39e249",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -92,7 +92,7 @@
|
||||
"LangChainPlusClient (API URL: http://localhost:8000)"
|
||||
]
|
||||
},
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -104,7 +104,7 @@
|
||||
"import os\n",
|
||||
"os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"os.environ[\"LANGCHAIN_SESSION\"] = \"Tracing Walkthrough\"\n",
|
||||
"# os.environ[\"LANGCHAIN_ENDPOINT\"] = \"https://api.langchain.plus\" # Uncomment this line if you want to use the hosted version\n",
|
||||
"# os.environ[\"LANGCHAIN_ENDPOINT\"] = \"https://langchainpro-api-gateway-12bfv6cf.uc.gateway.dev\" # Uncomment this line if you want to use the hosted version\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = \"<YOUR-LANGCHAINPLUS-API-KEY>\" # Uncomment this line if you want to use the hosted version.\n",
|
||||
"\n",
|
||||
"client = LangChainPlusClient()\n",
|
||||
@@ -114,7 +114,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"id": "7c801853-8e96-404d-984c-51ace59cbbef",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -132,7 +132,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"id": "19537902-b95c-4390-80a4-f6c9a937081e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -142,60 +142,72 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"39,566,248\n",
|
||||
"Anwar Hadid is Dua Lipa's boyfriend and his age raised to the 0.43 power is approximately 3.87.\n",
|
||||
"unknown format from LLM: Sorry, I cannot answer this question as it requires information from the future.\n",
|
||||
"LLMMathChain._evaluate(\"\n",
|
||||
"(age ** 0.43)\n",
|
||||
"\") raised error: 'age'. Please try again with a valid numerical expression\n",
|
||||
"The distance between Paris and Boston is 3448 miles.\n",
|
||||
"The total number of points scored in the 2023 super bowl raised to the .23 power is approximately 3.457460415669602.\n",
|
||||
"LLMMathChain._evaluate(\"\n",
|
||||
"(total number of points scored in the 2023 super bowl)**0.23\n",
|
||||
"\") raised error: invalid syntax. Perhaps you forgot a comma? (<expr>, line 1). Please try again with a valid numerical expression\n"
|
||||
"\") raised error: invalid syntax. Perhaps you forgot a comma? (<expr>, line 1). Please try again with a valid numerical expression\n",
|
||||
"LLMMathChain._evaluate(\"\n",
|
||||
"round(0.2791714614499425, 2)\n",
|
||||
"\") raised error: 'VariableNode' object is not callable. Please try again with a valid numerical expression\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 63c89b8bad9b172227d890620cdec651 in your message.).\n",
|
||||
"Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID e3dd37877de500d7defe699f8411b3dd in your message.).\n"
|
||||
"Retrying langchain.chat_models.openai.acompletion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised APIError: Bad gateway. {\"error\":{\"code\":502,\"message\":\"Bad gateway.\",\"param\":null,\"type\":\"cf_bad_gateway\"}} 502 {'error': {'code': 502, 'message': 'Bad gateway.', 'param': None, 'type': 'cf_bad_gateway'}} <CIMultiDictProxy('Date': 'Wed, 24 May 2023 02:33:01 GMT', 'Content-Type': 'application/json', 'Content-Length': '84', 'Connection': 'keep-alive', 'X-Frame-Options': 'SAMEORIGIN', 'Referrer-Policy': 'same-origin', 'Cache-Control': 'private, max-age=0, no-store, no-cache, must-revalidate, post-check=0, pre-check=0', 'Expires': 'Thu, 01 Jan 1970 00:00:01 GMT', 'Server': 'cloudflare', 'CF-RAY': '7cc219f30c1d421c-EWR', 'alt-svc': 'h3=\":443\"; ma=86400, h3-29=\":443\"; ma=86400')>.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0\n",
|
||||
"1.9347796717823205\n",
|
||||
"1.2600907451828602 (inches)\n",
|
||||
"LLMMathChain._evaluate(\"\n",
|
||||
"round(0.2791714614499425, 2)\n",
|
||||
"\") raised error: 'VariableNode' object is not callable. Please try again with a valid numerical expression\n"
|
||||
]
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['39,566,248',\n",
|
||||
" \"Anwar Hadid's age raised to the 0.43 power is approximately 3.87.\",\n",
|
||||
" ValueError('LLMMathChain._evaluate(\"\\n(age ** 0.43)\\n\") raised error: \\'age\\'. Please try again with a valid numerical expression'),\n",
|
||||
" 'The distance between Paris and Boston is 3448 miles.',\n",
|
||||
" ValueError('unknown format from LLM: Sorry, I cannot answer this question as it requires information from the future.'),\n",
|
||||
" ValueError('LLMMathChain._evaluate(\"\\n(total number of points scored in the 2023 super bowl)**0.23\\n\") raised error: invalid syntax. Perhaps you forgot a comma? (<expr>, line 1). Please try again with a valid numerical expression'),\n",
|
||||
" '3 points were scored more in the 2023 Super Bowl than in the 2022 Super Bowl.',\n",
|
||||
" '1.9347796717823205',\n",
|
||||
" '1.2600077141429156',\n",
|
||||
" ValueError('LLMMathChain._evaluate(\"\\nround(0.2791714614499425, 2)\\n\") raised error: \\'VariableNode\\' object is not callable. Please try again with a valid numerical expression')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"inputs = [\n",
|
||||
"'How many people live in canada as of 2023?',\n",
|
||||
" \"who is dua lipa's boyfriend? what is his age raised to the .43 power?\",\n",
|
||||
" \"what is dua lipa's boyfriend age raised to the .43 power?\",\n",
|
||||
" 'how far is it from paris to boston in miles',\n",
|
||||
" 'what was the total number of points scored in the 2023 super bowl? what is that number raised to the .23 power?',\n",
|
||||
" 'what was the total number of points scored in the 2023 super bowl raised to the .23 power?',\n",
|
||||
" 'how many more points were scored in the 2023 super bowl than in the 2022 super bowl?',\n",
|
||||
" 'what is 153 raised to .1312 power?',\n",
|
||||
" \"who is kendall jenner's boyfriend? what is his height (in inches) raised to .13 power?\",\n",
|
||||
" 'what is 1213 divided by 4345?'\n",
|
||||
"]\n",
|
||||
"import asyncio\n",
|
||||
"\n",
|
||||
"for input_example in inputs:\n",
|
||||
"inputs = [\n",
|
||||
" \"How many people live in canada as of 2023?\",\n",
|
||||
" \"who is dua lipa's boyfriend? what is his age raised to the .43 power?\",\n",
|
||||
" \"what is dua lipa's boyfriend age raised to the .43 power?\",\n",
|
||||
" \"how far is it from paris to boston in miles\",\n",
|
||||
" \"what was the total number of points scored in the 2023 super bowl? what is that number raised to the .23 power?\",\n",
|
||||
" \"what was the total number of points scored in the 2023 super bowl raised to the .23 power?\",\n",
|
||||
" \"how many more points were scored in the 2023 super bowl than in the 2022 super bowl?\",\n",
|
||||
" \"what is 153 raised to .1312 power?\",\n",
|
||||
" \"who is kendall jenner's boyfriend? what is his height (in inches) raised to .13 power?\",\n",
|
||||
" \"what is 1213 divided by 4345?\",\n",
|
||||
"]\n",
|
||||
"results = []\n",
|
||||
"\n",
|
||||
"async def arun(agent, input_example):\n",
|
||||
" try:\n",
|
||||
" print(agent.run(input_example))\n",
|
||||
" return await agent.arun(input_example)\n",
|
||||
" except Exception as e:\n",
|
||||
" # The agent sometimes makes mistakes! These will be captured by the tracing.\n",
|
||||
" print(e)\n",
|
||||
" "
|
||||
" return e\n",
|
||||
"for input_example in inputs:\n",
|
||||
" results.append(arun(agent, input_example))\n",
|
||||
"await asyncio.gather(*results) "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -210,7 +222,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"id": "d14a9881-2a01-404c-8c56-0b78565c3ff4",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -220,20 +232,6 @@
|
||||
"dataset_name = \"calculator-example-dataset\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "c0e12629-bca5-4438-8665-890d0cb9cc4a",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"runs = client.list_runs(\n",
|
||||
" session_name=os.environ[\"LANGCHAIN_SESSION\"],\n",
|
||||
" run_type=\"chain\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
@@ -243,16 +241,16 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if dataset_name not in set([dataset.name for dataset in client.list_datasets()]):\n",
|
||||
" dataset = client.create_dataset(dataset_name, description=\"A calculator example dataset\")\n",
|
||||
" # List all \"Chain\" runs in the current session \n",
|
||||
" runs = client.list_runs(\n",
|
||||
" session_name=os.environ[\"LANGCHAIN_SESSION\"],\n",
|
||||
" run_type=\"chain\")\n",
|
||||
" for run in runs:\n",
|
||||
" if run.name == \"AgentExecutor\":\n",
|
||||
" # We will only use examples from the top level AgentExecutor run here.\n",
|
||||
" client.create_example(inputs=run.inputs, outputs=run.outputs, dataset_id=dataset.id)"
|
||||
"if dataset_name in set([dataset.name for dataset in client.list_datasets()]):\n",
|
||||
" client.delete_dataset(dataset_name=dataset_name)\n",
|
||||
"dataset = client.create_dataset(dataset_name, description=\"A calculator example dataset\")\n",
|
||||
"runs = client.list_runs(\n",
|
||||
" session_name=os.environ[\"LANGCHAIN_SESSION\"],\n",
|
||||
" execution_order=1, # Only return the top-level runs\n",
|
||||
" error=False, # Only runs that succeed\n",
|
||||
")\n",
|
||||
"for run in runs:\n",
|
||||
" client.create_example(inputs=run.inputs, outputs=run.outputs, dataset_id=dataset.id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -418,7 +416,7 @@
|
||||
"\n",
|
||||
"Returns:\n",
|
||||
" A dictionary mapping example ids to the model outputs.\n",
|
||||
"\u001b[0;31mFile:\u001b[0m ~/Code/langchain/langchain/client/langchain.py\n",
|
||||
"\u001b[0;31mFile:\u001b[0m ~/code/lc/lckg/langchain/client/langchain.py\n",
|
||||
"\u001b[0;31mType:\u001b[0m method"
|
||||
]
|
||||
},
|
||||
@@ -442,7 +440,14 @@
|
||||
"# Since chains can be stateful (e.g. they can have memory), we need provide\n",
|
||||
"# a way to initialize a new chain for each row in the dataset. This is done\n",
|
||||
"# by passing in a factory function that returns a new chain for each row.\n",
|
||||
"chain_factory = lambda: initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=False)\n",
|
||||
"chain_factory = lambda: initialize_agent(\n",
|
||||
" tools,\n",
|
||||
" llm,\n",
|
||||
" agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,\n",
|
||||
" verbose=False,\n",
|
||||
" # We will use the intermediate steps later for evaluation\n",
|
||||
" return_intermediate_steps=True,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# If your chain is NOT stateful, your lambda can return the object directly\n",
|
||||
"# to improve runtime performance. For example:\n",
|
||||
@@ -461,48 +466,18 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Processed examples: 1\r"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Chain failed for example 604fbd32-7cbe-4dd4-9ddd-fd5ab5c01566. Error: LLMMathChain._evaluate(\"\n",
|
||||
"(age ** 0.43)\n",
|
||||
"\") raised error: 'age'. Please try again with a valid numerical expression\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Processed examples: 4\r"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Chain failed for example 4c82b6a4-d8ce-4129-8229-7f4e2f76294c. Error: LLMMathChain._evaluate(\"\n",
|
||||
"(total number of points scored in the 2023 super bowl)**0.23\n",
|
||||
"\") raised error: invalid syntax. Perhaps you forgot a comma? (<expr>, line 1). Please try again with a valid numerical expression\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Processed examples: 10\r"
|
||||
"Processed examples: 6\r"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"evaluation_session_name = \"Search + Calculator Agent Evaluation\"\n",
|
||||
"chain_results = await client.arun_on_dataset(\n",
|
||||
" dataset_name=dataset_name,\n",
|
||||
" llm_or_chain_factory=chain_factory,\n",
|
||||
" concurrency_level=5, # Optional, sets the number of examples to run at a time\n",
|
||||
" verbose=True\n",
|
||||
" verbose=True,\n",
|
||||
" session_name=evaluation_session_name # Optional, a unique session name will be generated if not provided\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Sometimes, the agent will error due to parsing issues, incompatible tool inputs, etc.\n",
|
||||
@@ -511,18 +486,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d2737458-b20c-4288-8790-1f4a8d237b2a",
|
||||
"metadata": {},
|
||||
"id": "cdacd159-eb4d-49e9-bb2a-c55322c40ed4",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## Reviewing the Chain Results\n",
|
||||
"### Reviewing the Chain Results\n",
|
||||
"\n",
|
||||
"You can review the results of the run in the tracing UI below and navigating to the session \n",
|
||||
"with the title 'calculator-example-dataset-AgentExecutor-YYYY-MM-DD-HH-MM-SS'"
|
||||
"with the title **\"Search + Calculator Agent Evaluation\"**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 14,
|
||||
"id": "136db492-d6ca-4215-96f9-439c23538241",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -537,7 +514,7 @@
|
||||
"LangChainPlusClient (API URL: http://localhost:8000)"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -547,12 +524,271 @@
|
||||
"client"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "63ed6561-6574-43b3-a653-fe410aa8a617",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Running an Evaluation Chain\n",
|
||||
"\n",
|
||||
"Manually comparing the results of chains in the UI is effective, but it can be time consuming.\n",
|
||||
"It's easier to leverage AI-assisted feedback to evaluate your agent's performance.\n",
|
||||
"\n",
|
||||
"A few ways of doing this include:\n",
|
||||
"- Adding ground-truth answers as outputs to the dataset and evaluating relative to those references.\n",
|
||||
"- Evaluating the overall agent trajectory based on the tool usage and intermediate steps.\n",
|
||||
"- Evaluating performance based on 'context' such as retrieved documents or tool results.\n",
|
||||
"- Evaluating 'aspects' of the agent's response in a reference-free manner using targeted agent prompts.\n",
|
||||
" \n",
|
||||
"We will demonstrate the first two here. First, we will supply labels to the dataset.\n",
|
||||
"\n",
|
||||
"**Note: the feedback API is currently experimental and subject to change.**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "98212e82-712b-424c-9565-9244162d5400",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Found cached dataset json (/Users/wfh/.cache/huggingface/datasets/LangChainDatasets___json/LangChainDatasets--agent-search-calculator-8a025c0ce5fb99d2/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "8161e12bd6734de7bae23426b2169450",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0/1 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"from langchain.evaluation.loading import load_dataset\n",
|
||||
"\n",
|
||||
"dataset = load_dataset(\"agent-search-calculator\")\n",
|
||||
"df = pd.DataFrame(dataset, columns=[\"question\", \"answer\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "64724ccd-e27e-4c30-94f0-a612e3bb85d2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"examples = client.list_examples(dataset_name=dataset_name)\n",
|
||||
"examples_dict = {\n",
|
||||
" example.inputs['input']: example.id for example in examples\n",
|
||||
"}\n",
|
||||
"for tup in df.itertuples():\n",
|
||||
" if tup.question not in examples_dict:\n",
|
||||
" continue\n",
|
||||
" example_id = examples_dict[tup.question]\n",
|
||||
" client.update_example(outputs={\"answer\": tup.answer}, example_id=example_id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "812bb4e2-e2c1-4381-ad0a-08c5f3079c04",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.evaluation.qa import QAEvalChain\n",
|
||||
"\n",
|
||||
"eval_llm = ChatOpenAI(model=\"gpt-4\")\n",
|
||||
"chain = QAEvalChain.from_llm(eval_llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "35db4025-9183-4e5f-ba14-0b1b380f49c7",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Fetch the session that was auto-generated\n",
|
||||
"latest_session = client.read_session(session_name=evaluation_session_name)\n",
|
||||
"runs = list(client.list_runs(session_id=latest_session.id, execution_order=1, error=False))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "5e628c65-420a-40ce-9aee-ff93d60fe795",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Prepare inputs\n",
|
||||
"answers = [client.read_example(example_id=run.reference_example_id).outputs[\"answer\"] for run in runs]\n",
|
||||
"inputs = [{**run.inputs, \"answer\": answer} for run, answer in zip(runs, answers)]\n",
|
||||
"outputs = [{\"output\": run.outputs[\"output\"]} for run in runs]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "eb9900a8-0c32-45ec-a09a-0832ead7423c",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"results = chain.evaluate(\n",
|
||||
" examples = inputs,\n",
|
||||
" predictions=outputs,\n",
|
||||
" question_key=\"input\",\n",
|
||||
" answer_key=\"answer\",\n",
|
||||
" prediction_key=\"output\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"for run, result in zip(runs, results):\n",
|
||||
" client.create_feedback(run.id, \"grade\", result[\"text\"], source_info={\"evaluator\": \"QAEvalChain\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "781cd03c-8769-4640-a340-02a35c658ac7",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## Evaluating Agent Trajectories\n",
|
||||
"\n",
|
||||
"Often ground-truth reference labels aren't available to grade your\n",
|
||||
"model's performance. In this case, there are a variety of \"reference-free\" evaluation\n",
|
||||
"techniques that can be used.\n",
|
||||
"\n",
|
||||
"One of these is the `TrajectoryEvalChain`, which scores the agent based on logical use\n",
|
||||
"of its tools and how helpful the ultimate answer is.\n",
|
||||
"\n",
|
||||
"This is just one example of how to you can evaluate your agent's behavior, and we encourage\n",
|
||||
"you to develop your own chains to grade your agent along other dimensions."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "84c52ee6-8f6f-4f26-bb82-c343bd0c91f1",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.evaluation.agents import TrajectoryEvalChain\n",
|
||||
"\n",
|
||||
"eval_llm = ChatOpenAI(temperature=0, model_name=\"gpt-4\")\n",
|
||||
"\n",
|
||||
"eval_chain = TrajectoryEvalChain.from_llm(\n",
|
||||
" llm=eval_llm,\n",
|
||||
" agent_tools=agent.tools,\n",
|
||||
" return_reasoning=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "5c03142c-d1dc-4ba2-9f3c-99ebeddf9c97",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Fetch the session used for the latest runs\n",
|
||||
"latest_session = client.read_session(session_name=evaluation_session_name)\n",
|
||||
"runs = client.list_runs(session_id=latest_session.id, execution_order=1, error=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "c9f9f81f-c7dd-42f7-a169-c70be00e986e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "17c27a95063d4192a1716d5d14d00c84",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"0it [00:00, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.schema import AgentAction\n",
|
||||
"from tqdm.notebook import tqdm\n",
|
||||
"\n",
|
||||
"def get_intermediate_steps(run):\n",
|
||||
" \"\"\"Convert the serialized steps into an object.\"\"\"\n",
|
||||
" results = []\n",
|
||||
" for action, observation in run.outputs['intermediate_steps']:\n",
|
||||
" results.append([AgentAction(*action), observation])\n",
|
||||
" return results\n",
|
||||
"\n",
|
||||
"for run in tqdm(runs):\n",
|
||||
" evaluation = eval_chain(\n",
|
||||
" inputs={\n",
|
||||
" \"question\": run.inputs['input'],\n",
|
||||
" \"answer\": run.outputs['output'],\n",
|
||||
" \"agent_trajectory\": eval_chain.get_agent_trajectory(get_intermediate_steps(run))\n",
|
||||
" },\n",
|
||||
" )\n",
|
||||
" client.create_feedback(\n",
|
||||
" run.id, \n",
|
||||
" \"trajectory_evaluation\",\n",
|
||||
" evaluation[\"score\"], \n",
|
||||
" source_info={\n",
|
||||
" \"evaluation_model\": \"gpt-4\",\n",
|
||||
" \"evaluation_chain\": \"TrajectoryEvalChain\"\n",
|
||||
" }\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5ad7a975-4223-4f09-b27f-589cbdb71f99",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Now the evaluation feedback is saved for future analysis.**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c70cceb5-aa53-4851-bb12-386f092191f9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Running a Chat Model over a Traced Dataset\n",
|
||||
"## Running a Chat Model over a Traced Dataset\n",
|
||||
"\n",
|
||||
"We've shown how to run a _chain_ over a dataset, but you can also run an LLM or Chat model over a datasets formed from runs. \n",
|
||||
"\n",
|
||||
@@ -567,7 +803,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 25,
|
||||
"id": "64490d7c-9a18-49ed-a3ac-36049c522cb4",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -583,7 +819,7 @@
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "44f3c72015944e2ea4c39516350ea15c",
|
||||
"model_id": "0ef824a19bcf4e0d8491898ee2eacc4c",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
@@ -650,8 +886,8 @@
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" generations \\\n",
|
||||
"0 [[{'generation_info': None, 'message': {'conte... \n",
|
||||
" generations \n",
|
||||
"0 [[{'generation_info': None, 'message': {'conte... \\\n",
|
||||
"1 [[{'generation_info': None, 'message': {'conte... \n",
|
||||
"2 [[{'generation_info': None, 'message': {'conte... \n",
|
||||
"3 [[{'generation_info': None, 'message': {'conte... \n",
|
||||
@@ -665,7 +901,7 @@
|
||||
"4 [{'data': {'content': 'Here is the topic for a... "
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -681,7 +917,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 26,
|
||||
"id": "348acd86-a927-4d60-8d52-02e64585e4fc",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -704,14 +940,26 @@
|
||||
"id": "927a43b8-e4f9-4220-b75d-33e310bc318b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Reviewing behavior with temperature\n",
|
||||
"### Reviewing behavior with temperature\n",
|
||||
"\n",
|
||||
"Here, we will set `num_repetitions > 1` and set the temperature to 0.3 to see the variety of response types for a each example.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 29,
|
||||
"id": "3cd3af25-474e-4de7-8ba8-d63b01c4f6c6",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# !pip install anthropic > /dev/null"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "a69dd183-ad5e-473d-b631-db90706e837f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -725,7 +973,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": 31,
|
||||
"id": "063da2a9-3692-4b7b-8edb-e474824fe416",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -760,14 +1008,14 @@
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## Reviewing the Chat Model Results\n",
|
||||
"### Reviewing the Chat Model Results\n",
|
||||
"\n",
|
||||
"You can review the latest runs by clicking on the link below and navigating to the \"two-player-dnd\" session."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": 32,
|
||||
"id": "5b7a81f2-d19d-438b-a4bb-5678f746b965",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -782,7 +1030,7 @@
|
||||
"LangChainPlusClient (API URL: http://localhost:8000)"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -803,7 +1051,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"execution_count": 33,
|
||||
"id": "d6805d0b-4612-4671-bffb-e6978992bd40",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -817,7 +1065,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"execution_count": 34,
|
||||
"id": "5d7cb243-40c3-44dd-8158-a7b910441e9f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -833,7 +1081,7 @@
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "5ce2168f975241fbae82a76b4d70e4c4",
|
||||
"model_id": "4d5fb1d4dc6748b893227e637ddcd8bc",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
@@ -906,15 +1154,15 @@
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" generations \\\n",
|
||||
"0 [[{'generation_info': {'finish_reason': 'stop'... \n",
|
||||
" generations \n",
|
||||
"0 [[{'generation_info': {'finish_reason': 'stop'... \\\n",
|
||||
"1 [[]] \n",
|
||||
"2 [[{'generation_info': {'finish_reason': 'stop'... \n",
|
||||
"3 [[]] \n",
|
||||
"4 [[{'generation_info': {'finish_reason': 'stop'... \n",
|
||||
"\n",
|
||||
" ground_truth \\\n",
|
||||
"0 The pandemic has been punishing. \\n\\nAnd so ma... \n",
|
||||
" ground_truth \n",
|
||||
"0 The pandemic has been punishing. \\n\\nAnd so ma... \\\n",
|
||||
"1 With a duty to one another to the American peo... \n",
|
||||
"2 He thought he could roll into Ukraine and the ... \n",
|
||||
"3 And the costs and the threats to America and t... \n",
|
||||
@@ -928,7 +1176,7 @@
|
||||
"4 Groups of citizens blocking tanks with their b... "
|
||||
]
|
||||
},
|
||||
"execution_count": 22,
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -941,7 +1189,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"execution_count": 35,
|
||||
"id": "c7dcc1b2-7aef-44c0-ba0f-c812279099a5",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -961,7 +1209,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 36,
|
||||
"id": "e946138e-bf7c-43d7-861d-9c5740c933fa",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -997,7 +1245,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"execution_count": 37,
|
||||
"id": "2bf96f17-74c1-4f7d-8458-ae5ab5c6bd36",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -1012,7 +1260,7 @@
|
||||
"LangChainPlusClient (API URL: http://localhost:8000)"
|
||||
]
|
||||
},
|
||||
"execution_count": 25,
|
||||
"execution_count": 37,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -1046,7 +1294,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -25,7 +25,7 @@ SYSTEM_PROMPT = (

 class PlanningOutputParser(PlanOutputParser):
     def parse(self, text: str) -> Plan:
-        steps = [Step(value=v) for v in re.split("\n\d+\. ", text)[1:]]
+        steps = [Step(value=v) for v in re.split("\n\s*\d+\. ", text)[1:]]
         return Plan(steps=steps)
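A quick check of the widened pattern: the added `\s*` tolerates indentation before the step numbers, which the old pattern rejected. A small illustrative example:

    import re

    text = "Plan:\n1. Search the web.\n  2. Summarize the results."
    steps = re.split(r"\n\s*\d+\. ", text)[1:]
    print(steps)  # ['Search the web.', 'Summarize the results.']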
@@ -22,6 +22,7 @@ from langchain.llms.huggingface_text_gen_inference import HuggingFaceTextGenInfe
 from langchain.llms.human import HumanInputLLM
 from langchain.llms.llamacpp import LlamaCpp
 from langchain.llms.modal import Modal
+from langchain.llms.mosaicml import MosaicML
 from langchain.llms.nlpcloud import NLPCloud
 from langchain.llms.openai import AzureOpenAI, OpenAI, OpenAIChat
 from langchain.llms.openlm import OpenLM
@@ -51,6 +52,7 @@ __all__ = [
     "GPT4All",
     "LlamaCpp",
     "Modal",
+    "MosaicML",
     "NLPCloud",
     "OpenAI",
     "OpenAIChat",
@@ -94,6 +96,7 @@ type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
     "huggingface_endpoint": HuggingFaceEndpoint,
     "llamacpp": LlamaCpp,
     "modal": Modal,
+    "mosaic": MosaicML,
     "sagemaker_endpoint": SagemakerEndpoint,
     "nlpcloud": NLPCloud,
     "human-input": HumanInputLLM,
@@ -299,6 +299,13 @@ class BaseLLM(BaseLanguageModel, ABC):
             .text
         )

+    async def _call_async(
+        self, prompt: str, stop: Optional[List[str]] = None, callbacks: Callbacks = None
+    ) -> str:
+        """Check cache and run the LLM on the given prompt and input."""
+        result = await self.agenerate([prompt], stop=stop, callbacks=callbacks)
+        return result.generations[0][0].text
+
     def predict(self, text: str, *, stop: Optional[Sequence[str]] = None) -> str:
         if stop is None:
             _stop = None
@@ -317,6 +324,24 @@ class BaseLLM(BaseLanguageModel, ABC):
         content = self(text, stop=_stop)
         return AIMessage(content=content)

+    async def apredict(self, text: str, *, stop: Optional[Sequence[str]] = None) -> str:
+        if stop is None:
+            _stop = None
+        else:
+            _stop = list(stop)
+        return await self._call_async(text, stop=_stop)
+
+    async def apredict_messages(
+        self, messages: List[BaseMessage], *, stop: Optional[Sequence[str]] = None
+    ) -> BaseMessage:
+        text = get_buffer_string(messages)
+        if stop is None:
+            _stop = None
+        else:
+            _stop = list(stop)
+        content = await self._call_async(text, stop=_stop)
+        return AIMessage(content=content)
+
     @property
     def _identifying_params(self) -> Mapping[str, Any]:
         """Get the identifying parameters."""
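A minimal sketch of the new async helpers, assuming an OpenAI API key is configured in the environment:

    import asyncio

    from langchain.llms import OpenAI

    llm = OpenAI(temperature=0)

    async def main() -> None:
        # apredict awaits agenerate under the hood via _call_async
        text = await llm.apredict("Say hello", stop=["\n\n"])
        print(text)

    asyncio.run(main())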
173  langchain/llms/mosaicml.py  Normal file
@@ -0,0 +1,173 @@
"""Wrapper around MosaicML APIs."""
from typing import Any, Dict, List, Mapping, Optional

import requests
from pydantic import Extra, root_validator

from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM
from langchain.llms.utils import enforce_stop_tokens
from langchain.utils import get_from_dict_or_env

INSTRUCTION_KEY = "### Instruction:"
RESPONSE_KEY = "### Response:"
INTRO_BLURB = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request."
)
PROMPT_FOR_GENERATION_FORMAT = """{intro}
{instruction_key}
{instruction}
{response_key}
""".format(
    intro=INTRO_BLURB,
    instruction_key=INSTRUCTION_KEY,
    instruction="{instruction}",
    response_key=RESPONSE_KEY,
)


class MosaicML(LLM):
    """Wrapper around MosaicML's LLM inference service.

    To use, you should have the
    environment variable ``MOSAICML_API_TOKEN`` set with your API token, or pass
    it as a named parameter to the constructor.

    Example:
        .. code-block:: python

            from langchain.llms import MosaicML
            endpoint_url = (
                "https://models.hosted-on.mosaicml.hosting/mpt-7b-instruct/v1/predict"
            )
            mosaic_llm = MosaicML(
                endpoint_url=endpoint_url,
                mosaicml_api_token="my-api-key"
            )
    """

    endpoint_url: str = (
        "https://models.hosted-on.mosaicml.hosting/mpt-7b-instruct/v1/predict"
    )
    """Endpoint URL to use."""
    inject_instruction_format: bool = False
    """Whether to inject the instruction format into the prompt."""
    model_kwargs: Optional[dict] = None
    """Keyword arguments to pass to the model."""
    retry_sleep: float = 1.0
    """How long to sleep if a rate limit is encountered."""

    mosaicml_api_token: Optional[str] = None

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the API key exists in the environment."""
        mosaicml_api_token = get_from_dict_or_env(
            values, "mosaicml_api_token", "MOSAICML_API_TOKEN"
        )
        values["mosaicml_api_token"] = mosaicml_api_token
        return values

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        _model_kwargs = self.model_kwargs or {}
        return {
            **{"endpoint_url": self.endpoint_url},
            **{"model_kwargs": _model_kwargs},
        }

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "mosaicml"

    def _transform_prompt(self, prompt: str) -> str:
        """Transform prompt."""
        if self.inject_instruction_format:
            prompt = PROMPT_FOR_GENERATION_FORMAT.format(
                instruction=prompt,
            )
        return prompt

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        is_retry: bool = False,
    ) -> str:
        """Call out to a MosaicML LLM inference endpoint.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.

        Returns:
            The string generated by the model.

        Example:
            .. code-block:: python

                response = mosaic_llm("Tell me a joke.")
        """
        _model_kwargs = self.model_kwargs or {}

        prompt = self._transform_prompt(prompt)

        payload = {"input_strings": [prompt]}
        payload.update(_model_kwargs)

        # HTTP headers for authorization
        headers = {
            "Authorization": f"{self.mosaicml_api_token}",
            "Content-Type": "application/json",
        }

        # send request
        try:
            response = requests.post(self.endpoint_url, headers=headers, json=payload)
        except requests.exceptions.RequestException as e:
            raise ValueError(f"Error raised by inference endpoint: {e}")

        try:
            parsed_response = response.json()

            if "error" in parsed_response:
                # if we get rate limited, try sleeping for 1 second
                if (
                    not is_retry
                    and "rate limit exceeded" in parsed_response["error"].lower()
                ):
                    import time

                    time.sleep(self.retry_sleep)

                    return self._call(prompt, stop, run_manager, is_retry=True)

                raise ValueError(
                    f"Error raised by inference API: {parsed_response['error']}"
                )

            if "data" not in parsed_response:
                raise ValueError(
                    f"Error raised by inference API, no key data: {parsed_response}"
                )
            generated_text = parsed_response["data"]
        except requests.exceptions.JSONDecodeError as e:
            raise ValueError(
                f"Error raised by inference API: {e}.\nResponse: {response.text}"
            )

        # The endpoint echoes the prompt, so strip it from the returned text.
        text = generated_text[0][len(prompt) :]

        # TODO: replace when MosaicML supports custom stop tokens natively
        if stop is not None:
            text = enforce_stop_tokens(text, stop)
        return text
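A minimal usage sketch, assuming `MOSAICML_API_TOKEN` is set in the environment; `inject_instruction_format=True` wraps the prompt in the instruction template defined above:

    from langchain.llms import MosaicML

    llm = MosaicML(inject_instruction_format=True)
    print(llm("What is the capital of France?"))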
@@ -512,6 +512,10 @@ class BaseOpenAI(BaseLLM):
             "code-cushman-001": 2048,
         }

+        # handling finetuned models
+        if "ft-" in modelname:
+            modelname = modelname.split(":")[0]
+
         context_size = model_token_mapping.get(modelname, None)

         if context_size is None:
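An illustrative walk-through of the new branch: legacy OpenAI fine-tuned model names embed the base model before the first colon, so stripping there recovers a name that `model_token_mapping` knows. The model name below is a made-up example:

    modelname = "curie:ft-your-org-2023-03-01-12-00-00"  # hypothetical fine-tune name
    if "ft-" in modelname:
        modelname = modelname.split(":")[0]
    print(modelname)  # curie -> its context size can now be looked up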
@@ -127,7 +127,9 @@ class TimeWeightedVectorStoreRetriever(BaseRetriever, BaseModel):
         self, documents: List[Document], **kwargs: Any
     ) -> List[str]:
         """Add documents to vectorstore."""
-        current_time = kwargs.get("current_time", datetime.datetime.now())
+        current_time = kwargs.get("current_time")
+        if current_time is None:
+            current_time = datetime.datetime.now()
         # Avoid mutating input documents
         dup_docs = [deepcopy(d) for d in documents]
         for i, doc in enumerate(dup_docs):
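A short usage note on the change above: a concrete timestamp still lets callers backdate documents, while omitting `current_time`, or forwarding an explicit `current_time=None`, now both resolve to `datetime.datetime.now()` inside the method. An illustrative call, with `retriever` and `docs` assumed to exist:

    import datetime

    retriever.add_documents(docs, current_time=datetime.datetime(2023, 1, 1))
    retriever.add_documents(docs)                     # timestamp resolved lazily
    retriever.add_documents(docs, current_time=None)  # same as omitting it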
@@ -64,10 +64,12 @@ class TextSplitter(BaseDocumentTransformer, ABC):
             documents.append(new_doc)
         return documents

-    def split_documents(self, documents: List[Document]) -> List[Document]:
+    def split_documents(self, documents: Iterable[Document]) -> List[Document]:
         """Split documents."""
-        texts = [doc.page_content for doc in documents]
-        metadatas = [doc.metadata for doc in documents]
+        texts, metadatas = [], []
+        for doc in documents:
+            texts.append(doc.page_content)
+            metadatas.append(doc.metadata)
         return self.create_documents(texts, metadatas=metadatas)

    def _join_docs(self, docs: List[str], separator: str) -> Optional[str]:
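With the `Iterable` signature, `split_documents` consumes its input in a single pass, so a generator (such as a loader's `lazy_load()`) is no longer exhausted by the first of two comprehensions. A minimal sketch, with `loader` assumed to be any `BaseLoader`:

    from langchain.text_splitter import RecursiveCharacterTextSplitter

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(loader.lazy_load())  # loader assumed from context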
@@ -1,5 +1,11 @@
 """Core toolkit implementations."""

+from langchain.tools.azure_cognitive_services import (
+    AzureCogsFormRecognizerTool,
+    AzureCogsImageAnalysisTool,
+    AzureCogsSpeech2TextTool,
+    AzureCogsText2SpeechTool,
+)
 from langchain.tools.base import BaseTool, StructuredTool, Tool, tool
 from langchain.tools.bing_search.tool import BingSearchResults, BingSearchRun
 from langchain.tools.ddg_search.tool import DuckDuckGoSearchResults, DuckDuckGoSearchRun
@@ -56,6 +62,10 @@ from langchain.tools.zapier.tool import ZapierNLAListActions, ZapierNLARunAction
 __all__ = [
     "AIPluginTool",
     "APIOperation",
+    "AzureCogsFormRecognizerTool",
+    "AzureCogsImageAnalysisTool",
+    "AzureCogsSpeech2TextTool",
+    "AzureCogsText2SpeechTool",
     "BaseTool",
     "BaseTool",
     "BaseTool",
21  langchain/tools/azure_cognitive_services/__init__.py  Normal file
@@ -0,0 +1,21 @@
"""Azure Cognitive Services Tools."""

from langchain.tools.azure_cognitive_services.form_recognizer import (
    AzureCogsFormRecognizerTool,
)
from langchain.tools.azure_cognitive_services.image_analysis import (
    AzureCogsImageAnalysisTool,
)
from langchain.tools.azure_cognitive_services.speech2text import (
    AzureCogsSpeech2TextTool,
)
from langchain.tools.azure_cognitive_services.text2speech import (
    AzureCogsText2SpeechTool,
)

__all__ = [
    "AzureCogsImageAnalysisTool",
    "AzureCogsFormRecognizerTool",
    "AzureCogsSpeech2TextTool",
    "AzureCogsText2SpeechTool",
]
152  langchain/tools/azure_cognitive_services/form_recognizer.py  Normal file
@@ -0,0 +1,152 @@
from __future__ import annotations

import logging
from typing import Any, Dict, List, Optional

from pydantic import root_validator

from langchain.callbacks.manager import (
    AsyncCallbackManagerForToolRun,
    CallbackManagerForToolRun,
)
from langchain.tools.azure_cognitive_services.utils import detect_file_src_type
from langchain.tools.base import BaseTool
from langchain.utils import get_from_dict_or_env

logger = logging.getLogger(__name__)


class AzureCogsFormRecognizerTool(BaseTool):
    """Tool that queries the Azure Cognitive Services Form Recognizer API.

    In order to set this up, follow instructions at:
    https://learn.microsoft.com/en-us/azure/applied-ai-services/form-recognizer/quickstarts/get-started-sdks-rest-api?view=form-recog-3.0.0&pivots=programming-language-python
    """

    azure_cogs_key: str = ""  #: :meta private:
    azure_cogs_endpoint: str = ""  #: :meta private:
    doc_analysis_client: Any  #: :meta private:

    name = "Azure Cognitive Services Form Recognizer"
    description = (
        "A wrapper around Azure Cognitive Services Form Recognizer. "
        "Useful for when you need to "
        "extract text, tables, and key-value pairs from documents. "
        "Input should be a url to a document."
    )

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the API key and endpoint exist in the environment."""
        azure_cogs_key = get_from_dict_or_env(
            values, "azure_cogs_key", "AZURE_COGS_KEY"
        )

        azure_cogs_endpoint = get_from_dict_or_env(
            values, "azure_cogs_endpoint", "AZURE_COGS_ENDPOINT"
        )

        try:
            from azure.ai.formrecognizer import DocumentAnalysisClient
            from azure.core.credentials import AzureKeyCredential

            values["doc_analysis_client"] = DocumentAnalysisClient(
                endpoint=azure_cogs_endpoint,
                credential=AzureKeyCredential(azure_cogs_key),
            )

        except ImportError:
            raise ImportError(
                "azure-ai-formrecognizer is not installed. "
                "Run `pip install azure-ai-formrecognizer` to install."
            )

        return values

    def _parse_tables(self, tables: List[Any]) -> List[Any]:
        result = []
        for table in tables:
            rc, cc = table.row_count, table.column_count
            _table = [["" for _ in range(cc)] for _ in range(rc)]
            for cell in table.cells:
                _table[cell.row_index][cell.column_index] = cell.content
            result.append(_table)
        return result

    def _parse_kv_pairs(self, kv_pairs: List[Any]) -> List[Any]:
        result = []
        for kv_pair in kv_pairs:
            key = kv_pair.key.content if kv_pair.key else ""
            value = kv_pair.value.content if kv_pair.value else ""
            result.append((key, value))
        return result

    def _document_analysis(self, document_path: str) -> Dict:
        document_src_type = detect_file_src_type(document_path)
        if document_src_type == "local":
            with open(document_path, "rb") as document:
                poller = self.doc_analysis_client.begin_analyze_document(
                    "prebuilt-document", document
                )
        elif document_src_type == "remote":
            poller = self.doc_analysis_client.begin_analyze_document_from_url(
                "prebuilt-document", document_path
            )
        else:
            raise ValueError(f"Invalid document path: {document_path}")

        result = poller.result()
        res_dict = {}

        if result.content is not None:
            res_dict["content"] = result.content

        if result.tables is not None:
            res_dict["tables"] = self._parse_tables(result.tables)

        if result.key_value_pairs is not None:
            res_dict["key_value_pairs"] = self._parse_kv_pairs(result.key_value_pairs)

        return res_dict

    def _format_document_analysis_result(self, document_analysis_result: Dict) -> str:
        formatted_result = []
        if "content" in document_analysis_result:
            formatted_result.append(
                f"Content: {document_analysis_result['content']}".replace("\n", " ")
            )

        if "tables" in document_analysis_result:
            for i, table in enumerate(document_analysis_result["tables"]):
                formatted_result.append(f"Table {i}: {table}".replace("\n", " "))

        if "key_value_pairs" in document_analysis_result:
            for kv_pair in document_analysis_result["key_value_pairs"]:
                formatted_result.append(
                    f"{kv_pair[0]}: {kv_pair[1]}".replace("\n", " ")
                )

        return "\n".join(formatted_result)

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool."""
        try:
            document_analysis_result = self._document_analysis(query)
            if not document_analysis_result:
                return "No good document analysis result was found"
|
||||
return self._format_document_analysis_result(document_analysis_result)
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Error while running AzureCogsFormRecognizerTool: {e}")
|
||||
|
||||
async def _arun(
|
||||
self,
|
||||
query: str,
|
||||
run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
|
||||
) -> str:
|
||||
"""Use the tool asynchronously."""
|
||||
raise NotImplementedError("AzureCogsFormRecognizerTool does not support async")
|
||||
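A quick sketch of exercising the Form Recognizer tool on its own, outside an agent. The document URL is a hypothetical placeholder; `run` is inherited from `BaseTool` and dispatches to the `_run` method above:

```python
from langchain.tools.azure_cognitive_services import AzureCogsFormRecognizerTool

form_tool = AzureCogsFormRecognizerTool()
# Returns a newline-joined string of content, tables, and key-value pairs,
# as produced by _format_document_analysis_result above.
result = form_tool.run("https://example.com/sample-invoice.pdf")  # placeholder URL
print(result)
```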
156 langchain/tools/azure_cognitive_services/image_analysis.py Normal file
@@ -0,0 +1,156 @@
from __future__ import annotations

import logging
from typing import Any, Dict, Optional

from pydantic import root_validator

from langchain.callbacks.manager import (
    AsyncCallbackManagerForToolRun,
    CallbackManagerForToolRun,
)
from langchain.tools.azure_cognitive_services.utils import detect_file_src_type
from langchain.tools.base import BaseTool
from langchain.utils import get_from_dict_or_env

logger = logging.getLogger(__name__)


class AzureCogsImageAnalysisTool(BaseTool):
    """Tool that queries the Azure Cognitive Services Image Analysis API.

    In order to set this up, follow instructions at:
    https://learn.microsoft.com/en-us/azure/cognitive-services/computer-vision/quickstarts-sdk/image-analysis-client-library-40
    """

    azure_cogs_key: str = ""  #: :meta private:
    azure_cogs_endpoint: str = ""  #: :meta private:
    vision_service: Any  #: :meta private:
    analysis_options: Any  #: :meta private:

    name = "Azure Cognitive Services Image Analysis"
    description = (
        "A wrapper around Azure Cognitive Services Image Analysis. "
        "Useful for when you need to analyze images. "
        "Input should be a url to an image."
    )

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key and endpoint exist in the environment."""
        azure_cogs_key = get_from_dict_or_env(
            values, "azure_cogs_key", "AZURE_COGS_KEY"
        )
        azure_cogs_endpoint = get_from_dict_or_env(
            values, "azure_cogs_endpoint", "AZURE_COGS_ENDPOINT"
        )

        try:
            import azure.ai.vision as sdk

            values["vision_service"] = sdk.VisionServiceOptions(
                endpoint=azure_cogs_endpoint, key=azure_cogs_key
            )

            values["analysis_options"] = sdk.ImageAnalysisOptions()
            values["analysis_options"].features = (
                sdk.ImageAnalysisFeature.CAPTION
                | sdk.ImageAnalysisFeature.OBJECTS
                | sdk.ImageAnalysisFeature.TAGS
                | sdk.ImageAnalysisFeature.TEXT
            )
        except ImportError:
            raise ImportError(
                "azure-ai-vision is not installed. "
                "Run `pip install azure-ai-vision` to install."
            )

        return values

    def _image_analysis(self, image_path: str) -> Dict:
        try:
            import azure.ai.vision as sdk
        except ImportError:
            # Re-raise instead of silently passing: `sdk` would otherwise be
            # unbound and the code below would fail with a NameError.
            raise ImportError(
                "azure-ai-vision is not installed. "
                "Run `pip install azure-ai-vision` to install."
            )

        image_src_type = detect_file_src_type(image_path)
        if image_src_type == "local":
            vision_source = sdk.VisionSource(filename=image_path)
        elif image_src_type == "remote":
            vision_source = sdk.VisionSource(url=image_path)
        else:
            raise ValueError(f"Invalid image path: {image_path}")

        image_analyzer = sdk.ImageAnalyzer(
            self.vision_service, vision_source, self.analysis_options
        )
        result = image_analyzer.analyze()

        res_dict = {}
        if result.reason == sdk.ImageAnalysisResultReason.ANALYZED:
            if result.caption is not None:
                res_dict["caption"] = result.caption.content

            if result.objects is not None:
                res_dict["objects"] = [obj.name for obj in result.objects]

            if result.tags is not None:
                res_dict["tags"] = [tag.name for tag in result.tags]

            if result.text is not None:
                res_dict["text"] = [line.content for line in result.text.lines]

        else:
            error_details = sdk.ImageAnalysisErrorDetails.from_result(result)
            raise RuntimeError(
                f"Image analysis failed.\n"
                f"Reason: {error_details.reason}\n"
                f"Details: {error_details.message}"
            )

        return res_dict

    def _format_image_analysis_result(self, image_analysis_result: Dict) -> str:
        formatted_result = []
        if "caption" in image_analysis_result:
            formatted_result.append("Caption: " + image_analysis_result["caption"])

        if (
            "objects" in image_analysis_result
            and len(image_analysis_result["objects"]) > 0
        ):
            formatted_result.append(
                "Objects: " + ", ".join(image_analysis_result["objects"])
            )

        if "tags" in image_analysis_result and len(image_analysis_result["tags"]) > 0:
            formatted_result.append("Tags: " + ", ".join(image_analysis_result["tags"]))

        if "text" in image_analysis_result and len(image_analysis_result["text"]) > 0:
            formatted_result.append("Text: " + ", ".join(image_analysis_result["text"]))

        return "\n".join(formatted_result)

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool."""
        try:
            image_analysis_result = self._image_analysis(query)
            if not image_analysis_result:
                return "No good image analysis result was found"

            return self._format_image_analysis_result(image_analysis_result)
        except Exception as e:
            raise RuntimeError(f"Error while running AzureCogsImageAnalysisTool: {e}")

    async def _arun(
        self,
        query: str,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool asynchronously."""
        raise NotImplementedError("AzureCogsImageAnalysisTool does not support async")
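As with the Form Recognizer tool, the analyzer can be driven directly; a sketch with a placeholder image URL:

```python
from langchain.tools.azure_cognitive_services import AzureCogsImageAnalysisTool

image_tool = AzureCogsImageAnalysisTool()
# Output is the "Caption: ... / Objects: ... / Tags: ... / Text: ..." summary
# built by _format_image_analysis_result above.
print(image_tool.run("https://example.com/photo.jpg"))  # placeholder URL
```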
131 langchain/tools/azure_cognitive_services/speech2text.py Normal file
@@ -0,0 +1,131 @@
from __future__ import annotations

import logging
import time
from typing import Any, Dict, Optional

from pydantic import root_validator

from langchain.callbacks.manager import (
    AsyncCallbackManagerForToolRun,
    CallbackManagerForToolRun,
)
from langchain.tools.azure_cognitive_services.utils import (
    detect_file_src_type,
    download_audio_from_url,
)
from langchain.tools.base import BaseTool
from langchain.utils import get_from_dict_or_env

logger = logging.getLogger(__name__)


class AzureCogsSpeech2TextTool(BaseTool):
    """Tool that queries the Azure Cognitive Services Speech2Text API.

    In order to set this up, follow instructions at:
    https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/get-started-speech-to-text?pivots=programming-language-python
    """

    azure_cogs_key: str = ""  #: :meta private:
    azure_cogs_region: str = ""  #: :meta private:
    speech_language: str = "en-US"  #: :meta private:
    speech_config: Any  #: :meta private:

    name = "Azure Cognitive Services Speech2Text"
    description = (
        "A wrapper around Azure Cognitive Services Speech2Text. "
        "Useful for when you need to transcribe audio to text. "
        "Input should be a url to an audio file."
    )

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key and region exist in the environment."""
        azure_cogs_key = get_from_dict_or_env(
            values, "azure_cogs_key", "AZURE_COGS_KEY"
        )
        azure_cogs_region = get_from_dict_or_env(
            values, "azure_cogs_region", "AZURE_COGS_REGION"
        )

        try:
            import azure.cognitiveservices.speech as speechsdk

            values["speech_config"] = speechsdk.SpeechConfig(
                subscription=azure_cogs_key, region=azure_cogs_region
            )
        except ImportError:
            raise ImportError(
                "azure-cognitiveservices-speech is not installed. "
                "Run `pip install azure-cognitiveservices-speech` to install."
            )

        return values

    def _continuous_recognize(self, speech_recognizer: Any) -> str:
        done = False
        text = ""

        def stop_cb(evt: Any) -> None:
            """Callback that stops continuous recognition."""
            speech_recognizer.stop_continuous_recognition_async()
            nonlocal done
            done = True

        def retrieve_cb(evt: Any) -> None:
            """Callback that retrieves the intermediate recognition results."""
            nonlocal text
            text += evt.result.text

        # retrieve text on recognized events
        speech_recognizer.recognized.connect(retrieve_cb)
        # stop continuous recognition on either session stopped or canceled events
        speech_recognizer.session_stopped.connect(stop_cb)
        speech_recognizer.canceled.connect(stop_cb)

        # Start continuous speech recognition
        speech_recognizer.start_continuous_recognition_async()
        while not done:
            time.sleep(0.5)
        return text

    def _speech2text(self, audio_path: str, speech_language: str) -> str:
        try:
            import azure.cognitiveservices.speech as speechsdk
        except ImportError:
            # Re-raise instead of silently passing: `speechsdk` would otherwise
            # be unbound and the code below would fail with a NameError.
            raise ImportError(
                "azure-cognitiveservices-speech is not installed. "
                "Run `pip install azure-cognitiveservices-speech` to install."
            )

        audio_src_type = detect_file_src_type(audio_path)
        if audio_src_type == "local":
            audio_config = speechsdk.AudioConfig(filename=audio_path)
        elif audio_src_type == "remote":
            tmp_audio_path = download_audio_from_url(audio_path)
            audio_config = speechsdk.AudioConfig(filename=tmp_audio_path)
        else:
            raise ValueError(f"Invalid audio path: {audio_path}")

        self.speech_config.speech_recognition_language = speech_language
        speech_recognizer = speechsdk.SpeechRecognizer(self.speech_config, audio_config)
        return self._continuous_recognize(speech_recognizer)

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool."""
        try:
            text = self._speech2text(query, self.speech_language)
            return text
        except Exception as e:
            raise RuntimeError(f"Error while running AzureCogsSpeech2TextTool: {e}")

    async def _arun(
        self,
        query: str,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool asynchronously."""
        raise NotImplementedError("AzureCogsSpeech2TextTool does not support async")
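Note the design of `_continuous_recognize`: rather than a one-shot recognition call, it runs continuous recognition and polls in half-second steps until a `session_stopped` or `canceled` event flips `done`, so transcripts longer than a single utterance are captured. A usage sketch with a placeholder audio URL:

```python
from langchain.tools.azure_cognitive_services import AzureCogsSpeech2TextTool

speech2text_tool = AzureCogsSpeech2TextTool()
# Remote URLs are first downloaded to a temp file by download_audio_from_url.
print(speech2text_tool.run("https://example.com/recording.wav"))  # placeholder URL
```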
114 langchain/tools/azure_cognitive_services/text2speech.py Normal file
@@ -0,0 +1,114 @@
from __future__ import annotations

import logging
import tempfile
from typing import Any, Dict, Optional

from pydantic import root_validator

from langchain.callbacks.manager import (
    AsyncCallbackManagerForToolRun,
    CallbackManagerForToolRun,
)
from langchain.tools.base import BaseTool
from langchain.utils import get_from_dict_or_env

logger = logging.getLogger(__name__)


class AzureCogsText2SpeechTool(BaseTool):
    """Tool that queries the Azure Cognitive Services Text2Speech API.

    In order to set this up, follow instructions at:
    https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/get-started-text-to-speech?pivots=programming-language-python
    """

    azure_cogs_key: str = ""  #: :meta private:
    azure_cogs_region: str = ""  #: :meta private:
    speech_language: str = "en-US"  #: :meta private:
    speech_config: Any  #: :meta private:

    name = "Azure Cognitive Services Text2Speech"
    description = (
        "A wrapper around Azure Cognitive Services Text2Speech. "
        "Useful for when you need to convert text to speech."
    )

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key and region exist in the environment."""
        azure_cogs_key = get_from_dict_or_env(
            values, "azure_cogs_key", "AZURE_COGS_KEY"
        )
        azure_cogs_region = get_from_dict_or_env(
            values, "azure_cogs_region", "AZURE_COGS_REGION"
        )

        try:
            import azure.cognitiveservices.speech as speechsdk

            values["speech_config"] = speechsdk.SpeechConfig(
                subscription=azure_cogs_key, region=azure_cogs_region
            )
        except ImportError:
            raise ImportError(
                "azure-cognitiveservices-speech is not installed. "
                "Run `pip install azure-cognitiveservices-speech` to install."
            )

        return values

    def _text2speech(self, text: str, speech_language: str) -> str:
        try:
            import azure.cognitiveservices.speech as speechsdk
        except ImportError:
            # Re-raise instead of silently passing: `speechsdk` would otherwise
            # be unbound and the code below would fail with a NameError.
            raise ImportError(
                "azure-cognitiveservices-speech is not installed. "
                "Run `pip install azure-cognitiveservices-speech` to install."
            )

        self.speech_config.speech_synthesis_language = speech_language
        speech_synthesizer = speechsdk.SpeechSynthesizer(
            speech_config=self.speech_config, audio_config=None
        )
        result = speech_synthesizer.speak_text(text)

        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            stream = speechsdk.AudioDataStream(result)
            with tempfile.NamedTemporaryFile(
                mode="wb", suffix=".wav", delete=False
            ) as f:
                stream.save_to_wav_file(f.name)

            return f.name

        elif result.reason == speechsdk.ResultReason.Canceled:
            cancellation_details = result.cancellation_details
            logger.debug(f"Speech synthesis canceled: {cancellation_details.reason}")
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                raise RuntimeError(
                    f"Speech synthesis error: {cancellation_details.error_details}"
                )

            return "Speech synthesis canceled."

        else:
            return f"Speech synthesis failed: {result.reason}"

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool."""
        try:
            speech_file = self._text2speech(query, self.speech_language)
            return speech_file
        except Exception as e:
            raise RuntimeError(f"Error while running AzureCogsText2SpeechTool: {e}")

    async def _arun(
        self,
        query: str,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool asynchronously."""
        raise NotImplementedError("AzureCogsText2SpeechTool does not support async")
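The tool returns the path of a temporary `.wav` file rather than raw audio bytes, which keeps the string-in/string-out contract that agents expect. A sketch:

```python
from langchain.tools.azure_cognitive_services import AzureCogsText2SpeechTool

text2speech_tool = AzureCogsText2SpeechTool()
wav_path = text2speech_tool.run("Hello, World!")
# e.g. /tmp/tmpXXXXXXXX.wav; the file is created with delete=False,
# so the caller is responsible for cleaning it up.
print(wav_path)
```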
29 langchain/tools/azure_cognitive_services/utils.py Normal file
@@ -0,0 +1,29 @@
import os
import tempfile
from urllib.parse import urlparse

import requests


def detect_file_src_type(file_path: str) -> str:
    """Detect if the file is local or remote."""
    if os.path.isfile(file_path):
        return "local"

    parsed_url = urlparse(file_path)
    if parsed_url.scheme and parsed_url.netloc:
        return "remote"

    return "invalid"


def download_audio_from_url(audio_url: str) -> str:
    """Download audio from url to local."""
    ext = audio_url.split(".")[-1]
    response = requests.get(audio_url, stream=True)
    response.raise_for_status()
    with tempfile.NamedTemporaryFile(mode="wb", suffix=f".{ext}", delete=False) as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)

    return f.name
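A few illustrative calls against the helpers above (the local-path case depends on the file actually existing on disk):

```python
from langchain.tools.azure_cognitive_services.utils import detect_file_src_type

assert detect_file_src_type("https://example.com/clip.wav") == "remote"
assert detect_file_src_type("no/such/file/or/url") == "invalid"
# A path is classified "local" only when os.path.isfile() is True for it.
```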
@@ -31,7 +31,6 @@ class OpenWeatherMapAPIWrapper(BaseModel):
        openweathermap_api_key = get_from_dict_or_env(
            values, "openweathermap_api_key", "OPENWEATHERMAP_API_KEY"
        )
        values["openweathermap_api_key"] = openweathermap_api_key

        try:
            import pyowm
@@ -195,6 +195,8 @@ class Weaviate(VectorStore):
        query_obj = self._client.query.get(self._index_name, self._query_attrs)
        if kwargs.get("where_filter"):
            query_obj = query_obj.with_where(kwargs.get("where_filter"))
        if kwargs.get("additional"):
            query_obj = query_obj.with_additional(kwargs.get("additional"))
        result = query_obj.with_near_text(content).with_limit(k).do()
        if "errors" in result:
            raise ValueError(f"Error during query: {result['errors']}")
@@ -212,6 +214,8 @@ class Weaviate(VectorStore):
        query_obj = self._client.query.get(self._index_name, self._query_attrs)
        if kwargs.get("where_filter"):
            query_obj = query_obj.with_where(kwargs.get("where_filter"))
        if kwargs.get("additional"):
            query_obj = query_obj.with_additional(kwargs.get("additional"))
        result = query_obj.with_near_vector(vector).with_limit(k).do()
        if "errors" in result:
            raise ValueError(f"Error during query: {result['errors']}")
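The Weaviate hunks above thread an optional `additional` kwarg (alongside the existing `where_filter`) through to the query builder. A hedged sketch of the resulting call shape, assuming `docsearch` is an already-constructed `Weaviate` vector store and using weaviate-client's filter dict format:

```python
# `docsearch` is assumed to exist; the filter keys follow weaviate-client's
# where-filter convention and are illustrative, not prescribed by this diff.
docs = docsearch.similarity_search(
    "What did the president say about Ketanji Brown Jackson?",
    k=4,
    where_filter={
        "path": ["source"],
        "operator": "Equal",
        "valueString": "state_of_the_union.txt",
    },
    additional=["certainty"],  # forwarded to query_obj.with_additional() by this change
)
```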
398 poetry.lock generated
@@ -566,6 +566,64 @@ dev = ["coverage (>=5,<6)", "flake8 (>=3,<4)", "pytest (>=6,<7)", "sphinx-copybu
docs = ["sphinx-copybutton (>=0.4,<0.5)", "sphinx-rtd-theme (>=1.0,<2.0)", "sphinx-tabs (>=3,<4)", "sphinxcontrib-mermaid (>=0.7,<0.8)"]
test = ["coverage (>=5,<6)", "pytest (>=6,<7)"]

[[package]]
name = "azure-ai-formrecognizer"
version = "3.2.1"
description = "Microsoft Azure Form Recognizer Client Library for Python"
category = "main"
optional = true
python-versions = ">=3.7"
files = [
    {file = "azure-ai-formrecognizer-3.2.1.zip", hash = "sha256:5768765f9720ce87038f56afe0c0b5259192cfb29c840a39595b1e26e4ddfa32"},
    {file = "azure_ai_formrecognizer-3.2.1-py3-none-any.whl", hash = "sha256:4db43b9dd0a2bc5296b752c04dbacb838ae2b8726adfe7cf277c2ea34e99419a"},
]

[package.dependencies]
azure-common = ">=1.1,<2.0"
azure-core = ">=1.23.0,<2.0.0"
msrest = ">=0.6.21"
typing-extensions = ">=4.0.1"

[[package]]
name = "azure-ai-vision"
version = "0.11.1b1"
description = "Microsoft Azure AI Vision SDK for Python"
category = "main"
optional = true
python-versions = ">=3.7"
files = [
    {file = "azure_ai_vision-0.11.1b1-py3-none-manylinux1_x86_64.whl", hash = "sha256:6f8563ae26689da6cdee9b2de009a53546ae2fd86c6c180236ce5da5b45f41d3"},
    {file = "azure_ai_vision-0.11.1b1-py3-none-win_amd64.whl", hash = "sha256:f5df03b9156feaa1d8c776631967b1455028d30dfd4cd1c732aa0f9c03d01517"},
]

[[package]]
name = "azure-cognitiveservices-speech"
version = "1.28.0"
description = "Microsoft Cognitive Services Speech SDK for Python"
category = "main"
optional = true
python-versions = ">=3.7"
files = [
    {file = "azure_cognitiveservices_speech-1.28.0-py3-none-macosx_10_14_x86_64.whl", hash = "sha256:a6c277ec9c93f586dcc74d3a56a6aa0259f4cf371f5e03afcf169c691e2c4d0c"},
    {file = "azure_cognitiveservices_speech-1.28.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a412c6c5bc528548e0ee5fc5fe89fb8351307d0c5ef7ac4d506fab3d58efcb4a"},
    {file = "azure_cognitiveservices_speech-1.28.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:ceb5a8862da4ab861bd06653074a4e5dc2d66a54f03dd4dd9356da7672febbce"},
    {file = "azure_cognitiveservices_speech-1.28.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:d5cba32e9d8eaffc9d8f482c00950bc471f9dc4d7659c741c083e5e9d831b802"},
    {file = "azure_cognitiveservices_speech-1.28.0-py3-none-win32.whl", hash = "sha256:ac52c4549062771db5694346c1547334cf1bb0d08573a193c8dcec8386aa491d"},
    {file = "azure_cognitiveservices_speech-1.28.0-py3-none-win_amd64.whl", hash = "sha256:5ff042d81d7ff4e50be196419fcd2042e41a97cebb229e0940026e1314ff7751"},
]

[[package]]
name = "azure-common"
version = "1.1.28"
description = "Microsoft Azure Client Library for Python (Common)"
category = "main"
optional = true
python-versions = "*"
files = [
    {file = "azure-common-1.1.28.zip", hash = "sha256:4ac0cd3214e36b6a1b6a442686722a5d8cc449603aa833f3f0f40bda836704a3"},
    {file = "azure_common-1.1.28-py2.py3-none-any.whl", hash = "sha256:5c12d3dcf4ec20599ca6b0d3e09e86e146353d443e7fcc050c9a19c1f9df20ad"},
]

[[package]]
name = "azure-core"
version = "1.26.4"
@@ -1495,6 +1553,46 @@ typing-inspect = ">=0.4.0"
[package.extras]
dev = ["flake8", "hypothesis", "ipython", "mypy (>=0.710)", "portray", "pytest (>=6.2.3)", "simplejson", "types-dataclasses"]

[[package]]
name = "datasets"
version = "1.18.3"
description = "HuggingFace community-driven open-source library of datasets"
category = "main"
optional = true
python-versions = "*"
files = [
    {file = "datasets-1.18.3-py3-none-any.whl", hash = "sha256:5862670a3e213af1aa68995a32ff0ce761b9d71d2677c3fa59e7088eb5e2a841"},
    {file = "datasets-1.18.3.tar.gz", hash = "sha256:dfdf75c255069f4ed25ccdd0d3f0730c1ff1e2b27f8d4bd1af395b10fe8ebc63"},
]

[package.dependencies]
aiohttp = "*"
dill = "*"
fsspec = {version = ">=2021.05.0", extras = ["http"]}
huggingface-hub = ">=0.1.0,<1.0.0"
multiprocess = "*"
numpy = ">=1.17"
packaging = "*"
pandas = "*"
pyarrow = ">=3.0.0,<4.0.0 || >4.0.0"
requests = ">=2.19.0"
tqdm = ">=4.62.1"
xxhash = "*"

[package.extras]
apache-beam = ["apache-beam (>=2.26.0)"]
audio = ["librosa"]
benchmarks = ["numpy (==1.18.5)", "tensorflow (==2.3.0)", "torch (==1.6.0)", "transformers (==3.0.2)"]
dev = ["Pillow (>=6.2.1)", "Werkzeug (>=1.0.1)", "absl-py", "aiobotocore", "apache-beam (>=2.26.0)", "bert-score (>=0.3.6)", "black (==21.4b0)", "boto3", "botocore", "bs4", "conllu", "elasticsearch", "fairseq", "faiss-cpu (>=1.6.4)", "fastBPE (==0.1.0)", "flake8 (>=3.8.3)", "fsspec[s3]", "h5py", "importlib-resources", "isort (>=5.0.0)", "jiwer", "langdetect", "librosa", "lxml", "mauve-text", "moto[s3,server] (==2.0.4)", "mwparserfromhell", "nltk", "openpyxl", "py7zr", "pytest", "pytest-datadir", "pytest-xdist", "pytorch-lightning", "pytorch-nlp (==0.5.0)", "pyyaml (>=5.3.1)", "rarfile (>=4.0)", "requests-file (>=1.5.1)", "rouge-score", "s3fs (==2021.08.1)", "sacrebleu", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "soundfile", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "torch", "torchaudio", "torchmetrics (==0.6.0)", "transformers", "wget (>=3.2)", "zstandard"]
docs = ["Markdown (!=3.3.5)", "docutils (==0.16.0)", "fsspec (<2021.9.0)", "myst-parser", "recommonmark", "s3fs", "sphinx (==3.1.2)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinx-markdown-tables", "sphinx-panels", "sphinx-rtd-theme (==0.4.3)", "sphinxext-opengraph (==0.4.1)"]
quality = ["black (==21.4b0)", "flake8 (>=3.8.3)", "isort (>=5.0.0)", "pyyaml (>=5.3.1)"]
s3 = ["boto3", "botocore", "fsspec", "s3fs"]
tensorflow = ["tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)"]
tensorflow-gpu = ["tensorflow-gpu (>=2.2.0,!=2.6.0,!=2.6.1)"]
tests = ["Pillow (>=6.2.1)", "Werkzeug (>=1.0.1)", "absl-py", "aiobotocore", "apache-beam (>=2.26.0)", "bert-score (>=0.3.6)", "boto3", "botocore", "bs4", "conllu", "elasticsearch", "fairseq", "faiss-cpu (>=1.6.4)", "fastBPE (==0.1.0)", "fsspec[s3]", "h5py", "importlib-resources", "jiwer", "langdetect", "librosa", "lxml", "mauve-text", "moto[s3,server] (==2.0.4)", "mwparserfromhell", "nltk", "openpyxl", "py7zr", "pytest", "pytest-datadir", "pytest-xdist", "pytorch-lightning", "pytorch-nlp (==0.5.0)", "rarfile (>=4.0)", "requests-file (>=1.5.1)", "rouge-score", "s3fs (==2021.08.1)", "sacrebleu", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "soundfile", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "torch", "torchaudio", "torchmetrics (==0.6.0)", "transformers", "wget (>=3.2)", "zstandard"]
torch = ["torch"]
vision = ["Pillow (>=6.2.1)"]

[[package]]
name = "debugpy"
version = "1.6.7"
@@ -2154,6 +2252,10 @@ files = [
    {file = "fsspec-2023.5.0.tar.gz", hash = "sha256:b3b56e00fb93ea321bc9e5d9cf6f8522a0198b20eb24e02774d329e9c6fb84ce"},
]

[package.dependencies]
aiohttp = {version = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1", optional = true, markers = "extra == \"http\""}
requests = {version = "*", optional = true, markers = "extra == \"http\""}

[package.extras]
abfs = ["adlfs"]
adl = ["adlfs"]
@@ -3134,6 +3236,21 @@ widgetsnbextension = ">=4.0.7,<4.1.0"
[package.extras]
test = ["ipykernel", "jsonschema", "pytest (>=3.6.0)", "pytest-cov", "pytz"]

[[package]]
name = "isodate"
version = "0.6.1"
description = "An ISO 8601 date/time/duration parser and formatter"
category = "main"
optional = true
python-versions = "*"
files = [
    {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"},
    {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"},
]

[package.dependencies]
six = "*"

[[package]]
name = "isoduration"
version = "20.11.0"
@@ -3787,6 +3904,30 @@ files = [
[package.extras]
data = ["language-data (>=1.1,<2.0)"]

[[package]]
name = "langkit"
version = "0.0.1b2"
description = "A collection of text metric udfs for whylogs profiling and monitoring in WhyLabs"
category = "main"
optional = true
python-versions = ">=3.8,<4.0"
files = [
    {file = "langkit-0.0.1b2-py3-none-any.whl", hash = "sha256:8059d48bb1bbf90da5f5103585dece57fa09d156b0490f8a6c88277789a19021"},
    {file = "langkit-0.0.1b2.tar.gz", hash = "sha256:c2dd7cf93921dc77d6c7516746351fa503684f3be35392c187f4418a0748ef50"},
]

[package.dependencies]
datasets = "*"
nltk = ">=3.8.1,<4.0.0"
openai = "*"
pandas = "*"
sentence-transformers = ">=2.2.2,<3.0.0"
textstat = ">=0.7.3,<0.8.0"
whylogs = ">=1.1.42.dev3,<2.0.0"

[package.extras]
io = ["torch"]

[[package]]
name = "lark"
version = "1.1.5"
@@ -4443,6 +4584,28 @@ files = [
    {file = "msgpack-1.0.5.tar.gz", hash = "sha256:c075544284eadc5cddc70f4757331d99dcbc16b2bbd4849d15f8aae4cf36d31c"},
]

[[package]]
name = "msrest"
version = "0.7.1"
description = "AutoRest swagger generator Python client runtime."
category = "main"
optional = true
python-versions = ">=3.6"
files = [
    {file = "msrest-0.7.1-py3-none-any.whl", hash = "sha256:21120a810e1233e5e6cc7fe40b474eeb4ec6f757a15d7cf86702c369f9567c32"},
    {file = "msrest-0.7.1.zip", hash = "sha256:6e7661f46f3afd88b75667b7187a92829924446c7ea1d169be8c4bb7eeb788b9"},
]

[package.dependencies]
azure-core = ">=1.24.0"
certifi = ">=2017.4.17"
isodate = ">=0.6.0"
requests = ">=2.16,<3.0"
requests-oauthlib = ">=0.5.0"

[package.extras]
async = ["aiodns", "aiohttp (>=3.0)"]

[[package]]
name = "multidict"
version = "6.0.4"
@@ -6711,6 +6874,7 @@ files = [
    {file = "pylance-0.4.12-cp38-abi3-macosx_10_15_x86_64.whl", hash = "sha256:2b86fb8dccc03094c0db37bef0d91bda60e8eb0d1eddf245c6971450c8d8a53f"},
    {file = "pylance-0.4.12-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:0bc82914b13204187d673b5f3d45f93219c38a0e9d0542ba251074f639669789"},
    {file = "pylance-0.4.12-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a4bcce77f99ecd4cbebbadb01e58d5d8138d40eb56bdcdbc3b20b0475e7a472"},
    {file = "pylance-0.4.12-cp38-abi3-win_amd64.whl", hash = "sha256:9616931c5300030adb9626d22515710a127d1e46a46737a7a0f980b52f13627c"},
]

[package.dependencies]
@@ -6936,6 +7100,22 @@ files = [
    {file = "pypdfium2-4.11.0.tar.gz", hash = "sha256:f1d3bd0841f0c2e9db417075896dafc5906bbd7c0ccdc2b6e2b3f44d61d49f46"},
]

[[package]]
name = "pyphen"
version = "0.14.0"
description = "Pure Python module to hyphenate text"
category = "main"
optional = true
python-versions = ">=3.7"
files = [
    {file = "pyphen-0.14.0-py3-none-any.whl", hash = "sha256:414c9355958ca3c6a3ff233f65678c245b8ecb56418fb291e2b93499d61cd510"},
    {file = "pyphen-0.14.0.tar.gz", hash = "sha256:596c8b3be1c1a70411ba5f6517d9ccfe3083c758ae2b94a45f2707346d8e66fa"},
]

[package.extras]
doc = ["sphinx", "sphinx_rtd_theme"]
test = ["flake8", "isort", "pytest"]

[[package]]
name = "pyrsistent"
version = "0.19.3"
@@ -9050,6 +9230,21 @@ tornado = ">=6.1.0"
docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"]
test = ["pre-commit", "pytest (>=7.0)", "pytest-timeout"]

[[package]]
name = "textstat"
version = "0.7.3"
description = "Calculate statistical features from text"
category = "main"
optional = true
python-versions = ">=3.6"
files = [
    {file = "textstat-0.7.3-py3-none-any.whl", hash = "sha256:cbd9d641aa5abff0852638f0489913f31ea52fe597ccbaa337b4fc2a44efd15e"},
    {file = "textstat-0.7.3.tar.gz", hash = "sha256:60b63cf8949f45bbb3b4205e4411bbc1cd66df4c08aef12545811c7e6e24f011"},
]

[package.dependencies]
pyphen = "*"

[[package]]
name = "thinc"
version = "8.1.10"
@@ -10105,6 +10300,95 @@ files = [
[package.extras]
test = ["pytest (>=6.0.0)"]

[[package]]
name = "whylabs-client"
version = "0.5.1"
description = "WhyLabs API client"
category = "main"
optional = true
python-versions = ">=3.6"
files = [
    {file = "whylabs-client-0.5.1.tar.gz", hash = "sha256:f7aacfab7d176812c2eb4cdeb8c52521eed0d30bc2a0836399798197a513cf04"},
    {file = "whylabs_client-0.5.1-py3-none-any.whl", hash = "sha256:dc6958d5bb390f1057fe6f513cbce55c4e71d5f8a1461a7c93eb73814089de33"},
]

[package.dependencies]
python-dateutil = "*"
urllib3 = ">=1.25.3"

[[package]]
name = "whylogs"
version = "1.1.42.dev3"
description = "Profile and monitor your ML data pipeline end-to-end"
category = "main"
optional = true
python-versions = ">=3.7.1,<4"
files = [
    {file = "whylogs-1.1.42.dev3-py3-none-any.whl", hash = "sha256:99aadb05b68c6c2dc5d00ba1fb45bdd5ac2c3da3fe812f3fd1573a0f06674121"},
    {file = "whylogs-1.1.42.dev3.tar.gz", hash = "sha256:c82badf821f56935fd274e696e4d5ed151934e486f23ea5f5c60af31e6cdb632"},
]

[package.dependencies]
protobuf = ">=3.19.4"
typing-extensions = {version = ">=3.10", markers = "python_version < \"4\""}
whylabs-client = ">=0.4.4,<1"
whylogs-sketching = ">=3.4.1.dev3"

[package.extras]
all = ["Pillow (>=9.2.0,<10.0.0)", "boto3 (>=1.22.13,<2.0.0)", "fugue (>=0.8.1,<0.9.0)", "google-cloud-storage (>=2.5.0,<3.0.0)", "ipython", "mlflow-skinny (>=1.26.1,<2.0.0)", "numpy", "numpy (>=1.23.2)", "pandas", "pyarrow (>=8.0.0,<13)", "pybars3 (>=0.9,<0.10)", "pyspark (>=3.0.0,<4.0.0)", "requests (>=2.27,<3.0)", "scikit-learn (>=1.0.2,<2.0.0)", "scikit-learn (>=1.1.2,<2)", "scipy (>=1.5)", "scipy (>=1.9.2)"]
datasets = ["pandas"]
docs = ["furo (>=2022.3.4,<2023.0.0)", "ipython_genutils (>=0.2.0,<0.3.0)", "myst-parser[sphinx] (>=0.17.2,<0.18.0)", "nbconvert (>=7.0.0,<8.0.0)", "nbsphinx (>=0.8.9,<0.9.0)", "sphinx", "sphinx-autoapi", "sphinx-autobuild (>=2021.3.14,<2022.0.0)", "sphinx-copybutton (>=0.5.0,<0.6.0)", "sphinx-inline-tabs", "sphinxext-opengraph (>=0.6.3,<0.7.0)"]
embeddings = ["numpy", "numpy (>=1.23.2)", "scikit-learn (>=1.0.2,<2.0.0)", "scikit-learn (>=1.1.2,<2)"]
fugue = ["fugue (>=0.8.1,<0.9.0)"]
gcs = ["google-cloud-storage (>=2.5.0,<3.0.0)"]
image = ["Pillow (>=9.2.0,<10.0.0)"]
mlflow = ["mlflow-skinny (>=1.26.1,<2.0.0)"]
s3 = ["boto3 (>=1.22.13,<2.0.0)"]
spark = ["pyarrow (>=8.0.0,<13)", "pyspark (>=3.0.0,<4.0.0)"]
viz = ["Pillow (>=9.2.0,<10.0.0)", "ipython", "numpy", "numpy (>=1.23.2)", "pybars3 (>=0.9,<0.10)", "requests (>=2.27,<3.0)", "scipy (>=1.5)", "scipy (>=1.9.2)"]
whylabs = ["requests (>=2.27,<3.0)"]

[[package]]
name = "whylogs-sketching"
version = "3.4.1.dev3"
description = "sketching library of whylogs"
category = "main"
optional = true
python-versions = "*"
files = [
    {file = "whylogs-sketching-3.4.1.dev3.tar.gz", hash = "sha256:40b90eb9d5e4cbbfa63f6a1f3a3332b72258d270044b79030dc5d720fddd9499"},
    {file = "whylogs_sketching-3.4.1.dev3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9c20134eda881064099264f795d60321777b5e6c2357125a7a2787c9f15db684"},
    {file = "whylogs_sketching-3.4.1.dev3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e76ac4c2d0214b8de8598867e721f774cca8877267bc2a9b2d0d06950fe76bd5"},
    {file = "whylogs_sketching-3.4.1.dev3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edc2b463d926ccacb7ee2147d206850bb0cbfea8766f091e8c575ada48db1cfd"},
    {file = "whylogs_sketching-3.4.1.dev3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fdc2a3bd73895d1ffac1b3028ff55aaa6b60a9ec42d7b6b5785fa140f303dec0"},
    {file = "whylogs_sketching-3.4.1.dev3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:46460eefcf22bcf20b0e6208de32e358478c17b1239221eb038d434f14ec427c"},
    {file = "whylogs_sketching-3.4.1.dev3-cp310-cp310-win_amd64.whl", hash = "sha256:58b99a070429a7119a5727ac61c4e9ebcd6e92eed3d2646931a487fff3d6630e"},
    {file = "whylogs_sketching-3.4.1.dev3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:531a4af8f707c1e8138a4ae41a117ba53241372bf191666a9e6b44ab6cd9e634"},
    {file = "whylogs_sketching-3.4.1.dev3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ba536fca5f9578fa34d106c243fdccfef7d75b9d1fffb9d93df0debfe8e3ebc"},
    {file = "whylogs_sketching-3.4.1.dev3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:afa843c68cafa08e82624e6a33d13ab7f00ad0301101960872fe152d5af5ab53"},
    {file = "whylogs_sketching-3.4.1.dev3-cp311-cp311-win_amd64.whl", hash = "sha256:303d55c37565340c2d21c268c64a712fad612504cc4b98b1d1df848cac6d934f"},
    {file = "whylogs_sketching-3.4.1.dev3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:9d65fcf8dade1affe50181582b8894929993e37d7daa922d973a811790cd0208"},
    {file = "whylogs_sketching-3.4.1.dev3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4845e77c208ae64ada9170e1b92ed0abe28fe311c0fc35f9d8efa6926211ca2"},
    {file = "whylogs_sketching-3.4.1.dev3-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:02cac1c87ac42d7fc7e6597862ac50bc035825988d21e8a2d763b416e83e845f"},
    {file = "whylogs_sketching-3.4.1.dev3-cp36-cp36m-win_amd64.whl", hash = "sha256:52a174784e69870543fb87910e5549759f01a7f4cb6cac1773b2cc194ec0b72f"},
    {file = "whylogs_sketching-3.4.1.dev3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0931fc7500b78baf8f44222f1e3b58cfb707b0120328bc16cc50beaab5a954ec"},
    {file = "whylogs_sketching-3.4.1.dev3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:803c104338a5c4e1c6eb077d35ca3a4443e455aa4e7f2769c93560bf135cdeb3"},
    {file = "whylogs_sketching-3.4.1.dev3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:49e8f20351077497880b088dff9342f4b54d2d3c650c0b43daf121d97fb42468"},
    {file = "whylogs_sketching-3.4.1.dev3-cp37-cp37m-win_amd64.whl", hash = "sha256:f9f3507b5df34de7a95b75f80009644371dd6406f9d8795e820edf8a594aeacc"},
    {file = "whylogs_sketching-3.4.1.dev3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2986dd5b35a93267e6d89e7aa256f714105bbe61bdb0381aeab588c2688e46b6"},
    {file = "whylogs_sketching-3.4.1.dev3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:14f1bf4903e9cd2a196fe5a7268cca1434d423233e073917130d5b845f539c2a"},
    {file = "whylogs_sketching-3.4.1.dev3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ecfe0e4a629a4cefec9d7c7fac234119688085ba5f62feabed710cb5a322f8b"},
    {file = "whylogs_sketching-3.4.1.dev3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:000e2c11b7bbbdefb3a343c15955868a682c02d607557fef7bad5a6ffd09a0cf"},
    {file = "whylogs_sketching-3.4.1.dev3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:1e70ed1ed2f9c174a80673ae2ca24c1ec0e2a01c0bd6b0728640492fd5a50178"},
    {file = "whylogs_sketching-3.4.1.dev3-cp38-cp38-win_amd64.whl", hash = "sha256:9efd56d5a21566fc49126ef54d37116078763bb9f8955b9c77421b4ca3fd8fc6"},
    {file = "whylogs_sketching-3.4.1.dev3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:832247fd9d3ecf13791418a75c359db6c3aeffd51d7372d026e95f307ef286cc"},
    {file = "whylogs_sketching-3.4.1.dev3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cc81b547e331d96f6f4227280b9b5968ca4bd48dd7cb0c8b68c022037800009f"},
    {file = "whylogs_sketching-3.4.1.dev3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3abf13da4347393a302843c2f06ce4e5fc56fd9c8564f64da13ceafb81eef90b"},
    {file = "whylogs_sketching-3.4.1.dev3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1d6e7d0ddb66ab725d7af63518ef6a24cd45b075b81e1d2081709df4c989853"},
    {file = "whylogs_sketching-3.4.1.dev3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0b05112e3f70cfccddd2f72e464fa113307d97188891433133d4219b9f8f5456"},
    {file = "whylogs_sketching-3.4.1.dev3-cp39-cp39-win_amd64.whl", hash = "sha256:23759a00dd0e7019fbac06d9e9ab005ad6c14f80ec7935ccebccb7127296bc06"},
]

[[package]]
name = "widgetsnbextension"
version = "4.0.7"
@@ -10280,6 +10564,114 @@ files = [
|
||||
{file = "xmltodict-0.13.0.tar.gz", hash = "sha256:341595a488e3e01a85a9d8911d8912fd922ede5fecc4dce437eb4b6c8d037e56"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "xxhash"
|
||||
version = "3.2.0"
|
||||
description = "Python binding for xxHash"
|
||||
category = "main"
|
||||
optional = true
|
||||
python-versions = ">=3.6"
|
||||
files = [
|
||||
{file = "xxhash-3.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:af44b9e59c4b2926a4e3c7f9d29949ff42fcea28637ff6b8182e654461932be8"},
|
||||
{file = "xxhash-3.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1bdd57973e2b802ef32553d7bebf9402dac1557874dbe5c908b499ea917662cd"},
|
||||
{file = "xxhash-3.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b7c9aa77bbce61a5e681bd39cb6a804338474dcc90abe3c543592aa5d6c9a9b"},
|
||||
{file = "xxhash-3.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:11bf87dc7bb8c3b0b5e24b7b941a9a19d8c1f88120b6a03a17264086bc8bb023"},
|
||||
{file = "xxhash-3.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2783d41487ce6d379fdfaa7332fca5187bf7010b9bddcf20cafba923bc1dc665"},
|
||||
{file = "xxhash-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:561076ca0dcef2fbc20b2bc2765bff099e002e96041ae9dbe910a863ca6ee3ea"},
|
||||
{file = "xxhash-3.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3a26eeb4625a6e61cedc8c1b39b89327c9c7e1a8c2c4d786fe3f178eb839ede6"},
|
||||
{file = "xxhash-3.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d93a44d0104d1b9b10de4e7aadf747f6efc1d7ec5ed0aa3f233a720725dd31bd"},
|
||||
{file = "xxhash-3.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:89585adc73395a10306d2e2036e50d6c4ac0cf8dd47edf914c25488871b64f6d"},
|
||||
{file = "xxhash-3.2.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:a892b4b139126a86bfdcb97cd912a2f8c4e8623869c3ef7b50871451dd7afeb0"},
|
||||
{file = "xxhash-3.2.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:e998efb190653f70e0f30d92b39fc645145369a4823bee46af8ddfc244aa969d"},
|
||||
{file = "xxhash-3.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e8ed3bd2b8bb3277710843ca63e4f5c3ee6f8f80b083be5b19a7a9905420d11e"},
|
||||
{file = "xxhash-3.2.0-cp310-cp310-win32.whl", hash = "sha256:20181cbaed033c72cb881b2a1d13c629cd1228f113046133469c9a48cfcbcd36"},
|
||||
{file = "xxhash-3.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:a0f7a16138279d707db778a63264d1d6016ac13ffd3f1e99f54b2855d6c0d8e1"},
|
||||
{file = "xxhash-3.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5daff3fb5bfef30bc5a2cb143810d376d43461445aa17aece7210de52adbe151"},
|
||||
{file = "xxhash-3.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75bb5be3c5de702a547715f320ecf5c8014aeca750ed5147ca75389bd22e7343"},
|
||||
{file = "xxhash-3.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01f36b671ff55cb1d5c2f6058b799b697fd0ae4b4582bba6ed0999678068172a"},
|
||||
{file = "xxhash-3.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d4d4519123aac73c93159eb8f61db9682393862dd669e7eae034ecd0a35eadac"},
|
||||
{file = "xxhash-3.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:994e4741d5ed70fc2a335a91ef79343c6b1089d7dfe6e955dd06f8ffe82bede6"},
|
||||
{file = "xxhash-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:919bc1b010aa6ff0eb918838ff73a435aed9e9a19c3202b91acecd296bf75607"},
|
||||
{file = "xxhash-3.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:17b65454c5accbb079c45eca546c27c4782f5175aa320758fafac896b1549d27"},
|
||||
{file = "xxhash-3.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b0c094d5e65a46dbf3fe0928ff20873a747e6abfd2ed4b675beeb2750624bc2e"},
|
||||
{file = "xxhash-3.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:f94163ebe2d5546e6a5977e96d83621f4689c1054053428cf8d4c28b10f92f69"},
|
||||
{file = "xxhash-3.2.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:cead7c0307977a00b3f784cff676e72c147adbcada19a2e6fc2ddf54f37cf387"},
|
||||
{file = "xxhash-3.2.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:a0e1bd0260c1da35c1883321ce2707ceea07127816ab625e1226ec95177b561a"},
|
||||
{file = "xxhash-3.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cc8878935671490efe9275fb4190a6062b73277bd273237179b9b5a2aa436153"},
|
||||
{file = "xxhash-3.2.0-cp311-cp311-win32.whl", hash = "sha256:a433f6162b18d52f7068175d00bd5b1563b7405f926a48d888a97b90a160c40d"},
|
||||
{file = "xxhash-3.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:a32d546a1752e4ee7805d6db57944f7224afa7428d22867006b6486e4195c1f3"},
|
||||
{file = "xxhash-3.2.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:82daaab720866bf690b20b49de5640b0c27e3b8eea2d08aa75bdca2b0f0cfb63"},
|
||||
{file = "xxhash-3.2.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3126df6520cbdbaddd87ce74794b2b6c45dd2cf6ac2b600a374b8cdb76a2548c"},
|
||||
{file = "xxhash-3.2.0-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e172c1ee40507ae3b8d220f4048aaca204f203e1e4197e8e652f5c814f61d1aa"},
|
||||
{file = "xxhash-3.2.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5384f1d9f30876f5d5b618464fb19ff7ce6c0fe4c690fbaafd1c52adc3aae807"},
|
||||
{file = "xxhash-3.2.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26cb52174a7e96a17acad27a3ca65b24713610ac479c99ac9640843822d3bebf"},
|
||||
{file = "xxhash-3.2.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fbcd613a5e76b1495fc24db9c37a6b7ee5f214fd85979187ec4e032abfc12ded"},
|
||||
{file = "xxhash-3.2.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:f988daf25f31726d5b9d0be6af636ca9000898f9ea43a57eac594daea25b0948"},
|
||||
{file = "xxhash-3.2.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:bbc30c98ab006ab9fc47e5ed439c00f706bc9d4441ff52693b8b6fea335163e0"},
|
||||
{file = "xxhash-3.2.0-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:2408d49260b0a4a7cc6ba445aebf38e073aeaf482f8e32767ca477e32ccbbf9e"},
|
||||
{file = "xxhash-3.2.0-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:3f4152fd0bf8b03b79f2f900fd6087a66866537e94b5a11fd0fd99ef7efe5c42"},
|
||||
{file = "xxhash-3.2.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:0eea848758e4823a01abdbcccb021a03c1ee4100411cbeeb7a5c36a202a0c13c"},
|
||||
{file = "xxhash-3.2.0-cp36-cp36m-win32.whl", hash = "sha256:77709139af5123c578ab06cf999429cdb9ab211047acd0c787e098dcb3f1cb4d"},
|
||||
{file = "xxhash-3.2.0-cp36-cp36m-win_amd64.whl", hash = "sha256:91687671fd9d484a4e201ad266d366b695a45a1f2b41be93d116ba60f1b8f3b3"},
|
||||
{file = "xxhash-3.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e4af8bc5c3fcc2192c266421c6aa2daab1a18e002cb8e66ef672030e46ae25cf"},
|
||||
{file = "xxhash-3.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8be562e2ce3e481d9209b6f254c3d7c5ff920eb256aba2380d2fb5ba75d4f87"},
|
||||
{file = "xxhash-3.2.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9eba0c7c12126b12f7fcbea5513f28c950d28f33d2a227f74b50b77789e478e8"},
|
||||
{file = "xxhash-3.2.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2198c4901a0223c48f6ec0a978b60bca4f4f7229a11ca4dc96ca325dd6a29115"},
|
||||
{file = "xxhash-3.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50ce82a71b22a3069c02e914bf842118a53065e2ec1c6fb54786e03608ab89cc"},
|
||||
{file = "xxhash-3.2.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b5019fb33711c30e54e4e57ae0ca70af9d35b589d385ac04acd6954452fa73bb"},
|
||||
{file = "xxhash-3.2.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:0d54ac023eef7e3ac9f0b8841ae8a376b933043bc2ad428121346c6fa61c491c"},
|
||||
{file = "xxhash-3.2.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c55fa832fc3fe64e0d29da5dc9b50ba66ca93312107cec2709300ea3d3bab5c7"},
|
||||
{file = "xxhash-3.2.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:f4ce006215497993ae77c612c1883ca4f3973899573ce0c52fee91f0d39c4561"},
|
||||
{file = "xxhash-3.2.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:1afb9b9d27fd675b436cb110c15979976d92d761ad6e66799b83756402f3a974"},
|
||||
{file = "xxhash-3.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:baa99cebf95c1885db21e119395f222a706a2bb75a545f0672880a442137725e"},
|
||||
{file = "xxhash-3.2.0-cp37-cp37m-win32.whl", hash = "sha256:75aa692936942ccb2e8fd6a386c81c61630ac1b6d6e921698122db8a930579c3"},
|
||||
{file = "xxhash-3.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:0a2cdfb5cae9fafb9f7b65fd52ecd60cf7d72c13bb2591ea59aaefa03d5a8827"},
|
||||
{file = "xxhash-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3a68d1e8a390b660d94b9360ae5baa8c21a101bd9c4790a8b30781bada9f1fc6"},
|
||||
{file = "xxhash-3.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ce7c3ce28f94302df95eaea7c9c1e2c974b6d15d78a0c82142a97939d7b6c082"},
|
||||
{file = "xxhash-3.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0dcb419bf7b0bc77d366e5005c25682249c5521a63fd36c51f584bd91bb13bd5"},
|
||||
{file = "xxhash-3.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae521ed9287f86aac979eeac43af762f03d9d9797b2272185fb9ddd810391216"},
|
||||
{file = "xxhash-3.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0d16775094423088ffa357d09fbbb9ab48d2fb721d42c0856b801c86f616eec"},
|
||||
{file = "xxhash-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe454aeab348c42f56d6f7434ff758a3ef90787ac81b9ad5a363cd61b90a1b0b"},
|
||||
{file = "xxhash-3.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:052fd0efdd5525c2dbc61bebb423d92aa619c4905bba605afbf1e985a562a231"},
|
||||
{file = "xxhash-3.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:02badf3754e2133de254a4688798c4d80f0060635087abcb461415cb3eb82115"},
|
||||
{file = "xxhash-3.2.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:66b8a90b28c13c2aae7a71b32638ceb14cefc2a1c8cf23d8d50dfb64dfac7aaf"},
|
||||
{file = "xxhash-3.2.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:649cdf19df175925ad87289ead6f760cd840730ee85abc5eb43be326a0a24d97"},
{file = "xxhash-3.2.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:4b948a03f89f5c72d69d40975af8af241111f0643228796558dc1cae8f5560b0"},
{file = "xxhash-3.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49f51fab7b762da7c2cee0a3d575184d3b9be5e2f64f26cae2dd286258ac9b3c"},
{file = "xxhash-3.2.0-cp38-cp38-win32.whl", hash = "sha256:1a42994f0d42b55514785356722d9031f064fd34e495b3a589e96db68ee0179d"},
{file = "xxhash-3.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:0a6d58ba5865475e53d6c2c4fa6a62e2721e7875e146e2681e5337a6948f12e7"},
{file = "xxhash-3.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:aabdbc082030f8df613e2d2ea1f974e7ad36a539bdfc40d36f34e55c7e4b8e94"},
{file = "xxhash-3.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:498843b66b9ca416e9d03037e5875c8d0c0ab9037527e22df3b39aa5163214cd"},
{file = "xxhash-3.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a910b1193cd90af17228f5d6069816646df0148f14f53eefa6b2b11a1dedfcd0"},
{file = "xxhash-3.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb6d8ce31dc25faf4da92991320e211fa7f42de010ef51937b1dc565a4926501"},
{file = "xxhash-3.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:883dc3d3942620f4c7dbc3fd6162f50a67f050b714e47da77444e3bcea7d91cc"},
{file = "xxhash-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59dc8bfacf89b8f5be54d55bc3b4bd6d74d0c5320c8a63d2538ac7df5b96f1d5"},
{file = "xxhash-3.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:61e6aa1d30c2af692aa88c4dd48709426e8b37bff6a574ee2de677579c34a3d6"},
{file = "xxhash-3.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:314ec0bd21f0ee8d30f2bd82ed3759314bd317ddbbd8555668f3d20ab7a8899a"},
{file = "xxhash-3.2.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:dad638cde3a5357ad3163b80b3127df61fb5b5e34e9e05a87697144400ba03c7"},
{file = "xxhash-3.2.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:eaa3ea15025b56076d806b248948612289b093e8dcda8d013776b3848dffff15"},
{file = "xxhash-3.2.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:7deae3a312feb5c17c97cbf18129f83cbd3f1f9ec25b0f50e2bd9697befb22e7"},
{file = "xxhash-3.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:add774341c09853b1612c64a526032d95ab1683053325403e1afbe3ad2f374c5"},
{file = "xxhash-3.2.0-cp39-cp39-win32.whl", hash = "sha256:9b94749130ef3119375c599bfce82142c2500ef9ed3280089157ee37662a7137"},
{file = "xxhash-3.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:e57d94a1552af67f67b27db5dba0b03783ea69d5ca2af2f40e098f0ba3ce3f5f"},
{file = "xxhash-3.2.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:92fd765591c83e5c5f409b33eac1d3266c03d3d11c71a7dbade36d5cdee4fbc0"},
{file = "xxhash-3.2.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8970f6a411a9839a02b23b7e90bbbba4a6de52ace009274998566dc43f36ca18"},
{file = "xxhash-3.2.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5f3e33fe6cbab481727f9aeb136a213aed7e33cd1ca27bd75e916ffacc18411"},
{file = "xxhash-3.2.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:368265392cb696dd53907e2328b5a8c1bee81cf2142d0cc743caf1c1047abb36"},
{file = "xxhash-3.2.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:3b1f3c6d67fa9f49c4ff6b25ce0e7143bab88a5bc0f4116dd290c92337d0ecc7"},
{file = "xxhash-3.2.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:c5e8db6e1ee7267b7c412ad0afd5863bf7a95286b8333a5958c8097c69f94cf5"},
{file = "xxhash-3.2.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:761df3c7e2c5270088b691c5a8121004f84318177da1ca1db64222ec83c44871"},
{file = "xxhash-3.2.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2d15a707e7f689531eb4134eccb0f8bf3844bb8255ad50823aa39708d9e6755"},
{file = "xxhash-3.2.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e6b2ba4ff53dd5f57d728095e3def7375eb19c90621ce3b41b256de84ec61cfd"},
{file = "xxhash-3.2.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:61b0bcf946fdfd8ab5f09179dc2b5c74d1ef47cedfc6ed0ec01fdf0ee8682dd3"},
{file = "xxhash-3.2.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f7b79f0f302396d8e0d444826ceb3d07b61977793886ebae04e82796c02e42dc"},
{file = "xxhash-3.2.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0773cd5c438ffcd5dbff91cdd503574f88a4b960e70cedeb67736583a17a918"},
{file = "xxhash-3.2.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ec1f57127879b419a2c8d2db9d9978eb26c61ae17e5972197830430ae78d25b"},
{file = "xxhash-3.2.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3d4b15c00e807b1d3d0b612338c814739dec310b80fb069bd732b98ddc709ad7"},
{file = "xxhash-3.2.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:9d3f686e3d1c8900c5459eee02b60c7399e20ec5c6402364068a343c83a61d90"},
{file = "xxhash-3.2.0.tar.gz", hash = "sha256:1afd47af8955c5db730f630ad53ae798cf7fae0acb64cebb3cf94d35c47dd088"},
]

[[package]]
name = "yarl"
version = "1.9.2"
@@ -10460,8 +10852,8 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\
cffi = ["cffi (>=1.11)"]

[extras]
-all = ["O365", "aleph-alpha-client", "anthropic", "arxiv", "atlassian-python-api", "azure-cosmos", "azure-identity", "beautifulsoup4", "clickhouse-connect", "cohere", "deeplake", "docarray", "duckduckgo-search", "elasticsearch", "faiss-cpu", "google-api-python-client", "google-search-results", "gptcache", "html2text", "huggingface_hub", "jina", "jinja2", "jq", "lancedb", "lark", "lxml", "manifest-ml", "neo4j", "networkx", "nlpcloud", "nltk", "nomic", "openai", "openlm", "opensearch-py", "pdfminer-six", "pexpect", "pgvector", "pinecone-client", "pinecone-text", "psycopg2-binary", "pyowm", "pypdf", "pytesseract", "pyvespa", "qdrant-client", "redis", "requests-toolbelt", "sentence-transformers", "spacy", "steamship", "tensorflow-text", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"]
-azure = ["azure-core", "azure-cosmos", "azure-identity", "openai"]
+all = ["O365", "aleph-alpha-client", "anthropic", "arxiv", "atlassian-python-api", "azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-cosmos", "azure-identity", "beautifulsoup4", "clickhouse-connect", "cohere", "deeplake", "docarray", "duckduckgo-search", "elasticsearch", "faiss-cpu", "google-api-python-client", "google-search-results", "gptcache", "html2text", "huggingface_hub", "jina", "jinja2", "jq", "lancedb", "langkit", "lark", "lxml", "manifest-ml", "neo4j", "networkx", "nlpcloud", "nltk", "nomic", "openai", "openlm", "opensearch-py", "pdfminer-six", "pexpect", "pgvector", "pinecone-client", "pinecone-text", "psycopg2-binary", "pyowm", "pypdf", "pytesseract", "pyvespa", "qdrant-client", "redis", "requests-toolbelt", "sentence-transformers", "spacy", "steamship", "tensorflow-text", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"]
+azure = ["azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-core", "azure-cosmos", "azure-identity", "openai"]
cohere = ["cohere"]
docarray = ["docarray"]
embeddings = ["sentence-transformers"]
@@ -10474,4 +10866,4 @@ text-helpers = ["chardet"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0"
-content-hash = "cba33c7d2dc43649ad0ededc7b29f0bfeb9cbba1b2bbbc439b06cb608e678b9c"
+content-hash = "196588e10bb33939f5bae294a194ad01e803f40ed1087fe6a7a4b87e8d80712b"

@@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain"
-version = "0.0.177"
+version = "0.0.178"
description = "Building applications with LLMs through composability"
authors = []
license = "MIT"
@@ -89,9 +89,13 @@ telethon = {version = "^1.28.5", optional = true}
neo4j = {version = "^5.8.1", optional = true}
psychicapi = {version = "^0.2", optional = true}
zep-python = {version="^0.25", optional=true}
+langkit = {version = ">=0.0.1.dev3, <0.1.0", optional = true}
chardet = {version="^5.1.0", optional=true}
requests-toolbelt = {version = "^1.0.0", optional = true}
openlm = {version = "^0.0.5", optional = true}
+azure-ai-formrecognizer = {version = "^3.2.1", optional = true}
+azure-ai-vision = {version = "^0.11.1b1", optional = true}
+azure-cognitiveservices-speech = {version = "^1.28.0", optional = true}

[tool.poetry.group.docs.dependencies]
autodoc_pydantic = "^1.8.0"
@@ -183,7 +187,7 @@ text_helpers = ["chardet"]
cohere = ["cohere"]
docarray = ["docarray"]
embeddings = ["sentence-transformers"]
-azure = ["azure-identity", "azure-cosmos", "openai", "azure-core"]
+azure = ["azure-identity", "azure-cosmos", "openai", "azure-core", "azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech"]
all = [
    "anthropic",
    "cohere",
@@ -231,6 +235,7 @@ all = [
    "clickhouse-connect",
    "azure-cosmos",
    "lancedb",
+    "langkit",
    "lark",
    "pexpect",
    "pyvespa",
@@ -242,7 +247,10 @@ all = [
    "lxml",
    "requests-toolbelt",
    "neo4j",
-    "openlm"
+    "openlm",
+    "azure-ai-formrecognizer",
+    "azure-ai-vision",
+    "azure-cognitiveservices-speech",
]

# An extra used to be able to add extended testing.

0
tests/integration_tests/client/__init__.py
Normal file
78
tests/integration_tests/client/test_client.py
Normal file
@@ -0,0 +1,78 @@
"""LangChain+ langchain_client Integration Tests."""
import os
from uuid import uuid4

import pytest

from langchain.agents import AgentType, initialize_agent, load_tools
from langchain.callbacks.manager import tracing_v2_enabled
from langchain.chat_models import ChatOpenAI
from langchain.client import LangChainPlusClient


@pytest.fixture
def langchain_client(monkeypatch: pytest.MonkeyPatch) -> LangChainPlusClient:
    monkeypatch.setenv("LANGCHAIN_ENDPOINT", "http://localhost:8000")
    return LangChainPlusClient()


def test_feedback_cycle(
    monkeypatch: pytest.MonkeyPatch, langchain_client: LangChainPlusClient
) -> None:
    """Test that feedback is correctly created and updated."""
    monkeypatch.setenv("LANGCHAIN_TRACING_V2", "true")
    monkeypatch.setenv("LANGCHAIN_SESSION", f"Feedback Testing {uuid4()}")
    llm = ChatOpenAI(temperature=0)
    tools = load_tools(["serpapi", "llm-math"], llm=llm)
    agent = initialize_agent(
        tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=False
    )

    agent.run(
        "What is the population of Kuala Lumpur as of January, 2023?"
" What is it's square root?"
|
||||
    )
    other_session_name = f"Feedback Testing {uuid4()}"
    with tracing_v2_enabled(session_name=other_session_name):
        try:
            agent.run("What is the square root of 3?")
        except Exception as e:
            print(e)
    runs = list(
        langchain_client.list_runs(
            session_name=os.environ["LANGCHAIN_SESSION"], error=False, execution_order=1
        )
    )
    assert len(runs) == 1
    order_2 = list(
        langchain_client.list_runs(
            session_name=os.environ["LANGCHAIN_SESSION"], execution_order=2
        )
    )
    assert len(order_2) > 0
    langchain_client.create_feedback(str(order_2[0].id), "test score", metric_value=0)
    feedback = langchain_client.create_feedback(
        str(runs[0].id), "test score", metric_value=1
    )
    feedbacks = list(langchain_client.list_feedback(run_ids=[str(runs[0].id)]))
    assert len(feedbacks) == 1
    assert feedbacks[0].id == feedback.id

    # Add feedback to other session
    other_runs = list(
        langchain_client.list_runs(session_name=other_session_name, execution_order=1)
    )
    assert len(other_runs) == 1
    langchain_client.create_feedback(
        run_id=str(other_runs[0].id), metric_name="test score", metric_value=0
    )
    all_runs = list(
        langchain_client.list_runs(session_name=os.environ["LANGCHAIN_SESSION"])
    ) + list(langchain_client.list_runs(session_name=other_session_name))
    test_run_ids = [str(run.id) for run in all_runs]
    all_feedback = list(langchain_client.list_feedback(run_ids=test_run_ids))
    assert len(all_feedback) == 3
    for feedback in all_feedback:
        langchain_client.delete_feedback(str(feedback.id))
    feedbacks = list(langchain_client.list_feedback(run_ids=test_run_ids))
    assert len(feedbacks) == 0
30
tests/integration_tests/embeddings/test_elasticsearch.py
Normal file
@@ -0,0 +1,30 @@
"""Test elasticsearch_embeddings embeddings."""

import pytest

from langchain.embeddings.elasticsearch import ElasticsearchEmbeddings


@pytest.fixture
def model_id() -> str:
    # Replace with your actual model_id
    return "your_model_id"


def test_elasticsearch_embedding_documents(model_id: str) -> None:
    """Test Elasticsearch embedding documents."""
    documents = ["foo bar", "bar foo", "foo"]
    embedding = ElasticsearchEmbeddings.from_credentials(model_id)
    output = embedding.embed_documents(documents)
    assert len(output) == 3
    assert len(output[0]) == 768  # Change 768 to the expected embedding size
    assert len(output[1]) == 768  # Change 768 to the expected embedding size
    assert len(output[2]) == 768  # Change 768 to the expected embedding size


def test_elasticsearch_embedding_query(model_id: str) -> None:
    """Test Elasticsearch embedding query."""
    document = "foo bar"
    embedding = ElasticsearchEmbeddings.from_credentials(model_id)
    output = embedding.embed_query(document)
    assert len(output) == 768  # Change 768 to the expected embedding size
58
tests/integration_tests/embeddings/test_mosaicml.py
Normal file
@@ -0,0 +1,58 @@
"""Test mosaicml embeddings."""
from langchain.embeddings.mosaicml import MosaicMLInstructorEmbeddings


def test_mosaicml_embedding_documents() -> None:
    """Test MosaicML embeddings."""
    documents = ["foo bar"]
    embedding = MosaicMLInstructorEmbeddings()
    output = embedding.embed_documents(documents)
    assert len(output) == 1
    assert len(output[0]) == 768


def test_mosaicml_embedding_documents_multiple() -> None:
    """Test MosaicML embeddings with multiple documents."""
    documents = ["foo bar", "bar foo", "foo"]
    embedding = MosaicMLInstructorEmbeddings()
    output = embedding.embed_documents(documents)
    assert len(output) == 3
    assert len(output[0]) == 768
    assert len(output[1]) == 768
    assert len(output[2]) == 768


def test_mosaicml_embedding_query() -> None:
    """Test MosaicML embeddings of queries."""
    document = "foo bar"
    embedding = MosaicMLInstructorEmbeddings()
    output = embedding.embed_query(document)
    assert len(output) == 768


def test_mosaicml_embedding_endpoint() -> None:
    """Test MosaicML embeddings with a different endpoint"""
    documents = ["foo bar"]
    embedding = MosaicMLInstructorEmbeddings(
        endpoint_url="https://models.hosted-on.mosaicml.hosting/instructor-xl/v1/predict"
    )
    output = embedding.embed_documents(documents)
    assert len(output) == 1
    assert len(output[0]) == 768


def test_mosaicml_embedding_query_instruction() -> None:
    """Test MosaicML embeddings with a different query instruction."""
    document = "foo bar"
    embedding = MosaicMLInstructorEmbeddings(query_instruction="Embed this query:")
    output = embedding.embed_query(document)
    assert len(output) == 768


def test_mosaicml_embedding_document_instruction() -> None:
"""Test MosaicML embeddings with a different query instruction."""
|
||||
    documents = ["foo bar"]
    embedding = MosaicMLInstructorEmbeddings(embed_instruction="Embed this document:")
    output = embedding.embed_documents(documents)
    assert len(output) == 1
    assert len(output[0]) == 768
78
tests/integration_tests/llms/test_mosaicml.py
Normal file
@@ -0,0 +1,78 @@
"""Test MosaicML API wrapper."""
import pytest

from langchain.llms.mosaicml import PROMPT_FOR_GENERATION_FORMAT, MosaicML


def test_mosaicml_llm_call() -> None:
    """Test valid call to MosaicML."""
    llm = MosaicML(model_kwargs={})
    output = llm("Say foo:")
    assert isinstance(output, str)


def test_mosaicml_endpoint_change() -> None:
    """Test valid call to MosaicML."""
    new_url = "https://models.hosted-on.mosaicml.hosting/dolly-12b/v1/predict"
    llm = MosaicML(endpoint_url=new_url)
    assert llm.endpoint_url == new_url
    output = llm("Say foo:")
    assert isinstance(output, str)


def test_mosaicml_extra_kwargs() -> None:
    llm = MosaicML(model_kwargs={"max_new_tokens": 1})
    assert llm.model_kwargs == {"max_new_tokens": 1}

    output = llm("Say foo:")

    assert isinstance(output, str)

    # should only generate one new token (which might be a new line or whitespace token)
    assert len(output.split()) <= 1


def test_instruct_prompt() -> None:
    """Test instruct prompt."""
    llm = MosaicML(inject_instruction_format=True, model_kwargs={"do_sample": False})
    instruction = "Repeat the word foo"
    prompt = llm._transform_prompt(instruction)
    expected_prompt = PROMPT_FOR_GENERATION_FORMAT.format(instruction=instruction)
    assert prompt == expected_prompt
    output = llm(prompt)
    assert isinstance(output, str)


def test_retry_logic() -> None:
"""Tests that two queries (which would usually exceed the rate limit) works"""
|
||||
    llm = MosaicML(inject_instruction_format=True, model_kwargs={"do_sample": False})
    instruction = "Repeat the word foo"
    prompt = llm._transform_prompt(instruction)
    expected_prompt = PROMPT_FOR_GENERATION_FORMAT.format(instruction=instruction)
    assert prompt == expected_prompt
    output = llm(prompt)
    assert isinstance(output, str)
    output = llm(prompt)
    assert isinstance(output, str)


def test_short_retry_does_not_loop() -> None:
"""Tests that two queries with a short retry sleep does not infinite loop"""
|
||||
    llm = MosaicML(
        inject_instruction_format=True,
        model_kwargs={"do_sample": False},
        retry_sleep=0.1,
    )
    instruction = "Repeat the word foo"
    prompt = llm._transform_prompt(instruction)
    expected_prompt = PROMPT_FOR_GENERATION_FORMAT.format(instruction=instruction)
    assert prompt == expected_prompt

    with pytest.raises(
        ValueError,
        match="Error raised by inference API: Rate limit exceeded: 1 per 1 second",
    ):
        output = llm(prompt)
        assert isinstance(output, str)
        output = llm(prompt)
        assert isinstance(output, str)
@@ -81,6 +81,28 @@ class TestWeaviate:
        )
        assert output == [Document(page_content="foo", metadata={"page": 0})]

+    @pytest.mark.vcr(ignore_localhost=True)
+    def test_similarity_search_with_metadata_and_additional(
+        self, weaviate_url: str, embedding_openai: OpenAIEmbeddings
+    ) -> None:
+        """Test end to end construction and search with metadata and additional."""
+        texts = ["foo", "bar", "baz"]
+        metadatas = [{"page": i} for i in range(len(texts))]
+        docsearch = Weaviate.from_texts(
+            texts, embedding_openai, metadatas=metadatas, weaviate_url=weaviate_url
+        )
+        output = docsearch.similarity_search(
+            "foo",
+            k=1,
+            additional=["certainty"],
+        )
+        assert output == [
+            Document(
+                page_content="foo",
+                metadata={"page": 0, "_additional": {"certainty": 1}},
+            )
+        ]
+
    @pytest.mark.vcr(ignore_localhost=True)
    def test_similarity_search_with_uuids(
        self, weaviate_url: str, embedding_openai: OpenAIEmbeddings
@@ -146,3 +146,25 @@ Bye!\n\n-H."""
        "Bye!\n\n-H.",
    ]
    assert output == expected_output
+
+
+def test_split_documents() -> None:
+    """Test split_documents."""
+    splitter = CharacterTextSplitter(separator="", chunk_size=1, chunk_overlap=0)
+    docs = [
+        Document(page_content="foo", metadata={"source": "1"}),
+        Document(page_content="bar", metadata={"source": "2"}),
+        Document(page_content="baz", metadata={"source": "1"}),
+    ]
+    expected_output = [
+        Document(page_content="f", metadata={"source": "1"}),
+        Document(page_content="o", metadata={"source": "1"}),
+        Document(page_content="o", metadata={"source": "1"}),
+        Document(page_content="b", metadata={"source": "2"}),
+        Document(page_content="a", metadata={"source": "2"}),
+        Document(page_content="r", metadata={"source": "2"}),
+        Document(page_content="b", metadata={"source": "1"}),
+        Document(page_content="a", metadata={"source": "1"}),
+        Document(page_content="z", metadata={"source": "1"}),
+    ]
+    assert splitter.split_documents(docs) == expected_output

@@ -4,6 +4,10 @@ from langchain.tools import __all__ as public_api
_EXPECTED = [
    "AIPluginTool",
    "APIOperation",
+    "AzureCogsFormRecognizerTool",
+    "AzureCogsImageAnalysisTool",
+    "AzureCogsSpeech2TextTool",
+    "AzureCogsText2SpeechTool",
    "BaseTool",
    "BaseTool",
    "BaseTool",