From 388f248391a977007c3ad4a13e99db9222b9dacc Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Mon, 6 Nov 2023 14:28:32 -0800 Subject: [PATCH] add oai v1 cookbook (#12961) --- cookbook/README.md | 1 + cookbook/openai_v1_cookbook.ipynb | 184 ++++++++++++++++++ .../llms/huggingface_pipelines.ipynb | 4 +- .../text_embedding/open_clip.ipynb | 26 ++- .../baiducloud_vector_search.ipynb | 11 +- .../integrations/vectorstores/dingo.ipynb | 5 +- 6 files changed, 215 insertions(+), 16 deletions(-) create mode 100644 cookbook/openai_v1_cookbook.ipynb diff --git a/cookbook/README.md b/cookbook/README.md index 63487ed65ef..5ea01df373f 100644 --- a/cookbook/README.md +++ b/cookbook/README.md @@ -38,6 +38,7 @@ Notebook | Description [multiagent_bidding.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/multiagent_bidding.ipynb) | Implement a multi-agent simulation where agents bid to speak, with the highest bidder speaking next, demonstrated through a fictitious presidential debate example. [myscale_vector_sql.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/myscale_vector_sql.ipynb) | Access and interact with the myscale integrated vector database, which can enhance the performance of language model (llm) applications. [openai_functions_retrieval_qa....](https://github.com/langchain-ai/langchain/tree/master/cookbook/openai_functions_retrieval_qa.ipynb) | Structure response output in a question-answering system by incorporating openai functions into a retrieval pipeline. +[openai_v1_cookbook.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/openai_v1_cookbook.ipynb) | Explore new functionality released alongside the V1 release of the OpenAI Python library. [petting_zoo.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/petting_zoo.ipynb) | Create multi-agent simulations with simulated environments using the petting zoo library. 
[plan_and_execute_agent.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/plan_and_execute_agent.ipynb) | Create plan-and-execute agents that accomplish objectives by planning tasks with a language model (llm) and executing them with a separate agent. [press_releases.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/press_releases.ipynb) | Retrieve and query company press release data powered by [Kay.ai](https://kay.ai). diff --git a/cookbook/openai_v1_cookbook.ipynb b/cookbook/openai_v1_cookbook.ipynb new file mode 100644 index 00000000000..56e67ab0f06 --- /dev/null +++ b/cookbook/openai_v1_cookbook.ipynb @@ -0,0 +1,184 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f970f757-ec76-4bf0-90cd-a2fb68b945e3", + "metadata": {}, + "source": [ + "# Exploring OpenAI V1 functionality\n", + "\n", + "On November 6, 2023, OpenAI released a number of new features and, along with them, bumped their Python SDK to 1.0.0. This notebook shows off the new features and how to use them with LangChain." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee897729-263a-4073-898f-bb4cf01ed829", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install \"openai>=1\"" + ] + }, + { + "cell_type": "markdown", + "id": "71c34763-d1e7-4b9a-a9d7-3e4cc0dfc2c4", + "metadata": {}, + "source": [ + "## [JSON mode](https://platform.openai.com/docs/guides/text-generation/json-mode)\n", + "\n", + "Constrain the model to only generate valid JSON. Note that you must include a system message with instructions to use JSON for this mode to work.\n", + "\n", + "Only works with certain models. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "c3e067ce-7a43-47a7-bc89-41f1de4cf136", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.schema.messages import HumanMessage, SystemMessage" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "db6072c4-f3f3-415d-872b-71ea9f3c02bb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"companies\": [\n", + " {\n", + " \"name\": \"Google\",\n", + " \"origin\": \"USA\"\n", + " },\n", + " {\n", + " \"name\": \"Deepmind\",\n", + " \"origin\": \"UK\"\n", + " }\n", + " ]\n", + "}\n" + ] + } + ], + "source": [ + "chat = ChatOpenAI(model=\"gpt-3.5-turbo-1106\").bind(\n", + " response_format={\"type\": \"json_object\"}\n", + ")\n", + "\n", + "output = chat.invoke(\n", + " [\n", + " SystemMessage(\n", + " content=\"Extract the 'name' and 'origin' of any companies mentioned in the following statement. Return a JSON list.\"\n", + " ),\n", + " HumanMessage(\n", + " content=\"Google was founded in the USA, while Deepmind was founded in the UK\"\n", + " ),\n", + " ]\n", + ")\n", + "print(output.content)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "08e00ccf-b991-4249-846b-9500a0ccbfa0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'companies': [{'name': 'Google', 'origin': 'USA'},\n", + " {'name': 'Deepmind', 'origin': 'UK'}]}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import json\n", + "\n", + "json.loads(output.content)" + ] + }, + { + "cell_type": "markdown", + "id": "aa9a94d9-4319-4ab7-a979-c475ce6b5f50", + "metadata": {}, + "source": [ + "## [System fingerprint](https://platform.openai.com/docs/guides/text-generation/reproducible-outputs)\n", + "\n", + "OpenAI sometimes changes model configurations in a way that impacts outputs. 
Whenever this happens, the system_fingerprint associated with a generation will change." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1281883c-bf8f-4665-89cd-4f33ccde69ab", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'token_usage': {'completion_tokens': 43, 'prompt_tokens': 49, 'total_tokens': 92}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': 'fp_eeff13170a'}\n" + ] + } + ], + "source": [ + "chat = ChatOpenAI(model=\"gpt-3.5-turbo-1106\")\n", + "output = chat.generate(\n", + " [\n", + " [\n", + " SystemMessage(\n", + " content=\"Extract the 'name' and 'origin' of any companies mentioned in the following statement. Return a JSON list.\"\n", + " ),\n", + " HumanMessage(\n", + " content=\"Google was founded in the USA, while Deepmind was founded in the UK\"\n", + " ),\n", + " ]\n", + " ]\n", + ")\n", + "print(output.llm_output)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c637ba1-322d-4fc9-b97e-3afa83dc4d72", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "poetry-venv", + "language": "python", + "name": "poetry-venv" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/integrations/llms/huggingface_pipelines.ipynb b/docs/docs/integrations/llms/huggingface_pipelines.ipynb index 95423649e93..a5db50b1dcd 100644 --- a/docs/docs/integrations/llms/huggingface_pipelines.ipynb +++ b/docs/docs/integrations/llms/huggingface_pipelines.ipynb @@ -84,9 +84,7 @@ "model_id = \"gpt2\"\n", "tokenizer = AutoTokenizer.from_pretrained(model_id)\n", "model = AutoModelForCausalLM.from_pretrained(model_id)\n", - "pipe = pipeline(\n", - " 
\"text-generation\", model=model, tokenizer=tokenizer, max_new_tokens=10\n", - ")\n", + "pipe = pipeline(\"text-generation\", model=model, tokenizer=tokenizer, max_new_tokens=10)\n", "hf = HuggingFacePipeline(pipeline=pipe)" ] }, diff --git a/docs/docs/integrations/text_embedding/open_clip.ipynb b/docs/docs/integrations/text_embedding/open_clip.ipynb index e00315ddbcf..52617387f5e 100644 --- a/docs/docs/integrations/text_embedding/open_clip.ipynb +++ b/docs/docs/integrations/text_embedding/open_clip.ipynb @@ -39,8 +39,8 @@ "from langchain.embeddings import OpenCLIPEmbeddings\n", "\n", "# Images\n", - "img_path_dog='/Users/rlm/Desktop/Papers/LLaVA/dog.jpeg'\n", - "img_path_house='/Users/rlm/Desktop/Papers/LLaVA/house.jpeg'\n", + "img_path_dog = \"/Users/rlm/Desktop/Papers/LLaVA/dog.jpeg\"\n", + "img_path_house = \"/Users/rlm/Desktop/Papers/LLaVA/house.jpeg\"\n", "\n", "# Load images and convert to numpy arrays\n", "image_np_dog = np.array(_PILImage.open(img_path_dog).convert(\"RGB\"))\n", @@ -106,18 +106,26 @@ "text_feat_house_np = np.array(text_feat_house[0])\n", "\n", "# Compute similarity\n", - "similarities = np.array([\n", - " [text_feat_dog_np @ img_feat_dog_np.T][0][0], \n", - " [text_feat_dog_np @ img_feat_house_np.T][0][0],\n", - " [text_feat_house_np @ img_feat_dog_np.T][0][0], \n", - " [text_feat_house_np @ img_feat_house_np.T][0][0]\n", - "]).reshape(2, 2)\n", + "similarities = np.array(\n", + " [\n", + " [text_feat_dog_np @ img_feat_dog_np.T][0][0],\n", + " [text_feat_dog_np @ img_feat_house_np.T][0][0],\n", + " [text_feat_house_np @ img_feat_dog_np.T][0][0],\n", + " [text_feat_house_np @ img_feat_house_np.T][0][0],\n", + " ]\n", + ").reshape(2, 2)\n", "\n", "# Ensure similarities is of shape (2, 2)\n", "print(similarities.shape) # Expected: (2, 2)\n", "\n", "# Plot heatmap\n", - "sns.heatmap(similarities, annot=True, cmap='viridis', xticklabels=['dog image', 'house image'], yticklabels=['dog text', 'house text'])\n" + "sns.heatmap(\n", + " 
similarities,\n", + " annot=True,\n", + " cmap=\"viridis\",\n", + " xticklabels=[\"dog image\", \"house image\"],\n", + " yticklabels=[\"dog text\", \"house text\"],\n", + ")" ] } ], diff --git a/docs/docs/integrations/vectorstores/baiducloud_vector_search.ipynb b/docs/docs/integrations/vectorstores/baiducloud_vector_search.ipynb index d5a6d04963e..4b096d958d1 100644 --- a/docs/docs/integrations/vectorstores/baiducloud_vector_search.ipynb +++ b/docs/docs/integrations/vectorstores/baiducloud_vector_search.ipynb @@ -59,8 +59,8 @@ "import os\n", "import getpass\n", "\n", - "os.environ['QIANFAN_AK'] = getpass.getpass(\"Your Qianfan AK:\")\n", - "os.environ['QIANFAN_SK'] = getpass.getpass(\"Your Qianfan SK:\")" + "os.environ[\"QIANFAN_AK\"] = getpass.getpass(\"Your Qianfan AK:\")\n", + "os.environ[\"QIANFAN_SK\"] = getpass.getpass(\"Your Qianfan SK:\")" ] }, { @@ -85,6 +85,7 @@ "docs = text_splitter.split_documents(documents)\n", "\n", "from langchain.embeddings import QianfanEmbeddingsEndpoint\n", + "\n", "embeddings = QianfanEmbeddingsEndpoint()" ] }, @@ -104,8 +105,12 @@ "source": [ "# Create a bes instance and index docs.\n", "from langchain.vectorstores import BESVectorStore\n", + "\n", "bes = BESVectorStore.from_documents(\n", - " documents=docs, embedding=embeddings, bes_url=\"your bes cluster url\", index_name=\"your vector index\"\n", + " documents=docs,\n", + " embedding=embeddings,\n", + " bes_url=\"your bes cluster url\",\n", + " index_name=\"your vector index\",\n", ")\n", "bes.client.indices.refresh(index=\"your vector index\")" ] diff --git a/docs/docs/integrations/vectorstores/dingo.ipynb b/docs/docs/integrations/vectorstores/dingo.ipynb index 28635072c38..13d123b79e3 100644 --- a/docs/docs/integrations/vectorstores/dingo.ipynb +++ b/docs/docs/integrations/vectorstores/dingo.ipynb @@ -108,7 +108,10 @@ "\n", "dingo_client = DingoDB(user=\"\", password=\"\", host=[\"127.0.0.1:13000\"])\n", "# First, check if our index already exists. 
If it doesn't, we create it\n", - "if index_name not in dingo_client.get_index() and index_name.upper() not in dingo_client.get_index():\n", + "if (\n", + " index_name not in dingo_client.get_index()\n", + " and index_name.upper() not in dingo_client.get_index()\n", + "):\n", " # we create a new index, modify to your own\n", " dingo_client.create_index(\n", " index_name=index_name, dimension=1536, metric_type=\"cosine\", auto_id=False\n",