From 388f248391a977007c3ad4a13e99db9222b9dacc Mon Sep 17 00:00:00 2001
From: Bagatur <22008038+baskaryan@users.noreply.github.com>
Date: Mon, 6 Nov 2023 14:28:32 -0800
Subject: [PATCH] add oai v1 cookbook (#12961)

---
 cookbook/README.md                            |   1 +
 cookbook/openai_v1_cookbook.ipynb             | 184 ++++++++++++++++++
 .../llms/huggingface_pipelines.ipynb          |   4 +-
 .../text_embedding/open_clip.ipynb            |  26 ++-
 .../baiducloud_vector_search.ipynb            |  11 +-
 .../integrations/vectorstores/dingo.ipynb     |   5 +-
 6 files changed, 215 insertions(+), 16 deletions(-)
 create mode 100644 cookbook/openai_v1_cookbook.ipynb

diff --git a/cookbook/README.md b/cookbook/README.md
index 63487ed65ef..5ea01df373f 100644
--- a/cookbook/README.md
+++ b/cookbook/README.md
@@ -38,6 +38,7 @@ Notebook | Description
 [multiagent_bidding.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/multiagent_bidding.ipynb) | Implement a multi-agent simulation where agents bid to speak, with the highest bidder speaking next, demonstrated through a fictitious presidential debate example.
 [myscale_vector_sql.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/myscale_vector_sql.ipynb) | Access and interact with the myscale integrated vector database, which can enhance the performance of language model (llm) applications.
 [openai_functions_retrieval_qa....](https://github.com/langchain-ai/langchain/tree/master/cookbook/openai_functions_retrieval_qa.ipynb) | Structure response output in a question-answering system by incorporating openai functions into a retrieval pipeline.
+[openai_v1_cookbook.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/openai_v1_cookbook.ipynb) | Explore the new functionality released alongside V1 of the OpenAI Python library.
 [petting_zoo.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/petting_zoo.ipynb) | Create multi-agent simulations with simulated environments using the petting zoo library.
 [plan_and_execute_agent.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/plan_and_execute_agent.ipynb) | Create plan-and-execute agents that accomplish objectives by planning tasks with a language model (llm) and executing them with a separate agent.
 [press_releases.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/press_releases.ipynb) | Retrieve and query company press release data powered by [Kay.ai](https://kay.ai).
diff --git a/cookbook/openai_v1_cookbook.ipynb b/cookbook/openai_v1_cookbook.ipynb
new file mode 100644
index 00000000000..56e67ab0f06
--- /dev/null
+++ b/cookbook/openai_v1_cookbook.ipynb
@@ -0,0 +1,184 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "f970f757-ec76-4bf0-90cd-a2fb68b945e3",
+   "metadata": {},
+   "source": [
+    "# Exploring OpenAI V1 functionality\n",
+    "\n",
+    "On November 6, 2023, OpenAI released a number of new features and, along with them, bumped their Python SDK to 1.0.0. This notebook shows off the new features and how to use them with LangChain."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ee897729-263a-4073-898f-bb4cf01ed829",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install \"openai>=1\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "71c34763-d1e7-4b9a-a9d7-3e4cc0dfc2c4",
+   "metadata": {},
+   "source": [
+    "## [JSON mode](https://platform.openai.com/docs/guides/text-generation/json-mode)\n",
+    "\n",
+    "Constrain the model to only generate valid JSON. Note that you must include a system message with instructions to use JSON for this mode to work.\n",
+    "\n",
+    "JSON mode only works with certain models."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "c3e067ce-7a43-47a7-bc89-41f1de4cf136",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.chat_models import ChatOpenAI\n",
+    "from langchain.schema.messages import HumanMessage, SystemMessage"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "db6072c4-f3f3-415d-872b-71ea9f3c02bb",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{\n",
+      "  \"companies\": [\n",
+      "    {\n",
+      "      \"name\": \"Google\",\n",
+      "      \"origin\": \"USA\"\n",
+      "    },\n",
+      "    {\n",
+      "      \"name\": \"Deepmind\",\n",
+      "      \"origin\": \"UK\"\n",
+      "    }\n",
+      "  ]\n",
+      "}\n"
+     ]
+    }
+   ],
+   "source": [
+    "chat = ChatOpenAI(model=\"gpt-3.5-turbo-1106\").bind(\n",
+    "    response_format={\"type\": \"json_object\"}\n",
+    ")\n",
+    "\n",
+    "output = chat.invoke(\n",
+    "    [\n",
+    "        SystemMessage(\n",
+    "            content=\"Extract the 'name' and 'origin' of any companies mentioned in the following statement. Return a JSON list.\"\n",
+    "        ),\n",
+    "        HumanMessage(\n",
+    "            content=\"Google was founded in the USA, while Deepmind was founded in the UK\"\n",
+    "        ),\n",
+    "    ]\n",
+    ")\n",
+    "print(output.content)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "08e00ccf-b991-4249-846b-9500a0ccbfa0",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'companies': [{'name': 'Google', 'origin': 'USA'},\n",
+       "  {'name': 'Deepmind', 'origin': 'UK'}]}"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import json\n",
+    "\n",
+    "json.loads(output.content)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "aa9a94d9-4319-4ab7-a979-c475ce6b5f50",
+   "metadata": {},
+   "source": [
+    "## [System fingerprint](https://platform.openai.com/docs/guides/text-generation/reproducible-outputs)\n",
+    "\n",
+    "OpenAI sometimes changes model configurations in a way that impacts outputs. Whenever this happens, the `system_fingerprint` associated with a generation will change."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "1281883c-bf8f-4665-89cd-4f33ccde69ab",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'token_usage': {'completion_tokens': 43, 'prompt_tokens': 49, 'total_tokens': 92}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': 'fp_eeff13170a'}\n"
+     ]
+    }
+   ],
+   "source": [
+    "chat = ChatOpenAI(model=\"gpt-3.5-turbo-1106\")\n",
+    "output = chat.generate(\n",
+    "    [\n",
+    "        [\n",
+    "            SystemMessage(\n",
+    "                content=\"Extract the 'name' and 'origin' of any companies mentioned in the following statement. Return a JSON list.\"\n",
+    "            ),\n",
+    "            HumanMessage(\n",
+    "                content=\"Google was founded in the USA, while Deepmind was founded in the UK\"\n",
+    "            ),\n",
+    "        ]\n",
+    "    ]\n",
+    ")\n",
+    "print(output.llm_output)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5c637ba1-322d-4fc9-b97e-3afa83dc4d72",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "poetry-venv",
+   "language": "python",
+   "name": "poetry-venv"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/docs/integrations/llms/huggingface_pipelines.ipynb b/docs/docs/integrations/llms/huggingface_pipelines.ipynb
index 95423649e93..a5db50b1dcd 100644
--- a/docs/docs/integrations/llms/huggingface_pipelines.ipynb
+++ b/docs/docs/integrations/llms/huggingface_pipelines.ipynb
@@ -84,9 +84,7 @@
     "model_id = \"gpt2\"\n",
     "tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
     "model = AutoModelForCausalLM.from_pretrained(model_id)\n",
-    "pipe = pipeline(\n",
-    "    \"text-generation\", model=model, tokenizer=tokenizer, max_new_tokens=10\n",
-    ")\n",
+    "pipe = pipeline(\"text-generation\", model=model, tokenizer=tokenizer, max_new_tokens=10)\n",
     "hf = HuggingFacePipeline(pipeline=pipe)"
    ]
   },
diff --git a/docs/docs/integrations/text_embedding/open_clip.ipynb b/docs/docs/integrations/text_embedding/open_clip.ipynb
index e00315ddbcf..52617387f5e 100644
--- a/docs/docs/integrations/text_embedding/open_clip.ipynb
+++ b/docs/docs/integrations/text_embedding/open_clip.ipynb
@@ -39,8 +39,8 @@
     "from langchain.embeddings import OpenCLIPEmbeddings\n",
     "\n",
     "# Images\n",
-    "img_path_dog='/Users/rlm/Desktop/Papers/LLaVA/dog.jpeg'\n",
-    "img_path_house='/Users/rlm/Desktop/Papers/LLaVA/house.jpeg'\n",
+    "img_path_dog = \"/Users/rlm/Desktop/Papers/LLaVA/dog.jpeg\"\n",
+    "img_path_house = \"/Users/rlm/Desktop/Papers/LLaVA/house.jpeg\"\n",
     "\n",
     "# Load images and convert to numpy arrays\n",
     "image_np_dog = np.array(_PILImage.open(img_path_dog).convert(\"RGB\"))\n",
@@ -106,18 +106,26 @@
     "text_feat_house_np = np.array(text_feat_house[0])\n",
     "\n",
     "# Compute similarity\n",
-    "similarities = np.array([\n",
-    "    [text_feat_dog_np @ img_feat_dog_np.T][0][0], \n",
-    "    [text_feat_dog_np @ img_feat_house_np.T][0][0],\n",
-    "    [text_feat_house_np @ img_feat_dog_np.T][0][0], \n",
-    "    [text_feat_house_np @ img_feat_house_np.T][0][0]\n",
-    "]).reshape(2, 2)\n",
+    "similarities = np.array(\n",
+    "    [\n",
+    "        [text_feat_dog_np @ img_feat_dog_np.T][0][0],\n",
+    "        [text_feat_dog_np @ img_feat_house_np.T][0][0],\n",
+    "        [text_feat_house_np @ img_feat_dog_np.T][0][0],\n",
+    "        [text_feat_house_np @ img_feat_house_np.T][0][0],\n",
+    "    ]\n",
+    ").reshape(2, 2)\n",
     "\n",
     "# Ensure similarities is of shape (2, 2)\n",
     "print(similarities.shape)  # Expected: (2, 2)\n",
     "\n",
     "# Plot heatmap\n",
-    "sns.heatmap(similarities, annot=True, cmap='viridis', xticklabels=['dog image', 'house image'], yticklabels=['dog text', 'house text'])\n"
+    "sns.heatmap(\n",
+    "    similarities,\n",
+    "    annot=True,\n",
+    "    cmap=\"viridis\",\n",
+    "    xticklabels=[\"dog image\", \"house image\"],\n",
+    "    yticklabels=[\"dog text\", \"house text\"],\n",
+    ")"
    ]
   }
  ],
diff --git a/docs/docs/integrations/vectorstores/baiducloud_vector_search.ipynb b/docs/docs/integrations/vectorstores/baiducloud_vector_search.ipynb
index d5a6d04963e..4b096d958d1 100644
--- a/docs/docs/integrations/vectorstores/baiducloud_vector_search.ipynb
+++ b/docs/docs/integrations/vectorstores/baiducloud_vector_search.ipynb
@@ -59,8 +59,8 @@
     "import os\n",
     "import getpass\n",
     "\n",
-    "os.environ['QIANFAN_AK'] = getpass.getpass(\"Your Qianfan AK:\")\n",
-    "os.environ['QIANFAN_SK'] = getpass.getpass(\"Your Qianfan SK:\")"
+    "os.environ[\"QIANFAN_AK\"] = getpass.getpass(\"Your Qianfan AK:\")\n",
+    "os.environ[\"QIANFAN_SK\"] = getpass.getpass(\"Your Qianfan SK:\")"
    ]
   },
   {
@@ -85,6 +85,7 @@
     "docs = text_splitter.split_documents(documents)\n",
     "\n",
     "from langchain.embeddings import QianfanEmbeddingsEndpoint\n",
+    "\n",
     "embeddings = QianfanEmbeddingsEndpoint()"
    ]
   },
@@ -104,8 +105,12 @@
    "source": [
     "# Create a bes instance and index docs.\n",
     "from langchain.vectorstores import BESVectorStore\n",
+    "\n",
     "bes = BESVectorStore.from_documents(\n",
-    "    documents=docs, embedding=embeddings, bes_url=\"your bes cluster url\", index_name=\"your vector index\"\n",
+    "    documents=docs,\n",
+    "    embedding=embeddings,\n",
+    "    bes_url=\"your bes cluster url\",\n",
+    "    index_name=\"your vector index\",\n",
     ")\n",
     "bes.client.indices.refresh(index=\"your vector index\")"
    ]
diff --git a/docs/docs/integrations/vectorstores/dingo.ipynb b/docs/docs/integrations/vectorstores/dingo.ipynb
index 28635072c38..13d123b79e3 100644
--- a/docs/docs/integrations/vectorstores/dingo.ipynb
+++ b/docs/docs/integrations/vectorstores/dingo.ipynb
@@ -108,7 +108,10 @@
     "\n",
     "dingo_client = DingoDB(user=\"\", password=\"\", host=[\"127.0.0.1:13000\"])\n",
     "# First, check if our index already exists. If it doesn't, we create it\n",
-    "if index_name not in dingo_client.get_index() and index_name.upper() not in dingo_client.get_index():\n",
+    "if (\n",
+    "    index_name not in dingo_client.get_index()\n",
+    "    and index_name.upper() not in dingo_client.get_index()\n",
+    "):\n",
     "    # we create a new index, modify to your own\n",
     "    dingo_client.create_index(\n",
     "        index_name=index_name, dimension=1536, metric_type=\"cosine\", auto_id=False\n",