Mirror of https://github.com/hwchase17/langchain.git (synced 2026-02-18 21:11:06 +00:00)

Compare commits: erick/impr...eugene/doc (1 commit: 671267efef)
@@ -17,16 +17,13 @@ For more info, check out the [GitHub documentation](https://docs.github.com/en/f

## VS Code Dev Containers

[](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/langchain-ai/langchain)

Note: If you click the link above you will open the main repo (langchain-ai/langchain) and not your local cloned repo. This is fine if you only want to run and test the library, but if you want to contribute you can use the link below and replace with your username and cloned repo name:

```
Note: If you click this link you will open the main repo and not your local cloned repo, you can use this link and replace with your username and cloned repo name:
https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/<yourusername>/<yourclonedreponame>
```

Then you will have a local cloned repo where you can contribute and then create pull requests.

If you already have VS Code and Docker installed, you can use the button above to get started. This will cause VS Code to automatically install the Dev Containers extension if needed, clone the source code into a container volume, and spin up a dev container for use.

Alternatively you can also follow these steps to open this repo in a container using the VS Code Dev Containers extension:
You can also follow these steps to open this repo in a container using the VS Code Dev Containers extension:

1. If this is your first time using a development container, please ensure your system meets the pre-reqs (i.e. have Docker installed) in the [getting started steps](https://aka.ms/vscode-remote/containers/getting-started).
.gitignore (vendored)

@@ -178,4 +178,3 @@ docs/docs/build
docs/docs/node_modules
docs/docs/yarn.lock
_dist
docs/docs/templates
@@ -12,8 +12,6 @@
"source": [
"## Multi-modal RAG\n",
"\n",
"[See Trace of Option 3](https://smith.langchain.com/public/db0441a8-2c17-4070-bdf7-45d4fdf8f517/r/80cb0f89-1766-4caf-8959-fc43ec4b071c)\n",
"\n",
"Many documents contain a mixture of content types, including text and images. \n",
"\n",
"Yet, information captured in images is lost in most RAG applications.\n",
@@ -108,7 +106,6 @@
"outputs": [],
"source": [
"from unstructured.partition.pdf import partition_pdf\n",
"\n",
"# Extract images, tables, and chunk text\n",
"raw_pdf_elements = partition_pdf(\n",
"    filename=path + \"wildfire_stats.pdf\",\n",
@@ -190,8 +187,8 @@
"outputs": [],
"source": [
"# Apply to text\n",
"# Typically this is recommended only if you have large text chunks\n",
"text_summaries = texts # Skip it\n",
"# Typically this is recommended only if you have large text chunks \n",
"text_summaries = texts # Skip it\n",
"\n",
"# Apply to tables\n",
"table_summaries = summarize_chain.batch(tables, {\"max_concurrency\": 5})"
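The `summarize_chain` used in the last line above is defined in a cell that is not part of this diff. As a minimal sketch only, assuming a simple summarization prompt and `ChatOpenAI` (the prompt wording and model choice are illustrative, not taken from the notebook):

```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser

# Prompt asking for a retrieval-friendly summary of a table or text chunk
prompt = ChatPromptTemplate.from_template(
    "You are an assistant tasked with summarizing tables and text for retrieval. "
    "Give a concise summary of the following element:\n\n{element}"
)

# Format the element, call the model, parse the reply to a plain string
summarize_chain = (
    {"element": lambda x: x} | prompt | ChatOpenAI(temperature=0) | StrOutputParser()
)

# Batch over all extracted tables, capping concurrent API calls
table_summaries = summarize_chain.batch(tables, {"max_concurrency": 5})
```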
@@ -205,13 +202,7 @@
"source": [
"### Image summaries \n",
"\n",
"We will use [GPT4-V](https://openai.com/research/gpt-4v-system-card) to produce the image summaries.\n",
"\n",
"See the traces for each of the 5 ingested images here ([1](https://smith.langchain.com/public/f5548212-2e70-4fa8-91d6-c3e7d768d52b/r), \n",
"[2](https://smith.langchain.com/public/8b198178-5b83-4960-bbc1-c10516779208/r), \n",
"[3](https://smith.langchain.com/public/c4fcbcd5-38fb-462a-9ed1-e90b1d009fa9/r), \n",
"[4](https://smith.langchain.com/public/1df53c23-63b8-4f87-b5ae-e9d59b2a54ab/r), \n",
"[5](https://smith.langchain.com/public/f93efd6c-f9f6-46c9-b169-29270d33ad63/r))"
"We will use [GPT4-V](https://openai.com/research/gpt-4v-system-card) to produce the image summaries."
]
},
{
@@ -229,25 +220,26 @@
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.schema.messages import HumanMessage, SystemMessage\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def encode_image(image_path):\n",
|
||||
" \"\"\"Getting the base64 string\"\"\"\n",
|
||||
" ''' Getting the base64 string '''\n",
|
||||
" with open(image_path, \"rb\") as image_file:\n",
|
||||
" return base64.b64encode(image_file.read()).decode(\"utf-8\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def image_summarize(img_base64, prompt):\n",
|
||||
" \"\"\"Image summary\"\"\"\n",
|
||||
" chat = ChatOpenAI(model=\"gpt-4-vision-preview\", max_tokens=1024)\n",
|
||||
" return base64.b64encode(image_file.read()).decode('utf-8')\n",
|
||||
"\n",
|
||||
"def image_summarize(img_base64,prompt):\n",
|
||||
" ''' Image summary '''\n",
|
||||
" chat = ChatOpenAI(model=\"gpt-4-vision-preview\",\n",
|
||||
" max_tokens=1024)\n",
|
||||
" \n",
|
||||
" msg = chat.invoke(\n",
|
||||
" [\n",
|
||||
" HumanMessage(\n",
|
||||
" content=[\n",
|
||||
" {\"type\": \"text\", \"text\": prompt},\n",
|
||||
" {\"type\": \"text\", \"text\":prompt},\n",
|
||||
" {\n",
|
||||
" \"type\": \"image_url\",\n",
|
||||
" \"image_url\": {\"url\": f\"data:image/jpeg;base64,{img_base64}\"},\n",
|
||||
" \"image_url\": {\n",
|
||||
" \"url\": f\"data:image/jpeg;base64,{img_base64}\"\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
@@ -255,7 +247,6 @@
|
||||
" )\n",
|
||||
" return msg.content\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Store base64 encoded images\n",
|
||||
"img_base64_list = []\n",
|
||||
"\n",
|
||||
@@ -263,15 +254,15 @@
|
||||
"image_summaries = []\n",
|
||||
"\n",
|
||||
"# Prompt\n",
|
||||
"prompt = \"Describe the image in detail. Be specific about graphs, such as bar plots.\"\n",
|
||||
"prompt = \"Describe the image in detail. Be specific about graphs, such as bar plots.\" \n",
|
||||
"\n",
|
||||
"# Read images, encode to base64 strings\n",
|
||||
"for img_file in sorted(os.listdir(path)):\n",
|
||||
" if img_file.endswith(\".jpg\"):\n",
|
||||
" if img_file.endswith('.jpg'):\n",
|
||||
" img_path = os.path.join(path, img_file)\n",
|
||||
" base64_image = encode_image(img_path)\n",
|
||||
" img_base64_list.append(base64_image)\n",
|
||||
" image_summaries.append(image_summarize(base64_image, prompt))"
|
||||
" image_summaries.append(image_summarize(base64_image,prompt))"
|
||||
]
|
||||
},
|
||||
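The hunks above interleave the old and new versions of the image-summary cell, which makes them hard to read in this view. For readability, here is the updated cell reconstructed from the new side of the diff (a best-effort reconstruction, not an authoritative copy of the notebook):

```python
import base64
import os

from langchain.chat_models import ChatOpenAI
from langchain.schema.messages import HumanMessage


def encode_image(image_path):
    """Getting the base64 string"""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


def image_summarize(img_base64, prompt):
    """Image summary"""
    chat = ChatOpenAI(model="gpt-4-vision-preview", max_tokens=1024)
    msg = chat.invoke(
        [
            HumanMessage(
                content=[
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"},
                    },
                ]
            )
        ]
    )
    return msg.content


# Store base64 encoded images and their summaries
img_base64_list = []
image_summaries = []

# Prompt
prompt = "Describe the image in detail. Be specific about graphs, such as bar plots."

# Read images from `path` (defined earlier in the notebook), encode, and summarize
for img_file in sorted(os.listdir(path)):
    if img_file.endswith(".jpg"):
        img_path = os.path.join(path, img_file)
        base64_image = encode_image(img_path)
        img_base64_list.append(base64_image)
        image_summaries.append(image_summarize(base64_image, prompt))
```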
{
|
||||
@@ -296,15 +287,14 @@
|
||||
"source": [
|
||||
"from IPython.display import display, HTML\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def plt_img_base64(img_base64):\n",
|
||||
"\n",
|
||||
" # Create an HTML img tag with the base64 string as the source\n",
|
||||
" image_html = f'<img src=\"data:image/jpeg;base64,{img_base64}\" />'\n",
|
||||
"\n",
|
||||
" \n",
|
||||
" # Display the image by rendering the HTML\n",
|
||||
" display(HTML(image_html))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"plt_img_base64(img_base64_list[1])"
|
||||
]
|
||||
},
|
||||
@@ -354,9 +344,8 @@
"from langchain.retrievers.multi_vector import MultiVectorRetriever\n",
"\n",
"# The vectorstore to use to index the child chunks\n",
"vectorstore = Chroma(\n",
"    collection_name=\"multi_modal_rag\", embedding_function=OpenAIEmbeddings()\n",
")\n",
"vectorstore = Chroma(collection_name=\"multi_modal_rag\", \n",
"                     embedding_function=OpenAIEmbeddings())\n",
"\n",
"# The storage layer for the parent documents\n",
"store = InMemoryStore()\n",
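The cells that actually build the retriever and add the summaries are outside this hunk. As a rough sketch of the usual MultiVectorRetriever pattern (the `id_key` value and `doc_ids` helper below are illustrative assumptions, not copied from the notebook), the summaries are indexed in the vectorstore while the raw texts, tables, and base64 images are stored in the docstore under matching ids:

```python
import uuid

from langchain.schema.document import Document

id_key = "doc_id"  # assumed metadata key linking summaries to parent documents

# The retriever searches over summaries but returns the raw parent documents
retriever = MultiVectorRetriever(
    vectorstore=vectorstore,
    docstore=store,
    id_key=id_key,
)

# Index the text summaries; store the raw texts under the same ids
doc_ids = [str(uuid.uuid4()) for _ in texts]
summary_docs = [
    Document(page_content=s, metadata={id_key: doc_ids[i]})
    for i, s in enumerate(text_summaries)
]
retriever.vectorstore.add_documents(summary_docs)
retriever.docstore.mset(list(zip(doc_ids, texts)))

# The same pattern would be repeated for table_summaries/tables
# and for image_summaries/img_base64_list.
```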
@@ -481,10 +470,8 @@
],
"source": [
"from base64 import b64decode\n",
"\n",
"\n",
"def split_image_text_types(docs):\n",
"    \"\"\"Split base64-encoded images and texts\"\"\"\n",
"    ''' Split base64-encoded images and texts '''\n",
"    b64 = []\n",
"    text = []\n",
"    for doc in docs:\n",
@@ -493,9 +480,10 @@
"            b64.append(doc)\n",
"        except Exception as e:\n",
"            text.append(doc)\n",
"    return {\"images\": b64, \"texts\": text}\n",
"\n",
"\n",
"    return {\n",
"        \"images\": b64,\n",
"        \"texts\": text\n",
"    }\n",
"docs_by_type = split_image_text_types(docs)\n",
"plt_img_base64(docs_by_type[\"images\"][0])"
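Because the old and new versions of this cell are interleaved above, here is the updated function reconstructed for readability. The per-document check is outside these hunks, so the `b64decode` probe below is an assumption about how each `doc` is classified:

```python
from base64 import b64decode


def split_image_text_types(docs):
    """Split base64-encoded images and texts"""
    b64 = []
    text = []
    for doc in docs:
        try:
            # If the content decodes cleanly as base64, treat it as an image
            b64decode(doc)
            b64.append(doc)
        except Exception:
            text.append(doc)
    return {"images": b64, "texts": text}


docs_by_type = split_image_text_types(docs)
plt_img_base64(docs_by_type["images"][0])
```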
@@ -526,40 +514,27 @@
|
||||
"from operator import itemgetter\n",
|
||||
"from langchain.schema.runnable import RunnablePassthrough, RunnableLambda\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def prompt_func(dict):\n",
|
||||
" format_texts = \"\\n\".join(dict[\"context\"][\"texts\"])\n",
|
||||
" return [\n",
|
||||
" HumanMessage(\n",
|
||||
" content=[\n",
|
||||
" {\n",
|
||||
" \"type\": \"text\",\n",
|
||||
" \"text\": f\"\"\"Answer the question based only on the following context, which can include text, tables, and the below image:\n",
|
||||
" {\"type\": \"text\", \"text\": f\"\"\"Answer the question based only on the following context, which can include text, tables, and the below image:\n",
|
||||
"Question: {dict[\"question\"]}\n",
|
||||
"\n",
|
||||
"Text and tables:\n",
|
||||
"{format_texts}\n",
|
||||
"\"\"\",\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"type\": \"image_url\",\n",
|
||||
" \"image_url\": {\n",
|
||||
" \"url\": f\"data:image/jpeg;base64,{dict['context']['images'][0]}\"\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
"\"\"\"},\n",
|
||||
" {\"type\": \"image_url\", \"image_url\": {\"url\": f\"data:image/jpeg;base64,{dict['context']['images'][0]}\"}},\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"model = ChatOpenAI(temperature=0, model=\"gpt-4-vision-preview\", max_tokens=1024)\n",
|
||||
"\n",
|
||||
"# RAG pipeline\n",
|
||||
"chain = (\n",
|
||||
" {\n",
|
||||
" \"context\": retriever | RunnableLambda(split_image_text_types),\n",
|
||||
" \"question\": RunnablePassthrough(),\n",
|
||||
" }\n",
|
||||
" {\"context\": retriever | RunnableLambda(split_image_text_types), \"question\": RunnablePassthrough()}\n",
|
||||
" | RunnableLambda(prompt_func)\n",
|
||||
" | model\n",
|
||||
" | StrOutputParser()\n",
|
||||
@@ -591,7 +566,9 @@
}
],
"source": [
"chain.invoke(\"What is the change in wild fires from 1993 to 2022?\")"
"chain.invoke(\n",
"    \"What is the change in wild fires from 1993 to 2022?\"\n",
")"
]
},
{
@@ -599,7 +576,7 @@
"id": "dea241f1-bd11-45cb-bb33-c4e2e8286855",
"metadata": {},
"source": [
"Here is the [trace](https://smith.langchain.com/public/db0441a8-2c17-4070-bdf7-45d4fdf8f517/r/80cb0f89-1766-4caf-8959-fc43ec4b071c). "
"Here is the [trace](https://smith.langchain.com/public/43d955ff-212f-4043-8d63-986b0e4e4eed/r). "
]
}
],
@@ -20,7 +20,6 @@ Notebook | Description
[databricks_sql_db.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/databricks_sql_db.ipynb) | Connect to databricks runtimes and databricks sql.
[deeplake_semantic_search_over_...](https://github.com/langchain-ai/langchain/tree/master/cookbook/deeplake_semantic_search_over_chat.ipynb) | Perform semantic search and question-answering over a group chat using activeloop's deep lake with gpt4.
[elasticsearch_db_qa.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/elasticsearch_db_qa.ipynb) | Interact with elasticsearch analytics databases in natural language and build search queries via the elasticsearch dsl API.
[extraction_openai_tools.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/extraction_openai_tools.ipynb) | Structured Data Extraction with OpenAI Tools
[forward_looking_retrieval_augm...](https://github.com/langchain-ai/langchain/tree/master/cookbook/forward_looking_retrieval_augmented_generation.ipynb) | Implement the forward-looking active retrieval augmented generation (flare) method, which generates answers to questions, identifies uncertain tokens, generates hypothetical questions based on these tokens, and retrieves relevant documents to continue generating the answer.
[generative_agents_interactive_...](https://github.com/langchain-ai/langchain/tree/master/cookbook/generative_agents_interactive_simulacra_of_human_behavior.ipynb) | Implement a generative agent that simulates human behavior, based on a research paper, using a time-weighted memory object backed by a langchain retriever.
[gymnasium_agent_simulation.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/gymnasium_agent_simulation.ipynb) | Create a simple agent-environment interaction loop in simulated environments like text-based games with gymnasium.
@@ -1,213 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2def22ea",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Extraction with OpenAI Tools\n",
|
||||
"\n",
|
||||
"Performing extraction has never been easier! OpenAI's tool calling ability is the perfect thing to use as it allows for extracting multiple different elements from text that are different types. \n",
|
||||
"\n",
|
||||
"Models after 1106 use tools and support \"parallel function calling\" which makes this super easy."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "5c628496",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.pydantic_v1 import BaseModel\n",
|
||||
"from typing import Optional, List\n",
|
||||
"from langchain.chains.openai_tools import create_extraction_chain_pydantic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "afe9657b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Make sure to use a recent model that supports tools\n",
|
||||
"model = ChatOpenAI(model=\"gpt-3.5-turbo-1106\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "bc0ca3b6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Pydantic is an easy way to define a schema\n",
|
||||
"class Person(BaseModel):\n",
|
||||
" \"\"\"Information about people to extract.\"\"\"\n",
|
||||
"\n",
|
||||
" name: str\n",
|
||||
" age: Optional[int] = None"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "2036af68",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = create_extraction_chain_pydantic(Person, model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "1748ad21",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Person(name='jane', age=2), Person(name='bob', age=3)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.invoke({\"input\": \"jane is 2 and bob is 3\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "c8262ce5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Let's define another element\n",
|
||||
"class Class(BaseModel):\n",
|
||||
" \"\"\"Information about classes to extract.\"\"\"\n",
|
||||
"\n",
|
||||
" teacher: str\n",
|
||||
" students: List[str]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "4973c104",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = create_extraction_chain_pydantic([Person, Class], model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "e976a15e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Person(name='jane', age=2),\n",
|
||||
" Person(name='bob', age=3),\n",
|
||||
" Class(teacher='Mrs Sampson', students=['jane', 'bob'])]"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.invoke({\"input\": \"jane is 2 and bob is 3 and they are in Mrs Sampson's class\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6575a7d6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Under the hood\n",
|
||||
"\n",
|
||||
"Under the hood, this is a simple chain:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b8ba83e5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"```python\n",
|
||||
"from typing import Union, List, Type, Optional\n",
|
||||
"\n",
|
||||
"from langchain.output_parsers.openai_tools import PydanticToolsParser\n",
|
||||
"from langchain.utils.openai_functions import convert_pydantic_to_openai_tool\n",
|
||||
"from langchain.schema.runnable import Runnable\n",
|
||||
"from langchain.pydantic_v1 import BaseModel\n",
|
||||
"from langchain.prompts import ChatPromptTemplate\n",
|
||||
"from langchain.schema.messages import SystemMessage\n",
|
||||
"from langchain.schema.language_model import BaseLanguageModel\n",
|
||||
"\n",
|
||||
"_EXTRACTION_TEMPLATE = \"\"\"Extract and save the relevant entities mentioned \\\n",
|
||||
"in the following passage together with their properties.\n",
|
||||
"\n",
|
||||
"If a property is not present and is not required in the function parameters, do not include it in the output.\"\"\" # noqa: E501\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def create_extraction_chain_pydantic(\n",
|
||||
" pydantic_schemas: Union[List[Type[BaseModel]], Type[BaseModel]],\n",
|
||||
" llm: BaseLanguageModel,\n",
|
||||
" system_message: str = _EXTRACTION_TEMPLATE,\n",
|
||||
") -> Runnable:\n",
|
||||
" if not isinstance(pydantic_schemas, list):\n",
|
||||
" pydantic_schemas = [pydantic_schemas]\n",
|
||||
" prompt = ChatPromptTemplate.from_messages([\n",
|
||||
" (\"system\", system_message),\n",
|
||||
" (\"user\", \"{input}\")\n",
|
||||
" ])\n",
|
||||
" tools = [convert_pydantic_to_openai_tool(p) for p in pydantic_schemas]\n",
|
||||
" model = llm.bind(tools=tools)\n",
|
||||
" chain = prompt | model | PydanticToolsParser(tools=pydantic_schemas)\n",
|
||||
" return chain\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2eac6b68",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -7,7 +7,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! pip install \"openai>=1\" \"langchain>=0.0.331rc2\" matplotlib pillow"
|
||||
"! pip install \"openai>=1\" \"langchain>=0.0.331rc2\" matplotlib pillow "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -47,24 +47,22 @@
|
||||
"from PIL import Image\n",
|
||||
"from IPython.display import display, HTML\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def encode_image(image_path):\n",
|
||||
" \"\"\"Getting the base64 string\"\"\"\n",
|
||||
"\n",
|
||||
" ''' Getting the base64 string '''\n",
|
||||
" \n",
|
||||
" with open(image_path, \"rb\") as image_file:\n",
|
||||
" return base64.b64encode(image_file.read()).decode(\"utf-8\")\n",
|
||||
" return base64.b64encode(image_file.read()).decode('utf-8')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def plt_img_base64(img_base64):\n",
|
||||
" \"\"\"Display the base64 image\"\"\"\n",
|
||||
" ''' Display the base64 image '''\n",
|
||||
"\n",
|
||||
" # Create an HTML img tag with the base64 string as the source\n",
|
||||
" image_html = f'<img src=\"data:image/jpeg;base64,{img_base64}\" />'\n",
|
||||
"\n",
|
||||
" \n",
|
||||
" # Display the image by rendering the HTML\n",
|
||||
" display(HTML(image_html))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Image for QA\n",
|
||||
"path = \"/Users/rlm/Desktop/Multimodal_Eval/qa/llm_strategies.jpeg\"\n",
|
||||
"img_base64 = encode_image(path)\n",
|
||||
@@ -101,19 +99,19 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chat = ChatOpenAI(model=\"gpt-4-vision-preview\", max_tokens=1024)\n",
|
||||
"chat = ChatOpenAI(model=\"gpt-4-vision-preview\",\n",
|
||||
" max_tokens=1024)\n",
|
||||
"\n",
|
||||
"msg = chat.invoke(\n",
|
||||
" [\n",
|
||||
" HumanMessage(\n",
|
||||
" content=[\n",
|
||||
" {\n",
|
||||
" \"type\": \"text\",\n",
|
||||
" \"text\": \"Based on the image, what is the difference in training strategy between a small and a large base model?\",\n",
|
||||
" },\n",
|
||||
" {\"type\": \"text\", \"text\":\"Based on the image, what is the difference in training strategy between a small and a large base model?\"},\n",
|
||||
" {\n",
|
||||
" \"type\": \"image_url\",\n",
|
||||
" \"image_url\": {\"url\": f\"data:image/jpeg;base64,{img_base64}\"},\n",
|
||||
" \"image_url\": {\n",
|
||||
" \"url\": f\"data:image/jpeg;base64,{img_base64}\"\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -17,8 +17,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# need openai>=1.1.0, langchain>=0.0.333, langchain-experimental>=0.0.39\n",
|
||||
"!pip install -U openai langchain langchain-experimental"
|
||||
"!pip install -U openai \"langchain>=0.0.331rc2\" langchain-experimental"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -134,7 +133,7 @@
|
||||
" name=\"langchain assistant\",\n",
|
||||
" instructions=\"You are a personal math tutor. Write and run code to answer math questions.\",\n",
|
||||
" tools=[{\"type\": \"code_interpreter\"}],\n",
|
||||
" model=\"gpt-4-1106-preview\",\n",
|
||||
" model=\"gpt-4-1106-preview\"\n",
|
||||
")\n",
|
||||
"output = interpreter_assistant.invoke({\"content\": \"What's 10 - 4 raised to the 2.7\"})\n",
|
||||
"output"
|
||||
@@ -184,7 +183,7 @@
|
||||
" instructions=\"You are a personal math tutor. Write and run code to answer math questions. You can also search the internet.\",\n",
|
||||
" tools=tools,\n",
|
||||
" model=\"gpt-4-1106-preview\",\n",
|
||||
" as_agent=True,\n",
|
||||
" as_agent=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -241,7 +240,7 @@
|
||||
" instructions=\"You are a personal math tutor. Write and run code to answer math questions.\",\n",
|
||||
" tools=tools,\n",
|
||||
" model=\"gpt-4-1106-preview\",\n",
|
||||
" as_agent=True,\n",
|
||||
" as_agent=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -254,7 +253,6 @@
|
||||
"source": [
|
||||
"from langchain.schema.agent import AgentFinish\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def execute_agent(agent, tools, input):\n",
|
||||
" tool_map = {tool.name: tool for tool in tools}\n",
|
||||
" response = agent.invoke(input)\n",
|
||||
@@ -263,17 +261,9 @@
|
||||
" for action in response:\n",
|
||||
" tool_output = tool_map[action.tool].invoke(action.tool_input)\n",
|
||||
" print(action.tool, action.tool_input, tool_output, end=\"\\n\\n\")\n",
|
||||
" tool_outputs.append(\n",
|
||||
" {\"output\": tool_output, \"tool_call_id\": action.tool_call_id}\n",
|
||||
" )\n",
|
||||
" response = agent.invoke(\n",
|
||||
" {\n",
|
||||
" \"tool_outputs\": tool_outputs,\n",
|
||||
" \"run_id\": action.run_id,\n",
|
||||
" \"thread_id\": action.thread_id,\n",
|
||||
" }\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" tool_outputs.append({\"output\": tool_output, \"tool_call_id\": action.tool_call_id})\n",
|
||||
" response = agent.invoke({\"tool_outputs\": tool_outputs, \"run_id\": action.run_id, \"thread_id\": action.thread_id})\n",
|
||||
" \n",
|
||||
" return response"
|
||||
]
|
||||
},
|
||||
@@ -315,9 +305,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"next_response = execute_agent(\n",
|
||||
" agent, tools, {\"content\": \"now add 17.241\", \"thread_id\": response.thread_id}\n",
|
||||
")\n",
|
||||
"next_response = execute_agent(agent, tools, {\"content\": \"now add 17.241\", \"thread_id\": response.thread_id})\n",
|
||||
"print(next_response.return_values[\"output\"])"
|
||||
]
|
||||
},
|
||||
@@ -401,85 +389,6 @@
|
||||
")\n",
|
||||
"print(output.llm_output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "aa6565be-985d-4127-848e-c3bca9d7b434",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Breaking changes to Azure classes\n",
|
||||
"\n",
|
||||
"OpenAI V1 rewrote their clients and separated Azure and OpenAI clients. This has led to some changes in LangChain interfaces when using OpenAI V1.\n",
|
||||
"\n",
|
||||
"BREAKING CHANGES:\n",
|
||||
"- To use Azure embeddings with OpenAI V1, you'll need to use the new `AzureOpenAIEmbeddings` instead of the existing `OpenAIEmbeddings`. `OpenAIEmbeddings` continue to work when using Azure with `openai<1`.\n",
|
||||
"```python\n",
|
||||
"from langchain.embeddings import AzureOpenAIEmbeddings\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"RECOMMENDED CHANGES:\n",
|
||||
"- When using AzureChatOpenAI, if passing in an Azure endpoint (eg https://example-resource.azure.openai.com/) this should be specified via the `azure_endpoint` parameter or the `AZURE_OPENAI_ENDPOINT`. We're maintaining backwards compatibility for now with specifying this via `openai_api_base`/`base_url` or env var `OPENAI_API_BASE` but this shouldn't be relied upon.\n",
|
||||
"- When using Azure chat or embedding models, pass in API keys either via `openai_api_key` parameter or `AZURE_OPENAI_API_KEY` parameter. We're maintaining backwards compatibility for now with specifying this via `OPENAI_API_KEY` but this shouldn't be relied upon."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "49944887-3972-497e-8da2-6d32d44345a9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tools\n",
|
||||
"\n",
|
||||
"Use tools for parallel function calling."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "916292d8-0f89-40a6-af1c-5a1122327de8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[GetCurrentWeather(location='New York, NY', unit='fahrenheit'),\n",
|
||||
" GetCurrentWeather(location='Los Angeles, CA', unit='fahrenheit'),\n",
|
||||
" GetCurrentWeather(location='San Francisco, CA', unit='fahrenheit')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from typing import Literal\n",
|
||||
"\n",
|
||||
"from langchain.output_parsers.openai_tools import PydanticToolsParser\n",
|
||||
"from langchain.utils.openai_functions import convert_pydantic_to_openai_tool\n",
|
||||
"from langchain.prompts import ChatPromptTemplate\n",
|
||||
"from langchain.pydantic_v1 import BaseModel, Field\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class GetCurrentWeather(BaseModel):\n",
|
||||
" \"\"\"Get the current weather in a location.\"\"\"\n",
|
||||
"\n",
|
||||
" location: str = Field(description=\"The city and state, e.g. San Francisco, CA\")\n",
|
||||
" unit: Literal[\"celsius\", \"fahrenheit\"] = Field(\n",
|
||||
" default=\"fahrenheit\", description=\"The temperature unit, default to fahrenheit\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [(\"system\", \"You are a helpful assistant\"), (\"user\", \"{input}\")]\n",
|
||||
")\n",
|
||||
"model = ChatOpenAI(model=\"gpt-3.5-turbo-1106\").bind(\n",
|
||||
" tools=[convert_pydantic_to_openai_tool(GetCurrentWeather)]\n",
|
||||
")\n",
|
||||
"chain = prompt | model | PydanticToolsParser(tools=[GetCurrentWeather])\n",
|
||||
"\n",
|
||||
"chain.invoke({\"input\": \"what's the weather in NYC, LA, and SF\"})"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -1,18 +1,15 @@
# Tutorials

Below are links to tutorials and courses on LangChain. For written guides on common use cases for LangChain, check out the [use cases guides](/docs/use_cases).
Below are links to tutorials and courses on LangChain. For written guides on common use cases for LangChain, check out the [use cases guides](/docs/use_cases/qa_structured/sql).

⛓ icon marks a new addition [last update 2023-09-21]

---------------------

### [LangChain on Wikipedia](https://en.wikipedia.org/wiki/LangChain)

### DeepLearning.AI courses
by [Harrison Chase](https://en.wikipedia.org/wiki/LangChain) and [Andrew Ng](https://en.wikipedia.org/wiki/Andrew_Ng)
by [Harrison Chase](https://github.com/hwchase17) and [Andrew Ng](https://en.wikipedia.org/wiki/Andrew_Ng)
- [LangChain for LLM Application Development](https://learn.deeplearning.ai/langchain)
- [LangChain Chat with Your Data](https://learn.deeplearning.ai/langchain-chat-with-your-data)
- ⛓ [Functions, Tools and Agents with LangChain](https://learn.deeplearning.ai/functions-tools-agents-langchain)

### Handbook
[LangChain AI Handbook](https://www.pinecone.io/learn/langchain/) By **James Briggs** and **Francisco Ingham**
@@ -12,19 +12,6 @@
|
||||
"Suppose we have a simple prompt + model sequence:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "950297ed-2d67-4091-8ea7-1d412d259d04",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.prompts import ChatPromptTemplate\n",
|
||||
"from langchain.schema import StrOutputParser\n",
|
||||
"from langchain.schema.runnable import RunnablePassthrough"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
@@ -50,6 +37,11 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.prompts import ChatPromptTemplate\n",
|
||||
"from langchain.schema import StrOutputParser\n",
|
||||
"from langchain.schema.runnable import RunnablePassthrough\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\n",
|
||||
@@ -113,29 +105,31 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 14,
|
||||
"id": "f66a0fe4-fde0-4706-8863-d60253f211c7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"function = {\n",
|
||||
" \"name\": \"solver\",\n",
|
||||
" \"description\": \"Formulates and solves an equation\",\n",
|
||||
" \"parameters\": {\n",
|
||||
" \"type\": \"object\",\n",
|
||||
" \"properties\": {\n",
|
||||
" \"equation\": {\n",
|
||||
" \"type\": \"string\",\n",
|
||||
" \"description\": \"The algebraic expression of the equation\",\n",
|
||||
" },\n",
|
||||
" \"solution\": {\n",
|
||||
" \"type\": \"string\",\n",
|
||||
" \"description\": \"The solution to the equation\",\n",
|
||||
"functions = [\n",
|
||||
" {\n",
|
||||
" \"name\": \"solver\",\n",
|
||||
" \"description\": \"Formulates and solves an equation\",\n",
|
||||
" \"parameters\": {\n",
|
||||
" \"type\": \"object\",\n",
|
||||
" \"properties\": {\n",
|
||||
" \"equation\": {\n",
|
||||
" \"type\": \"string\",\n",
|
||||
" \"description\": \"The algebraic expression of the equation\",\n",
|
||||
" },\n",
|
||||
" \"solution\": {\n",
|
||||
" \"type\": \"string\",\n",
|
||||
" \"description\": \"The solution to the equation\",\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" \"required\": [\"equation\", \"solution\"],\n",
|
||||
" },\n",
|
||||
" \"required\": [\"equation\", \"solution\"],\n",
|
||||
" },\n",
|
||||
"}"
|
||||
" }\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -167,70 +161,19 @@
|
||||
" ]\n",
|
||||
")\n",
|
||||
"model = ChatOpenAI(model=\"gpt-4\", temperature=0).bind(\n",
|
||||
" function_call={\"name\": \"solver\"}, functions=[function]\n",
|
||||
" function_call={\"name\": \"solver\"}, functions=functions\n",
|
||||
")\n",
|
||||
"runnable = {\"equation_statement\": RunnablePassthrough()} | prompt | model\n",
|
||||
"runnable.invoke(\"x raised to the third plus seven equals 12\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f07d7528-9269-4d6f-b12e-3669592a9e03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Attaching OpenAI tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": null,
|
||||
"id": "2cdeeb4c-0c1f-43da-bd58-4f591d9e0671",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tools = [\n",
|
||||
" {\n",
|
||||
" \"type\": \"function\",\n",
|
||||
" \"function\": {\n",
|
||||
" \"name\": \"get_current_weather\",\n",
|
||||
" \"description\": \"Get the current weather in a given location\",\n",
|
||||
" \"parameters\": {\n",
|
||||
" \"type\": \"object\",\n",
|
||||
" \"properties\": {\n",
|
||||
" \"location\": {\n",
|
||||
" \"type\": \"string\",\n",
|
||||
" \"description\": \"The city and state, e.g. San Francisco, CA\",\n",
|
||||
" },\n",
|
||||
" \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
|
||||
" },\n",
|
||||
" \"required\": [\"location\"],\n",
|
||||
" },\n",
|
||||
" },\n",
|
||||
" }\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "2b65beab-48bb-46ff-a5a4-ef8ac95a513c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_zHN0ZHwrxM7nZDdqTp6dkPko', 'function': {'arguments': '{\"location\": \"San Francisco, CA\", \"unit\": \"celsius\"}', 'name': 'get_current_weather'}, 'type': 'function'}, {'id': 'call_aqdMm9HBSlFW9c9rqxTa7eQv', 'function': {'arguments': '{\"location\": \"New York, NY\", \"unit\": \"celsius\"}', 'name': 'get_current_weather'}, 'type': 'function'}, {'id': 'call_cx8E567zcLzYV2WSWVgO63f1', 'function': {'arguments': '{\"location\": \"Los Angeles, CA\", \"unit\": \"celsius\"}', 'name': 'get_current_weather'}, 'type': 'function'}]})"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model = ChatOpenAI(model=\"gpt-3.5-turbo-1106\").bind(tools=tools)\n",
|
||||
"model.invoke(\"What's the weather in SF, NYC and LA?\")"
|
||||
]
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -5,7 +5,7 @@
"id": "39eaf61b",
"metadata": {},
"source": [
"# Configure chain internals at runtime\n",
"# Configuration\n",
"\n",
"Oftentimes you may want to experiment with, or even expose to the end user, multiple different ways of doing things.\n",
"In order to make this experience as easy as possible, we have defined two methods.\n",
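The two methods referred to here, `configurable_fields` and `configurable_alternatives`, are demonstrated later in the notebook, outside this hunk. A minimal sketch of the first, assuming an OpenAI chat model whose temperature is exposed as a runtime-configurable field:

```python
from langchain.chat_models import ChatOpenAI
from langchain.schema.runnable import ConfigurableField

# Expose temperature as a field that callers can override per invocation
model = ChatOpenAI(temperature=0).configurable_fields(
    temperature=ConfigurableField(
        id="llm_temperature",
        name="LLM Temperature",
        description="The temperature of the LLM",
    )
)

model.invoke("pick a random number")  # uses temperature=0
model.with_config(configurable={"llm_temperature": 0.9}).invoke("pick a random number")
```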
@@ -594,7 +594,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -5,7 +5,7 @@
"id": "fbc4bf6e",
"metadata": {},
"source": [
"# Run custom functions\n",
"# Run arbitrary functions\n",
"\n",
"You can use arbitrary functions in the pipeline\n",
"\n",
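Plain Python functions are wrapped as `RunnableLambda` when they are composed into a pipeline. A minimal sketch; the helper function and prompt here are illustrative, not copied from the notebook:

```python
from operator import itemgetter

from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnableLambda


def length_function(text: str) -> int:
    # Any ordinary Python function can participate in the pipeline
    return len(text)


prompt = ChatPromptTemplate.from_template("what is {a} + {b}")
model = ChatOpenAI()

chain = (
    {
        "a": itemgetter("foo") | RunnableLambda(length_function),
        "b": itemgetter("bar") | RunnableLambda(length_function),
    }
    | prompt
    | model
)

chain.invoke({"foo": "bar", "bar": "gah"})
```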
@@ -175,7 +175,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Stream custom generator functions\n",
|
||||
"# Custom generator functions\n",
|
||||
"\n",
|
||||
"You can use generator functions (ie. functions that use the `yield` keyword, and behave like iterators) in a LCEL pipeline.\n",
|
||||
"\n",
|
||||
@@ -21,7 +21,15 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"lion, tiger, wolf, gorilla, panda\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from typing import Iterator, List\n",
|
||||
"\n",
|
||||
@@ -35,51 +43,16 @@
|
||||
")\n",
|
||||
"model = ChatOpenAI(temperature=0.0)\n",
|
||||
"\n",
|
||||
"str_chain = prompt | model | StrOutputParser()"
|
||||
"\n",
|
||||
"str_chain = prompt | model | StrOutputParser()\n",
|
||||
"\n",
|
||||
"print(str_chain.invoke({\"animal\": \"bear\"}))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"lion, tiger, wolf, gorilla, panda"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for chunk in str_chain.stream({\"animal\": \"bear\"}):\n",
|
||||
" print(chunk, end=\"\", flush=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'lion, tiger, wolf, gorilla, panda'"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"str_chain.invoke({\"animal\": \"bear\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# This is a custom parser that splits an iterator of llm tokens\n",
|
||||
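The body of this custom parser sits outside the hunks shown here. A minimal sketch of such a generator, assuming the model streams a comma-separated list and each completed item should be yielded as soon as its trailing comma arrives:

```python
from typing import Iterator, List


def split_into_list(input: Iterator[str]) -> Iterator[List[str]]:
    # Buffer partial output until a comma arrives, then yield the completed item
    buffer = ""
    for chunk in input:
        buffer += chunk
        while "," in buffer:
            comma_index = buffer.index(",")
            yield [buffer[:comma_index].strip()]
            buffer = buffer[comma_index + 1 :]
    # Yield whatever is left once the stream ends
    yield [buffer.strip()]
```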
@@ -104,61 +77,22 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"list_chain = str_chain | split_into_list"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['lion']\n",
|
||||
"['tiger']\n",
|
||||
"['wolf']\n",
|
||||
"['gorilla']\n",
|
||||
"['panda']\n"
|
||||
"['lion', 'tiger', 'wolf', 'gorilla', 'panda']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for chunk in list_chain.stream({\"animal\": \"bear\"}):\n",
|
||||
" print(chunk, flush=True)"
|
||||
"list_chain = str_chain | split_into_list\n",
|
||||
"\n",
|
||||
"print(list_chain.invoke({\"animal\": \"bear\"}))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['lion', 'tiger', 'wolf', 'gorilla', 'panda']"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"list_chain.invoke({\"animal\": \"bear\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -177,9 +111,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.11.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
"id": "b022ab74-794d-4c54-ad47-ff9549ddb9d2",
"metadata": {},
"source": [
"# Parallelize steps\n",
"# Use RunnableParallel/RunnableMap\n",
"\n",
"RunnableParallel (aka. RunnableMap) makes it easy to execute multiple Runnables in parallel, and to return the output of these Runnables as a map."
]
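A minimal sketch of RunnableParallel with two prompt + model branches run over the same input (the prompts below are illustrative):

```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnableParallel

model = ChatOpenAI()
joke_chain = ChatPromptTemplate.from_template("tell me a joke about {topic}") | model
poem_chain = ChatPromptTemplate.from_template("write a 2-line poem about {topic}") | model

# Both branches run in parallel; the result comes back as a dict (a "map")
map_chain = RunnableParallel(joke=joke_chain, poem=poem_chain)
map_chain.invoke({"topic": "bear"})
```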
@@ -195,7 +195,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -5,7 +5,7 @@
"id": "4b47436a",
"metadata": {},
"source": [
"# Dynamically route logic based on input\n",
"# Route between multiple Runnables\n",
"\n",
"This notebook covers how to do routing in the LangChain Expression Language.\n",
"\n",
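One common way to route, sketched below, is a plain function that inspects the input and returns the appropriate sub-chain; wrapping it in `RunnableLambda` makes it part of the pipeline. The classifier prompt and the two sub-chains are assumptions for illustration, not copied from the notebook:

```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableLambda

model = ChatOpenAI()

# Two candidate sub-chains
langchain_chain = (
    ChatPromptTemplate.from_template(
        "You are an expert in LangChain. Answer the question: {question}"
    )
    | model
)
general_chain = ChatPromptTemplate.from_template("Answer the question: {question}") | model

# A small chain that classifies the question into a topic string
classifier_chain = (
    ChatPromptTemplate.from_template(
        "Classify the question as either `LangChain` or `Other`. Question: {question}"
    )
    | model
    | StrOutputParser()
)


def route(info):
    # Pick a sub-chain based on the classifier's output
    if "langchain" in info["topic"].lower():
        return langchain_chain
    return general_chain


full_chain = {
    "topic": classifier_chain,
    "question": lambda x: x["question"],
} | RunnableLambda(route)

full_chain.invoke({"question": "How do I use LangChain?"})
```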
@@ -4,30 +4,33 @@ sidebar_class_name: hidden

# LangChain Expression Language (LCEL)

LangChain Expression Language, or LCEL, is a declarative way to easily compose chains together.
LCEL was designed from day 1 to **support putting prototypes in production, with no code changes**, from the simplest “prompt + LLM” chain to the most complex chains (we’ve seen folks successfully run LCEL chains with 100s of steps in production). To highlight a few of the reasons you might want to use LCEL:
LangChain Expression Language or LCEL is a declarative way to easily compose chains together.
There are several benefits to writing chains in this manner (as opposed to writing normal code):

**Streaming support**
When you build your chains with LCEL you get the best possible time-to-first-token (time elapsed until the first chunk of output comes out). For some chains this means, e.g., that we stream tokens straight from an LLM to a streaming output parser, and you get back parsed, incremental chunks of output at the same rate as the LLM provider outputs the raw tokens.
**Async, Batch, and Streaming Support**
Any chain constructed this way will automatically have full sync, async, batch, and streaming support.
This makes it easy to prototype a chain in a Jupyter notebook using the sync interface, and then expose it as an async streaming interface.
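As a minimal sketch of what that uniform interface looks like for any LCEL chain (the prompt and model are illustrative):

```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser

chain = (
    ChatPromptTemplate.from_template("Tell me a joke about {topic}")
    | ChatOpenAI()
    | StrOutputParser()
)

chain.invoke({"topic": "bears"})                      # sync
chain.batch([{"topic": "bears"}, {"topic": "cats"}])  # batch
for chunk in chain.stream({"topic": "bears"}):        # streaming
    print(chunk, end="", flush=True)

# The same chain also exposes ainvoke / abatch / astream for async use,
# e.g. `await chain.ainvoke({"topic": "bears"})` inside an async context.
```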

**Async support**
Any chain built with LCEL can be called both with the synchronous API (e.g. in your Jupyter notebook while prototyping) as well as with the asynchronous API (e.g. in a [LangServe](/docs/langsmith) server). This enables using the same code for prototypes and in production, with great performance, and the ability to handle many concurrent requests in the same server.

**Fallbacks**
The non-determinism of LLMs makes it important to be able to handle errors gracefully.
With LCEL you can easily attach fallbacks to any chain.

**Optimized parallel execution**
Whenever your LCEL chains have steps that can be executed in parallel (e.g. if you fetch documents from multiple retrievers) we automatically do it, both in the sync and the async interfaces, for the smallest possible latency.
**Parallelism**
Since LLM applications involve (sometimes long) API calls, it often becomes important to run things in parallel.
With LCEL syntax, any components that can be run in parallel automatically are.

**Retries and fallbacks**
Configure retries and fallbacks for any part of your LCEL chain. This is a great way to make your chains more reliable at scale. We’re currently working on adding streaming support for retries/fallbacks, so you can get the added reliability without any latency cost.

**Access intermediate results**
For more complex chains it’s often very useful to access the results of intermediate steps even before the final output is produced. This can be used to let end-users know something is happening, or even just to debug your chain. You can stream intermediate results, and it’s available on every [LangServe](/docs/langserve) server.

**Input and output schemas**
Input and output schemas give every LCEL chain Pydantic and JSONSchema schemas inferred from the structure of your chain. This can be used for validation of inputs and outputs, and is an integral part of LangServe.

**Seamless LangSmith tracing integration**
**Seamless LangSmith Tracing Integration**
As your chains get more and more complex, it becomes increasingly important to understand what exactly is happening at every step.
With LCEL, **all** steps are automatically logged to [LangSmith](/docs/langsmith/) for maximum observability and debuggability.
With LCEL, **all** steps are automatically logged to [LangSmith](https://smith.langchain.com) for maximal observability and debuggability.

**Seamless LangServe deployment integration**
Any chain created with LCEL can be easily deployed using LangServe.

#### [Interface](/docs/expression_language/interface)
The base interface shared by all LCEL objects

#### [How to](/docs/expression_language/how_to)
How to use core features of LCEL

#### [Cookbook](/docs/expression_language/cookbook)
Examples of common LCEL usage patterns

#### [Why use LCEL](/docs/expression_language/why)
A deeper dive into the benefits of LCEL
@@ -8,7 +8,7 @@
"---\n",
"sidebar_position: 0\n",
"title: Interface\n",
"---"
"---\n"
]
},
{
@@ -31,17 +31,26 @@
"- [`abatch`](#async-batch): call the chain on a list of inputs async\n",
"- [`astream_log`](#async-stream-intermediate-steps): stream back intermediate steps as they happen, in addition to the final response\n",
"\n",
"The **input type** and **output type** varies by component:\n",
"The **input type** varies by component:\n",
"\n",
"| Component | Input Type | Output Type |\n",
"| --- | --- | --- |\n",
"| Prompt | Dictionary | PromptValue |\n",
"| ChatModel | Single string, list of chat messages or a PromptValue | ChatMessage |\n",
"| LLM | Single string, list of chat messages or a PromptValue | String |\n",
"| OutputParser | The output of an LLM or ChatModel | Depends on the parser |\n",
"| Retriever | Single string | List of Documents |\n",
"| Tool | Single string or dictionary, depending on the tool | Depends on the tool |\n",
"| Component | Input Type |\n",
"| --- | --- |\n",
"|Prompt|Dictionary|\n",
"|Retriever|Single string|\n",
"|LLM, ChatModel| Single string, list of chat messages or a PromptValue|\n",
"|Tool|Single string, or dictionary, depending on the tool|\n",
"|OutputParser|The output of an LLM or ChatModel|\n",
"\n",
"The **output type** also varies by component:\n",
"\n",
"| Component | Output Type |\n",
"| --- | --- |\n",
"| LLM | String |\n",
"| ChatModel | ChatMessage |\n",
"| Prompt | PromptValue |\n",
"| Retriever | List of documents |\n",
"| Tool | Depends on the tool |\n",
"| OutputParser | Depends on the parser |\n",
"\n",
"All runnables expose input and output **schemas** to inspect the inputs and outputs:\n",
"- [`input_schema`](#input-schema): an input Pydantic model auto-generated from the structure of the Runnable\n",
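The schema accessors mentioned in the last bullet can be inspected directly on any chain. A short sketch (the prompt and model are illustrative):

```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

chain = ChatPromptTemplate.from_template("tell me a joke about {topic}") | ChatOpenAI()

# JSONSchema derived from the chain's input (here: an object with a `topic` string)
chain.input_schema.schema()

# JSONSchema describing the chain's output (a chat message for a chat model)
chain.output_schema.schema()
```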
@@ -1152,7 +1161,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
docs/docs/expression_language/why.mdx (new file, 11 lines)

@@ -0,0 +1,11 @@
# Why use LCEL?

The LangChain Expression Language was designed from day 1 to **support putting prototypes in production, with no code changes**, from the simplest “prompt + LLM” chain to the most complex chains (we’ve seen folks successfully running in production LCEL chains with 100s of steps). To highlight a few of the reasons you might want to use LCEL:

- first-class support for streaming: when you build your chains with LCEL you get the best possible time-to-first-token (time elapsed until the first chunk of output comes out). For some chains this means, e.g., that we stream tokens straight from an LLM to a streaming output parser, and you get back parsed, incremental chunks of output at the same rate as the LLM provider outputs the raw tokens. We’re constantly improving streaming support, recently we added a [streaming JSON parser](https://twitter.com/LangChainAI/status/1709690468030914584), and more is in the works.
- first-class async support: any chain built with LCEL can be called both with the synchronous API (e.g. in your Jupyter notebook while prototyping) as well as with the asynchronous API (e.g. in a [LangServe](https://github.com/langchain-ai/langserve) server). This enables using the same code for prototypes and in production, with great performance, and the ability to handle many concurrent requests in the same server.
- optimised parallel execution: whenever your LCEL chains have steps that can be executed in parallel (e.g. if you fetch documents from multiple retrievers) we automatically do it, both in the sync and the async interfaces, for the smallest possible latency.
- support for retries and fallbacks: more recently we’ve added support for configuring retries and fallbacks for any part of your LCEL chain. This is a great way to make your chains more reliable at scale. We’re currently working on adding streaming support for retries/fallbacks, so you can get the added reliability without any latency cost (a minimal sketch follows this list).
- accessing intermediate results: for more complex chains it’s often very useful to access the results of intermediate steps even before the final output is produced. This can be used to let end-users know something is happening, or even just to debug your chain. We’ve added support for [streaming intermediate results](https://x.com/LangChainAI/status/1711806009097044193?s=20), and it’s available on every LangServe server.
- [input and output schemas](https://x.com/LangChainAI/status/1711805322195861934?s=20): input and output schemas give every LCEL chain Pydantic and JSONSchema schemas inferred from the structure of your chain. This can be used for validation of inputs and outputs, and is an integral part of LangServe.
- tracing with LangSmith: all chains built with LCEL have first-class tracing support, which can be used to debug your chains, or to understand what’s happening in production. To enable this all you have to do is add your [LangSmith](https://www.langchain.com/langsmith) API key as an environment variable.
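A minimal sketch of attaching retries and a fallback to a model in a chain; the model names below are illustrative, and `with_retry` / `with_fallbacks` are the relevant Runnable methods:

```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser

prompt = ChatPromptTemplate.from_template("Tell me a short fact about {topic}")

# Retry transient failures on the primary model, then fall back to another model
primary = ChatOpenAI(model="gpt-4").with_retry(stop_after_attempt=2)
fallback = ChatOpenAI(model="gpt-3.5-turbo")

chain = prompt | primary.with_fallbacks([fallback]) | StrOutputParser()
chain.invoke({"topic": "otters"})
```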
@@ -28,37 +28,3 @@ If you want to install from source, you can do so by cloning the repo and be sur

```bash
pip install -e .
```

## Langchain experimental
The `langchain-experimental` package holds experimental LangChain code, intended for research and experimental uses.
Install with:

```bash
pip install langchain-experimental
```

## LangChain CLI
The LangChain CLI is useful for working with LangChain templates and other LangServe projects.
Install with:

```bash
pip install langchain-cli
```

## LangServe
LangServe helps developers deploy LangChain runnables and chains as a REST API.
LangServe is automatically installed by LangChain CLI.
If not using LangChain CLI, install with:

```bash
pip install "langserve[all]"
```

for both client and server dependencies. Or `pip install "langserve[client]"` for client code, and `pip install "langserve[server]"` for server code.

## LangSmith SDK
The LangSmith SDK is automatically installed by LangChain.
If not using LangChain, install with:

```bash
pip install langsmith
```
@@ -9,19 +9,23 @@ sidebar_position: 0
|
||||
- **Reason**: rely on a language model to reason (about how to answer based on provided context, what actions to take, etc.)
|
||||
|
||||
This framework consists of several parts.
|
||||
- **LangChain Libraries**: The Python and JavaScript libraries. Contains interfaces and integrations for a myriad of components, a basic runtime for combining these components into chains and agents, and off-the-shelf implementations of chains and agents.
|
||||
- **[LangChain Templates](/docs/templates)**: A collection of easily deployable reference architectures for a wide variety of tasks.
|
||||
- **[LangServe](/docs/langserve)**: A library for deploying LangChain chains as a REST API.
|
||||
- **[LangSmith](/docs/langsmith)**: A developer platform that lets you debug, test, evaluate, and monitor chains built on any LLM framework and seamlessly integrates with LangChain.
|
||||
You can see how the parts interact with each other below:
|
||||
|
||||

|
||||
|
||||
These parts include:
|
||||
|
||||
- **[LangChain Packages]**: The Python and JavaScript packages. Contains interfaces and integrations for a myriad of components, a basic runtime for combining these components into chains and agents, and off-the-shelf implementations of chains and agents.
|
||||
- **[LangChain Templates](https://github.com/langchain-ai/langchain/tree/master/templates)**: A collection of easily deployable reference architectures for a wide variety of tasks.
|
||||
- **[LangServe](https://github.com/langchain-ai/langserve)**: A library for deploying LangChain chains as a REST API.
|
||||
- **[LangSmith](https://smith.langchain.com/)**: A developer platform that lets you debug, test, evaluate, and monitor chains built on any LLM framework and seamlessly integrates with LangChain.
|
||||
|
||||
Together, these products simplify the entire application lifecycle:
|
||||
- **Develop**: Write your applications in LangChain/LangChain.js. Hit the ground running using Templates for reference.
|
||||
- **Productionize**: Use LangSmith to inspect, test and monitor your chains, so that you can constantly improve and deploy with confidence.
|
||||
- **Deploy**: Turn any chain into an API with LangServe.
|
||||
|
||||
## LangChain Libraries
|
||||
## LangChain Packages
|
||||
|
||||
The main value props of the LangChain packages are:
|
||||
1. **Components**: composable tools and integrations for working with language models. Components are modular and easy-to-use, whether you are using the rest of the LangChain framework or not
|
||||
@@ -29,46 +33,31 @@ The main value props of the LangChain packages are:
|
||||
|
||||
Off-the-shelf chains make it easy to get started. Components make it easy to customize existing chains and build new ones.
|
||||
|
||||
|
||||
## Get started
|
||||
|
||||
[Here’s](/docs/get_started/installation) how to install LangChain, set up your environment, and start building.
|
||||
|
||||
We recommend following our [Quickstart](/docs/get_started/quickstart) guide to familiarize yourself with the framework by building your first LangChain application.
|
||||
|
||||
Read up on our [Security](/docs/security) best practices to make sure you're developing safely with LangChain.
|
||||
|
||||
:::note
|
||||
|
||||
These docs focus on the Python LangChain library. [Head here](https://js.langchain.com) for docs on the JavaScript LangChain library.
|
||||
|
||||
:::
|
||||
|
||||
## LangChain Expression Language (LCEL)
|
||||
|
||||
LCEL is a declarative way to compose chains. LCEL was designed from day 1 to support putting prototypes in production, with no code changes, from the simplest “prompt + LLM” chain to the most complex chains.
|
||||
|
||||
- **[Overview](/docs/expression_language/)**: LCEL and its benefits
|
||||
- **[Interface](/docs/expression_language/interface)**: The standard interface for LCEL objects
|
||||
- **[How-to](/docs/expression_language/interface)**: Key features of LCEL
|
||||
- **[Cookbook](/docs/expression_language/cookbook)**: Example code for accomplishing common tasks
|
||||
|
||||
|
||||
## Modules
|
||||
|
||||
LangChain provides standard, extendable interfaces and integrations for the following modules:
|
||||
LangChain provides standard, extendable interfaces and integrations for the following modules, listed from least to most complex:
|
||||
|
||||
#### [Model I/O](/docs/modules/model_io/)
|
||||
Interface with language models
|
||||
|
||||
#### [Retrieval](/docs/modules/data_connection/)
|
||||
Interface with application-specific data
|
||||
|
||||
#### [Chains](/docs/modules/chains/)
|
||||
Construct sequences of calls
|
||||
#### [Agents](/docs/modules/agents/)
|
||||
Let models choose which tools to use given high-level directives
|
||||
|
||||
Let chains choose which tools to use given high-level directives
|
||||
#### [Memory](/docs/modules/memory/)
|
||||
Persist application state between runs of a chain
|
||||
#### [Callbacks](/docs/modules/callbacks/)
|
||||
Log and stream intermediate steps of any chain
|
||||
|
||||
## Examples, ecosystem, and resources
|
||||
|
||||
### [Use cases](/docs/use_cases/question_answering/)
|
||||
Walkthroughs and techniques for common end-to-end use cases, like:
|
||||
- [Document question answering](/docs/use_cases/question_answering/)
|
||||
@@ -76,18 +65,15 @@ Walkthroughs and techniques for common end-to-end use cases, like:
|
||||
- [Analyzing structured data](/docs/use_cases/qa_structured/sql/)
|
||||
- and much more...
|
||||
|
||||
### [Integrations](/docs/integrations/providers/)
|
||||
LangChain is part of a rich ecosystem of tools that integrate with our framework and build on top of it. Check out our growing list of [integrations](/docs/integrations/providers/).
|
||||
|
||||
### [Guides](/docs/guides/adapters/openai)
|
||||
Best practices for developing with LangChain.
|
||||
|
||||
### [API reference](https://api.python.langchain.com)
|
||||
Head to the reference section for full documentation of all classes and methods in the LangChain and LangChain Experimental Python packages.
|
||||
|
||||
### [Developer's guide](/docs/contributing)
|
||||
Check out the developer's guide for guidelines on contributing and help getting your dev environment set up.
|
||||
### [Ecosystem](/docs/integrations/providers/)
|
||||
LangChain is part of a rich ecosystem of tools that integrate with our framework and build on top of it. Check out our growing list of [integrations](/docs/integrations/providers/) and [dependent repos](/docs/additional_resources/dependents).
|
||||
|
||||
### [Community](/docs/community)
|
||||
Head to the [Community navigator](/docs/community) to find places to ask questions, share feedback, meet other developers, and dream about the future of LLMs.
|
||||
|
||||
## API reference
|
||||
|
||||
Head to the [reference](https://api.python.langchain.com) section for full documentation of all classes and methods in the LangChain Python package.
|
||||
|
||||
@@ -1,17 +1,6 @@
|
||||
# Quickstart
|
||||
|
||||
In this quickstart we'll show you how to:
|
||||
- Get set up with LangChain, LangSmith and LangServe
|
||||
- Use the most basic and common components of LangChain: prompt templates, models, and output parsers
|
||||
- Use LangChain Expression Language, the protocol that LangChain is built on and which facilitates component chaining
|
||||
- Build a simple application with LangChain
|
||||
- Trace your application with LangSmith
|
||||
- Serve your application with LangServe
|
||||
|
||||
That's a fair amount to cover! Let's dive in.
|
||||
|
||||
## Setup
|
||||
### Installation
|
||||
## Installation
|
||||
|
||||
To install LangChain run:
|
||||
|
||||
@@ -31,7 +20,7 @@ import CodeBlock from "@theme/CodeBlock";
|
||||
|
||||
For more details, see our [Installation guide](/docs/get_started/installation).
|
||||
|
||||
### Environment
|
||||
## Environment setup
|
||||
|
||||
Using LangChain will usually require integrations with one or more model providers, data stores, APIs, etc. For this example, we'll use OpenAI's model APIs.
|
||||
|
||||
@@ -50,12 +39,12 @@ export OPENAI_API_KEY="..."
|
||||
If you'd prefer not to set an environment variable, you can pass the key in directly via the `openai_api_key` named parameter when initializing the OpenAI LLM class:
|
||||
|
||||
```python
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain.llms import OpenAI
|
||||
|
||||
llm = ChatOpenAI(openai_api_key="...")
|
||||
llm = OpenAI(openai_api_key="...")
|
||||
```
|
||||
|
||||
### LangSmith
|
||||
## LangSmith Setup
|
||||
|
||||
Many of the applications you build with LangChain will contain multiple steps with multiple invocations of LLM calls.
|
||||
As these applications get more and more complex, it becomes crucial to be able to inspect what exactly is going on inside your chain or agent.
|
||||
@@ -69,60 +58,50 @@ export LANGCHAIN_TRACING_V2="true"
|
||||
export LANGCHAIN_API_KEY=...
|
||||
```
|
||||
|
||||
### LangServe
|
||||
## Building an application
|
||||
|
||||
LangServe helps developers deploy LangChain chains as a REST API. You do not need to use LangServe to use LangChain, but in this guide we'll show how you can deploy your app with LangServe.
|
||||
Now we can start building our language model application. LangChain provides many modules that can be used to build language model applications.
|
||||
Modules can be used as standalones in simple applications and they can be combined for more complex use cases.
|
||||
|
||||
Install with:
|
||||
```bash
|
||||
pip install "langserve[all]"
|
||||
```
|
||||
The most common and most important chain that LangChain helps create contains three things:
|
||||
- LLM: The language model is the core reasoning engine here. In order to work with LangChain, you need to understand the different types of language models and how to work with them.
|
||||
- Prompt Templates: This provides instructions to the language model. This controls what the language model outputs, so understanding how to construct prompts and different prompting strategies is crucial.
|
||||
- Output Parsers: These translate the raw response from the LLM to a more workable format, making it easy to use the output downstream.
|
||||
|
||||
## Building with LangChain
|
||||
|
||||
LangChain provides many modules that can be used to build language model applications.
|
||||
Modules can be used as standalones in simple applications and they can be composed for more complex use cases.
|
||||
Composition is powered by **LangChain Expression Language** (LCEL), which defines a unified `Runnable` interface that many modules implement, making it possible to seamlessly chain components.
|
||||
|
||||
The simplest and most common chain contains three things:
|
||||
- LLM/Chat Model: The language model is the core reasoning engine here. In order to work with LangChain, you need to understand the different types of language models and how to work with them.
|
||||
- Prompt Template: This provides instructions to the language model. This controls what the language model outputs, so understanding how to construct prompts and different prompting strategies is crucial.
|
||||
- Output Parser: This translates the raw response from the language model into a more workable format, making it easy to use the output downstream.
|
||||
|
||||
In this guide we'll cover those three components individually, and then go over how to combine them.
|
||||
In this getting started guide we will cover those three components by themselves, and then go over how to combine all of them.
|
||||
Understanding these concepts will set you up well for being able to use and customize LangChain applications.
|
||||
Most LangChain applications allow you to configure the model and/or the prompt, so knowing how to take advantage of this will be a big enabler.
|
||||
Most LangChain applications allow you to configure the LLM and/or the prompt used, so knowing how to take advantage of this will be a big enabler.
|
||||
|
||||
### LLM / Chat Model
|
||||
## LLMs
|
||||
|
||||
There are two types of language models:
|
||||
There are two types of language models, which in LangChain are called:
|
||||
|
||||
- `LLM`: underlying model takes a string as input and returns a string
|
||||
- `ChatModel`: underlying model takes a list of messages as input and returns a message
|
||||
- LLMs: this is a language model which takes a string as input and returns a string
|
||||
- ChatModels: this is a language model which takes a list of messages as input and returns a message
|
||||
|
||||
Strings are simple, but what exactly are messages? The base message interface is defined by `BaseMessage`, which has two required attributes:
|
||||
The input/output for LLMs is simple and easy to understand - a string.
|
||||
But what about ChatModels? The input there is a list of `ChatMessages`, and the output is a single `ChatMessage`.
|
||||
A `ChatMessage` has two required components:
|
||||
|
||||
- `content`: The content of the message. Usually a string.
|
||||
- `role`: The entity from which the `BaseMessage` is coming.
|
||||
- `content`: This is the content of the message.
|
||||
- `role`: This is the role of the entity from which the `ChatMessage` is coming.
|
||||
|
||||
LangChain provides several objects to easily distinguish between different roles:
|
||||
|
||||
- `HumanMessage`: A `BaseMessage` coming from a human/user.
|
||||
- `AIMessage`: A `BaseMessage` coming from an AI/assistant.
|
||||
- `SystemMessage`: A `BaseMessage` coming from the system.
|
||||
- `FunctionMessage` / `ToolMessage`: A `BaseMessage` containing the output of a function or tool call.
|
||||
- `HumanMessage`: A `ChatMessage` coming from a human/user.
|
||||
- `AIMessage`: A `ChatMessage` coming from an AI/assistant.
|
||||
- `SystemMessage`: A `ChatMessage` coming from the system.
|
||||
- `FunctionMessage`: A `ChatMessage` coming from a function call.
|
||||
|
||||
If none of those roles sound right, there is also a `ChatMessage` class where you can specify the role manually.
|
||||
For more information on how to use these different messages most effectively, see our prompting guide.
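As a small sketch, constructing these message objects directly looks like this (the message contents are illustrative):

```python
from langchain.schema import AIMessage, ChatMessage, HumanMessage, SystemMessage

messages = [
    SystemMessage(content="You are a helpful assistant."),
    HumanMessage(content="What is the capital of France?"),
    AIMessage(content="The capital of France is Paris."),
]

# If none of the built-in roles fit, specify the role manually with ChatMessage:
custom = ChatMessage(role="critic", content="Consider mentioning the population too.")
```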
|
||||
|
||||
LangChain provides a common interface that's shared by both `LLM`s and `ChatModel`s.
|
||||
However it's useful to understand the difference in order to most effectively construct prompts for a given language model.
|
||||
LangChain provides a common interface that's shared by both LLMs and ChatModels.
|
||||
However it's useful to understand this difference in order to construct prompts for a given language model.
|
||||
|
||||
The simplest way to call an `LLM` or `ChatModel` is using `.invoke()`, the universal synchronous call method for all LangChain Expression Language (LCEL) objects:
|
||||
- `LLM.invoke`: Takes in a string, returns a string.
|
||||
- `ChatModel.invoke`: Takes in a list of `BaseMessage`, returns a `BaseMessage`.
|
||||
|
||||
The input types for these methods are actually more general than this, but for simplicity here we can assume LLMs only take strings and Chat models only take lists of messages.
|
||||
Check out the "Go deeper" section below to learn more about model invocation.
|
||||
The standard interface that LangChain provides has two methods:
|
||||
- `predict`: Takes in a string, returns a string
|
||||
- `predict_messages`: Takes in a list of messages, returns a message.
|
||||
|
||||
Let's see how to work with these different types of models and these different types of inputs.
|
||||
First, let's import an LLM and a ChatModel.
|
||||
@@ -133,36 +112,50 @@ from langchain.chat_models import ChatOpenAI
|
||||
|
||||
llm = OpenAI()
|
||||
chat_model = ChatOpenAI()
|
||||
|
||||
llm.predict("hi!")
|
||||
>>> "Hi"
|
||||
|
||||
chat_model.predict("hi!")
|
||||
>>> "Hi"
|
||||
```
|
||||
|
||||
`LLM` and `ChatModel` objects are effectively configuration objects.
|
||||
The `OpenAI` and `ChatOpenAI` objects are basically just configuration objects.
|
||||
You can initialize them with parameters like `temperature` and others, and pass them around.
|
||||
|
||||
Next, let's use the `predict` method to run over a string input.
|
||||
|
||||
```python
|
||||
text = "What would be a good company name for a company that makes colorful socks?"
|
||||
|
||||
llm.predict(text)
|
||||
# >> Feetful of Fun
|
||||
|
||||
chat_model.predict(text)
|
||||
# >> Socks O'Color
|
||||
```
|
||||
|
||||
Finally, let's use the `predict_messages` method to run over a list of messages.
|
||||
|
||||
```python
|
||||
from langchain.schema import HumanMessage
|
||||
|
||||
text = "What would be a good company name for a company that makes colorful socks?"
|
||||
messages = [HumanMessage(content=text)]
|
||||
|
||||
llm.invoke(text)
|
||||
llm.predict_messages(messages)
|
||||
# >> Feetful of Fun
|
||||
|
||||
chat_model.invoke(messages)
|
||||
# >> AIMessage(content="Socks O'Color")
|
||||
chat_model.predict_messages(messages)
|
||||
# >> Socks O'Color
|
||||
```
|
||||
|
||||
<details> <summary>Go deeper</summary>
|
||||
For both these methods, you can also pass in parameters as keyword arguments.
|
||||
For example, you could pass in `temperature=0` to override the temperature the object was configured with.
|
||||
Whatever values are passed in at runtime will always override what the object was configured with.
|
||||
|
||||
`LLM.invoke` and `ChatModel.invoke` actually both support as input any of `Union[str, List[BaseMessage], PromptValue]`.
|
||||
`PromptValue` is an object that defines its own custom logic for returning its inputs either as a string or as messages.
|
||||
`LLM`s have logic for coercing any of these into a string, and `ChatModel`s have logic for coercing any of these to messages.
|
||||
The fact that `LLM` and `ChatModel` accept the same inputs means that you can directly swap them for one another in most chains without breaking anything,
|
||||
though it's of course important to think about how inputs are being coerced and how that may affect model performance.
|
||||
To dive deeper on models head to the [Language models](/docs/modules/model_io/models) section.
|
||||
|
||||
</details>
|
||||
|
||||
### Prompt templates
|
||||
## Prompt templates
|
||||
|
||||
Most LLM applications do not pass user input directly into an LLM. Usually they will add the user input to a larger piece of text, called a prompt template, that provides additional context on the specific task at hand.
|
||||
|
||||
@@ -188,10 +181,10 @@ You can "partial" out variables - e.g. you can format only some of the variables
|
||||
You can compose them together, easily combining different templates into a single prompt.
|
||||
For explanations of these functionalities, see the [section on prompts](/docs/modules/model_io/prompts) for more detail.
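As a quick illustration (the template text is just an example), a string `PromptTemplate` can be created and formatted like this:

```python
from langchain.prompts import PromptTemplate

prompt = PromptTemplate.from_template(
    "What is a good name for a company that makes {product}?"
)
prompt.format(product="colorful socks")
# -> "What is a good name for a company that makes colorful socks?"
```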
|
||||
|
||||
`PromptTemplate`s can also be used to produce a list of messages.
|
||||
PromptTemplates can also be used to produce a list of messages.
|
||||
In this case, the prompt contains not only information about the content, but also about each message (its role, its position in the list, etc.).
|
||||
Here, what happens most often is a `ChatPromptTemplate` is a list of `ChatMessageTemplates`.
|
||||
Each `ChatMessageTemplate` contains instructions for how to format that `ChatMessage` - its role, and then also its content.
|
||||
Here, what happens most often is a ChatPromptTemplate is a list of ChatMessageTemplates.
|
||||
Each ChatMessageTemplate contains instructions for how to format that ChatMessage - its role, and then also its content.
|
||||
Let's take a look at this below:
|
||||
|
||||
```python
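# A sketch of typical ChatPromptTemplate usage (the template strings below are illustrative).
from langchain.prompts.chat import ChatPromptTemplate

template = "You are a helpful assistant that translates {input_language} to {output_language}."
human_template = "{text}"

chat_prompt = ChatPromptTemplate.from_messages([
    ("system", template),
    ("human", human_template),
])

# Fill in the variables to produce a list of formatted messages.
chat_prompt.format_messages(input_language="English", output_language="French", text="I love programming.")
```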
|
||||
@@ -218,13 +211,13 @@ chat_prompt.format_messages(input_language="English", output_language="French",
|
||||
|
||||
ChatPromptTemplates can also be constructed in other ways - see the [section on prompts](/docs/modules/model_io/prompts) for more detail.
|
||||
|
||||
### Output parsers
|
||||
## Output parsers
|
||||
|
||||
`OutputParsers` convert the raw output of a language model into a format that can be used downstream.
|
||||
There are a few main types of `OutputParser`s, including:
|
||||
OutputParsers convert the raw output of an LLM into a format that can be used downstream.
|
||||
There are a few main types of OutputParsers, including:
|
||||
|
||||
- Convert text from `LLM` into structured information (e.g. JSON)
|
||||
- Convert a `ChatMessage` into just a string
|
||||
- Convert text from LLM into structured information (e.g. JSON)
|
||||
- Convert a ChatMessage into just a string
|
||||
- Convert the extra information returned from a call besides the message (like OpenAI function invocation) into a string.
|
||||
|
||||
For full information on this, see the [section on output parsers](/docs/modules/model_io/output_parsers).
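For instance, a minimal sketch of the second case, turning a chat model's message output into a plain string (the message content is illustrative):

```python
from langchain.schema import AIMessage, StrOutputParser

parser = StrOutputParser()
parser.invoke(AIMessage(content="Hello there!"))
# -> "Hello there!"
```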
|
||||
@@ -246,7 +239,7 @@ CommaSeparatedListOutputParser().parse("hi, bye")
|
||||
# >> ['hi', 'bye']
|
||||
```
|
||||
|
||||
### Composing with LCEL
|
||||
## PromptTemplate + LLM + OutputParser
|
||||
|
||||
We can now combine all these into one chain.
|
||||
This chain will take input variables, pass those to a prompt template to create a prompt, pass the prompt to a language model, and then pass the output through an (optional) output parser.
|
||||
@@ -254,17 +247,15 @@ This is a convenient way to bundle up a modular piece of logic.
|
||||
Let's see it in action!
|
||||
|
||||
```python
|
||||
from typing import List
|
||||
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain.prompts import ChatPromptTemplate
|
||||
from langchain.prompts.chat import ChatPromptTemplate
|
||||
from langchain.schema import BaseOutputParser
|
||||
|
||||
class CommaSeparatedListOutputParser(BaseOutputParser[List[str]]):
|
||||
class CommaSeparatedListOutputParser(BaseOutputParser):
|
||||
"""Parse the output of an LLM call to a comma-separated list."""
|
||||
|
||||
|
||||
def parse(self, text: str) -> List[str]:
|
||||
def parse(self, text: str):
|
||||
"""Parse the output of an LLM call."""
|
||||
return text.strip().split(", ")
|
||||
|
||||
@@ -282,118 +273,20 @@ chain.invoke({"text": "colors"})
|
||||
# >> ['red', 'blue', 'green', 'yellow', 'orange']
|
||||
```
|
||||
|
||||
|
||||
Note that we are using the `|` syntax to join these components together.
|
||||
This `|` syntax is powered by the LangChain Expression Language (LCEL) and relies on the universal `Runnable` interface that all of these objects implement.
|
||||
To learn more about LCEL, read the documentation [here](/docs/expression_language).
|
||||
|
||||
## Tracing with LangSmith
|
||||
|
||||
Assuming we've set our environment variables as shown in the beginning, all of the model and chain calls we've been making will have been automatically logged to LangSmith.
|
||||
Once there, we can use LangSmith to debug and annotate our application traces, then turn them into datasets for evaluating future iterations of the application.
|
||||
|
||||
Check out what the trace for the above chain would look like:
|
||||
https://smith.langchain.com/public/09370280-4330-4eb4-a7e8-c91817f6aa13/r
|
||||
|
||||
For more on LangSmith [head here](/docs/langsmith/).
|
||||
|
||||
## Serving with LangServe
|
||||
|
||||
Now that we've built an application, we need to serve it. That's where LangServe comes in.
|
||||
LangServe helps developers deploy LCEL chains as a REST API.
|
||||
The library is integrated with FastAPI and uses pydantic for data validation.
|
||||
|
||||
### Server
|
||||
|
||||
To create a server for our application we'll make a `serve.py` file with three things:
|
||||
1. The definition of our chain (same as above)
|
||||
2. Our FastAPI app
|
||||
3. A definition of a route from which to serve the chain, which is done with `langserve.add_routes`
|
||||
|
||||
```python
|
||||
#!/usr/bin/env python
|
||||
from typing import List
|
||||
|
||||
from fastapi import FastAPI
|
||||
from langchain.prompts import ChatPromptTemplate
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain.schema import BaseOutputParser
|
||||
from langserve import add_routes
|
||||
|
||||
# 1. Chain definition
|
||||
|
||||
class CommaSeparatedListOutputParser(BaseOutputParser[List[str]]):
|
||||
"""Parse the output of an LLM call to a comma-separated list."""
|
||||
|
||||
|
||||
def parse(self, text: str) -> List[str]:
|
||||
"""Parse the output of an LLM call."""
|
||||
return text.strip().split(", ")
|
||||
|
||||
template = """You are a helpful assistant who generates comma separated lists.
|
||||
A user will pass in a category, and you should generate 5 objects in that category in a comma separated list.
|
||||
ONLY return a comma separated list, and nothing more."""
|
||||
human_template = "{text}"
|
||||
|
||||
chat_prompt = ChatPromptTemplate.from_messages([
|
||||
("system", template),
|
||||
("human", human_template),
|
||||
])
|
||||
category_chain = chat_prompt | ChatOpenAI() | CommaSeparatedListOutputParser()
|
||||
|
||||
# 2. App definition
|
||||
app = FastAPI(
|
||||
title="LangChain Server",
|
||||
version="1.0",
|
||||
description="A simple api server using Langchain's Runnable interfaces",
|
||||
)
|
||||
|
||||
# 3. Adding chain route
|
||||
add_routes(
|
||||
app,
|
||||
category_chain,
|
||||
path="/category_chain",
|
||||
)
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
|
||||
uvicorn.run(app, host="localhost", port=8000)
|
||||
```
|
||||
|
||||
And that's it! If we execute this file:
|
||||
```bash
|
||||
python serve.py
|
||||
```
|
||||
we should see our chain being served at localhost:8000.
|
||||
|
||||
### Playground
|
||||
|
||||
Every LangServe service comes with a simple built-in UI for configuring and invoking the application with streaming output and visibility into intermediate steps.
|
||||
Head to http://localhost:8000/category_chain/playground/ to try it out!
|
||||
|
||||
### Client
|
||||
|
||||
Now let's set up a client for programmatically interacting with our service. We can easily do this with the `langserve.RemoteRunnable`.
|
||||
Using this, we can interact with the served chain as if it were running client-side.
|
||||
|
||||
```python
|
||||
from langserve import RemoteRunnable
|
||||
|
||||
remote_chain = RemoteRunnable("http://localhost:8000/category_chain/")
|
||||
remote_chain.invoke({"text": "colors"})
|
||||
# >> ['red', 'blue', 'green', 'yellow', 'orange']
|
||||
```
|
||||
|
||||
To learn more about the many other features of LangServe [head here](/docs/langserve).
|
||||
This `|` syntax is called the LangChain Expression Language.
|
||||
To learn more about this syntax, read the documentation [here](/docs/expression_language).
|
||||
|
||||
## Next steps
|
||||
|
||||
We've touched on how to build an application with LangChain, how to trace it with LangSmith, and how to serve it with LangServe.
|
||||
There are a lot more features in all three of these than we can cover here.
|
||||
This is it!
|
||||
We've now gone over how to create the core building block of LangChain applications.
|
||||
There is a lot more nuance in all of these components (LLMs, prompts, output parsers), and many other components to learn about as well.
|
||||
To continue on your journey:
|
||||
|
||||
- Read up on [LangChain Expression Language (LCEL)](/docs/expression_language) to learn how to chain these components together
|
||||
- [Dive deeper](/docs/modules/model_io) into LLMs, prompts, and output parsers and learn the other [key components](/docs/modules)
|
||||
- Explore common [end-to-end use cases](/docs/use_cases/qa_structured/sql) and [template applications](/docs/templates)
|
||||
- [Read up on LangSmith](/docs/langsmith/), the platform for debugging, testing, monitoring and more
|
||||
- Learn more about serving your applications with [LangServe](/docs/langserve)
|
||||
- [Dive deeper](/docs/modules/model_io) into LLMs, prompts, and output parsers
|
||||
- Learn the other [key components](/docs/modules)
|
||||
- Read up on [LangChain Expression Language](/docs/expression_language) to learn how to chain these components together
|
||||
- Check out our [helpful guides](/docs/guides) for detailed walkthroughs on particular topics
|
||||
- Explore [end-to-end use cases](/docs/use_cases/qa_structured/sql)
|
||||
|
||||
@@ -8,7 +8,7 @@ Here are a few different tools and functionalities to aid in debugging.
|
||||
|
||||
## Tracing
|
||||
|
||||
Platforms with tracing capabilities like [LangSmith](/docs/langsmith/) and [WandB](/docs/integrations/providers/wandb_tracing) are the most comprehensive solutions for debugging. These platforms make it easy to not only log and visualize LLM apps, but also to actively debug, test and refine them.
|
||||
Platforms with tracing capabilities like [LangSmith](/docs/guides/langsmith/) and [WandB](/docs/integrations/providers/wandb_tracing) are the most comprehensive solutions for debugging. These platforms make it easy to not only log and visualize LLM apps, but also to actively debug, test and refine them.
|
||||
|
||||
For anyone building production-grade LLM applications, we highly recommend using a platform like this.
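If you just want a quick look at what a chain is doing without a tracing platform, one lightweight option is LangChain's global debug flag (a rough sketch; the exact log format varies by version):

```python
import langchain

langchain.debug = True  # print inputs/outputs of chain, LLM, and tool calls to stdout

# ...then run your chain as usual; intermediate steps are logged to the console.
```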
|
||||
|
||||
|
||||
@@ -113,7 +113,7 @@
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"Here are two examples of how to use the `TrubricsCallbackHandler` with Langchain [LLMs](https://python.langchain.com/docs/modules/model_io/llms/) or [Chat Models](https://python.langchain.com/docs/modules/model_io/chat/). We will use OpenAI models, so set your `OPENAI_API_KEY` key here:"
|
||||
"Here are two examples of how to use the `TrubricsCallbackHandler` with Langchain [LLMs](https://python.langchain.com/docs/modules/model_io/models/llms/) or [Chat Models](https://python.langchain.com/docs/modules/model_io/models/chat/). We will use OpenAI models, so set your `OPENAI_API_KEY` key here:"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -5,20 +5,18 @@
|
||||
"id": "38f26d7a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Azure OpenAI\n",
|
||||
"# Azure\n",
|
||||
"\n",
|
||||
"This notebook goes over how to connect to an Azure hosted OpenAI endpoint. We recommend having version `openai>=1` installed."
|
||||
"This notebook goes over how to connect to an Azure hosted OpenAI endpoint"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 2,
|
||||
"id": "96164b42",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain.chat_models import AzureChatOpenAI\n",
|
||||
"from langchain.schema import HumanMessage"
|
||||
]
|
||||
@@ -26,51 +24,57 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "cbe4bb58-ba13-4355-8af9-cd990dc47a64",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.environ[\"AZURE_OPENAI_API_KEY\"] = \"...\"\n",
|
||||
"os.environ[\"AZURE_OPENAI_ENDPOINT\"] = \"https://<your-endpoint>.openai.azure.com/\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "8161278f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"BASE_URL = \"https://${TODO}.openai.azure.com\"\n",
|
||||
"API_KEY = \"...\"\n",
|
||||
"DEPLOYMENT_NAME = \"chat\"\n",
|
||||
"model = AzureChatOpenAI(\n",
|
||||
" openai_api_base=BASE_URL,\n",
|
||||
" openai_api_version=\"2023-05-15\",\n",
|
||||
" azure_deployment=\"your-deployment-name\",\n",
|
||||
" deployment_name=DEPLOYMENT_NAME,\n",
|
||||
" openai_api_key=API_KEY,\n",
|
||||
" openai_api_type=\"azure\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 5,
|
||||
"id": "99509140",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"J'adore la programmation.\")"
|
||||
"AIMessage(content=\"\\n\\nJ'aime programmer.\", additional_kwargs={})"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"message = HumanMessage(\n",
|
||||
" content=\"Translate this sentence from English to French. I love programming.\"\n",
|
||||
")\n",
|
||||
"model([message])"
|
||||
"model(\n",
|
||||
" [\n",
|
||||
" HumanMessage(\n",
|
||||
" content=\"Translate this sentence from English to French. I love programming.\"\n",
|
||||
" )\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3b6e9376",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f27fa24d",
|
||||
@@ -84,7 +88,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": null,
|
||||
"id": "0531798a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -94,19 +98,48 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 14,
|
||||
"id": "3fd97dfc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"BASE_URL = \"https://{endpoint}.openai.azure.com\"\n",
|
||||
"API_KEY = \"...\"\n",
|
||||
"DEPLOYMENT_NAME = \"gpt-35-turbo\" # in Azure, this deployment has version 0613 - input and output tokens are counted separately"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "aceddb72",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Total Cost (USD): $0.000054\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model = AzureChatOpenAI(\n",
|
||||
" openai_api_base=BASE_URL,\n",
|
||||
" openai_api_version=\"2023-05-15\",\n",
|
||||
" azure_deployment=\"gpt-35-turbo\", # in Azure, this deployment has version 0613 - input and output tokens are counted separately\n",
|
||||
" deployment_name=DEPLOYMENT_NAME,\n",
|
||||
" openai_api_key=API_KEY,\n",
|
||||
" openai_api_type=\"azure\",\n",
|
||||
")\n",
|
||||
"with get_openai_callback() as cb:\n",
|
||||
" model([message])\n",
|
||||
" model(\n",
|
||||
" [\n",
|
||||
" HumanMessage(\n",
|
||||
" content=\"Translate this sentence from English to French. I love programming.\"\n",
|
||||
" )\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
" print(\n",
|
||||
" f\"Total Cost (USD): ${format(cb.total_cost, '.6f')}\"\n",
|
||||
" ) # without specifying the model version, flat-rate 0.002 USD per 1k input and output tokens is used"
|
||||
@@ -136,12 +169,21 @@
|
||||
],
|
||||
"source": [
|
||||
"model0613 = AzureChatOpenAI(\n",
|
||||
" openai_api_base=BASE_URL,\n",
|
||||
" openai_api_version=\"2023-05-15\",\n",
|
||||
" deployment_name=\"gpt-35-turbo,\n",
|
||||
" deployment_name=DEPLOYMENT_NAME,\n",
|
||||
" openai_api_key=API_KEY,\n",
|
||||
" openai_api_type=\"azure\",\n",
|
||||
" model_version=\"0613\",\n",
|
||||
")\n",
|
||||
"with get_openai_callback() as cb:\n",
|
||||
" model0613([message])\n",
|
||||
" model0613(\n",
|
||||
" [\n",
|
||||
" HumanMessage(\n",
|
||||
" content=\"Translate this sentence from English to French. I love programming.\"\n",
|
||||
" )\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
" print(f\"Total Cost (USD): ${format(cb.total_cost, '.6f')}\")"
|
||||
]
|
||||
},
|
||||
@@ -170,7 +212,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.8.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -1,218 +1,214 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "959300d4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Hugging Face Local Pipelines\n",
|
||||
"\n",
|
||||
"Hugging Face models can be run locally through the `HuggingFacePipeline` class.\n",
|
||||
"\n",
|
||||
"The [Hugging Face Model Hub](https://huggingface.co/models) hosts over 120k models, 20k datasets, and 50k demo apps (Spaces), all open source and publicly available, in an online platform where people can easily collaborate and build ML together.\n",
|
||||
"\n",
|
||||
"These can be called from LangChain either through this local pipeline wrapper or by calling their hosted inference endpoints through the HuggingFaceHub class. For more information on the hosted pipelines, see the [HuggingFaceHub](huggingface_hub.html) notebook."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4c1b8450-5eaf-4d34-8341-2d785448a1ff",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"To use, you should have the ``transformers`` python [package installed](https://pypi.org/project/transformers/), as well as [pytorch](https://pytorch.org/get-started/locally/). You can also install `xformer` for a more memory-efficient attention implementation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d772b637-de00-4663-bd77-9bc96d798db2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install transformers --quiet"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "91ad075f-71d5-4bc8-ab91-cc0ad5ef16bb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Model Loading\n",
|
||||
"\n",
|
||||
"Models can be loaded by specifying the model parameters using the `from_model_id` method."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "165ae236-962a-4763-8052-c4836d78a5d2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms.huggingface_pipeline import HuggingFacePipeline\n",
|
||||
"\n",
|
||||
"hf = HuggingFacePipeline.from_model_id(\n",
|
||||
" model_id=\"gpt2\",\n",
|
||||
" task=\"text-generation\",\n",
|
||||
" pipeline_kwargs={\"max_new_tokens\": 10},\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "00104b27-0c15-4a97-b198-4512337ee211",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"They can also be loaded by passing in an existing `transformers` pipeline directly"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms.huggingface_pipeline import HuggingFacePipeline\n",
|
||||
"from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline\n",
|
||||
"\n",
|
||||
"model_id = \"gpt2\"\n",
|
||||
"tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
|
||||
"model = AutoModelForCausalLM.from_pretrained(model_id)\n",
|
||||
"pipe = pipeline(\"text-generation\", model=model, tokenizer=tokenizer, max_new_tokens=10)\n",
|
||||
"hf = HuggingFacePipeline(pipeline=pipe)"
|
||||
],
|
||||
"id": "7f426a4f"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create Chain\n",
|
||||
"\n",
|
||||
"With the model loaded into memory, you can compose it with a prompt to\n",
|
||||
"form a chain."
|
||||
],
|
||||
"id": "60e7ba8d"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3acf0069",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"\n",
|
||||
"template = \"\"\"Question: {question}\n",
|
||||
"\n",
|
||||
"Answer: Let's think step by step.\"\"\"\n",
|
||||
"prompt = PromptTemplate.from_template(template)\n",
|
||||
"\n",
|
||||
"chain = prompt | hf\n",
|
||||
"\n",
|
||||
"question = \"What is electroencephalography?\"\n",
|
||||
"\n",
|
||||
"print(chain.invoke({\"question\": question}))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dbbc3a37",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### GPU Inference\n",
|
||||
"\n",
|
||||
"When running on a machine with GPU, you can specify the `device=n` parameter to put the model on the specified device.\n",
|
||||
"Defaults to `-1` for CPU inference.\n",
|
||||
"\n",
|
||||
"If you have multiple-GPUs and/or the model is too large for a single GPU, you can specify `device_map=\"auto\"`, which requires and uses the [Accelerate](https://huggingface.co/docs/accelerate/index) library to automatically determine how to load the model weights. \n",
|
||||
"\n",
|
||||
"*Note*: both `device` and `device_map` should not be specified together and can lead to unexpected behavior."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"gpu_llm = HuggingFacePipeline.from_model_id(\n",
|
||||
" model_id=\"gpt2\",\n",
|
||||
" task=\"text-generation\",\n",
|
||||
" device=0, # replace with device_map=\"auto\" to use the accelerate library.\n",
|
||||
" pipeline_kwargs={\"max_new_tokens\": 10},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"gpu_chain = prompt | gpu_llm\n",
|
||||
"\n",
|
||||
"question = \"What is electroencephalography?\"\n",
|
||||
"\n",
|
||||
"print(gpu_chain.invoke({\"question\": question}))"
|
||||
],
|
||||
"id": "703c91c8"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Batch GPU Inference\n",
|
||||
"\n",
|
||||
"If running on a device with GPU, you can also run inference on the GPU in batch mode."
|
||||
],
|
||||
"id": "59276016"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "097ba62f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"gpu_llm = HuggingFacePipeline.from_model_id(\n",
|
||||
" model_id=\"bigscience/bloom-1b7\",\n",
|
||||
" task=\"text-generation\",\n",
|
||||
" device=0, # -1 for CPU\n",
|
||||
" batch_size=2, # adjust as needed based on GPU map and model size.\n",
|
||||
" model_kwargs={\"temperature\": 0, \"max_length\": 64},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"gpu_chain = prompt | gpu_llm.bind(stop=[\"\\n\\n\"])\n",
|
||||
"\n",
|
||||
"questions = []\n",
|
||||
"for i in range(4):\n",
|
||||
" questions.append({\"question\": f\"What is the number {i} in french?\"})\n",
|
||||
"\n",
|
||||
"answers = gpu_chain.batch(questions)\n",
|
||||
"for answer in answers:\n",
|
||||
" print(answer)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.5"
|
||||
}
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "959300d4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Hugging Face Local Pipelines\n",
|
||||
"\n",
|
||||
"Hugging Face models can be run locally through the `HuggingFacePipeline` class.\n",
|
||||
"\n",
|
||||
"The [Hugging Face Model Hub](https://huggingface.co/models) hosts over 120k models, 20k datasets, and 50k demo apps (Spaces), all open source and publicly available, in an online platform where people can easily collaborate and build ML together.\n",
|
||||
"\n",
|
||||
"These can be called from LangChain either through this local pipeline wrapper or by calling their hosted inference endpoints through the HuggingFaceHub class. For more information on the hosted pipelines, see the [HuggingFaceHub](huggingface_hub.html) notebook."
|
||||
]
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4c1b8450-5eaf-4d34-8341-2d785448a1ff",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"To use, you should have the ``transformers`` python [package installed](https://pypi.org/project/transformers/), as well as [pytorch](https://pytorch.org/get-started/locally/). You can also install `xformer` for a more memory-efficient attention implementation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d772b637-de00-4663-bd77-9bc96d798db2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install transformers --quiet"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "91ad075f-71d5-4bc8-ab91-cc0ad5ef16bb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Model Loading\n",
|
||||
"\n",
|
||||
"Models can be loaded by specifying the model parameters using the `from_model_id` method."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "165ae236-962a-4763-8052-c4836d78a5d2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms.huggingface_pipeline import HuggingFacePipeline\n",
|
||||
"\n",
|
||||
"hf = HuggingFacePipeline.from_model_id(\n",
|
||||
" model_id=\"gpt2\",\n",
|
||||
" task=\"text-generation\",\n",
|
||||
" pipeline_kwargs={\"max_new_tokens\": 10},\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "00104b27-0c15-4a97-b198-4512337ee211",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"They can also be loaded by passing in an existing `transformers` pipeline directly"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms.huggingface_pipeline import HuggingFacePipeline\n",
|
||||
"from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline\n",
|
||||
"\n",
|
||||
"model_id = \"gpt2\"\n",
|
||||
"tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
|
||||
"model = AutoModelForCausalLM.from_pretrained(model_id)\n",
|
||||
"pipe = pipeline(\"text-generation\", model=model, tokenizer=tokenizer, max_new_tokens=10)\n",
|
||||
"hf = HuggingFacePipeline(pipeline=pipe)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create Chain\n",
|
||||
"\n",
|
||||
"With the model loaded into memory, you can compose it with a prompt to\n",
|
||||
"form a chain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3acf0069",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"\n",
|
||||
"template = \"\"\"Question: {question}\n",
|
||||
"\n",
|
||||
"Answer: Let's think step by step.\"\"\"\n",
|
||||
"prompt = PromptTemplate.from_template(template)\n",
|
||||
"\n",
|
||||
"chain = prompt | hf\n",
|
||||
"\n",
|
||||
"question = \"What is electroencephalography?\"\n",
|
||||
"\n",
|
||||
"print(chain.invoke({\"question\": question}))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dbbc3a37",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### GPU Inference\n",
|
||||
"\n",
|
||||
"When running on a machine with GPU, you can specify the `device=n` parameter to put the model on the specified device.\n",
|
||||
"Defaults to `-1` for CPU inference.\n",
|
||||
"\n",
|
||||
"If you have multiple-GPUs and/or the model is too large for a single GPU, you can specify `device_map=\"auto\"`, which requires and uses the [Accelerate](https://huggingface.co/docs/accelerate/index) library to automatically determine how to load the model weights. \n",
|
||||
"\n",
|
||||
"*Note*: both `device` and `device_map` should not be specified together and can lead to unexpected behavior."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"gpu_llm = HuggingFacePipeline.from_model_id(\n",
|
||||
" model_id=\"gpt2\",\n",
|
||||
" task=\"text-generation\",\n",
|
||||
" device=0, # replace with device_map=\"auto\" to use the accelerate library.\n",
|
||||
" pipeline_kwargs={\"max_new_tokens\": 10},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"gpu_chain = prompt | gpu_llm\n",
|
||||
"\n",
|
||||
"question = \"What is electroencephalography?\"\n",
|
||||
"\n",
|
||||
"print(gpu_chain.invoke({\"question\": question}))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Batch GPU Inference\n",
|
||||
"\n",
|
||||
"If running on a device with GPU, you can also run inference on the GPU in batch mode."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "097ba62f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"gpu_llm = HuggingFacePipeline.from_model_id(\n",
|
||||
" model_id=\"bigscience/bloom-1b7\",\n",
|
||||
" task=\"text-generation\",\n",
|
||||
" device=0, # -1 for CPU\n",
|
||||
" batch_size=2, # adjust as needed based on GPU map and model size.\n",
|
||||
" model_kwargs={\"temperature\": 0, \"max_length\": 64},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"gpu_chain = prompt | gpu_llm.bind(stop=[\"\\n\\n\"])\n",
|
||||
"\n",
|
||||
"questions = []\n",
|
||||
"for i in range(4):\n",
|
||||
" questions.append({\"question\": f\"What is the number {i} in french?\"})\n",
|
||||
"\n",
|
||||
"answers = gpu_chain.batch(questions)\n",
|
||||
"for answer in answers:\n",
|
||||
" print(answer)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
|
||||
@@ -288,7 +288,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Streaming Response\n",
|
||||
"You can optionally stream the response as it is produced, which is helpful to show interactivity to users for time-consuming generations. See detailed docs on [Streaming](https://python.langchain.com/docs/modules/model_io/llms/how_to/streaming_llm) for more information."
|
||||
"You can optionally stream the response as it is produced, which is helpful to show interactivity to users for time-consuming generations. See detailed docs on [Streaming](https://python.langchain.com/docs/modules/model_io/models/llms/how_to/streaming_llm) for more information."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1,76 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "91c6a7ef",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Neo4j\n",
|
||||
"\n",
|
||||
"[Neo4j](https://en.wikipedia.org/wiki/Neo4j) is an open-source graph database management system, renowned for its efficient management of highly connected data. Unlike traditional databases that store data in tables, Neo4j uses a graph structure with nodes, edges, and properties to represent and store data. This design allows for high-performance queries on complex data relationships.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use `Neo4j` to store chat message history."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d15e3302",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.memory import Neo4jChatMessageHistory\n",
|
||||
"\n",
|
||||
"history = Neo4jChatMessageHistory(\n",
|
||||
" url=\"bolt://localhost:7687\",\n",
|
||||
" username=\"neo4j\",\n",
|
||||
" password=\"password\",\n",
|
||||
" session_id=\"session_id_1\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"history.add_user_message(\"hi!\")\n",
|
||||
"\n",
|
||||
"history.add_ai_message(\"whats up?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "64fc465e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"history.messages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8af285f8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
All functionality related to `Microsoft Azure` and other `Microsoft` products.
|
||||
|
||||
## Chat Models
|
||||
## LLM
|
||||
### Azure OpenAI
|
||||
|
||||
>[Microsoft Azure](https://en.wikipedia.org/wiki/Microsoft_Azure), often referred to as `Azure` is a cloud computing platform run by `Microsoft`, which offers access, management, and development of applications and services through global data centers. It provides a range of capabilities, including software as a service (SaaS), platform as a service (PaaS), and infrastructure as a service (IaaS). `Microsoft Azure` supports many programming languages, tools, and frameworks, including Microsoft-specific and third-party software and systems.
|
||||
@@ -18,15 +18,16 @@ Set the environment variables to get access to the `Azure OpenAI` service.
|
||||
```python
|
||||
import os
|
||||
|
||||
os.environ["AZURE_OPENAI_ENDPOINT"] = "https://<your-endpoint.openai.azure.com/"
|
||||
os.environ["AZURE_OPENAI_API_KEY"] = "your AzureOpenAI key"
|
||||
os.environ["OPENAI_API_TYPE"] = "azure"
|
||||
os.environ["OPENAI_API_BASE"] = "https://<your-endpoint.openai.azure.com/"
|
||||
os.environ["OPENAI_API_KEY"] = "your AzureOpenAI key"
|
||||
os.environ["OPENAI_API_VERSION"] = "2023-05-15"
|
||||
```
|
||||
|
||||
See a [usage example](/docs/integrations/chat/azure_chat_openai)
|
||||
|
||||
See a [usage example](/docs/integrations/llms/azure_openai_example).
|
||||
|
||||
```python
|
||||
from langchain.chat_models import AzureChatOpenAI
|
||||
from langchain.llms import AzureOpenAI
|
||||
```
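With the environment variables above set, a minimal instantiation sketch looks like this (the deployment name and API version are placeholders to replace with your own):

```python
from langchain.chat_models import AzureChatOpenAI

model = AzureChatOpenAI(
    azure_deployment="<your-deployment-name>",
    openai_api_version="2023-05-15",
)
```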
|
||||
|
||||
## Text Embedding Models
|
||||
@@ -35,16 +36,16 @@ from langchain.chat_models import AzureChatOpenAI
|
||||
See a [usage example](/docs/integrations/text_embedding/azureopenai)
|
||||
|
||||
```python
|
||||
from langchain.embeddings import AzureOpenAIEmbeddings
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
```
|
||||
|
||||
## LLMs
|
||||
## Chat Models
|
||||
### Azure OpenAI
|
||||
|
||||
See a [usage example](/docs/integrations/llms/azure_openai_example).
|
||||
See a [usage example](/docs/integrations/chat/azure_chat_openai)
|
||||
|
||||
```python
|
||||
from langchain.llms import AzureOpenAI
|
||||
from langchain.chat_models import AzureChatOpenAI
|
||||
```
|
||||
|
||||
## Document loaders
|
||||
|
||||
@@ -1,47 +1,22 @@
|
||||
# Fireworks
|
||||
|
||||
This page covers how to use [Fireworks](https://app.fireworks.ai/) models within
|
||||
LangChain.
|
||||
This page covers how to use the Fireworks models within LangChain.
|
||||
|
||||
## Installation and setup
|
||||
## Installation and Setup
|
||||
|
||||
- Install the Fireworks client library.
|
||||
|
||||
```
|
||||
pip install fireworks-ai
|
||||
```
|
||||
|
||||
- Get a Fireworks API key by signing up at [app.fireworks.ai](https://app.fireworks.ai).
|
||||
- To use the Fireworks model, you need to have a Fireworks API key. To generate one, sign up at [app.fireworks.ai](https://app.fireworks.ai).
|
||||
- Authenticate by setting the FIREWORKS_API_KEY environment variable.
|
||||
|
||||
## Authentication
|
||||
## LLM
|
||||
|
||||
There are two ways to authenticate using your Fireworks API key:
|
||||
Fireworks integrates with LangChain through the LLM module, which allows for standardized usage of any models deployed on the Fireworks platform.
|
||||
|
||||
1. Setting the `FIREWORKS_API_KEY` environment variable.
|
||||
|
||||
```python
|
||||
os.environ["FIREWORKS_API_KEY"] = "<KEY>"
|
||||
```
|
||||
|
||||
2. Setting `fireworks_api_key` field in the Fireworks LLM module.
|
||||
|
||||
```python
|
||||
llm = Fireworks(fireworks_api_key="<KEY>")
|
||||
```
|
||||
|
||||
## Using the Fireworks LLM module
|
||||
|
||||
Fireworks integrates with LangChain through the LLM module. In this example, we
|
||||
will work with the llama-v2-13b-chat model.
|
||||
In this example, we'll work with the llama-v2-13b-chat model.
|
||||
|
||||
```python
|
||||
from langchain.llms.fireworks import Fireworks
|
||||
|
||||
llm = Fireworks(
|
||||
fireworks_api_key="<KEY>",
|
||||
model="accounts/fireworks/models/llama-v2-13b-chat",
|
||||
max_tokens=256)
|
||||
llm = Fireworks(model="fireworks-llama-v2-13b-chat", max_tokens=256, temperature=0.4)
|
||||
llm("Name 3 sports.")
|
||||
```
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ Get a [Minimax group id](https://api.minimax.chat/user-center/basic-information)
|
||||
## LLM
|
||||
|
||||
There exists a Minimax LLM wrapper, which you can access with
|
||||
See a [usage example](/docs/modules/model_io/llms/integrations/minimax).
|
||||
See a [usage example](/docs/modules/model_io/models/llms/integrations/minimax).
|
||||
|
||||
```python
|
||||
from langchain.llms import Minimax
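
# A minimal usage sketch; it assumes MINIMAX_API_KEY and MINIMAX_GROUP_ID are
# set in the environment (the prompt below is illustrative).
llm = Minimax()
llm("Tell me a joke.")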
|
||||
@@ -19,7 +19,7 @@ from langchain.llms import Minimax
|
||||
|
||||
## Chat Models
|
||||
|
||||
See a [usage example](/docs/modules/model_io/chat/integrations/minimax)
|
||||
See a [usage example](/docs/modules/model_io/models/chat/integrations/minimax)
|
||||
|
||||
```python
|
||||
from langchain.chat_models import MiniMaxChat
|
||||
|
||||
@@ -46,6 +46,6 @@ eng = sqlalchemy.create_engine(conn_str)
|
||||
set_llm_cache(SQLAlchemyCache(engine=eng))
|
||||
```
|
||||
|
||||
From here, see the [LLM Caching](/docs/modules/model_io/llms/how_to/llm_caching) documentation on how to use.
|
||||
From here, see the [LLM Caching](/docs/modules/model_io/models/llms/how_to/llm_caching) documentation on how to use.
|
||||
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install langchain fleet-context openai pandas faiss-cpu # faiss-gpu for CUDA supported GPU"
|
||||
"!pip install langchain openai pandas faiss-cpu # faiss-gpu for CUDA supported GPU"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -43,25 +43,29 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"def load_fleet_retriever(\n",
|
||||
" df: pd.DataFrame,\n",
|
||||
" url: str,\n",
|
||||
" *,\n",
|
||||
" vectorstore_cls: Type[VectorStore] = FAISS,\n",
|
||||
" docstore: Optional[BaseStore] = None,\n",
|
||||
" **kwargs: Any,\n",
|
||||
" vectorstore_cls: Type[VectorStore]=FAISS, \n",
|
||||
" docstore: Optional[BaseStore]=None,\n",
|
||||
" **kwargs: Any\n",
|
||||
"):\n",
|
||||
" df = pd.read_parquet(url)\n",
|
||||
" vectorstore = _populate_vectorstore(df, vectorstore_cls)\n",
|
||||
" if docstore is None:\n",
|
||||
" return vectorstore.as_retriever(**kwargs)\n",
|
||||
" else:\n",
|
||||
" _populate_docstore(df, docstore)\n",
|
||||
" return MultiVectorRetriever(\n",
|
||||
" vectorstore=vectorstore, docstore=docstore, id_key=\"parent\", **kwargs\n",
|
||||
" vectorstore=vectorstore, \n",
|
||||
" docstore=docstore, \n",
|
||||
" id_key=\"parent\", \n",
|
||||
" **kwargs\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def _populate_vectorstore(\n",
|
||||
" df: pd.DataFrame,\n",
|
||||
" vectorstore_cls: Type[VectorStore],\n",
|
||||
" vectorstore_cls: Type[VectorStore], \n",
|
||||
") -> VectorStore:\n",
|
||||
" if not hasattr(vectorstore_cls, \"from_embeddings\"):\n",
|
||||
" raise ValueError(\n",
|
||||
@@ -71,11 +75,11 @@
|
||||
" texts_embeddings = []\n",
|
||||
" metadatas = []\n",
|
||||
" for _, row in df.iterrows():\n",
|
||||
" texts_embeddings.append((row.metadata[\"text\"], row[\"dense_embeddings\"]))\n",
|
||||
" texts_embeddings.append((row.metadata['text'], row['dense_embeddings']))\n",
|
||||
" metadatas.append(row.metadata)\n",
|
||||
" return vectorstore_cls.from_embeddings(\n",
|
||||
" texts_embeddings,\n",
|
||||
" OpenAIEmbeddings(model=\"text-embedding-ada-002\"),\n",
|
||||
" texts_embeddings, \n",
|
||||
" OpenAIEmbeddings(model='text-embedding-ada-002'), \n",
|
||||
" metadatas=metadatas,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
@@ -83,19 +87,15 @@
|
||||
"def _populate_docstore(df: pd.DataFrame, docstore: BaseStore) -> None:\n",
|
||||
" parent_docs = []\n",
|
||||
" df = df.copy()\n",
|
||||
" df[\"parent\"] = df.metadata.apply(itemgetter(\"parent\"))\n",
|
||||
" for parent_id, group in df.groupby(\"parent\"):\n",
|
||||
" sorted_group = group.iloc[\n",
|
||||
" group.metadata.apply(itemgetter(\"section_index\")).argsort()\n",
|
||||
" ]\n",
|
||||
" text = \"\".join(sorted_group.metadata.apply(itemgetter(\"text\")))\n",
|
||||
" metadata = {\n",
|
||||
" k: sorted_group.iloc[0].metadata[k] for k in (\"title\", \"type\", \"url\")\n",
|
||||
" }\n",
|
||||
" text = metadata[\"title\"] + \"\\n\" + text\n",
|
||||
" metadata[\"id\"] = parent_id\n",
|
||||
" df['parent'] = df.metadata.apply(itemgetter('parent'))\n",
|
||||
" for parent_id, group in df.groupby('parent'):\n",
|
||||
" sorted_group = group.iloc[group.metadata.apply(itemgetter('section_index')).argsort()]\n",
|
||||
" text = \"\".join(sorted_group.metadata.apply(itemgetter('text')))\n",
|
||||
" metadata = {k: sorted_group.iloc[0].metadata[k] for k in ('title', 'type', 'url')}\n",
|
||||
" text = metadata['title'] + \"\\n\" + text\n",
|
||||
" metadata['id'] = parent_id\n",
|
||||
" parent_docs.append(Document(page_content=text, metadata=metadata))\n",
|
||||
" docstore.mset(((d.metadata[\"id\"], d) for d in parent_docs))"
|
||||
" docstore.mset(((d.metadata['id'], d) for d in parent_docs))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -105,10 +105,7 @@
|
||||
"source": [
|
||||
"## Retriever chunks\n",
|
||||
"\n",
|
||||
"As part of their embedding process, the Fleet AI team first chunked long documents before embedding them. This means the vectors correspond to sections of pages in the LangChain docs, not entire pages. By default, when we spin up a retriever from these embeddings, we'll be retrieving these embedded chunks.",
|
||||
"\n",
|
||||
"\n",
|
||||
"We will be using Fleet Context's `download_embeddings()` to grab Langchain's documentation embeddings. You can view all supported libraries' documentation at https://fleet.so/context."
|
||||
"As part of their embedding process, the Fleet AI team first chunked long documents before embedding them. This means the vectors correspond to sections of pages in the LangChain docs, not entire pages. By default, when we spin up a retriever from these embeddings, we'll be retrieving these embedded chunks:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -118,10 +115,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from context import download_embeddings\n",
|
||||
"\n",
|
||||
"df = download_embeddings(\"langchain\")\n",
|
||||
"vecstore_retriever = load_fleet_retriever(df)"
|
||||
"vecstore_retriever = load_fleet_retriever(\n",
|
||||
" \"https://www.dropbox.com/scl/fi/4rescpkrg9970s3huz47l/libraries_langchain_release.parquet?rlkey=283knw4wamezfwiidgpgptkep&dl=1\",\n",
|
||||
")"
|
||||
]
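As a usage sketch (assuming the retriever built above is in scope), each retrieved result is a `Document` holding one embedded chunk of a LangChain documentation page:

```python
# Query the chunk-level retriever; the metadata keys follow the Fleet AI schema
# referenced earlier (e.g. "title" and "url").
docs = vecstore_retriever.get_relevant_documents(
    "How do I create a FAISS vector store retriever?"
)
for doc in docs[:3]:
    print(doc.metadata.get("title"), "-", doc.page_content[:100])
```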
|
||||
},
|
||||
{
|
||||
@@ -179,7 +175,7 @@
|
||||
"\n",
|
||||
"parent_retriever = load_fleet_retriever(\n",
|
||||
" \"https://www.dropbox.com/scl/fi/4rescpkrg9970s3huz47l/libraries_langchain_release.parquet?rlkey=283knw4wamezfwiidgpgptkep&dl=1\",\n",
|
||||
" docstore=InMemoryStore(),\n",
|
||||
" docstore=InMemoryStore()\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -229,11 +225,8 @@
|
||||
"from langchain.schema import StrOutputParser\n",
|
||||
"from langchain.schema.runnable import RunnablePassthrough\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"\"\"You are a great software engineer who is very familiar \\\n",
|
||||
"prompt = ChatPromptTemplate.from_messages([\n",
|
||||
" (\"system\", \"\"\"You are a great software engineer who is very familiar \\\n",
|
||||
"with Python. Given a user question or request about a new Python library called LangChain and \\\n",
|
||||
"parts of the LangChain documentation, answer the question or generate the requested code. \\\n",
|
||||
"Your answers must be accurate, should include code whenever possible, and should assume anything \\\n",
|
||||
@@ -243,21 +236,17 @@
|
||||
"LangChain Documentation\n",
|
||||
"------------------\n",
|
||||
"\n",
|
||||
"{context}\"\"\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"{question}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"{context}\"\"\"),\n",
|
||||
" (\"human\", \"{question}\")\n",
|
||||
"])\n",
|
||||
"\n",
|
||||
"model = ChatOpenAI(model=\"gpt-3.5-turbo-16k\")\n",
|
||||
"\n",
|
||||
"chain = (\n",
|
||||
" {\n",
|
||||
" \"question\": RunnablePassthrough(),\n",
|
||||
" \"context\": parent_retriever\n",
|
||||
" | (lambda docs: \"\\n\\n\".join(d.page_content for d in docs)),\n",
|
||||
" }\n",
|
||||
" | prompt\n",
|
||||
" \"context\": parent_retriever | (lambda docs: \"\\n\\n\".join(d.page_content for d in docs))\n",
|
||||
" } | prompt\n",
|
||||
" | model\n",
|
||||
" | StrOutputParser()\n",
|
||||
")"
|
||||
@@ -300,10 +289,8 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for chunk in chain.invoke(\n",
|
||||
" \"How do I create a FAISS vector store retriever that returns 10 documents per search query\"\n",
|
||||
"):\n",
|
||||
" print(chunk, end=\"\", flush=True)"
|
||||
"for chunk in chain.invoke(\"How do I create a FAISS vector store retriever that returns 10 documents per search query\"):\n",
|
||||
" print(chunk, end='', flush=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -66,26 +66,25 @@
|
||||
"id": "fa339ca0-f478-440c-ba80-0e5f41a19ce1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"By default, all files with these MIME types can be converted to `Document`.\n",
|
||||
"By default, all files with these mime-type can be converted to `Document`.\n",
|
||||
"- text/text\n",
|
||||
"- text/plain\n",
|
||||
"- text/html\n",
|
||||
"- text/csv\n",
|
||||
"- text/markdown\n",
|
||||
"- image/png\n",
|
||||
"- image/jpeg\n",
|
||||
"- application/epub+zip\n",
|
||||
"- application/pdf\n",
|
||||
"- application/rtf\n",
|
||||
"- application/vnd.google-apps.document (GDoc)\n",
|
||||
"- application/vnd.google-apps.presentation (GSlide)\n",
|
||||
"- application/vnd.google-apps.spreadsheet (GSheet)\n",
|
||||
"- application/vnd.google.colaboratory (Notebook colab)\n",
|
||||
"- application/vnd.openxmlformats-officedocument.presentationml.presentation (PPTX)\n",
|
||||
"- application/vnd.openxmlformats-officedocument.wordprocessingml.document (DOCX)\n",
|
||||
"\n",
|
||||
"- `text/text`\n",
|
||||
"- `text/plain`\n",
|
||||
"- `text/html`\n",
|
||||
"- `text/csv`\n",
|
||||
"- `text/markdown`\n",
|
||||
"- `image/png`\n",
|
||||
"- `image/jpeg`\n",
|
||||
"- `application/epub+zip`\n",
|
||||
"- `application/pdf`\n",
|
||||
"- `application/rtf`\n",
|
||||
"- `application/vnd.google-apps.document` (GDoc)\n",
|
||||
"- `application/vnd.google-apps.presentation` (GSlide)\n",
|
||||
"- `application/vnd.google-apps.spreadsheet` (GSheet)\n",
|
||||
"- `application/vnd.google.colaboratory` (Notebook colab)\n",
|
||||
"- `application/vnd.openxmlformats-officedocument.presentationml.presentation` (PPTX)\n",
|
||||
"- `application/vnd.openxmlformats-officedocument.wordprocessingml.document` (DOCX)\n",
|
||||
"\n",
|
||||
"It's possible to update or customize this. See the documentation of `GoogleDriveRetriever`.\n",
|
||||
"It's possible to update or customize this. See the documentation of `GDriveRetriever`.\n",
|
||||
"\n",
|
||||
"But, the corresponding packages must be installed."
|
||||
]
|
||||
@@ -122,17 +121,16 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can customize the criteria to select the files. A set of predefined filter are proposed:\n",
|
||||
"\n",
|
||||
"| Template | Description |\n",
|
||||
"| -------------------------------------- | --------------------------------------------------------------------- |\n",
|
||||
"| `gdrive-all-in-folder` | Return all compatible files from a `folder_id` |\n",
|
||||
"| `gdrive-query` | Search `query` in all drives |\n",
|
||||
"| `gdrive-by-name` | Search file with name `query` |\n",
|
||||
"| `gdrive-query-in-folder` | Search `query` in `folder_id` (and sub-folders in `_recursive=true`) |\n",
|
||||
"| `gdrive-mime-type` | Search a specific `mime_type` |\n",
|
||||
"| `gdrive-mime-type-in-folder` | Search a specific `mime_type` in `folder_id` |\n",
|
||||
"| `gdrive-query-with-mime-type` | Search `query` with a specific `mime_type` |\n",
|
||||
"| `gdrive-query-with-mime-type-and-folder` | Search `query` with a specific `mime_type` and in `folder_id` |"
|
||||
"| template | description |\n",
|
||||
"| -------------------------------------- | --------------------------------------------------------------------- |\n",
|
||||
"| gdrive-all-in-folder | Return all compatible files from a `folder_id` |\n",
|
||||
"| gdrive-query | Search `query` in all drives |\n",
|
||||
"| gdrive-by-name | Search file with name `query`) |\n",
|
||||
"| gdrive-query-in-folder | Search `query` in `folder_id` (and sub-folders in `_recursive=true`) |\n",
|
||||
"| gdrive-mime-type | Search a specific `mime_type` |\n",
|
||||
"| gdrive-mime-type-in-folder | Search a specific `mime_type` in `folder_id` |\n",
|
||||
"| gdrive-query-with-mime-type | Search `query` with a specific `mime_type` |\n",
|
||||
"| gdrive-query-with-mime-type-and-folder | Search `query` with a specific `mime_type` and in `folder_id` |"
|
||||
]
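As an illustration, a hedged sketch of selecting one of these templates (the import path assumes the `langchain-googledrive` package, and `folder_id` is a placeholder; adapt both to your setup):

```python
from langchain_googledrive.retrievers import GoogleDriveRetriever

retriever = GoogleDriveRetriever(
    template="gdrive-query-in-folder",  # one of the predefined templates above
    folder_id="<your-folder-id>",       # placeholder Google Drive folder id
    num_results=2,
)
docs = retriever.get_relevant_documents("machine learning")
```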
|
||||
},
|
||||
{
|
||||
|
||||
@@ -5,100 +5,9 @@
|
||||
"id": "c3852491",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Azure OpenAI\n",
|
||||
"# AzureOpenAI\n",
|
||||
"\n",
|
||||
"Let's load the Azure OpenAI Embedding class with environment variables set to indicate to use Azure endpoints."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "8a6ed30d-806f-4800-b5fd-d04126be9060",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"AZURE_OPENAI_API_KEY\"] = \"...\"\n",
|
||||
"os.environ[\"AZURE_OPENAI_ENDPOINT\"] = \"https://<your-endpoint>.openai.azure.com/\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "20179bc7-3f71-4909-be12-d38bce009b18",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings import AzureOpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = AzureOpenAIEmbeddings(\n",
|
||||
" azure_deployment=\"<your-embeddings-deployment-name>\",\n",
|
||||
" openai_api_version=\"2023-05-15\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "f8cb9dca-738b-450f-9986-5c3efd3c6eb3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text = \"this is a test document\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "0fae0295-b117-4a5a-8b98-500c79306551",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query_result = embeddings.embed_query(text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "65a01ddd-0bbf-444f-a87f-93af25ef902c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"doc_result = embeddings.embed_documents([text])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "45771052-68ca-4e03-9c4f-a0c7796d9442",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[-0.012222584727053133,\n",
|
||||
" 0.0072103982392216145,\n",
|
||||
" -0.014818063280923775,\n",
|
||||
" -0.026444746872933557,\n",
|
||||
" -0.0034330499700826883]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"doc_result[0][:5]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e66ec1f2-6768-4ee5-84bf-a2d76adc20c8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## [Legacy] When using `openai<1`"
|
||||
"Let's load the OpenAI Embedding class with environment variables set to indicate to use Azure endpoints."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -170,9 +79,9 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "poetry-venv",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "poetry-venv"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
@@ -1,156 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Qdrant FastEmbed\n",
|
||||
"\n",
|
||||
"[FastEmbed](https://qdrant.github.io/fastembed/) is a lightweight, fast, Python library built for embedding generation. \n",
|
||||
"\n",
|
||||
"- Quantized model weights\n",
|
||||
"- ONNX Runtime, no PyTorch dependency\n",
|
||||
"- CPU-first design\n",
|
||||
"- Data-parallelism for encoding of large datasets."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "2a773d8d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Dependencies\n",
|
||||
"\n",
|
||||
"To use FastEmbed with LangChain, install the `fastembed` Python package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "91ea14ce-831d-409a-a88f-30353acdabd1",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install fastembed"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "426f1156",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Imports"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "3f5dc9d7-65e3-4b5b-9086-3327d016cfe0",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.fastembed import FastEmbedEmbeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiating FastEmbed\n",
|
||||
" \n",
|
||||
"### Parameters\n",
|
||||
"- `model_name: str` (default: \"BAAI/bge-small-en-v1.5\")\n",
|
||||
" > Name of the FastEmbedding model to use. You can find the list of supported models [here](https://qdrant.github.io/fastembed/examples/Supported_Models/).\n",
|
||||
"\n",
|
||||
"- `max_length: int` (default: 512)\n",
|
||||
" > The maximum number of tokens. Unknown behavior for values > 512.\n",
|
||||
"\n",
|
||||
"- `cache_dir: Optional[str]`\n",
|
||||
" > The path to the cache directory. Defaults to `local_cache` in the parent directory.\n",
|
||||
"\n",
|
||||
"- `threads: Optional[int]`\n",
|
||||
" > The number of threads a single onnxruntime session can use. Defaults to None.\n",
|
||||
"\n",
|
||||
"- `doc_embed_type: Literal[\"default\", \"passage\"]` (default: \"default\")\n",
|
||||
" > \"default\": Uses FastEmbed's default embedding method.\n",
|
||||
" \n",
|
||||
" > \"passage\": Prefixes the text with \"passage\" before embedding."
|
||||
]
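For example, a sketch of a non-default instantiation using the parameters above (the values are illustrative):

```python
from langchain.embeddings.fastembed import FastEmbedEmbeddings

embeddings = FastEmbedEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",  # default model; see the supported-models list
    max_length=512,                       # maximum number of tokens
    doc_embed_type="passage",             # prefix documents with "passage" before embedding
    threads=4,                            # illustrative onnxruntime thread count
)
```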
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6fb585dd",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = FastEmbedEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage\n",
|
||||
"\n",
|
||||
"### Generating document embeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"document_embeddings = embeddings.embed_documents(\n",
|
||||
" [\"This is a document\", \"This is some other document\"]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Generating query embeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query_embeddings = embeddings.embed_query(\"This is a query\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
@@ -48,14 +48,14 @@
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"if not os.environ.get(\"GRADIENT_ACCESS_TOKEN\", None):\n",
|
||||
"if not os.environ.get(\"GRADIENT_ACCESS_TOKEN\",None):\n",
|
||||
" # Access token under https://auth.gradient.ai/select-workspace\n",
|
||||
" os.environ[\"GRADIENT_ACCESS_TOKEN\"] = getpass(\"gradient.ai access token:\")\n",
|
||||
"if not os.environ.get(\"GRADIENT_WORKSPACE_ID\", None):\n",
|
||||
"if not os.environ.get(\"GRADIENT_WORKSPACE_ID\",None):\n",
|
||||
" # `ID` listed in `$ gradient workspace list`\n",
|
||||
" # also displayed after login at at https://auth.gradient.ai/select-workspace\n",
|
||||
" os.environ[\"GRADIENT_WORKSPACE_ID\"] = getpass(\"gradient.ai workspace id:\")\n",
|
||||
"if not os.environ.get(\"GRADIENT_MODEL_ADAPTER_ID\", None):\n",
|
||||
"if not os.environ.get(\"GRADIENT_MODEL_ADAPTER_ID\",None):\n",
|
||||
" # `ID` listed in `$ gradient model list --workspace-id \"$GRADIENT_WORKSPACE_ID\"`\n",
|
||||
" os.environ[\"GRADIENT_MODEL_ID\"] = getpass(\"gradient.ai model id:\")"
|
||||
]
|
||||
@@ -82,7 +82,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = GradientLLM(\n",
|
||||
" model_id=os.environ[\"GRADIENT_MODEL_ID\"],\n",
|
||||
" model_id=os.environ['GRADIENT_MODEL_ID'],\n",
|
||||
" # # optional: set new credentials, they default to environment variables\n",
|
||||
" # gradient_workspace_id=os.environ[\"GRADIENT_WORKSPACE_ID\"],\n",
|
||||
" # gradient_access_token=os.environ[\"GRADIENT_ACCESS_TOKEN\"],\n",
|
||||
@@ -169,9 +169,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\n",
|
||||
" \"Please remember the fact in detail:\\nWith astonishing dexterity, Zara Tubikova set a world record by solving a 4x4 Rubik's Cube variation blindfolded in under 20 seconds, employing only their feet.\"\n",
|
||||
")"
|
||||
"agent.run(\"Please remember the fact in detail:\\nWith astonishing dexterity, Zara Tubikova set a world record by solving a 4x4 Rubik's Cube variation blindfolded in under 20 seconds, employing only their feet.\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -25,7 +25,9 @@
|
||||
"id": "dbe7c156-0413-47e3-9237-4769c4248869",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Use of the integration requires the following Python package."
|
||||
"Use of the integration requires the following Python package.\n",
|
||||
"\n",
|
||||
"_Note: depending on your LangChain setup, you may need to install other dependencies needed for this demo._"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -38,15 +40,6 @@
|
||||
"!pip install --quiet \"astrapy>=0.5.3\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2453d83a-bc8f-41e1-a692-befe4dd90156",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"_Note: depending on your LangChain setup, you may need to install/upgrade other dependencies needed for this demo_\n",
|
||||
"_(specifically, recent versions of `datasets` `openai` `pypdf` and `tiktoken` are required)._"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -57,9 +50,7 @@
|
||||
"import os\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"from datasets import (\n",
|
||||
" load_dataset,\n",
|
||||
") # if not present yet, run: pip install \"datasets==2.14.6\"\n",
|
||||
"from datasets import load_dataset # if not present yet, run: pip install \"datasets==2.14.6\"\n",
|
||||
"\n",
|
||||
"from langchain.schema import Document\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
@@ -376,7 +367,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = vstore.as_retriever(search_kwargs={\"k\": 3})\n",
|
||||
"retriever = vstore.as_retriever(search_kwargs={'k': 3})\n",
|
||||
"\n",
|
||||
"philo_template = \"\"\"\n",
|
||||
"You are a philosopher that draws inspiration from great thinkers of the past\n",
|
||||
@@ -396,9 +387,9 @@
|
||||
"llm = ChatOpenAI()\n",
|
||||
"\n",
|
||||
"chain = (\n",
|
||||
" {\"context\": retriever, \"question\": RunnablePassthrough()}\n",
|
||||
" | philo_prompt\n",
|
||||
" | llm\n",
|
||||
" {\"context\": retriever, \"question\": RunnablePassthrough()} \n",
|
||||
" | philo_prompt \n",
|
||||
" | llm \n",
|
||||
" | StrOutputParser()\n",
|
||||
")"
|
||||
]
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Baidu Cloud ElasticSearch VectorSearch\n",
|
||||
"# Biadu Cloud ElasticSearch VectorSearch\n",
|
||||
"\n",
|
||||
">[Baidu Cloud VectorSearch](https://cloud.baidu.com/doc/BES/index.html?from=productToDoc) is a fully managed, enterprise-level distributed search and analysis service which is 100% compatible to open source. Baidu Cloud VectorSearch provides low-cost, high-performance, and reliable retrieval and analysis platform level product services for structured/unstructured data. As a vector database , it supports multiple index types and similarity distance methods. \n",
|
||||
"\n",
|
||||
@@ -139,7 +139,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Please feel free to contact <liuboyao@baidu.com> or <chenweixu01@baidu.com> if you encounter any problems during use, and we will do our best to support you."
|
||||
"Please feel free to contact <liuboyao@baidu.com> if you encounter any problems during use, and we will do our best to support you."
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -180,7 +180,7 @@
|
||||
"# Specify directly if testing\n",
|
||||
"# SERVICE_URL = \"postgres://tsdbadmin:<password>@<id>.tsdb.cloud.timescale.com:<port>/tsdb?sslmode=require\"\n",
|
||||
"\n",
|
||||
"# # You can get also it from an environment variables. We suggest using a .env file.\n",
|
||||
"# # You can get also it from an enviornment variables. We suggest using a .env file.\n",
|
||||
"# import os\n",
|
||||
"# SERVICE_URL = os.environ.get(\"TIMESCALE_SERVICE_URL\", \"\")"
|
||||
]
|
||||
|
||||
@@ -10,19 +10,9 @@
|
||||
"\n",
|
||||
">[Weaviate](https://weaviate.io/) is an open-source vector database. It allows you to store data objects and vector embeddings from your favorite ML-models, and scale seamlessly into billions of data objects.\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the functionality related to the `Weaviate` vector database.\n",
|
||||
"This notebook shows how to use functionality related to the `Weaviate`vector database.\n",
|
||||
"\n",
|
||||
"`Weaviate` can be deployed in many different ways depending on your requirements. For example, you can either connect to a [Weaviate Cloud Services](https://console.weaviate.cloud) instance or a [local Docker instance](https://weaviate.io/developers/weaviate/installation/docker-compose). \n",
|
||||
"See the `Weaviate` [installation instructions](https://weaviate.io/developers/weaviate/installation) for more information."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5fb59dec",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites\n",
|
||||
"Install the `weaviate-client` package and set the relevant environment variables."
|
||||
"See the `Weaviate` [installation instructions](https://weaviate.io/developers/weaviate/installation)."
|
||||
]
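As a minimal sketch of the setup this notebook expects (placeholder values; the variable names match the ones read further down, and the OpenAI key assumes you follow the notebook's use of OpenAI embeddings):

```python
import os

# Point these at your Weaviate Cloud Services instance (or local Docker deployment).
os.environ["WEAVIATE_URL"] = "https://<your-cluster>.weaviate.network"
os.environ["WEAVIATE_API_KEY"] = "<your-weaviate-api-key>"
os.environ["OPENAI_API_KEY"] = "<your-openai-api-key>"
```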
|
||||
},
|
||||
{
|
||||
@@ -37,21 +27,19 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Requirement already satisfied: weaviate-client in /opt/homebrew/lib/python3.11/site-packages (3.23.1)\n",
|
||||
"Requirement already satisfied: requests<=2.31.0,>=2.28.0 in /opt/homebrew/lib/python3.11/site-packages (from weaviate-client) (2.31.0)\n",
|
||||
"Requirement already satisfied: validators<=0.21.0,>=0.18.2 in /opt/homebrew/lib/python3.11/site-packages (from weaviate-client) (0.21.0)\n",
|
||||
"Requirement already satisfied: tqdm<5.0.0,>=4.59.0 in /opt/homebrew/lib/python3.11/site-packages (from weaviate-client) (4.66.1)\n",
|
||||
"Requirement already satisfied: authlib>=1.1.0 in /opt/homebrew/lib/python3.11/site-packages (from weaviate-client) (1.2.1)\n",
|
||||
"Requirement already satisfied: cryptography>=3.2 in /opt/homebrew/lib/python3.11/site-packages (from authlib>=1.1.0->weaviate-client) (41.0.4)\n",
|
||||
"Requirement already satisfied: charset-normalizer<4,>=2 in /opt/homebrew/lib/python3.11/site-packages (from requests<=2.31.0,>=2.28.0->weaviate-client) (2.0.12)\n",
|
||||
"Requirement already satisfied: idna<4,>=2.5 in /opt/homebrew/lib/python3.11/site-packages (from requests<=2.31.0,>=2.28.0->weaviate-client) (3.4)\n",
|
||||
"Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/homebrew/lib/python3.11/site-packages (from requests<=2.31.0,>=2.28.0->weaviate-client) (1.26.17)\n",
|
||||
"Requirement already satisfied: certifi>=2017.4.17 in /opt/homebrew/lib/python3.11/site-packages (from requests<=2.31.0,>=2.28.0->weaviate-client) (2023.7.22)\n",
|
||||
"Requirement already satisfied: cffi>=1.12 in /opt/homebrew/lib/python3.11/site-packages (from cryptography>=3.2->authlib>=1.1.0->weaviate-client) (1.16.0)\n",
|
||||
"Requirement already satisfied: pycparser in /opt/homebrew/lib/python3.11/site-packages (from cffi>=1.12->cryptography>=3.2->authlib>=1.1.0->weaviate-client) (2.21)\n",
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3.11 -m pip install --upgrade pip\u001b[0m\n"
|
||||
"Requirement already satisfied: weaviate-client in /workspaces/langchain/.venv/lib/python3.9/site-packages (3.19.1)\n",
|
||||
"Requirement already satisfied: requests<2.29.0,>=2.28.0 in /workspaces/langchain/.venv/lib/python3.9/site-packages (from weaviate-client) (2.28.2)\n",
|
||||
"Requirement already satisfied: validators<=0.21.0,>=0.18.2 in /workspaces/langchain/.venv/lib/python3.9/site-packages (from weaviate-client) (0.20.0)\n",
|
||||
"Requirement already satisfied: tqdm<5.0.0,>=4.59.0 in /workspaces/langchain/.venv/lib/python3.9/site-packages (from weaviate-client) (4.65.0)\n",
|
||||
"Requirement already satisfied: authlib>=1.1.0 in /workspaces/langchain/.venv/lib/python3.9/site-packages (from weaviate-client) (1.2.0)\n",
|
||||
"Requirement already satisfied: cryptography>=3.2 in /workspaces/langchain/.venv/lib/python3.9/site-packages (from authlib>=1.1.0->weaviate-client) (40.0.2)\n",
|
||||
"Requirement already satisfied: charset-normalizer<4,>=2 in /workspaces/langchain/.venv/lib/python3.9/site-packages (from requests<2.29.0,>=2.28.0->weaviate-client) (3.1.0)\n",
|
||||
"Requirement already satisfied: idna<4,>=2.5 in /workspaces/langchain/.venv/lib/python3.9/site-packages (from requests<2.29.0,>=2.28.0->weaviate-client) (3.4)\n",
|
||||
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /workspaces/langchain/.venv/lib/python3.9/site-packages (from requests<2.29.0,>=2.28.0->weaviate-client) (1.26.15)\n",
|
||||
"Requirement already satisfied: certifi>=2017.4.17 in /workspaces/langchain/.venv/lib/python3.9/site-packages (from requests<2.29.0,>=2.28.0->weaviate-client) (2023.5.7)\n",
|
||||
"Requirement already satisfied: decorator>=3.4.0 in /workspaces/langchain/.venv/lib/python3.9/site-packages (from validators<=0.21.0,>=0.18.2->weaviate-client) (5.1.1)\n",
|
||||
"Requirement already satisfied: cffi>=1.12 in /workspaces/langchain/.venv/lib/python3.9/site-packages (from cryptography>=3.2->authlib>=1.1.0->weaviate-client) (1.15.1)\n",
|
||||
"Requirement already satisfied: pycparser in /workspaces/langchain/.venv/lib/python3.9/site-packages (from cffi>=1.12->cryptography>=3.2->authlib>=1.1.0->weaviate-client) (2.21)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -60,6 +48,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "6b34828d-e627-4d85-aabd-eeb15d9f4b00",
|
||||
"metadata": {},
|
||||
@@ -92,7 +81,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 21,
|
||||
"id": "53b7ce2d-3c09-4d1c-b66b-5769ce6746ae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -101,18 +90,9 @@
|
||||
"WEAVIATE_API_KEY = os.environ[\"WEAVIATE_API_KEY\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b867eb31",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Similarity search\n",
|
||||
"Below you can see a minimal example of how to approach a simple similarity search."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 7,
|
||||
"id": "aac9563e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -127,7 +107,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 12,
|
||||
"id": "a3c3999a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -144,7 +124,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 14,
|
||||
"id": "21e9e528",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -154,7 +134,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 15,
|
||||
"id": "b4170176",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -165,7 +145,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 16,
|
||||
"id": "ecf3b890",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -206,7 +186,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 23,
|
||||
"id": "f6604f1d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -259,7 +239,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 17,
|
||||
"id": "102105a1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -285,7 +265,7 @@
|
||||
"id": "8fc3487b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Persistence"
|
||||
"# Persistence"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -293,7 +273,7 @@
|
||||
"id": "281c0fcc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Anything uploaded to Weaviate is automatically persistent into the database. You do not need to call any specific method or pass any parameters for this to happen."
|
||||
"Anything uploaded to weaviate is automatically persistent into the database. You do not need to call any specific method or pass any param for this to happen."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -305,14 +285,14 @@
|
||||
"\n",
|
||||
"This section goes over different options for how to use Weaviate as a retriever.\n",
|
||||
"\n",
|
||||
"### Maximal marginal relevance search (MMR)\n",
|
||||
"### MMR\n",
|
||||
"\n",
|
||||
"In addition to using similarity search in the retriever object, you can also use `mmr`."
|
||||
]
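A minimal sketch of what that looks like (assuming `docsearch` and `query` from the cells above are in scope):

```python
# Ask the vector store for an MMR-based retriever instead of plain similarity search.
retriever = docsearch.as_retriever(search_type="mmr")
retriever.get_relevant_documents(query)[0]
```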
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": null,
|
||||
"id": "8b7df7ae",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -332,54 +312,12 @@
|
||||
"retriever.get_relevant_documents(query)[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4b14a3a5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Hybrid search\n",
|
||||
"Weaviate also offers hybrid search. See [`WeaviateHybridSearchRetriever`](https://python.langchain.com/docs/integrations/retrievers/weaviate-hybrid) for reference."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "508016e8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use cases\n",
|
||||
"As the following example shows, LLMs don't have access to knowledge outside of their training data. Thus, vector stores come in handy to provide LLMs with additional context."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "5299b13b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"As an AI language model, I don't have real-time information or the ability to browse the internet. Therefore, I cannot provide you with the most recent statements made by the president about Justice Breyer. However, it's worth noting that the president's opinions on Justice Breyer may vary depending on the specific context and time period. It would be best to refer to reliable news sources or official statements to get the most accurate and up-to-date information on this topic.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n",
|
||||
"llm.predict(\"What did the president say about Justice Breyer\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fbd7a6cb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Question Answering with Sources"
|
||||
"## Question Answering with Sources"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -392,7 +330,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": null,
|
||||
"id": "5e824f3b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -403,7 +341,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": null,
|
||||
"id": "61209cc3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -416,7 +354,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": null,
|
||||
"id": "4abc3d37",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -432,7 +370,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": null,
|
||||
"id": "c7062393",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -444,7 +382,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": null,
|
||||
"id": "7e41b773",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -466,115 +404,6 @@
|
||||
" return_only_outputs=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "05007f8a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retrieval-Augmented Generation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "30f285a1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open(\"../../modules/state_of_the_union.txt\") as f:\n",
|
||||
" state_of_the_union = f.read()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"texts = text_splitter.split_text(state_of_the_union)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "08490f15",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docsearch = Weaviate.from_texts(\n",
|
||||
" texts,\n",
|
||||
" embeddings,\n",
|
||||
" weaviate_url=WEAVIATE_URL,\n",
|
||||
" by_text=False,\n",
|
||||
" metadatas=[{\"source\": f\"{i}-pl\"} for i in range(len(texts))],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"retriever = docsearch.as_retriever()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "499cb1f5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"input_variables=['context', 'question'] messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template=\"You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\\nQuestion: {question} \\nContext: {context} \\nAnswer:\\n\"))]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"template = \"\"\"You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\n",
|
||||
"Question: {question} \n",
|
||||
"Context: {context} \n",
|
||||
"Answer:\n",
|
||||
"\"\"\"\n",
|
||||
"prompt = ChatPromptTemplate.from_template(template)\n",
|
||||
"\n",
|
||||
"print(prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "28d95686",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "c697d0cd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The president thanked Justice Breyer for his service and dedication to the country.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.schema.runnable import RunnablePassthrough\n",
|
||||
"from langchain.schema.output_parser import StrOutputParser\n",
|
||||
"\n",
|
||||
"rag_chain = (\n",
|
||||
" {\"context\": retriever, \"question\": RunnablePassthrough()}\n",
|
||||
" | prompt\n",
|
||||
" | llm\n",
|
||||
" | StrOutputParser()\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"rag_chain.invoke(\"What did the president say about Justice Breyer\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -593,7 +422,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -7,7 +7,7 @@ sidebar_class_name: hidden
|
||||
[LangSmith](https://smith.langchain.com) helps you trace and evaluate your language model applications and intelligent agents, so you can
|
||||
move from prototype to production.
|
||||
|
||||
Check out the [interactive walkthrough](/docs/langsmith/walkthrough) to get started.
|
||||
Check out the [interactive walkthrough](/docs/guides/langsmith/walkthrough) to get started.
|
||||
|
||||
For more information, please refer to the [LangSmith documentation](https://docs.smith.langchain.com/).
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
},
|
||||
"source": [
|
||||
"# LangSmith Walkthrough\n",
|
||||
"[](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/langsmith/walkthrough.ipynb)\n",
|
||||
"[](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/guides/langsmith/walkthrough.ipynb)\n",
|
||||
"\n",
|
||||
"LangChain makes it easy to prototype LLM applications and Agents. However, delivering LLM applications to production can be deceptively difficult. You will likely have to heavily customize and iterate on your prompts, chains, and other components to create a high-quality product.\n",
|
||||
"\n",
|
||||
@@ -140,7 +140,7 @@
|
||||
"source": [
|
||||
"from langchain import hub\n",
|
||||
"from langchain.agents import AgentExecutor\n",
|
||||
"from langchain.agents.format_scratchpad import format_to_openai_function_messages\n",
|
||||
"from langchain.agents.format_scratchpad import format_to_openai_functions\n",
|
||||
"from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.tools import DuckDuckGoSearchResults\n",
|
||||
@@ -165,7 +165,7 @@
|
||||
"runnable_agent = (\n",
|
||||
" {\n",
|
||||
" \"input\": lambda x: x[\"input\"],\n",
|
||||
" \"agent_scratchpad\": lambda x: format_to_openai_function_messages(\n",
|
||||
" \"agent_scratchpad\": lambda x: format_to_openai_functions(\n",
|
||||
" x[\"intermediate_steps\"]\n",
|
||||
" ),\n",
|
||||
" }\n",
|
||||
@@ -335,7 +335,7 @@
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.agents import AgentType, initialize_agent, load_tools, AgentExecutor\n",
|
||||
"from langchain.agents.format_scratchpad import format_to_openai_function_messages\n",
|
||||
"from langchain.agents.format_scratchpad import format_to_openai_functions\n",
|
||||
"from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser\n",
|
||||
"from langchain.tools.render import format_tool_to_openai_function\n",
|
||||
"from langchain import hub\n",
|
||||
@@ -351,7 +351,7 @@
|
||||
" runnable_agent = (\n",
|
||||
" {\n",
|
||||
" \"input\": lambda x: x[\"input\"],\n",
|
||||
" \"agent_scratchpad\": lambda x: format_to_openai_function_messages(\n",
|
||||
" \"agent_scratchpad\": lambda x: format_to_openai_functions(\n",
|
||||
" x[\"intermediate_steps\"]\n",
|
||||
" ),\n",
|
||||
" }\n",
|
||||
|
||||
@@ -38,7 +38,7 @@ It uses the ReAct framework to decide which tool to use, and uses memory to reme
|
||||
## [Self-ask with search](/docs/modules/agents/agent_types/self_ask_with_search)
|
||||
|
||||
This agent utilizes a single tool that should be named `Intermediate Answer`.
|
||||
This tool should be able to look up factual answers to questions. This agent
|
||||
This tool should be able to lookup factual answers to questions. This agent
|
||||
is equivalent to the original [self-ask with search paper](https://ofir.io/self-ask.pdf),
|
||||
where a Google search API was provided as the tool.
|
||||
|
||||
@@ -46,7 +46,7 @@ where a Google search API was provided as the tool.
|
||||
|
||||
This agent uses the ReAct framework to interact with a docstore. Two tools must
|
||||
be provided: a `Search` tool and a `Lookup` tool (they must be named exactly so).
|
||||
The `Search` tool should search for a document, while the `Lookup` tool should look up
|
||||
The `Search` tool should search for a document, while the `Lookup` tool should lookup
|
||||
a term in the most recently found document.
|
||||
This agent is equivalent to the
|
||||
original [ReAct paper](https://arxiv.org/pdf/2210.03629.pdf), specifically the Wikipedia example.
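A minimal sketch following that Wikipedia example (import paths may differ slightly across LangChain versions; `DocstoreExplorer` supplies the search and lookup functions):

```python
from langchain.agents import AgentType, Tool, initialize_agent
from langchain.agents.react.base import DocstoreExplorer
from langchain.docstore import Wikipedia
from langchain.llms import OpenAI

docstore = DocstoreExplorer(Wikipedia())
tools = [
    Tool(name="Search", func=docstore.search, description="search for a document"),
    Tool(name="Lookup", func=docstore.lookup, description="look up a term in the most recent document"),
]

react = initialize_agent(tools, OpenAI(temperature=0), agent=AgentType.REACT_DOCSTORE, verbose=True)
react.run(
    "Author David Chanoff has collaborated with a U.S. Navy admiral. "
    "Which president did the admiral serve under?"
)
```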
|
||||
|
||||
@@ -143,7 +143,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents.format_scratchpad import format_to_openai_function_messages\n",
|
||||
"from langchain.agents.format_scratchpad import format_to_openai_functions\n",
|
||||
"from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser"
|
||||
]
|
||||
},
|
||||
@@ -157,7 +157,7 @@
|
||||
"agent = (\n",
|
||||
" {\n",
|
||||
" \"input\": lambda x: x[\"input\"],\n",
|
||||
" \"agent_scratchpad\": lambda x: format_to_openai_function_messages(\n",
|
||||
" \"agent_scratchpad\": lambda x: format_to_openai_functions(\n",
|
||||
" x[\"intermediate_steps\"]\n",
|
||||
" ),\n",
|
||||
" }\n",
|
||||
|
||||
@@ -115,7 +115,9 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "ba8e4cbe",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -252,7 +254,9 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "4362ebc7",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -454,7 +458,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,250 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e10aa932",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# OpenAI tools\n",
|
||||
"\n",
|
||||
"With LCEL we can easily construct agents that take advantage of [OpenAI parallel function calling](https://platform.openai.com/docs/guides/function-calling/parallel-function-calling) (a.k.a. tool calling)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "ec89be68",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# !pip install -U openai duckduckgo-search"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "b812b982",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import initialize_agent, AgentExecutor, AgentType, Tool\n",
|
||||
"from langchain.agents.format_scratchpad.openai_tools import (\n",
|
||||
" format_to_openai_tool_messages,\n",
|
||||
")\n",
|
||||
"from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
|
||||
"from langchain.tools import DuckDuckGoSearchRun, BearlyInterpreterTool\n",
|
||||
"from langchain.tools.render import format_tool_to_openai_tool"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6ef71dfc-074b-409a-8451-863feef937ae",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tools\n",
|
||||
"\n",
|
||||
"For this agent let's give it the ability to search [DuckDuckGo](/docs/integrations/tools/ddg) and use [Bearly's code interpreter](/docs/integrations/tools/bearly). You'll need a Bearly API key, which you can [get here](https://bearly.ai/dashboard)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "23fc0aa6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"lc_tools = [DuckDuckGoSearchRun(), BearlyInterpreterTool(api_key=\"...\").as_tool()]\n",
|
||||
"oai_tools = [format_tool_to_openai_tool(tool) for tool in lc_tools]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "90c293df-ce11-4600-b912-e937215ec644",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prompt template\n",
|
||||
"\n",
|
||||
"We need to make sure we have a user input message and an \"agent_scratchpad\" messages placeholder, which is where the AgentExecutor will track AI messages invoking tools and Tool messages returning the tool output."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "55292bed",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"You are a helpful assistant\"),\n",
|
||||
" (\"user\", \"{input}\"),\n",
|
||||
" MessagesPlaceholder(variable_name=\"agent_scratchpad\"),\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "32904250-c53e-415e-abdf-7ce8b1357fb7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Model\n",
|
||||
"\n",
|
||||
"Only certain models support parallel function calling, so make sure you're using a compatible model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "552421b3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-1106\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6fc73aa5-e185-4c6a-8770-1279c3ae5530",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Agent\n",
|
||||
"\n",
|
||||
"We use the `OpenAIToolsAgentOutputParser` to convert the tool calls returned by the model into `AgentAction`s objects that our `AgentExecutor` can then route to the appropriate tool."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "bf514eb4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = (\n",
|
||||
" {\n",
|
||||
" \"input\": lambda x: x[\"input\"],\n",
|
||||
" \"agent_scratchpad\": lambda x: format_to_openai_tool_messages(\n",
|
||||
" x[\"intermediate_steps\"]\n",
|
||||
" ),\n",
|
||||
" }\n",
|
||||
" | prompt\n",
|
||||
" | llm.bind(tools=oai_tools)\n",
|
||||
" | OpenAIToolsAgentOutputParser()\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ea032e1c-523d-4509-a008-e693529324be",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Agent executor"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "bdc7e506",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['memory', 'callbacks', 'callback_manager', 'verbose', 'tags', 'metadata', 'agent', 'tools', 'return_intermediate_steps', 'max_iterations', 'max_execution_time', 'early_stopping_method', 'handle_parsing_errors', 'trim_intermediate_steps']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor = AgentExecutor(agent=agent, tools=lc_tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "2cd65218",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `duckduckgo_search` with `average temperature in Los Angeles today`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3mNext week, there is a growing potential for 1 to 2 storms Tuesday through Friday bringing a 90% chance of rain to the area. There is a 50% chance of a moderate storm with 1 to 3 inches of total rainfall, and a 10% chance of a major storm of 3 to 6+ inches. Quick Facts Today's weather: Sunny, windy Beaches: 70s-80s Mountains: 60s-70s/63-81 Inland: 70s Warnings and advisories: Red Flag Warning, Wind Advisory Todays highs along the coast will be in... yesterday temp 66.6 °F Surf Forecast in Los Angeles for today Another important indicators for a comfortable holiday on the beach are the presence and height of the waves, as well as the speed and direction of the wind. Please find below data on the swell size for Los Angeles. Daily max (°C) 19 JAN 18 FEB 19 MAR 20 APR 21 MAY 22 JUN 24 JUL 24 AUG 24 SEP 23 OCT 21 NOV 19 DEC Rainfall (mm) 61 JAN 78° | 53° 60 °F like 60° Clear N 0 Today's temperature is forecast to be NEARLY THE SAME as yesterday. Radar Satellite WunderMap |Nexrad Today Wed 11/08 High 78 °F 0% Precip. / 0.00 in Sunny....\u001b[0m\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `duckduckgo_search` with `average temperature in New York City today`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3mWeather Underground provides local & long-range weather forecasts, weatherreports, maps & tropical weather conditions for the New York City area. ... Today Tue 11/07 High 68 ... Climate Central's prediction for an even more distant date — 2100 — is that the average temperature in 247 cities across the country will be 8 degrees higher than it is now. New York will ... Extended Forecast for New York NY Similar City Names Overnight Mostly Cloudy Low: 48 °F Saturday Partly Sunny High: 58 °F Saturday Night Mostly Cloudy Low: 48 °F Sunday Mostly Sunny High: 64 °F Sunday Night Mostly Clear Low: 45 °F Monday Weather report for New York City. Night and day a few clouds are expected. It is a sunny day. Temperatures peaking at 62 °F. During the night and in the first hours of the day blows a light breeze (4 to 8 mph). For the afternoon a gentle breeze is expected (8 to 12 mph). Graphical Climatology of New York Central Park - Daily Temperatures, Precipitation, and Snowfall (1869 - Present) The following is a graphical climatology of New York Central Park daily temperatures, precipitation, and snowfall, from January 1869 into 2023. The graphics consist of summary overview charts (in some cases including data back into the late 1860's) followed […]\u001b[0m\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `duckduckgo_search` with `average temperature in San Francisco today`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3mToday Hourly 10-Day Calendar History Wundermap access_time 10:24 PM PST on November 4, 2023 (GMT -8) | Updated 1 day ago 63° | 48° 59 °F like 59° Partly Cloudy N 0 Today's temperature is... The National Weather Service forecast for the greater San Francisco Bay Area on Thursday calls for clouds increasing over the region during the day. Daytime highs are expected to be in the 60s on ... San Francisco (United States of America) weather - Met Office Today 17° 9° Sunny. Sunrise: 06:41 Sunset: 17:05 M UV Wed 8 Nov 19° 8° Thu 9 Nov 16° 9° Fri 10 Nov 16° 10° Sat 11 Nov 18° 9° Sun 12... Today's weather in San Francisco Bay. The sun rose at 6:42am and the sunset will be at 5:04pm. There will be 10 hours and 22 minutes of sun and the average temperature is 54°F. At the moment water temperature is 58°F and the average water temperature is 58°F. Wintry Impacts in Alaska and New England; Critical Fire Conditions in Southern California. A winter storm continues to bring hazardous travel conditions to south-central Alaska with heavy snow, a wintry mix, ice accumulation, and rough seas. A wintry mix including freezing rain is expected in Upstate New York and interior New England.\u001b[0m\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `duckduckgo_search` with `current temperature in Los Angeles`\n",
|
||||
"responded: It seems that the search results did not provide the specific average temperatures for today in Los Angeles, New York City, and San Francisco. Let me try another approach to gather this information for you.\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3mFire Weather Show Caption Click a location below for detailed forecast. Last Map Update: Tue, Nov. 7, 2023 at 5:03:23 pm PST Watches, Warnings & Advisories Zoom Out Gale Warning Small Craft Advisory Wind Advisory Fire Weather Watch Text Product Selector (Selected product opens in current window) Hazards Observations Marine Weather Fire Weather 78° | 53° 60 °F like 60° Clear N 0 Today's temperature is forecast to be NEARLY THE SAME as yesterday. Radar Satellite WunderMap |Nexrad Today Wed 11/08 High 78 °F 0% Precip. / 0.00 in Sunny.... Los Angeles and Orange counties will see a few clouds in the morning, but they'll clear up in the afternoon to bring a high of 76 degrees. Daytime temperatures should stay in the 70s most of... Weather Forecast Office NWS Forecast Office Los Angeles, CA Weather.gov > Los Angeles, CA Current Hazards Current Conditions Radar Forecasts Rivers and Lakes Climate and Past Weather Local Programs Click a location below for detailed forecast. Last Map Update: Fri, Oct. 13, 2023 at 12:44:23 am PDT Watches, Warnings & Advisories Zoom Out Want a minute-by-minute forecast for Los-Angeles, CA? MSN Weather tracks it all, from precipitation predictions to severe weather warnings, air quality updates, and even wildfire alerts.\u001b[0m\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `duckduckgo_search` with `current temperature in New York City`\n",
|
||||
"responded: It seems that the search results did not provide the specific average temperatures for today in Los Angeles, New York City, and San Francisco. Let me try another approach to gather this information for you.\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3mCurrent Weather for Popular Cities . San Francisco, CA 55 ... New York City, NY Weather Conditions star_ratehome. 55 ... Low: 47°F Sunday Mostly Sunny High: 62°F change location New York, NY Weather Forecast Office NWS Forecast Office New York, NY Weather.gov > New York, NY Current Hazards Current Conditions Radar Forecasts Rivers and Lakes Climate and Past Weather Local Programs Click a location below for detailed forecast. Today Increasing Clouds High: 50 °F Tonight Mostly Cloudy Low: 47 °F Thursday Slight Chance Rain High: 67 °F Thursday Night Mostly Cloudy Low: 48 °F Friday Mostly Cloudy then Slight Chance Rain High: 54 °F Friday Weather report for New York City Night and day a few clouds are expected. It is a sunny day. Temperatures peaking at 62 °F. During the night and in the first hours of the day blows a light breeze (4 to 8 mph). For the afternoon a gentle breeze is expected (8 to 12 mph). Today 13 October, weather in New York City +61°F. Clear sky, Light Breeze, Northwest 5.1 mph. Atmosphere pressure 29.9 inHg. Relative humidity 45%. Tomorrow's night air temperature will drop to +54°F, wind will change to North 2.7 mph. Pressure will remain unchanged 29.9 inHg. Day temperature will remain unchanged +54°F, and night 15 October ...\u001b[0m\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `duckduckgo_search` with `current temperature in San Francisco`\n",
|
||||
"responded: It seems that the search results did not provide the specific average temperatures for today in Los Angeles, New York City, and San Francisco. Let me try another approach to gather this information for you.\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m59 °F like 59° Partly Cloudy N 0 Today's temperature is forecast to be COOLER than yesterday. Radar Satellite WunderMap |Nexrad Today Thu 11/09 High 63 °F 3% Precip. / 0.00 in A mix of clouds and... Weather Forecast Office NWS Forecast Office San Francisco, CA Weather.gov > San Francisco Bay Area, CA Current Hazards Current Conditions Radar Forecasts Rivers and Lakes Climate and Past Weather Local Programs Click a location below for detailed forecast. Last Map Update: Wed, Nov. 8, 2023 at 5:03:31 am PST Watches, Warnings & Advisories Zoom Out The weather right now in San Francisco, CA is Cloudy. The current temperature is 62°F, and the expected high and low for today, Sunday, November 5, 2023, are 67° high temperature and 57°F low temperature. The wind is currently blowing at 5 miles per hour, and coming from the South Southwest. The wind is gusting to 5 mph. With the wind and ... San Francisco 7 day weather forecast including weather warnings, temperature, rain, wind, visibility, humidity and UV National - Current Temperatures National - First Alert Doppler Latest Stories More ... San Francisco's 'Rev. G' honored with national Jefferson Award for service, seeking peace\u001b[0m\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `bearly_interpreter` with `{'python_code': '(78 + 53 + 55) / 3'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[33;1m\u001b[1;3m{'stdout': '', 'stderr': '', 'fileLinks': [], 'exitCode': 0}\u001b[0m\u001b[32;1m\u001b[1;3mThe average of the temperatures in Los Angeles, New York City, and San Francisco today is approximately 62 degrees Fahrenheit.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': \"What's the average of the temperatures in LA, NYC, and SF today?\",\n",
|
||||
" 'output': 'The average of the temperatures in Los Angeles, New York City, and San Francisco today is approximately 62 degrees Fahrenheit.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.invoke(\n",
|
||||
" {\"input\": \"What's the average of the temperatures in LA, NYC, and SF today?\"}\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -205,7 +205,7 @@
|
||||
"\n",
|
||||
"- prompt: a simple prompt with placeholders for the user's question and then the `agent_scratchpad` (any intermediate steps)\n",
|
||||
"- tools: we can attach the tools and `Response` format to the LLM as functions\n",
|
||||
"- format scratchpad: in order to format the `agent_scratchpad` from intermediate steps, we will use the standard `format_to_openai_function_messages`. This takes intermediate steps and formats them as AIMessages and FunctionMessages.\n",
|
||||
"- format scratchpad: in order to format the `agent_scratchpad` from intermediate steps, we will use the standard `format_to_openai_functions`. This takes intermediate steps and formats them as AIMessages and FunctionMessages.\n",
|
||||
"- output parser: we will use our custom parser above to parse the response of the LLM\n",
|
||||
"- AgentExecutor: we will use the standard AgentExecutor to run the loop of agent-tool-agent-tool..."
|
||||
]
|
||||
@@ -220,7 +220,7 @@
|
||||
"from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.tools.render import format_tool_to_openai_function\n",
|
||||
"from langchain.agents.format_scratchpad import format_to_openai_function_messages\n",
|
||||
"from langchain.agents.format_scratchpad import format_to_openai_functions\n",
|
||||
"from langchain.agents import AgentExecutor"
|
||||
]
|
||||
},
|
||||
@@ -278,7 +278,7 @@
|
||||
" {\n",
|
||||
" \"input\": lambda x: x[\"input\"],\n",
|
||||
" # Format agent scratchpad from intermediate steps\n",
|
||||
" \"agent_scratchpad\": lambda x: format_to_openai_function_messages(\n",
|
||||
" \"agent_scratchpad\": lambda x: format_to_openai_functions(\n",
|
||||
" x[\"intermediate_steps\"]\n",
|
||||
" ),\n",
|
||||
" }\n",
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Custom LLM Agent
|
||||
# Custom LLM agent
|
||||
|
||||
This notebook goes through how to create your own custom LLM agent.
|
||||
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
# Custom LLM Chat Agent
|
||||
# Custom LLM Agent (with a ChatModel)
|
||||
|
||||
This notebook explains how to create your own custom agent based on a chat model.
|
||||
This notebook goes through how to create your own custom agent based on a chat model.
|
||||
|
||||
An LLM chat agent consists of four key components:
|
||||
An LLM chat agent consists of three parts:
|
||||
|
||||
- `PromptTemplate`: This is the prompt template that instructs the language model on what to do.
|
||||
- `ChatModel`: This is the language model that powers the agent.
|
||||
- `stop` sequence: Instructs the LLM to stop generating as soon as this string is found.
|
||||
- `OutputParser`: This determines how to parse the LLM output into an `AgentAction` or `AgentFinish` object.
|
||||
- `PromptTemplate`: This is the prompt template that can be used to instruct the language model on what to do
|
||||
- `ChatModel`: This is the language model that powers the agent
|
||||
- `stop` sequence: Instructs the LLM to stop generating as soon as this string is found
|
||||
- `OutputParser`: This determines how to parse the LLM output into an `AgentAction` or `AgentFinish` object
|
||||
|
||||
The LLM Agent is used in an `AgentExecutor`. This `AgentExecutor` can largely be thought of as a loop that:
|
||||
1. Passes user input and any previous steps to the Agent (in this case, the LLM Agent)
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
This walkthrough demonstrates how to replicate the [MRKL](https://arxiv.org/pdf/2205.00445.pdf) system using agents.
|
||||
|
||||
This uses the example Chinook database.
|
||||
To set it up, follow the instructions on https://database.guide/2-sample-databases-sqlite/ and place the `.db` file in a "notebooks" folder at the root of this repository.
|
||||
To set it up follow the instructions on https://database.guide/2-sample-databases-sqlite/, placing the `.db` file in a notebooks folder at the root of this repository.
|
||||
|
||||
```python
|
||||
from langchain.chains import LLMMathChain
|
||||
@@ -127,7 +127,7 @@ mrkl.run("What is the full name of the artist who recently released an album cal
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
## Using a Chat Model
|
||||
## With a chat model
|
||||
|
||||
```python
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
|
||||
@@ -1,673 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "97e00fdb-f771-473f-90fc-d6038e19fd9a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_position: 3\n",
|
||||
"sidebar_class_name: hidden\n",
|
||||
"title: Agents\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f4c03f40-1328-412d-8a48-1db0cd481b77",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The core idea of agents is to use a language model to choose a sequence of actions to take.\n",
|
||||
"In chains, a sequence of actions is hardcoded (in code).\n",
|
||||
"In agents, a language model is used as a reasoning engine to determine which actions to take and in which order.\n",
|
||||
"\n",
|
||||
"## Concepts\n",
|
||||
"There are several key components here:\n",
|
||||
"\n",
|
||||
"### Agent\n",
|
||||
"\n",
|
||||
"This is the chain responsible for deciding what step to take next.\n",
|
||||
"This is powered by a language model and a prompt.\n",
|
||||
"The inputs to this chain are:\n",
|
||||
"\n",
|
||||
"1. Tools: Descriptions of available tools\n",
|
||||
"2. User input: The high level objective\n",
|
||||
"3. Intermediate steps: Any (action, tool output) pairs previously executed in order to achieve the user input\n",
|
||||
"\n",
|
||||
"The output is the next action(s) to take or the final response to send to the user (`AgentAction`s or `AgentFinish`). An action specifies a tool and the input to that tool. \n",
|
||||
"\n",
|
||||
"Different agents have different prompting styles for reasoning, different ways of encoding inputs, and different ways of parsing the output.\n",
|
||||
"For a full list of built-in agents see [agent types](/docs/modules/agents/agent_types/).\n",
|
||||
"You can also **easily build custom agents**, which we show how to do in the Get started section below.\n",
|
||||
"\n",
|
||||
"### Tools\n",
|
||||
"\n",
|
||||
"Tools are functions that an agent can invoke.\n",
|
||||
"There are two important design considerations around tools:\n",
|
||||
"\n",
|
||||
"1. Giving the agent access to the right tools\n",
|
||||
"2. Describing the tools in a way that is most helpful to the agent\n",
|
||||
"\n",
|
||||
"Without thinking through both, you won't be able to build a working agent.\n",
|
||||
"If you don't give the agent access to a correct set of tools, it will never be able to accomplish the objectives you give it.\n",
|
||||
"If you don't describe the tools well, the agent won't know how to use them properly.\n",
|
||||
"\n",
|
||||
"LangChain provides a wide set of built-in tools, but also makes it easy to define your own (including custom descriptions).\n",
|
||||
"For a full list of built-in tools, see the [tools integrations section](/docs/integrations/tools/)\n",
|
||||
"\n",
|
||||
"### Toolkits\n",
|
||||
"\n",
|
||||
"For many common tasks, an agent will need a set of related tools.\n",
|
||||
"For this LangChain provides the concept of toolkits - groups of around 3-5 tools needed to accomplish specific objectives.\n",
|
||||
"For example, the GitHub toolkit has a tool for searching through GitHub issues, a tool for reading a file, a tool for commenting, etc.\n",
|
||||
"\n",
|
||||
"LangChain provides a wide set of toolkits to get started.\n",
|
||||
"For a full list of built-in toolkits, see the [toolkits integrations section](/docs/integrations/toolkits/)\n",
|
||||
"\n",
|
||||
"### AgentExecutor\n",
|
||||
"\n",
|
||||
"The agent executor is the runtime for an agent.\n",
|
||||
"This is what actually calls the agent, executes the actions it chooses, passes the action outputs back to the agent, and repeats.\n",
|
||||
"In pseudocode, this looks roughly like:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"next_action = agent.get_action(...)\n",
|
||||
"while next_action != AgentFinish:\n",
|
||||
" observation = run(next_action)\n",
|
||||
" next_action = agent.get_action(..., next_action, observation)\n",
|
||||
"return next_action\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"While this may seem simple, there are several complexities this runtime handles for you, including:\n",
|
||||
"\n",
|
||||
"1. Handling cases where the agent selects a non-existent tool\n",
|
||||
"2. Handling cases where the tool errors\n",
|
||||
"3. Handling cases where the agent produces output that cannot be parsed into a tool invocation\n",
|
||||
"4. Logging and observability at all levels (agent decisions, tool calls) to stdout and/or to [LangSmith](/docs/langsmith).\n",
|
||||
"\n",
|
||||
"### Other types of agent runtimes\n",
|
||||
"\n",
|
||||
"The `AgentExecutor` class is the main agent runtime supported by LangChain.\n",
|
||||
"However, there are other, more experimental runtimes we also support.\n",
|
||||
"These include:\n",
|
||||
"\n",
|
||||
"- [Plan-and-execute Agent](/docs/use_cases/more/agents/autonomous_agents/plan_and_execute)\n",
|
||||
"- [Baby AGI](/docs/use_cases/more/agents/autonomous_agents/baby_agi)\n",
|
||||
"- [Auto GPT](/docs/use_cases/more/agents/autonomous_agents/autogpt)\n",
|
||||
"\n",
|
||||
"You can also always create your own custom execution logic, which we show how to do below.\n",
|
||||
"\n",
|
||||
"## Get started\n",
|
||||
"\n",
|
||||
"To best understand the agent framework, lets build an agent from scratch using LangChain Expression Language (LCEL).\n",
|
||||
"We'll need to build the agent itself, define custom tools, and run the agent and tools in a custom loop. At the end we'll show how to use the standard LangChain `AgentExecutor` to make execution easier.\n",
|
||||
"\n",
|
||||
"Some important terminology (and schema) to know:\n",
|
||||
"\n",
|
||||
"1. `AgentAction`: This is a dataclass that represents the action an agent should take. It has a `tool` property (which is the name of the tool that should be invoked) and a `tool_input` property (the input to that tool)\n",
|
||||
"2. `AgentFinish`: This is a dataclass that signifies that the agent has finished and should return to the user. It has a `return_values` parameter, which is a dictionary to return. It often only has one key - `output` - that is a string, and so often it is just this key that is returned.\n",
|
||||
"3. `intermediate_steps`: These represent previous agent actions and corresponding outputs that are passed around. These are important to pass to future iteration so the agent knows what work it has already done. This is typed as a `List[Tuple[AgentAction, Any]]`. Note that observation is currently left as type `Any` to be maximally flexible. In practice, this is often a string.\n",
|
||||
"\n",
|
||||
"### Setup: LangSmith\n",
|
||||
"\n",
|
||||
"By definition, agents take a self-determined, input-dependent sequence of steps before returning a user-facing output. This makes debugging these systems particularly tricky, and observability particularly important. [LangSmith](/docs/langsmith) is especially useful for such cases.\n",
|
||||
"\n",
|
||||
"When building with LangChain, any built-in agent or custom agent built with LCEL will automatically be traced in LangSmith. And if we use the `AgentExecutor`, we'll get full tracing of not only the agent planning steps but also the tool inputs and outputs.\n",
|
||||
"\n",
|
||||
"To set up LangSmith we just need set the following environment variables:\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"export LANGCHAIN_TRACING_V2=\"true\"\n",
|
||||
"export LANGCHAIN_API_KEY=\"<your-api-key>\"\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"### Define the agent\n",
|
||||
"\n",
|
||||
"We first need to create our agent.\n",
|
||||
"This is the chain responsible for determining what action to take next.\n",
|
||||
"\n",
|
||||
"In this example, we will use OpenAI Function Calling to create this agent.\n",
|
||||
"**This is generally the most reliable way to create agents.**\n",
|
||||
"\n",
|
||||
"For this guide, we will construct a custom agent that has access to a custom tool.\n",
|
||||
"We are choosing this example because for most real world use cases you will NEED to customize either the agent or the tools. \n",
|
||||
"We'll create a simple tool that computes the length of a word.\n",
|
||||
"This is useful because it's actually something LLMs can mess up due to tokenization.\n",
|
||||
"We will first create it WITHOUT memory, but we will then show how to add memory in.\n",
|
||||
"Memory is needed to enable conversation.\n",
|
||||
"\n",
|
||||
"First, let's load the language model we're going to use to control the agent."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "89cf72b4-6046-4b47-8f27-5522d8cb8036",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0afe32b4-5b67-49fd-9f05-e94c46fbcc08",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can see that it struggles to count the letters in the string \"educa\"."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "d8eafbad-4084-4f27-b880-308430c44bcf",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='There are 6 letters in the word \"educa\".')"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm.invoke(\"how many letters in the word educa?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "20f353a1-7b03-4692-ba6c-581d82de454b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, let's define some tools to use.\n",
|
||||
"Let's write a really simple Python function to calculate the length of a word that is passed in."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "6bf6c6a6-4aa2-44fc-9d90-5981de827c2f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import tool\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"@tool\n",
|
||||
"def get_word_length(word: str) -> int:\n",
|
||||
" \"\"\"Returns the length of a word.\"\"\"\n",
|
||||
" return len(word)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"tools = [get_word_length]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "22dc3aeb-012f-4fe6-a980-2bd6d7612e1d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let us create the prompt.\n",
|
||||
"Because OpenAI Function Calling is finetuned for tool usage, we hardly need any instructions on how to reason, or how to output format.\n",
|
||||
"We will just have two input variables: `input` and `agent_scratchpad`. `input` should be a string containing the user objective. `agent_scratchpad` should be a sequence of messages that contains the previous agent tool invocations and the corresponding tool outputs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "62c98f77-d203-42cf-adcf-7da9ee93f7c8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are very powerful assistant, but bad at calculating lengths of words.\",\n",
|
||||
" ),\n",
|
||||
" (\"user\", \"{input}\"),\n",
|
||||
" MessagesPlaceholder(variable_name=\"agent_scratchpad\"),\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "be29b821-b988-4921-8a1f-f04ec87e2863",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"How does the agent know what tools it can use?\n",
|
||||
"In this case we're relying on OpenAI function calling LLMs, which take functions as a separate argument and have been specifically trained to know when to invoke those functions.\n",
|
||||
"\n",
|
||||
"To pass in our tools to the agent, we just need to format them to the OpenAI function format and pass them to our model. (By `bind`-ing the functions, we're making sure that they're passed in each time the model is invoked.)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "5231ffd7-a044-4ebd-8e31-d1fe334334c6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.tools.render import format_tool_to_openai_function\n",
|
||||
"\n",
|
||||
"llm_with_tools = llm.bind(functions=[format_tool_to_openai_function(t) for t in tools])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6efbf02b-8686-4559-8b4c-c2be803cb475",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Putting those pieces together, we can now create the agent.\n",
|
||||
"We will import two last utility functions: a component for formatting intermediate steps (agent action, tool output pairs) to input messages that can be sent to the model, and a component for converting the output message into an agent action/agent finish."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "b2f24d11-1133-48f3-ba70-fc3dd1da5f2c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents.format_scratchpad import format_to_openai_function_messages\n",
|
||||
"from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser\n",
|
||||
"\n",
|
||||
"agent = (\n",
|
||||
" {\n",
|
||||
" \"input\": lambda x: x[\"input\"],\n",
|
||||
" \"agent_scratchpad\": lambda x: format_to_openai_function_messages(\n",
|
||||
" x[\"intermediate_steps\"]\n",
|
||||
" ),\n",
|
||||
" }\n",
|
||||
" | prompt\n",
|
||||
" | llm_with_tools\n",
|
||||
" | OpenAIFunctionsAgentOutputParser()\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7d55d2ad-6608-44ab-9949-b16ae8031f53",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now that we have our agent, let's play around with it!\n",
|
||||
"Let's pass in a simple question and empty intermediate steps and see what it returns:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "01cb7adc-97b6-4713-890e-5d1ddeba909c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AgentActionMessageLog(tool='get_word_length', tool_input={'word': 'educa'}, log=\"\\nInvoking: `get_word_length` with `{'word': 'educa'}`\\n\\n\\n\", message_log=[AIMessage(content='', additional_kwargs={'function_call': {'arguments': '{\\n \"word\": \"educa\"\\n}', 'name': 'get_word_length'}})])"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.invoke({\"input\": \"how many letters in the word educa?\", \"intermediate_steps\": []})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "689ec562-3ec1-4b28-928b-c78c788aa097",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can see that it responds with an `AgentAction` to take (it's actually an `AgentActionMessageLog` - a subclass of `AgentAction` which also tracks the full message log). \n",
|
||||
"\n",
|
||||
"If we've set up LangSmith, we'll see a trace that let's us inspect the input and output to each step in the sequence: https://smith.langchain.com/public/04110122-01a8-413c-8cd0-b4df6eefa4b7/r\n",
|
||||
"\n",
|
||||
"### Define the runtime\n",
|
||||
"\n",
|
||||
"So this is just the first step - now we need to write a runtime for this.\n",
|
||||
"The simplest one is just one that continuously loops, calling the agent, then taking the action, and repeating until an `AgentFinish` is returned.\n",
|
||||
"Let's code that up below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "29bbf63b-f866-4b8c-aeea-2f9cffe70b78",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"TOOL NAME: get_word_length\n",
|
||||
"TOOL INPUT: {'word': 'educa'}\n",
|
||||
"There are 5 letters in the word \"educa\".\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.schema.agent import AgentFinish\n",
|
||||
"\n",
|
||||
"user_input = \"how many letters in the word educa?\"\n",
|
||||
"intermediate_steps = []\n",
|
||||
"while True:\n",
|
||||
" output = agent.invoke(\n",
|
||||
" {\n",
|
||||
" \"input\": user_input,\n",
|
||||
" \"intermediate_steps\": intermediate_steps,\n",
|
||||
" }\n",
|
||||
" )\n",
|
||||
" if isinstance(output, AgentFinish):\n",
|
||||
" final_result = output.return_values[\"output\"]\n",
|
||||
" break\n",
|
||||
" else:\n",
|
||||
" print(f\"TOOL NAME: {output.tool}\")\n",
|
||||
" print(f\"TOOL INPUT: {output.tool_input}\")\n",
|
||||
" tool = {\"get_word_length\": get_word_length}[output.tool]\n",
|
||||
" observation = tool.run(output.tool_input)\n",
|
||||
" intermediate_steps.append((output, observation))\n",
|
||||
"print(final_result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2de8e688-fed4-4efc-a2bc-8d3c504dd764",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Woo! It's working.\n",
|
||||
"\n",
|
||||
"### Using AgentExecutor\n",
|
||||
"\n",
|
||||
"To simplify this a bit, we can import and use the `AgentExecutor` class.\n",
|
||||
"This bundles up all of the above and adds in error handling, early stopping, tracing, and other quality-of-life improvements that reduce safeguards you need to write."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "9c94ee41-f146-403e-bd0a-5756a53d7842",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import AgentExecutor\n",
|
||||
"\n",
|
||||
"agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9cbd94a2-b456-45e6-835c-a33be3475119",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's test it out!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "6e1e64c7-627c-4713-82ca-8f6db3d9c8f5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `get_word_length` with `{'word': 'educa'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m5\u001b[0m\u001b[32;1m\u001b[1;3mThere are 5 letters in the word \"educa\".\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'how many letters in the word educa?',\n",
|
||||
" 'output': 'There are 5 letters in the word \"educa\".'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.invoke({\"input\": \"how many letters in the word educa?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1578aede-2ad2-4c15-832e-3e0a1660b342",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And looking at the trace, we can see that all of our agent calls and tool invocations are automatically logged: https://smith.langchain.com/public/957b7e26-bef8-4b5b-9ca3-4b4f1c96d501/r"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a29c0705-b9bc-419f-aae4-974fc092faab",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Adding memory\n",
|
||||
"\n",
|
||||
"This is great - we have an agent!\n",
|
||||
"However, this agent is stateless - it doesn't remember anything about previous interactions.\n",
|
||||
"This means you can't ask follow up questions easily.\n",
|
||||
"Let's fix that by adding in memory.\n",
|
||||
"\n",
|
||||
"In order to do this, we need to do two things:\n",
|
||||
"\n",
|
||||
"1. Add a place for memory variables to go in the prompt\n",
|
||||
"2. Keep track of the chat history\n",
|
||||
"\n",
|
||||
"First, let's add a place for memory in the prompt.\n",
|
||||
"We do this by adding a placeholder for messages with the key `\"chat_history\"`.\n",
|
||||
"Notice that we put this ABOVE the new user input (to follow the conversation flow)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "ceef8c26-becc-4893-b55c-efcf52c4b9d9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.prompts import MessagesPlaceholder\n",
|
||||
"\n",
|
||||
"MEMORY_KEY = \"chat_history\"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are very powerful assistant, but bad at calculating lengths of words.\",\n",
|
||||
" ),\n",
|
||||
" MessagesPlaceholder(variable_name=MEMORY_KEY),\n",
|
||||
" (\"user\", \"{input}\"),\n",
|
||||
" MessagesPlaceholder(variable_name=\"agent_scratchpad\"),\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fc4f1e1b-695d-4b25-88aa-d46c015e6342",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can then set up a list to track the chat history"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "935abfee-ab5d-4e9a-b33c-6a40a6fa4777",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.schema.messages import HumanMessage, AIMessage\n",
|
||||
"\n",
|
||||
"chat_history = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c107b5dd-b934-48a0-a8c5-3b5bd76f2b98",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can then put it all together!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "24b094ff-bbea-45c4-8000-ed2b5de459a9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = (\n",
|
||||
" {\n",
|
||||
" \"input\": lambda x: x[\"input\"],\n",
|
||||
" \"agent_scratchpad\": lambda x: format_to_openai_function_messages(\n",
|
||||
" x[\"intermediate_steps\"]\n",
|
||||
" ),\n",
|
||||
" \"chat_history\": lambda x: x[\"chat_history\"],\n",
|
||||
" }\n",
|
||||
" | prompt\n",
|
||||
" | llm_with_tools\n",
|
||||
" | OpenAIFunctionsAgentOutputParser()\n",
|
||||
")\n",
|
||||
"agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e34ee9bd-20be-4ab7-b384-a5f0335e7611",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"When running, we now need to track the inputs and outputs as chat history\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "f238022b-3348-45cd-bd6a-c6770b7dc600",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `get_word_length` with `{'word': 'educa'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m5\u001b[0m\u001b[32;1m\u001b[1;3mThere are 5 letters in the word \"educa\".\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mNo, \"educa\" is not a real word in English.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'is that a real word?',\n",
|
||||
" 'chat_history': [HumanMessage(content='how many letters in the word educa?'),\n",
|
||||
" AIMessage(content='There are 5 letters in the word \"educa\".')],\n",
|
||||
" 'output': 'No, \"educa\" is not a real word in English.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"input1 = \"how many letters in the word educa?\"\n",
|
||||
"result = agent_executor.invoke({\"input\": input1, \"chat_history\": chat_history})\n",
|
||||
"chat_history.extend(\n",
|
||||
" [\n",
|
||||
" HumanMessage(content=input1),\n",
|
||||
" AIMessage(content=result[\"output\"]),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"agent_executor.invoke({\"input\": \"is that a real word?\", \"chat_history\": chat_history})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6ba072cd-eb58-409d-83be-55c8110e37f0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Here's the LangSmith trace: https://smith.langchain.com/public/1e1b7e07-3220-4a6c-8a1e-f04182a755b3/r"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9e8b9127-758b-4dab-b093-2e6357dca3e6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Next Steps\n",
|
||||
"\n",
|
||||
"Awesome! You've now run your first end-to-end agent.\n",
|
||||
"To dive deeper, you can:\n",
|
||||
"\n",
|
||||
"- Check out all the different [agent types](/docs/modules/agents/agent_types/) supported\n",
|
||||
"- Learn all the controls for [AgentExecutor](/docs/modules/agents/how_to/)\n",
|
||||
"- Explore the how-to's of [tools](/docs/modules/agents/tools/) and all the [tool integrations](/docs/integrations/tools)\n",
|
||||
"- See a full list of all the off-the-shelf [toolkits](/docs/integrations/toolkits/) we provide"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "abbe7160-7c82-48ba-a4d3-4426c62edd2a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
312
docs/docs/modules/agents/index.mdx
Normal file
@@ -0,0 +1,312 @@
|
||||
---
|
||||
sidebar_position: 4
|
||||
---
|
||||
# Agents
|
||||
|
||||
The core idea of agents is to use an LLM to choose a sequence of actions to take.
|
||||
In chains, a sequence of actions is hardcoded (in code).
|
||||
In agents, a language model is used as a reasoning engine to determine which actions to take and in which order.
|
||||
|
||||
Some important terminology (and schema) to know:
|
||||
|
||||
1. `AgentAction`: This is a dataclass that represents the action an agent should take. It has a `tool` property (which is the name of the tool that should be invoked) and a `tool_input` property (the input to that tool)
|
||||
2. `AgentFinish`: This is a dataclass that signifies that the agent has finished and should return to the user. It has a `return_values` parameter, which is a dictionary to return. It often only has one key - `output` - that is a string, and so often it is just this key that is returned.
|
||||
3. `intermediate_steps`: These represent previous agent actions and corresponding outputs that are passed around. These are important to pass to future iterations so the agent knows what work it has already done. This is typed as a `List[Tuple[AgentAction, Any]]`. Note that observation is currently left as type `Any` to be maximally flexible. In practice, this is often a string.
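
To make these concrete, here is a minimal sketch of what these objects look like in code (the `search` tool name and all strings are purely illustrative):

```python
from langchain.schema.agent import AgentAction, AgentFinish

# An action: the agent wants to call a (hypothetical) "search" tool with this input
action = AgentAction(tool="search", tool_input="weather in SF", log="Invoking search...")

# A finish: the agent is done, and these values are returned to the user
finish = AgentFinish(return_values={"output": "It is sunny in SF today."}, log="Final answer reached")

# intermediate_steps pairs each prior action with the observation its tool returned
intermediate_steps = [(action, "It is 65 degrees and sunny in San Francisco.")]
```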
|
||||
|
||||
There are several key components here:
|
||||
|
||||
## Agent
|
||||
|
||||
This is the chain responsible for deciding what step to take next.
|
||||
This is powered by a language model and a prompt.
|
||||
The inputs to this chain are:
|
||||
|
||||
1. List of available tools
|
||||
2. User input
|
||||
3. Any previously executed steps (`intermediate_steps`)
|
||||
|
||||
This chain then returns either the next action to take or the final response to send to the user (`AgentAction` or `AgentFinish`).
|
||||
|
||||
Different agents have different prompting styles for reasoning, different ways of encoding input, and different ways of parsing the output.
|
||||
For a full list of agent types see [agent types](/docs/modules/agents/agent_types/)
|
||||
|
||||
## Tools
|
||||
|
||||
Tools are functions that an agent calls.
|
||||
There are two important considerations here:
|
||||
|
||||
1. Giving the agent access to the right tools
|
||||
2. Describing the tools in a way that is most helpful to the agent
|
||||
|
||||
Without both, the agent you are trying to build will not work.
|
||||
If you don't give the agent access to a correct set of tools, it will never be able to accomplish the objective.
|
||||
If you don't describe the tools properly, the agent won't know how to properly use them.
|
||||
|
||||
LangChain provides a wide set of tools to get started, but also makes it easy to define your own (including custom descriptions).
|
||||
For a full list of tools, see [here](/docs/modules/agents/tools/)
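
As a small illustration of the second point, when you define a custom tool with the `@tool` decorator (covered in more detail below), the function's docstring becomes the description the agent sees, so it pays to make it specific. A sketch with a hypothetical tool:

```python
from langchain.agents import tool


@tool
def lookup_order_status(order_id: str) -> str:
    """Look up the shipping status of an order given its numeric order id, e.g. '12345'."""
    # Hypothetical placeholder - a real tool would query an order database or API here
    return f"Order {order_id} is in transit."
```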
|
||||
|
||||
## Toolkits
|
||||
|
||||
Often the set of tools an agent has access to is more important than a single tool.
|
||||
For this LangChain provides the concept of toolkits - groups of tools needed to accomplish specific objectives.
|
||||
There are generally around 3-5 tools in a toolkit.
|
||||
|
||||
LangChain provides a wide set of toolkits to get started.
|
||||
For a full list of toolkits, see [here](/docs/modules/agents/toolkits/)
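
For example, most toolkits expose a `get_tools()` method that returns the individual tools, which you can then pass to an agent. A sketch, assuming the file management toolkit is available under this import path in your version:

```python
from langchain.agents.agent_toolkits import FileManagementToolkit

# Scope the toolkit to a working directory; get_tools() returns the individual tools
toolkit = FileManagementToolkit(root_dir="/tmp/agent-workspace")
tools = toolkit.get_tools()
print([t.name for t in tools])
```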
|
||||
|
||||
## AgentExecutor
|
||||
|
||||
The agent executor is the runtime for an agent.
|
||||
This is what actually calls the agent and executes the actions it chooses.
|
||||
Pseudocode for this runtime is below:
|
||||
|
||||
```python
|
||||
next_action = agent.get_action(...)
|
||||
while next_action != AgentFinish:
|
||||
observation = run(next_action)
|
||||
next_action = agent.get_action(..., next_action, observation)
|
||||
return next_action
|
||||
```
|
||||
|
||||
While this may seem simple, there are several complexities this runtime handles for you, including:
|
||||
|
||||
1. Handling cases where the agent selects a non-existent tool
|
||||
2. Handling cases where the tool errors
|
||||
3. Handling cases where the agent produces output that cannot be parsed into a tool invocation
|
||||
4. Logging and observability at all levels (agent decisions, tool calls) either to stdout or [LangSmith](https://smith.langchain.com).
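
Some of these behaviors can be configured directly on the executor. For instance (a sketch, assuming an `agent` and `tools` defined as in the walkthrough below):

```python
from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    handle_parsing_errors=True,  # recover when the model output cannot be parsed into a tool call
    max_iterations=5,            # bound the agent-tool loop so it cannot run forever
)
```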
|
||||
|
||||
## Other types of agent runtimes
|
||||
|
||||
The `AgentExecutor` class is the main agent runtime supported by LangChain.
|
||||
However, there are other, more experimental runtimes we also support.
|
||||
These include:
|
||||
|
||||
- [Plan-and-execute Agent](/docs/use_cases/more/agents/autonomous_agents/plan_and_execute)
|
||||
- [Baby AGI](/docs/use_cases/more/agents/autonomous_agents/baby_agi)
|
||||
- [Auto GPT](/docs/use_cases/more/agents/autonomous_agents/autogpt)
|
||||
|
||||
## Get started
|
||||
|
||||
This will go over how to get started building an agent.
|
||||
We will create this agent from scratch, using LangChain Expression Language.
|
||||
We will then define custom tools, and then run it in a custom loop (we will also show how to use the standard LangChain `AgentExecutor`).
|
||||
|
||||
### Set up the agent
|
||||
|
||||
We first need to create our agent.
|
||||
This is the chain responsible for determining what action to take next.
|
||||
|
||||
In this example, we will use OpenAI Function Calling to create this agent.
|
||||
This is generally the most reliable way to create agents.
|
||||
In this example we will show what it is like to construct this agent from scratch, using LangChain Expression Language.
|
||||
|
||||
For this guide, we will construct a custom agent that has access to a custom tool.
|
||||
We are choosing this example because we think for most use cases you will NEED to customize either the agent or the tools.
|
||||
The tool we will give the agent is a tool to calculate the length of a word.
|
||||
This is useful because this is actually something LLMs can mess up due to tokenization.
|
||||
We will first create it WITHOUT memory, but we will then show how to add memory in.
|
||||
Memory is needed to enable conversation.
|
||||
|
||||
First, let's load the language model we're going to use to control the agent.
|
||||
```python
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
llm = ChatOpenAI(temperature=0)
|
||||
```
|
||||
|
||||
Next, let's define some tools to use.
|
||||
Let's write a really simple Python function to calculate the length of a word that is passed in.
|
||||
|
||||
|
||||
|
||||
```python
|
||||
from langchain.agents import tool
|
||||
|
||||
@tool
|
||||
def get_word_length(word: str) -> int:
|
||||
"""Returns the length of a word."""
|
||||
return len(word)
|
||||
|
||||
tools = [get_word_length]
|
||||
```
|
||||
|
||||
Now let us create the prompt.
|
||||
Because OpenAI Function Calling is fine-tuned for tool usage, we hardly need any instructions on how to reason or how to format the output.
|
||||
We will just have two input variables: `input` (for the user question) and `agent_scratchpad` (for any previous steps taken)
|
||||
|
||||
```python
|
||||
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
|
||||
prompt = ChatPromptTemplate.from_messages([
|
||||
("system", "You are very powerful assistant, but bad at calculating lengths of words."),
|
||||
("user", "{input}"),
|
||||
MessagesPlaceholder(variable_name="agent_scratchpad"),
|
||||
])
|
||||
```
|
||||
|
||||
How does the agent know what tools it can use?
|
||||
Those are passed in as a separate argument, so we can bind those as keyword arguments to the LLM.
|
||||
|
||||
```python
|
||||
from langchain.tools.render import format_tool_to_openai_function
|
||||
llm_with_tools = llm.bind(
|
||||
functions=[format_tool_to_openai_function(t) for t in tools]
|
||||
)
|
||||
```
|
||||
|
||||
Putting those pieces together, we can now create the agent.
|
||||
We will import two last utility functions: a component for formatting intermediate steps to messages, and a component for converting the output message into an agent action/agent finish.
|
||||
|
||||
|
||||
```python
|
||||
from langchain.agents.format_scratchpad import format_to_openai_functions
|
||||
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
|
||||
agent = {
|
||||
"input": lambda x: x["input"],
|
||||
"agent_scratchpad": lambda x: format_to_openai_functions(x['intermediate_steps'])
|
||||
} | prompt | llm_with_tools | OpenAIFunctionsAgentOutputParser()
|
||||
```
|
||||
|
||||
Now that we have our agent, let's play around with it!
|
||||
Let's pass in a simple question and empty intermediate steps and see what it returns:
|
||||
|
||||
```python
|
||||
agent.invoke({
|
||||
"input": "how many letters in the word educa?",
|
||||
"intermediate_steps": []
|
||||
})
|
||||
```
|
||||
|
||||
We can see that it responds with an `AgentAction` to take (it's actually an `AgentActionMessageLog` - a subclass of `AgentAction` which also tracks the full message log).
|
||||
|
||||
So this is just the first step - now we need to write a runtime for this.
|
||||
The simplest one is just one that continuously loops, calling the agent, then taking the action, and repeating until an `AgentFinish` is returned.
|
||||
Let's code that up below:
|
||||
|
||||
```python
|
||||
from langchain.schema.agent import AgentFinish
|
||||
intermediate_steps = []
|
||||
while True:
|
||||
output = agent.invoke({
|
||||
"input": "how many letters in the word educa?",
|
||||
"intermediate_steps": intermediate_steps
|
||||
})
|
||||
if isinstance(output, AgentFinish):
|
||||
final_result = output.return_values["output"]
|
||||
break
|
||||
else:
|
||||
print(output.tool, output.tool_input)
|
||||
tool = {
|
||||
"get_word_length": get_word_length
|
||||
}[output.tool]
|
||||
observation = tool.run(output.tool_input)
|
||||
intermediate_steps.append((output, observation))
|
||||
print(final_result)
|
||||
```
|
||||
|
||||
We can see this prints out the following:
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
get_word_length {'word': 'educa'}
|
||||
There are 5 letters in the word "educa".
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
Woo! It's working.
|
||||
|
||||
To simplify this a bit, we can import and use the `AgentExecutor` class.
|
||||
This bundles up all of the above and adds in error handling, early stopping, tracing, and other quality-of-life improvements that reduce the number of safeguards you need to write yourself.
|
||||
|
||||
|
||||
```python
|
||||
from langchain.agents import AgentExecutor
|
||||
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
|
||||
```
|
||||
|
||||
Now let's test it out!
|
||||
|
||||
|
||||
```python
|
||||
agent_executor.invoke({"input": "how many letters in the word educa?"})
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
|
||||
|
||||
> Entering new AgentExecutor chain...
|
||||
|
||||
Invoking: `get_word_length` with `{'word': 'educa'}`
|
||||
|
||||
5
|
||||
|
||||
There are 5 letters in the word "educa".
|
||||
|
||||
> Finished chain.
|
||||
|
||||
'There are 5 letters in the word "educa".'
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
This is great - we have an agent!
|
||||
However, this agent is stateless - it doesn't remember anything about previous interactions.
|
||||
This means you can't ask follow-up questions easily.
|
||||
Let's fix that by adding in memory.
|
||||
|
||||
In order to do this, we need to do two things:
|
||||
|
||||
1. Add a place for memory variables to go in the prompt
|
||||
2. Keep track of the chat history
|
||||
|
||||
First, let's add a place for memory in the prompt.
|
||||
We do this by adding a placeholder for messages with the key `"chat_history"`.
|
||||
Notice that we put this ABOVE the new user input (to follow the conversation flow).
|
||||
|
||||
```python
|
||||
from langchain.prompts import MessagesPlaceholder
|
||||
|
||||
MEMORY_KEY = "chat_history"
|
||||
prompt = ChatPromptTemplate.from_messages([
|
||||
("system", "You are very powerful assistant, but bad at calculating lengths of words."),
|
||||
MessagesPlaceholder(variable_name=MEMORY_KEY),
|
||||
("user", "{input}"),
|
||||
MessagesPlaceholder(variable_name="agent_scratchpad"),
|
||||
])
|
||||
```
|
||||
We can then set up a list to track the chat history
|
||||
```python
|
||||
from langchain.schema.messages import HumanMessage, AIMessage
|
||||
chat_history = []
|
||||
```
|
||||
|
||||
We can then put it all together!
|
||||
|
||||
```python
|
||||
agent = {
|
||||
"input": lambda x: x["input"],
|
||||
"agent_scratchpad": lambda x: format_to_openai_functions(x['intermediate_steps']),
|
||||
"chat_history": lambda x: x["chat_history"]
|
||||
} | prompt | llm_with_tools | OpenAIFunctionsAgentOutputParser()
|
||||
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
|
||||
```
|
||||
When running, we now need to track the inputs and outputs as chat history
|
||||
```python
|
||||
input1 = "how many letters in the word educa?"
|
||||
result = agent_executor.invoke({"input": input1, "chat_history": chat_history})
|
||||
chat_history.append(HumanMessage(content=input1))
|
||||
chat_history.append(AIMessage(content=result['output']))
|
||||
agent_executor.invoke({"input": "is that a real word?", "chat_history": chat_history})
|
||||
```
|
||||
|
||||
## Next Steps
|
||||
|
||||
Awesome! You've now run your first end-to-end agent.
|
||||
To dive deeper, you can:
|
||||
|
||||
- Check out all the different [agent types](/docs/modules/agents/agent_types/) supported
|
||||
- Learn all the controls for [AgentExecutor](/docs/modules/agents/how_to/)
|
||||
- See a full list of all the off-the-shelf [toolkits](/docs/modules/agents/toolkits/) we provide
|
||||
- Explore all the individual [tools](/docs/modules/agents/tools/) supported
|
||||
10
docs/docs/modules/agents/toolkits/index.mdx
Normal file
@@ -0,0 +1,10 @@
|
||||
---
|
||||
sidebar_position: 3
|
||||
---
|
||||
# Toolkits
|
||||
|
||||
:::info
|
||||
Head to [Integrations](/docs/integrations/toolkits/) for documentation on built-in toolkit integrations.
|
||||
:::
|
||||
|
||||
Toolkits are collections of tools that are designed to be used together for specific tasks and have convenience loading methods.
|
||||
@@ -29,8 +29,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Import things that are needed generically\n",
|
||||
"from langchain.chains import LLMMathChain\n",
|
||||
"from langchain.utilities import SerpAPIWrapper\n",
|
||||
"from langchain.chains import LLMMathChain\nfrom langchain.utilities import SerpAPIWrapper\n",
|
||||
"from langchain.agents import AgentType, initialize_agent\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.tools import BaseTool, StructuredTool, Tool, tool"
|
||||
@@ -231,7 +230,7 @@
|
||||
"id": "6f12eaf0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Subclassing the BaseTool\n",
|
||||
"### Subclassing the BaseTool class\n",
|
||||
"\n",
|
||||
"You can also directly subclass `BaseTool`. This is useful if you want more control over the instance variables or if you want to propagate callbacks to nested chains or other tools."
|
||||
]
|
||||
@@ -368,7 +367,7 @@
|
||||
"id": "824eaf74",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Using the decorator\n",
|
||||
"## Using the `tool` decorator\n",
|
||||
"\n",
|
||||
"To make it easier to define custom tools, a `@tool` decorator is provided. This decorator can be used to quickly create a `Tool` from a simple function. The decorator uses the function name as the tool name by default, but this can be overridden by passing a string as the first argument. Additionally, the decorator will use the function's docstring as the tool's description."
|
||||
]
|
||||
@@ -532,7 +531,7 @@
|
||||
"id": "fb0a38eb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Subclassing the BaseTool\n",
|
||||
"## Subclassing the BaseTool\n",
|
||||
"\n",
|
||||
"The BaseTool automatically infers the schema from the `_run` method's signature."
|
||||
]
|
||||
@@ -625,7 +624,7 @@
|
||||
"id": "7d68b0ac",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Using the decorator\n",
|
||||
"## Using the decorator\n",
|
||||
"\n",
|
||||
"The `tool` decorator creates a structured tool automatically if the signature has multiple arguments."
|
||||
]
|
||||
@@ -775,8 +774,7 @@
|
||||
"from langchain.agents import initialize_agent, Tool\n",
|
||||
"from langchain.agents import AgentType\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.chains import LLMMathChain\n",
|
||||
"from langchain.utilities import SerpAPIWrapper\n",
|
||||
"from langchain.chains import LLMMathChain\nfrom langchain.utilities import SerpAPIWrapper\n",
|
||||
"\n",
|
||||
"search = SerpAPIWrapper()\n",
|
||||
"tools = [\n",
|
||||
|
||||
@@ -4,17 +4,17 @@ sidebar_position: 2
|
||||
# Tools
|
||||
|
||||
:::info
|
||||
For documentation on built-in tool integrations, visit [Integrations](/docs/integrations/tools/).
|
||||
Head to [Integrations](/docs/integrations/tools/) for documentation on built-in tool integrations.
|
||||
:::
|
||||
|
||||
Tools are interfaces that an agent can use to interact with the world.
|
||||
|
||||
## Getting Started
|
||||
## Get started
|
||||
|
||||
Tools are functions that agents can use to interact with the world.
|
||||
These tools can be generic utilities (e.g. search), other chains, or even other agents.
|
||||
|
||||
Currently, tools can be loaded using the following snippet:
|
||||
Currently, tools can be loaded with the following snippet:
|
||||
|
||||
```python
|
||||
from langchain.agents import load_tools
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
---
|
||||
sidebar_position: 3
|
||||
---
|
||||
# Toolkits
|
||||
|
||||
:::info
|
||||
For documentation on built-in toolkit integrations, visit [Integrations](/docs/integrations/toolkits/).
|
||||
:::
|
||||
|
||||
Toolkits are collections of tools that are designed to be used together for specific tasks and have convenient loading methods.
|
||||
@@ -1,6 +1,5 @@
|
||||
---
|
||||
sidebar_position: 5
|
||||
sidebar_class_name: hidden
|
||||
---
|
||||
# Callbacks
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
"---\n",
|
||||
"sidebar_position: 2\n",
|
||||
"title: Chains\n",
|
||||
"sidebar_class_name: hidden\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_class_name: hidden
|
||||
---
|
||||
|
||||
# Retrieval
|
||||
|
||||
@@ -4,18 +4,16 @@ sidebar_class_name: hidden
|
||||
|
||||
# Modules
|
||||
|
||||
LangChain provides standard, extendable interfaces and external integrations for the following main modules:
|
||||
LangChain provides standard, extendable interfaces and external integrations for the following modules, listed from least to most complex:
|
||||
|
||||
#### [Model I/O](/docs/modules/model_io/)
|
||||
Interface with language models
|
||||
#### [Retrieval](/docs/modules/data_connection/)
|
||||
Interface with application-specific data
|
||||
#### [Chains](/docs/modules/chains/)
|
||||
Construct sequences of calls
|
||||
#### [Agents](/docs/modules/agents/)
|
||||
Let chains choose which tools to use given high-level directives
|
||||
|
||||
## Additional
|
||||
#### [Chains](/docs/modules/chains/)
|
||||
Common, building block compositions
|
||||
#### [Memory](/docs/modules/memory/)
|
||||
Persist application state between runs of a chain
|
||||
#### [Callbacks](/docs/modules/callbacks/)
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
---
|
||||
sidebar_position: 3
|
||||
sidebar_class_name: hidden
|
||||
---
|
||||
# Memory
|
||||
|
||||
|
||||
@@ -1,88 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d0df7646-b1e1-4014-a841-6dae9b3c50d9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Streaming\n",
|
||||
"\n",
|
||||
"All ChatModels implement the Runnable interface, which comes with default implementations of all methods, ie. ainvoke, batch, abatch, stream, astream. This gives all ChatModels basic support for streaming.\n",
|
||||
"\n",
|
||||
"Streaming support defaults to returning an Iterator (or AsyncIterator in the case of async streaming) of a single value, the final result returned by the underlying ChatModel provider. This obviously doesn't give you token-by-token streaming, which requires native support from the ChatModel provider, but ensures your code that expects an iterator of tokens can work for any of our ChatModel integrations.\n",
|
||||
"\n",
|
||||
"See which [integrations support token-by-token streaming here](/docs/integrations/chat/)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "03080a2c-45e8-45b9-a367-62816eae54c4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatAnthropic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "975c4f32-21f6-4a71-9091-f87b56347c33",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Here's a song I just improvised about goldfish on the moon:\n",
|
||||
"\n",
|
||||
"Floating in space, looking for a place \n",
|
||||
"To call their home, all alone\n",
|
||||
"Swimming through stars, these goldfish from Mars\n",
|
||||
"Left their fishbowl behind, a new life to find\n",
|
||||
"On the moon, where the craters loom\n",
|
||||
"Searching for food, maybe some lunar food\n",
|
||||
"Out of their depth, close to death\n",
|
||||
"How they wish, for just one small fish\n",
|
||||
"To join them up here, their future unclear\n",
|
||||
"On the moon, where the Earth looms\n",
|
||||
"Dreaming of home, filled with foam\n",
|
||||
"Their bodies adapt, continuing to last \n",
|
||||
"On the moon, where they learn to swoon\n",
|
||||
"Over cheese that astronauts tease\n",
|
||||
"As they stare back at Earth, the planet of birth\n",
|
||||
"These goldfish out of water, swim on and on\n",
|
||||
"Lunar pioneers, conquering their fears\n",
|
||||
"On the moon, where they happily swoon"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chat = ChatAnthropic(model=\"claude-2\")\n",
|
||||
"for chunk in chat.stream(\"Write me a song about goldfish on the moon\"):\n",
|
||||
" print(chunk.content, end=\"\", flush=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "poetry-venv",
|
||||
"language": "python",
|
||||
"name": "poetry-venv"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,181 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e5715368",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Tracking token usage\n",
|
||||
"\n",
|
||||
"This notebook goes over how to track your token usage for specific calls. It is currently only implemented for the OpenAI API.\n",
|
||||
"\n",
|
||||
"Let's first look at an extremely simple example of tracking token usage for a single Chat model call."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "9455db35",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.callbacks import get_openai_callback"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "d1c55cc9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = ChatOpenAI(model_name=\"gpt-4\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "31667d54",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tokens Used: 24\n",
|
||||
"\tPrompt Tokens: 11\n",
|
||||
"\tCompletion Tokens: 13\n",
|
||||
"Successful Requests: 1\n",
|
||||
"Total Cost (USD): $0.0011099999999999999\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with get_openai_callback() as cb:\n",
|
||||
" result = llm.invoke(\"Tell me a joke\")\n",
|
||||
" print(cb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c0ab6d27",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Anything inside the context manager will get tracked. Here's an example of using it to track multiple calls in sequence."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "e09420f4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"48\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with get_openai_callback() as cb:\n",
|
||||
" result = llm.invoke(\"Tell me a joke\")\n",
|
||||
" result2 = llm.invoke(\"Tell me a joke\")\n",
|
||||
" print(cb.total_tokens)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d8186e7b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If a chain or agent with multiple steps in it is used, it will track all those steps."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "5d1125c6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import load_tools\n",
|
||||
"from langchain.agents import initialize_agent\n",
|
||||
"from langchain.agents import AgentType\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"\n",
|
||||
"tools = load_tools([\"serpapi\", \"llm-math\"], llm=llm)\n",
|
||||
"agent = initialize_agent(tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "2f98c536",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `Search` with `Olivia Wilde's current boyfriend`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m['Things are looking golden for Olivia Wilde, as the actress has jumped back into the dating pool following her split from Harry Styles — read ...', \"“I did not want service to take place at the home of Olivia's current partner because Otis and Daisy might be present,” Sudeikis wrote in his ...\", \"February 2021: Olivia Wilde praises Harry Styles' modesty. One month after the duo made headlines with their budding romance, Wilde gave her new beau major ...\", 'An insider revealed to People that the new couple had been dating for some time. \"They were in Montecito, California this weekend for a wedding, ...', 'A source told People last year that Wilde and Styles were still friends despite deciding to take a break. \"He\\'s still touring and is now going ...', \"... love life. “He's your typical average Joe.” The source adds, “She's not giving too much away right now and wants to keep the relationship ...\", \"Multiple sources said the two were “taking a break” from dating because of distance and different priorities. “He's still touring and is now ...\", 'Comments. Filed under. celebrity couples · celebrity dating · harry styles · jason sudeikis · olivia wilde ... Now Holds A Darker MeaningNYPost.', '... dating during filming. The 39-year-old did however look very cosy with the comedian, although his relationship status is unknown. Olivia ...']\u001b[0m\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `Search` with `Harry Styles current age`\n",
|
||||
"responded: Olivia Wilde's current boyfriend is Harry Styles. Let me find out his age for you.\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m29 years\u001b[0m\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `Calculator` with `29 ^ 0.23`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[33;1m\u001b[1;3mAnswer: 2.169459462491557\u001b[0m\u001b[32;1m\u001b[1;3mHarry Styles' current age (29 years) raised to the 0.23 power is approximately 2.17.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"Total Tokens: 1929\n",
|
||||
"Prompt Tokens: 1799\n",
|
||||
"Completion Tokens: 130\n",
|
||||
"Total Cost (USD): $0.06176999999999999\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with get_openai_callback() as cb:\n",
|
||||
" response = agent.run(\n",
|
||||
" \"Who is Olivia Wilde's boyfriend? What is his current age raised to the 0.23 power?\"\n",
|
||||
" )\n",
|
||||
" print(f\"Total Tokens: {cb.total_tokens}\")\n",
|
||||
" print(f\"Prompt Tokens: {cb.prompt_tokens}\")\n",
|
||||
" print(f\"Completion Tokens: {cb.completion_tokens}\")\n",
|
||||
" print(f\"Total Cost (USD): ${cb.total_cost}\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -2,7 +2,6 @@
|
||||
sidebar_position: 0
|
||||
sidebar_custom_props:
|
||||
description: Interface with language models
|
||||
sidebar_class_name: hidden
|
||||
---
|
||||
|
||||
# Model I/O
|
||||
@@ -10,18 +9,8 @@ sidebar_class_name: hidden
|
||||
The core element of any language model application is...the model. LangChain gives you the building blocks to interface with any language model.
|
||||
|
||||
- [Prompts](/docs/modules/model_io/prompts/): Templatize, dynamically select, and manage model inputs
|
||||
- [Chat models](/docs/modules/model_io/chat/): Models that are backed by a language model but take a list of Chat Messages as input and return a Chat Message
|
||||
- [LLMs](/docs/modules/model_io/llms/): Models that take a text string as input and return a text string
|
||||
- [Language models](/docs/modules/model_io/models/): Make calls to language models through common interfaces
|
||||
- [Output parsers](/docs/modules/model_io/output_parsers/): Extract information from model outputs
|
||||
|
||||

|
||||
|
||||
|
||||
## LLMs vs Chat models
|
||||
|
||||
LLMs and chat models are subtly but importantly different. LLMs in LangChain refer to pure text completion models.
|
||||
The APIs they wrap take a string prompt as input and output a string completion. OpenAI's GPT-3 is implemented as an LLM.
|
||||
Chat models are often backed by LLMs but tuned specifically for having conversations.
|
||||
And, crucially, their provider APIs use a different interface than pure text completion models. Instead of a single string,
|
||||
they take a list of chat messages as input. Usually these messages are labeled with the speaker (usually one of "System",
|
||||
"AI", and "Human"). And they return an AI chat message as output. GPT-4 and Anthropic's Claude-2 are both implemented as chat models.
|
||||
|
||||
@@ -1,121 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f6574496-b360-4ffa-9523-7fd34a590164",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Async API\n",
|
||||
"\n",
|
||||
"All `LLM`s implement the `Runnable` interface, which comes with default implementations of all methods, ie. ainvoke, batch, abatch, stream, astream. This gives all `LLM`s basic support for asynchronous calls.\n",
|
||||
"\n",
|
||||
"Async support defaults to calling the `LLM`'s respective sync method in asyncio's default thread pool executor. This lets other async functions in your application make progress while the `LLM` is being executed, by moving this call to a background thread. Where `LLM`s providers have native implementations for async, that is used instead of the default `LLM` implementation.\n",
|
||||
"\n",
|
||||
"See which [integrations provide native async support here](/docs/integrations/llms/).\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "5e49e96c-0f88-466d-b3d3-ea0966bdf19e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[1mConcurrent executed in 1.03 seconds.\u001b[0m\n",
|
||||
"\u001b[1mSerial executed in 6.80 seconds.\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import time\n",
|
||||
"import asyncio\n",
|
||||
"\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"\n",
|
||||
"llm = OpenAI(model=\"gpt-3.5-turbo-instruct\", temperature=0.9)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def invoke_serially():\n",
|
||||
" for _ in range(10):\n",
|
||||
" resp = llm.invoke(\"Hello, how are you?\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def async_invoke(llm):\n",
|
||||
" resp = await llm.ainvoke(\"Hello, how are you?\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def invoke_concurrently():\n",
|
||||
" tasks = [async_invoke(llm) for _ in range(10)]\n",
|
||||
" await asyncio.gather(*tasks)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"s = time.perf_counter()\n",
|
||||
"# If running this outside of Jupyter, use asyncio.run(generate_concurrently())\n",
|
||||
"await invoke_concurrently()\n",
|
||||
"elapsed = time.perf_counter() - s\n",
|
||||
"print(\"\\033[1m\" + f\"Concurrent executed in {elapsed:0.2f} seconds.\" + \"\\033[0m\")\n",
|
||||
"\n",
|
||||
"s = time.perf_counter()\n",
|
||||
"invoke_serially()\n",
|
||||
"elapsed = time.perf_counter() - s\n",
|
||||
"print(\"\\033[1m\" + f\"Serial executed in {elapsed:0.2f} seconds.\" + \"\\033[0m\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e0b60caf-f99e-46a6-bdad-46b2cfea29ac",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To simplify things we could also just use `abatch` to run a batch concurrently:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "bd11000f-2232-491a-9f70-abcbb4611fbf",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[1mBatch executed in 1.31 seconds.\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"s = time.perf_counter()\n",
|
||||
"# If running this outside of Jupyter, use asyncio.run(generate_concurrently())\n",
|
||||
"await llm.abatch([\"Hello, how are you?\"] * 10)\n",
|
||||
"elapsed = time.perf_counter() - s\n",
|
||||
"print(\"\\033[1m\" + f\"Batch executed in {elapsed:0.2f} seconds.\" + \"\\033[0m\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,179 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "73f9bf40",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Serialization\n",
|
||||
"\n",
|
||||
"LangChain Python and LangChain JS share a serialization scheme. You can check if a LangChain class is serializable by running with the `is_lc_serializable` class method."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "9c9fb6ff",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.llms.loading import load_llm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "441d28cb-e898-47fd-8f27-f620a9cd6c34",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"OpenAI.is_lc_serializable()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "87b8a7c6-35b7-4fab-938b-4d05e9cc06f1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(model=\"gpt-3.5-turbo-instruct\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "88ce018b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Dump\n",
|
||||
"\n",
|
||||
"Any serializable object can be serialized to a dict or json string."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "f12b28f3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'lc': 1,\n",
|
||||
" 'type': 'constructor',\n",
|
||||
" 'id': ['langchain', 'llms', 'openai', 'OpenAI'],\n",
|
||||
" 'kwargs': {'model': 'gpt-3.5-turbo-instruct',\n",
|
||||
" 'openai_api_key': {'lc': 1, 'type': 'secret', 'id': ['OPENAI_API_KEY']}}}"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.load import dumpd, dumps\n",
|
||||
"\n",
|
||||
"dumpd(llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "095b1d56",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'{\"lc\": 1, \"type\": \"constructor\", \"id\": [\"langchain\", \"llms\", \"openai\", \"OpenAI\"], \"kwargs\": {\"model\": \"gpt-3.5-turbo-instruct\", \"openai_api_key\": {\"lc\": 1, \"type\": \"secret\", \"id\": [\"OPENAI_API_KEY\"]}}}'"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dumps(llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ab3e4223",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load\n",
|
||||
"\n",
|
||||
"Any serialized object can be loaded."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "68e45b1c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.load import loads\n",
|
||||
"from langchain.load.load import load\n",
|
||||
"\n",
|
||||
"loaded_1 = load(dumpd(llm))\n",
|
||||
"loaded_2 = loads(dumps(llm))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "c9272667-7fe3-4e5f-a1cc-69e8829b9e8f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"I am an AI and do not have the capability to experience emotions. But thank you for asking. Is there anything I can assist you with?\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(loaded_1.invoke(\"How are you doing?\"))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,112 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fc37c39a-7406-4c13-a754-b8e95fd970a0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Streaming\n",
|
||||
"\n",
|
||||
"All `LLM`s implement the `Runnable` interface, which comes with default implementations of all methods, ie. ainvoke, batch, abatch, stream, astream. This gives all `LLM`s basic support for streaming.\n",
|
||||
"\n",
|
||||
"Streaming support defaults to returning an Iterator (or AsyncIterator in the case of async streaming) of a single value, the final result returned by the underlying `LLM` provider. This obviously doesn't give you token-by-token streaming, which requires native support from the `LLM` provider, but ensures your code that expects an iterator of tokens can work for any of our `LLM` integrations.\n",
|
||||
"\n",
|
||||
"See which [integrations support token-by-token streaming here](/docs/integrations/llms/)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "9baa0527-b97d-41d3-babd-472ec5e59e3e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"Verse 1:\n",
|
||||
"Bubbles dancing in my glass\n",
|
||||
"Clear and crisp, it's such a blast\n",
|
||||
"Refreshing taste, it's like a dream\n",
|
||||
"Sparkling water, you make me beam\n",
|
||||
"\n",
|
||||
"Chorus:\n",
|
||||
"Oh sparkling water, you're my delight\n",
|
||||
"With every sip, you make me feel so right\n",
|
||||
"You're like a party in my mouth\n",
|
||||
"I can't get enough, I'm hooked no doubt\n",
|
||||
"\n",
|
||||
"Verse 2:\n",
|
||||
"No sugar, no calories, just pure bliss\n",
|
||||
"You're the perfect drink, I must confess\n",
|
||||
"From lemon to lime, so many flavors to choose\n",
|
||||
"Sparkling water, you never fail to amuse\n",
|
||||
"\n",
|
||||
"Chorus:\n",
|
||||
"Oh sparkling water, you're my delight\n",
|
||||
"With every sip, you make me feel so right\n",
|
||||
"You're like a party in my mouth\n",
|
||||
"I can't get enough, I'm hooked no doubt\n",
|
||||
"\n",
|
||||
"Bridge:\n",
|
||||
"Some may say you're just plain water\n",
|
||||
"But to me, you're so much more\n",
|
||||
"You bring a sparkle to my day\n",
|
||||
"In every single way\n",
|
||||
"\n",
|
||||
"Chorus:\n",
|
||||
"Oh sparkling water, you're my delight\n",
|
||||
"With every sip, you make me feel so right\n",
|
||||
"You're like a party in my mouth\n",
|
||||
"I can't get enough, I'm hooked no doubt\n",
|
||||
"\n",
|
||||
"Outro:\n",
|
||||
"So here's to you, my dear sparkling water\n",
|
||||
"You'll always be my go-to drink forever\n",
|
||||
"With your effervescence and refreshing taste\n",
|
||||
"You'll always have a special place."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"llm = OpenAI(model=\"gpt-3.5-turbo-instruct\", temperature=0, max_tokens=512)\n",
|
||||
"for chunk in llm.stream(\"Write me a song about sparkling water.\"):\n",
|
||||
" print(chunk, end=\"\", flush=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d81140f2-384b-4470-bf93-957013c6620b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -24,7 +24,9 @@
|
||||
"While chat models use language models under the hood, the interface they use is a bit different.\n",
|
||||
"Rather than using a \"text in, text out\" API, they use an interface where \"chat messages\" are the inputs and outputs.\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"## Get started\n",
|
||||
"\n",
|
||||
"### Setup\n",
|
||||
"\n",
|
||||
"For this example we'll need to install the OpenAI Python package:\n",
|
||||
"\n",
|
||||
@@ -77,7 +79,7 @@
|
||||
"id": "4ca3a777-8641-42fb-9e02-a7770a633d29",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Messages\n",
|
||||
"### Messages\n",
|
||||
"\n",
|
||||
"The chat model interface is based around messages rather than raw text.\n",
|
||||
"The types of messages currently supported in LangChain are `AIMessage`, `HumanMessage`, `SystemMessage`, `FunctionMessage` and `ChatMessage` -- `ChatMessage` takes in an arbitrary role parameter. Most of the time, you'll just be dealing with `HumanMessage`, `AIMessage`, and `SystemMessage`"
|
||||
@@ -88,7 +90,7 @@
|
||||
"id": "54e5088f-98dd-437e-bac8-99b750946b29",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## LCEL\n",
|
||||
"### LCEL\n",
|
||||
"\n",
|
||||
"Chat models implement the [Runnable interface](/docs/expression_language/interface), the basic building block of the [LangChain Expression Language (LCEL)](/docs/expression_language/). This means they support `invoke`, `ainvoke`, `stream`, `astream`, `batch`, `abatch`, `astream_log` calls.\n",
|
||||
"\n",
|
||||
@@ -588,30 +590,12 @@
|
||||
" print(chunk)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a4a7d783-4ddf-42e7-b143-8050891663c2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## [LangSmith](/docs/langsmith)\n",
|
||||
"\n",
|
||||
"All `ChatModel`s come with built-in LangSmith tracing. Just set the following environment variables:\n",
|
||||
"```bash\n",
|
||||
"export LANGCHAIN_TRACING_V2=\"true\"\n",
|
||||
"export LANGCHAIN_API_KEY=<your-api-key>\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"and any `ChatModel` invocation (whether it's nested in a chain or not) will automatically be traced. A trace will include inputs, outputs, latency, token usage, invocation params, environment params, and more. See an example here: https://smith.langchain.com/public/a54192ae-dd5c-4f7a-88d1-daa1eaba1af7/r.\n",
|
||||
"\n",
|
||||
"In LangSmith you can then provide feedback for any trace, compile annotated datasets for evals, debug performance in the playground, and more."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7b289727-3983-43f7-a8b2-dd5582d49b6a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## [Legacy] `__call__`\n",
|
||||
"### `__call__`\n",
|
||||
"#### Messages in -> message out\n",
|
||||
"\n",
|
||||
"For convenience you can also treat chat models as callables. You can get chat completions by passing one or more messages to the chat model. The response will be a message."
|
||||
@@ -686,7 +670,7 @@
|
||||
"id": "2b996c69-fd5d-4889-af4a-19dfd2833021",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## [Legacy] `generate`\n",
|
||||
"### `generate`\n",
|
||||
"#### Batch calls, richer outputs\n",
|
||||
"\n",
|
||||
"You can go one step further and generate completions for multiple sets of messages using `generate`. This returns an `LLMResult` with an additional `message` parameter. This will include additional information about each generation beyond the returned message (e.g. the finish reason) and additional information about the full API call (e.g. total tokens used)."
|
||||
20
docs/docs/modules/model_io/models/chat/llm_chain.mdx
Normal file
@@ -0,0 +1,20 @@
|
||||
# LLMChain
|
||||
|
||||
You can use the existing LLMChain in a very similar way to before - provide a prompt and a model.
|
||||
|
||||
```python
|
||||
chain = LLMChain(llm=chat, prompt=chat_prompt)
|
||||
```
|
||||
|
||||
|
||||
```python
|
||||
chain.run(input_language="English", output_language="French", text="I love programming.")
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
"J'adore la programmation."
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
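
The snippet above assumes `chat` and `chat_prompt` are already defined. A self-contained sketch might look like this (the translation template mirrors the example output and is an assumption):

```python
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

chat = ChatOpenAI(temperature=0)
chat_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant that translates {input_language} to {output_language}."),
        ("human", "{text}"),
    ]
)

chain = LLMChain(llm=chat, prompt=chat_prompt)
chain.run(input_language="English", output_language="French", text="I love programming.")
```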
|
||||
63
docs/docs/modules/model_io/models/chat/streaming.mdx
Normal file
@@ -0,0 +1,63 @@
|
||||
# Streaming
|
||||
|
||||
Some chat models provide a streaming response. This means that instead of waiting for the entire response to be returned, you can start processing it as soon as it's available. This is useful if you want to display the response to the user, or process it, as it's being generated.
|
||||
|
||||
```python
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain.schema import (
|
||||
HumanMessage,
|
||||
)
|
||||
|
||||
|
||||
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
||||
chat = ChatOpenAI(streaming=True, callbacks=[StreamingStdOutCallbackHandler()], temperature=0)
|
||||
resp = chat([HumanMessage(content="Write me a song about sparkling water.")])
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
Verse 1:
|
||||
Bubbles rising to the top
|
||||
A refreshing drink that never stops
|
||||
Clear and crisp, it's pure delight
|
||||
A taste that's sure to excite
|
||||
|
||||
Chorus:
|
||||
Sparkling water, oh so fine
|
||||
A drink that's always on my mind
|
||||
With every sip, I feel alive
|
||||
Sparkling water, you're my vibe
|
||||
|
||||
Verse 2:
|
||||
No sugar, no calories, just pure bliss
|
||||
A drink that's hard to resist
|
||||
It's the perfect way to quench my thirst
|
||||
A drink that always comes first
|
||||
|
||||
Chorus:
|
||||
Sparkling water, oh so fine
|
||||
A drink that's always on my mind
|
||||
With every sip, I feel alive
|
||||
Sparkling water, you're my vibe
|
||||
|
||||
Bridge:
|
||||
From the mountains to the sea
|
||||
Sparkling water, you're the key
|
||||
To a healthy life, a happy soul
|
||||
A drink that makes me feel whole
|
||||
|
||||
Chorus:
|
||||
Sparkling water, oh so fine
|
||||
A drink that's always on my mind
|
||||
With every sip, I feel alive
|
||||
Sparkling water, you're my vibe
|
||||
|
||||
Outro:
|
||||
Sparkling water, you're the one
|
||||
A drink that's always so much fun
|
||||
I'll never let you go, my friend
|
||||
Sparkling
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
23
docs/docs/modules/model_io/models/index.mdx
Normal file
@@ -0,0 +1,23 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
---
|
||||
# Language models
|
||||
|
||||
LangChain provides interfaces and integrations for two types of models:
|
||||
|
||||
- [LLMs](/docs/modules/model_io/models/llms/): Models that take a text string as input and return a text string
|
||||
- [Chat models](/docs/modules/model_io/models/chat/): Models that are backed by a language model but take a list of Chat Messages as input and return a Chat Message
|
||||
|
||||
## LLMs vs chat models
|
||||
|
||||
LLMs and chat models are subtly but importantly different. LLMs in LangChain refer to pure text completion models.
|
||||
The APIs they wrap take a string prompt as input and output a string completion. OpenAI's GPT-3 is implemented as an LLM.
|
||||
Chat models are often backed by LLMs but tuned specifically for having conversations.
|
||||
And, crucially, their provider APIs use a different interface than pure text completion models. Instead of a single string,
|
||||
they take a list of chat messages as input. Usually these messages are labeled with the speaker (usually one of "System",
|
||||
"AI", and "Human"). And they return an AI chat message as output. GPT-4 and Anthropic's Claude are both implemented as chat models.
|
||||
|
||||
To make it possible to swap LLMs and chat models, both implement the Base Language Model interface. This includes common
|
||||
methods "predict", which takes a string and returns a string, and "predict messages", which takes messages and returns a message.
|
||||
If you are using a specific model it's recommended you use the methods specific to that model class (i.e., "predict" for LLMs and "predict messages" for chat models),
|
||||
but if you're creating an application that should work with different types of models the shared interface can be helpful.
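
A short sketch of the shared interface (assuming OpenAI credentials are configured):

```python
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.schema import HumanMessage

llm = OpenAI()
chat_model = ChatOpenAI()

# Both model types expose the same base interface.
llm.predict("Say hi")                                          # -> str
chat_model.predict("Say hi")                                   # -> str
llm.predict_messages([HumanMessage(content="Say hi")])         # -> message
chat_model.predict_messages([HumanMessage(content="Say hi")])  # -> message
```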
|
||||
160
docs/docs/modules/model_io/models/llms/async_llm.ipynb
Normal file
@@ -0,0 +1,160 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f6574496-b360-4ffa-9523-7fd34a590164",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Async API\n",
|
||||
"\n",
|
||||
"LangChain provides async support for LLMs by leveraging the [asyncio](https://docs.python.org/3/library/asyncio.html) library.\n",
|
||||
"\n",
|
||||
"Async support is particularly useful for calling multiple LLMs concurrently, as these calls are network-bound. Currently, `OpenAI`, `PromptLayerOpenAI`, `ChatOpenAI`, `Anthropic` and `Cohere` are supported, but async support for other LLMs is on the roadmap.\n",
|
||||
"\n",
|
||||
"You can use the `agenerate` method to call an OpenAI LLM asynchronously."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "5e49e96c-0f88-466d-b3d3-ea0966bdf19e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, how about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about yourself?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you! How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you! How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about you?\n",
|
||||
"\u001B[1mConcurrent executed in 1.39 seconds.\u001B[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about you?\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about yourself?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thanks for asking. How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thanks! How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about yourself?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thanks for asking. How about you?\n",
|
||||
"\u001B[1mSerial executed in 5.77 seconds.\u001B[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import time\n",
|
||||
"import asyncio\n",
|
||||
"\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def generate_serially():\n",
|
||||
" llm = OpenAI(temperature=0.9)\n",
|
||||
" for _ in range(10):\n",
|
||||
" resp = llm.generate([\"Hello, how are you?\"])\n",
|
||||
" print(resp.generations[0][0].text)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def async_generate(llm):\n",
|
||||
" resp = await llm.agenerate([\"Hello, how are you?\"])\n",
|
||||
" print(resp.generations[0][0].text)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def generate_concurrently():\n",
|
||||
" llm = OpenAI(temperature=0.9)\n",
|
||||
" tasks = [async_generate(llm) for _ in range(10)]\n",
|
||||
" await asyncio.gather(*tasks)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"s = time.perf_counter()\n",
|
||||
"# If running this outside of Jupyter, use asyncio.run(generate_concurrently())\n",
|
||||
"await generate_concurrently()\n",
|
||||
"elapsed = time.perf_counter() - s\n",
|
||||
"print(\"\\033[1m\" + f\"Concurrent executed in {elapsed:0.2f} seconds.\" + \"\\033[0m\")\n",
|
||||
"\n",
|
||||
"s = time.perf_counter()\n",
|
||||
"generate_serially()\n",
|
||||
"elapsed = time.perf_counter() - s\n",
|
||||
"print(\"\\033[1m\" + f\"Serial executed in {elapsed:0.2f} seconds.\" + \"\\033[0m\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e1d3a966-3a27-44e8-9441-ed72f01b86f4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -6,7 +6,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_position: 2\n",
|
||||
"sidebar_position: 0\n",
|
||||
"title: LLMs\n",
|
||||
"---"
|
||||
]
|
||||
@@ -23,6 +23,7 @@
|
||||
"Large Language Models (LLMs) are a core component of LangChain.\n",
|
||||
"LangChain does not serve its own LLMs, but rather provides a standard interface for interacting with many different LLMs.\n",
|
||||
"\n",
|
||||
"## Get started\n",
|
||||
"\n",
|
||||
"There are lots of LLM providers (OpenAI, Cohere, Hugging Face, etc) - the `LLM` class is designed to provide a standard interface for all of them.\n",
|
||||
"\n",
|
||||
@@ -85,7 +86,7 @@
|
||||
"id": "966b5d74-defd-4f89-8c37-a68ca4a161d9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## LCEL\n",
|
||||
"### LCEL\n",
|
||||
"\n",
|
||||
"LLMs implement the [Runnable interface](/docs/expression_language/interface), the basic building block of the [LangChain Expression Language (LCEL)](/docs/expression_language/). This means they support `invoke`, `ainvoke`, `stream`, `astream`, `batch`, `abatch`, `astream_log` calls.\n",
|
||||
"\n",
|
||||
@@ -454,30 +455,12 @@
|
||||
" print(chunk)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "09108687-ed15-468b-9ac5-674e75785199",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## [LangSmith](/docs/langsmith)\n",
|
||||
"\n",
|
||||
"All `LLM`s come with built-in LangSmith tracing. Just set the following environment variables:\n",
|
||||
"```bash\n",
|
||||
"export LANGCHAIN_TRACING_V2=\"true\"\n",
|
||||
"export LANGCHAIN_API_KEY=<your-api-key>\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"and any `LLM` invocation (whether it's nested in a chain or not) will automatically be traced. A trace will include inputs, outputs, latency, token usage, invocation params, environment params, and more. See an example here: https://smith.langchain.com/public/7924621a-ff58-4b1c-a2a2-035a354ef434/r.\n",
|
||||
"\n",
|
||||
"In LangSmith you can then provide feedback for any trace, compile annotated datasets for evals, debug performance in the playground, and more."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "20ef52be-6e51-43a3-be2a-b1a862d5fc80",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### [Legacy] `__call__`: string in -> string out\n",
|
||||
"### `__call__`: string in -> string out\n",
|
||||
"The simplest way to use an LLM is a callable: pass in a string, get a string completion."
|
||||
]
|
||||
},
|
||||
@@ -507,7 +490,7 @@
|
||||
"id": "7b4ad9e5-50ec-4031-bfaa-23a0130da3c6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### [Legacy] `generate`: batch calls, richer outputs\n",
|
||||
"### `generate`: batch calls, richer outputs\n",
|
||||
"`generate` lets you call the model with a list of strings, getting back a more complete response than just the text. This complete response can include things like multiple top responses and other LLM provider-specific information:\n",
|
||||
"\n"
|
||||
]
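
A brief sketch of a batch `generate` call (prompt texts are illustrative):

```python
from langchain.llms import OpenAI

llm = OpenAI()
result = llm.generate(["Tell me a joke", "Tell me a poem"] * 5)

len(result.generations)  # 10 generations, one list per input prompt
result.llm_output        # provider-specific info, e.g. token usage
```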
|
||||
12
docs/docs/modules/model_io/models/llms/llm.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"model_name": "text-davinci-003",
|
||||
"temperature": 0.7,
|
||||
"max_tokens": 256,
|
||||
"top_p": 1.0,
|
||||
"frequency_penalty": 0.0,
|
||||
"presence_penalty": 0.0,
|
||||
"n": 1,
|
||||
"best_of": 1,
|
||||
"request_timeout": null,
|
||||
"_type": "openai"
|
||||
}
|
||||
10
docs/docs/modules/model_io/models/llms/llm.yaml
Normal file
@@ -0,0 +1,10 @@
|
||||
_type: openai
|
||||
best_of: 1
|
||||
frequency_penalty: 0.0
|
||||
max_tokens: 256
|
||||
model_name: text-davinci-003
|
||||
n: 1
|
||||
presence_penalty: 0.0
|
||||
request_timeout: null
|
||||
temperature: 0.7
|
||||
top_p: 1.0
|
||||
168
docs/docs/modules/model_io/models/llms/llm_serialization.ipynb
Normal file
@@ -0,0 +1,168 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "73f9bf40",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Serialization\n",
|
||||
"\n",
|
||||
"This notebook walks through how to write and read an LLM Configuration to and from disk. This is useful if you want to save the configuration for a given LLM (e.g., the provider, the temperature, etc)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "9c9fb6ff",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.llms.loading import load_llm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "88ce018b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Loading\n",
|
||||
"First, lets go over loading an LLM from disk. LLMs can be saved on disk in two formats: json or yaml. No matter the extension, they are loaded in the same way."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "f12b28f3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\r\n",
|
||||
" \"model_name\": \"text-davinci-003\",\r\n",
|
||||
" \"temperature\": 0.7,\r\n",
|
||||
" \"max_tokens\": 256,\r\n",
|
||||
" \"top_p\": 1.0,\r\n",
|
||||
" \"frequency_penalty\": 0.0,\r\n",
|
||||
" \"presence_penalty\": 0.0,\r\n",
|
||||
" \"n\": 1,\r\n",
|
||||
" \"best_of\": 1,\r\n",
|
||||
" \"request_timeout\": null,\r\n",
|
||||
" \"_type\": \"openai\"\r\n",
|
||||
"}"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!cat llm.json"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "9ab709fc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = load_llm(\"llm.json\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "095b1d56",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"_type: openai\r\n",
|
||||
"best_of: 1\r\n",
|
||||
"frequency_penalty: 0.0\r\n",
|
||||
"max_tokens: 256\r\n",
|
||||
"model_name: text-davinci-003\r\n",
|
||||
"n: 1\r\n",
|
||||
"presence_penalty: 0.0\r\n",
|
||||
"request_timeout: null\r\n",
|
||||
"temperature: 0.7\r\n",
|
||||
"top_p: 1.0\r\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!cat llm.yaml"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "8cafaafe",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = load_llm(\"llm.yaml\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ab3e4223",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Saving\n",
|
||||
"If you want to go from an LLM in memory to a serialized version of it, you can do so easily by calling the `.save` method. Again, this supports both json and yaml."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "b38f685d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm.save(\"llm.json\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "b7365503",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm.save(\"llm.yaml\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "68e45b1c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
74
docs/docs/modules/model_io/models/llms/streaming_llm.mdx
Normal file
@@ -0,0 +1,74 @@
|
||||
# Streaming
|
||||
|
||||
Some LLMs provide a streaming response. This means that instead of waiting for the entire response to be returned, you can start processing it as soon as it's available. This is useful if you want to display the response to the user, or process it, as it's being generated.
|
||||
|
||||
Currently, we support streaming for a broad range of LLM implementations, including but not limited to `OpenAI`, `ChatOpenAI`, `ChatAnthropic`, `Hugging Face Text Generation Inference`, and `Replicate`; streaming support now covers most model integrations. To use streaming, pass a [`CallbackHandler`](https://github.com/langchain-ai/langchain/blob/master/langchain/callbacks/base.py) that implements `on_llm_new_token`. In this example, we are using `StreamingStdOutCallbackHandler`.
|
||||
```python
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
||||
|
||||
|
||||
llm = OpenAI(streaming=True, callbacks=[StreamingStdOutCallbackHandler()], temperature=0)
|
||||
resp = llm("Write me a song about sparkling water.")
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
Verse 1
|
||||
I'm sippin' on sparkling water,
|
||||
It's so refreshing and light,
|
||||
It's the perfect way to quench my thirst
|
||||
On a hot summer night.
|
||||
|
||||
Chorus
|
||||
Sparkling water, sparkling water,
|
||||
It's the best way to stay hydrated,
|
||||
It's so crisp and so clean,
|
||||
It's the perfect way to stay refreshed.
|
||||
|
||||
Verse 2
|
||||
I'm sippin' on sparkling water,
|
||||
It's so bubbly and bright,
|
||||
It's the perfect way to cool me down
|
||||
On a hot summer night.
|
||||
|
||||
Chorus
|
||||
Sparkling water, sparkling water,
|
||||
It's the best way to stay hydrated,
|
||||
It's so crisp and so clean,
|
||||
It's the perfect way to stay refreshed.
|
||||
|
||||
Verse 3
|
||||
I'm sippin' on sparkling water,
|
||||
It's so light and so clear,
|
||||
It's the perfect way to keep me cool
|
||||
On a hot summer night.
|
||||
|
||||
Chorus
|
||||
Sparkling water, sparkling water,
|
||||
It's the best way to stay hydrated,
|
||||
It's so crisp and so clean,
|
||||
It's the perfect way to stay refreshed.
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
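
The built-in `StreamingStdOutCallbackHandler` above simply writes each token to stdout. A custom handler only needs to implement `on_llm_new_token`; a minimal sketch (the `CollectTokensHandler` name is hypothetical):

```python
from langchain.callbacks.base import BaseCallbackHandler
from langchain.llms import OpenAI


class CollectTokensHandler(BaseCallbackHandler):
    """Collect streamed tokens in a list instead of printing them."""

    def __init__(self):
        self.tokens = []

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        self.tokens.append(token)


handler = CollectTokensHandler()
llm = OpenAI(streaming=True, callbacks=[handler], temperature=0)
llm("Write me a haiku about sparkling water.")
print("".join(handler.tokens))
```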
|
||||
|
||||
We still have access to the final `LLMResult` when using `generate`. However, `token_usage` is not currently supported for streaming.
|
||||
|
||||
|
||||
```python
|
||||
llm.generate(["Tell me a joke."])
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
Q: What did the fish say when it hit the wall?
|
||||
A: Dam!
|
||||
|
||||
|
||||
LLMResult(generations=[[Generation(text='\n\nQ: What did the fish say when it hit the wall?\nA: Dam!', generation_info={'finish_reason': 'stop', 'logprobs': None})]], llm_output={'token_usage': {}, 'model_name': 'text-davinci-003'})
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
@@ -14,7 +14,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"id": "9455db35",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -25,17 +25,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"id": "d1c55cc9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(model_name=\"gpt-3.5-turbo-instruct\", n=2, best_of=2)"
|
||||
"llm = OpenAI(model_name=\"text-davinci-002\", n=2, best_of=2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"id": "31667d54",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -43,17 +43,17 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tokens Used: 37\n",
|
||||
"Tokens Used: 42\n",
|
||||
"\tPrompt Tokens: 4\n",
|
||||
"\tCompletion Tokens: 33\n",
|
||||
"\tCompletion Tokens: 38\n",
|
||||
"Successful Requests: 1\n",
|
||||
"Total Cost (USD): $7.2e-05\n"
|
||||
"Total Cost (USD): $0.00084\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with get_openai_callback() as cb:\n",
|
||||
" result = llm.invoke(\"Tell me a joke\")\n",
|
||||
" result = llm(\"Tell me a joke\")\n",
|
||||
" print(cb)"
|
||||
]
|
||||
},
|
||||
@@ -67,7 +67,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"id": "e09420f4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -75,14 +75,14 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"72\n"
|
||||
"91\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with get_openai_callback() as cb:\n",
|
||||
" result = llm.invoke(\"Tell me a joke\")\n",
|
||||
" result2 = llm.invoke(\"Tell me a joke\")\n",
|
||||
" result = llm(\"Tell me a joke\")\n",
|
||||
" result2 = llm(\"Tell me a joke\")\n",
|
||||
" print(cb.total_tokens)"
|
||||
]
|
||||
},
|
||||
@@ -96,7 +96,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"id": "5d1125c6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -115,7 +115,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 8,
|
||||
"id": "2f98c536",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -129,23 +129,24 @@
|
||||
"\u001b[32;1m\u001b[1;3m I need to find out who Olivia Wilde's boyfriend is and then calculate his age raised to the 0.23 power.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"Olivia Wilde boyfriend\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m[\"Olivia Wilde and Harry Styles took fans by surprise with their whirlwind romance, which began when they met on the set of Don't Worry Darling.\", 'Olivia Wilde started dating Harry Styles after ending her years-long engagement to Jason Sudeikis — see their relationship timeline.', 'Olivia Wilde and Harry Styles were spotted early on in their relationship walking around London. (. Image ...', \"Looks like Olivia Wilde and Jason Sudeikis are starting 2023 on good terms. Amid their highly publicized custody battle – and the actress' ...\", 'The two started dating after Wilde split up with actor Jason Sudeikisin 2020. However, their relationship came to an end last November.', \"Olivia Wilde and Harry Styles started dating during the filming of Don't Worry Darling. While the movie got a lot of backlash because of the ...\", \"Here's what we know so far about Harry Styles and Olivia Wilde's relationship.\", 'Olivia and the Grammy winner kept their romance out of the spotlight as their relationship began just two months after her split from ex-fiancé ...', \"Harry Styles and Olivia Wilde first met on the set of Don't Worry Darling and stepped out as a couple in January 2021. Relive all their biggest relationship ...\"]\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m Harry Styles is Olivia Wilde's boyfriend.\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mSudeikis and Wilde's relationship ended in November 2020. Wilde was publicly served with court documents regarding child custody while she was presenting Don't Worry Darling at CinemaCon 2022. In January 2021, Wilde began dating singer Harry Styles after meeting during the filming of Don't Worry Darling.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to find out Harry Styles' age.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"Harry Styles age\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m29 years\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to calculate 29 raised to the 0.23 power.\n",
|
||||
"Action: Calculator\n",
|
||||
"Action Input: 29^0.23\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 2.169459462491557\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 2.169459462491557\n",
|
||||
"\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n",
|
||||
"Final Answer: Harry Styles is Olivia Wilde's boyfriend and his current age raised to the 0.23 power is 2.169459462491557.\u001b[0m\n",
|
||||
"Final Answer: Harry Styles, Olivia Wilde's boyfriend, is 29 years old and his age raised to the 0.23 power is 2.169459462491557.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"Total Tokens: 2205\n",
|
||||
"Prompt Tokens: 2053\n",
|
||||
"Completion Tokens: 152\n",
|
||||
"Total Cost (USD): $0.0441\n"
|
||||
"Total Tokens: 1506\n",
|
||||
"Prompt Tokens: 1350\n",
|
||||
"Completion Tokens: 156\n",
|
||||
"Total Cost (USD): $0.03012\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -162,7 +163,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 7,
|
||||
"id": "80ca77a3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -185,7 +186,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
@@ -6,7 +6,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_position: 3\n",
|
||||
"sidebar_position: 2\n",
|
||||
"title: Output parsers\n",
|
||||
"---"
|
||||
]
|
||||
|
||||
@@ -42,7 +42,7 @@
|
||||
"\n",
|
||||
"The chat model interface is based around messages rather than raw text. Several components are important to consider for chat:\n",
|
||||
"\n",
|
||||
"* `chat model`: See [here](/docs/integrations/chat) for a list of chat model integrations and [here](/docs/modules/model_io/chat) for documentation on the chat model interface in LangChain. You can use `LLMs` (see [here](/docs/modules/model_io/llms)) for chatbots as well, but chat models have a more conversational tone and natively support a message interface.\n",
|
||||
"* `chat model`: See [here](/docs/integrations/chat) for a list of chat model integrations and [here](/docs/modules/model_io/models/chat) for documentation on the chat model interface in LangChain. You can use `LLMs` (see [here](/docs/modules/model_io/models/llms)) for chatbots as well, but chat models have a more conversational tone and natively support a message interface.\n",
|
||||
"* `prompt template`: Prompt templates make it easy to assemble prompts that combine default messages, user input, chat history, and (optionally) additional retrieved context.\n",
|
||||
"* `memory`: [See here](/docs/modules/memory/) for in-depth documentation on memory types\n",
|
||||
"* `retriever` (optional): [See here](/docs/modules/data_connection/retrievers) for in-depth documentation on retrieval systems. These are useful if you want to build a chatbot with domain-specific knowledge.\n",
|
||||
@@ -71,7 +71,7 @@
|
||||
"id": "88197b95",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"With a plain chat model, we can get chat completions by [passing one or more messages](/docs/modules/model_io/chat) to the model.\n",
|
||||
"With a plain chat model, we can get chat completions by [passing one or more messages](/docs/modules/model_io/models/chat) to the model.\n",
|
||||
"\n",
|
||||
"The chat model will respond with a message."
|
||||
]
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
"2. `Splitting`: [Text splitters](/docs/modules/data_connection/document_transformers/) break `Documents` into splits of specified size\n",
|
||||
"3. `Storage`: Storage (e.g., often a [vectorstore](/docs/modules/data_connection/vectorstores/)) will house [and often embed](https://www.pinecone.io/learn/vector-embeddings/) the splits\n",
|
||||
"4. `Retrieval`: The app retrieves splits from storage (e.g., often [with similar embeddings](https://www.pinecone.io/learn/k-nearest-neighbor/) to the input question)\n",
|
||||
"5. `Generation`: An [LLM](/docs/modules/model_io/llms/) produces an answer using a prompt that includes the question and the retrieved data\n",
|
||||
"5. `Generation`: An [LLM](/docs/modules/model_io/models/llms/) produces an answer using a prompt that includes the question and the retrieved data\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
|
||||
@@ -175,11 +175,6 @@ const config = {
|
||||
label: "More",
|
||||
position: "left",
|
||||
items: [
|
||||
{
|
||||
type: "docSidebar",
|
||||
sidebarId: "templates",
|
||||
label: "Templates",
|
||||
},
|
||||
{
|
||||
to: "/docs/community",
|
||||
label: "Community",
|
||||
@@ -221,10 +216,6 @@ const config = {
|
||||
href: "https://github.com/langchain-ai/langserve",
|
||||
label: "LangServe GitHub",
|
||||
},
|
||||
{
|
||||
href: "https://github.com/langchain-ai/langchain/tree/master/templates",
|
||||
label: "Templates GitHub",
|
||||
},
|
||||
{
|
||||
href: "https://smith.langchain.com/hub",
|
||||
label: "LangChain Hub",
|
||||
|
||||
@@ -1,34 +0,0 @@
import glob
import os
import re
import shutil
from pathlib import Path

TEMPLATES_DIR = Path(os.path.abspath(__file__)).parents[2] / "templates"
DOCS_TEMPLATES_DIR = Path(os.path.abspath(__file__)).parents[1] / "docs" / "templates"


readmes = list(glob.glob(str(TEMPLATES_DIR) + "/*/README.md"))
destinations = [readme[len(str(TEMPLATES_DIR)) + 1 : -10] + ".md" for readme in readmes]
for source, destination in zip(readmes, destinations):
    full_destination = DOCS_TEMPLATES_DIR / destination
    shutil.copyfile(source, full_destination)
    with open(full_destination, "r") as f:
        content = f.read()
    # remove images
    content = re.sub("\!\[.*?\]\((.*?)\)", "", content)
    with open(full_destination, "w") as f:
        f.write(content)

sidebar_hidden = """---
sidebar_class_name: hidden
---

"""
TEMPLATES_INDEX_DESTINATION = DOCS_TEMPLATES_DIR / "index.md"
with open(TEMPLATES_INDEX_DESTINATION, "r") as f:
    content = f.read()
# replace relative links
content = re.sub("\]\(\.\.\/", "](/docs/templates/", content)
with open(TEMPLATES_INDEX_DESTINATION, "w") as f:
    f.write(sidebar_hidden + content)
@@ -23,7 +23,7 @@ CHAT_MODEL_FEAT_TABLE_CORRECTION = {

LLM_TEMPLATE = """\
---
sidebar_position: 1
sidebar_position: 0
sidebar_class_name: hidden
---

@@ -43,7 +43,7 @@ Each LLM integration can optionally provide native implementations for async, st

CHAT_MODEL_TEMPLATE = """\
---
sidebar_position: 0
sidebar_position: 1
sidebar_class_name: hidden
---
@@ -46,22 +46,8 @@ module.exports = {
{
type: "category",
label: "Modules",
collapsed: false,
items: [
{ type: "category", label: "Model I/O", collapsed: true, items: [{type:"autogenerated", dirName: "modules/model_io" }], link: { type: 'doc', id: "modules/model_io/index" }},
{ type: "category", label: "Retrieval", collapsed: true, items: [{type:"autogenerated", dirName: "modules/data_connection" }], link: { type: 'doc', id: "modules/data_connection/index" }},
{ type: "category", label: "Agents", collapsed: true, items: [{type:"autogenerated", dirName: "modules/agents" }], link: { type: 'doc', id: "modules/agents/index" }},
{
type: "category",
label: "More",
collapsed: true,
items: [
{ type: "category", label: "Chains", collapsed: true, items: [{type:"autogenerated", dirName: "modules/chains" }], link: { type: 'doc', id: "modules/chains/index" }},
{ type: "category", label: "Memory", collapsed: true, items: [{type:"autogenerated", dirName: "modules/memory" }], link: { type: 'doc', id: "modules/memory/index" }},
{ type: "category", label: "Callbacks", collapsed: true, items: [{type:"autogenerated", dirName: "modules/callbacks" }], link: { type: 'doc', id: "modules/callbacks/index" }},
]
}
],
collapsed: true,
items: [{ type: "autogenerated", dirName: "modules" } ],
link: {
type: 'doc',
id: "modules/index"
@@ -123,14 +109,4 @@ module.exports = {
guides: [
{type: "autogenerated", dirName: "guides" }
],
templates: [
{
type: "category",
label: "Templates",
items: [
{ type: "autogenerated", dirName: "templates" },
],
link: { type: 'doc', id: "templates/index" }
},
],
};
BIN docs/static/img/langchain_stack.png (vendored): Binary file not shown. Before size: 4.0 MiB, after size: 1.6 MiB.

146 docs/vercel.json
@@ -1,37 +1,5 @@
{
"redirects": [
{
"source": "/docs/modules/agents/toolkits(/?)",
"destination": "/docs/modules/agents/tools/toolkits"
},
{
"source": "/docs/modules/model_io/models(/?)",
"destination": "/docs/modules/model_io/"
},
{
"source": "/docs/modules/model_io/models/:path*(/?)",
"destination": "/docs/modules/model_io/:path*"
},
{
"source": "/docs/modules/model_io/llms/fake_llm",
"destination": "/cookbook"
},
{
"source": "/docs/modules/model_io/llms/human_input_llm",
"destination": "/cookbook"
},
{
"source": "/docs/modules/model_io/chat/human_input_chat_model",
"destination": "/cookbook"
},
{
"source": "/docs/expression_language/why",
"destination": "/docs/expression_language/"
},
{
"source": "/docs/modules/model_io/chat/llm_chain",
"destination": "/docs/modules/chains/foundational/llm_chain"
},
{
"source": "/docs/guides/langsmith(/?)",
"destination": "/docs/langsmith/"
@@ -2973,7 +2941,7 @@
|
||||
"destination": "/docs/integrations/chat/anthropic"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/chat/integrations/anthropic",
|
||||
"source": "/docs/modules/model_io/models/chat/integrations/anthropic",
|
||||
"destination": "/docs/integrations/chat/anthropic"
|
||||
},
|
||||
{
|
||||
@@ -2981,7 +2949,7 @@
|
||||
"destination": "/docs/integrations/chat/azure_chat_openai"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/chat/integrations/azure_chat_openai",
|
||||
"source": "/docs/modules/model_io/models/chat/integrations/azure_chat_openai",
|
||||
"destination": "/docs/integrations/chat/azure_chat_openai"
|
||||
},
|
||||
{
|
||||
@@ -2989,7 +2957,7 @@
|
||||
"destination": "/docs/integrations/chat/google_vertex_ai_palm"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/chat/integrations/google_vertex_ai_palm",
|
||||
"source": "/docs/modules/model_io/models/chat/integrations/google_vertex_ai_palm",
|
||||
"destination": "/docs/integrations/chat/google_vertex_ai_palm"
|
||||
},
|
||||
{
|
||||
@@ -2997,7 +2965,7 @@
|
||||
"destination": "/docs/integrations/chat/openai"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/chat/integrations/openai",
|
||||
"source": "/docs/modules/model_io/models/chat/integrations/openai",
|
||||
"destination": "/docs/integrations/chat/openai"
|
||||
},
|
||||
{
|
||||
@@ -3005,39 +2973,39 @@
|
||||
"destination": "/docs/integrations/chat/promptlayer_chatopenai"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/chat/integrations/promptlayer_chatopenai",
|
||||
"source": "/docs/modules/model_io/models/chat/integrations/promptlayer_chatopenai",
|
||||
"destination": "/docs/integrations/chat/promptlayer_chatopenai"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/models/llms/examples/async_llm.html",
|
||||
"destination": "/docs/modules/model_io/llms/how_to/async_llm"
|
||||
"destination": "/docs/modules/model_io/models/llms/how_to/async_llm"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/models/llms/examples/custom_llm.html",
|
||||
"destination": "/docs/modules/model_io/llms/how_to/custom_llm"
|
||||
"destination": "/docs/modules/model_io/models/llms/how_to/custom_llm"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/models/llms/examples/fake_llm.html",
|
||||
"destination": "/docs/modules/model_io/llms/how_to/fake_llm"
|
||||
"destination": "/docs/modules/model_io/models/llms/how_to/fake_llm"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/models/llms/examples/human_input_llm.html",
|
||||
"destination": "/docs/modules/model_io/llms/how_to/human_input_llm"
|
||||
"destination": "/docs/modules/model_io/models/llms/how_to/human_input_llm"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/models/llms/examples/llm_serialization.html",
|
||||
"destination": "/docs/modules/model_io/llms/how_to/llm_serialization"
|
||||
"destination": "/docs/modules/model_io/models/llms/how_to/llm_serialization"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/models/llms/examples/token_usage_tracking.html",
|
||||
"destination": "/docs/modules/model_io/llms/how_to/token_usage_tracking"
|
||||
"destination": "/docs/modules/model_io/models/llms/how_to/token_usage_tracking"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/modules/models/llms/integrations/ai21.html",
|
||||
"destination": "/docs/integrations/llms/ai21"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/ai21",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/ai21",
|
||||
"destination": "/docs/integrations/llms/ai21"
|
||||
},
|
||||
{
|
||||
@@ -3045,7 +3013,7 @@
|
||||
"destination": "/docs/integrations/llms/aleph_alpha"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/aleph_alpha",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/aleph_alpha",
|
||||
"destination": "/docs/integrations/llms/aleph_alpha"
|
||||
},
|
||||
{
|
||||
@@ -3053,7 +3021,7 @@
|
||||
"destination": "/docs/integrations/llms/anyscale"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/anyscale",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/anyscale",
|
||||
"destination": "/docs/integrations/llms/anyscale"
|
||||
},
|
||||
{
|
||||
@@ -3061,7 +3029,7 @@
|
||||
"destination": "/docs/integrations/llms/azure_openai_example"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/azure_openai_example",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/azure_openai_example",
|
||||
"destination": "/docs/integrations/llms/azure_openai_example"
|
||||
},
|
||||
{
|
||||
@@ -3069,7 +3037,7 @@
|
||||
"destination": "/docs/integrations/llms/banana"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/banana",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/banana",
|
||||
"destination": "/docs/integrations/llms/banana"
|
||||
},
|
||||
{
|
||||
@@ -3077,7 +3045,7 @@
|
||||
"destination": "/docs/integrations/llms/baseten"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/baseten",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/baseten",
|
||||
"destination": "/docs/integrations/llms/baseten"
|
||||
},
|
||||
{
|
||||
@@ -3085,7 +3053,7 @@
|
||||
"destination": "/docs/integrations/llms/beam"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/beam",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/beam",
|
||||
"destination": "/docs/integrations/llms/beam"
|
||||
},
|
||||
{
|
||||
@@ -3093,7 +3061,7 @@
|
||||
"destination": "/docs/integrations/llms/bedrock"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/bedrock",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/bedrock",
|
||||
"destination": "/docs/integrations/llms/bedrock"
|
||||
},
|
||||
{
|
||||
@@ -3101,7 +3069,7 @@
|
||||
"destination": "/docs/integrations/llms/cerebriumai_example"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/cerebriumai_example",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/cerebriumai_example",
|
||||
"destination": "/docs/integrations/llms/cerebriumai_example"
|
||||
},
|
||||
{
|
||||
@@ -3109,7 +3077,7 @@
|
||||
"destination": "/docs/integrations/llms/cohere"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/cohere",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/cohere",
|
||||
"destination": "/docs/integrations/llms/cohere"
|
||||
},
|
||||
{
|
||||
@@ -3117,7 +3085,7 @@
|
||||
"destination": "/docs/integrations/llms/ctransformers"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/ctransformers",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/ctransformers",
|
||||
"destination": "/docs/integrations/llms/ctransformers"
|
||||
},
|
||||
{
|
||||
@@ -3125,7 +3093,7 @@
|
||||
"destination": "/docs/integrations/llms/databricks"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/databricks",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/databricks",
|
||||
"destination": "/docs/integrations/llms/databricks"
|
||||
},
|
||||
{
|
||||
@@ -3133,7 +3101,7 @@
|
||||
"destination": "/docs/integrations/llms/deepinfra_example"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/deepinfra_example",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/deepinfra_example",
|
||||
"destination": "/docs/integrations/llms/deepinfra_example"
|
||||
},
|
||||
{
|
||||
@@ -3141,7 +3109,7 @@
|
||||
"destination": "/docs/integrations/llms/forefrontai_example"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/forefrontai_example",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/forefrontai_example",
|
||||
"destination": "/docs/integrations/llms/forefrontai_example"
|
||||
},
|
||||
{
|
||||
@@ -3149,7 +3117,7 @@
|
||||
"destination": "/docs/integrations/llms/google_vertex_ai_palm"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/google_vertex_ai_palm",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/google_vertex_ai_palm",
|
||||
"destination": "/docs/integrations/llms/google_vertex_ai_palm"
|
||||
},
|
||||
{
|
||||
@@ -3157,7 +3125,7 @@
|
||||
"destination": "/docs/integrations/llms/gooseai_example"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/gooseai_example",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/gooseai_example",
|
||||
"destination": "/docs/integrations/llms/gooseai_example"
|
||||
},
|
||||
{
|
||||
@@ -3165,7 +3133,7 @@
|
||||
"destination": "/docs/integrations/llms/huggingface_hub"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/huggingface_hub",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/huggingface_hub",
|
||||
"destination": "/docs/integrations/llms/huggingface_hub"
|
||||
},
|
||||
{
|
||||
@@ -3173,7 +3141,7 @@
|
||||
"destination": "/docs/integrations/llms/huggingface_pipelines"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/huggingface_pipelines",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/huggingface_pipelines",
|
||||
"destination": "/docs/integrations/llms/huggingface_pipelines"
|
||||
},
|
||||
{
|
||||
@@ -3181,7 +3149,7 @@
|
||||
"destination": "/docs/integrations/llms/huggingface_textgen_inference"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/huggingface_textgen_inference",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/huggingface_textgen_inference",
|
||||
"destination": "/docs/integrations/llms/huggingface_textgen_inference"
|
||||
},
|
||||
{
|
||||
@@ -3189,7 +3157,7 @@
|
||||
"destination": "/docs/integrations/llms/jsonformer_experimental"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/jsonformer_experimental",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/jsonformer_experimental",
|
||||
"destination": "/docs/integrations/llms/jsonformer_experimental"
|
||||
},
|
||||
{
|
||||
@@ -3197,7 +3165,7 @@
|
||||
"destination": "/docs/integrations/llms/llamacpp"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/llamacpp",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/llamacpp",
|
||||
"destination": "/docs/integrations/llms/llamacpp"
|
||||
},
|
||||
{
|
||||
@@ -3205,7 +3173,7 @@
|
||||
"destination": "/docs/integrations/llms/llm_caching"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/llm_caching",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/llm_caching",
|
||||
"destination": "/docs/integrations/llms/llm_caching"
|
||||
},
|
||||
{
|
||||
@@ -3213,7 +3181,7 @@
|
||||
"destination": "/docs/integrations/llms/manifest"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/manifest",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/manifest",
|
||||
"destination": "/docs/integrations/llms/manifest"
|
||||
},
|
||||
{
|
||||
@@ -3221,7 +3189,7 @@
|
||||
"destination": "/docs/integrations/llms/modal"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/modal",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/modal",
|
||||
"destination": "/docs/integrations/llms/modal"
|
||||
},
|
||||
{
|
||||
@@ -3229,7 +3197,7 @@
|
||||
"destination": "/docs/integrations/llms/mosaicml"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/mosaicml",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/mosaicml",
|
||||
"destination": "/docs/integrations/llms/mosaicml"
|
||||
},
|
||||
{
|
||||
@@ -3237,7 +3205,7 @@
|
||||
"destination": "/docs/integrations/llms/nlpcloud"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/nlpcloud",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/nlpcloud",
|
||||
"destination": "/docs/integrations/llms/nlpcloud"
|
||||
},
|
||||
{
|
||||
@@ -3245,7 +3213,7 @@
|
||||
"destination": "/docs/integrations/llms/openai"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/openai",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/openai",
|
||||
"destination": "/docs/integrations/llms/openai"
|
||||
},
|
||||
{
|
||||
@@ -3253,7 +3221,7 @@
|
||||
"destination": "/docs/integrations/llms/openlm"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/openlm",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/openlm",
|
||||
"destination": "/docs/integrations/llms/openlm"
|
||||
},
|
||||
{
|
||||
@@ -3261,7 +3229,7 @@
|
||||
"destination": "/docs/integrations/llms/petals_example"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/petals_example",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/petals_example",
|
||||
"destination": "/docs/integrations/llms/petals_example"
|
||||
},
|
||||
{
|
||||
@@ -3269,7 +3237,7 @@
|
||||
"destination": "/docs/integrations/llms/pipelineai_example"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/pipelineai_example",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/pipelineai_example",
|
||||
"destination": "/docs/integrations/llms/pipelineai_example"
|
||||
},
|
||||
{
|
||||
@@ -3277,7 +3245,7 @@
|
||||
"destination": "/docs/integrations/llms/predictionguard"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/predictionguard",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/predictionguard",
|
||||
"destination": "/docs/integrations/llms/predictionguard"
|
||||
},
|
||||
{
|
||||
@@ -3285,7 +3253,7 @@
|
||||
"destination": "/docs/integrations/llms/promptlayer_openai"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/promptlayer_openai",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/promptlayer_openai",
|
||||
"destination": "/docs/integrations/llms/promptlayer_openai"
|
||||
},
|
||||
{
|
||||
@@ -3293,7 +3261,7 @@
|
||||
"destination": "/docs/integrations/llms/rellm_experimental"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/rellm_experimental",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/rellm_experimental",
|
||||
"destination": "/docs/integrations/llms/rellm_experimental"
|
||||
},
|
||||
{
|
||||
@@ -3301,7 +3269,7 @@
|
||||
"destination": "/docs/integrations/llms/replicate"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/replicate",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/replicate",
|
||||
"destination": "/docs/integrations/llms/replicate"
|
||||
},
|
||||
{
|
||||
@@ -3309,7 +3277,7 @@
|
||||
"destination": "/docs/integrations/llms/runhouse"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/runhouse",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/runhouse",
|
||||
"destination": "/docs/integrations/llms/runhouse"
|
||||
},
|
||||
{
|
||||
@@ -3317,7 +3285,7 @@
|
||||
"destination": "/docs/integrations/llms/sagemaker"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/sagemaker",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/sagemaker",
|
||||
"destination": "/docs/integrations/llms/sagemaker"
|
||||
},
|
||||
{
|
||||
@@ -3325,7 +3293,7 @@
|
||||
"destination": "/docs/integrations/llms/stochasticai"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/stochasticai",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/stochasticai",
|
||||
"destination": "/docs/integrations/llms/stochasticai"
|
||||
},
|
||||
{
|
||||
@@ -3333,7 +3301,7 @@
|
||||
"destination": "/docs/integrations/llms/writer"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/integrations/writer",
|
||||
"source": "/docs/modules/model_io/models/llms/integrations/writer",
|
||||
"destination": "/docs/integrations/llms/writer"
|
||||
},
|
||||
{
|
||||
@@ -3597,12 +3565,12 @@
"destination": "/docs/modules/memory/:path*"
},
{
"source": "/docs/modules/model_io/chat/how_to/:path*",
"destination": "/docs/modules/model_io/chat/:path*"
"source": "/docs/modules/model_io/models/chat/how_to/:path*",
"destination": "/docs/modules/model_io/models/chat/:path*"
},
{
"source": "/docs/modules/model_io/llms/how_to/:path*",
"destination": "/docs/modules/model_io/llms/:path*"
"source": "/docs/modules/model_io/models/llms/how_to/:path*",
"destination": "/docs/modules/model_io/models/llms/:path*"
},
{
"source": "/docs/modules/callbacks/integrations/:path*",
@@ -3617,11 +3585,11 @@
"destination": "/docs/integrations/text_embedding/:path*"
},
{
"source": "/docs/modules/model_io/llms/integrations/:path*",
"source": "/docs/modules/model_io/models/llms/integrations/:path*",
"destination": "/docs/integrations/llms/:path*"
},
{
"source": "/docs/modules/model_io/chat/integrations/:path*",
"source": "/docs/modules/model_io/models/chat/integrations/:path*",
"destination": "/docs/integrations/chat/:path*"
},
{
@@ -3646,11 +3614,11 @@
},
{
"source": "/en/latest/modules/models.html",
"destination": "/docs/modules/model_io/"
"destination": "/docs/modules/model_io/models/"
},
{
"source": "/en/latest/modules/models/:path*",
"destination": "/docs/modules/model_io/:path*"
"destination": "/docs/modules/model_io/models/:path*"
},
{
"source": "/en/latest/modules/prompts/prompt_templates/examples/:path*",
@@ -47,11 +47,8 @@ source .venv/bin/activate
python3.11 -m pip install --upgrade pip
python3.11 -m pip install -r vercel_requirements.txt
python3.11 scripts/model_feat_table.py
mkdir docs/templates
cp ../templates/docs/INDEX.md docs/templates/index.md
python3.11 scripts/copy_templates.py
nbdoc_build --srcdir docs
cp ../cookbook/README.md src/pages/cookbook.mdx
cp ../.github/CONTRIBUTING.md docs/contributing.md
wget https://raw.githubusercontent.com/langchain-ai/langserve/main/README.md -O docs/langserve.md
nbdoc_build --srcdir docs
python3.11 scripts/generate_api_reference_links.py
@@ -1,21 +0,0 @@
FROM python:3.11-slim

RUN pip install poetry==1.6.1

RUN poetry config virtualenvs.create false

WORKDIR /code

COPY ./pyproject.toml ./poetry.lock* ./

COPY ./packages ./packages

RUN poetry install --no-interaction --no-ansi --no-root

COPY ./app ./app

RUN poetry install --no-interaction --no-ansi

EXPOSE 8080

CMD exec uvicorn app.server:app --host 0.0.0.0 --port 8080
@@ -47,33 +47,3 @@ export LANGCHAIN_PROJECT=<your-project> # if not specified, defaults to "defaul
```bash
langchain serve
```

## Running in Docker

This project folder includes a Dockerfile that allows you to easily build and host your LangServe app.

### Building the Image

To build the image, simply run:

```shell
docker build . -t my-langserve-app
```

If you tag your image with something other than `my-langserve-app`,
note it for use in the next step.

### Running the Image Locally

To run the image, you'll need to include any environment variables
necessary for your application.

In the example below, we inject the `OPENAI_API_KEY` environment
variable with the value set in the local environment
(`$OPENAI_API_KEY`).

We also expose port 8080 with the `-p 8080:8080` option.

```shell
docker run -e OPENAI_API_KEY=$OPENAI_API_KEY -p 8080:8080 my-langserve-app
```
@@ -1,3 +0,0 @@
from .open_clip import OpenCLIPEmbeddings

__all__ = ["OpenCLIPEmbeddings"]
Some files were not shown because too many files have changed in this diff.