Update model i/o docs (#13160)

parent cf271784fa
commit 850336bcf1
@@ -113,7 +113,7 @@
 "tags": []
 },
 "source": [
-"Here are two examples of how to use the `TrubricsCallbackHandler` with Langchain [LLMs](https://python.langchain.com/docs/modules/model_io/models/llms/) or [Chat Models](https://python.langchain.com/docs/modules/model_io/models/chat/). We will use OpenAI models, so set your `OPENAI_API_KEY` key here:"
+"Here are two examples of how to use the `TrubricsCallbackHandler` with Langchain [LLMs](https://python.langchain.com/docs/modules/model_io/llms/) or [Chat Models](https://python.langchain.com/docs/modules/model_io/chat/). We will use OpenAI models, so set your `OPENAI_API_KEY` key here:"
 ]
 },
 {
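A minimal sketch of wiring the handler into an LLM (the `langchain.callbacks.TrubricsCallbackHandler` import path and the zero-argument constructor are assumptions, and the credentials are placeholders):

```python
import os

from langchain.callbacks import TrubricsCallbackHandler  # assumed import path
from langchain.llms import OpenAI

os.environ["OPENAI_API_KEY"] = "sk-..."  # placeholder

# Attach the handler so each prompt/completion pair is logged to Trubrics.
llm = OpenAI(callbacks=[TrubricsCallbackHandler()])
llm.invoke("Tell me a joke")
```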
@@ -288,7 +288,7 @@
 "metadata": {},
 "source": [
 "## Streaming Response\n",
-"You can optionally stream the response as it is produced, which is helpful to show interactivity to users for time-consuming generations. See detailed docs on [Streaming](https://python.langchain.com/docs/modules/model_io/models/llms/how_to/streaming_llm) for more information."
+"You can optionally stream the response as it is produced, which is helpful to show interactivity to users for time-consuming generations. See detailed docs on [Streaming](https://python.langchain.com/docs/modules/model_io/llms/how_to/streaming_llm) for more information."
 ]
 },
 {
@@ -11,7 +11,7 @@ Get a [Minimax group id](https://api.minimax.chat/user-center/basic-information)
 ## LLM
 
 There exists a Minimax LLM wrapper, which you can access with
-See a [usage example](/docs/modules/model_io/models/llms/integrations/minimax).
+See a [usage example](/docs/modules/model_io/llms/integrations/minimax).
 
 ```python
 from langchain.llms import Minimax
@@ -19,7 +19,7 @@ from langchain.llms import Minimax
 
 ## Chat Models
 
-See a [usage example](/docs/modules/model_io/models/chat/integrations/minimax)
+See a [usage example](/docs/modules/model_io/chat/integrations/minimax)
 
 ```python
 from langchain.chat_models import MiniMaxChat
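A minimal usage sketch for the two wrappers above (it assumes the wrappers read `MINIMAX_API_KEY` and `MINIMAX_GROUP_ID` from the environment; the values shown are placeholders):

```python
import os

from langchain.chat_models import MiniMaxChat
from langchain.llms import Minimax
from langchain.schema import HumanMessage

# Placeholder credentials; the wrappers are assumed to pick these up from the environment.
os.environ["MINIMAX_API_KEY"] = "your-api-key"
os.environ["MINIMAX_GROUP_ID"] = "your-group-id"

# Text-completion interface: string in, string out.
llm = Minimax()
print(llm.invoke("What is the difference between a panda and a bear?"))

# Chat interface: messages in, AI message out.
chat = MiniMaxChat()
print(chat.invoke([HumanMessage(content="Translate to French: I love programming.")]))
```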
@@ -46,6 +46,6 @@ eng = sqlalchemy.create_engine(conn_str)
 set_llm_cache(SQLAlchemyCache(engine=eng))
 ```
 
-From here, see the [LLM Caching](/docs/modules/model_io/models/llms/how_to/llm_caching) documentation on how to use.
+From here, see the [LLM Caching](/docs/modules/model_io/llms/how_to/llm_caching) documentation on how to use.
 
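For reference, a rough end-to-end sketch of the cache setup above (the connection string is a placeholder; it assumes `SQLAlchemyCache` is importable from `langchain.cache` and `set_llm_cache` from `langchain.globals`):

```python
import sqlalchemy

from langchain.cache import SQLAlchemyCache
from langchain.globals import set_llm_cache
from langchain.llms import OpenAI

# Placeholder connection string; point this at your own database.
conn_str = "postgresql://user:password@localhost:5432/langchain"
eng = sqlalchemy.create_engine(conn_str)

# After this, identical prompts are answered from the SQL table
# instead of triggering a new provider call.
set_llm_cache(SQLAlchemyCache(engine=eng))

llm = OpenAI(model="gpt-3.5-turbo-instruct")
llm.invoke("Tell me a joke")  # first call hits the API
llm.invoke("Tell me a joke")  # second call is read from the cache
```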
@ -58,7 +58,7 @@
|
||||
"\n",
|
||||
"retriever = GoogleDriveRetriever(\n",
|
||||
" num_results=2,\n",
|
||||
")\n"
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -97,7 +97,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install unstructured\n"
|
||||
"#!pip install unstructured"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -113,7 +113,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever.get_relevant_documents(\"machine learning\")\n"
|
||||
"retriever.get_relevant_documents(\"machine learning\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -150,7 +150,7 @@
|
||||
")\n",
|
||||
"for doc in retriever.get_relevant_documents(\"machine learning\"):\n",
|
||||
" print(\"---\")\n",
|
||||
" print(doc.page_content.strip()[:60] + \"...\")\n"
|
||||
" print(doc.page_content.strip()[:60] + \"...\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -189,7 +189,7 @@
|
||||
"for doc in retriever.get_relevant_documents(\"machine learning\"):\n",
|
||||
" print(f\"{doc.metadata['name']}:\")\n",
|
||||
" print(\"---\")\n",
|
||||
" print(doc.page_content.strip()[:60] + \"...\")\n"
|
||||
" print(doc.page_content.strip()[:60] + \"...\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -221,7 +221,7 @@
|
||||
" includeItemsFromAllDrives=False,\n",
|
||||
" supportsAllDrives=False,\n",
|
||||
")\n",
|
||||
"retriever.get_relevant_documents(\"machine learning\")\n"
|
||||
"retriever.get_relevant_documents(\"machine learning\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -105,8 +105,12 @@
|
||||
"source": [
|
||||
"# Create a bes instance and index docs.\n",
|
||||
"from langchain.vectorstores import BESVectorStore\n",
|
||||
"\n",
|
||||
"bes = BESVectorStore.from_documents(\n",
|
||||
" documents=docs, embedding=embeddings, bes_url=\"your bes cluster url\", index_name=\"your vector index\"\n",
|
||||
" documents=docs,\n",
|
||||
" embedding=embeddings,\n",
|
||||
" bes_url=\"your bes cluster url\",\n",
|
||||
" index_name=\"your vector index\",\n",
|
||||
")\n",
|
||||
"bes.client.indices.refresh(index=\"your vector index\")"
|
||||
]
|
||||
|
@ -6,6 +6,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_position: 1\n",
|
||||
"title: Chat models\n",
|
||||
"---"
|
||||
]
|
docs/docs/modules/model_io/chat/token_usage_tracking.ipynb (new file, 181 lines)
@ -0,0 +1,181 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e5715368",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Tracking token usage\n",
|
||||
"\n",
|
||||
"This notebook goes over how to track your token usage for specific calls. It is currently only implemented for the OpenAI API.\n",
|
||||
"\n",
|
||||
"Let's first look at an extremely simple example of tracking token usage for a single Chat model call."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "9455db35",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.callbacks import get_openai_callback"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "d1c55cc9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = ChatOpenAI(model_name=\"gpt-4\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "31667d54",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tokens Used: 24\n",
|
||||
"\tPrompt Tokens: 11\n",
|
||||
"\tCompletion Tokens: 13\n",
|
||||
"Successful Requests: 1\n",
|
||||
"Total Cost (USD): $0.0011099999999999999\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with get_openai_callback() as cb:\n",
|
||||
" result = llm.invoke(\"Tell me a joke\")\n",
|
||||
" print(cb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c0ab6d27",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Anything inside the context manager will get tracked. Here's an example of using it to track multiple calls in sequence."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "e09420f4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"48\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with get_openai_callback() as cb:\n",
|
||||
" result = llm.invoke(\"Tell me a joke\")\n",
|
||||
" result2 = llm.invoke(\"Tell me a joke\")\n",
|
||||
" print(cb.total_tokens)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d8186e7b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If a chain or agent with multiple steps in it is used, it will track all those steps."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "5d1125c6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import load_tools\n",
|
||||
"from langchain.agents import initialize_agent\n",
|
||||
"from langchain.agents import AgentType\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"\n",
|
||||
"tools = load_tools([\"serpapi\", \"llm-math\"], llm=llm)\n",
|
||||
"agent = initialize_agent(tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "2f98c536",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `Search` with `Olivia Wilde's current boyfriend`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m['Things are looking golden for Olivia Wilde, as the actress has jumped back into the dating pool following her split from Harry Styles — read ...', \"“I did not want service to take place at the home of Olivia's current partner because Otis and Daisy might be present,” Sudeikis wrote in his ...\", \"February 2021: Olivia Wilde praises Harry Styles' modesty. One month after the duo made headlines with their budding romance, Wilde gave her new beau major ...\", 'An insider revealed to People that the new couple had been dating for some time. \"They were in Montecito, California this weekend for a wedding, ...', 'A source told People last year that Wilde and Styles were still friends despite deciding to take a break. \"He\\'s still touring and is now going ...', \"... love life. “He's your typical average Joe.” The source adds, “She's not giving too much away right now and wants to keep the relationship ...\", \"Multiple sources said the two were “taking a break” from dating because of distance and different priorities. “He's still touring and is now ...\", 'Comments. Filed under. celebrity couples · celebrity dating · harry styles · jason sudeikis · olivia wilde ... Now Holds A Darker MeaningNYPost.', '... dating during filming. The 39-year-old did however look very cosy with the comedian, although his relationship status is unknown. Olivia ...']\u001b[0m\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `Search` with `Harry Styles current age`\n",
|
||||
"responded: Olivia Wilde's current boyfriend is Harry Styles. Let me find out his age for you.\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m29 years\u001b[0m\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `Calculator` with `29 ^ 0.23`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[33;1m\u001b[1;3mAnswer: 2.169459462491557\u001b[0m\u001b[32;1m\u001b[1;3mHarry Styles' current age (29 years) raised to the 0.23 power is approximately 2.17.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"Total Tokens: 1929\n",
|
||||
"Prompt Tokens: 1799\n",
|
||||
"Completion Tokens: 130\n",
|
||||
"Total Cost (USD): $0.06176999999999999\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with get_openai_callback() as cb:\n",
|
||||
" response = agent.run(\n",
|
||||
" \"Who is Olivia Wilde's boyfriend? What is his current age raised to the 0.23 power?\"\n",
|
||||
" )\n",
|
||||
" print(f\"Total Tokens: {cb.total_tokens}\")\n",
|
||||
" print(f\"Prompt Tokens: {cb.prompt_tokens}\")\n",
|
||||
" print(f\"Completion Tokens: {cb.completion_tokens}\")\n",
|
||||
" print(f\"Total Cost (USD): ${cb.total_cost}\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@@ -10,8 +10,18 @@ sidebar_class_name: hidden
 The core element of any language model application is...the model. LangChain gives you the building blocks to interface with any language model.
 
 - [Prompts](/docs/modules/model_io/prompts/): Templatize, dynamically select, and manage model inputs
-- [Language models](/docs/modules/model_io/models/): Make calls to language models through common interfaces
+- [Chat models](/docs/modules/model_io/chat/): Models that are backed by a language model but take a list of Chat Messages as input and return a Chat Message
+- [LLMs](/docs/modules/model_io/llms/): Models that take a text string as input and return a text string
 - [Output parsers](/docs/modules/model_io/output_parsers/): Extract information from model outputs
 
 
 
+
+## LLMs vs Chat models
+
+LLMs and chat models are subtly but importantly different. LLMs in LangChain refer to pure text completion models.
+The APIs they wrap take a string prompt as input and output a string completion. OpenAI's GPT-3 is implemented as an LLM.
+Chat models are often backed by LLMs but tuned specifically for having conversations.
+And, crucially, their provider APIs use a different interface than pure text completion models. Instead of a single string,
+they take a list of chat messages as input. Usually these messages are labeled with the speaker (usually one of "System",
+"AI", and "Human"). And they return an AI chat message as output. GPT-4 and Anthropic's Claude-2 are both implemented as chat models.
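To make the contrast concrete, a minimal sketch of the two interfaces (assuming the `OpenAI` and `ChatOpenAI` integrations and an `OPENAI_API_KEY` in the environment; model names are illustrative):

```python
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.schema import HumanMessage, SystemMessage

# LLM: string in -> string out
llm = OpenAI(model="gpt-3.5-turbo-instruct")
completion = llm.invoke("Translate 'good morning' to French.")  # returns a plain string

# Chat model: list of messages in -> AI message out
chat = ChatOpenAI(model_name="gpt-4")
reply = chat.invoke(
    [
        SystemMessage(content="You are a helpful translator."),
        HumanMessage(content="Translate 'good morning' to French."),
    ]
)  # returns an AIMessage

print(completion)
print(reply.content)
```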
docs/docs/modules/model_io/llms/async_llm.ipynb (new file, 121 lines)
@ -0,0 +1,121 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f6574496-b360-4ffa-9523-7fd34a590164",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Async API\n",
|
||||
"\n",
|
||||
"All `LLM`s implement the `Runnable` interface, which comes with default implementations of all methods, ie. ainvoke, batch, abatch, stream, astream. This gives all `LLM`s basic support for asynchronous calls.\n",
|
||||
"\n",
|
||||
"Async support defaults to calling the `LLM`'s respective sync method in asyncio's default thread pool executor. This lets other async functions in your application make progress while the `LLM` is being executed, by moving this call to a background thread. Where `LLM`s providers have native implementations for async, that is used instead of the default `LLM` implementation.\n",
|
||||
"\n",
|
||||
"See which [integrations provide native async support here](/docs/integrations/llms/).\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "5e49e96c-0f88-466d-b3d3-ea0966bdf19e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[1mConcurrent executed in 1.03 seconds.\u001b[0m\n",
|
||||
"\u001b[1mSerial executed in 6.80 seconds.\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import time\n",
|
||||
"import asyncio\n",
|
||||
"\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"\n",
|
||||
"llm = OpenAI(model=\"gpt-3.5-turbo-instruct\", temperature=0.9)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def invoke_serially():\n",
|
||||
" for _ in range(10):\n",
|
||||
" resp = llm.invoke(\"Hello, how are you?\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def async_invoke(llm):\n",
|
||||
" resp = await llm.ainvoke(\"Hello, how are you?\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def invoke_concurrently():\n",
|
||||
" tasks = [async_invoke(llm) for _ in range(10)]\n",
|
||||
" await asyncio.gather(*tasks)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"s = time.perf_counter()\n",
|
||||
"# If running this outside of Jupyter, use asyncio.run(generate_concurrently())\n",
|
||||
"await invoke_concurrently()\n",
|
||||
"elapsed = time.perf_counter() - s\n",
|
||||
"print(\"\\033[1m\" + f\"Concurrent executed in {elapsed:0.2f} seconds.\" + \"\\033[0m\")\n",
|
||||
"\n",
|
||||
"s = time.perf_counter()\n",
|
||||
"invoke_serially()\n",
|
||||
"elapsed = time.perf_counter() - s\n",
|
||||
"print(\"\\033[1m\" + f\"Serial executed in {elapsed:0.2f} seconds.\" + \"\\033[0m\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e0b60caf-f99e-46a6-bdad-46b2cfea29ac",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To simplify things we could also just use `abatch` to run a batch concurrently:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "bd11000f-2232-491a-9f70-abcbb4611fbf",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[1mBatch executed in 1.31 seconds.\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"s = time.perf_counter()\n",
|
||||
"# If running this outside of Jupyter, use asyncio.run(generate_concurrently())\n",
|
||||
"await llm.abatch([\"Hello, how are you?\"] * 10)\n",
|
||||
"elapsed = time.perf_counter() - s\n",
|
||||
"print(\"\\033[1m\" + f\"Batch executed in {elapsed:0.2f} seconds.\" + \"\\033[0m\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@ -6,6 +6,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_position: 2\n",
|
||||
"title: LLMs\n",
|
||||
"---"
|
||||
]
|
||||
@ -22,7 +23,6 @@
|
||||
"Large Language Models (LLMs) are a core component of LangChain.\n",
|
||||
"LangChain does not serve its own LLMs, but rather provides a standard interface for interacting with many different LLMs.\n",
|
||||
"\n",
|
||||
"## Get started\n",
|
||||
"\n",
|
||||
"There are lots of LLM providers (OpenAI, Cohere, Hugging Face, etc) - the `LLM` class is designed to provide a standard interface for all of them.\n",
|
||||
"\n",
|
||||
@ -85,7 +85,7 @@
|
||||
"id": "966b5d74-defd-4f89-8c37-a68ca4a161d9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### LCEL\n",
|
||||
"## LCEL\n",
|
||||
"\n",
|
||||
"LLMs implement the [Runnable interface](/docs/expression_language/interface), the basic building block of the [LangChain Expression Language (LCEL)](/docs/expression_language/). This means they support `invoke`, `ainvoke`, `stream`, `astream`, `batch`, `abatch`, `astream_log` calls.\n",
|
||||
"\n",
|
||||
@ -454,12 +454,30 @@
|
||||
" print(chunk)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "09108687-ed15-468b-9ac5-674e75785199",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## [LangSmith](https://smith.langchain.com)\n",
|
||||
"\n",
|
||||
"All `LLM`s come with built-in LangSmith tracing. Just set the following environment variables:\n",
|
||||
"```bash\n",
|
||||
"export LANGCHAIN_TRACING_V2=\"true\"\n",
|
||||
"export LANGCHAIN_API_KEY=<your-api-key>\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"and any `LLM` invocation (whether it's nested in a chain or not) will automatically be traced. A trace will include inputs, outputs, latency, token usage, invocation params, environment params, and more. See an example here: https://smith.langchain.com/public/7924621a-ff58-4b1c-a2a2-035a354ef434/r.\n",
|
||||
"\n",
|
||||
"In LangSmith you can then provide feedback for any trace, compile annotated datasets for evals, debug performance in the playground, and more."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "20ef52be-6e51-43a3-be2a-b1a862d5fc80",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### `__call__`: string in -> string out\n",
|
||||
"### [Legacy] `__call__`: string in -> string out\n",
|
||||
"The simplest way to use an LLM is a callable: pass in a string, get a string completion."
|
||||
]
|
||||
},
|
||||
@ -489,7 +507,7 @@
|
||||
"id": "7b4ad9e5-50ec-4031-bfaa-23a0130da3c6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### `generate`: batch calls, richer outputs\n",
|
||||
"### [Legacy] `generate`: batch calls, richer outputs\n",
|
||||
"`generate` lets you call the model with a list of strings, getting back a more complete response than just the text. This complete response can include things like multiple top responses and other LLM provider-specific information:\n",
|
||||
"\n"
|
||||
]
|
docs/docs/modules/model_io/llms/llm_serialization.ipynb (new file, 179 lines)
@ -0,0 +1,179 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "73f9bf40",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Serialization\n",
|
||||
"\n",
|
||||
"LangChain Python and LangChain JS share a serialization scheme. You can check if a LangChain class is serializable by running with the `is_lc_serializable` class method."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "9c9fb6ff",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.llms.loading import load_llm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "441d28cb-e898-47fd-8f27-f620a9cd6c34",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"OpenAI.is_lc_serializable()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "87b8a7c6-35b7-4fab-938b-4d05e9cc06f1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(model=\"gpt-3.5-turbo-instruct\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "88ce018b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Dump\n",
|
||||
"\n",
|
||||
"Any serializable object can be serialized to a dict or json string."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "f12b28f3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'lc': 1,\n",
|
||||
" 'type': 'constructor',\n",
|
||||
" 'id': ['langchain', 'llms', 'openai', 'OpenAI'],\n",
|
||||
" 'kwargs': {'model': 'gpt-3.5-turbo-instruct',\n",
|
||||
" 'openai_api_key': {'lc': 1, 'type': 'secret', 'id': ['OPENAI_API_KEY']}}}"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.load import dumpd, dumps\n",
|
||||
"\n",
|
||||
"dumpd(llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "095b1d56",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'{\"lc\": 1, \"type\": \"constructor\", \"id\": [\"langchain\", \"llms\", \"openai\", \"OpenAI\"], \"kwargs\": {\"model\": \"gpt-3.5-turbo-instruct\", \"openai_api_key\": {\"lc\": 1, \"type\": \"secret\", \"id\": [\"OPENAI_API_KEY\"]}}}'"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dumps(llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ab3e4223",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load\n",
|
||||
"\n",
|
||||
"Any serialized object can be loaded."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "68e45b1c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.load import loads\n",
|
||||
"from langchain.load.load import load\n",
|
||||
"\n",
|
||||
"loaded_1 = load(dumpd(llm))\n",
|
||||
"loaded_2 = loads(dumps(llm))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "c9272667-7fe3-4e5f-a1cc-69e8829b9e8f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"I am an AI and do not have the capability to experience emotions. But thank you for asking. Is there anything I can assist you with?\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(loaded_1.invoke(\"How are you doing?\"))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
docs/docs/modules/model_io/llms/streaming_llm.ipynb (new file, 112 lines)
@ -0,0 +1,112 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fc37c39a-7406-4c13-a754-b8e95fd970a0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Streaming\n",
|
||||
"\n",
|
||||
"All `LLM`s implement the `Runnable` interface, which comes with default implementations of all methods, ie. ainvoke, batch, abatch, stream, astream. This gives all `LLM`s basic support for streaming.\n",
|
||||
"\n",
|
||||
"Streaming support defaults to returning an Iterator (or AsyncIterator in the case of async streaming) of a single value, the final result returned by the underlying `LLM` provider. This obviously doesn't give you token-by-token streaming, which requires native support from the `LLM` provider, but ensures your code that expects an iterator of tokens can work for any of our `LLM` integrations.\n",
|
||||
"\n",
|
||||
"See which [integrations support token-by-token streaming here](/docs/integrations/llms/)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "9baa0527-b97d-41d3-babd-472ec5e59e3e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"Verse 1:\n",
|
||||
"Bubbles dancing in my glass\n",
|
||||
"Clear and crisp, it's such a blast\n",
|
||||
"Refreshing taste, it's like a dream\n",
|
||||
"Sparkling water, you make me beam\n",
|
||||
"\n",
|
||||
"Chorus:\n",
|
||||
"Oh sparkling water, you're my delight\n",
|
||||
"With every sip, you make me feel so right\n",
|
||||
"You're like a party in my mouth\n",
|
||||
"I can't get enough, I'm hooked no doubt\n",
|
||||
"\n",
|
||||
"Verse 2:\n",
|
||||
"No sugar, no calories, just pure bliss\n",
|
||||
"You're the perfect drink, I must confess\n",
|
||||
"From lemon to lime, so many flavors to choose\n",
|
||||
"Sparkling water, you never fail to amuse\n",
|
||||
"\n",
|
||||
"Chorus:\n",
|
||||
"Oh sparkling water, you're my delight\n",
|
||||
"With every sip, you make me feel so right\n",
|
||||
"You're like a party in my mouth\n",
|
||||
"I can't get enough, I'm hooked no doubt\n",
|
||||
"\n",
|
||||
"Bridge:\n",
|
||||
"Some may say you're just plain water\n",
|
||||
"But to me, you're so much more\n",
|
||||
"You bring a sparkle to my day\n",
|
||||
"In every single way\n",
|
||||
"\n",
|
||||
"Chorus:\n",
|
||||
"Oh sparkling water, you're my delight\n",
|
||||
"With every sip, you make me feel so right\n",
|
||||
"You're like a party in my mouth\n",
|
||||
"I can't get enough, I'm hooked no doubt\n",
|
||||
"\n",
|
||||
"Outro:\n",
|
||||
"So here's to you, my dear sparkling water\n",
|
||||
"You'll always be my go-to drink forever\n",
|
||||
"With your effervescence and refreshing taste\n",
|
||||
"You'll always have a special place."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"llm = OpenAI(model=\"gpt-3.5-turbo-instruct\", temperature=0, max_tokens=512)\n",
|
||||
"for chunk in llm.stream(\"Write me a song about sparkling water.\"):\n",
|
||||
" print(chunk, end=\"\", flush=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d81140f2-384b-4470-bf93-957013c6620b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@ -14,7 +14,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 1,
|
||||
"id": "9455db35",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -25,17 +25,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 2,
|
||||
"id": "d1c55cc9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(model_name=\"text-davinci-002\", n=2, best_of=2)"
|
||||
"llm = OpenAI(model_name=\"gpt-3.5-turbo-instruct\", n=2, best_of=2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"id": "31667d54",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -43,17 +43,17 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tokens Used: 42\n",
|
||||
"Tokens Used: 37\n",
|
||||
"\tPrompt Tokens: 4\n",
|
||||
"\tCompletion Tokens: 38\n",
|
||||
"\tCompletion Tokens: 33\n",
|
||||
"Successful Requests: 1\n",
|
||||
"Total Cost (USD): $0.00084\n"
|
||||
"Total Cost (USD): $7.2e-05\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with get_openai_callback() as cb:\n",
|
||||
" result = llm(\"Tell me a joke\")\n",
|
||||
" result = llm.invoke(\"Tell me a joke\")\n",
|
||||
" print(cb)"
|
||||
]
|
||||
},
|
||||
@ -67,7 +67,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"id": "e09420f4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -75,14 +75,14 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"91\n"
|
||||
"72\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with get_openai_callback() as cb:\n",
|
||||
" result = llm(\"Tell me a joke\")\n",
|
||||
" result2 = llm(\"Tell me a joke\")\n",
|
||||
" result = llm.invoke(\"Tell me a joke\")\n",
|
||||
" result2 = llm.invoke(\"Tell me a joke\")\n",
|
||||
" print(cb.total_tokens)"
|
||||
]
|
||||
},
|
||||
@ -96,7 +96,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"id": "5d1125c6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -115,7 +115,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 6,
|
||||
"id": "2f98c536",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -129,24 +129,23 @@
|
||||
"\u001b[32;1m\u001b[1;3m I need to find out who Olivia Wilde's boyfriend is and then calculate his age raised to the 0.23 power.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"Olivia Wilde boyfriend\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mSudeikis and Wilde's relationship ended in November 2020. Wilde was publicly served with court documents regarding child custody while she was presenting Don't Worry Darling at CinemaCon 2022. In January 2021, Wilde began dating singer Harry Styles after meeting during the filming of Don't Worry Darling.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to find out Harry Styles' age.\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m[\"Olivia Wilde and Harry Styles took fans by surprise with their whirlwind romance, which began when they met on the set of Don't Worry Darling.\", 'Olivia Wilde started dating Harry Styles after ending her years-long engagement to Jason Sudeikis — see their relationship timeline.', 'Olivia Wilde and Harry Styles were spotted early on in their relationship walking around London. (. Image ...', \"Looks like Olivia Wilde and Jason Sudeikis are starting 2023 on good terms. Amid their highly publicized custody battle – and the actress' ...\", 'The two started dating after Wilde split up with actor Jason Sudeikisin 2020. However, their relationship came to an end last November.', \"Olivia Wilde and Harry Styles started dating during the filming of Don't Worry Darling. While the movie got a lot of backlash because of the ...\", \"Here's what we know so far about Harry Styles and Olivia Wilde's relationship.\", 'Olivia and the Grammy winner kept their romance out of the spotlight as their relationship began just two months after her split from ex-fiancé ...', \"Harry Styles and Olivia Wilde first met on the set of Don't Worry Darling and stepped out as a couple in January 2021. Relive all their biggest relationship ...\"]\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m Harry Styles is Olivia Wilde's boyfriend.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"Harry Styles age\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m29 years\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to calculate 29 raised to the 0.23 power.\n",
|
||||
"Action: Calculator\n",
|
||||
"Action Input: 29^0.23\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 2.169459462491557\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 2.169459462491557\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n",
|
||||
"Final Answer: Harry Styles, Olivia Wilde's boyfriend, is 29 years old and his age raised to the 0.23 power is 2.169459462491557.\u001b[0m\n",
|
||||
"Final Answer: Harry Styles is Olivia Wilde's boyfriend and his current age raised to the 0.23 power is 2.169459462491557.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"Total Tokens: 1506\n",
|
||||
"Prompt Tokens: 1350\n",
|
||||
"Completion Tokens: 156\n",
|
||||
"Total Cost (USD): $0.03012\n"
|
||||
"Total Tokens: 2205\n",
|
||||
"Prompt Tokens: 2053\n",
|
||||
"Completion Tokens: 152\n",
|
||||
"Total Cost (USD): $0.0441\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -163,7 +162,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": null,
|
||||
"id": "80ca77a3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -186,7 +185,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
@@ -1,23 +0,0 @@
----
-sidebar_position: 1
----
-# Language models
-
-LangChain provides interfaces and integrations for two types of models:
-
-- [Chat models](/docs/modules/model_io/models/chat/): Models that are backed by a language model but take a list of Chat Messages as input and return a Chat Message
-- [LLMs](/docs/modules/model_io/models/llms/): Models that take a text string as input and return a text string
-
-## LLMs vs Chat models
-
-LLMs and chat models are subtly but importantly different. LLMs in LangChain refer to pure text completion models.
-The APIs they wrap take a string prompt as input and output a string completion. OpenAI's GPT-3 is implemented as an LLM.
-Chat models are often backed by LLMs but tuned specifically for having conversations.
-And, crucially, their provider APIs use a different interface than pure text completion models. Instead of a single string,
-they take a list of chat messages as input. Usually these messages are labeled with the speaker (usually one of "System",
-"AI", and "Human"). And they return an AI chat message as output. GPT-4 and Anthropic's Claude are both implemented as chat models.
-
-To make it possible to swap LLMs and chat models, both implement the Base Language Model interface. This includes common
-methods "predict", which takes a string and returns a string, and "predict messages", which takes messages and returns a message.
-If you are using a specific model it's recommended you use the methods specific to that model class (i.e., "predict" for LLMs and "predict messages" for chat models),
-but if you're creating an application that should work with different types of models the shared interface can be helpful.
@ -1,160 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f6574496-b360-4ffa-9523-7fd34a590164",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Async API\n",
|
||||
"\n",
|
||||
"LangChain provides async support for LLMs by leveraging the [asyncio](https://docs.python.org/3/library/asyncio.html) library.\n",
|
||||
"\n",
|
||||
"Async support is particularly useful for calling multiple LLMs concurrently, as these calls are network-bound. Currently, `OpenAI`, `PromptLayerOpenAI`, `ChatOpenAI`, `Anthropic` and `Cohere` are supported, but async support for other LLMs is on the roadmap.\n",
|
||||
"\n",
|
||||
"You can use the `agenerate` method to call an OpenAI LLM asynchronously."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "5e49e96c-0f88-466d-b3d3-ea0966bdf19e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, how about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about yourself?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you! How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you! How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about you?\n",
|
||||
"\u001B[1mConcurrent executed in 1.39 seconds.\u001B[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about you?\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about yourself?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thanks for asking. How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thanks! How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about you?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thank you. How about yourself?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I'm doing well, thanks for asking. How about you?\n",
|
||||
"\u001B[1mSerial executed in 5.77 seconds.\u001B[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import time\n",
|
||||
"import asyncio\n",
|
||||
"\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def generate_serially():\n",
|
||||
" llm = OpenAI(temperature=0.9)\n",
|
||||
" for _ in range(10):\n",
|
||||
" resp = llm.generate([\"Hello, how are you?\"])\n",
|
||||
" print(resp.generations[0][0].text)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def async_generate(llm):\n",
|
||||
" resp = await llm.agenerate([\"Hello, how are you?\"])\n",
|
||||
" print(resp.generations[0][0].text)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def generate_concurrently():\n",
|
||||
" llm = OpenAI(temperature=0.9)\n",
|
||||
" tasks = [async_generate(llm) for _ in range(10)]\n",
|
||||
" await asyncio.gather(*tasks)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"s = time.perf_counter()\n",
|
||||
"# If running this outside of Jupyter, use asyncio.run(generate_concurrently())\n",
|
||||
"await generate_concurrently()\n",
|
||||
"elapsed = time.perf_counter() - s\n",
|
||||
"print(\"\\033[1m\" + f\"Concurrent executed in {elapsed:0.2f} seconds.\" + \"\\033[0m\")\n",
|
||||
"\n",
|
||||
"s = time.perf_counter()\n",
|
||||
"generate_serially()\n",
|
||||
"elapsed = time.perf_counter() - s\n",
|
||||
"print(\"\\033[1m\" + f\"Serial executed in {elapsed:0.2f} seconds.\" + \"\\033[0m\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e1d3a966-3a27-44e8-9441-ed72f01b86f4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@@ -1,12 +0,0 @@
-{
-"model_name": "text-davinci-003",
-"temperature": 0.7,
-"max_tokens": 256,
-"top_p": 1.0,
-"frequency_penalty": 0.0,
-"presence_penalty": 0.0,
-"n": 1,
-"best_of": 1,
-"request_timeout": null,
-"_type": "openai"
-}
@@ -1,10 +0,0 @@
-_type: openai
-best_of: 1
-frequency_penalty: 0.0
-max_tokens: 256
-model_name: text-davinci-003
-n: 1
-presence_penalty: 0.0
-request_timeout: null
-temperature: 0.7
-top_p: 1.0
@ -1,168 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "73f9bf40",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Serialization\n",
|
||||
"\n",
|
||||
"This notebook walks through how to write and read an LLM Configuration to and from disk. This is useful if you want to save the configuration for a given LLM (e.g., the provider, the temperature, etc)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "9c9fb6ff",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.llms.loading import load_llm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "88ce018b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Loading\n",
|
||||
"First, lets go over loading an LLM from disk. LLMs can be saved on disk in two formats: json or yaml. No matter the extension, they are loaded in the same way."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "f12b28f3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\r\n",
|
||||
" \"model_name\": \"text-davinci-003\",\r\n",
|
||||
" \"temperature\": 0.7,\r\n",
|
||||
" \"max_tokens\": 256,\r\n",
|
||||
" \"top_p\": 1.0,\r\n",
|
||||
" \"frequency_penalty\": 0.0,\r\n",
|
||||
" \"presence_penalty\": 0.0,\r\n",
|
||||
" \"n\": 1,\r\n",
|
||||
" \"best_of\": 1,\r\n",
|
||||
" \"request_timeout\": null,\r\n",
|
||||
" \"_type\": \"openai\"\r\n",
|
||||
"}"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!cat llm.json"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "9ab709fc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = load_llm(\"llm.json\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "095b1d56",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"_type: openai\r\n",
|
||||
"best_of: 1\r\n",
|
||||
"frequency_penalty: 0.0\r\n",
|
||||
"max_tokens: 256\r\n",
|
||||
"model_name: text-davinci-003\r\n",
|
||||
"n: 1\r\n",
|
||||
"presence_penalty: 0.0\r\n",
|
||||
"request_timeout: null\r\n",
|
||||
"temperature: 0.7\r\n",
|
||||
"top_p: 1.0\r\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!cat llm.yaml"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "8cafaafe",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = load_llm(\"llm.yaml\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ab3e4223",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Saving\n",
|
||||
"If you want to go from an LLM in memory to a serialized version of it, you can do so easily by calling the `.save` method. Again, this supports both json and yaml."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "b38f685d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm.save(\"llm.json\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "b7365503",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm.save(\"llm.yaml\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "68e45b1c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@ -1,74 +0,0 @@
|
||||
# Streaming
|
||||
|
||||
Some LLMs provide a streaming response. This means that instead of waiting for the entire response to be returned, you can start processing it as soon as it's available. This is useful if you want to display the response to the user as it's being generated, or if you want to process the response as it's being generated.
|
||||
|
||||
Currently, we support streaming for a broad range of LLM implementations, including but not limited to `OpenAI`, `ChatOpenAI`, `ChatAnthropic`, `Hugging Face Text Generation Inference`, and `Replicate`. This feature has been expanded to accommodate most of the models. To utilize streaming, use a [`CallbackHandler`](https://github.com/langchain-ai/langchain/blob/master/langchain/callbacks/base.py) that implements `on_llm_new_token`. In this example, we are using `StreamingStdOutCallbackHandler`.
|
||||
```python
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
||||
|
||||
|
||||
llm = OpenAI(streaming=True, callbacks=[StreamingStdOutCallbackHandler()], temperature=0)
|
||||
resp = llm("Write me a song about sparkling water.")
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
Verse 1
|
||||
I'm sippin' on sparkling water,
|
||||
It's so refreshing and light,
|
||||
It's the perfect way to quench my thirst
|
||||
On a hot summer night.
|
||||
|
||||
Chorus
|
||||
Sparkling water, sparkling water,
|
||||
It's the best way to stay hydrated,
|
||||
It's so crisp and so clean,
|
||||
It's the perfect way to stay refreshed.
|
||||
|
||||
Verse 2
|
||||
I'm sippin' on sparkling water,
|
||||
It's so bubbly and bright,
|
||||
It's the perfect way to cool me down
|
||||
On a hot summer night.
|
||||
|
||||
Chorus
|
||||
Sparkling water, sparkling water,
|
||||
It's the best way to stay hydrated,
|
||||
It's so crisp and so clean,
|
||||
It's the perfect way to stay refreshed.
|
||||
|
||||
Verse 3
|
||||
I'm sippin' on sparkling water,
|
||||
It's so light and so clear,
|
||||
It's the perfect way to keep me cool
|
||||
On a hot summer night.
|
||||
|
||||
Chorus
|
||||
Sparkling water, sparkling water,
|
||||
It's the best way to stay hydrated,
|
||||
It's so crisp and so clean,
|
||||
It's the perfect way to stay refreshed.
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
||||
|
||||
We still have access to the end `LLMResult` if using `generate`. However, `token_usage` is not currently supported for streaming.
|
||||
|
||||
|
||||
```python
|
||||
llm.generate(["Tell me a joke."])
|
||||
```
|
||||
|
||||
<CodeOutputBlock lang="python">
|
||||
|
||||
```
|
||||
Q: What did the fish say when it hit the wall?
|
||||
A: Dam!
|
||||
|
||||
|
||||
LLMResult(generations=[[Generation(text='\n\nQ: What did the fish say when it hit the wall?\nA: Dam!', generation_info={'finish_reason': 'stop', 'logprobs': None})]], llm_output={'token_usage': {}, 'model_name': 'text-davinci-003'})
|
||||
```
|
||||
|
||||
</CodeOutputBlock>
|
@@ -6,7 +6,7 @@
 "metadata": {},
 "source": [
 "---\n",
-"sidebar_position: 2\n",
+"sidebar_position: 3\n",
 "title: Output parsers\n",
 "---"
 ]
@@ -42,7 +42,7 @@
 "\n",
 "The chat model interface is based around messages rather than raw text. Several components are important to consider for chat:\n",
 "\n",
-"* `chat model`: See [here](/docs/integrations/chat) for a list of chat model integrations and [here](/docs/modules/model_io/models/chat) for documentation on the chat model interface in LangChain. You can use `LLMs` (see [here](/docs/modules/model_io/models/llms)) for chatbots as well, but chat models have a more conversational tone and natively support a message interface.\n",
+"* `chat model`: See [here](/docs/integrations/chat) for a list of chat model integrations and [here](/docs/modules/model_io/chat) for documentation on the chat model interface in LangChain. You can use `LLMs` (see [here](/docs/modules/model_io/llms)) for chatbots as well, but chat models have a more conversational tone and natively support a message interface.\n",
 "* `prompt template`: Prompt templates make it easy to assemble prompts that combine default messages, user input, chat history, and (optionally) additional retrieved context.\n",
 "* `memory`: [See here](/docs/modules/memory/) for in-depth documentation on memory types\n",
 "* `retriever` (optional): [See here](/docs/modules/data_connection/retrievers) for in-depth documentation on retrieval systems. These are useful if you want to build a chatbot with domain-specific knowledge.\n",
@@ -71,7 +71,7 @@
 "id": "88197b95",
 "metadata": {},
 "source": [
-"With a plain chat model, we can get chat completions by [passing one or more messages](/docs/modules/model_io/models/chat) to the model.\n",
+"With a plain chat model, we can get chat completions by [passing one or more messages](/docs/modules/model_io/chat) to the model.\n",
 "\n",
 "The chat model will respond with a message."
 ]
@@ -28,7 +28,7 @@
 "2. `Splitting`: [Text splitters](/docs/modules/data_connection/document_transformers/) break `Documents` into splits of specified size\n",
 "3. `Storage`: Storage (e.g., often a [vectorstore](/docs/modules/data_connection/vectorstores/)) will house [and often embed](https://www.pinecone.io/learn/vector-embeddings/) the splits\n",
 "4. `Retrieval`: The app retrieves splits from storage (e.g., often [with similar embeddings](https://www.pinecone.io/learn/k-nearest-neighbor/) to the input question)\n",
-"5. `Generation`: An [LLM](/docs/modules/model_io/models/llms/) produces an answer using a prompt that includes the question and the retrieved data\n",
+"5. `Generation`: An [LLM](/docs/modules/model_io/llms/) produces an answer using a prompt that includes the question and the retrieved data\n",
 "\n",
 "\n",
 "\n",
@@ -23,7 +23,7 @@ CHAT_MODEL_FEAT_TABLE_CORRECTION = {
 
 LLM_TEMPLATE = """\
 ---
-sidebar_position: 0
+sidebar_position: 1
 sidebar_class_name: hidden
 ---
 
@@ -43,7 +43,7 @@ Each LLM integration can optionally provide native implementations for async, st
 
 CHAT_MODEL_TEMPLATE = """\
 ---
-sidebar_position: 1
+sidebar_position: 0
 sidebar_class_name: hidden
 ---
 
docs/vercel.json (134 changed lines)
@ -1,5 +1,25 @@
|
||||
{
|
||||
"redirects": [
|
||||
{
|
||||
"source": "/docs/modules/model_io/models(/?)",
|
||||
"destination": "/docs/modules/model_io/"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/models/:path*(/?)",
|
||||
"destination": "/docs/modules/model_io/:path*"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/fake_llm",
|
||||
"destination": "/cookbook"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/llms/human_input_llm",
|
||||
"destination": "/cookbook"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/chat/human_input_chat_model",
|
||||
"destination": "/cookbook"
|
||||
},
|
||||
{
|
||||
"source": "/docs/expression_language/why",
|
||||
"destination": "/docs/expression_language/"
|
||||
@ -2949,7 +2969,7 @@
|
||||
"destination": "/docs/integrations/chat/anthropic"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/models/chat/integrations/anthropic",
|
||||
"source": "/docs/modules/model_io/chat/integrations/anthropic",
|
||||
"destination": "/docs/integrations/chat/anthropic"
|
||||
},
|
||||
{
|
||||
@ -2957,7 +2977,7 @@
|
||||
"destination": "/docs/integrations/chat/azure_chat_openai"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/models/chat/integrations/azure_chat_openai",
|
||||
"source": "/docs/modules/model_io/chat/integrations/azure_chat_openai",
|
||||
"destination": "/docs/integrations/chat/azure_chat_openai"
|
||||
},
|
||||
{
|
||||
@ -2965,7 +2985,7 @@
|
||||
"destination": "/docs/integrations/chat/google_vertex_ai_palm"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/models/chat/integrations/google_vertex_ai_palm",
|
||||
"source": "/docs/modules/model_io/chat/integrations/google_vertex_ai_palm",
|
||||
"destination": "/docs/integrations/chat/google_vertex_ai_palm"
|
||||
},
|
||||
{
|
||||
@ -2973,7 +2993,7 @@
|
||||
"destination": "/docs/integrations/chat/openai"
|
||||
},
|
||||
{
|
||||
"source": "/docs/modules/model_io/models/chat/integrations/openai",
|
||||
"source": "/docs/modules/model_io/chat/integrations/openai",
|
||||
"destination": "/docs/integrations/chat/openai"
|
||||
},
|
||||
{
|
||||
@ -2981,39 +3001,39 @@
      "destination": "/docs/integrations/chat/promptlayer_chatopenai"
    },
    {
      "source": "/docs/modules/model_io/models/chat/integrations/promptlayer_chatopenai",
      "source": "/docs/modules/model_io/chat/integrations/promptlayer_chatopenai",
      "destination": "/docs/integrations/chat/promptlayer_chatopenai"
    },
    {
      "source": "/en/latest/modules/models/llms/examples/async_llm.html",
      "destination": "/docs/modules/model_io/models/llms/how_to/async_llm"
      "destination": "/docs/modules/model_io/llms/how_to/async_llm"
    },
    {
      "source": "/en/latest/modules/models/llms/examples/custom_llm.html",
      "destination": "/docs/modules/model_io/models/llms/how_to/custom_llm"
      "destination": "/docs/modules/model_io/llms/how_to/custom_llm"
    },
    {
      "source": "/en/latest/modules/models/llms/examples/fake_llm.html",
      "destination": "/docs/modules/model_io/models/llms/how_to/fake_llm"
      "destination": "/docs/modules/model_io/llms/how_to/fake_llm"
    },
    {
      "source": "/en/latest/modules/models/llms/examples/human_input_llm.html",
      "destination": "/docs/modules/model_io/models/llms/how_to/human_input_llm"
      "destination": "/docs/modules/model_io/llms/how_to/human_input_llm"
    },
    {
      "source": "/en/latest/modules/models/llms/examples/llm_serialization.html",
      "destination": "/docs/modules/model_io/models/llms/how_to/llm_serialization"
      "destination": "/docs/modules/model_io/llms/how_to/llm_serialization"
    },
    {
      "source": "/en/latest/modules/models/llms/examples/token_usage_tracking.html",
      "destination": "/docs/modules/model_io/models/llms/how_to/token_usage_tracking"
      "destination": "/docs/modules/model_io/llms/how_to/token_usage_tracking"
    },
    {
      "source": "/en/latest/modules/models/llms/integrations/ai21.html",
      "destination": "/docs/integrations/llms/ai21"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/ai21",
      "source": "/docs/modules/model_io/llms/integrations/ai21",
      "destination": "/docs/integrations/llms/ai21"
    },
    {
@ -3021,7 +3041,7 @@
      "destination": "/docs/integrations/llms/aleph_alpha"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/aleph_alpha",
      "source": "/docs/modules/model_io/llms/integrations/aleph_alpha",
      "destination": "/docs/integrations/llms/aleph_alpha"
    },
    {

@ -3029,7 +3049,7 @@
      "destination": "/docs/integrations/llms/anyscale"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/anyscale",
      "source": "/docs/modules/model_io/llms/integrations/anyscale",
      "destination": "/docs/integrations/llms/anyscale"
    },
    {

@ -3037,7 +3057,7 @@
      "destination": "/docs/integrations/llms/azure_openai_example"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/azure_openai_example",
      "source": "/docs/modules/model_io/llms/integrations/azure_openai_example",
      "destination": "/docs/integrations/llms/azure_openai_example"
    },
    {

@ -3045,7 +3065,7 @@
      "destination": "/docs/integrations/llms/banana"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/banana",
      "source": "/docs/modules/model_io/llms/integrations/banana",
      "destination": "/docs/integrations/llms/banana"
    },
    {

@ -3053,7 +3073,7 @@
      "destination": "/docs/integrations/llms/baseten"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/baseten",
      "source": "/docs/modules/model_io/llms/integrations/baseten",
      "destination": "/docs/integrations/llms/baseten"
    },
    {

@ -3061,7 +3081,7 @@
      "destination": "/docs/integrations/llms/beam"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/beam",
      "source": "/docs/modules/model_io/llms/integrations/beam",
      "destination": "/docs/integrations/llms/beam"
    },
    {

@ -3069,7 +3089,7 @@
      "destination": "/docs/integrations/llms/bedrock"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/bedrock",
      "source": "/docs/modules/model_io/llms/integrations/bedrock",
      "destination": "/docs/integrations/llms/bedrock"
    },
    {

@ -3077,7 +3097,7 @@
      "destination": "/docs/integrations/llms/cerebriumai_example"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/cerebriumai_example",
      "source": "/docs/modules/model_io/llms/integrations/cerebriumai_example",
      "destination": "/docs/integrations/llms/cerebriumai_example"
    },
    {
@ -3085,7 +3105,7 @@
      "destination": "/docs/integrations/llms/cohere"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/cohere",
      "source": "/docs/modules/model_io/llms/integrations/cohere",
      "destination": "/docs/integrations/llms/cohere"
    },
    {

@ -3093,7 +3113,7 @@
      "destination": "/docs/integrations/llms/ctransformers"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/ctransformers",
      "source": "/docs/modules/model_io/llms/integrations/ctransformers",
      "destination": "/docs/integrations/llms/ctransformers"
    },
    {

@ -3101,7 +3121,7 @@
      "destination": "/docs/integrations/llms/databricks"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/databricks",
      "source": "/docs/modules/model_io/llms/integrations/databricks",
      "destination": "/docs/integrations/llms/databricks"
    },
    {

@ -3109,7 +3129,7 @@
      "destination": "/docs/integrations/llms/deepinfra_example"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/deepinfra_example",
      "source": "/docs/modules/model_io/llms/integrations/deepinfra_example",
      "destination": "/docs/integrations/llms/deepinfra_example"
    },
    {

@ -3117,7 +3137,7 @@
      "destination": "/docs/integrations/llms/forefrontai_example"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/forefrontai_example",
      "source": "/docs/modules/model_io/llms/integrations/forefrontai_example",
      "destination": "/docs/integrations/llms/forefrontai_example"
    },
    {

@ -3125,7 +3145,7 @@
      "destination": "/docs/integrations/llms/google_vertex_ai_palm"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/google_vertex_ai_palm",
      "source": "/docs/modules/model_io/llms/integrations/google_vertex_ai_palm",
      "destination": "/docs/integrations/llms/google_vertex_ai_palm"
    },
    {

@ -3133,7 +3153,7 @@
      "destination": "/docs/integrations/llms/gooseai_example"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/gooseai_example",
      "source": "/docs/modules/model_io/llms/integrations/gooseai_example",
      "destination": "/docs/integrations/llms/gooseai_example"
    },
    {

@ -3141,7 +3161,7 @@
      "destination": "/docs/integrations/llms/huggingface_hub"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/huggingface_hub",
      "source": "/docs/modules/model_io/llms/integrations/huggingface_hub",
      "destination": "/docs/integrations/llms/huggingface_hub"
    },
    {
@ -3149,7 +3169,7 @@
      "destination": "/docs/integrations/llms/huggingface_pipelines"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/huggingface_pipelines",
      "source": "/docs/modules/model_io/llms/integrations/huggingface_pipelines",
      "destination": "/docs/integrations/llms/huggingface_pipelines"
    },
    {

@ -3157,7 +3177,7 @@
      "destination": "/docs/integrations/llms/huggingface_textgen_inference"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/huggingface_textgen_inference",
      "source": "/docs/modules/model_io/llms/integrations/huggingface_textgen_inference",
      "destination": "/docs/integrations/llms/huggingface_textgen_inference"
    },
    {

@ -3165,7 +3185,7 @@
      "destination": "/docs/integrations/llms/jsonformer_experimental"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/jsonformer_experimental",
      "source": "/docs/modules/model_io/llms/integrations/jsonformer_experimental",
      "destination": "/docs/integrations/llms/jsonformer_experimental"
    },
    {

@ -3173,7 +3193,7 @@
      "destination": "/docs/integrations/llms/llamacpp"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/llamacpp",
      "source": "/docs/modules/model_io/llms/integrations/llamacpp",
      "destination": "/docs/integrations/llms/llamacpp"
    },
    {

@ -3181,7 +3201,7 @@
      "destination": "/docs/integrations/llms/llm_caching"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/llm_caching",
      "source": "/docs/modules/model_io/llms/integrations/llm_caching",
      "destination": "/docs/integrations/llms/llm_caching"
    },
    {

@ -3189,7 +3209,7 @@
      "destination": "/docs/integrations/llms/manifest"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/manifest",
      "source": "/docs/modules/model_io/llms/integrations/manifest",
      "destination": "/docs/integrations/llms/manifest"
    },
    {

@ -3197,7 +3217,7 @@
      "destination": "/docs/integrations/llms/modal"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/modal",
      "source": "/docs/modules/model_io/llms/integrations/modal",
      "destination": "/docs/integrations/llms/modal"
    },
    {

@ -3205,7 +3225,7 @@
      "destination": "/docs/integrations/llms/mosaicml"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/mosaicml",
      "source": "/docs/modules/model_io/llms/integrations/mosaicml",
      "destination": "/docs/integrations/llms/mosaicml"
    },
    {
@ -3213,7 +3233,7 @@
      "destination": "/docs/integrations/llms/nlpcloud"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/nlpcloud",
      "source": "/docs/modules/model_io/llms/integrations/nlpcloud",
      "destination": "/docs/integrations/llms/nlpcloud"
    },
    {

@ -3221,7 +3241,7 @@
      "destination": "/docs/integrations/llms/openai"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/openai",
      "source": "/docs/modules/model_io/llms/integrations/openai",
      "destination": "/docs/integrations/llms/openai"
    },
    {

@ -3229,7 +3249,7 @@
      "destination": "/docs/integrations/llms/openlm"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/openlm",
      "source": "/docs/modules/model_io/llms/integrations/openlm",
      "destination": "/docs/integrations/llms/openlm"
    },
    {

@ -3237,7 +3257,7 @@
      "destination": "/docs/integrations/llms/petals_example"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/petals_example",
      "source": "/docs/modules/model_io/llms/integrations/petals_example",
      "destination": "/docs/integrations/llms/petals_example"
    },
    {

@ -3245,7 +3265,7 @@
      "destination": "/docs/integrations/llms/pipelineai_example"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/pipelineai_example",
      "source": "/docs/modules/model_io/llms/integrations/pipelineai_example",
      "destination": "/docs/integrations/llms/pipelineai_example"
    },
    {

@ -3253,7 +3273,7 @@
      "destination": "/docs/integrations/llms/predictionguard"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/predictionguard",
      "source": "/docs/modules/model_io/llms/integrations/predictionguard",
      "destination": "/docs/integrations/llms/predictionguard"
    },
    {

@ -3261,7 +3281,7 @@
      "destination": "/docs/integrations/llms/promptlayer_openai"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/promptlayer_openai",
      "source": "/docs/modules/model_io/llms/integrations/promptlayer_openai",
      "destination": "/docs/integrations/llms/promptlayer_openai"
    },
    {

@ -3269,7 +3289,7 @@
      "destination": "/docs/integrations/llms/rellm_experimental"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/rellm_experimental",
      "source": "/docs/modules/model_io/llms/integrations/rellm_experimental",
      "destination": "/docs/integrations/llms/rellm_experimental"
    },
    {
@ -3277,7 +3297,7 @@
      "destination": "/docs/integrations/llms/replicate"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/replicate",
      "source": "/docs/modules/model_io/llms/integrations/replicate",
      "destination": "/docs/integrations/llms/replicate"
    },
    {

@ -3285,7 +3305,7 @@
      "destination": "/docs/integrations/llms/runhouse"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/runhouse",
      "source": "/docs/modules/model_io/llms/integrations/runhouse",
      "destination": "/docs/integrations/llms/runhouse"
    },
    {

@ -3293,7 +3313,7 @@
      "destination": "/docs/integrations/llms/sagemaker"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/sagemaker",
      "source": "/docs/modules/model_io/llms/integrations/sagemaker",
      "destination": "/docs/integrations/llms/sagemaker"
    },
    {

@ -3301,7 +3321,7 @@
      "destination": "/docs/integrations/llms/stochasticai"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/stochasticai",
      "source": "/docs/modules/model_io/llms/integrations/stochasticai",
      "destination": "/docs/integrations/llms/stochasticai"
    },
    {

@ -3309,7 +3329,7 @@
      "destination": "/docs/integrations/llms/writer"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/writer",
      "source": "/docs/modules/model_io/llms/integrations/writer",
      "destination": "/docs/integrations/llms/writer"
    },
    {
@ -3573,12 +3593,12 @@
      "destination": "/docs/modules/memory/:path*"
    },
    {
      "source": "/docs/modules/model_io/models/chat/how_to/:path*",
      "destination": "/docs/modules/model_io/models/chat/:path*"
      "source": "/docs/modules/model_io/chat/how_to/:path*",
      "destination": "/docs/modules/model_io/chat/:path*"
    },
    {
      "source": "/docs/modules/model_io/models/llms/how_to/:path*",
      "destination": "/docs/modules/model_io/models/llms/:path*"
      "source": "/docs/modules/model_io/llms/how_to/:path*",
      "destination": "/docs/modules/model_io/llms/:path*"
    },
    {
      "source": "/docs/modules/callbacks/integrations/:path*",
@ -3593,11 +3613,11 @@
      "destination": "/docs/integrations/text_embedding/:path*"
    },
    {
      "source": "/docs/modules/model_io/models/llms/integrations/:path*",
      "source": "/docs/modules/model_io/llms/integrations/:path*",
      "destination": "/docs/integrations/llms/:path*"
    },
    {
      "source": "/docs/modules/model_io/models/chat/integrations/:path*",
      "source": "/docs/modules/model_io/chat/integrations/:path*",
      "destination": "/docs/integrations/chat/:path*"
    },
    {
@ -3622,11 +3642,11 @@
    },
    {
      "source": "/en/latest/modules/models.html",
      "destination": "/docs/modules/model_io/models/"
      "destination": "/docs/modules/model_io/"
    },
    {
      "source": "/en/latest/modules/models/:path*",
      "destination": "/docs/modules/model_io/models/:path*"
      "destination": "/docs/modules/model_io/:path*"
    },
    {
      "source": "/en/latest/modules/prompts/prompt_templates/examples/:path*",