Compare commits

...

115 Commits

Author SHA1 Message Date
Bagatur
751226e067 bump 0.0.339rc2 (#13787) 2023-11-23 12:50:09 -08:00
Bagatur
300ff01824 RELEASE: core 0.0.5 (#13786) 2023-11-23 12:23:50 -08:00
Bagatur
bcf83988ec Revert "INFRA: temp rm master condition (#13753)" (#13759) 2023-11-22 17:22:07 -08:00
Bagatur
df471b0c0b INFRA: temp rm master condition (#13753) 2023-11-22 16:59:50 -08:00
Bagatur
72c108b003 IMPROVEMENT: filter global warnings properly (#13754) 2023-11-22 16:26:37 -08:00
William FH
163bf165ed Add Batch Size kwarg to the llm start callback (#13483)
So you can more easily use the token counts directly from the API
endpoint for batch size of 1
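A hedged sketch (not the PR's code) of a callback handler that reads the forwarded `batch_size` kwarg; the kwarg name comes from the PR title, everything else here is illustrative:

```python
from typing import Any, Dict, List

from langchain.callbacks.base import BaseCallbackHandler


class BatchSizeLogger(BaseCallbackHandler):
    """Illustrative handler: logs the batch size forwarded to on_llm_start."""

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        # With batch_size == 1, the token counts reported by the API endpoint
        # correspond directly to the single prompt in this call.
        print("llm start, batch_size =", kwargs.get("batch_size"))
```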
2023-11-22 14:47:57 -08:00
Bagatur
23566cbea9 DOCS: core editable dep api refs (#13747) 2023-11-22 14:33:30 -08:00
Bagatur
0be515f720 RELEASE: 0.0.339rc1 (#13746) 2023-11-22 14:29:49 -08:00
Bagatur
2bc5bd67f7 RELEASE: core 0.0.4 (#13745) 2023-11-22 13:57:28 -08:00
Bagatur
b6b7654f7f INFRA: run LC ci after core changes (#13742) 2023-11-22 13:38:48 -08:00
Bagatur
3d28c1a9e0 DOCS: fix core api ref build (#13744) 2023-11-22 15:42:35 -05:00
Bagatur
32d087fcb8 REFACTOR: combine core documents files (#13733) 2023-11-22 10:10:26 -08:00
h3l
14d4fb98fc DOCS: Fix typo/line break in python code (#13708) 2023-11-22 09:10:07 -08:00
William FH
5b90fe5b1c Fix locking (#13725) 2023-11-22 07:37:25 -08:00
Bagatur
16af282429 BUGFIX: add prompt imports for backwards compat (#13702) 2023-11-21 23:04:20 -08:00
Erick Friis
78da34153e TEMPLATES Metadata (#13691)
Co-authored-by: Lance Martin <lance@langchain.dev>
2023-11-22 01:41:12 -05:00
Bagatur
e327bb4ba4 IMPROVEMENT: Conditionally import core type hints (#13700) 2023-11-21 21:38:49 -08:00
dandanwei
d47ee1ae79 BUGFIX: redis vector store overwrites falsey metadata (#13652)
- **Description:** This commit fixes an issue where the Redis vector store
changed a metadata value of 0 to empty when saving the document, which was
unintended behavior.
  - **Issue:** N/A
  - **Dependencies:** N/A
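A minimal sketch of the falsy-metadata pitfall being fixed (illustrative only, not the vector store's actual code):

```python
metadata = {"page": 0, "source": "doc.pdf"}

# Buggy pattern: a plain truthiness check drops 0, "", and False values.
kept_buggy = {k: v for k, v in metadata.items() if v}

# Fixed pattern: only treat None as missing, so 0 survives.
kept_fixed = {k: v for k, v in metadata.items() if v is not None}

assert "page" not in kept_buggy
assert kept_fixed["page"] == 0
```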
2023-11-21 20:16:23 -08:00
Bagatur
a21e84faf7 BUGFIX: llm backwards compat imports (#13698) 2023-11-21 20:12:35 -08:00
Yujie Qian
ace9e64d62 IMPROVEMENT: VoyageEmbeddings embed_general_texts (#13620)
- **Description:** add method embed_general_texts in VoyageEmbeddings to
support input_type
  - **Issue:** 
  - **Dependencies:** 
  - **Tag maintainer:** 
  - **Twitter handle:** @Voyage_AI_
2023-11-21 18:33:07 -08:00
tanujtiwari-at
5064890fcf BUGFIX: handle tool message type when converting to string (#13626)
**Description:** Currently, if we pass a ToolMessage back to the
chain, it crashes with the error

`Got unsupported message type: `

This fixes it. 

Tested locally
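An illustrative sketch of per-type message-to-string handling that covers ToolMessage (the message classes are real langchain imports; the function itself is only a stand-in for the library's conversion logic):

```python
from langchain.schema.messages import AIMessage, BaseMessage, HumanMessage, ToolMessage


def message_to_string(message: BaseMessage) -> str:
    # Handle each known message type explicitly; ToolMessage previously fell
    # through to the "Got unsupported message type" error.
    if isinstance(message, HumanMessage):
        return f"Human: {message.content}"
    if isinstance(message, AIMessage):
        return f"AI: {message.content}"
    if isinstance(message, ToolMessage):
        return f"Tool: {message.content}"
    raise ValueError(f"Got unsupported message type: {message}")
```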

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-21 18:20:58 -08:00
Josep Pon Farreny
143049c90f Added partial_variables to BaseStringMessagePromptTemplate.from_template(...) (#13645)
**Description:** BaseStringMessagePromptTemplate.from_template was
passing the value of partial_variables into cls(...) via **kwargs,
rather than passing it to PromptTemplate.from_template, which resulted
in those *partial_variables* being lost and becoming required
*input_variables*.
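A hedged usage sketch of the intended behaviour (the template text and variable names are made up for illustration):

```python
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate

message_prompt = HumanMessagePromptTemplate.from_template(
    "Translate {text} into {language}.",
    partial_variables={"language": "French"},
)
prompt = ChatPromptTemplate.from_messages([message_prompt])

# After the fix, only "text" is a required input variable;
# "language" is pre-filled by partial_variables.
print(prompt.input_variables)  # expected: ['text']
```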

Co-authored-by: Josep Pon Farreny <josep.pon-farreny@siemens.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-21 17:48:38 -08:00
Erick Friis
c5ae9f832d INFRA: Lint for imports (#13632)
- Adds pydantic/import linting to core
- Adds a check for `langchain_experimental` imports to langchain
2023-11-21 17:42:56 -08:00
Erick Friis
131db4ba68 BUGFIX: anthropic models on bedrock (#13629)
Introduced in #13403
2023-11-21 17:40:29 -08:00
David Ruan
04bddbaba4 BUGFIX: Update bedrock.py to fix provider bug (#13646)
Provider check was incorrectly failing for anything other than "meta"
2023-11-21 17:28:38 -08:00
Guangya Liu
aec8715073 DOCS: remove openai api key from cookbook (#13633) 2023-11-21 17:25:06 -08:00
Guangya Liu
bb18b0266e DOCS: fixed import error for BashOutputParser (#13680) 2023-11-21 16:33:40 -08:00
Bagatur
dc53523837 IMPROVEMENT: bump core dep 0.0.3 (#13690) 2023-11-21 15:50:19 -08:00
Bagatur
a208abe6b7 add callback import test (#13689) 2023-11-21 15:28:49 -08:00
Bagatur
083afba697 BUG: Add core utils imports (#13688) 2023-11-21 15:25:47 -08:00
Bagatur
c61e30632e BUG: more core fixes (#13665)
Fix some circular deps:
- move PromptValue into the top-level module because both PromptTemplates and
OutputParsers import it
- move tracer context vars to `tracers.context` and import them in
functions in `callbacks.manager`
- add core import tests
2023-11-21 15:15:48 -08:00
William FH
59df16ab92 Update name (#13676) 2023-11-21 13:39:30 -08:00
Erick Friis
bfb980b968 CLI 0.0.19 (#13677) 2023-11-21 12:34:38 -08:00
Taqi Jaffri
d65c36d60a docugami cookbook (#13183)
Adds a cookbook for semi-structured RAG via Docugami. This follows the
same outline as the semi-structured RAG with Unstructured cookbook:
https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_Structured_RAG.ipynb

The main change is this cookbook uses Docugami instead of Unstructured
to find text and tables, and shows how XML markup in the output helps
with retrieval and generation.

We are @docugami on twitter, I am @tjaffri

---------

Co-authored-by: Taqi Jaffri <tjaffri@docugami.com>
2023-11-21 12:02:20 -08:00
jakerachleff
249c796785 update langserve to v0.0.30 (#13673)
Upgrade langserve template version to 0.0.30 to include new improvements
2023-11-21 11:17:47 -08:00
jakerachleff
c6937a2eb4 fix templates dockerfile (#13672)
- **Description:** We need to update the Dockerfile for templates to
also copy your README.md. This is because poetry requires that a readme
exists if it is specified in the pyproject.toml
2023-11-21 11:09:55 -08:00
Bagatur
11614700a4 bump 0.0.339rc0 (#13664) 2023-11-21 08:41:59 -08:00
Bagatur
d32e511826 REFACTOR: Refactor langchain_core (#13627)
Changes:
- remove langchain_core/schema since there is no clear distinction between schema and
non-schema modules
- make every module that doesn't end in -y plural
- where easy have 1-2 classes per file
- no more than one level of nesting in directories
- only import from top level core modules in langchain
2023-11-21 08:35:29 -08:00
William FH
17c6551c18 Add error rate (#13568)
To the in-memory outputs. Separate it out from the outputs so it's
present in the dataframe.describe() results
2023-11-21 07:51:30 -08:00
Nuno Campos
8329f81072 Use pytest asyncio auto mode (#13643)
2023-11-21 15:00:13 +00:00
Lance Martin
611e1e0ca4 Add template for gpt-crawler (#13625)
Template for RAG using
[gpt-crawler](https://github.com/BuilderIO/gpt-crawler).

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-20 21:32:57 -08:00
Bagatur
99b4f46cbe REFACTOR: Add core as dep (#13623) 2023-11-20 14:38:10 -08:00
Harrison Chase
d82cbf5e76 Separate out langchain_core package (#13577)
Co-authored-by: Nuno Campos <nuno@boringbits.io>
Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-20 13:09:30 -08:00
Bagatur
4eec47b191 DOCS: update rag use case images (#13615) 2023-11-20 10:14:52 -08:00
Bagatur
e620347a83 RELEASE: bump 339 (#13613) 2023-11-20 09:56:43 -08:00
Ofer Mendelevitch
52e23e50b1 BUG: Fix search_kwargs in Vectara retriever (#13299)
- **Description:** fix a bug that prevented as_retriever() in Vectara from
using the desired input arguments
  - **Issue:** as_retriever did not pass the arguments properly
  - **Tag maintainer:** @baskaryan
  - **Twitter handle:** @ofermend
2023-11-20 09:44:43 -08:00
Holt Skinner
1c08dbfb33 IMPROVEMENT: Reduce post-processing time for DocAIParser (#13210)
- Remove `WrappedDocument` introduced in
https://github.com/langchain-ai/langchain/pull/11413
- See https://github.com/googleapis/python-documentai-toolbox/issues/198 in
Document AI Toolbox, which aims to improve initialization time for the `WrappedDocument`
object.

@lkuligin

@baskaryan

@hwchase17
2023-11-20 09:41:44 -08:00
Leonid Kuligin
f3fcdea574 fixed an UnboundLocalError when no documents are found (#12995)
  - **Description:** fixed a bug
  - **Issue:** #12780
2023-11-20 09:41:14 -08:00
Stijn Tratsaert
b6f70d776b VertexAI LLM count_tokens method requires list of prompts (#13451)
I encountered this during summarization with VertexAI. I was receiving
an INVALID_ARGUMENT error, as it was trying to send a list of about
17000 single characters.

The [count_tokens
method](https://github.com/googleapis/python-aiplatform/blob/main/vertexai/language_models/_language_models.py#L658)
made available by Google takes in a list of prompts. It does not fail
for small texts, but it does for longer documents because the argument
list will exceed Google's allowed limit. Enforcing the list type
makes it work successfully.

This change will cast the input text to count to a list of that single
text so that the input format is always correct.
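An illustrative sketch of the fix described above (the model name is assumed; this is not the wrapper's actual code):

```python
from vertexai.language_models import TextGenerationModel

model = TextGenerationModel.from_pretrained("text-bison@001")
text = "A fairly long document to summarize ..."

# Buggy: passing the bare string lets the SDK iterate it character by
# character, producing thousands of one-character prompts.
# model.count_tokens(text)

# Fixed: wrap the single text in a list so it is treated as one prompt.
response = model.count_tokens([text])
print(response.total_tokens)
```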

[Twitter](https://www.x.com/stijn_tratsaert)
2023-11-20 09:40:48 -08:00
Wang Wei
fe7b40cb2a feat: add ERNIE-Bot-4 Function Calling (#13320)
- **Description:** ERNIE-Bot-Chat-4 Large Language Model adds the
ability of `Function Calling` by passing parameters through the
`functions` parameter in the request. To simplify function calling for
ERNIE-Bot-Chat-4, the `create_ernie_fn_chain()` function has been added.
The definition and usage of the `create_ernie_fn_chain()` function is
similar to that of the `create_openai_fn_chain()` function.

An example follows:

```python
import json

from langchain.chains.ernie_functions import (
    create_ernie_fn_chain,
)
from langchain.chat_models import ErnieBotChat
from langchain.prompts import ChatPromptTemplate

def get_current_news(location: str) -> str:
    """Get the current news based on the location.

    Args:
        location (str): The location to query.
    
    Returns:
        str: Current news based on the location.
    """

    news_info = {
        "location": location,
        "news": [
            "I have a Book.",
            "It's a nice day, today."
        ]
    }

    return json.dumps(news_info)

def get_current_weather(location: str, unit: str="celsius") -> str:
    """Get the current weather in a given location

    Args:
        location (str): location of the weather.
        unit (str): unit of the temperature.
    
    Returns:
        str: weather in the given location.
    """

    weather_info = {
        "location": location,
        "temperature": "27",
        "unit": unit,
        "forecast": ["sunny", "windy"],
    }
    return json.dumps(weather_info)

llm = ErnieBotChat(model_name="ERNIE-Bot-4")
prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{query}"),
    ]
)

chain = create_ernie_fn_chain([get_current_weather, get_current_news], llm, prompt, verbose=True)
res = chain.run("北京今天的新闻是什么?")  # "What is today's news in Beijing?"
print(res)
```

The running results of the above program are shown below:
```
> Entering new LLMChain chain...
Prompt after formatting:
Human: 北京今天的新闻是什么?



> Finished chain.
{'name': 'get_current_news', 'thoughts': '用户想要知道北京今天的新闻。我可以使用get_current_news工具来获取这些信息。', 'arguments': {'location': '北京'}}
```
2023-11-19 22:36:12 -08:00
Adilkhan Sarsen
10418ab0c1 DeepLake Backwards compatibility fix (#13388)
- **Description:** during search with DeepLake some people were facing
backwards-compatibility issues; this PR fixes that by making search
accessible for older datasets

---------

Co-authored-by: adolkhan <adilkhan.sarsen@alumni.nu.edu.kz>
2023-11-19 21:46:01 -08:00
Tyler Hutcherson
190952fe76 IMPROVEMENT: Minor redis improvements (#13381)
- **Description:**
- Fixes a `key_prefix` bug where passing it in on
`Redis.from_existing(...)` did not work properly. Updates doc strings
accordingly.
- Updates Redis filter classes logic with best practices on typing,
string formatting, and handling "empty" filters.
- Fixes a bug that would prevent multiple tag filters from being applied
together in some scenarios.
- Added a whole new filter unit testing module. Also updated code
formatting for a number of modules that were failing the `make`
commands.
  - **Issue:** N/A
  - **Dependencies:** N/A
  - **Tag maintainer:** @baskaryan 
  - **Twitter handle:** @tchutch94
2023-11-19 19:15:45 -08:00
Sijun He
674bd90a47 DOCS: Fix typo in MongoDB memory docs (#13588)
- **Description:** Fix typo in MongoDB memory docs
  - **Tag maintainer:** @eyurtsev

2023-11-19 19:13:35 -08:00
Sergey Kozlov
df03267edf Fix tool arguments formatting in StructuredChatAgent (#10480)
In the `FORMAT_INSTRUCTIONS` template, 4 curly braces (escaping) are
used to get single curly brace after formatting:

```
"{{{{ ... }}}}" -> format_instructions.format() ->  "{{ ... }}" -> template.format() -> "{ ... }".
```

Tool's `args_schema` string contains single braces `{ ... }`, and is
also transformed to `{{{{ ... }}}}` form. But this is not really correct
since there is only one `format()` call:

```
"{{{{ ... }}}}" -> template.format() -> "{{ ... }}".
```

As a result we get double curly braces in the prompt:
````
Respond to the human as helpfully and accurately as possible. You have access to the following tools:

foo: Test tool FOO, args: {{'tool_input': {{'type': 'string'}}}}    # <--- !!!
...
Provide only ONE action per $JSON_BLOB, as shown:

```
{
  "action": $TOOL_NAME,
  "action_input": $INPUT
}
```
````

This PR fixes curly braces escaping in the `args_schema` to have single
braces in the final prompt:
````
Respond to the human as helpfully and accurately as possible. You have access to the following tools:

foo: Test tool FOO, args: {'tool_input': {'type': 'string'}}    # <--- !!!
...
Provide only ONE action per $JSON_BLOB, as shown:

```
{
  "action": $TOOL_NAME,
  "action_input": $INPUT
}
```
````

---------

Co-authored-by: Sergey Kozlov <sergey.kozlov@ludditelabs.io>
2023-11-19 18:45:43 -08:00
Wouter Durnez
ef7802b325 Add llama2-13b-chat-v1 support to chat_models.BedrockChat (#13403)
Hi 👋 We are working with Llama2 on Bedrock, and would like to add it to
Langchain. We saw a [pull
request](https://github.com/langchain-ai/langchain/pull/13322) to add it
to the `llm.Bedrock` class, but since it concerns a chat model, we would
like to add it to `BedrockChat` as well.

- **Description:** Add support for Llama2 to `BedrockChat` in
`chat_models`
- **Issue:** [#13316](https://github.com/langchain-ai/langchain/issues/13316)
  - **Dependencies:** any dependencies required for this change `None`
  - **Tag maintainer:** /
  - **Twitter handle:** `@SimonBockaert @WouterDurnez`

---------

Co-authored-by: wouter.durnez <wouter.durnez@showpad.com>
Co-authored-by: Simon Bockaert <simon.bockaert@showpad.com>
2023-11-19 18:44:58 -08:00
jwbeck97
a93616e972 FEAT: Add azure cognitive health tool (#13448)
- **Description:** This change adds a tool to the Azure Cognitive
Services toolkit for identifying healthcare entities
  - **Dependencies:** azure-ai-textanalytics (Optional)

---------

Co-authored-by: James Beck <James.Beck@sa.gov.au>
Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-19 18:44:01 -08:00
Massimiliano Pronesti
6bf9b2cb51 BUG: Limit Azure OpenAI embeddings chunk size (#13425)
Hi! 
This short PR aims at:
* Fixing `OpenAIEmbeddings`' check on `chunk_size` when used with Azure
OpenAI (thus with openai < 1.0). Azure OpenAI embeddings support at most
16 chunks per batch; I believe we are supposed to take the min between
the passed value/default value and 16, not the max - which, I suppose,
was introduced by accident while refactoring the previous version of
this check from this other PR of mine: #10707
* Porting this fix to the newest class (`AzureOpenAIEmbeddings`) for
openai >= 1.0

This fixes #13539 (closed but the issue persists).  
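A minimal sketch of the check in question (illustrative; the 16-inputs-per-request limit is taken from the description above):

```python
AZURE_MAX_BATCH_SIZE = 16  # max inputs per Azure OpenAI embeddings request


def effective_chunk_size(requested: int, is_azure: bool) -> int:
    if is_azure:
        # Buggy version effectively used max(requested, 16), which can exceed
        # the Azure limit; the fix is to take the min instead.
        return min(requested, AZURE_MAX_BATCH_SIZE)
    return requested


assert effective_chunk_size(1000, is_azure=True) == 16
assert effective_chunk_size(1000, is_azure=False) == 1000
```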

@baskaryan @hwchase17
2023-11-19 18:34:51 -08:00
Zeyang Lin
e53f59f01a DOCS: doc-string - langchain.vectorstores.dashvector.DashVector (#13502)
- **Description:** There are several mistakes in the sample code in the
doc-string of the `DashVector` class, and this pull request aims to correct
them.
The corrected code has been tested against the latest versions (at the time
this pull request was created): `langchain==0.0.336`
`dashvector==1.0.6`.
- **Issue:** No issue is created for this.
- **Dependencies:** No dependency is required for this change,
- **Twitter handle:** `zeyanglin`

2023-11-19 18:24:05 -08:00
John Mai
16f7912e1b BUG: fix hunyuan appid type (#13496)
- **Description:** fix hunyuan appid type
- **Issue:**
https://github.com/langchain-ai/langchain/pull/12022#issuecomment-1815627855
2023-11-19 18:23:45 -08:00
Leonid Ganeline
43972be632 docs updating AzureML notebooks (#13492)
- Added/updated descriptions and links

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-19 18:07:12 -08:00
Nicolò Boschi
8362bd729b AstraDB: use includeSimilarity option instead of $similarity (#13512)
- **Description:** AstraDB is going to deprecate the `$similarity`
projection property in favor of the `includeSimilarity` option flag. I
moved all the queries to the new format.
- **Tag maintainer:** @hemidactylus 
- **Twitter handle:** nicoloboschi
2023-11-19 17:54:35 -08:00
shumpei
7100d586ef Introduce search_kwargs for Custom Parameters in BingSearchAPIWrapper (#13525)
Added a `search_kwargs` field to BingSearchAPIWrapper in
`bing_search.py`, enabling users to include extra keyword arguments in
Bing search queries. This adds more customization to searches, such as
specifying language preferences. The `search_kwargs` are seamlessly
merged with the standard parameters in the `_bing_search_results` method.
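A hedged usage sketch (the `mkt` key is just one example of a Bing query parameter; credentials are placeholders):

```python
from langchain.utilities import BingSearchAPIWrapper

search = BingSearchAPIWrapper(
    bing_subscription_key="<your-key>",
    bing_search_url="https://api.bing.microsoft.com/v7.0/search",
    # Extra keyword arguments merged into every Bing query,
    # e.g. prefer results from the Japanese market.
    search_kwargs={"mkt": "ja-JP"},
)
results = search.results("LangChain", num_results=3)
```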

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-19 17:51:02 -08:00
Nicolò Boschi
ad0c3b9479 Fix Astra integration tests (#13520)
- **Description:** Fix Astra integration tests that are failing.
`delete` always returns True, as the deletion is successful if no errors
are thrown. I aligned the test to verify this behaviour.
  - **Tag maintainer:** @hemidactylus 
  - **Twitter handle:** nicoloboschi

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-19 17:50:49 -08:00
umair mehmood
69d39e2173 fix: VLLMOpenAI -- create() got an unexpected keyword argument 'api_key' (#13517)
The issue was occurring because of the `openai` update to Completions: it's
no longer accepting the `api_key` and `api_base` args.

The fix is that we check the openai version and, if it's v1, remove
these keys from the args before passing them to the completions `create(...)` call
when sending from `VLLMOpenAI`.
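An illustrative sketch of that version guard (standalone; the parameter values are placeholders, not the class's actual invocation params):

```python
from importlib.metadata import version

params = {
    "model": "my-vllm-model",
    "prompt": "Hello",
    "api_key": "EMPTY",
    "api_base": "http://localhost:8000/v1",
}

# openai>=1.0 clients take credentials at construction time, not per call,
# so strip them from the per-call params before create(...).
if int(version("openai").split(".")[0]) >= 1:
    for key in ("api_key", "api_base"):
        params.pop(key, None)
```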

Fixed: #13507 

@eyu
@efriis 
@hwchase17

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-19 17:49:55 -08:00
Manuel Alemán Cueto
6bc08266e0 Fix for oracle schema parsing stated on the issue #7928 (#13545)
- **Description:** In this pull request, we address an issue related to
assigning a schema to the SQLDatabase class when utilizing an Oracle
database. The current implementation encounters a bug where, upon
attempting to execute a query, the alter session parse is not
appropriately defined for Oracle, leading to an error,
  - **Issue:** #7928,
  - **Dependencies:** No dependencies,
  - **Tag maintainer:** @baskaryan,

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-19 17:35:27 -08:00
Andrew Teeter
325bdac673 feat: load all namespaces (#13549)
- **Description:** This change allows for the `MWDumpLoader` to load all
namespaces including custom by default instead of only loading the
[default
namespaces](https://www.mediawiki.org/wiki/Help:Namespaces#Localisation).
  - **Tag maintainer:** @hwchase17
2023-11-19 17:35:17 -08:00
Taranjeet Singh
47451764a7 Add embedchain retriever (#13553)
**Description:**

This commit adds embedchain retriever along with tests and docs.
Embedchain is a RAG framework to create data pipelines.

**Twitter handle:**
- [Taranjeet's twitter](https://twitter.com/taranjeetio) and
[Embedchain's twitter](https://twitter.com/embedchain)

**Reviewer**
@hwchase17

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-19 17:35:03 -08:00
rafly lesmana
420a17542d fix: Make YoutubeLoader support on demand language translation (#13583)
**Description:**
Enhance the functionality of YoutubeLoader to enable the translation of
available transcripts by refining the existing logic.

**Issue:**
Encountering a problem with YoutubeLoader (#13523) where the translation
feature is not functioning as expected.

Tag maintainers/contributors who might be interested:
@eyurtsev

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
2023-11-19 17:34:48 -08:00
Leonid Ganeline
cc50e023d1 DOCS langchain decorators update (#13535)
added disclaimer

---------

Co-authored-by: Erick Friis <erickfriis@gmail.com>
2023-11-19 17:30:05 -08:00
Brace Sproul
02a13030c0 DOCS: updated langchain stack img to be svg (#13540) 2023-11-19 16:26:53 -08:00
Bagatur
78a1f4b264 bump 338, exp 42 (#13564) 2023-11-18 15:12:07 -08:00
Bagatur
790ed8be69 update multi index templates (#13569) 2023-11-18 14:42:22 -08:00
Harrison Chase
f4c0e3cc15 move streaming stdout (#13559) 2023-11-18 12:24:49 -05:00
Leonid Ganeline
43dad6cb91 BUG fixed openai_assistant namespace (#13543)
BUG: langchain.agents.openai_assistant has a reference as
`from langchain_experimental.openai_assistant.base import
OpenAIAssistantRunnable`
which should be
`from langchain.agents.openai_assistant.base import
OpenAIAssistantRunnable`

This was preventing the API Reference docs from building.
2023-11-17 17:15:33 -08:00
Bassem Yacoube
ff382b7b1b IMPROVEMENT Adds support for new OctoAI endpoints (#13521)
small fix to add support for new OctoAI LLM endpoints
2023-11-17 17:15:21 -08:00
Mark Silverberg
cda1b33270 Fix typo/line break in the middle of a word (#13314)
- **Description:** a simple typo/extra line break fix
  - **Dependencies:** none
2023-11-17 16:43:42 -08:00
William FH
cac849ae86 Use random seed (#13544)
For default eval llm
2023-11-17 16:33:31 -08:00
Martin Krasser
79ed66f870 EXPERIMENTAL Generic LLM wrapper to support chat model interface with configurable chat prompt format (#8295)
## Update 2023-09-08

This PR now supports further models in addition to Llama-2 chat models.
See [this comment](#issuecomment-1668988543) for further details. The
title of this PR has been updated accordingly.

## Original PR description

This PR adds a generic `Llama2Chat` model, a wrapper for LLMs able to
serve Llama-2 chat models (like `LlamaCPP`,
`HuggingFaceTextGenInference`, ...). It implements `BaseChatModel`,
converts a list of chat messages into the [required Llama-2 chat prompt
format](https://huggingface.co/blog/llama2#how-to-prompt-llama-2) and
forwards the formatted prompt as `str` to the wrapped `LLM`. Usage
example:

```python
# uses a locally hosted Llama2 chat model
llm = HuggingFaceTextGenInference(
    inference_server_url="http://127.0.0.1:8080/",
    max_new_tokens=512,
    top_k=50,
    temperature=0.1,
    repetition_penalty=1.03,
)

# Wrap llm to support Llama2 chat prompt format.
# Resulting model is a chat model
model = Llama2Chat(llm=llm)

messages = [
    SystemMessage(content="You are a helpful assistant."),
    MessagesPlaceholder(variable_name="chat_history"),
    HumanMessagePromptTemplate.from_template("{text}"),
]

prompt = ChatPromptTemplate.from_messages(messages)
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
chain = LLMChain(llm=model, prompt=prompt, memory=memory)

# use chat model in a conversation
# ...
```

Also part of this PR are tests and a demo notebook.

- Tag maintainer: @hwchase17
- Twitter handle: `@mrt1nz`

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-17 16:32:13 -08:00
William FH
c56faa6ef1 Add execution time (#13542)
And warn instead of raising an error, since the chain API is too
inconsistent.
2023-11-17 16:04:16 -08:00
pedro-inf-custodio
0fb5f857f9 IMPROVEMENT WebResearchRetriever error handling in urls with connection error (#13401)
- **Description:** Added a method `fetch_valid_documents` to
`WebResearchRetriever` class that will test the connection for every url
in `new_urls` and remove those that raise a `ConnectionError`.
- **Issue:** [Previous
PR](https://github.com/langchain-ai/langchain/pull/13353),
  - **Dependencies:** None,
  - **Tag maintainer:** @efriis 
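A hedged sketch of the idea described above; `fetch_valid_documents` is the method named in the PR, while this standalone helper is only illustrative:

```python
from typing import List

import requests


def filter_reachable(urls: List[str], timeout: float = 5.0) -> List[str]:
    reachable = []
    for url in urls:
        try:
            requests.get(url, timeout=timeout)
            reachable.append(url)
        except requests.exceptions.ConnectionError:
            # Unreachable URL: skip it instead of failing the whole retrieval.
            continue
    return reachable
```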

2023-11-17 14:02:26 -08:00
Piyush Jain
d2335d0114 IMPROVEMENT Neptune graph updates (#13491)
## Description
This PR adds an option to allow unsigned requests to the Neptune
database when using the `NeptuneGraph` class.

```python
graph = NeptuneGraph(
    host='<my-cluster>',
    port=8182,
    sign=False
)
```

Also, added is an option in the `NeptuneOpenCypherQAChain` to provide
additional domain instructions to the graph query generation prompt.
This will be injected in the prompt as-is, so you should include any
provider specific tags, for example `<instructions>` or `<INSTR>`.

```python
chain = NeptuneOpenCypherQAChain.from_llm(
    llm=llm,
    graph=graph,
    extra_instructions="""
    Follow these instructions to build the query:
    1. Countries contain airports, not the other way around
    2. Use the airport code for identifying airports
    """
)
```
2023-11-17 13:49:31 -08:00
William FH
5a28dc3210 Override Keys Option (#13537)
Should be able to override the global key if you want to evaluate
different outputs in a single run
2023-11-17 13:32:43 -08:00
Bagatur
e584b28c54 bump 337 (#13534) 2023-11-17 12:50:52 -08:00
Wietse Venema
e80b53ff4f TEMPLATE Add VertexAI Chuck Norris template (#13531)

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-17 12:27:52 -08:00
Bagatur
2e2114d2d0 FEATURE: Runnable with message history (#13418)
Add a RunnableWithMessageHistory class that can wrap certain runnables and manage chat history for them.
2023-11-17 12:00:01 -08:00
Bagatur
0fc3af8932 IMPROVEMENT: update assistants output and doc (#13480) 2023-11-17 11:58:54 -08:00
Bagatur
b4312aac5c TEMPLATES: Add multi-index templates (#13490)
One that routes and one that fuses

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-17 02:00:11 -08:00
Hugues Chocart
35e04f204b [LLMonitorCallbackHandler] Various improvements (#13151)
Small improvements for the llmonitor callback handler, like better
support for non-openai models.


---------

Co-authored-by: vincelwt <vince@lyser.io>
2023-11-16 23:39:36 -08:00
Noah Stapp
c1b041c188 Add Wrapping Library Metadata to MongoDB vector store (#13084)
**Description**
MongoDB drivers are used in various flavors and languages. Making sure
we exercise due diligence in identifying the "origin" of the library
calls helps us understand how our Atlas servers get accessed.
2023-11-16 22:20:04 -08:00
Leonid Ganeline
21552628c8 DOCS updated data_connection index page (#13426)
- the `Index` section was missing. Created it.
- text simplification

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2023-11-16 18:16:50 -08:00
Guy Korland
7f8fd70ac4 Add optional arguments to FalkorDBGraph constructor (#13459)
**Description:** Add optional arguments to FalkorDBGraph constructor
**Tag maintainer:** baskaryan 
**Twitter handle:** @g_korland
2023-11-16 18:15:40 -08:00
Leonid Ganeline
e3a5cd7969 docs integrations/vectorstores/ cleanup (#13487)
- updated titles to consistent format
- added/updated descriptions and links
- format heading
2023-11-16 17:51:49 -08:00
Leonid Ganeline
1d2981114f DOCS updated async-faiss example (#13434)
The original notebook has the `faiss` title, which is duplicated in
`faiss.ipynb`. As a result, we have two `faiss` items in the
vectorstore ToC, and the first item breaks the searching order (it is
placed between `A...` items).
- I updated the title to `Asynchronous Faiss`.
2023-11-16 17:41:26 -08:00
Erick Friis
9dfad613c2 IMPROVEMENT Allow openai v1 in all templates that require it (#13489)
- pyproject change
- lockfiles
2023-11-16 17:10:08 -08:00
chris stucchio
d7f014cd89 Bug: OpenAIFunctionsAgentOutputParser doesn't handle functions with no args (#13467)
**Description/Issue:** 
When OpenAI calls a function with no args, the args are `""` rather than
`"{}"`. Then `json.loads("")` blows up. This PR handles it correctly.

**Dependencies:** None
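A minimal sketch of the failure and the fix (illustrative values):

```python
import json

function_call = {"name": "get_time", "arguments": ""}  # args empty, not "{}"

# Buggy: json.loads("") raises json.JSONDecodeError.
# args = json.loads(function_call["arguments"])

# Fixed: fall back to an empty JSON object when arguments is empty.
args = json.loads(function_call["arguments"] or "{}")
assert args == {}
```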
2023-11-16 16:47:05 -08:00
Yujie Qian
41a433fa33 IMPROVEMENT: add input_type to VoyageEmbeddings (#13488)
- **Description:** add input_type to VoyageEmbeddings
2023-11-16 16:35:36 -08:00
David Duong
ea6e017b85 Add serialisation arguments to Bedrock and ChatBedrock (#13465) 2023-11-17 01:33:24 +01:00
Erick Friis
427331d621 IMPROVEMENT Lock pydantic v1 in app template, cli 0.0.18 (#13485) 2023-11-16 15:22:11 -08:00
Erick Friis
75363f048f BUG Fix app_name in cli app new (#13482) 2023-11-16 14:19:35 -08:00
Leonid Ganeline
9ff8f69e75 DOCS updated memory Titles (#13435)
- Fixed titles for two notebooks. They were inconsistent with other
titles and clogged the ToC.
- Added `Upstash` description and link
- Moved the authentication text up in the `Elasticsearch` nb, right
after package installation. It was at the end of the page, which was the
wrong place.
2023-11-16 13:24:05 -08:00
ifduyue
324ab382ad Use List instead of list (#13443)
Unify `List` usage in libs/langchain/langchain/text_splitter.py; only one
place uses `list`, all other occurrences are `List`.
2023-11-16 13:15:58 -08:00
Stefano Lottini
b029d9f4e6 Astra DB: minor improvements to docstrings and demo notebook (#13449)
This PR brings a few minor improvements to the docs, namely class/method
docstrings and the demo notebook.

- A note on how to control concurrency levels to tune performance in
bulk inserts, both in the class docstring and the demo notebook;
- Slightly increased concurrency defaults after careful experimentation
(still on the conservative side even for clients running on
less-than-typical network/hardware specs)
- renamed the DB token variable to the standardized
`ASTRA_DB_APPLICATION_TOKEN` name (used elsewhere, e.g. in the Astra DB
docs)
- added a note and a reference (add_text docstring, demo notebook) on
allowed metadata field names.

Thank you!
2023-11-16 12:48:32 -08:00
Eugene Yurtsev
1e43fd6afe Add ahandle_event to _all_ (#13469)
Add ahandle_event for backwards compatibility as it is used by langserve
2023-11-16 12:46:20 -08:00
Leonid Ganeline
283ef1f66d DOCS fix for integratons/document_loaders sidebar (#13471)
The current `integrations/document_loaders/` sidebar has the
`example_data` item, which is a menu with a single item: "Notebook".
It is happening because the `integrations/document_loaders/` folder has
the `example_data/notebook.md` file that is used to autogenerate the
above menu item.
- removed an example_data/notebook.md file. Docusaurus doesn't have
simple ways to fix this problem (to exclude folders/files from an
autogenerated sidebar). Removing this file didn't break any existing
examples, so this fix is safe.
2023-11-16 12:02:30 -08:00
Leonid Ganeline
b1fcf5b481 DOCS: integrations/text_embeddings/ cleanup (#13476)
Updated several notebooks:
- fixed titles which were inconsistent or broke the ToC sorting order.
- added missing source descriptions and links
- fixed formatting
2023-11-16 11:56:53 -08:00
Bagatur
6030ab9779 Update chain of note README.md (#13473) 2023-11-16 10:47:27 -08:00
Lance Martin
cf66a4737d Update multi-modal RAG cookbook (#13429)
Use example
[blog](https://cloudedjudgement.substack.com/p/clouded-judgement-111023)
w/ tables, charts as images.
2023-11-16 10:34:13 -08:00
Bagatur
10fddac4b5 Bagatur/chain of note template (#13470) 2023-11-16 10:34:04 -08:00
Leonid Ganeline
d5b1a21ae4 DOCS updated semadb example (#13431)
- the `SemaDB` notebook was placed in an additional subfolder, which breaks
the vectorstore ToC. I moved the file up, removed this unnecessary
subfolder, and updated the `vercel.json` with rerouting for the new URL
- Added SemaDB description and link
- improved text consistency
2023-11-16 09:57:22 -08:00
Leonid Ganeline
17c2007e0c DOCS updated Activeloop DeepMemory notebook (#13428)
- Fixed the title of the notebook. It created an ugly ToC element as
`Activeloop DeepLake's DeepMemory + LangChain + ragas or how to get +27%
on RAG recall.`
- Added Activeloop description
- improved consistency in text
- fixed the ToC (it was using HTML tags that break the left-side in-page ToC).
Now the in-page ToC works
2023-11-16 09:56:28 -08:00
Harrison Chase
f90249305a callback refactor (#13372)
Co-authored-by: Nuno Campos <nuno@boringbits.io>
2023-11-16 08:25:09 -08:00
Bagatur
9e6748e198 DOCS: rag nit (#13436) 2023-11-15 18:06:52 -08:00
Leonid Ganeline
8a52c1456b updated clickup example (#13424)
- Fixed headers (there was more than one title)
- Removed the security token value. It was OK to have it, because it is a
temporary token, but the automatic security sweepers raise warnings on
that.
- Added `ClickUp` service description and link.
2023-11-15 15:11:24 -08:00
Brace Sproul
79fa9a81f4 Fix a link in docs (#13423) 2023-11-15 15:02:26 -08:00
Nuno Campos
a632f61f3d IMPROVEMENT pirate-speak-configurable alternatives env vars (#13395)
…rnative LLMs until used

2023-11-15 14:38:03 -08:00
1522 changed files with 73057 additions and 27348 deletions

View File

@@ -7,6 +7,10 @@ on:
required: true
type: string
description: "From which folder this pipeline executes"
langchain-core-location:
required: false
type: string
description: "Relative path to the langchain core library folder"
env:
POETRY_VERSION: "1.6.1"
@@ -40,6 +44,14 @@ jobs:
shell: bash
run: poetry install --with=test_integration
- name: Install langchain core editable
working-directory: ${{ inputs.working-directory }}
if: ${{ inputs.langchain-core-location }}
env:
LANGCHAIN_CORE_LOCATION: ${{ inputs.langchain-core-location }}
run: |
poetry run pip install -e "$LANGCHAIN_CORE_LOCATION"
- name: Check integration tests compile
shell: bash
run: poetry run pytest -m compile tests/integration_tests

View File

@@ -11,6 +11,10 @@ on:
required: false
type: string
description: "Relative path to the langchain library folder"
langchain-core-location:
required: false
type: string
description: "Relative path to the langchain core library folder"
env:
POETRY_VERSION: "1.6.1"
@@ -76,7 +80,15 @@ jobs:
env:
LANGCHAIN_LOCATION: ${{ inputs.langchain-location }}
run: |
pip install -e "$LANGCHAIN_LOCATION"
poetry run pip install -e "$LANGCHAIN_LOCATION"
- name: Install langchain core editable
working-directory: ${{ inputs.working-directory }}
if: ${{ inputs.langchain-core-location }}
env:
LANGCHAIN_CORE_LOCATION: ${{ inputs.langchain-core-location }}
run: |
poetry run pip install -e "$LANGCHAIN_CORE_LOCATION"
- name: Get .mypy_cache to speed up mypy
uses: actions/cache@v3

View File

@@ -7,6 +7,14 @@ on:
required: true
type: string
description: "From which folder this pipeline executes"
langchain-location:
required: false
type: string
description: "Relative path to the langchain library folder"
langchain-core-location:
required: false
type: string
description: "Relative path to the langchain core library folder"
env:
POETRY_VERSION: "1.6.1"
@@ -40,6 +48,22 @@ jobs:
shell: bash
run: poetry install
- name: Install langchain editable
working-directory: ${{ inputs.working-directory }}
if: ${{ inputs.langchain-location }}
env:
LANGCHAIN_LOCATION: ${{ inputs.langchain-location }}
run: |
poetry run pip install -e "$LANGCHAIN_LOCATION"
- name: Install langchain core editable
working-directory: ${{ inputs.working-directory }}
if: ${{ inputs.langchain-core-location }}
env:
LANGCHAIN_CORE_LOCATION: ${{ inputs.langchain-core-location }}
run: |
poetry run pip install -e "$LANGCHAIN_CORE_LOCATION"
- name: Install the opposite major version of pydantic
# If normal tests use pydantic v1, here we'll use v2, and vice versa.
shell: bash

View File

@@ -7,6 +7,14 @@ on:
required: true
type: string
description: "From which folder this pipeline executes"
langchain-location:
required: false
type: string
description: "Relative path to the langchain library folder"
langchain-core-location:
required: false
type: string
description: "Relative path to the langchain core library folder"
env:
POETRY_VERSION: "1.6.1"
@@ -40,9 +48,26 @@ jobs:
shell: bash
run: poetry install
- name: Install langchain editable
working-directory: ${{ inputs.working-directory }}
if: ${{ inputs.langchain-location }}
env:
LANGCHAIN_LOCATION: ${{ inputs.langchain-location }}
run: |
poetry run pip install -e "$LANGCHAIN_LOCATION"
- name: Install langchain core editable
working-directory: ${{ inputs.working-directory }}
if: ${{ inputs.langchain-core-location }}
env:
LANGCHAIN_CORE_LOCATION: ${{ inputs.langchain-core-location }}
run: |
poetry run pip install -e "$LANGCHAIN_CORE_LOCATION"
- name: Run core tests
shell: bash
run: make test
run: |
make test
- name: Ensure the tests did not create any additional files
shell: bash

View File

@@ -14,6 +14,7 @@ on:
- '.github/workflows/langchain_ci.yml'
- 'libs/*'
- 'libs/langchain/**'
- 'libs/core/**'
workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI
# If another push to the same PR or branch happens while this workflow is still running,
@@ -36,6 +37,7 @@ jobs:
./.github/workflows/_lint.yml
with:
working-directory: libs/langchain
langchain-core-location: ../core
secrets: inherit
test:
@@ -43,6 +45,7 @@ jobs:
./.github/workflows/_test.yml
with:
working-directory: libs/langchain
langchain-core-location: ../core
secrets: inherit
compile-integration-tests:
@@ -50,6 +53,7 @@ jobs:
./.github/workflows/_compile_integration_test.yml
with:
working-directory: libs/langchain
langchain-core-location: ../core
secrets: inherit
pydantic-compatibility:
@@ -57,8 +61,49 @@ jobs:
./.github/workflows/_pydantic_compatibility.yml
with:
working-directory: libs/langchain
langchain-core-location: ../core
secrets: inherit
# It's possible that langchain works fine with the latest *published* langchain-core,
# but is broken with the langchain-core on `master`.
#
# We want to catch situations like that *before* releasing a new langchain-core, hence this test.
test-with-latest-langchain-core:
runs-on: ubuntu-latest
defaults:
run:
working-directory: ${{ env.WORKDIR }}
strategy:
matrix:
python-version:
- "3.8"
- "3.9"
- "3.10"
- "3.11"
name: test with unpublished langchain-core - Python ${{ matrix.python-version }}
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ matrix.python-version }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: ${{ env.WORKDIR }}
cache-key: unpublished-langchain-core
- name: Install dependencies
shell: bash
run: |
echo "Running tests with unpublished langchain, installing dependencies with poetry..."
poetry install
echo "Editably installing langchain-core outside of poetry, to avoid messing up lockfile..."
poetry run pip install -e ../core
- name: Run tests
run: make test
extended-tests:
runs-on: ubuntu-latest
defaults:
@@ -89,6 +134,11 @@ jobs:
echo "Running extended tests, installing dependencies with poetry..."
poetry install -E extended_testing
- name: Install langchain core editable
shell: bash
run: |
poetry run pip install -e ../core
- name: Run extended tests
run: make extended_tests

.github/workflows/langchain_core_ci.yml (new file, 52 lines)
View File

@@ -0,0 +1,52 @@
---
name: libs/langchain core CI
on:
push:
branches: [ master ]
pull_request:
paths:
- '.github/actions/poetry_setup/action.yml'
- '.github/tools/**'
- '.github/workflows/_lint.yml'
- '.github/workflows/_test.yml'
- '.github/workflows/_pydantic_compatibility.yml'
- '.github/workflows/langchain_core_ci.yml'
- 'libs/core/**'
workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI
# If another push to the same PR or branch happens while this workflow is still running,
# cancel the earlier run in favor of the next run.
#
# There's no point in testing an outdated version of the code. GitHub only allows
# a limited number of job runners to be active at the same time, so it's better to cancel
# pointless jobs early so that more useful jobs can run sooner.
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
POETRY_VERSION: "1.6.1"
WORKDIR: "libs/core"
jobs:
lint:
uses:
./.github/workflows/_lint.yml
with:
working-directory: libs/core
secrets: inherit
test:
uses:
./.github/workflows/_test.yml
with:
working-directory: libs/core
secrets: inherit
pydantic-compatibility:
uses:
./.github/workflows/_pydantic_compatibility.yml
with:
working-directory: libs/core
secrets: inherit

View File

@@ -0,0 +1,13 @@
---
name: libs/core Release
on:
workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI
jobs:
release:
uses:
./.github/workflows/_release.yml
with:
working-directory: libs/core
secrets: inherit

View File

@@ -13,6 +13,8 @@ on:
- '.github/workflows/langchain_experimental_ci.yml'
- 'libs/*'
- 'libs/experimental/**'
- 'libs/langchain/**'
- 'libs/core/**'
workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI
# If another push to the same PR or branch happens while this workflow is still running,
@@ -36,6 +38,7 @@ jobs:
with:
working-directory: libs/experimental
langchain-location: ../langchain
langchain-core-location: ../core
secrets: inherit
test:
@@ -43,6 +46,8 @@ jobs:
./.github/workflows/_test.yml
with:
working-directory: libs/experimental
langchain-location: ../langchain
langchain-core-location: ../core
secrets: inherit
compile-integration-tests:
@@ -88,6 +93,7 @@ jobs:
echo "Editably installing langchain outside of poetry, to avoid messing up lockfile..."
poetry run pip install -e ../langchain
poetry run pip install -e ../core
- name: Run tests
run: make test

File diff suppressed because one or more lines are too long

View File

@@ -648,7 +648,7 @@
{
"data": {
"text/plain": [
"OpenAIEmbeddings(client=<class 'openai.api_resources.embedding.Embedding'>, model='text-embedding-ada-002', deployment='text-embedding-ada-002', openai_api_version='', openai_api_base='', openai_api_type='', openai_proxy='', embedding_ctx_length=8191, openai_api_key='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', openai_organization='', allowed_special=set(), disallowed_special='all', chunk_size=1000, max_retries=6, request_timeout=None, headers=None, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={})"
"OpenAIEmbeddings(client=<class 'openai.api_resources.embedding.Embedding'>, model='text-embedding-ada-002', deployment='text-embedding-ada-002', openai_api_version='', openai_api_base='', openai_api_type='', openai_proxy='', embedding_ctx_length=8191, openai_api_key='', openai_organization='', allowed_special=set(), disallowed_special='all', chunk_size=1000, max_retries=6, request_timeout=None, headers=None, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={})"
]
},
"execution_count": 13,

File diff suppressed because one or more lines are too long

View File

@@ -69,8 +69,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.llm_bash.prompt import BashOutputParser\n",
"from langchain.prompts.prompt import PromptTemplate\n",
"from langchain_experimental.llm_bash.prompt import BashOutputParser\n",
"\n",
"_PROMPT_TEMPLATE = \"\"\"If someone asks you to perform a task, your job is to come up with a series of bash commands that will perform the task. There is no need to put \"#!/bin/bash\" in your answer. Make sure to reason step by step, using this format:\n",
"Question: \"copy the files in the directory named 'target' into a new directory at the same level as target called 'myNewDirectory'\"\n",

View File

@@ -13,8 +13,10 @@ HERE = Path(__file__).parent
PKG_DIR = ROOT_DIR / "libs" / "langchain" / "langchain"
EXP_DIR = ROOT_DIR / "libs" / "experimental" / "langchain_experimental"
CORE_DIR = ROOT_DIR / "libs" / "core" / "langchain_core"
WRITE_FILE = HERE / "api_reference.rst"
EXP_WRITE_FILE = HERE / "experimental_api_reference.rst"
CORE_WRITE_FILE = HERE / "core_api_reference.rst"
ClassKind = Literal["TypedDict", "Regular", "Pydantic", "enum"]
@@ -292,6 +294,17 @@ def _document_langchain_experimental() -> None:
def _document_langchain_core() -> None:
"""Document the langchain_core package."""
# Generate core_api_reference.rst
core_members = _load_package_modules(CORE_DIR)
core_doc = ".. _core_api_reference:\n\n" + _construct_doc(
"langchain_core", core_members
)
with open(CORE_WRITE_FILE, "w") as f:
f.write(core_doc)
def _document_langchain() -> None:
"""Document the main langchain package."""
# load top level module members
lc_members = _load_package_modules(PKG_DIR)
@@ -306,7 +319,6 @@ def _document_langchain_core() -> None:
"agents.output_parsers": agents["output_parsers"],
"agents.format_scratchpad": agents["format_scratchpad"],
"tools.render": tools["render"],
"schema.runnable": schema["runnable"],
}
)
@@ -318,8 +330,9 @@ def _document_langchain_core() -> None:
def main() -> None:
"""Generate the reference.rst file for each package."""
_document_langchain_core()
_document_langchain()
_document_langchain_experimental()
_document_langchain_core()
if __name__ == "__main__":

View File

@@ -1,5 +1,6 @@
-e libs/langchain
-e libs/experimental
-e libs/core
pydantic<2
autodoc_pydantic==1.8.0
myst_parser

View File

@@ -34,6 +34,9 @@
<li class="nav-item">
<a class="sk-nav-link nav-link" href="{{ pathto('api_reference') }}">API</a>
</li>
<li class="nav-item">
<a class="sk-nav-link nav-link" href="{{ pathto('core_api_reference') }}">Core</a>
</li>
<li class="nav-item">
<a class="sk-nav-link nav-link" href="{{ pathto('experimental_api_reference') }}">Experimental</a>
</li>

View File

@@ -0,0 +1,396 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "6a4becbd-238e-4c1d-a02d-08e61fbc3763",
"metadata": {},
"source": [
"# Add message history (memory)\n",
"\n",
"The `RunnableWithMessageHistory` lets us add message history to certain types of chains.\n",
"\n",
"Specifically, it can be used for any Runnable that takes as input one of\n",
"* a sequence of `BaseMessage`\n",
"* a dict with a key that takes a sequence of `BaseMessage`\n",
"* a dict with a key that takes the latest message(s) as a string or sequence of `BaseMessage`, and a separate key that takes historical messages\n",
"\n",
"And returns as output one of\n",
"* a string that can be treated as the contents of an `AIMessage`\n",
"* a sequence of `BaseMessage`\n",
"* a dict with a key that contains a sequence of `BaseMessage`\n",
"\n",
"Let's take a look at some examples to see how it works."
]
},
{
"cell_type": "markdown",
"id": "6bca45e5-35d9-4603-9ca9-6ac0ce0e35cd",
"metadata": {},
"source": [
"## Setup\n",
"\n",
"We'll use Redis to store our chat message histories and Anthropic's claude-2 model so we'll need to install the following dependencies:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "477d04b3-c2b6-4ba5-962f-492c0d625cd5",
"metadata": {},
"outputs": [],
"source": [
"!pip install -U langchain redis anthropic"
]
},
{
"cell_type": "markdown",
"id": "93776323-d6b8-4912-bb6a-867c5e655f46",
"metadata": {},
"source": [
"Set your [Anthropic API key](https://console.anthropic.com/):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c7f56f69-d2f1-4a21-990c-b5551eb012fa",
"metadata": {},
"outputs": [],
"source": [
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"ANTHROPIC_API_KEY\"] = getpass.getpass()"
]
},
{
"cell_type": "markdown",
"id": "6a0ec9e0-7b1c-4c6f-b570-e61d520b47c6",
"metadata": {},
"source": [
"Start a local Redis Stack server if we don't have an existing Redis deployment to connect to:\n",
"```bash\n",
"docker run -d -p 6379:6379 -p 8001:8001 redis/redis-stack:latest\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "cd6a250e-17fe-4368-a39d-1fe6b2cbde68",
"metadata": {},
"outputs": [],
"source": [
"REDIS_URL = \"redis://localhost:6379/0\""
]
},
{
"cell_type": "markdown",
"id": "36f43b87-655c-4f64-aa7b-bd8c1955d8e5",
"metadata": {},
"source": [
"### [LangSmith](/docs/langsmith)\n",
"\n",
"LangSmith is especially useful for something like message history injection, where it can be hard to otherwise understand what the inputs are to various parts of the chain.\n",
"\n",
"Note that LangSmith is not needed, but it is helpful.\n",
"If you do want to use LangSmith, after you sign up at the link above, make sure to uncomment the below and set your environment variables to start logging traces:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "2afc1556-8da1-4499-ba11-983b66c58b18",
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
]
},
{
"cell_type": "markdown",
"id": "1a5a632e-ba9e-4488-b586-640ad5494f62",
"metadata": {},
"source": [
"## Example: Dict input, message output\n",
"\n",
"Let's create a simple chain that takes a dict as input and returns a BaseMessage.\n",
"\n",
"In this case the `\"question\"` key in the input represents our input message, and the `\"history\"` key is where our historical messages will be injected."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "2a150d6f-8878-4950-8634-a608c5faad56",
"metadata": {},
"outputs": [],
"source": [
"from typing import Optional\n",
"\n",
"from langchain.chat_models import ChatAnthropic\n",
"from langchain.memory.chat_message_histories import RedisChatMessageHistory\n",
"from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
"from langchain.schema.chat_history import BaseChatMessageHistory\n",
"from langchain.schema.runnable.history import RunnableWithMessageHistory"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "3185edba-4eb6-4b32-80c6-577c0d19af97",
"metadata": {},
"outputs": [],
"source": [
"prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\"system\", \"You're an assistant who's good at {ability}\"),\n",
" MessagesPlaceholder(variable_name=\"history\"),\n",
" (\"human\", \"{question}\"),\n",
" ]\n",
")\n",
"\n",
"chain = prompt | ChatAnthropic(model=\"claude-2\")"
]
},
{
"cell_type": "markdown",
"id": "f9d81796-ce61-484c-89e2-6c567d5e54ef",
"metadata": {},
"source": [
"### Adding message history\n",
"\n",
"To add message history to our original chain we wrap it in the `RunnableWithMessageHistory` class.\n",
"\n",
"Crucially, we also need to define a method that takes a session_id string and based on it returns a `BaseChatMessageHistory`. Given the same input, this method should return an equivalent output.\n",
"\n",
"In this case we'll also want to specify `input_messages_key` (the key to be treated as the latest input message) and `history_messages_key` (the key to add historical messages to)."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "ca7c64d8-e138-4ef8-9734-f82076c47d80",
"metadata": {},
"outputs": [],
"source": [
"chain_with_history = RunnableWithMessageHistory(\n",
" chain,\n",
" lambda session_id: RedisChatMessageHistory(session_id, url=REDIS_URL),\n",
" input_messages_key=\"question\",\n",
" history_messages_key=\"history\",\n",
")"
]
},
{
"cell_type": "markdown",
"id": "37eefdec-9901-4650-b64c-d3c097ed5f4d",
"metadata": {},
"source": [
"## Invoking with config\n",
"\n",
"Whenever we call our chain with message history, we need to include a config that contains the `session_id`\n",
"```python\n",
"config={\"configurable\": {\"session_id\": \"<SESSION_ID>\"}}\n",
"```\n",
"\n",
"Given the same configuration, our chain should be pulling from the same chat message history."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "a85bcc22-ca4c-4ad5-9440-f94be7318f3e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=' Cosine is one of the basic trigonometric functions in mathematics. It is defined as the ratio of the adjacent side to the hypotenuse in a right triangle.\\n\\nSome key properties and facts about cosine:\\n\\n- It is denoted by cos(θ), where θ is the angle in a right triangle. \\n\\n- The cosine of an acute angle is always positive. For angles greater than 90 degrees, cosine can be negative.\\n\\n- Cosine is one of the three main trig functions along with sine and tangent.\\n\\n- The cosine of 0 degrees is 1. As the angle increases towards 90 degrees, the cosine value decreases towards 0.\\n\\n- The range of values for cosine is -1 to 1.\\n\\n- The cosine function maps angles in a circle to the x-coordinate on the unit circle.\\n\\n- Cosine is used to find adjacent side lengths in right triangles, and has many other applications in mathematics, physics, engineering and more.\\n\\n- Key cosine identities include: cos(A+B) = cosAcosB sinAsinB and cos(2A) = cos^2(A) sin^2(A)\\n\\nSo in summary, cosine is a fundamental trig')"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain_with_history.invoke(\n",
" {\"ability\": \"math\", \"question\": \"What does cosine mean?\"},\n",
" config={\"configurable\": {\"session_id\": \"foobar\"}},\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "ab29abd3-751f-41ce-a1b0-53f6b565e79d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=' The inverse of the cosine function is called the arccosine or inverse cosine, often denoted as cos-1(x) or arccos(x).\\n\\nThe key properties and facts about arccosine:\\n\\n- It is defined as the angle θ between 0 and π radians whose cosine is x. So arccos(x) = θ such that cos(θ) = x.\\n\\n- The range of arccosine is 0 to π radians (0 to 180 degrees).\\n\\n- The domain of arccosine is -1 to 1. \\n\\n- arccos(cos(θ)) = θ for values of θ from 0 to π radians.\\n\\n- arccos(x) is the angle in a right triangle whose adjacent side is x and hypotenuse is 1.\\n\\n- arccos(0) = 90 degrees. As x increases from 0 to 1, arccos(x) decreases from 90 to 0 degrees.\\n\\n- arccos(1) = 0 degrees. arccos(-1) = 180 degrees.\\n\\n- The graph of y = arccos(x) is part of the unit circle, restricted to x')"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain_with_history.invoke(\n",
" {\"ability\": \"math\", \"question\": \"What's its inverse\"},\n",
" config={\"configurable\": {\"session_id\": \"foobar\"}},\n",
")"
]
},
{
"cell_type": "markdown",
"id": "da3d1feb-b4bb-4624-961c-7db2e1180df7",
"metadata": {},
"source": [
":::tip [Langsmith trace](https://smith.langchain.com/public/863a003b-7ca8-4b24-be9e-d63ec13c106e/r)\n",
":::"
]
},
{
"cell_type": "markdown",
"id": "61d5115e-64a1-4ad5-b676-8afd4ef6093e",
"metadata": {},
"source": [
"Looking at the Langsmith trace for the second call, we can see that when constructing the prompt, a \"history\" variable has been injected which is a list of two messages (our first input and first output)."
]
},
{
"cell_type": "markdown",
"id": "028cf151-6cd5-4533-b3cf-c8d735554647",
"metadata": {},
"source": [
"## Example: messages input, dict output"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "0bb446b5-6251-45fe-a92a-4c6171473c53",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'output_message': AIMessage(content=' Here is a summary of Simone de Beauvoir\\'s views on free will:\\n\\n- De Beauvoir was an existentialist philosopher and believed strongly in the concept of free will. She rejected the idea that human nature or instincts determine behavior.\\n\\n- Instead, de Beauvoir argued that human beings define their own essence or nature through their actions and choices. As she famously wrote, \"One is not born, but rather becomes, a woman.\"\\n\\n- De Beauvoir believed that while individuals are situated in certain cultural contexts and social conditions, they still have agency and the ability to transcend these situations. Freedom comes from choosing one\\'s attitude toward these constraints.\\n\\n- She emphasized the radical freedom and responsibility of the individual. We are \"condemned to be free\" because we cannot escape making choices and taking responsibility for our choices. \\n\\n- De Beauvoir felt that many people evade their freedom and responsibility by adopting rigid mindsets, ideologies, or conforming uncritically to social roles.\\n\\n- She advocated for the recognition of ambiguity in the human condition and warned against the quest for absolute rules that deny freedom and responsibility. Authentic living involves embracing ambiguity.\\n\\nIn summary, de Beauvoir promoted an existential ethics')}"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain.schema.messages import HumanMessage\n",
"from langchain.schema.runnable import RunnableMap\n",
"\n",
"chain = RunnableMap({\"output_message\": ChatAnthropic(model=\"claude-2\")})\n",
"chain_with_history = RunnableWithMessageHistory(\n",
" chain,\n",
" lambda session_id: RedisChatMessageHistory(session_id, url=REDIS_URL),\n",
" output_messages_key=\"output_message\",\n",
")\n",
"\n",
"chain_with_history.invoke(\n",
" [HumanMessage(content=\"What did Simone de Beauvoir believe about free will\")],\n",
" config={\"configurable\": {\"session_id\": \"baz\"}},\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "601ce3ff-aea8-424d-8e54-fd614256af4f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'output_message': AIMessage(content=\" There are many similarities between Simone de Beauvoir's views on free will and those of Jean-Paul Sartre, though some key differences emerge as well:\\n\\nSimilarities with Sartre:\\n\\n- Both were existentialist thinkers who rejected determinism and emphasized human freedom and responsibility.\\n\\n- They agreed that existence precedes essence - there is no predefined human nature that determines who we are.\\n\\n- Individuals must define themselves through their choices and actions. This leads to anxiety but also freedom.\\n\\n- The human condition is characterized by ambiguity and uncertainty, rather than fixed meanings/values.\\n\\n- Both felt that most people evade their freedom through self-deception, conformity, or adopting collective identities/values uncritically.\\n\\nDifferences from Sartre: \\n\\n- Sartre placed more emphasis on the burden and anguish of radical freedom. De Beauvoir focused more on its positive potential.\\n\\n- De Beauvoir critiqued Sartre's premise that human relations are necessarily conflictual. She saw more potential for mutual recognition.\\n\\n- Sartre saw the Other's gaze as a threat to freedom. De Beauvoir put more stress on how the Other's gaze can confirm\")}"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain_with_history.invoke(\n",
" [HumanMessage(content=\"How did this compare to Sartre\")],\n",
" config={\"configurable\": {\"session_id\": \"baz\"}},\n",
")"
]
},
{
"cell_type": "markdown",
"id": "b898d1b1-11e6-4d30-a8dd-cc5e45533611",
"metadata": {},
"source": [
":::tip [LangSmith trace](https://smith.langchain.com/public/f6c3e1d1-a49d-4955-a9fa-c6519df74fa7/r)\n",
":::"
]
},
{
"cell_type": "markdown",
"id": "1724292c-01c6-44bb-83e8-9cdb6bf01483",
"metadata": {},
"source": [
"## More examples\n",
"\n",
"We could also do any of the below:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fd89240b-5a25-48f8-9568-5c1127f9ffad",
"metadata": {},
"outputs": [],
"source": [
"from operator import itemgetter\n",
"\n",
"# messages in, messages out\n",
"RunnableWithMessageHistory(\n",
" ChatAnthropic(model=\"claude-2\"),\n",
" lambda session_id: RedisChatMessageHistory(session_id, url=REDIS_URL),\n",
")\n",
"\n",
"# dict with single key for all messages in, messages out\n",
"RunnableWithMessageHistory(\n",
" itemgetter(\"input_messages\") | ChatAnthropic(model=\"claude-2\"),\n",
" lambda session_id: RedisChatMessageHistory(session_id, url=REDIS_URL),\n",
" input_messages_key=\"input_messages\",\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "poetry-venv",
"language": "python",
"name": "poetry-venv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -14,7 +14,7 @@ This framework consists of several parts.
- **[LangServe](/docs/langserve)**: A library for deploying LangChain chains as a REST API.
- **[LangSmith](/docs/langsmith)**: A developer platform that lets you debug, test, evaluate, and monitor chains built on any LLM framework and seamlessly integrates with LangChain.
![LangChain Diagram](/img/langchain_stack.png)
![LangChain Diagram](/svg/langchain_stack.svg)
Together, these products simplify the entire application lifecycle:
- **Develop**: Write your applications in LangChain/LangChain.js. Hit the ground running using Templates for reference.
@@ -49,7 +49,7 @@ LCEL is a declarative way to compose chains. LCEL was designed from day 1 to sup
- **[Overview](/docs/expression_language/)**: LCEL and its benefits
- **[Interface](/docs/expression_language/interface)**: The standard interface for LCEL objects
- **[How-to](/docs/expression_language/interface)**: Key features of LCEL
- **[How-to](/docs/expression_language/how_to)**: Key features of LCEL
- **[Cookbook](/docs/expression_language/cookbook)**: Example code for accomplishing common tasks

View File

@@ -7,7 +7,9 @@
"source": [
"# Azure OpenAI\n",
"\n",
"This notebook goes over how to connect to an Azure hosted OpenAI endpoint. We recommend having version `openai>=1` installed."
">[Azure OpenAI Service](https://learn.microsoft.com/en-us/azure/ai-services/openai/overview) provides REST API access to OpenAI's powerful language models including the GPT-4, GPT-3.5-Turbo, and Embeddings model series. These models can be easily adapted to your specific task including but not limited to content generation, summarization, semantic search, and natural language to code translation. Users can access the service through REST APIs, Python SDK, or a web-based interface in the Azure OpenAI Studio.\n",
"\n",
"This notebook goes over how to connect to an Azure-hosted OpenAI endpoint. We recommend having version `openai>=1` installed."
]
},
{
@@ -162,7 +164,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@@ -4,11 +4,13 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# AzureML Chat Online Endpoint\n",
"# Azure ML Endpoint\n",
"\n",
"[AzureML](https://azure.microsoft.com/en-us/products/machine-learning/) is a platform used to build, train, and deploy machine learning models. Users can explore the types of models to deploy in the Model Catalog, which provides Azure Foundation Models and OpenAI Models. Azure Foundation Models include various open-source models and popular Hugging Face models. Users can also import models of their liking into AzureML.\n",
">[Azure Machine Learning](https://azure.microsoft.com/en-us/products/machine-learning/) is a platform used to build, train, and deploy machine learning models. Users can explore the types of models to deploy in the Model Catalog, which provides Azure Foundation Models and OpenAI Models. `Azure Foundation Models` include various open-source models and popular Hugging Face models. Users can also import models of their liking into AzureML.\n",
">\n",
">[Azure Machine Learning Online Endpoints](https://learn.microsoft.com/en-us/azure/machine-learning/concept-endpoints). After you train machine learning models or pipelines, you need to deploy them to production so that others can use them for inference. Inference is the process of applying new input data to the machine learning model or pipeline to generate outputs. While these outputs are typically referred to as \"predictions,\" inferencing can be used to generate outputs for other machine learning tasks, such as classification and clustering. In `Azure Machine Learning`, you perform inferencing by using endpoints and deployments. `Endpoints` and `Deployments` allow you to decouple the interface of your production workload from the implementation that serves it.\n",
"\n",
"This notebook goes over how to use a chat model hosted on an `AzureML online endpoint`"
"This notebook goes over how to use a chat model hosted on an `Azure Machine Learning Endpoint`."
]
},
{
@@ -91,7 +93,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@@ -36,7 +36,7 @@
"outputs": [],
"source": [
"chat = ChatHunyuan(\n",
" hunyuan_app_id=\"YOUR_APP_ID\",\n",
" hunyuan_app_id=111111111,\n",
" hunyuan_secret_id=\"YOUR_SECRET_ID\",\n",
" hunyuan_secret_key=\"YOUR_SECRET_KEY\",\n",
")"

View File

@@ -0,0 +1,729 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "90a1faf2",
"metadata": {},
"source": [
"# Llama-2 Chat\n",
"\n",
"This notebook shows how to augment Llama-2 `LLM`s with the `Llama2Chat` wrapper to support the [Llama-2 chat prompt format](https://huggingface.co/blog/llama2#how-to-prompt-llama-2). Several `LLM` implementations in LangChain can be used as interface to Llama-2 chat models. These include [HuggingFaceTextGenInference](https://python.langchain.com/docs/integrations/llms/huggingface_textgen_inference), [LlamaCpp](https://python.langchain.com/docs/use_cases/question_answering/how_to/local_retrieval_qa), [GPT4All](https://python.langchain.com/docs/integrations/llms/gpt4all), ..., to mention a few examples. \n",
"\n",
"`Llama2Chat` is a generic wrapper that implements `BaseChatModel` and can therefore be used in applications as [chat model](https://python.langchain.com/docs/modules/model_io/models/chat/). `Llama2Chat` converts a list of [chat messages](https://python.langchain.com/docs/modules/model_io/models/chat/#messages) into the [required chat prompt format](https://huggingface.co/blog/llama2#how-to-prompt-llama-2) and forwards the formatted prompt as `str` to the wrapped `LLM`."
]
},
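{
"cell_type": "markdown",
"id": "llama2-prompt-format-note",
"metadata": {},
"source": [
"Roughly, the wrapper renders a system message plus alternating human/AI messages into a single prompt string like the one below before passing it to the wrapped `LLM` (illustrative only; see the linked blog post for the exact format):\n",
"\n",
"```\n",
"<s>[INST] <<SYS>>\n",
"You are a helpful assistant.\n",
"<</SYS>>\n",
"\n",
"first user message [/INST] first model reply </s><s>[INST] second user message [/INST]\n",
"```"
]
},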
{
"cell_type": "code",
"execution_count": 1,
"id": "36c03540",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains import LLMChain\n",
"from langchain.memory import ConversationBufferMemory\n",
"from langchain_experimental.chat_models import Llama2Chat"
]
},
{
"cell_type": "markdown",
"id": "5c76910f",
"metadata": {},
"source": [
"For the chat application examples below, we'll use the following chat `prompt_template`:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "9bbfaf3a",
"metadata": {},
"outputs": [],
"source": [
"from langchain.prompts.chat import (\n",
" ChatPromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
" MessagesPlaceholder,\n",
")\n",
"from langchain.schema import SystemMessage\n",
"\n",
"template_messages = [\n",
" SystemMessage(content=\"You are a helpful assistant.\"),\n",
" MessagesPlaceholder(variable_name=\"chat_history\"),\n",
" HumanMessagePromptTemplate.from_template(\"{text}\"),\n",
"]\n",
"prompt_template = ChatPromptTemplate.from_messages(template_messages)"
]
},
{
"cell_type": "markdown",
"id": "2f3343b7",
"metadata": {},
"source": [
"## Chat with Llama-2 via `HuggingFaceTextGenInference` LLM"
]
},
{
"cell_type": "markdown",
"id": "2ff99380",
"metadata": {},
"source": [
"A [HuggingFaceTextGenInference](https://python.langchain.com/docs/integrations/llms/huggingface_textgen_inference) LLM encapsulates access to a [text-generation-inference](https://github.com/huggingface/text-generation-inference) server. In the following example, the inference server serves a [meta-llama/Llama-2-13b-chat-hf](https://huggingface.co/meta-llama/Llama-2-13b-chat-hf) model. It can be started locally with:\n",
"\n",
"```bash\n",
"docker run \\\n",
" --rm \\\n",
" --gpus all \\\n",
" --ipc=host \\\n",
" -p 8080:80 \\\n",
" -v ~/.cache/huggingface/hub:/data \\\n",
" -e HF_API_TOKEN=${HF_API_TOKEN} \\\n",
" ghcr.io/huggingface/text-generation-inference:0.9 \\\n",
" --hostname 0.0.0.0 \\\n",
" --model-id meta-llama/Llama-2-13b-chat-hf \\\n",
" --quantize bitsandbytes \\\n",
" --num-shard 4\n",
"```\n",
"\n",
"This works on a machine with 4 x RTX 3080ti cards, for example. Adjust the `--num_shard` value to the number of GPUs available. The `HF_API_TOKEN` environment variable holds the Hugging Face API token."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "238095fd",
"metadata": {},
"outputs": [],
"source": [
"# !pip3 install text-generation"
]
},
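{
"cell_type": "markdown",
"id": "tgi-sanity-check-note",
"metadata": {},
"source": [
"Optionally, you can verify that the inference server is reachable before wiring it into LangChain. This is a small sanity-check sketch (not part of the original notebook) that uses the `text-generation` client installed above and assumes the server from the docker command is running on `localhost:8080`:\n",
"\n",
"```python\n",
"from text_generation import Client\n",
"\n",
"# Point the client at the local text-generation-inference server.\n",
"client = Client(\"http://127.0.0.1:8080\")\n",
"print(client.generate(\"Hello\", max_new_tokens=8).generated_text)\n",
"```"
]
},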
{
"cell_type": "markdown",
"id": "79c4ace9",
"metadata": {},
"source": [
"Create a `HuggingFaceTextGenInference` instance that connects to the local inference server and wrap it into `Llama2Chat`."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "7a9f6de2",
"metadata": {},
"outputs": [],
"source": [
"from langchain.llms import HuggingFaceTextGenInference\n",
"\n",
"llm = HuggingFaceTextGenInference(\n",
" inference_server_url=\"http://127.0.0.1:8080/\",\n",
" max_new_tokens=512,\n",
" top_k=50,\n",
" temperature=0.1,\n",
" repetition_penalty=1.03,\n",
")\n",
"\n",
"model = Llama2Chat(llm=llm)"
]
},
{
"cell_type": "markdown",
"id": "4f646a2b",
"metadata": {},
"source": [
"Then you are ready to use the chat `model` together with `prompt_template` and conversation `memory` in an `LLMChain`."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "54b5d1d1",
"metadata": {},
"outputs": [],
"source": [
"memory = ConversationBufferMemory(memory_key=\"chat_history\", return_messages=True)\n",
"chain = LLMChain(llm=model, prompt=prompt_template, memory=memory)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "e6717947",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Sure, I'd be happy to help! Here are a few popular locations to consider visiting in Vienna:\n",
"\n",
"1. Schönbrunn Palace\n",
"2. St. Stephen's Cathedral\n",
"3. Hofburg Palace\n",
"4. Belvedere Palace\n",
"5. Prater Park\n",
"6. Vienna State Opera\n",
"7. Albertina Museum\n",
"8. Museum of Natural History\n",
"9. Kunsthistorisches Museum\n",
"10. Ringstrasse\n"
]
}
],
"source": [
"print(\n",
" chain.run(\n",
" text=\"What can I see in Vienna? Propose a few locations. Names only, no details.\"\n",
" )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "17bf10d5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Certainly! St. Stephen's Cathedral (Stephansdom) is one of the most recognizable landmarks in Vienna and a must-see attraction for visitors. This stunning Gothic cathedral is located in the heart of the city and is known for its intricate stone carvings, colorful stained glass windows, and impressive dome.\n",
"\n",
"The cathedral was built in the 12th century and has been the site of many important events throughout history, including the coronation of Holy Roman emperors and the funeral of Mozart. Today, it is still an active place of worship and offers guided tours, concerts, and special events. Visitors can climb up the south tower for panoramic views of the city or attend a service to experience the beautiful music and chanting.\n"
]
}
],
"source": [
"print(chain.run(text=\"Tell me more about #2.\"))"
]
},
{
"cell_type": "markdown",
"id": "2a297e09",
"metadata": {},
"source": [
"## Chat with Llama-2 via `LlamaCPP` LLM"
]
},
{
"cell_type": "markdown",
"id": "52c1a0b9",
"metadata": {},
"source": [
"For using a Llama-2 chat model with a [LlamaCPP](https://python.langchain.com/docs/integrations/llms/llamacpp) `LMM`, install the `llama-cpp-python` library using [these installation instructions](https://python.langchain.com/docs/integrations/llms/llamacpp#installation). The following example uses a quantized [llama-2-7b-chat.Q4_0.gguf](https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_0.gguf) model stored locally at `~/Models/llama-2-7b-chat.Q4_0.gguf`. \n",
"\n",
"After creating a `LlamaCpp` instance, the `llm` is again wrapped into `Llama2Chat`"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "07c0d04e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from /home/martin/Models/llama-2-7b-chat.Q4_0.gguf (version GGUF V2)\n",
"llama_model_loader: - tensor 0: token_embd.weight q4_0 [ 4096, 32000, 1, 1 ]\n",
"llama_model_loader: - tensor 1: blk.0.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 2: blk.0.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 3: blk.0.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 4: blk.0.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 5: blk.0.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 6: blk.0.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 7: blk.0.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 8: blk.0.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 9: blk.0.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 10: blk.1.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 11: blk.1.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 12: blk.1.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 13: blk.1.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 14: blk.1.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 15: blk.1.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 16: blk.1.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 17: blk.1.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 18: blk.1.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 19: blk.10.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 20: blk.10.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 21: blk.10.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 22: blk.10.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 23: blk.10.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 24: blk.10.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 25: blk.10.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 26: blk.10.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 27: blk.10.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 28: blk.11.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 29: blk.11.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 30: blk.11.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 31: blk.11.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 32: blk.11.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 33: blk.11.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 34: blk.11.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 35: blk.11.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 36: blk.11.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 37: blk.12.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 38: blk.12.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 39: blk.12.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 40: blk.12.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 41: blk.12.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 42: blk.12.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 43: blk.12.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 44: blk.12.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 45: blk.12.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 46: blk.13.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 47: blk.13.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 48: blk.13.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 49: blk.13.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 50: blk.13.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 51: blk.13.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 52: blk.13.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 53: blk.13.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 54: blk.13.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 55: blk.14.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 56: blk.14.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 57: blk.14.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 58: blk.14.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 59: blk.14.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 60: blk.14.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 61: blk.14.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 62: blk.14.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 63: blk.14.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 64: blk.15.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 65: blk.15.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 66: blk.15.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 67: blk.15.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 68: blk.15.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 69: blk.15.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 70: blk.15.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 71: blk.15.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 72: blk.15.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 73: blk.16.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 74: blk.16.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 75: blk.16.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 76: blk.16.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 77: blk.16.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 78: blk.16.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 79: blk.16.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 80: blk.16.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 81: blk.16.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 82: blk.17.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 83: blk.17.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 84: blk.17.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 85: blk.17.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 86: blk.17.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 87: blk.17.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 88: blk.17.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 89: blk.17.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 90: blk.17.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 91: blk.18.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 92: blk.18.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 93: blk.18.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 94: blk.18.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 95: blk.18.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 96: blk.18.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 97: blk.18.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 98: blk.18.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 99: blk.18.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 100: blk.19.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 101: blk.19.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 102: blk.19.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 103: blk.19.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 104: blk.19.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 105: blk.19.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 106: blk.19.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 107: blk.19.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 108: blk.19.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 109: blk.2.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 110: blk.2.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 111: blk.2.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 112: blk.2.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 113: blk.2.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 114: blk.2.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 115: blk.2.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 116: blk.2.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 117: blk.2.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 118: blk.20.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 119: blk.20.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 120: blk.20.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 121: blk.20.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 122: blk.20.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 123: blk.20.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 124: blk.20.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 125: blk.20.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 126: blk.20.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 127: blk.21.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 128: blk.21.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 129: blk.21.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 130: blk.21.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 131: blk.21.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 132: blk.21.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 133: blk.21.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 134: blk.21.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 135: blk.21.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 136: blk.22.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 137: blk.22.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 138: blk.22.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 139: blk.22.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 140: blk.22.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 141: blk.22.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 142: blk.22.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 143: blk.22.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 144: blk.22.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 145: blk.23.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 146: blk.23.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 147: blk.23.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 148: blk.23.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 149: blk.23.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 150: blk.23.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 151: blk.23.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 152: blk.23.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 153: blk.23.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 154: blk.3.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 155: blk.3.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 156: blk.3.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 157: blk.3.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 158: blk.3.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 159: blk.3.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 160: blk.3.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 161: blk.3.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 162: blk.3.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 163: blk.4.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 164: blk.4.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 165: blk.4.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 166: blk.4.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 167: blk.4.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 168: blk.4.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 169: blk.4.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 170: blk.4.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 171: blk.4.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 172: blk.5.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 173: blk.5.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 174: blk.5.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 175: blk.5.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 176: blk.5.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 177: blk.5.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 178: blk.5.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 179: blk.5.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 180: blk.5.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 181: blk.6.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 182: blk.6.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 183: blk.6.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 184: blk.6.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 185: blk.6.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 186: blk.6.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 187: blk.6.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 188: blk.6.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 189: blk.6.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 190: blk.7.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 191: blk.7.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 192: blk.7.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 193: blk.7.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 194: blk.7.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 195: blk.7.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 196: blk.7.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 197: blk.7.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 198: blk.7.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 199: blk.8.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 200: blk.8.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 201: blk.8.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 202: blk.8.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 203: blk.8.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 204: blk.8.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 205: blk.8.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 206: blk.8.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 207: blk.8.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 208: blk.9.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 209: blk.9.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 210: blk.9.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 211: blk.9.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 212: blk.9.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 213: blk.9.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 214: blk.9.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 215: blk.9.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 216: blk.9.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 217: output.weight q6_K [ 4096, 32000, 1, 1 ]\n",
"llama_model_loader: - tensor 218: blk.24.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 219: blk.24.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 220: blk.24.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 221: blk.24.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 222: blk.24.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 223: blk.24.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 224: blk.24.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 225: blk.24.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 226: blk.24.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 227: blk.25.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 228: blk.25.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 229: blk.25.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 230: blk.25.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 231: blk.25.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 232: blk.25.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 233: blk.25.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 234: blk.25.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 235: blk.25.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 236: blk.26.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 237: blk.26.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 238: blk.26.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 239: blk.26.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 240: blk.26.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 241: blk.26.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 242: blk.26.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 243: blk.26.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 244: blk.26.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 245: blk.27.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 246: blk.27.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 247: blk.27.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 248: blk.27.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 249: blk.27.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 250: blk.27.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 251: blk.27.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 252: blk.27.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 253: blk.27.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 254: blk.28.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 255: blk.28.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 256: blk.28.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 257: blk.28.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 258: blk.28.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 259: blk.28.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 260: blk.28.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 261: blk.28.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 262: blk.28.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 263: blk.29.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 264: blk.29.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 265: blk.29.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 266: blk.29.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 267: blk.29.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 268: blk.29.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 269: blk.29.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 270: blk.29.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 271: blk.29.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 272: blk.30.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 273: blk.30.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 274: blk.30.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 275: blk.30.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 276: blk.30.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 277: blk.30.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 278: blk.30.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 279: blk.30.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 280: blk.30.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 281: blk.31.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 282: blk.31.ffn_down.weight q4_0 [ 11008, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 283: blk.31.ffn_gate.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 284: blk.31.ffn_up.weight q4_0 [ 4096, 11008, 1, 1 ]\n",
"llama_model_loader: - tensor 285: blk.31.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - tensor 286: blk.31.attn_k.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 287: blk.31.attn_output.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 288: blk.31.attn_q.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 289: blk.31.attn_v.weight q4_0 [ 4096, 4096, 1, 1 ]\n",
"llama_model_loader: - tensor 290: output_norm.weight f32 [ 4096, 1, 1, 1 ]\n",
"llama_model_loader: - kv 0: general.architecture str \n",
"llama_model_loader: - kv 1: general.name str \n",
"llama_model_loader: - kv 2: llama.context_length u32 \n",
"llama_model_loader: - kv 3: llama.embedding_length u32 \n",
"llama_model_loader: - kv 4: llama.block_count u32 \n",
"llama_model_loader: - kv 5: llama.feed_forward_length u32 \n",
"llama_model_loader: - kv 6: llama.rope.dimension_count u32 \n",
"llama_model_loader: - kv 7: llama.attention.head_count u32 \n",
"llama_model_loader: - kv 8: llama.attention.head_count_kv u32 \n",
"llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 \n",
"llama_model_loader: - kv 10: general.file_type u32 \n",
"llama_model_loader: - kv 11: tokenizer.ggml.model str \n",
"llama_model_loader: - kv 12: tokenizer.ggml.tokens arr \n",
"llama_model_loader: - kv 13: tokenizer.ggml.scores arr \n",
"llama_model_loader: - kv 14: tokenizer.ggml.token_type arr \n",
"llama_model_loader: - kv 15: tokenizer.ggml.bos_token_id u32 \n",
"llama_model_loader: - kv 16: tokenizer.ggml.eos_token_id u32 \n",
"llama_model_loader: - kv 17: tokenizer.ggml.unknown_token_id u32 \n",
"llama_model_loader: - kv 18: general.quantization_version u32 \n",
"llama_model_loader: - type f32: 65 tensors\n",
"llama_model_loader: - type q4_0: 225 tensors\n",
"llama_model_loader: - type q6_K: 1 tensors\n",
"llm_load_vocab: special tokens definition check successful ( 259/32000 ).\n",
"llm_load_print_meta: format = GGUF V2\n",
"llm_load_print_meta: arch = llama\n",
"llm_load_print_meta: vocab type = SPM\n",
"llm_load_print_meta: n_vocab = 32000\n",
"llm_load_print_meta: n_merges = 0\n",
"llm_load_print_meta: n_ctx_train = 4096\n",
"llm_load_print_meta: n_embd = 4096\n",
"llm_load_print_meta: n_head = 32\n",
"llm_load_print_meta: n_head_kv = 32\n",
"llm_load_print_meta: n_layer = 32\n",
"llm_load_print_meta: n_rot = 128\n",
"llm_load_print_meta: n_gqa = 1\n",
"llm_load_print_meta: f_norm_eps = 0.0e+00\n",
"llm_load_print_meta: f_norm_rms_eps = 1.0e-06\n",
"llm_load_print_meta: f_clamp_kqv = 0.0e+00\n",
"llm_load_print_meta: f_max_alibi_bias = 0.0e+00\n",
"llm_load_print_meta: n_ff = 11008\n",
"llm_load_print_meta: rope scaling = linear\n",
"llm_load_print_meta: freq_base_train = 10000.0\n",
"llm_load_print_meta: freq_scale_train = 1\n",
"llm_load_print_meta: n_yarn_orig_ctx = 4096\n",
"llm_load_print_meta: rope_finetuned = unknown\n",
"llm_load_print_meta: model type = 7B\n",
"llm_load_print_meta: model ftype = mostly Q4_0\n",
"llm_load_print_meta: model params = 6.74 B\n",
"llm_load_print_meta: model size = 3.56 GiB (4.54 BPW) \n",
"llm_load_print_meta: general.name = LLaMA v2\n",
"llm_load_print_meta: BOS token = 1 '<s>'\n",
"llm_load_print_meta: EOS token = 2 '</s>'\n",
"llm_load_print_meta: UNK token = 0 '<unk>'\n",
"llm_load_print_meta: LF token = 13 '<0x0A>'\n",
"llm_load_tensors: ggml ctx size = 0.11 MB\n",
"llm_load_tensors: mem required = 3647.97 MB\n",
"..................................................................................................\n",
"llama_new_context_with_model: n_ctx = 512\n",
"llama_new_context_with_model: freq_base = 10000.0\n",
"llama_new_context_with_model: freq_scale = 1\n",
"llama_new_context_with_model: kv self size = 256.00 MB\n",
"llama_build_graph: non-view tensors processed: 740/740\n",
"llama_new_context_with_model: compute buffer total size = 2.66 MB\n",
"AVX = 1 | AVX2 = 1 | AVX512 = 1 | AVX512_VBMI = 0 | AVX512_VNNI = 1 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | \n"
]
}
],
"source": [
"from os.path import expanduser\n",
"\n",
"from langchain.llms import LlamaCpp\n",
"\n",
"model_path = expanduser(\"~/Models/llama-2-7b-chat.Q4_0.gguf\")\n",
"\n",
"llm = LlamaCpp(\n",
" model_path=model_path,\n",
" streaming=False,\n",
")\n",
"model = Llama2Chat(llm=llm)"
]
},
{
"cell_type": "markdown",
"id": "50498d96",
"metadata": {},
"source": [
"and used in the same way as in the previous example."
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "90782b96",
"metadata": {},
"outputs": [],
"source": [
"memory = ConversationBufferMemory(memory_key=\"chat_history\", return_messages=True)\n",
"chain = LLMChain(llm=model, prompt=prompt_template, memory=memory)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "2160b26d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Of course! Vienna is a beautiful city with a rich history and culture. Here are some of the top tourist attractions you might want to consider visiting:\n",
"1. Schönbrunn Palace\n",
"2. St. Stephen's Cathedral\n",
"3. Hofburg Palace\n",
"4. Belvedere Palace\n",
"5. Prater Park\n",
"6. MuseumsQuartier\n",
"7. Ringstrasse\n",
"8. Vienna State Opera\n",
"9. Kunsthistorisches Museum\n",
"10. Imperial Palace\n",
"\n",
"These are just a few of the many amazing places to see in Vienna. Each one has its own unique history and charm, so I hope you enjoy exploring this beautiful city!\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"llama_print_timings: load time = 250.46 ms\n",
"llama_print_timings: sample time = 56.40 ms / 144 runs ( 0.39 ms per token, 2553.37 tokens per second)\n",
"llama_print_timings: prompt eval time = 1444.25 ms / 47 tokens ( 30.73 ms per token, 32.54 tokens per second)\n",
"llama_print_timings: eval time = 8832.02 ms / 143 runs ( 61.76 ms per token, 16.19 tokens per second)\n",
"llama_print_timings: total time = 10645.94 ms\n"
]
}
],
"source": [
"print(\n",
" chain.run(\n",
" text=\"What can I see in Vienna? Propose a few locations. Names only, no details.\"\n",
" )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "d9ce06e3",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Llama.generate: prefix-match hit\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" Of course! St. Stephen's Cathedral (also known as Stephansdom) is a stunning Gothic-style cathedral located in the heart of Vienna, Austria. It is one of the most recognizable landmarks in the city and is considered a symbol of Vienna.\n",
"Here are some interesting facts about St. Stephen's Cathedral:\n",
"1. History: The construction of St. Stephen's Cathedral began in the 12th century on the site of a former Romanesque church, and it took over 600 years to complete. The cathedral has been renovated and expanded several times throughout its history, with the most significant renovation taking place in the 19th century.\n",
"2. Architecture: St. Stephen's Cathedral is built in the Gothic style, characterized by its tall spires, pointed arches, and intricate stone carvings. The cathedral features a mix of Romanesque, Gothic, and Baroque elements, making it a unique blend of styles.\n",
"3. Design: The cathedral's design is based on the plan of a cross with a long nave and two shorter arms extending from it. The main altar is\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"llama_print_timings: load time = 250.46 ms\n",
"llama_print_timings: sample time = 100.60 ms / 256 runs ( 0.39 ms per token, 2544.73 tokens per second)\n",
"llama_print_timings: prompt eval time = 5128.71 ms / 160 tokens ( 32.05 ms per token, 31.20 tokens per second)\n",
"llama_print_timings: eval time = 16193.02 ms / 255 runs ( 63.50 ms per token, 15.75 tokens per second)\n",
"llama_print_timings: total time = 21988.57 ms\n"
]
}
],
"source": [
"print(chain.run(text=\"Tell me more about #2.\"))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -1,29 +0,0 @@
# Notebook
This notebook covers how to load data from an .ipynb notebook into a format suitable by LangChain.
```python
from langchain.document_loaders import NotebookLoader
```
```python
loader = NotebookLoader("example_data/notebook.ipynb")
```
`NotebookLoader.load()` loads the `.ipynb` notebook file into a `Document` object.
**Parameters**:
* `include_outputs` (bool): whether to include cell outputs in the resulting document (default is False).
* `max_output_length` (int): the maximum number of characters to include from each cell output (default is 10).
* `remove_newline` (bool): whether to remove newline characters from the cell sources and outputs (default is False).
* `traceback` (bool): whether to include full traceback (default is False).
```python
loader.load(include_outputs=True, max_output_length=20, remove_newline=True)
```

View File

@@ -99,7 +99,7 @@
"\n",
"Language param : It's a list of language codes in a descending priority, `en` by default.\n",
"\n",
"translation param : It's a translate preference when the youtube does'nt have your select language, `en` by default."
"translation param : It's a translate preference, you can translate available transcript to your preferred language."
]
},
{

View File

@@ -101,8 +101,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.prompts import PromptTemplate\n",
"from langchain.chains import LLMChain"
"from langchain.chains import LLMChain\n",
"from langchain.prompts import PromptTemplate"
]
},
{

View File

@@ -7,11 +7,11 @@
"id": "683953b3"
},
"source": [
"# Elasticsearch Chat Message History\n",
"# Elasticsearch\n",
"\n",
">[Elasticsearch](https://www.elastic.co/elasticsearch/) is a distributed, RESTful search and analytics engine, capable of performing both vector and lexical search. It is built on top of the Apache Lucene library.\n",
"\n",
"This notebook shows how to use chat message history functionality with Elasticsearch."
"This notebook shows how to use chat message history functionality with `Elasticsearch`."
]
},
{
@@ -46,6 +46,59 @@
"%pip install elasticsearch langchain"
]
},
{
"cell_type": "markdown",
"id": "c46c216c",
"metadata": {},
"source": [
"## Authentication\n",
"\n",
"### How to obtain a password for the default \"elastic\" user\n",
"\n",
"To obtain your Elastic Cloud password for the default \"elastic\" user:\n",
"1. Log in to the [Elastic Cloud console](https://cloud.elastic.co)\n",
"2. Go to \"Security\" > \"Users\"\n",
"3. Locate the \"elastic\" user and click \"Edit\"\n",
"4. Click \"Reset password\"\n",
"5. Follow the prompts to reset the password\n",
"\n",
"\n",
"### Use the Username/password\n",
"\n",
"```python\n",
"es_username = os.environ.get(\"ES_USERNAME\", \"elastic\")\n",
"es_password = os.environ.get(\"ES_PASSWORD\", \"change me...\")\n",
"\n",
"history = ElasticsearchChatMessageHistory(\n",
" es_url=es_url,\n",
" es_user=es_username,\n",
" es_password=es_password,\n",
" index=\"test-history\",\n",
" session_id=\"test-session\"\n",
")\n",
"```\n",
"\n",
"### How to obtain an API key\n",
"\n",
"To obtain an API key:\n",
"1. Log in to the [Elastic Cloud console](https://cloud.elastic.co)\n",
"2. Open `Kibana` and go to Stack Management > API Keys\n",
"3. Click \"Create API key\"\n",
"4. Enter a name for the API key and click \"Create\"\n",
"\n",
"### Use the API key\n",
"\n",
"```python\n",
"es_api_key = os.environ.get(\"ES_API_KEY\")\n",
"\n",
"history = ElasticsearchChatMessageHistory(\n",
" es_api_key=es_api_key,\n",
" index=\"test-history\",\n",
" session_id=\"test-session\"\n",
")\n",
"```\n"
]
},
{
"cell_type": "markdown",
"id": "8be8fcc3",
@@ -104,58 +157,6 @@
"history.add_user_message(\"hi!\")\n",
"history.add_ai_message(\"whats up?\")"
]
},
{
"cell_type": "markdown",
"id": "c46c216c",
"metadata": {},
"source": [
"# Authentication\n",
"\n",
"## Username/password\n",
"\n",
"```python\n",
"es_username = os.environ.get(\"ES_USERNAME\", \"elastic\")\n",
"es_password = os.environ.get(\"ES_PASSWORD\", \"changeme\")\n",
"\n",
"history = ElasticsearchChatMessageHistory(\n",
" es_url=es_url,\n",
" es_user=es_username,\n",
" es_password=es_password,\n",
" index=\"test-history\",\n",
" session_id=\"test-session\"\n",
")\n",
"```\n",
"\n",
"### How to obtain a password for the default \"elastic\" user\n",
"\n",
"To obtain your Elastic Cloud password for the default \"elastic\" user:\n",
"1. Log in to the Elastic Cloud console at https://cloud.elastic.co\n",
"2. Go to \"Security\" > \"Users\"\n",
"3. Locate the \"elastic\" user and click \"Edit\"\n",
"4. Click \"Reset password\"\n",
"5. Follow the prompts to reset the password\n",
"\n",
"## API key\n",
"\n",
"```python\n",
"es_api_key = os.environ.get(\"ES_API_KEY\")\n",
"\n",
"history = ElasticsearchChatMessageHistory(\n",
" es_api_key=es_api_key,\n",
" index=\"test-history\",\n",
" session_id=\"test-session\"\n",
")\n",
"```\n",
"\n",
"### How to obtain an API key\n",
"\n",
"To obtain an API key:\n",
"1. Log in to the Elastic Cloud console at https://cloud.elastic.co\n",
"2. Open Kibana and go to Stack Management > API Keys\n",
"3. Click \"Create API key\"\n",
"4. Enter a name for the API key and click \"Create\""
]
}
],
"metadata": {
@@ -177,7 +178,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@@ -5,7 +5,7 @@
"id": "91c6a7ef",
"metadata": {},
"source": [
"# MongodDB\n",
"# MongoDB\n",
"\n",
">`MongoDB` is a source-available cross-platform document-oriented database program. Classified as a NoSQL database program, `MongoDB` uses `JSON`-like documents with optional schemas.\n",
">\n",

View File

@@ -4,9 +4,11 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Upstash Redis Chat Message History\n",
"# Upstash Redis\n",
"\n",
"This notebook goes over how to use Upstash Redis to store chat message history."
">[Upstash](https://upstash.com/docs/introduction) is a provider of the serverless `Redis`, `Kafka`, and `QStash` APIs.\n",
"\n",
"This notebook goes over how to use `Upstash Redis` to store chat message history."
]
},
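{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a minimal sketch (the REST URL, token, and session id below are placeholder assumptions; the actual setup is shown in the rest of this notebook), storing and reading messages looks roughly like this:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.memory import UpstashRedisChatMessageHistory\n",
"\n",
"# Placeholder credentials -- replace with your Upstash Redis REST URL and token\n",
"URL = \"https://<UPSTASH_REDIS_REST_URL>\"\n",
"TOKEN = \"<UPSTASH_REDIS_REST_TOKEN>\"\n",
"\n",
"history = UpstashRedisChatMessageHistory(\n",
"    url=URL, token=TOKEN, ttl=10, session_id=\"my-test-session\"\n",
")\n",
"\n",
"history.add_user_message(\"hello llm!\")\n",
"history.add_ai_message(\"hello user!\")\n",
"\n",
"history.messages"
]
},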
{
@@ -42,7 +44,7 @@
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -56,10 +58,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
},
"orig_nbformat": 4
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}

View File

@@ -1,10 +1,13 @@
# LangChain Decorators ✨
lanchchain decorators is a layer on the top of LangChain that provides syntactic sugar 🍭 for writing custom langchain prompts and chains
For Feedback, Issues, Contributions - please raise an issue here:
[ju-bezdek/langchain-decorators](https://github.com/ju-bezdek/langchain-decorators)
~~~
Disclaimer: `LangChain decorators` is not created by the LangChain team and is not supported by it.
~~~
>`LangChain decorators` is a layer on top of LangChain that provides syntactic sugar 🍭 for writing custom langchain prompts and chains
>
>For Feedback, Issues, Contributions - please raise an issue here:
>[ju-bezdek/langchain-decorators](https://github.com/ju-bezdek/langchain-decorators)
Main principles and benefits:
@@ -17,7 +20,6 @@ Main principles and benefits:
- easily share parameters between the prompts by binding them to one class
Here is a simple example of a code written with **LangChain Decorators ✨**
``` python

View File

@@ -4,14 +4,16 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Activeloop DeepLake's DeepMemory + LangChain + ragas or how to get +27% on RAG recall."
"# Activeloop Deep Memory"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Retrieval-Augmented Generators (RAGs) have recently gained significant attention. As advanced RAG techniques and agents emerge, they expand the potential of what RAGs can accomplish. However, several challenges may limit the integration of RAGs into production. The primary factors to consider when implementing RAGs in production settings are accuracy (recall), cost, and latency. For basic use cases, OpenAI's Ada model paired with a naive similarity search can produce satisfactory results. Yet, for higher accuracy or recall during searches, one might need to employ advanced retrieval techniques. These methods might involve varying data chunk sizes, rewriting queries multiple times, and more, potentially increasing latency and costs. [Activeloop's](https://activeloop.ai/) [Deep Memory](https://www.activeloop.ai/resources/use-deep-memory-to-boost-rag-apps-accuracy-by-up-to-22/) a feature available to Activeloop Deep Lake users, addresses these issuea by introducing a tiny neural network layer trained to match user queries with relevant data from a corpus. While this addition incurs minimal latency during search, it can boost retrieval accuracy by up to 27\n",
">[Activeloop Deep Memory](https://docs.activeloop.ai/performance-features/deep-memory) is a suite of tools that enables you to optimize your Vector Store for your use-case and achieve higher accuracy in your LLM apps.\n",
"\n",
"`Retrieval-Augmented Generatation` (`RAG`) has recently gained significant attention. As advanced RAG techniques and agents emerge, they expand the potential of what RAGs can accomplish. However, several challenges may limit the integration of RAGs into production. The primary factors to consider when implementing RAGs in production settings are accuracy (recall), cost, and latency. For basic use cases, OpenAI's Ada model paired with a naive similarity search can produce satisfactory results. Yet, for higher accuracy or recall during searches, one might need to employ advanced retrieval techniques. These methods might involve varying data chunk sizes, rewriting queries multiple times, and more, potentially increasing latency and costs. [Activeloop's](https://activeloop.ai/) [Deep Memory](https://www.activeloop.ai/resources/use-deep-memory-to-boost-rag-apps-accuracy-by-up-to-22/) a feature available to `Activeloop Deep Lake` users, addresses these issuea by introducing a tiny neural network layer trained to match user queries with relevant data from a corpus. While this addition incurs minimal latency during search, it can boost retrieval accuracy by up to 27\n",
"% and remains cost-effective and simple to use, without requiring any additional advanced rag techniques.\n"
]
},
@@ -19,23 +21,13 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"For this tutorial we will parse deeplake documentation, and create a RAG system that could answer the question from the docs. \n",
"\n",
"The tutorial can be divided into several parts:\n",
"1. [Dataset creation and uploading](#1-dataset-creation)\n",
"2. [Generating synthetic queries and training deep_memory](#2-generating-synthetic-queries-and-training-deep_memory)\n",
"3. [Evaluating deep memory performance](#3-evaluating-deep-memory-performance)\n",
" - 3.1 [using deepmemory recall@10 metric](#31-using-deepmemory-recall10-metric)\n",
" - 3.2 [using ragas](#32-deepmemory--ragas)\n",
" - 3.3 [deep_memory inference](#33-deepmemory-inference)\n",
" - 3.4 [deep_memory cost savings](#34-cost-savings)"
"For this tutorial we will parse `DeepLake` documentation, and create a RAG system that could answer the question from the docs. \n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a name=\"dataset-creation\"></a>\n",
"## 1. Dataset Creation"
]
},
@@ -227,10 +219,11 @@
},
{
"cell_type": "markdown",
"metadata": {},
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"<a name=\"training\"></a>\n",
"## 2. Generating synthetic queries and training deep_memory "
"## 2. Generating synthetic queries and training Deep Memory "
]
},
{
@@ -422,8 +415,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"<a name=\"evaluation\"></a>\n",
"## 3. Evaluating deep memory performance"
"## 3. Evaluating Deep Memory performance"
]
},
{
@@ -437,15 +429,16 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"<a name=\"recall@10\"></a>\n",
"### 3.1 using deepmemory recall@10 metric"
"### 3.1 Deep Memory evaluation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For the beginning we can use deep_memory's builtin evaluation method. it can be done easily in a few lines of code:"
"For the beginning we can use deep_memory's builtin evaluation method. \n",
"It calculates several `recall` metrics.\n",
"It can be done easily in a few lines of code."
]
},
{
@@ -495,8 +488,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"<a name=\"ragas\"></a>\n",
"### 3.2 DeepMemory + ragas"
"### 3.2 Deep Memory + RAGas"
]
},
{
@@ -596,10 +588,11 @@
},
{
"cell_type": "markdown",
"metadata": {},
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"<a name=\"inference\"></a>\n",
"### 3.3 DeepMemory Inference"
"### 3.3 Deep Memory Inference"
]
},
{
@@ -677,8 +670,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"<a name=\"cost\"></a>\n",
"### 3.4 Cost savings"
"### 3.4 Deep Memory cost savings"
]
},
{
@@ -691,7 +683,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -705,10 +697,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
},
"orig_nbformat": 4
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}

View File

@@ -0,0 +1,255 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "2f0f85ac-9c49-4111-a320-e53bccc99b13",
"metadata": {},
"source": [
"# Embedchain\n",
"\n",
"Embedchain is a RAG framework to create data pipelines. It loads, indexes, retrieves and syncs all the data.\n",
"\n",
"It is available as an [open source package](https://github.com/embedchain/embedchain) and as a [hosted platform solution](https://app.embedchain.ai/).\n",
"\n",
"This notebook shows how to use a retriever that uses Embedchain."
]
},
{
"cell_type": "markdown",
"id": "e48de822-307b-4284-96e7-c91f11ce005b",
"metadata": {},
"source": [
"# Installation\n",
"\n",
"First you will need to install the [`embedchain` package](https://pypi.org/project/embedchain/). \n",
"\n",
"You can install the package by running "
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "c690a78c-5999-4072-b4e1-2712ff73f950",
"metadata": {},
"outputs": [],
"source": [
"#!pip install --upgrade embedchain"
]
},
{
"cell_type": "markdown",
"id": "bc89ba12-6ebd-4cd6-8c85-7410531579ff",
"metadata": {},
"source": [
"# Create New Retriever\n",
"\n",
"`EmbedchainRetriever` has a static `.create()` factory method that takes the following arguments:\n",
"\n",
"* `yaml_path: string` optional -- Path to the YAML configuration file. If not provided, a default configuration is used. You can browse the [docs](https://docs.embedchain.ai/) to explore various customization options."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "8e639bd4-2e60-487b-b7aa-f7e6b921b069",
"metadata": {},
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
" ········\n"
]
}
],
"source": [
"# Setup API Key\n",
"\n",
"import os\n",
"from getpass import getpass\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "223fbc76-91ad-4504-87e9-980fb0e027fc",
"metadata": {},
"outputs": [],
"source": [
"from langchain.retrievers import EmbedchainRetriever\n",
"\n",
"# create retriever with default options\n",
"retriever = EmbedchainRetriever.create()\n",
"\n",
"# or if you want to customize, pass the yaml config path\n",
"# retriever = EmbedchainRetiever.create(yaml_path=\"config.yaml\")"
]
},
{
"cell_type": "markdown",
"id": "536f3a1d-3491-45b5-9f25-869bd6fb6d6a",
"metadata": {},
"source": [
"# Add Data\n",
"\n",
"In embedchain, you can as many supported data types as possible. You can browse our [docs](https://docs.embedchain.ai/) to see the data types supported.\n",
"\n",
"Embedchain automatically deduces the types of the data. So you can add a string, URL or local file path."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "31262be3-7d0d-42e8-9253-052160576dc7",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Inserting batches in chromadb: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:08<00:00, 2.22s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Successfully saved https://en.wikipedia.org/wiki/Elon_Musk (DataType.WEB_PAGE). New chunks count: 378\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Inserting batches in chromadb: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00, 1.17s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Successfully saved https://www.forbes.com/profile/elon-musk (DataType.WEB_PAGE). New chunks count: 13\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Inserting batches in chromadb: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00, 2.25s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Successfully saved https://www.youtube.com/watch?v=RcYjXbSJBN8 (DataType.YOUTUBE_VIDEO). New chunks count: 53\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
},
{
"data": {
"text/plain": [
"['1eab8dd1ffa92906f7fc839862871ca5',\n",
" '8cf46026cabf9b05394a2658bd1fe890',\n",
" 'da3227cdbcedb018e05c47b774d625f6']"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retriever.add_texts(\n",
" [\n",
" \"https://en.wikipedia.org/wiki/Elon_Musk\",\n",
" \"https://www.forbes.com/profile/elon-musk\",\n",
" \"https://www.youtube.com/watch?v=RcYjXbSJBN8\",\n",
" ]\n",
")"
]
},
{
"cell_type": "markdown",
"id": "e1f34a62-7f8e-4c03-8e10-c317ed3296aa",
"metadata": {},
"source": [
"# Use Retriever\n",
"\n",
"You can now use the retrieve to find relevant documents given a query"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "6106baf9-652a-4a94-b2d7-d6a5d2917975",
"metadata": {},
"outputs": [],
"source": [
"result = retriever.get_relevant_documents(\n",
" \"How many companies does Elon Musk run and name those?\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "1deae5d0-e0fa-431d-b164-e9680ef3e69b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='Views Filmography Companies Zip2 X.com PayPal SpaceX Starlink Tesla, Inc. Energycriticismlitigation OpenAI Neuralink The Boring Company Thud X Corp. Twitteracquisitiontenure as CEO xAI In popular culture Elon Musk (Isaacson) Elon Musk (Vance) Ludicrous Power Play \"Members Only\" \"The Platonic Permutation\" \"The Musk Who Fell to Earth\" \"One Crew over the Crewcoo\\'s Morty\" Elon Musk\\'s Crash Course Related Boring Test Tunnel Hyperloop Musk family Musk vs. Zuckerberg SolarCity Tesla Roadster in space', metadata={'source': 'https://en.wikipedia.org/wiki/Elon_Musk', 'document_id': 'c33c05d0-5028-498b-b5e3-c43a4f9e8bf8--3342161a0fbc19e91f6bf387204aa30fbb2cea05abc81882502476bde37b9392'}),\n",
" Document(page_content='Elon Musk PROFILEElon MuskCEO, Tesla$241.2B$508M (0.21%)Real Time Net Worthas of 11/18/23Reflects change since 5 pm ET of prior trading day. 1 in the world todayPhoto by Martin Schoeller for ForbesAbout Elon MuskElon Musk cofounded six companies, including electric car maker Tesla, rocket producer SpaceX and tunneling startup Boring Company.He owns about 21% of Tesla between stock and options, but has pledged more than half his shares as collateral for personal loans of up to $3.5', metadata={'source': 'https://www.forbes.com/profile/elon-musk', 'document_id': 'c33c05d0-5028-498b-b5e3-c43a4f9e8bf8--3c8573134c575fafc025e9211413723e1f7a725b5936e8ee297fb7fb63bdd01a'}),\n",
" Document(page_content='to form PayPal. In October 2002, eBay acquired PayPal for $1.5 billion, and that same year, with $100 million of the money he made, Musk founded SpaceX, a spaceflight services company. In 2004, he became an early investor in electric vehicle manufacturer Tesla Motors, Inc. (now Tesla, Inc.). He became its chairman and product architect, assuming the position of CEO in 2008. In 2006, Musk helped create SolarCity, a solar-energy company that was acquired by Tesla in 2016 and became Tesla Energy.', metadata={'source': 'https://en.wikipedia.org/wiki/Elon_Musk', 'document_id': 'c33c05d0-5028-498b-b5e3-c43a4f9e8bf8--3342161a0fbc19e91f6bf387204aa30fbb2cea05abc81882502476bde37b9392'})]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b3f26c2b-048d-4588-90a0-50f5c9c35837",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -4,9 +4,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# ERNIE Embedding-V1\n",
"# ERNIE\n",
"\n",
"[ERNIE Embedding-V1](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/alj562vvu) is a text representation model based on Baidu Wenxin's large-scale model technology, \n",
"[ERNIE Embedding-V1](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/alj562vvu) is a text representation model based on `Baidu Wenxin` large-scale model technology, \n",
"which converts text into a vector form represented by numerical values, and is used in text retrieval, information recommendation, knowledge mining and other scenarios."
]
},
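{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a rough sketch (assuming the ERNIE client credentials expected by `ErnieEmbeddings` are already configured; see the class docstring for the exact parameters), embedding a query looks like this:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings import ErnieEmbeddings\n",
"\n",
"# credentials are assumed to be configured elsewhere (constructor arguments or environment)\n",
"embeddings = ErnieEmbeddings()\n",
"\n",
"query_result = embeddings.embed_query(\"foo\")\n",
"len(query_result)"
]
},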
@@ -53,8 +53,19 @@
"language": "python",
"name": "python3"
},
"orig_nbformat": 4
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}

View File

@@ -5,14 +5,14 @@
"id": "900fbd04-f6aa-4813-868f-1c54e3265385",
"metadata": {},
"source": [
"# Qdrant FastEmbed\n",
"# FastEmbed by Qdrant\n",
"\n",
"[FastEmbed](https://qdrant.github.io/fastembed/) is a lightweight, fast, Python library built for embedding generation. \n",
"\n",
"- Quantized model weights\n",
"- ONNX Runtime, no PyTorch dependency\n",
"- CPU-first design\n",
"- Data-parallelism for encoding of large datasets."
">[FastEmbed](https://qdrant.github.io/fastembed/) from [Qdrant](https://qdrant.tech) is a lightweight, fast, Python library built for embedding generation. \n",
">\n",
">- Quantized model weights\n",
">- ONNX Runtime, no PyTorch dependency\n",
">- CPU-first design\n",
">- Data-parallelism for encoding of large datasets."
]
},
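{
"cell_type": "markdown",
"id": "fastembed-usage-sketch",
"metadata": {},
"source": [
"A minimal usage sketch (assuming the `fastembed` package is installed; installation and details follow below). The default model choice is handled by the class itself:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fastembed-usage-code",
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings.fastembed import FastEmbedEmbeddings\n",
"\n",
"embeddings = FastEmbedEmbeddings()  # downloads a default ONNX model on first use\n",
"\n",
"doc_vectors = embeddings.embed_documents([\"This is a document.\"])\n",
"query_vector = embeddings.embed_query(\"This is a query.\")\n",
"len(query_vector)"
]
},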
{
@@ -154,7 +154,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@@ -5,8 +5,10 @@
"id": "59428e05",
"metadata": {},
"source": [
"# InstructEmbeddings\n",
"Let's load the HuggingFace instruct Embeddings class."
"# Instruct Embeddings on Hugging Face\n",
"\n",
">[Hugging Face sentence-transformers](https://huggingface.co/sentence-transformers) is a Python framework for state-of-the-art sentence, text and image embeddings.\n",
">One of the instruct embedding models is used in the `HuggingFaceInstructEmbeddings` class.\n"
]
},
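{
"cell_type": "markdown",
"id": "instruct-usage-sketch",
"metadata": {},
"source": [
"A minimal sketch of loading the class described above (the instruction string below is an illustrative example, not a requirement):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "instruct-usage-code",
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings import HuggingFaceInstructEmbeddings\n",
"\n",
"embeddings = HuggingFaceInstructEmbeddings(\n",
"    query_instruction=\"Represent the query for retrieval: \"\n",
")\n",
"\n",
"text = \"This is a test document.\"\n",
"query_result = embeddings.embed_query(text)"
]
},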
{
@@ -85,7 +87,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.12"
},
"vscode": {
"interpreter": {

View File

@@ -2,183 +2,207 @@
"cells": [
{
"cell_type": "markdown",
"source": [
"# Johnsnowlabs Embedding\n",
"\n",
"### Loading the Johnsnowlabs embedding class to generate and query embeddings\n",
"\n",
"Models are loaded with [nlp.load](https://nlp.johnsnowlabs.com/docs/en/jsl/load_api) and spark session is started with [nlp.start()](https://nlp.johnsnowlabs.com/docs/en/jsl/start-a-sparksession) under the hood.\n",
"For all 24.000+ models, see the [John Snow Labs Model Models Hub](https://nlp.johnsnowlabs.com/models)\n"
],
"metadata": {
"collapsed": false
}
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"# John Snow Labs\n",
"\n",
">[John Snow Labs](https://nlp.johnsnowlabs.com/) NLP & LLM ecosystem includes software libraries for state-of-the-art AI at scale, Responsible AI, No-Code AI, and access to over 20,000 models for Healthcare, Legal, Finance, etc.\n",
">\n",
">Models are loaded with [nlp.load](https://nlp.johnsnowlabs.com/docs/en/jsl/load_api) and spark session is started >with [nlp.start()](https://nlp.johnsnowlabs.com/docs/en/jsl/start-a-sparksession) under the hood.\n",
">For all 24.000+ models, see the [John Snow Labs Model Models Hub](https://nlp.johnsnowlabs.com/models)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"! pip install johnsnowlabs\n"
],
"metadata": {
"collapsed": false
}
"## Setting up"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
"! pip install johnsnowlabs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
"# If you have a enterprise license, you can run this to install enterprise features\n",
"# from johnsnowlabs import nlp\n",
"# nlp.install()"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"source": [
"#### Import the necessary classes"
],
"metadata": {
"collapsed": false
},
"execution_count": 1,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found existing installation: langchain 0.0.189\n",
"Uninstalling langchain-0.0.189:\n",
" Successfully uninstalled langchain-0.0.189\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [],
"metadata": {
"collapsed": false
}
"metadata": {},
"source": [
"## Example"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
"from langchain.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings"
],
"metadata": {
"collapsed": false
}
]
},
{
"cell_type": "markdown",
"source": [
"#### Initialize Johnsnowlabs Embeddings and Spark Session"
],
"metadata": {
"collapsed": false
}
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"Initialize Johnsnowlabs Embeddings and Spark Session"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
"embedder = JohnSnowLabsEmbeddings(\"en.embed_sentence.biobert.clinical_base_cased\")"
],
"metadata": {
"collapsed": false
}
]
},
{
"cell_type": "markdown",
"source": [
"#### Define some example texts . These could be any documents that you want to analyze - for example, news articles, social media posts, or product reviews."
],
"metadata": {
"collapsed": false
}
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"Define some example texts . These could be any documents that you want to analyze - for example, news articles, social media posts, or product reviews."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
"texts = [\"Cancer is caused by smoking\", \"Antibiotics aren't painkiller\"]"
],
"metadata": {
"collapsed": false
}
]
},
{
"cell_type": "markdown",
"source": [
"#### Generate and print embeddings for the texts . The JohnSnowLabsEmbeddings class generates an embedding for each document, which is a numerical representation of the document's content. These embeddings can be used for various natural language processing tasks, such as document similarity comparison or text classification."
],
"metadata": {
"collapsed": false
}
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"Generate and print embeddings for the texts . The JohnSnowLabsEmbeddings class generates an embedding for each document, which is a numerical representation of the document's content. These embeddings can be used for various natural language processing tasks, such as document similarity comparison or text classification."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
"embeddings = embedder.embed_documents(texts)\n",
"for i, embedding in enumerate(embeddings):\n",
" print(f\"Embedding for document {i+1}: {embedding}\")"
],
"metadata": {
"collapsed": false
}
]
},
{
"cell_type": "markdown",
"source": [
"#### Generate and print an embedding for a single piece of text. You can also generate an embedding for a single piece of text, such as a search query. This can be useful for tasks like information retrieval, where you want to find documents that are similar to a given query."
],
"metadata": {
"collapsed": false
}
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"Generate and print an embedding for a single piece of text. You can also generate an embedding for a single piece of text, such as a search query. This can be useful for tasks like information retrieval, where you want to find documents that are similar to a given query."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
"query = \"Cancer is caused by smoking\"\n",
"query_embedding = embedder.embed_query(query)\n",
"print(f\"Embedding for query: {query_embedding}\")"
],
"metadata": {
"collapsed": false
}
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 0
"nbformat_minor": 4
}

View File

@@ -5,11 +5,13 @@
"id": "ed47bb62",
"metadata": {},
"source": [
"# Sentence Transformers\n",
"# Sentence Transformers on Hugging Face\n",
"\n",
">[SentenceTransformers](https://www.sbert.net/) embeddings are called using the `HuggingFaceEmbeddings` integration. We have also added an alias for `SentenceTransformerEmbeddings` for users who are more familiar with directly using that package.\n",
">[Hugging Face sentence-transformers](https://huggingface.co/sentence-transformers) is a Python framework for state-of-the-art sentence, text and image embeddings.\n",
">One of the embedding models is used in the `HuggingFaceEmbeddings` class.\n",
">We have also added an alias for `SentenceTransformerEmbeddings` for users who are more familiar with directly using that package.\n",
"\n",
"`SentenceTransformers` is a python package that can generate text and image embeddings, originating from [Sentence-BERT](https://arxiv.org/abs/1908.10084)"
"`sentence_transformers` package models are originating from [Sentence-BERT](https://arxiv.org/abs/1908.10084)"
]
},
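{
"cell_type": "markdown",
"id": "sbert-usage-sketch",
"metadata": {},
"source": [
"A brief usage sketch (the model name is an illustrative choice):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "sbert-usage-code",
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings import HuggingFaceEmbeddings\n",
"\n",
"embeddings = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n",
"\n",
"text = \"This is a test document.\"\n",
"query_result = embeddings.embed_query(text)\n",
"doc_result = embeddings.embed_documents([text, \"This is not a test document.\"])"
]
},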
{

View File

@@ -5,7 +5,11 @@
"id": "fff4734f",
"metadata": {},
"source": [
"# TensorflowHub\n",
"# TensorFlow Hub\n",
"\n",
">[TensorFlow Hub](https://www.tensorflow.org/hub) is a repository of trained machine learning models ready for fine-tuning and deployable anywhere. Reuse trained models like `BERT` and `Faster R-CNN` with just a few lines of code.\n",
">\n",
">\n",
"Let's load the TensorflowHub Embedding class."
]
},
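{
"cell_type": "markdown",
"id": "tfhub-usage-sketch",
"metadata": {},
"source": [
"A short usage sketch (the class falls back to its default TensorFlow Hub model URL when none is given):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "tfhub-usage-code",
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings import TensorflowHubEmbeddings\n",
"\n",
"embeddings = TensorflowHubEmbeddings()\n",
"\n",
"text = \"This is a test document.\"\n",
"query_result = embeddings.embed_query(text)\n",
"doc_results = embeddings.embed_documents([\"foo\", \"bar\"])"
]
},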
@@ -105,7 +109,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.12"
},
"vscode": {
"interpreter": {

View File

@@ -7,6 +7,8 @@
"source": [
"# Voyage AI\n",
"\n",
">[Voyage AI](https://www.voyageai.com/) provides cutting-edge embedding/vectorizations models.\n",
"\n",
"Let's load the Voyage Embedding class."
]
},
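{
"cell_type": "markdown",
"id": "voyage-usage-sketch",
"metadata": {},
"source": [
"As a minimal sketch (the API key below is a placeholder; how to obtain and configure it is covered in the rest of this notebook):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "voyage-usage-code",
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings import VoyageEmbeddings\n",
"\n",
"# placeholder key -- the parameter name follows the VoyageEmbeddings constructor\n",
"embeddings = VoyageEmbeddings(voyage_api_key=\"<YOUR_VOYAGE_API_KEY>\")\n",
"\n",
"documents = [\"foo bar\", \"baz qux\"]\n",
"doc_vectors = embeddings.embed_documents(documents)\n",
"query_vector = embeddings.embed_query(\"foo\")"
]
},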
@@ -215,7 +217,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
"version": "3.10.12"
},
"vscode": {
"interpreter": {

View File

@@ -12,7 +12,8 @@
"- AzureCogsImageAnalysisTool: used to extract caption, objects, tags, and text from images. (Note: this tool is not available on Mac OS yet, due to the dependency on `azure-ai-vision` package, which is only supported on Windows and Linux currently.)\n",
"- AzureCogsFormRecognizerTool: used to extract text, tables, and key-value pairs from documents.\n",
"- AzureCogsSpeech2TextTool: used to transcribe speech to text.\n",
"- AzureCogsText2SpeechTool: used to synthesize text to speech."
"- AzureCogsText2SpeechTool: used to synthesize text to speech.\n",
"- AzureCogsTextAnalyticsHealthTool: used to extract healthcare entities."
]
},
{
@@ -32,6 +33,7 @@
"source": [
"# !pip install --upgrade azure-ai-formrecognizer > /dev/null\n",
"# !pip install --upgrade azure-cognitiveservices-speech > /dev/null\n",
"# !pip install --upgrade azure-ai-textanalytics > /dev/null\n",
"\n",
"# For Windows/Linux\n",
"# !pip install --upgrade azure-ai-vision > /dev/null"
@@ -60,7 +62,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
@@ -101,7 +103,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
@@ -111,7 +113,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
@@ -240,6 +242,65 @@
"display.display(audio)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mAction:\n",
"```\n",
"{\n",
" \"action\": \"azure_cognitive_services_text_analyics_health\",\n",
" \"action_input\": \"The patient is a 54-year-old gentleman with a history of progressive angina over the past several months. The patient had a cardiac catheterization in July of this year revealing total occlusion of the RCA and 50% left main disease, with a strong family history of coronary artery disease with a brother dying at the age of 52 from a myocardial infarction and another brother who is status post coronary artery bypass grafting. The patient had a stress echocardiogram done on July, 2001, which showed no wall motion abnormalities, but this was a difficult study due to body habitus. The patient went for six minutes with minimal ST depressions in the anterior lateral leads, thought due to fatigue and wrist pain, his anginal equivalent. Due to the patient's increased symptoms and family history and history left main disease with total occasional of his RCA was referred for revascularization with open heart surgery.\"\n",
"}\n",
"```\n",
"\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3mThe text conatins the following healthcare entities: 54-year-old is a healthcare entity of type Age, gentleman is a healthcare entity of type Gender, progressive angina is a healthcare entity of type Diagnosis, past several months is a healthcare entity of type Time, cardiac catheterization is a healthcare entity of type ExaminationName, July of this year is a healthcare entity of type Time, total is a healthcare entity of type ConditionQualifier, occlusion is a healthcare entity of type SymptomOrSign, RCA is a healthcare entity of type BodyStructure, 50 is a healthcare entity of type MeasurementValue, % is a healthcare entity of type MeasurementUnit, left main is a healthcare entity of type BodyStructure, disease is a healthcare entity of type Diagnosis, family is a healthcare entity of type FamilyRelation, coronary artery disease is a healthcare entity of type Diagnosis, brother is a healthcare entity of type FamilyRelation, dying is a healthcare entity of type Diagnosis, 52 is a healthcare entity of type Age, myocardial infarction is a healthcare entity of type Diagnosis, brother is a healthcare entity of type FamilyRelation, coronary artery bypass grafting is a healthcare entity of type TreatmentName, stress echocardiogram is a healthcare entity of type ExaminationName, July, 2001 is a healthcare entity of type Time, wall motion abnormalities is a healthcare entity of type SymptomOrSign, body habitus is a healthcare entity of type SymptomOrSign, six minutes is a healthcare entity of type Time, minimal is a healthcare entity of type ConditionQualifier, ST depressions in the anterior lateral leads is a healthcare entity of type SymptomOrSign, fatigue is a healthcare entity of type SymptomOrSign, wrist pain is a healthcare entity of type SymptomOrSign, anginal equivalent is a healthcare entity of type SymptomOrSign, increased is a healthcare entity of type Course, symptoms is a healthcare entity of type SymptomOrSign, family is a healthcare entity of type FamilyRelation, left is a healthcare entity of type Direction, main is a healthcare entity of type BodyStructure, disease is a healthcare entity of type Diagnosis, occasional is a healthcare entity of type Course, RCA is a healthcare entity of type BodyStructure, revascularization is a healthcare entity of type TreatmentName, open heart surgery is a healthcare entity of type TreatmentName\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I know what to respond\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"Final Answer\",\n",
" \"action_input\": \"The text contains the following diagnoses: progressive angina, coronary artery disease, myocardial infarction, and coronary artery bypass grafting.\"\n",
"}\n",
"```\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"'The text contains the following diagnoses: progressive angina, coronary artery disease, myocardial infarction, and coronary artery bypass grafting.'"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent.run(\n",
" \"\"\"The patient is a 54-year-old gentleman with a history of progressive angina over the past several months.\n",
"The patient had a cardiac catheterization in July of this year revealing total occlusion of the RCA and 50% left main disease ,\n",
"with a strong family history of coronary artery disease with a brother dying at the age of 52 from a myocardial infarction and\n",
"another brother who is status post coronary artery bypass grafting. The patient had a stress echocardiogram done on July , 2001 ,\n",
"which showed no wall motion abnormalities , but this was a difficult study due to body habitus. The patient went for six minutes with\n",
"minimal ST depressions in the anterior lateral leads , thought due to fatigue and wrist pain , his anginal equivalent. Due to the patient's\n",
"increased symptoms and family history and history left main disease with total occasional of his RCA was referred for revascularization with open heart surgery.\n",
"\n",
"List all the diagnoses.\n",
"\"\"\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -264,7 +325,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.8.10"
}
},
"nbformat": 4,

View File

@@ -4,7 +4,11 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# ClickUp Langchain Toolkit"
"# ClickUp\n",
"\n",
">[ClickUp](https://clickup.com/) is an all-in-one productivity platform that provides small and large teams across industries with flexible and customizable work management solutions, tools, and functions. \n",
"\n",
">It is a cloud-based project management solution for businesses of all sizes featuring communication and collaboration tools to help achieve organizational goals."
]
},
{
@@ -27,14 +31,14 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Init"
"## Initializing"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Get Authenticated\n",
"### Get Authenticated\n",
"1. Create a [ClickUp App](https://help.clickup.com/hc/en-us/articles/6303422883095-Create-your-own-app-with-the-ClickUp-API)\n",
"2. Follow [these steps](https://clickup.com/api/developer-portal/authentication/) to get your `client_id` and `client_secret`.\n",
" - *Suggestion: use `https://google.com` as the redirect_uri. This is what we assume in the defaults for this toolkit.*\n",
@@ -112,18 +116,7 @@
"source": [
"access_token = ClickupAPIWrapper.get_access_token(\n",
" oauth_client_id, oauth_client_secret, code\n",
")\n",
"\n",
"if access_token is not None:\n",
" print(\"Copy/paste this code, into the next cell so you can reuse it!\")\n",
" print(access_token)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create Toolkit"
")"
]
},
{
@@ -142,12 +135,6 @@
}
],
"source": [
"# Set your access token here\n",
"access_token = \"12345678_myaccesstokengoeshere123\"\n",
"access_token = (\n",
" \"81928627_c009bf122ccf36ec3ba3e0ef748b07042c5e4217260042004a5934540cb61527\"\n",
")\n",
"\n",
"# Init toolkit\n",
"clickup_api_wrapper = ClickupAPIWrapper(access_token=access_token)\n",
"toolkit = ClickupToolkit.from_clickup_api_wrapper(clickup_api_wrapper)\n",
@@ -160,7 +147,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create Agent"
"### Create Agent"
]
},
{
@@ -180,7 +167,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Run"
"## Use an Agent"
]
},
{
@@ -203,7 +190,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Navigation\n",
"### Navigation\n",
"You can get the teams, folder and spaces your user has access to"
]
},
@@ -287,7 +274,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Task Operations\n",
"### Task Operations\n",
"You can get, ask question about tasks and update them"
]
},
@@ -594,7 +581,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Creation\n",
"### Creation\n",
"You can create tasks, lists and folders"
]
},
@@ -778,7 +765,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Multi-Step Tasks"
"## Multi-Step Tasks"
]
},
{
@@ -848,7 +835,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "clickup-copilot",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -862,10 +849,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
},
"orig_nbformat": 4
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}

View File

@@ -44,7 +44,7 @@
"metadata": {},
"source": [
"_Note: depending on your LangChain setup, you may need to install/upgrade other dependencies needed for this demo_\n",
"_(specifically, recent versions of `datasets` `openai` `pypdf` and `tiktoken` are required)._"
"_(specifically, recent versions of `datasets`, `openai`, `pypdf` and `tiktoken` are required)._"
]
},
{
@@ -64,8 +64,6 @@
"from langchain.document_loaders import PyPDFLoader\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.prompts import ChatPromptTemplate\n",
"\n",
"# if not present yet, run: pip install \"datasets==2.14.6\"\n",
"from langchain.schema import Document\n",
"from langchain.schema.output_parser import StrOutputParser\n",
"from langchain.schema.runnable import RunnablePassthrough\n",
@@ -145,7 +143,7 @@
"outputs": [],
"source": [
"ASTRA_DB_API_ENDPOINT = input(\"ASTRA_DB_API_ENDPOINT = \")\n",
"ASTRA_DB_TOKEN = getpass(\"ASTRA_DB_TOKEN = \")"
"ASTRA_DB_APPLICATION_TOKEN = getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")"
]
},
{
@@ -159,7 +157,7 @@
" embedding=embe,\n",
" collection_name=\"astra_vector_demo\",\n",
" api_endpoint=ASTRA_DB_API_ENDPOINT,\n",
" token=ASTRA_DB_TOKEN,\n",
" token=ASTRA_DB_APPLICATION_TOKEN,\n",
")"
]
},
@@ -171,6 +169,14 @@
"### Load a dataset"
]
},
{
"cell_type": "markdown",
"id": "552e56b0-301a-4b06-99c7-57ba6faa966f",
"metadata": {},
"source": [
"Convert each entry in the source dataset into a `Document`, then write them into the vector store:"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -190,6 +196,16 @@
"print(f\"\\nInserted {len(inserted_ids)} documents.\")"
]
},
{
"cell_type": "markdown",
"id": "79d4f436-ef04-4288-8f79-97c9abb983ed",
"metadata": {},
"source": [
"In the above, `metadata` dictionaries are created from the source data and are part of the `Document`.\n",
"\n",
"_Note: check the [Astra DB API Docs](https://docs.datastax.com/en/astra-serverless/docs/develop/dev-with-json.html#_json_api_limits) for the valid metadata field names: some characters are reserved and cannot be used._"
]
},
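{
"cell_type": "markdown",
"id": "astra-metadata-sketch",
"metadata": {},
"source": [
"As a minimal sketch (the field name and the `vstore` variable are illustrative assumptions, not taken from the code above), a `Document` carrying metadata can be built and inserted like this:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "astra-metadata-code",
"metadata": {},
"outputs": [],
"source": [
"from langchain.schema import Document\n",
"\n",
"doc = Document(\n",
"    page_content=\"A quote about knowledge\",\n",
"    # metadata keys must respect the field-name limits linked above\n",
"    metadata={\"author\": \"aristotle\"},\n",
")\n",
"\n",
"# assuming `vstore` is the Astra DB vector store created earlier:\n",
"# vstore.add_documents([doc])"
]
},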
{
"cell_type": "markdown",
"id": "084d8802-ab39-4262-9a87-42eafb746f92",
@@ -213,6 +229,16 @@
"print(f\"\\nInserted {len(inserted_ids_2)} documents.\")"
]
},
{
"cell_type": "markdown",
"id": "63840eb3-8b29-4017-bc2f-301bf5001f28",
"metadata": {},
"source": [
"_Note: you may want to speed up the execution of `add_texts` and `add_documents` by increasing the concurrency level for_\n",
"_these bulk operations - check out the `*_concurrency` parameters in the class constructor and the `add_texts` docstrings_\n",
"_for more details. Depending on the network and the client machine specifications, your best-performing choice of parameters may vary._"
]
},
{
"cell_type": "markdown",
"id": "c031760a-1fc5-4855-adf2-02ed52fe2181",
@@ -625,7 +651,7 @@
"outputs": [],
"source": [
"ASTRA_DB_ID = input(\"ASTRA_DB_ID = \")\n",
"ASTRA_DB_TOKEN = getpass(\"ASTRA_DB_TOKEN = \")\n",
"ASTRA_DB_APPLICATION_TOKEN = getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")\n",
"\n",
"desired_keyspace = input(\"ASTRA_DB_KEYSPACE (optional, can be left empty) = \")\n",
"if desired_keyspace:\n",
@@ -645,7 +671,7 @@
"\n",
"cassio.init(\n",
" database_id=ASTRA_DB_ID,\n",
" token=ASTRA_DB_TOKEN,\n",
" token=ASTRA_DB_APPLICATION_TOKEN,\n",
" keyspace=ASTRA_DB_KEYSPACE,\n",
")"
]

View File

@@ -38,8 +38,8 @@
},
{
"cell_type": "code",
"execution_count": 2,
"id": "47f9b495-88f1-4286-8d5d-1416103931a7",
"execution_count": null,
"id": "dc37144c-208d-4ab3-9f3a-0407a69fe052",
"metadata": {
"tags": []
},
@@ -51,34 +51,12 @@
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
"\n",
"# Uncomment the following line if you need to initialize FAISS with no AVX2 optimization\n",
"# os.environ['FAISS_NO_AVX2'] = '1'"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "aac9563e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# os.environ['FAISS_NO_AVX2'] = '1'\n",
"\n",
"from langchain.document_loaders import TextLoader\n",
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.vectorstores import FAISS"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a3c3999a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders import TextLoader\n",
"from langchain.vectorstores import FAISS\n",
"\n",
"loader = TextLoader(\"../../../extras/modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n",
@@ -200,31 +178,15 @@
},
{
"cell_type": "code",
"execution_count": 16,
"id": "428a6816",
"metadata": {},
"outputs": [],
"source": [
"db.save_local(\"faiss_index\")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "56d1841c",
"metadata": {},
"outputs": [],
"source": [
"new_db = FAISS.load_local(\"faiss_index\", embeddings)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "39055525",
"execution_count": null,
"id": "1b31fe27-e0b3-42c6-b17c-8270b517ee1f",
"metadata": {},
"outputs": [],
"source": [
"db.save_local(\"faiss_index\")\n",
"\n",
"new_db = FAISS.load_local(\"faiss_index\", embeddings)\n",
"\n",
"docs = new_db.similarity_search(query)"
]
},
@@ -266,30 +228,11 @@
"metadata": {},
"outputs": [],
"source": [
"pkl = db.serialize_to_bytes() # serializes the faiss index"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eb083247",
"metadata": {
"vscode": {
"languageId": "r"
}
},
"outputs": [],
"source": [
"embeddings = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e36e220b",
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings.huggingface import HuggingFaceEmbeddings\n",
"\n",
"pkl = db.serialize_to_bytes() # serializes the faiss\n",
"embeddings = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n",
"\n",
"db = FAISS.deserialize_from_bytes(\n",
" embeddings=embeddings, serialized=pkl\n",
") # Load the index"
@@ -306,33 +249,14 @@
},
{
"cell_type": "code",
"execution_count": 20,
"id": "6dfd2b78",
"execution_count": null,
"id": "9b8f5e31-3f40-4e94-8d97-5883125efba7",
"metadata": {},
"outputs": [],
"source": [
"db1 = FAISS.from_texts([\"foo\"], embeddings)\n",
"db2 = FAISS.from_texts([\"bar\"], embeddings)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "29960da7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'068c473b-d420-487a-806b-fb0ccea7f711': Document(page_content='foo', metadata={})}"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"db2 = FAISS.from_texts([\"bar\"], embeddings)\n",
"\n",
"db1.docstore._dict"
]
},

View File

@@ -5,15 +5,16 @@
"id": "683953b3",
"metadata": {},
"source": [
"# Faiss\n",
"# Faiss (Async)\n",
"\n",
">[Facebook AI Similarity Search (Faiss)](https://engineering.fb.com/2017/03/29/data-infrastructure/faiss-a-library-for-efficient-similarity-search/) is a library for efficient similarity search and clustering of dense vectors. It contains algorithms that search in sets of vectors of any size, up to ones that possibly do not fit in RAM. It also contains supporting code for evaluation and parameter tuning.\n",
"\n",
"[Faiss documentation](https://faiss.ai/).\n",
"\n",
"This notebook shows how to use functionality related to the `FAISS` vector database using asyncio.\n",
"This notebook shows how to use functionality related to the `FAISS` vector database using `asyncio`.\n",
"LangChain implemented the synchronous and asynchronous vector store functions.\n",
"\n",
"See synchronous version [here](https://python.langchain.com/docs/integrations/vectorstores/faiss)."
"See `synchronous` version [here](https://python.langchain.com/docs/integrations/vectorstores/faiss)."
]
},
{
@@ -40,8 +41,8 @@
},
{
"cell_type": "code",
"execution_count": 1,
"id": "47f9b495-88f1-4286-8d5d-1416103931a7",
"execution_count": null,
"id": "971a172a-2d87-4eec-be92-87aa174fec30",
"metadata": {
"tags": []
},
@@ -53,81 +54,25 @@
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
"\n",
"# Uncomment the following line if you need to initialize FAISS with no AVX2 optimization\n",
"# os.environ['FAISS_NO_AVX2'] = '1'"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "aac9563e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# os.environ['FAISS_NO_AVX2'] = '1'\n",
"\n",
"from langchain.document_loaders import TextLoader\n",
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.vectorstores import FAISS"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "a3c3999a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders import TextLoader\n",
"from langchain.vectorstores import FAISS\n",
"\n",
"loader = TextLoader(\"../../../extras/modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n",
"\n",
"embeddings = OpenAIEmbeddings()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "5eabdb75",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"embeddings = OpenAIEmbeddings()\n",
"\n",
"db = await FAISS.afrom_documents(docs, embeddings)\n",
"\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = await db.asimilarity_search(query)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "4b172de8",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
"\n",
"Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
"\n",
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
"\n",
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n"
]
}
],
"source": [
"docs = await db.asimilarity_search(query)\n",
"\n",
"print(docs[0].page_content)"
]
},
@@ -142,33 +87,13 @@
},
{
"cell_type": "code",
"execution_count": 8,
"id": "186ee1d8",
"execution_count": null,
"id": "30bf7c85-a273-45dc-ae9e-f138e330b42e",
"metadata": {},
"outputs": [],
"source": [
"docs_and_scores = await db.asimilarity_search_with_score(query)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "284e04b5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': './state_of_the_union.txt'}),\n",
" 0.36871302)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs_and_scores = await db.asimilarity_search_with_score(query)\n",
"\n",
"docs_and_scores[0]"
]
},
@@ -202,52 +127,17 @@
},
{
"cell_type": "code",
"execution_count": 11,
"id": "428a6816",
"execution_count": null,
"id": "88e11f08-1ac8-45aa-8bc0-56439ef87256",
"metadata": {},
"outputs": [],
"source": [
"db.save_local(\"faiss_index\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "56d1841c",
"metadata": {},
"outputs": [],
"source": [
"new_db = FAISS.load_local(\"faiss_index\", embeddings, asynchronous=True)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "39055525",
"metadata": {},
"outputs": [],
"source": [
"docs = await new_db.asimilarity_search(query)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "98378c4e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': './state_of_the_union.txt'})"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"db.save_local(\"faiss_index\")\n",
"\n",
"new_db = FAISS.load_local(\"faiss_index\", embeddings, asynchronous=True)\n",
"\n",
"docs = await new_db.asimilarity_search(query)\n",
"\n",
"docs[0]"
]
},
@@ -261,26 +151,6 @@
"you can pickle the FAISS Index by these functions. If you use embeddings model which is of 90 mb (sentence-transformers/all-MiniLM-L6-v2 or any other model), the resultant pickle size would be more than 90 mb. the size of the model is also included in the overall size. To overcome this, use the below functions. These functions only serializes FAISS index and size would be much lesser. this can be helpful if you wish to store the index in database like sql."
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "d8faead5",
"metadata": {},
"outputs": [],
"source": [
"pkl = db.serialize_to_bytes() # serializes the faiss index"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eb083247",
"metadata": {},
"outputs": [],
"source": [
"embeddings = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -288,6 +158,10 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings.huggingface import HuggingFaceEmbeddings\n",
"\n",
"pkl = db.serialize_to_bytes() # serializes the faiss index\n",
"embeddings = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n",
"db = FAISS.deserialize_from_bytes(\n",
" embeddings=embeddings, serialized=pkl, asynchronous=True\n",
") # Load the index"
@@ -596,7 +470,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@@ -4,13 +4,15 @@
"cell_type": "markdown",
"id": "357f24224a8e818f",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"## Hippo\n",
"# Hippo\n",
"\n",
">[Hippo](https://www.transwarp.cn/starwarp) Please visit our official website for how to run a Hippo instance and\n",
"how to use functionality related to the Hippo vector database\n",
">[Transwarp Hippo](https://www.transwarp.cn/en/subproduct/hippo) is an enterprise-level cloud-native distributed vector database that supports storage, retrieval, and management of massive vector-based datasets. It efficiently solves problems such as vector similarity search and high-density vector clustering. `Hippo` features high availability, high performance, and easy scalability. It has many functions, such as multiple vector search indexes, data partitioning and sharding, data persistence, incremental data ingestion, vector scalar field filtering, and mixed queries. It can effectively meet the high real-time search demands of enterprises for massive vector data\n",
"\n",
"## Getting Started\n",
"\n",
@@ -21,12 +23,15 @@
"cell_type": "markdown",
"id": "a92d2ce26df7ac4c",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"## Installing Dependencies\n",
"\n",
"Initially, we require the installation of certain dependencies, such as OpenAI, Langchain, and Hippo-API. Please note, you should install the appropriate versions tailored to your environment."
"Initially, we require the installation of certain dependencies, such as OpenAI, Langchain, and Hippo-API. Please note, that you should install the appropriate versions tailored to your environment."
]
},
{
@@ -38,7 +43,10 @@
"end_time": "2023-10-30T06:47:54.718488Z",
"start_time": "2023-10-30T06:47:53.563129Z"
},
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
@@ -59,12 +67,15 @@
"cell_type": "markdown",
"id": "554081137df2c252",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"Note: Python version needs to be >=3.8.\n",
"\n",
"## Best Practice\n",
"## Best Practices\n",
"### Importing Dependency Packages"
]
},
@@ -77,7 +88,10 @@
"end_time": "2023-10-30T06:47:56.003409Z",
"start_time": "2023-10-30T06:47:55.998839Z"
},
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
@@ -94,7 +108,10 @@
"cell_type": "markdown",
"id": "dad255dae8aea755",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"### Loading Knowledge Documents"
@@ -109,7 +126,10 @@
"end_time": "2023-10-30T06:47:59.027869Z",
"start_time": "2023-10-30T06:47:59.023934Z"
},
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
@@ -122,7 +142,10 @@
"cell_type": "markdown",
"id": "e9b93c330f1c6160",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"### Segmenting the Knowledge Document\n",
@@ -139,7 +162,10 @@
"end_time": "2023-10-30T06:48:00.279351Z",
"start_time": "2023-10-30T06:48:00.275763Z"
},
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
@@ -151,7 +177,10 @@
"cell_type": "markdown",
"id": "eefe28c7c993ffdf",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"### Declaring the Embedding Model\n",
@@ -167,7 +196,10 @@
"end_time": "2023-10-30T06:48:11.686166Z",
"start_time": "2023-10-30T06:48:11.664355Z"
},
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
@@ -188,7 +220,10 @@
"cell_type": "markdown",
"id": "e60235602ed91d3c",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"### Declaring Hippo Client"
@@ -203,7 +238,10 @@
"end_time": "2023-10-30T06:48:48.594298Z",
"start_time": "2023-10-30T06:48:48.585267Z"
},
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
@@ -214,7 +252,10 @@
"cell_type": "markdown",
"id": "43ee6dbd765c3172",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"### Storing the Document"
@@ -229,7 +270,10 @@
"end_time": "2023-10-30T06:51:12.661741Z",
"start_time": "2023-10-30T06:51:06.257156Z"
},
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
@@ -257,7 +301,10 @@
"cell_type": "markdown",
"id": "89077cc9763d5dd0",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"### Conducting Knowledge-based Question and Answer\n",
@@ -274,7 +321,10 @@
"end_time": "2023-10-30T06:51:28.329351Z",
"start_time": "2023-10-30T06:51:28.318713Z"
},
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
@@ -293,7 +343,10 @@
"cell_type": "markdown",
"id": "a4c5d73016a9db0c",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"### Acquiring Related Knowledge Based on the Question"
@@ -308,7 +361,10 @@
"end_time": "2023-10-30T06:51:33.195634Z",
"start_time": "2023-10-30T06:51:32.196493Z"
},
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
@@ -328,7 +384,10 @@
"cell_type": "markdown",
"id": "e5adbaaa7086d1ae",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"### Constructing a Prompt Template"
@@ -343,7 +402,10 @@
"end_time": "2023-10-30T06:51:35.649376Z",
"start_time": "2023-10-30T06:51:35.645763Z"
},
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
@@ -358,7 +420,10 @@
"cell_type": "markdown",
"id": "b36b6a9adbec8a82",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"### Waiting for the Large Language Model to Generate an Answer"
@@ -373,7 +438,10 @@
"end_time": "2023-10-30T06:52:17.967885Z",
"start_time": "2023-10-30T06:51:37.692819Z"
},
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
@@ -402,7 +470,10 @@
"ExecuteTime": {
"start_time": "2023-10-30T06:42:42.172639Z"
},
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": []
@@ -410,21 +481,21 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@@ -7,11 +7,11 @@
"source": [
"# SemaDB\n",
"\n",
"> SemaDB is a no fuss vector similarity database for building AI applications. The hosted SemaDB Cloud offers a no fuss developer experience to get started.\n",
"> [SemaDB](https://www.semafind.com/products/semadb) from [SemaFind](https://www.semafind.com) is a no fuss vector similarity database for building AI applications. The hosted `SemaDB Cloud` offers a no fuss developer experience to get started.\n",
"\n",
"The full documentation of the API along with examples and an interactive playground is available on [RapidAPI](https://rapidapi.com/semafind-semadb/api/semadb).\n",
"\n",
"This notebook demonstrates how the `langchain` wrapper can be used with SemaDB Cloud."
"This notebook demonstrates usage of the `SemaDB Cloud` vector store."
]
},
{

View File

@@ -217,7 +217,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@@ -3,12 +3,15 @@
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"# sqlite-vss\n",
"# SQLite-VSS\n",
"\n",
">[sqlite-vss](https://alexgarcia.xyz/sqlite-vss/) is an SQLite extension designed for vector search, emphasizing local-first operations and easy integration into applications without external servers. Leveraging the Faiss library, it offers efficient similarity search and clustering capabilities.\n",
">[SQLite-VSS](https://alexgarcia.xyz/sqlite-vss/) is an `SQLite` extension designed for vector search, emphasizing local-first operations and easy integration into applications without external servers. Leveraging the `Faiss` library, it offers efficient similarity search and clustering capabilities.\n",
"\n",
"This notebook shows how to use the `SQLiteVSS` vector database."
]
@@ -17,7 +20,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
@@ -28,10 +34,13 @@
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"### Quickstart"
"## Quickstart"
]
},
{
@@ -42,7 +51,10 @@
"end_time": "2023-09-06T14:55:55.370351Z",
"start_time": "2023-09-06T14:55:53.547755Z"
},
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
@@ -97,10 +109,13 @@
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"### Using existing sqlite connection"
"## Using existing SQLite connection"
]
},
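The contents of the following cell are not shown in this hunk; a rough sketch of reusing a connection, assuming the `create_connection` helper and the constructor arguments below, would be:

```python
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.vectorstores import SQLiteVSS

embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Open (or create) the SQLite database once and hand the same connection to the store.
connection = SQLiteVSS.create_connection(db_file="/tmp/vss.db")
db = SQLiteVSS(table="state_union", embedding=embedding_function, connection=connection)

db.add_texts(["Ketanji Brown Jackson is awesome"])
db.similarity_search("What did the president say about Ketanji Brown Jackson?")
```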
{
@@ -111,7 +126,10 @@
"end_time": "2023-09-06T14:59:22.086252Z",
"start_time": "2023-09-06T14:59:21.693237Z"
},
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
@@ -166,7 +184,10 @@
"end_time": "2023-09-06T15:01:15.550318Z",
"start_time": "2023-09-06T15:01:15.546428Z"
},
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
@@ -180,7 +201,10 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": []
@@ -188,23 +212,23 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 0
"nbformat_minor": 4
}

View File

@@ -7,28 +7,30 @@
"source": [
"# Timescale Vector (Postgres)\n",
"\n",
">[Timescale Vector](https://www.timescale.com/ai?utm_campaign=vectorlaunch&utm_source=langchain&utm_medium=referral) is `PostgreSQL++` vector database for AI applications.\n",
"\n",
"This notebook shows how to use the Postgres vector database `Timescale Vector`. You'll learn how to use TimescaleVector for (1) semantic search, (2) time-based vector search, (3) self-querying, and (4) how to create indexes to speed up queries.\n",
"\n",
"## What is Timescale Vector?\n",
"**[Timescale Vector](https://www.timescale.com/ai?utm_campaign=vectorlaunch&utm_source=langchain&utm_medium=referral) is PostgreSQL++ for AI applications.**\n",
"\n",
"Timescale Vector enables you to efficiently store and query millions of vector embeddings in `PostgreSQL`.\n",
"`Timescale Vector` enables you to efficiently store and query millions of vector embeddings in `PostgreSQL`.\n",
"- Enhances `pgvector` with faster and more accurate similarity search on 100M+ vectors via `DiskANN` inspired indexing algorithm.\n",
"- Enables fast time-based vector search via automatic time-based partitioning and indexing.\n",
"- Provides a familiar SQL interface for querying vector embeddings and relational data.\n",
"\n",
"Timescale Vector is cloud PostgreSQL for AI that scales with you from POC to production:\n",
"`Timescale Vector` is cloud `PostgreSQL` for AI that scales with you from POC to production:\n",
"- Simplifies operations by enabling you to store relational metadata, vector embeddings, and time-series data in a single database.\n",
"- Benefits from rock-solid PostgreSQL foundation with enterprise-grade feature liked streaming backups and replication, high-availability and row-level security.\n",
"- Benefits from rock-solid PostgreSQL foundation with enterprise-grade features like streaming backups and replication, high availability and row-level security.\n",
"- Enables a worry-free experience with enterprise-grade security and compliance.\n",
"\n",
"## How to access Timescale Vector\n",
"Timescale Vector is available on [Timescale](https://www.timescale.com/ai?utm_campaign=vectorlaunch&utm_source=langchain&utm_medium=referral), the cloud PostgreSQL platform. (There is no self-hosted version at this time.)\n",
"\n",
"`Timescale Vector` is available on [Timescale](https://www.timescale.com/ai?utm_campaign=vectorlaunch&utm_source=langchain&utm_medium=referral), the cloud PostgreSQL platform. (There is no self-hosted version at this time.)\n",
"\n",
"LangChain users get a 90-day free trial for Timescale Vector.\n",
"- To get started, [signup](https://console.cloud.timescale.com/signup?utm_campaign=vectorlaunch&utm_source=langchain&utm_medium=referral) to Timescale, create a new database and follow this notebook!\n",
"- See the [Timescale Vector explainer blog](https://www.timescale.com/blog/how-we-made-postgresql-the-best-vector-database/?utm_campaign=vectorlaunch&utm_source=langchain&utm_medium=referral) for more details and performance benchmarks.\n",
"- See the [installation instructions](https://github.com/timescale/python-vector) for more details on using Timescale Vector in python."
"- See the [installation instructions](https://github.com/timescale/python-vector) for more details on using Timescale Vector in Python."
]
},
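A minimal sketch of creating a collection from documents, assuming you have a service URL from the Timescale console (the file path, collection name, and splitter settings are illustrative):

```python
from langchain.document_loaders import TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.timescalevector import TimescaleVector

docs = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_documents(
    TextLoader("./state_of_the_union.txt").load()
)

db = TimescaleVector.from_documents(
    embedding=OpenAIEmbeddings(),
    documents=docs,
    collection_name="state_of_the_union_test",
    service_url="postgres://tsdbadmin:<password>@<host>:<port>/tsdb",  # from your Timescale console
)
results = db.similarity_search_with_score("What did the president say about Ketanji Brown Jackson?")
```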
{
@@ -1726,7 +1728,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@@ -1,5 +1,43 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Vearch\n",
"\n",
">[Vearch](https://vearch.readthedocs.io) is the vector search infrastructure for deeping learning and AI applications.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setting up\n",
"\n",
"Follow [instructions](https://vearch.readthedocs.io/en/latest/quick-start-guide.html#)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install vearch\n",
"\n",
"# OR\n",
"\n",
"!pip install vearch_cluster"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Example"
]
},
{
"cell_type": "code",
"execution_count": 2,
@@ -464,7 +502,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.13 ('vearch_cluster_langchain')",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -478,9 +516,8 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.10.12"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "f1da10a89896267ed34b497c9568817f36cc7ea79826b5cfca4d96376f5b4835"
@@ -488,5 +525,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}

View File

@@ -4,27 +4,21 @@
"cell_type": "markdown",
"id": "9eb8dfa6fdb71ef5",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"# Zep\n",
"## VectorStore Example for [Zep](https://docs.getzep.com/) - Fast, scalable building blocks for LLM Apps\n",
"\n",
"### More on Zep:\n",
">[Zep](https://docs.getzep.com/) is an open-source platform for LLM apps. Go from a prototype\n",
">built in LangChain or LlamaIndex, or a custom app, to production in minutes without rewriting code.\n",
"\n",
"Zep is an open source platform for productionizing LLM apps. Go from a prototype\n",
"built in LangChain or LlamaIndex, or a custom app, to production in minutes without\n",
"rewriting code.\n",
"## Key Features:\n",
"\n",
"## Fast, Scalable Building Blocks for LLM Apps\n",
"Zep is an open source platform for productionizing LLM apps. Go from a prototype\n",
"built in LangChain or LlamaIndex, or a custom app, to production in minutes without\n",
"rewriting code.\n",
"\n",
"Key Features:\n",
"\n",
"- **Fast!** Zep operates independently of the your chat loop, ensuring a snappy user experience.\n",
"- **Chat History Memory, Archival, and Enrichment**, populate your prompts with relevant chat history, sumamries, named entities, intent data, and more.\n",
"- **Fast!** `Zep` operates independently of your chat loop, ensuring a snappy user experience.\n",
"- **Chat History Memory, Archival, and Enrichment**, populate your prompts with relevant chat history, summaries, named entities, intent data, and more.\n",
"- **Vector Search over Chat History and Documents** Automatic embedding of documents, chat histories, and summaries. Use Zep's similarity or native MMR Re-ranked search to find the most relevant.\n",
"- **Manage Users and their Chat Sessions** Users and their Chat Sessions are first-class citizens in Zep, allowing you to manage user interactions with your bots or agents easily.\n",
"- **Records Retention and Privacy Compliance** Comply with corporate and regulatory mandates for records retention while ensuring compliance with privacy regulations such as CCPA and GDPR. Fulfill *Right To Be Forgotten* requests with a single API call\n",
@@ -34,14 +28,15 @@
"and searching your user's chat history.\n",
"\n",
"## Installation\n",
"Follow the [Zep Quickstart Guide](https://docs.getzep.com/deployment/quickstart/) to install and get started with Zep.\n",
"\n",
"## Usage\n",
"Follow the [Zep Quickstart Guide](https://docs.getzep.com/deployment/quickstart/) to install and get started with Zep.\n",
"\n",
"You'll need your Zep API URL and optionally an API key to use the Zep VectorStore. \n",
"See the [Zep docs](https://docs.getzep.com) for more information.\n",
"\n",
"In the examples below, we're using Zep's auto-embedding feature which automatically embed documents on the Zep server \n",
"## Usage\n",
"\n",
"In the examples below, we're using Zep's auto-embedding feature which automatically embeds documents on the Zep server \n",
"using low-latency embedding models.\n",
"\n",
"## Note\n",
@@ -55,7 +50,10 @@
"cell_type": "markdown",
"id": "9a3a11aab1412d98",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"## Load or create a Collection from documents"
@@ -70,7 +68,10 @@
"end_time": "2023-08-13T01:07:50.672390Z",
"start_time": "2023-08-13T01:07:48.777799Z"
},
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
@@ -124,7 +125,10 @@
"end_time": "2023-08-13T01:07:53.807663Z",
"start_time": "2023-08-13T01:07:50.671241Z"
},
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
@@ -170,7 +174,10 @@
"cell_type": "markdown",
"id": "94ca9dfa7d0ecaa5",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"## Simarility Search Query over the Collection"
@@ -185,7 +192,10 @@
"end_time": "2023-08-13T01:07:54.195988Z",
"start_time": "2023-08-13T01:07:53.808550Z"
},
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
@@ -237,7 +247,10 @@
"cell_type": "markdown",
"id": "e02b61a9af0b2c80",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"## Search over Collection Re-ranked by MMR\n",
@@ -254,7 +267,10 @@
"end_time": "2023-08-13T01:07:54.394873Z",
"start_time": "2023-08-13T01:07:54.180901Z"
},
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
@@ -304,7 +320,10 @@
"cell_type": "markdown",
"id": "42455e31d4ab0d68",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"# Filter by Metadata\n",
@@ -321,7 +340,10 @@
"end_time": "2023-08-13T01:08:06.323569Z",
"start_time": "2023-08-13T01:07:54.381822Z"
},
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
@@ -367,10 +389,13 @@
"cell_type": "markdown",
"id": "5b225f3ae1e61de8",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"### We see results from both books. Note the `source` metadata"
"We see results from both books. Note the `source` metadata"
]
},
{
@@ -382,7 +407,10 @@
"end_time": "2023-08-13T01:08:06.504769Z",
"start_time": "2023-08-13T01:08:06.325435Z"
},
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
@@ -431,10 +459,13 @@
"cell_type": "markdown",
"id": "7b81d7cae351a1ec",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"### Let's try again using a filter for only the Sherlock Holmes document."
"Now, we set up a filter"
]
},
{
@@ -446,7 +477,10 @@
"end_time": "2023-08-13T01:08:06.672836Z",
"start_time": "2023-08-13T01:08:06.505944Z"
},
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
@@ -515,7 +549,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -529,7 +563,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@@ -25,7 +25,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "aa761a93-caa1-4e56-b901-5ff50a89bc82",
"metadata": {},
"outputs": [],
@@ -35,10 +35,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 12,
"id": "5944a18a-95eb-44ce-a66f-5f50db1d3e1f",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"[ThreadMessage(id='msg_qgxkD5kvkZyl0qOaL4czPFkZ', assistant_id='asst_0T8S7CJuUa4Y4hm1PF6n62v7', content=[MessageContentText(text=Text(annotations=[], value='The result of the calculation \\\\(10 - 4^{2.7}\\\\) is approximately \\\\(-32.224\\\\).'), type='text')], created_at=1700169519, file_ids=[], metadata={}, object='thread.message', role='assistant', run_id='run_aH3ZgSWNk3vYIBQm3vpE8tr4', thread_id='thread_9K6cYfx1RBh0pOWD8SxwVWW9')]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"interpreter_assistant = OpenAIAssistantRunnable.create_assistant(\n",
" name=\"langchain assistant\",\n",
@@ -72,19 +83,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"id": "cc0cba70-8507-498d-92ac-fe47133db200",
"metadata": {},
"outputs": [],
"source": [
"import getpass\n",
"\n",
"from langchain.tools import DuckDuckGoSearchRun, E2BDataAnalysisTool\n",
"\n",
"tools = [E2BDataAnalysisTool(api_key=\"...\"), DuckDuckGoSearchRun()]"
"tools = [E2BDataAnalysisTool(api_key=getpass.getpass()), DuckDuckGoSearchRun()]"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"id": "91e6973d-3d9a-477f-99e2-4aaad16004ec",
"metadata": {},
"outputs": [],
@@ -103,15 +116,31 @@
"id": "78fa9320-06fc-4cbc-a3cf-39aaf2427080",
"metadata": {},
"source": [
"#### Using AgentExecutor"
"#### Using AgentExecutor\n",
"\n",
"The OpenAIAssistantRunnable is compatible with the AgentExecutor, so we can pass it in as an agent directly to the executor. The AgentExecutor handles calling the invoked tools and uploading the tool outputs back to the Assistants API. Plus it comes with built-in LangSmith tracing."
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"id": "e38007a4-fcc1-419b-9ae4-70d36c3fc1cd",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"{'content': \"What's the weather in SF today divided by 2.7\",\n",
" 'output': \"The search results indicate that the weather in San Francisco is 67 °F. Now I will divide this temperature by 2.7 and provide you with the result. Please note that this is a mathematical operation and does not represent a meaningful physical quantity.\\n\\nLet's calculate 67 °F divided by 2.7.\\nThe result of dividing the current temperature in San Francisco, which is 67 °F, by 2.7 is approximately 24.815.\",\n",
" 'thread_id': 'thread_hcpYI0tfpB9mHa9d95W7nK2B',\n",
" 'run_id': 'run_qOuVmPXS9xlV3XNPcfP8P9W2'}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain.agents import AgentExecutor\n",
"\n",
@@ -119,17 +148,28 @@
"agent_executor.invoke({\"content\": \"What's the weather in SF today divided by 2.7\"})"
]
},
{
"cell_type": "markdown",
"id": "db6b9cbf-dd54-4346-be6c-842e08756ccc",
"metadata": {},
"source": [
":::tip [LangSmith trace](https://smith.langchain.com/public/6750972b-0849-4beb-a8bb-353d424ffade/r)\n",
":::"
]
},
{
"cell_type": "markdown",
"id": "6bf4199a-eed1-485a-8da3-aed948c0e1e2",
"metadata": {},
"source": [
"#### Custom execution"
"#### Custom execution\n",
"\n",
"Or with LCEL we can easily write our own execution loop for running the assistant. This gives us full control over execution."
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"id": "357361ff-f54d-4fd0-b69b-77689f56f40e",
"metadata": {},
"outputs": [],
@@ -145,7 +185,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"id": "864e7f9b-0501-4bb7-8aad-a7aa19b601af",
"metadata": {},
"outputs": [],
@@ -177,34 +217,86 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"id": "5ad6bb07-aac4-4b71-9e67-cc177fcbc537",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"e2b_data_analysis {'python_code': 'result = 10 - 4 ** 2.7\\nprint(result)'} {\"stdout\": \"-32.22425314473263\", \"stderr\": \"\", \"artifacts\": []}\n",
"\n",
"\\( 10 - 4^{2.7} \\) equals approximately -32.224.\n"
]
}
],
"source": [
"response = execute_agent(agent, tools, {\"content\": \"What's 10 - 4 raised to the 2.7\"})\n",
"print(response.return_values[\"output\"])"
]
},
{
"cell_type": "markdown",
"id": "6fd9f9c0-4b07-4f71-a784-88ee7bd4b089",
"metadata": {},
"source": [
"## Using existing Thread\n",
"\n",
"To use an existing thread we just need to pass the \"thread_id\" in when invoking the agent."
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "f55a3a3a-8169-491e-aa15-cf30a2b230df",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"e2b_data_analysis {'python_code': 'result = 10 - 4 ** 2.7 + 17.241\\nprint(result)'} {\"stdout\": \"-14.983253144732629\", \"stderr\": \"\", \"artifacts\": []}\n",
"\n",
"\\( 10 - 4^{2.7} + 17.241 \\) equals approximately -14.983.\n"
]
}
],
"source": [
"next_response = execute_agent(\n",
" agent,\n",
" tools,\n",
" {\"content\": \"now add 17.241\", \"thread_id\": response.return_values[\"thread_id\"]},\n",
")\n",
"print(next_response.return_values[\"output\"])"
]
},
{
"cell_type": "markdown",
"id": "1b97ee01-a657-452c-ba7f-95227ec7056e",
"metadata": {},
"source": [
"## Using existing Assistant\n",
"\n",
"To use an existing Assistant we can initialize the `OpenAIAssistantRunnable` directly with an `assistant_id`."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f55a3a3a-8169-491e-aa15-cf30a2b230df",
"id": "08ef6ef5-e8bc-4c69-882d-65273655f6a7",
"metadata": {},
"outputs": [],
"source": [
"next_response = execute_agent(\n",
" agent, tools, {\"content\": \"now add 17.241\", \"thread_id\": response.thread_id}\n",
")\n",
"print(next_response.return_values[\"output\"])"
"agent = OpenAIAssistantRunnable(assistant_id=\"<ASSISTANT_ID>\", as_agent=True)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "poetry-venv",
"language": "python",
"name": "python3"
"name": "poetry-venv"
},
"language_info": {
"codemirror_mode": {

View File

@@ -18,23 +18,23 @@ This encompasses several key modules.
**[Document loaders](/docs/modules/data_connection/document_loaders/)**
Load documents from many different sources.
**Document loaders** load documents from many different sources.
LangChain provides over 100 different document loaders as well as integrations with other major providers in the space,
like AirByte and Unstructured.
We provide integrations to load all types of documents (HTML, PDF, code) from all types of locations (private s3 buckets, public websites).
LangChain provides integrations to load all types of documents (HTML, PDF, code) from all types of locations (private S3 buckets, public websites).
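For example, a minimal sketch of loading a local text file with `TextLoader` (the path here is only illustrative):

```python
from langchain.document_loaders import TextLoader

# Load a local file into a list of Document objects (one per file for TextLoader).
docs = TextLoader("./state_of_the_union.txt").load()
print(docs[0].metadata)  # {'source': './state_of_the_union.txt'}
```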
**[Document transformers](/docs/modules/data_connection/document_transformers/)**
A key part of retrieval is fetching only the relevant parts of documents.
This involves several transformation steps in order to best prepare the documents for retrieval.
This involves several transformation steps to prepare the documents for retrieval.
One of the primary ones here is splitting (or chunking) a large document into smaller chunks.
LangChain provides several different algorithms for doing this, as well as logic optimized for specific document types (code, markdown, etc).
LangChain provides several transformation algorithms for doing this, as well as logic optimized for specific document types (code, markdown, etc).
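As a rough sketch, splitting loaded documents with the recursive character splitter might look like this (the chunk sizes are illustrative):

```python
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

docs = TextLoader("./state_of_the_union.txt").load()

# Split each document into overlapping ~500-character chunks for indexing.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
splits = splitter.split_documents(docs)
```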
**[Text embedding models](/docs/modules/data_connection/text_embedding/)**
Another key part of retrieval has become creating embeddings for documents.
Another key part of retrieval is creating embeddings for documents.
Embeddings capture the semantic meaning of the text, allowing you to quickly and
efficiently find other pieces of text that are similar.
efficiently find other pieces of a text that are similar.
LangChain provides integrations with over 25 different embedding providers and methods,
from open-source to proprietary API,
allowing you to choose the one best suited for your needs.
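A minimal sketch, assuming an OpenAI API key is configured in the environment:

```python
from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

# Embed a single query and a batch of documents; both return plain lists of floats.
query_vector = embeddings.embed_query("What did the president say about justice?")
doc_vectors = embeddings.embed_documents(["Justice Breyer is retiring.", "The economy grew."])
```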
@@ -51,7 +51,7 @@ LangChain exposes a standard interface, allowing you to easily swap between vect
Once the data is in the database, you still need to retrieve it.
LangChain supports many different retrieval algorithms and is one of the places where we add the most value.
We support basic methods that are easy to get started - namely simple semantic search.
LangChain supports basic methods that are easy to get started - namely simple semantic search.
However, we have also added a collection of algorithms on top of this to increase performance.
These include:
@@ -60,3 +60,13 @@ These include:
- [Ensemble Retriever](/docs/modules/data_connection/retrievers/ensemble): Sometimes you may want to retrieve documents from multiple different sources, or using multiple different algorithms. The ensemble retriever allows you to easily do this.
- And more!
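For example, a small sketch that indexes split documents into FAISS and then queries them through the standard retriever interface (all names and paths are illustrative):

```python
from langchain.document_loaders import TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

docs = TextLoader("./state_of_the_union.txt").load()
splits = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(docs)

# Index the chunks, then expose the store through the retriever interface.
db = FAISS.from_documents(splits, OpenAIEmbeddings())
retriever = db.as_retriever(search_kwargs={"k": 4})
relevant_docs = retriever.get_relevant_documents(
    "What did the president say about Ketanji Brown Jackson?"
)
```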
**[Indexing](/docs/modules/data_connection/indexing)**
The LangChain **Indexing API** syncs your data from any source into a vector store,
helping you:
- Avoid writing duplicated content into the vector store
- Avoid re-writing unchanged content
- Avoid re-computing embeddings over unchanged content
All of which should save you time and money, as well as improve your vector search results.
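A minimal sketch of the Indexing API, assuming a vector store that supports deletion by id (Chroma is used here purely for illustration):

```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes import SQLRecordManager, index
from langchain.schema import Document
from langchain.vectorstores import Chroma

vectorstore = Chroma(collection_name="my_docs", embedding_function=OpenAIEmbeddings())
record_manager = SQLRecordManager("chroma/my_docs", db_url="sqlite:///record_manager_cache.sql")
record_manager.create_schema()

docs = [Document(page_content="hello world", metadata={"source": "a.txt"})]

# Re-running this call skips unchanged documents, so nothing is re-embedded or duplicated.
index(docs, record_manager, vectorstore, cleanup="incremental", source_id_key="source")
```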

View File

@@ -57,11 +57,14 @@
"1. **Load**: First we need to load our data. We'll use [DocumentLoaders](/docs/modules/data_connection/document_loaders/) for this.\n",
"2. **Split**: [Text splitters](/docs/modules/data_connection/document_transformers/) break large `Documents` into smaller chunks. This is useful both for indexing data and for passing it in to a model, since large chunks are harder to search over and won't in a model's finite context window.\n",
"3. **Store**: We need somewhere to store and index our splits, so that they can later be searched over. This is often done using a [VectorStore](/docs/modules/data_connection/vectorstores/) and [Embeddings](/docs/modules/data_connection/text_embedding/) model.\n",
"\n",
"![index_diagram](/img/rag_indexing.png)\n",
"\n",
"#### Retrieval and generation\n",
"4. **Retrieve**: Given a user input, relevant splits are retrieved from storage using a [Retriever](/docs/modules/data_connection/retrievers/).\n",
"5. **Generate**: A [ChatModel](/docs/modules/model_io/chat_models) / [LLM](/docs/modules/model_io/llms/) produces an answer using a prompt that includes the question and the retrieved data\n",
"\n",
"![flow.jpeg](/img/qa_flow.jpeg)"
"![retrieval_diagram](/img/rag_retrieval_generation.png)"
]
},
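As a rough, self-contained sketch of how these five steps fit together (the loader URL, chunk sizes, and model here are illustrative, not necessarily what the rest of this notebook uses):

```python
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# 1-3: load, split, store
docs = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/").load()
splits = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200).split_documents(docs)
vectorstore = Chroma.from_documents(splits, OpenAIEmbeddings())

# 4-5: retrieve relevant chunks and generate an answer grounded in them
retriever = vectorstore.as_retriever()
prompt = ChatPromptTemplate.from_template(
    "Answer using only this context:\n{context}\n\nQuestion: {question}"
)
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | ChatOpenAI(model="gpt-3.5-turbo")
    | StrOutputParser()
)
chain.invoke("What is task decomposition?")
```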
{
@@ -1053,10 +1056,10 @@
},
{
"cell_type": "markdown",
"id": "e6e5191f-43e6-4fa0-9ba5-db002fcaacf3",
"id": "fdf6c7e0-84f8-4747-b2ae-e84315152bd9",
"metadata": {},
"source": [
"Of course, we've written here the logic for using chat history when it's provided, but we haven't actually added functionality for storing chat history for each user session. This is something that's fairly application specific and is usually best handled outside of LangChain."
"Here we've gone over how to add chain logic for incorporating historical outputs. But how do we actually store and retrieve historical outputs for different sessions? For that check out the LCEL [How to add message history (memory)](/docs/expression_language/how_to/message_history) page."
]
},
{

Binary file not shown.

Before

Width:  |  Height:  |  Size: 173 KiB

BIN
docs/static/img/rag_indexing.png vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 129 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

16095
docs/static/svg/langchain_stack.svg vendored Normal file

File diff suppressed because it is too large Load Diff

After

Width:  |  Height:  |  Size: 956 KiB

View File

@@ -492,10 +492,18 @@
"source": "/docs/integrations/providers/cassandra",
"destination": "/docs/integrations/providers/astradb"
},
{
"source": "/docs/integrations/vectorstores/vectorstores/semadb",
"destination": "/docs/integrations/vectorstores/semadb"
},
{
"source": "/docs/integrations/vectorstores/cassandra",
"destination": "/docs/integrations/vectorstores/astradb"
},
{
"source": "/docs/integrations/vectorstores/async_faiss",
"destination": "/docs/integrations/vectorstores/faiss_async"
},
{
"source": "/docs/integrations/cerebriumai",
"destination": "/docs/integrations/providers/cerebriumai"

View File

@@ -7,7 +7,7 @@ from langchain_cli.namespaces import app as app_namespace
from langchain_cli.namespaces import template as template_namespace
from langchain_cli.utils.packages import get_langserve_export, get_package_root
__version__ = "0.0.17"
__version__ = "0.0.19"
app = typer.Typer(no_args_is_help=True, add_completion=False)
app.add_typer(

View File

@@ -110,6 +110,10 @@ def new(
readme_contents = readme.read_text()
readme.write_text(readme_contents.replace("__app_name__", app_name))
pyproject = destination_dir / "pyproject.toml"
pyproject_contents = pyproject.read_text()
pyproject.write_text(pyproject_contents.replace("__app_name__", app_name))
# add packages if specified
if has_packages:
add(package, project_dir=destination_dir, pip=pip_bool)

View File

@@ -6,7 +6,7 @@ RUN poetry config virtualenvs.create false
WORKDIR /code
COPY ./pyproject.toml ./poetry.lock* ./
COPY ./pyproject.toml ./README.md ./poetry.lock* ./
COPY ./packages ./packages

View File

@@ -4,11 +4,15 @@ version = "0.1.0"
description = ""
authors = ["Your Name <you@example.com>"]
readme = "README.md"
packages = [
{ include = "app" },
]
[tool.poetry.dependencies]
python = "^3.11"
uvicorn = "^0.23.2"
langserve = {extras = ["server"], version = ">=0.0.22"}
langserve = {extras = ["server"], version = ">=0.0.30"}
pydantic = "<2"
[tool.poetry.group.dev.dependencies]

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain-cli"
version = "0.0.17"
version = "0.0.19"
description = "CLI for interacting with LangChain"
authors = ["Erick Friis <erick@langchain.dev>"]
readme = "README.md"

56
libs/core/Makefile Normal file
View File

@@ -0,0 +1,56 @@
.PHONY: all format lint test tests test_watch integration_tests docker_tests help extended_tests
# Default target executed when no arguments are given to make.
all: help
# Define a variable for the test file path.
TEST_FILE ?= tests/unit_tests/
test:
poetry run pytest $(TEST_FILE)
tests:
poetry run pytest $(TEST_FILE)
test_watch:
poetry run ptw --snapshot-update --now . -- -x tests/unit_tests
######################
# LINTING AND FORMATTING
######################
# Define a variable for Python and notebook files.
PYTHON_FILES=.
lint format: PYTHON_FILES=.
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/experimental --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
lint lint_diff:
./scripts/check_pydantic.sh .
./scripts/check_imports.sh
poetry run ruff .
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
[ "$(PYTHON_FILES)" = "" ] || poetry run mypy $(PYTHON_FILES)
format format_diff:
poetry run ruff format $(PYTHON_FILES)
poetry run ruff --select I --fix $(PYTHON_FILES)
spell_check:
poetry run codespell --toml pyproject.toml
spell_fix:
poetry run codespell --toml pyproject.toml -w
######################
# HELP
######################
help:
@echo '----'
@echo 'format - run code formatters'
@echo 'lint - run linters'
@echo 'test - run unit tests'
@echo 'tests - run unit tests'
@echo 'test TEST_FILE=<test_file> - run all tests in file'
@echo 'test_watch - run unit tests in watch mode'

1
libs/core/README.md Normal file
View File

@@ -0,0 +1 @@
# langchain-core

View File

@@ -0,0 +1,7 @@
from importlib import metadata
try:
__version__ = metadata.version(__package__)
except metadata.PackageNotFoundError:
# Case where package metadata is not available.
__version__ = ""

View File

@@ -0,0 +1,29 @@
"""Helper functions for managing the LangChain API.
This module is only relevant for LangChain developers, not for users.
.. warning::
This module and its submodules are for internal use only. Do not use them
in your own code. We may change the API at any time with no warning.
"""
from .deprecation import (
LangChainDeprecationWarning,
deprecated,
suppress_langchain_deprecation_warning,
surface_langchain_deprecation_warnings,
warn_deprecated,
)
from .path import as_import_path, get_relative_path
__all__ = [
"as_import_path",
"deprecated",
"get_relative_path",
"LangChainDeprecationWarning",
"suppress_langchain_deprecation_warning",
"surface_langchain_deprecation_warnings",
"warn_deprecated",
]

View File

@@ -0,0 +1,341 @@
"""Helper functions for deprecating parts of the LangChain API.
This module was adapted from matplotlib's _api/deprecation.py module:
https://github.com/matplotlib/matplotlib/blob/main/lib/matplotlib/_api/deprecation.py
.. warning::
This module is for internal use only. Do not use it in your own code.
We may change the API at any time with no warning.
"""
import contextlib
import functools
import inspect
import warnings
from typing import Any, Callable, Generator, Type, TypeVar
class LangChainDeprecationWarning(DeprecationWarning):
"""A class for issuing deprecation warnings for LangChain users."""
class LangChainPendingDeprecationWarning(PendingDeprecationWarning):
"""A class for issuing deprecation warnings for LangChain users."""
# PUBLIC API
T = TypeVar("T", Type, Callable)
def deprecated(
since: str,
*,
message: str = "",
name: str = "",
alternative: str = "",
pending: bool = False,
obj_type: str = "",
addendum: str = "",
removal: str = "",
) -> Callable[[T], T]:
"""Decorator to mark a function, a class, or a property as deprecated.
When deprecating a classmethod, a staticmethod, or a property, the
``@deprecated`` decorator should go *under* ``@classmethod`` and
``@staticmethod`` (i.e., `deprecated` should directly decorate the
underlying callable), but *over* ``@property``.
When deprecating a class ``C`` intended to be used as a base class in a
multiple inheritance hierarchy, ``C`` *must* define an ``__init__`` method
(if ``C`` instead inherited its ``__init__`` from its own base class, then
``@deprecated`` would mess up ``__init__`` inheritance when installing its
own (deprecation-emitting) ``C.__init__``).
Parameters are the same as for `warn_deprecated`, except that *obj_type*
defaults to 'class' if decorating a class, 'attribute' if decorating a
property, and 'function' otherwise.
Arguments:
since : str
The release at which this API became deprecated.
message : str, optional
Override the default deprecation message. The %(since)s,
%(name)s, %(alternative)s, %(obj_type)s, %(addendum)s,
and %(removal)s format specifiers will be replaced by the
values of the respective arguments passed to this function.
name : str, optional
The name of the deprecated object.
alternative : str, optional
An alternative API that the user may use in place of the
deprecated API. The deprecation warning will tell the user
about this alternative if provided.
pending : bool, optional
If True, uses a PendingDeprecationWarning instead of a
DeprecationWarning. Cannot be used together with removal.
obj_type : str, optional
The object type being deprecated.
addendum : str, optional
Additional text appended directly to the final message.
removal : str, optional
The expected removal version. With the default (an empty
string), a removal version is automatically computed from
since. Set to other Falsy values to not schedule a removal
date. Cannot be used together with pending.
Examples
--------
.. code-block:: python
@deprecated('1.4.0')
def the_function_to_deprecate():
pass
"""
def deprecate(
obj: T,
*,
_obj_type: str = obj_type,
_name: str = name,
_message: str = message,
_alternative: str = alternative,
_pending: bool = pending,
_addendum: str = addendum,
) -> T:
"""Implementation of the decorator returned by `deprecated`."""
if isinstance(obj, type):
if not _obj_type:
_obj_type = "class"
wrapped = obj.__init__ # type: ignore
_name = _name or obj.__name__
old_doc = obj.__doc__
def finalize(wrapper: Callable[..., Any], new_doc: str) -> T:
"""Finalize the deprecation of a class."""
try:
obj.__doc__ = new_doc
except AttributeError: # Can't set on some extension objects.
pass
obj.__init__ = functools.wraps(obj.__init__)( # type: ignore[misc]
wrapper
)
return obj
elif isinstance(obj, property):
if not _obj_type:
_obj_type = "attribute"
wrapped = None
_name = _name or obj.fget.__name__
old_doc = obj.__doc__
class _deprecated_property(type(obj)): # type: ignore
"""A deprecated property."""
def __get__(self, instance, owner=None): # type: ignore
if instance is not None or owner is not None:
emit_warning()
return super().__get__(instance, owner)
def __set__(self, instance, value): # type: ignore
if instance is not None:
emit_warning()
return super().__set__(instance, value)
def __delete__(self, instance): # type: ignore
if instance is not None:
emit_warning()
return super().__delete__(instance)
def __set_name__(self, owner, set_name): # type: ignore
nonlocal _name
if _name == "<lambda>":
_name = set_name
def finalize(_: Any, new_doc: str) -> Any: # type: ignore
"""Finalize the property."""
return _deprecated_property(
fget=obj.fget, fset=obj.fset, fdel=obj.fdel, doc=new_doc
)
else:
if not _obj_type:
_obj_type = "function"
wrapped = obj
_name = _name or obj.__name__ # type: ignore
old_doc = wrapped.__doc__
def finalize( # type: ignore
wrapper: Callable[..., Any], new_doc: str
) -> T:
"""Wrap the wrapped function using the wrapper and update the docstring.
Args:
wrapper: The wrapper function.
new_doc: The new docstring.
Returns:
The wrapped function.
"""
wrapper = functools.wraps(wrapped)(wrapper)
wrapper.__doc__ = new_doc
return wrapper
def emit_warning() -> None:
"""Emit the warning."""
warn_deprecated(
since,
message=_message,
name=_name,
alternative=_alternative,
pending=_pending,
obj_type=_obj_type,
addendum=_addendum,
removal=removal,
)
def warning_emitting_wrapper(*args: Any, **kwargs: Any) -> Any:
"""Wrapper for the original wrapped callable that emits a warning.
Args:
*args: The positional arguments to the function.
**kwargs: The keyword arguments to the function.
Returns:
The return value of the function being wrapped.
"""
emit_warning()
return wrapped(*args, **kwargs)
old_doc = inspect.cleandoc(old_doc or "").strip("\n")
if not old_doc:
new_doc = "[*Deprecated*]"
else:
new_doc = f"[*Deprecated*] {old_doc}"
# Modify the docstring to include a deprecation notice.
notes_header = "\nNotes\n-----"
components = [
message,
f"Use {alternative} instead." if alternative else "",
addendum,
]
details = " ".join([component.strip() for component in components if component])
new_doc += (
f"[*Deprecated*] {old_doc}\n"
f"{notes_header if notes_header not in old_doc else ''}\n"
f".. deprecated:: {since}\n"
f" {details}"
)
return finalize(warning_emitting_wrapper, new_doc)
return deprecate
@contextlib.contextmanager
def suppress_langchain_deprecation_warning() -> Generator[None, None, None]:
"""Context manager to suppress LangChainDeprecationWarning."""
with warnings.catch_warnings():
warnings.simplefilter("ignore", LangChainDeprecationWarning)
warnings.simplefilter("ignore", LangChainPendingDeprecationWarning)
yield
def warn_deprecated(
since: str,
*,
message: str = "",
name: str = "",
alternative: str = "",
pending: bool = False,
obj_type: str = "",
addendum: str = "",
removal: str = "",
) -> None:
"""Display a standardized deprecation.
Arguments:
since : str
The release at which this API became deprecated.
message : str, optional
Override the default deprecation message. The %(since)s,
%(name)s, %(alternative)s, %(obj_type)s, %(addendum)s,
and %(removal)s format specifiers will be replaced by the
values of the respective arguments passed to this function.
name : str, optional
The name of the deprecated object.
alternative : str, optional
An alternative API that the user may use in place of the
deprecated API. The deprecation warning will tell the user
about this alternative if provided.
pending : bool, optional
If True, uses a PendingDeprecationWarning instead of a
DeprecationWarning. Cannot be used together with removal.
obj_type : str, optional
The object type being deprecated.
addendum : str, optional
Additional text appended directly to the final message.
removal : str, optional
The expected removal version. With the default (an empty
string), a removal version is automatically computed from
since. Set to other Falsy values to not schedule a removal
date. Cannot be used together with pending.
"""
if pending and removal:
raise ValueError("A pending deprecation cannot have a scheduled removal")
if not pending:
if not removal:
removal = f"in {removal}" if removal else "within ?? minor releases"
raise NotImplementedError(
f"Need to determine which default deprecation schedule to use. "
f"{removal}"
)
else:
removal = f"in {removal}"
if not message:
message = ""
if obj_type:
message += f"The {obj_type} `{name}`"
else:
message += f"`{name}`"
if pending:
message += " will be deprecated in a future version"
else:
message += f" was deprecated in LangChain {since}"
if removal:
message += f" and will be removed {removal}"
if alternative:
message += f". Use {alternative} instead."
if addendum:
message += f" {addendum}"
warning_cls = (
LangChainPendingDeprecationWarning if pending else LangChainDeprecationWarning
)
warning = warning_cls(message)
warnings.warn(warning, category=LangChainDeprecationWarning, stacklevel=2)
def surface_langchain_deprecation_warnings() -> None:
"""Unmute LangChain deprecation warnings."""
warnings.filterwarnings(
"default",
category=LangChainPendingDeprecationWarning,
)
warnings.filterwarnings(
"default",
category=LangChainDeprecationWarning,
)
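A minimal usage sketch of these helpers, with a hypothetical `old_function` (because `removal` is required for non-pending deprecations in the code above, it is passed explicitly):

```python
from langchain_core._api import deprecated, suppress_langchain_deprecation_warning


@deprecated("0.0.1", alternative="new_function", removal="0.2.0")
def old_function() -> str:
    """Do the old thing."""
    return "old"


old_function()  # emits a LangChainDeprecationWarning that points at new_function

with suppress_langchain_deprecation_warning():
    old_function()  # the warning is silenced inside this block
```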

View File

@@ -0,0 +1,36 @@
import os
from pathlib import Path
from typing import Optional, Union
HERE = Path(__file__).parent
# Get directory of langchain package
PACKAGE_DIR = HERE.parent
SEPARATOR = os.sep
def get_relative_path(
file: Union[Path, str], *, relative_to: Path = PACKAGE_DIR
) -> str:
"""Get the path of the file as a relative path to the package directory."""
if isinstance(file, str):
file = Path(file)
return str(file.relative_to(relative_to))
def as_import_path(
file: Union[Path, str],
*,
suffix: Optional[str] = None,
relative_to: Path = PACKAGE_DIR,
) -> str:
"""Path of the file as a LangChain import exclude langchain top namespace."""
if isinstance(file, str):
file = Path(file)
path = get_relative_path(file, relative_to=relative_to)
if file.is_file():
path = path[: -len(file.suffix)]
import_path = path.replace(SEPARATOR, ".")
if suffix:
import_path += "." + suffix
return import_path

View File

@@ -0,0 +1,74 @@
from __future__ import annotations
from typing import Any, Literal, Sequence, Union
from langchain_core.load.serializable import Serializable
from langchain_core.messages import BaseMessage
class AgentAction(Serializable):
"""A full description of an action for an ActionAgent to execute."""
tool: str
"""The name of the Tool to execute."""
tool_input: Union[str, dict]
"""The input to pass in to the Tool."""
log: str
"""Additional information to log about the action.
This log can be used in a few ways. First, it can be used to audit
what exactly the LLM predicted to lead to this (tool, tool_input).
Second, it can be used in future iterations to show the LLM's prior
thoughts. This is useful when (tool, tool_input) does not contain
full information about the LLM prediction (for example, any `thought`
before the tool/tool_input)."""
type: Literal["AgentAction"] = "AgentAction"
def __init__(
self, tool: str, tool_input: Union[str, dict], log: str, **kwargs: Any
):
"""Override init to support instantiation by position for backward compat."""
super().__init__(tool=tool, tool_input=tool_input, log=log, **kwargs)
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return whether or not the class is serializable."""
return True
class AgentActionMessageLog(AgentAction):
message_log: Sequence[BaseMessage]
"""Similar to log, this can be used to pass along extra
information about what exact messages were predicted by the LLM
before parsing out the (tool, tool_input). This is again useful
if (tool, tool_input) cannot be used to fully recreate the LLM
prediction, and you need that LLM prediction (for future agent iteration).
Compared to `log`, this is useful when the underlying LLM is a
ChatModel (and therefore returns messages rather than a string)."""
# Ignoring type because we're overriding the type from AgentAction.
# And this is the correct thing to do in this case.
# The type literal is used for serialization purposes.
type: Literal["AgentActionMessageLog"] = "AgentActionMessageLog" # type: ignore
class AgentFinish(Serializable):
"""The final return value of an ActionAgent."""
return_values: dict
"""Dictionary of return values."""
log: str
"""Additional information to log about the return value.
This is used to pass along the full LLM prediction, not just the parsed out
return value. For example, if the full LLM prediction was
`Final Answer: 2` you may want to just return `2` as a return value, but pass
along the full string as a `log` (for debugging or observability purposes).
"""
type: Literal["AgentFinish"] = "AgentFinish"
def __init__(self, return_values: dict, log: str, **kwargs: Any):
"""Override init to support instantiation by position for backward compat."""
super().__init__(return_values=return_values, log=log, **kwargs)
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return whether or not the class is serializable."""
return True
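A short illustration of the positional construction these `__init__` overrides preserve (the values are made up):

```python
from langchain_core.agents import AgentAction, AgentFinish

# Positional instantiation is kept for backward compatibility with older agents.
action = AgentAction("search", "weather in SF", "I should look up the weather first.\n")
finish = AgentFinish({"output": "It is sunny in SF."}, "Final Answer: It is sunny in SF.")

assert action.tool == "search"
assert finish.return_values["output"] == "It is sunny in SF."
```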

View File

@@ -0,0 +1,24 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any, Optional, Sequence
from langchain_core.outputs import Generation
RETURN_VAL_TYPE = Sequence[Generation]
class BaseCache(ABC):
"""Base interface for cache."""
@abstractmethod
def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
"""Look up based on prompt and llm_string."""
@abstractmethod
def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
"""Update cache based on prompt and llm_string."""
@abstractmethod
def clear(self, **kwargs: Any) -> None:
"""Clear cache that can take additional keyword arguments."""

View File

@@ -0,0 +1,65 @@
from langchain_core.callbacks.base import (
AsyncCallbackHandler,
BaseCallbackHandler,
BaseCallbackManager,
CallbackManagerMixin,
Callbacks,
ChainManagerMixin,
LLMManagerMixin,
RetrieverManagerMixin,
RunManagerMixin,
ToolManagerMixin,
)
from langchain_core.callbacks.manager import (
AsyncCallbackManager,
AsyncCallbackManagerForChainGroup,
AsyncCallbackManagerForChainRun,
AsyncCallbackManagerForLLMRun,
AsyncCallbackManagerForRetrieverRun,
AsyncCallbackManagerForToolRun,
AsyncParentRunManager,
AsyncRunManager,
BaseRunManager,
CallbackManager,
CallbackManagerForChainGroup,
CallbackManagerForChainRun,
CallbackManagerForLLMRun,
CallbackManagerForRetrieverRun,
CallbackManagerForToolRun,
ParentRunManager,
RunManager,
)
from langchain_core.callbacks.stdout import StdOutCallbackHandler
from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
__all__ = [
"RetrieverManagerMixin",
"LLMManagerMixin",
"ChainManagerMixin",
"ToolManagerMixin",
"Callbacks",
"CallbackManagerMixin",
"RunManagerMixin",
"BaseCallbackHandler",
"AsyncCallbackHandler",
"BaseCallbackManager",
"BaseRunManager",
"RunManager",
"ParentRunManager",
"AsyncRunManager",
"AsyncParentRunManager",
"CallbackManagerForLLMRun",
"AsyncCallbackManagerForLLMRun",
"CallbackManagerForChainRun",
"AsyncCallbackManagerForChainRun",
"CallbackManagerForToolRun",
"AsyncCallbackManagerForToolRun",
"CallbackManagerForRetrieverRun",
"AsyncCallbackManagerForRetrieverRun",
"CallbackManager",
"CallbackManagerForChainGroup",
"AsyncCallbackManager",
"AsyncCallbackManagerForChainGroup",
"StdOutCallbackHandler",
"StreamingStdOutCallbackHandler",
]

View File

@@ -0,0 +1,599 @@
"""Base callback handler that can be used to handle callbacks in langchain."""
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, TypeVar, Union
from uuid import UUID
from tenacity import RetryCallState
if TYPE_CHECKING:
from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.documents import Document
from langchain_core.messages import BaseMessage
from langchain_core.outputs import ChatGenerationChunk, GenerationChunk, LLMResult
class RetrieverManagerMixin:
"""Mixin for Retriever callbacks."""
def on_retriever_error(
self,
error: BaseException,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
**kwargs: Any,
) -> Any:
"""Run when Retriever errors."""
def on_retriever_end(
self,
documents: Sequence[Document],
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
**kwargs: Any,
) -> Any:
"""Run when Retriever ends running."""
class LLMManagerMixin:
"""Mixin for LLM callbacks."""
def on_llm_new_token(
self,
token: str,
*,
chunk: Optional[Union[GenerationChunk, ChatGenerationChunk]] = None,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
**kwargs: Any,
) -> Any:
"""Run on new LLM token. Only available when streaming is enabled.
Args:
token (str): The new token.
chunk (GenerationChunk | ChatGenerationChunk): The new generated chunk,
containing content and other information.
"""
def on_llm_end(
self,
response: LLMResult,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
**kwargs: Any,
) -> Any:
"""Run when LLM ends running."""
def on_llm_error(
self,
error: BaseException,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
**kwargs: Any,
) -> Any:
"""Run when LLM errors."""
class ChainManagerMixin:
"""Mixin for chain callbacks."""
def on_chain_end(
self,
outputs: Dict[str, Any],
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
**kwargs: Any,
) -> Any:
"""Run when chain ends running."""
def on_chain_error(
self,
error: BaseException,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
**kwargs: Any,
) -> Any:
"""Run when chain errors."""
def on_agent_action(
self,
action: AgentAction,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
**kwargs: Any,
) -> Any:
"""Run on agent action."""
def on_agent_finish(
self,
finish: AgentFinish,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
**kwargs: Any,
) -> Any:
"""Run on agent end."""
class ToolManagerMixin:
"""Mixin for tool callbacks."""
def on_tool_end(
self,
output: str,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
**kwargs: Any,
) -> Any:
"""Run when tool ends running."""
def on_tool_error(
self,
error: BaseException,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
**kwargs: Any,
) -> Any:
"""Run when tool errors."""
class CallbackManagerMixin:
"""Mixin for callback manager."""
def on_llm_start(
self,
serialized: Dict[str, Any],
prompts: List[str],
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> Any:
"""Run when LLM starts running."""
def on_chat_model_start(
self,
serialized: Dict[str, Any],
messages: List[List[BaseMessage]],
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> Any:
"""Run when a chat model starts running."""
raise NotImplementedError(
f"{self.__class__.__name__} does not implement `on_chat_model_start`"
)
def on_retriever_start(
self,
serialized: Dict[str, Any],
query: str,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> Any:
"""Run when Retriever starts running."""
def on_chain_start(
self,
serialized: Dict[str, Any],
inputs: Dict[str, Any],
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> Any:
"""Run when chain starts running."""
def on_tool_start(
self,
serialized: Dict[str, Any],
input_str: str,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> Any:
"""Run when tool starts running."""
class RunManagerMixin:
"""Mixin for run manager."""
def on_text(
self,
text: str,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
**kwargs: Any,
) -> Any:
"""Run on arbitrary text."""
def on_retry(
self,
retry_state: RetryCallState,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
**kwargs: Any,
) -> Any:
"""Run on a retry event."""
class BaseCallbackHandler(
LLMManagerMixin,
ChainManagerMixin,
ToolManagerMixin,
RetrieverManagerMixin,
CallbackManagerMixin,
RunManagerMixin,
):
"""Base callback handler that handles callbacks from LangChain."""
raise_error: bool = False
run_inline: bool = False
@property
def ignore_llm(self) -> bool:
"""Whether to ignore LLM callbacks."""
return False
@property
def ignore_retry(self) -> bool:
"""Whether to ignore retry callbacks."""
return False
@property
def ignore_chain(self) -> bool:
"""Whether to ignore chain callbacks."""
return False
@property
def ignore_agent(self) -> bool:
"""Whether to ignore agent callbacks."""
return False
@property
def ignore_retriever(self) -> bool:
"""Whether to ignore retriever callbacks."""
return False
@property
def ignore_chat_model(self) -> bool:
"""Whether to ignore chat model callbacks."""
return False
class AsyncCallbackHandler(BaseCallbackHandler):
"""Async callback handler that handles callbacks from LangChain."""
async def on_llm_start(
self,
serialized: Dict[str, Any],
prompts: List[str],
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> None:
"""Run when LLM starts running."""
async def on_chat_model_start(
self,
serialized: Dict[str, Any],
messages: List[List[BaseMessage]],
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> Any:
"""Run when a chat model starts running."""
raise NotImplementedError(
f"{self.__class__.__name__} does not implement `on_chat_model_start`"
)
async def on_llm_new_token(
self,
token: str,
*,
chunk: Optional[Union[GenerationChunk, ChatGenerationChunk]] = None,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
**kwargs: Any,
) -> None:
"""Run on new LLM token. Only available when streaming is enabled."""
async def on_llm_end(
self,
response: LLMResult,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
**kwargs: Any,
) -> None:
"""Run when LLM ends running."""
async def on_llm_error(
self,
error: BaseException,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
**kwargs: Any,
) -> None:
"""Run when LLM errors."""
async def on_chain_start(
self,
serialized: Dict[str, Any],
inputs: Dict[str, Any],
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> None:
"""Run when chain starts running."""
async def on_chain_end(
self,
outputs: Dict[str, Any],
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
**kwargs: Any,
) -> None:
"""Run when chain ends running."""
async def on_chain_error(
self,
error: BaseException,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
**kwargs: Any,
) -> None:
"""Run when chain errors."""
async def on_tool_start(
self,
serialized: Dict[str, Any],
input_str: str,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> None:
"""Run when tool starts running."""
async def on_tool_end(
self,
output: str,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
**kwargs: Any,
) -> None:
"""Run when tool ends running."""
async def on_tool_error(
self,
error: BaseException,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
**kwargs: Any,
) -> None:
"""Run when tool errors."""
async def on_text(
self,
text: str,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
**kwargs: Any,
) -> None:
"""Run on arbitrary text."""
async def on_retry(
self,
retry_state: RetryCallState,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
**kwargs: Any,
) -> Any:
"""Run on a retry event."""
async def on_agent_action(
self,
action: AgentAction,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
**kwargs: Any,
) -> None:
"""Run on agent action."""
async def on_agent_finish(
self,
finish: AgentFinish,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
**kwargs: Any,
) -> None:
"""Run on agent end."""
async def on_retriever_start(
self,
serialized: Dict[str, Any],
query: str,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> None:
"""Run on retriever start."""
async def on_retriever_end(
self,
documents: Sequence[Document],
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
**kwargs: Any,
) -> None:
"""Run on retriever end."""
async def on_retriever_error(
self,
error: BaseException,
*,
run_id: UUID,
parent_run_id: Optional[UUID] = None,
tags: Optional[List[str]] = None,
**kwargs: Any,
) -> None:
"""Run on retriever error."""
T = TypeVar("T", bound="BaseCallbackManager")
class BaseCallbackManager(CallbackManagerMixin):
"""Base callback manager that handles callbacks from LangChain."""
def __init__(
self,
handlers: List[BaseCallbackHandler],
inheritable_handlers: Optional[List[BaseCallbackHandler]] = None,
parent_run_id: Optional[UUID] = None,
*,
tags: Optional[List[str]] = None,
inheritable_tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
inheritable_metadata: Optional[Dict[str, Any]] = None,
) -> None:
"""Initialize callback manager."""
self.handlers: List[BaseCallbackHandler] = handlers
self.inheritable_handlers: List[BaseCallbackHandler] = (
inheritable_handlers or []
)
self.parent_run_id: Optional[UUID] = parent_run_id
self.tags = tags or []
self.inheritable_tags = inheritable_tags or []
self.metadata = metadata or {}
self.inheritable_metadata = inheritable_metadata or {}
def copy(self: T) -> T:
"""Copy the callback manager."""
return self.__class__(
handlers=self.handlers,
inheritable_handlers=self.inheritable_handlers,
parent_run_id=self.parent_run_id,
tags=self.tags,
inheritable_tags=self.inheritable_tags,
metadata=self.metadata,
inheritable_metadata=self.inheritable_metadata,
)
@property
def is_async(self) -> bool:
"""Whether the callback manager is async."""
return False
def add_handler(self, handler: BaseCallbackHandler, inherit: bool = True) -> None:
"""Add a handler to the callback manager."""
if handler not in self.handlers:
self.handlers.append(handler)
if inherit and handler not in self.inheritable_handlers:
self.inheritable_handlers.append(handler)
def remove_handler(self, handler: BaseCallbackHandler) -> None:
"""Remove a handler from the callback manager."""
self.handlers.remove(handler)
self.inheritable_handlers.remove(handler)
def set_handlers(
self, handlers: List[BaseCallbackHandler], inherit: bool = True
) -> None:
"""Set handlers as the only handlers on the callback manager."""
self.handlers = []
self.inheritable_handlers = []
for handler in handlers:
self.add_handler(handler, inherit=inherit)
def set_handler(self, handler: BaseCallbackHandler, inherit: bool = True) -> None:
"""Set handler as the only handler on the callback manager."""
self.set_handlers([handler], inherit=inherit)
def add_tags(self, tags: List[str], inherit: bool = True) -> None:
"""Add tags to the callback manager."""
for tag in tags:
if tag in self.tags:
self.remove_tags([tag])
self.tags.extend(tags)
if inherit:
self.inheritable_tags.extend(tags)
def remove_tags(self, tags: List[str]) -> None:
"""Remove tags from the callback manager."""
for tag in tags:
self.tags.remove(tag)
self.inheritable_tags.remove(tag)
def add_metadata(self, metadata: Dict[str, Any], inherit: bool = True) -> None:
"""Add metadata to the callback manager."""
self.metadata.update(metadata)
if inherit:
self.inheritable_metadata.update(metadata)
def remove_metadata(self, keys: List[str]) -> None:
"""Remove metadata keys from the callback manager."""
for key in keys:
self.metadata.pop(key)
self.inheritable_metadata.pop(key)
Callbacks = Optional[Union[List[BaseCallbackHandler], BaseCallbackManager]]
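
A minimal usage sketch (not part of the module above; the class name and print format are illustrative): a custom handler subclasses BaseCallbackHandler and overrides only the hooks it cares about.

from typing import Any, List
from uuid import UUID

from langchain_core.callbacks.base import BaseCallbackHandler
from langchain_core.outputs import LLMResult


class TokenCollectingHandler(BaseCallbackHandler):
    """Collects streamed tokens and reports a summary when the LLM run ends."""

    def __init__(self) -> None:
        self.tokens: List[str] = []

    def on_llm_new_token(self, token: str, *, run_id: UUID, **kwargs: Any) -> None:
        # Called once per token when streaming is enabled.
        self.tokens.append(token)

    def on_llm_end(self, response: LLMResult, *, run_id: UUID, **kwargs: Any) -> None:
        print(f"run {run_id} produced {len(self.tokens)} streamed tokens")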

File diff suppressed because it is too large

View File

@@ -0,0 +1,102 @@
"""Callback Handler that prints to std out."""
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Dict, List, Optional
from langchain_core.callbacks.base import BaseCallbackHandler
from langchain_core.utils import print_text
if TYPE_CHECKING:
from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.outputs import LLMResult
class StdOutCallbackHandler(BaseCallbackHandler):
"""Callback Handler that prints to std out."""
def __init__(self, color: Optional[str] = None) -> None:
"""Initialize callback handler."""
self.color = color
def on_llm_start(
self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
) -> None:
"""Print out the prompts."""
pass
def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
"""Do nothing."""
pass
def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
"""Do nothing."""
pass
def on_llm_error(self, error: BaseException, **kwargs: Any) -> None:
"""Do nothing."""
pass
def on_chain_start(
self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
) -> None:
"""Print out that we are entering a chain."""
class_name = serialized.get("name", serialized.get("id", ["<unknown>"])[-1])
print(f"\n\n\033[1m> Entering new {class_name} chain...\033[0m")
def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
"""Print out that we finished a chain."""
print("\n\033[1m> Finished chain.\033[0m")
def on_chain_error(self, error: BaseException, **kwargs: Any) -> None:
"""Do nothing."""
pass
def on_tool_start(
self,
serialized: Dict[str, Any],
input_str: str,
**kwargs: Any,
) -> None:
"""Do nothing."""
pass
def on_agent_action(
self, action: AgentAction, color: Optional[str] = None, **kwargs: Any
) -> Any:
"""Run on agent action."""
print_text(action.log, color=color or self.color)
def on_tool_end(
self,
output: str,
color: Optional[str] = None,
observation_prefix: Optional[str] = None,
llm_prefix: Optional[str] = None,
**kwargs: Any,
) -> None:
"""If not the final action, print out observation."""
if observation_prefix is not None:
print_text(f"\n{observation_prefix}")
print_text(output, color=color or self.color)
if llm_prefix is not None:
print_text(f"\n{llm_prefix}")
def on_tool_error(self, error: BaseException, **kwargs: Any) -> None:
"""Do nothing."""
pass
def on_text(
self,
text: str,
color: Optional[str] = None,
end: str = "",
**kwargs: Any,
) -> None:
"""Run when agent ends."""
print_text(text, color=color or self.color, end=end)
def on_agent_finish(
self, finish: AgentFinish, color: Optional[str] = None, **kwargs: Any
) -> None:
"""Run on agent end."""
print_text(finish.log, color=color or self.color, end="\n")
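
Illustrative only (the handler is normally passed to a chain via callbacks; the module path langchain_core.callbacks.stdout is assumed): exercising StdOutCallbackHandler directly shows what it prints.

from langchain_core.callbacks.stdout import StdOutCallbackHandler

handler = StdOutCallbackHandler(color="green")
handler.on_chain_start({"name": "DemoChain"}, {"question": "hi"})  # prints the "Entering new ... chain" banner
handler.on_text("intermediate reasoning text", end="\n")
handler.on_chain_end({"answer": "hello"})  # prints the "Finished chain" banner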

View File

@@ -0,0 +1,72 @@
"""Callback Handler streams to stdout on new llm token."""
from __future__ import annotations
import sys
from typing import TYPE_CHECKING, Any, Dict, List
from langchain_core.callbacks.base import BaseCallbackHandler
if TYPE_CHECKING:
from langchain_core.agents import AgentAction, AgentFinish
from langchain_core.messages import BaseMessage
from langchain_core.outputs import LLMResult
class StreamingStdOutCallbackHandler(BaseCallbackHandler):
"""Callback handler for streaming. Only works with LLMs that support streaming."""
def on_llm_start(
self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
) -> None:
"""Run when LLM starts running."""
def on_chat_model_start(
self,
serialized: Dict[str, Any],
messages: List[List[BaseMessage]],
**kwargs: Any,
) -> None:
"""Run when LLM starts running."""
def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
"""Run on new LLM token. Only available when streaming is enabled."""
sys.stdout.write(token)
sys.stdout.flush()
def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
"""Run when LLM ends running."""
def on_llm_error(self, error: BaseException, **kwargs: Any) -> None:
"""Run when LLM errors."""
def on_chain_start(
self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
) -> None:
"""Run when chain starts running."""
def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
"""Run when chain ends running."""
def on_chain_error(self, error: BaseException, **kwargs: Any) -> None:
"""Run when chain errors."""
def on_tool_start(
self, serialized: Dict[str, Any], input_str: str, **kwargs: Any
) -> None:
"""Run when tool starts running."""
def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any:
"""Run on agent action."""
pass
def on_tool_end(self, output: str, **kwargs: Any) -> None:
"""Run when tool ends running."""
def on_tool_error(self, error: BaseException, **kwargs: Any) -> None:
"""Run when tool errors."""
def on_text(self, text: str, **kwargs: Any) -> None:
"""Run on arbitrary text."""
def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> None:
"""Run on agent end."""

View File

@@ -0,0 +1,67 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import List
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
class BaseChatMessageHistory(ABC):
"""Abstract base class for storing chat message history.
See `ChatMessageHistory` for default implementation.
Example:
.. code-block:: python
class FileChatMessageHistory(BaseChatMessageHistory):
storage_path: str
session_id: str
@property
def messages(self):
with open(os.path.join(self.storage_path, self.session_id), encoding='utf-8') as f:
messages = json.loads(f.read())
return messages_from_dict(messages)
def add_message(self, message: BaseMessage) -> None:
messages = messages_to_dict(self.messages + [message])
with open(os.path.join(self.storage_path, self.session_id), 'w') as f:
json.dump(messages, f)
def clear(self):
with open(os.path.join(self.storage_path, self.session_id), 'w') as f:
f.write("[]")
"""
messages: List[BaseMessage]
"""A list of Messages stored in-memory."""
def add_user_message(self, message: str) -> None:
"""Convenience method for adding a human message string to the store.
Args:
message: The string contents of a human message.
"""
self.add_message(HumanMessage(content=message))
def add_ai_message(self, message: str) -> None:
"""Convenience method for adding an AI message string to the store.
Args:
message: The string contents of an AI message.
"""
self.add_message(AIMessage(content=message))
@abstractmethod
def add_message(self, message: BaseMessage) -> None:
"""Add a Message object to the store.
Args:
message: A BaseMessage object to store.
"""
raise NotImplementedError()
@abstractmethod
def clear(self) -> None:
"""Remove all messages from the store"""

View File

@@ -0,0 +1,13 @@
from typing import Sequence, TypedDict
from langchain_core.messages import BaseMessage
class ChatSession(TypedDict, total=False):
"""Chat Session represents a single
conversation, channel, or other group of messages."""
messages: Sequence[BaseMessage]
"""The LangChain chat messages loaded from the source."""
functions: Sequence[dict]
"""The function calling specs for the messages."""

View File

@@ -0,0 +1,4 @@
from langchain_core.documents.base import Document
from langchain_core.documents.transformers import BaseDocumentTransformer
__all__ = ["Document", "BaseDocumentTransformer"]

View File

@@ -0,0 +1,23 @@
from __future__ import annotations
from typing import Literal
from langchain_core.load.serializable import Serializable
from langchain_core.pydantic_v1 import Field
class Document(Serializable):
"""Class for storing a piece of text and associated metadata."""
page_content: str
"""String text."""
metadata: dict = Field(default_factory=dict)
"""Arbitrary metadata about the page content (e.g., source, relationships to other
documents, etc.).
"""
type: Literal["Document"] = "Document"
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return whether this class is serializable."""
return True
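
Illustrative usage: a Document pairs text with arbitrary metadata (the file name in the metadata below is made up).

from langchain_core.documents import Document

doc = Document(
    page_content="LangChain provides composable building blocks.",
    metadata={"source": "example.txt", "page": 1},
)
print(doc.page_content)
print(doc.metadata["source"])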

View File

@@ -0,0 +1,74 @@
from __future__ import annotations
import asyncio
from abc import ABC, abstractmethod
from functools import partial
from typing import TYPE_CHECKING, Any, Sequence
if TYPE_CHECKING:
from langchain_core.documents import Document
class BaseDocumentTransformer(ABC):
"""Abstract base class for document transformation systems.
A document transformation system takes a sequence of Documents and returns a
sequence of transformed Documents.
Example:
.. code-block:: python
class EmbeddingsRedundantFilter(BaseDocumentTransformer, BaseModel):
embeddings: Embeddings
similarity_fn: Callable = cosine_similarity
similarity_threshold: float = 0.95
class Config:
arbitrary_types_allowed = True
def transform_documents(
self, documents: Sequence[Document], **kwargs: Any
) -> Sequence[Document]:
stateful_documents = get_stateful_documents(documents)
embedded_documents = _get_embeddings_from_stateful_docs(
self.embeddings, stateful_documents
)
included_idxs = _filter_similar_embeddings(
embedded_documents, self.similarity_fn, self.similarity_threshold
)
return [stateful_documents[i] for i in sorted(included_idxs)]
async def atransform_documents(
self, documents: Sequence[Document], **kwargs: Any
) -> Sequence[Document]:
raise NotImplementedError
""" # noqa: E501
@abstractmethod
def transform_documents(
self, documents: Sequence[Document], **kwargs: Any
) -> Sequence[Document]:
"""Transform a list of documents.
Args:
documents: A sequence of Documents to be transformed.
Returns:
A list of transformed Documents.
"""
async def atransform_documents(
self, documents: Sequence[Document], **kwargs: Any
) -> Sequence[Document]:
"""Asynchronously transform a list of documents.
Args:
documents: A sequence of Documents to be transformed.
Returns:
A list of transformed Documents.
"""
return await asyncio.get_running_loop().run_in_executor(
None, partial(self.transform_documents, **kwargs), documents
)
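
A toy transformer built on the abstract class above (illustrative; only the synchronous method needs to be implemented, since the async variant falls back to an executor).

from typing import Any, Sequence

from langchain_core.documents import Document
from langchain_core.documents.transformers import BaseDocumentTransformer


class UpperCaseTransformer(BaseDocumentTransformer):
    """Copies every document with upper-cased page content."""

    def transform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        return [
            Document(page_content=d.page_content.upper(), metadata=dict(d.metadata))
            for d in documents
        ]


docs = [Document(page_content="hello world")]
print(UpperCaseTransformer().transform_documents(docs)[0].page_content)  # -> HELLO WORLD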

View File

@@ -0,0 +1,27 @@
import asyncio
from abc import ABC, abstractmethod
from typing import List
class Embeddings(ABC):
"""Interface for embedding models."""
@abstractmethod
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Embed search docs."""
@abstractmethod
def embed_query(self, text: str) -> List[float]:
"""Embed query text."""
async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
"""Asynchronous Embed search docs."""
return await asyncio.get_running_loop().run_in_executor(
None, self.embed_documents, texts
)
async def aembed_query(self, text: str) -> List[float]:
"""Asynchronous Embed query text."""
return await asyncio.get_running_loop().run_in_executor(
None, self.embed_query, text
)
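
A deterministic fake embedder (illustrative, e.g. for tests; the module path langchain_core.embeddings is assumed): it implements the two abstract methods and inherits the async fallbacks.

import hashlib
from typing import List

from langchain_core.embeddings import Embeddings


class FakeEmbeddings(Embeddings):
    """Maps each text to a fixed-size vector derived from its SHA-256 hash."""

    def __init__(self, size: int = 4) -> None:
        self.size = size

    def _embed(self, text: str) -> List[float]:
        digest = hashlib.sha256(text.encode("utf-8")).digest()
        return [byte / 255 for byte in digest[: self.size]]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        return [self._embed(t) for t in texts]

    def embed_query(self, text: str) -> List[float]:
        return self._embed(text)


print(FakeEmbeddings().embed_query("hello"))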

View File

@@ -0,0 +1,17 @@
import platform
from functools import lru_cache
@lru_cache(maxsize=1)
def get_runtime_environment() -> dict:
"""Get information about the LangChain runtime environment."""
# Lazy import to avoid circular imports
from langchain_core import __version__
return {
"library_version": __version__,
"library": "langchain",
"platform": platform.platform(),
"runtime": "python",
"runtime_version": platform.python_version(),
}

View File

@@ -0,0 +1,18 @@
"""Logic for selecting examples to include in prompts."""
from langchain_core.example_selectors.base import BaseExampleSelector
from langchain_core.example_selectors.length_based import (
LengthBasedExampleSelector,
)
from langchain_core.example_selectors.semantic_similarity import (
MaxMarginalRelevanceExampleSelector,
SemanticSimilarityExampleSelector,
sorted_values,
)
__all__ = [
"BaseExampleSelector",
"LengthBasedExampleSelector",
"MaxMarginalRelevanceExampleSelector",
"SemanticSimilarityExampleSelector",
"sorted_values",
]

View File

@@ -0,0 +1,15 @@
"""Interface for selecting examples to include in prompts."""
from abc import ABC, abstractmethod
from typing import Any, Dict, List
class BaseExampleSelector(ABC):
"""Interface for selecting examples to include in prompts."""
@abstractmethod
def add_example(self, example: Dict[str, str]) -> Any:
"""Add new example to store for a key."""
@abstractmethod
def select_examples(self, input_variables: Dict[str, str]) -> List[dict]:
"""Select which examples to use based on the inputs."""

View File

@@ -0,0 +1,63 @@
"""Select examples based on length."""
import re
from typing import Callable, Dict, List
from langchain_core.example_selectors.base import BaseExampleSelector
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, validator
def _get_length_based(text: str) -> int:
return len(re.split("\n| ", text))
class LengthBasedExampleSelector(BaseExampleSelector, BaseModel):
"""Select examples based on length."""
examples: List[dict]
"""A list of the examples that the prompt template expects."""
example_prompt: PromptTemplate
"""Prompt template used to format the examples."""
get_text_length: Callable[[str], int] = _get_length_based
"""Function to measure prompt length. Defaults to word count."""
max_length: int = 2048
"""Max length for the prompt, beyond which examples are cut."""
example_text_lengths: List[int] = [] #: :meta private:
def add_example(self, example: Dict[str, str]) -> None:
"""Add new example to list."""
self.examples.append(example)
string_example = self.example_prompt.format(**example)
self.example_text_lengths.append(self.get_text_length(string_example))
@validator("example_text_lengths", always=True)
def calculate_example_text_lengths(cls, v: List[int], values: Dict) -> List[int]:
"""Calculate text lengths if they don't exist."""
# Check if text lengths were passed in
if v:
return v
# If they were not, calculate them
example_prompt = values["example_prompt"]
get_text_length = values["get_text_length"]
string_examples = [example_prompt.format(**eg) for eg in values["examples"]]
return [get_text_length(eg) for eg in string_examples]
def select_examples(self, input_variables: Dict[str, str]) -> List[dict]:
"""Select which examples to use based on the input lengths."""
inputs = " ".join(input_variables.values())
remaining_length = self.max_length - self.get_text_length(inputs)
i = 0
examples = []
while remaining_length > 0 and i < len(self.examples):
new_length = remaining_length - self.example_text_lengths[i]
if new_length < 0:
break
else:
examples.append(self.examples[i])
remaining_length = new_length
i += 1
return examples
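
Illustrative usage of LengthBasedExampleSelector: with a small word budget, only as many examples as fit are returned.

from langchain_core.example_selectors import LengthBasedExampleSelector
from langchain_core.prompts.prompt import PromptTemplate

example_prompt = PromptTemplate.from_template("Input: {input}\nOutput: {output}")
selector = LengthBasedExampleSelector(
    examples=[
        {"input": "happy", "output": "sad"},
        {"input": "tall", "output": "short"},
    ],
    example_prompt=example_prompt,
    max_length=6,  # measured in words by the default get_text_length
)
# Only the first example fits once the input "energetic" is counted.
print(selector.select_examples({"input": "energetic"}))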

View File

@@ -0,0 +1,167 @@
"""Example selector that selects examples based on SemanticSimilarity."""
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type
from langchain_core.example_selectors.base import BaseExampleSelector
from langchain_core.pydantic_v1 import BaseModel, Extra
from langchain_core.vectorstores import VectorStore
if TYPE_CHECKING:
from langchain_core.embeddings import Embeddings
def sorted_values(values: Dict[str, str]) -> List[Any]:
"""Return a list of values in dict sorted by key."""
return [values[val] for val in sorted(values)]
class SemanticSimilarityExampleSelector(BaseExampleSelector, BaseModel):
"""Example selector that selects examples based on SemanticSimilarity."""
vectorstore: VectorStore
"""VectorStore than contains information about examples."""
k: int = 4
"""Number of examples to select."""
example_keys: Optional[List[str]] = None
"""Optional keys to filter examples to."""
input_keys: Optional[List[str]] = None
"""Optional keys to filter input to. If provided, the search is based on
the input variables instead of all variables."""
class Config:
"""Configuration for this pydantic object."""
extra = Extra.forbid
arbitrary_types_allowed = True
def add_example(self, example: Dict[str, str]) -> str:
"""Add new example to vectorstore."""
if self.input_keys:
string_example = " ".join(
sorted_values({key: example[key] for key in self.input_keys})
)
else:
string_example = " ".join(sorted_values(example))
ids = self.vectorstore.add_texts([string_example], metadatas=[example])
return ids[0]
def select_examples(self, input_variables: Dict[str, str]) -> List[dict]:
"""Select which examples to use based on semantic similarity."""
# Get the docs with the highest similarity.
if self.input_keys:
input_variables = {key: input_variables[key] for key in self.input_keys}
query = " ".join(sorted_values(input_variables))
example_docs = self.vectorstore.similarity_search(query, k=self.k)
# Get the examples from the metadata.
# This assumes that examples are stored in metadata.
examples = [dict(e.metadata) for e in example_docs]
# If example keys are provided, filter examples to those keys.
if self.example_keys:
examples = [{k: eg[k] for k in self.example_keys} for eg in examples]
return examples
@classmethod
def from_examples(
cls,
examples: List[dict],
embeddings: Embeddings,
vectorstore_cls: Type[VectorStore],
k: int = 4,
input_keys: Optional[List[str]] = None,
**vectorstore_cls_kwargs: Any,
) -> SemanticSimilarityExampleSelector:
"""Create k-shot example selector using example list and embeddings.
Reshuffles examples dynamically based on query similarity.
Args:
examples: List of examples to use in the prompt.
embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
vectorstore_cls: A vector store DB interface class, e.g. FAISS.
k: Number of examples to select
input_keys: If provided, the search is based on the input variables
instead of all variables.
vectorstore_cls_kwargs: optional kwargs containing url for vector store
Returns:
The ExampleSelector instantiated, backed by a vector store.
"""
if input_keys:
string_examples = [
" ".join(sorted_values({k: eg[k] for k in input_keys}))
for eg in examples
]
else:
string_examples = [" ".join(sorted_values(eg)) for eg in examples]
vectorstore = vectorstore_cls.from_texts(
string_examples, embeddings, metadatas=examples, **vectorstore_cls_kwargs
)
return cls(vectorstore=vectorstore, k=k, input_keys=input_keys)
class MaxMarginalRelevanceExampleSelector(SemanticSimilarityExampleSelector):
"""ExampleSelector that selects examples based on Max Marginal Relevance.
This was shown to improve performance in this paper:
https://arxiv.org/pdf/2211.13892.pdf
"""
fetch_k: int = 20
"""Number of examples to fetch to rerank."""
def select_examples(self, input_variables: Dict[str, str]) -> List[dict]:
"""Select which examples to use based on semantic similarity."""
# Get the docs with the highest similarity.
if self.input_keys:
input_variables = {key: input_variables[key] for key in self.input_keys}
query = " ".join(sorted_values(input_variables))
example_docs = self.vectorstore.max_marginal_relevance_search(
query, k=self.k, fetch_k=self.fetch_k
)
# Get the examples from the metadata.
# This assumes that examples are stored in metadata.
examples = [dict(e.metadata) for e in example_docs]
# If example keys are provided, filter examples to those keys.
if self.example_keys:
examples = [{k: eg[k] for k in self.example_keys} for eg in examples]
return examples
@classmethod
def from_examples(
cls,
examples: List[dict],
embeddings: Embeddings,
vectorstore_cls: Type[VectorStore],
k: int = 4,
input_keys: Optional[List[str]] = None,
fetch_k: int = 20,
**vectorstore_cls_kwargs: Any,
) -> MaxMarginalRelevanceExampleSelector:
"""Create k-shot example selector using example list and embeddings.
Reshuffles examples dynamically based on query similarity.
Args:
examples: List of examples to use in the prompt.
embeddings: An initialized embedding API interface, e.g. OpenAIEmbeddings().
vectorstore_cls: A vector store DB interface class, e.g. FAISS.
k: Number of examples to select
input_keys: If provided, the search is based on the input variables
instead of all variables.
vectorstore_cls_kwargs: optional kwargs containing url for vector store
Returns:
The ExampleSelector instantiated, backed by a vector store.
"""
if input_keys:
string_examples = [
" ".join(sorted_values({k: eg[k] for k in input_keys}))
for eg in examples
]
else:
string_examples = [" ".join(sorted_values(eg)) for eg in examples]
vectorstore = vectorstore_cls.from_texts(
string_examples, embeddings, metadatas=examples, **vectorstore_cls_kwargs
)
return cls(vectorstore=vectorstore, k=k, fetch_k=fetch_k, input_keys=input_keys)

View File

@@ -0,0 +1,48 @@
from typing import Any, Optional
class LangChainException(Exception):
"""General LangChain exception."""
class TracerException(LangChainException):
"""Base class for exceptions in tracers module."""
class OutputParserException(ValueError, LangChainException):
"""Exception that output parsers should raise to signify a parsing error.
This exists to differentiate parsing errors from other code or execution errors
that also may arise inside the output parser. OutputParserExceptions will be
available to catch and handle in ways to fix the parsing error, while other
errors will be raised.
Args:
error: The error that's being re-raised or an error message.
observation: String explanation of the error which can be passed to a
model to try to remediate the issue.
llm_output: String model output that caused the error.
send_to_llm: Whether to send the observation and llm_output back to an Agent
after an OutputParserException has been raised. This gives the underlying
model driving the agent the context that the previous output was improperly
structured, in the hopes that it will update the output to the correct
format.
"""
def __init__(
self,
error: Any,
observation: Optional[str] = None,
llm_output: Optional[str] = None,
send_to_llm: bool = False,
):
super().__init__(error)
if send_to_llm:
if observation is None or llm_output is None:
raise ValueError(
"Arguments 'observation' & 'llm_output'"
" are required if 'send_to_llm' is True"
)
self.observation = observation
self.llm_output = llm_output
self.send_to_llm = send_to_llm
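
Illustrative sketch of how a parser raises OutputParserException so callers can catch parsing failures separately from other errors (the module path langchain_core.exceptions is assumed; the parser function is made up).

from langchain_core.exceptions import OutputParserException


def parse_yes_no(text: str) -> bool:
    cleaned = text.strip().lower()
    if cleaned not in ("yes", "no"):
        raise OutputParserException(
            f"Expected 'yes' or 'no', got: {text!r}",
            llm_output=text,
        )
    return cleaned == "yes"


try:
    parse_yes_no("maybe")
except OutputParserException as exc:
    print("recoverable parsing error:", exc)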

View File

@@ -0,0 +1,197 @@
# flake8: noqa
"""Global values and configuration that apply to all of LangChain."""
import warnings
from typing import TYPE_CHECKING, Optional
if TYPE_CHECKING:
from langchain_core.caches import BaseCache
# DO NOT USE THESE VALUES DIRECTLY!
# Use them only via `get_<X>()` and `set_<X>()` below,
# or else your code may behave unexpectedly with other uses of these global settings:
# https://github.com/langchain-ai/langchain/pull/11311#issuecomment-1743780004
_verbose: bool = False
_debug: bool = False
_llm_cache: Optional["BaseCache"] = None
def set_verbose(value: bool) -> None:
"""Set a new value for the `verbose` global setting."""
try:
import langchain # type: ignore[import]
# We're about to run some deprecated code, don't report warnings from it.
# The user called the correct (non-deprecated) code path and shouldn't get warnings.
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message=(
"Importing verbose from langchain root module is no longer supported"
),
)
# N.B.: This is a workaround for an unfortunate quirk of Python's
# module-level `__getattr__()` implementation:
# https://github.com/langchain-ai/langchain/pull/11311#issuecomment-1743780004
#
# Remove it once `langchain.verbose` is no longer supported, and once all users
# have migrated to using `set_verbose()` here.
langchain.verbose = value
except ImportError:
pass
global _verbose
_verbose = value
def get_verbose() -> bool:
"""Get the value of the `verbose` global setting."""
try:
import langchain # type: ignore[import]
# We're about to run some deprecated code, don't report warnings from it.
# The user called the correct (non-deprecated) code path and shouldn't get warnings.
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message=(
".*Importing verbose from langchain root module is no longer supported"
),
)
# N.B.: This is a workaround for an unfortunate quirk of Python's
# module-level `__getattr__()` implementation:
# https://github.com/langchain-ai/langchain/pull/11311#issuecomment-1743780004
#
# Remove it once `langchain.verbose` is no longer supported, and once all users
# have migrated to using `set_verbose()` here.
#
# In the meantime, the `verbose` setting is considered True if either the old
# or the new value are True. This accommodates users who haven't migrated
# to using `set_verbose()` yet. Those users are getting deprecation warnings
# directing them to use `set_verbose()` when they import `langchain.verbose`.
old_verbose = langchain.verbose
except ImportError:
old_verbose = False
global _verbose
return _verbose or old_verbose
def set_debug(value: bool) -> None:
"""Set a new value for the `debug` global setting."""
try:
import langchain # type: ignore[import]
# We're about to run some deprecated code, don't report warnings from it.
# The user called the correct (non-deprecated) code path and shouldn't get warnings.
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message="Importing debug from langchain root module is no longer supported",
)
# N.B.: This is a workaround for an unfortunate quirk of Python's
# module-level `__getattr__()` implementation:
# https://github.com/langchain-ai/langchain/pull/11311#issuecomment-1743780004
#
# Remove it once `langchain.debug` is no longer supported, and once all users
# have migrated to using `set_debug()` here.
langchain.debug = value
except ImportError:
pass
global _debug
_debug = value
def get_debug() -> bool:
"""Get the value of the `debug` global setting."""
try:
import langchain # type: ignore[import]
# We're about to run some deprecated code, don't report warnings from it.
# The user called the correct (non-deprecated) code path and shouldn't get warnings.
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message="Importing debug from langchain root module is no longer supported",
)
# N.B.: This is a workaround for an unfortunate quirk of Python's
# module-level `__getattr__()` implementation:
# https://github.com/langchain-ai/langchain/pull/11311#issuecomment-1743780004
#
# Remove it once `langchain.debug` is no longer supported, and once all users
# have migrated to using `set_debug()` here.
#
# In the meantime, the `debug` setting is considered True if either the old
# or the new value are True. This accommodates users who haven't migrated
# to using `set_debug()` yet. Those users are getting deprecation warnings
# directing them to use `set_debug()` when they import `langchain.debug`.
old_debug = langchain.debug
except ImportError:
old_debug = False
global _debug
return _debug or old_debug
def set_llm_cache(value: Optional["BaseCache"]) -> None:
"""Set a new LLM cache, overwriting the previous value, if any."""
try:
import langchain # type: ignore[import]
# We're about to run some deprecated code, don't report warnings from it.
# The user called the correct (non-deprecated) code path and shouldn't get warnings.
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message=(
"Importing llm_cache from langchain root module is no longer supported"
),
)
# N.B.: This is a workaround for an unfortunate quirk of Python's
# module-level `__getattr__()` implementation:
# https://github.com/langchain-ai/langchain/pull/11311#issuecomment-1743780004
#
# Remove it once `langchain.llm_cache` is no longer supported, and
# once all users have migrated to using `set_llm_cache()` here.
langchain.llm_cache = value
except ImportError:
pass
global _llm_cache
_llm_cache = value
def get_llm_cache() -> "BaseCache":
"""Get the value of the `llm_cache` global setting."""
try:
import langchain # type: ignore[import]
# We're about to run some deprecated code, don't report warnings from it.
# The user called the correct (non-deprecated) code path and shouldn't get warnings.
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message=(
"Importing llm_cache from langchain root module is no longer supported"
),
)
# N.B.: This is a workaround for an unfortunate quirk of Python's
# module-level `__getattr__()` implementation:
# https://github.com/langchain-ai/langchain/pull/11311#issuecomment-1743780004
#
# Remove it once `langchain.llm_cache` is no longer supported, and
# once all users have migrated to using `set_llm_cache()` here.
#
# In the meantime, the `llm_cache` setting returns whichever of
# its two backing sources is truthy (not `None` and non-empty),
# or the old value if both are falsy. This accommodates users
# who haven't migrated to using `set_llm_cache()` yet.
# Those users are getting deprecation warnings directing them
# to use `set_llm_cache()` when they import `langchain.llm_cache`.
old_llm_cache = langchain.llm_cache
except ImportError:
old_llm_cache = None
global _llm_cache
return _llm_cache or old_llm_cache
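
Illustrative usage of the accessors above (the module path langchain_core.globals is assumed); the setters keep the legacy langchain module attributes in sync when that package is installed.

from langchain_core.globals import get_debug, get_verbose, set_debug, set_verbose

set_debug(True)
set_verbose(False)
print("debug:", get_debug())      # -> True
print("verbose:", get_verbose())  # -> False (unless the legacy langchain.verbose is still True)
set_debug(False)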

View File

@@ -0,0 +1,17 @@
from langchain_core.language_models.base import (
BaseLanguageModel,
LanguageModelInput,
get_tokenizer,
)
from langchain_core.language_models.chat_models import BaseChatModel, SimpleChatModel
from langchain_core.language_models.llms import LLM, BaseLLM
__all__ = [
"BaseLanguageModel",
"BaseChatModel",
"SimpleChatModel",
"BaseLLM",
"LLM",
"LanguageModelInput",
"get_tokenizer",
]

View File

@@ -0,0 +1,293 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from functools import lru_cache
from typing import (
TYPE_CHECKING,
Any,
List,
Optional,
Sequence,
Set,
TypeVar,
Union,
)
from typing_extensions import TypeAlias
from langchain_core.messages import AnyMessage, BaseMessage, get_buffer_string
from langchain_core.prompt_values import PromptValue
from langchain_core.runnables import RunnableSerializable
from langchain_core.utils import get_pydantic_field_names
if TYPE_CHECKING:
from langchain_core.callbacks import Callbacks
from langchain_core.outputs import LLMResult
@lru_cache(maxsize=None) # Cache the tokenizer
def get_tokenizer() -> Any:
try:
from transformers import GPT2TokenizerFast # type: ignore[import]
except ImportError:
raise ImportError(
"Could not import transformers python package. "
"This is needed in order to calculate get_token_ids. "
"Please install it with `pip install transformers`."
)
# create a GPT-2 tokenizer instance
return GPT2TokenizerFast.from_pretrained("gpt2")
def _get_token_ids_default_method(text: str) -> List[int]:
"""Encode the text into token IDs."""
# get the cached tokenizer
tokenizer = get_tokenizer()
# tokenize the text using the GPT-2 tokenizer
return tokenizer.encode(text)
LanguageModelInput = Union[PromptValue, str, List[BaseMessage]]
LanguageModelOutput = TypeVar("LanguageModelOutput")
class BaseLanguageModel(
RunnableSerializable[LanguageModelInput, LanguageModelOutput], ABC
):
"""Abstract base class for interfacing with language models.
All language model wrappers inherit from BaseLanguageModel.
Exposes three main methods:
- generate_prompt: generate language model outputs for a sequence of prompt
values. A prompt value is a model input that can be converted to any language
model input format (string or messages).
- predict: pass in a single string to a language model and return a string
prediction.
- predict_messages: pass in a sequence of BaseMessages (corresponding to a single
model call) to a language model and return a BaseMessage prediction.
Each of these has an equivalent asynchronous method.
"""
@property
def InputType(self) -> TypeAlias:
"""Get the input type for this runnable."""
from langchain_core.prompt_values import (
ChatPromptValueConcrete,
StringPromptValue,
)
# This is a version of LanguageModelInput which replaces the abstract
# base class BaseMessage with a union of its subclasses, which makes
# for a much better schema.
return Union[
str,
Union[StringPromptValue, ChatPromptValueConcrete],
List[AnyMessage],
]
@abstractmethod
def generate_prompt(
self,
prompts: List[PromptValue],
stop: Optional[List[str]] = None,
callbacks: Callbacks = None,
**kwargs: Any,
) -> LLMResult:
"""Pass a sequence of prompts to the model and return model generations.
This method should make use of batched calls for models that expose a batched
API.
Use this method when you want to:
1. take advantage of batched calls,
2. need more output from the model than just the top generated value,
3. are building chains that are agnostic to the underlying language model
type (e.g., pure text completion models vs chat models).
Args:
prompts: List of PromptValues. A PromptValue is an object that can be
converted to match the format of any language model (string for pure
text generation models and BaseMessages for chat models).
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: Callbacks to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Returns:
An LLMResult, which contains a list of candidate Generations for each input
prompt and additional model provider-specific output.
"""
@abstractmethod
async def agenerate_prompt(
self,
prompts: List[PromptValue],
stop: Optional[List[str]] = None,
callbacks: Callbacks = None,
**kwargs: Any,
) -> LLMResult:
"""Asynchronously pass a sequence of prompts and return model generations.
This method should make use of batched calls for models that expose a batched
API.
Use this method when you want to:
1. take advantage of batched calls,
2. need more output from the model than just the top generated value,
3. are building chains that are agnostic to the underlying language model
type (e.g., pure text completion models vs chat models).
Args:
prompts: List of PromptValues. A PromptValue is an object that can be
converted to match the format of any language model (string for pure
text generation models and BaseMessages for chat models).
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
callbacks: Callbacks to pass through. Used for executing additional
functionality, such as logging or streaming, throughout generation.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Returns:
An LLMResult, which contains a list of candidate Generations for each input
prompt and additional model provider-specific output.
"""
@abstractmethod
def predict(
self, text: str, *, stop: Optional[Sequence[str]] = None, **kwargs: Any
) -> str:
"""Pass a single string input to the model and return a string prediction.
Use this method when passing in raw text. If you want to pass in specific
types of chat messages, use predict_messages.
Args:
text: String input to pass to the model.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Returns:
Top model prediction as a string.
"""
@abstractmethod
def predict_messages(
self,
messages: List[BaseMessage],
*,
stop: Optional[Sequence[str]] = None,
**kwargs: Any,
) -> BaseMessage:
"""Pass a message sequence to the model and return a message prediction.
Use this method when passing in chat messages. If you want to pass in raw text,
use predict.
Args:
messages: A sequence of chat messages corresponding to a single model input.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Returns:
Top model prediction as a message.
"""
@abstractmethod
async def apredict(
self, text: str, *, stop: Optional[Sequence[str]] = None, **kwargs: Any
) -> str:
"""Asynchronously pass a string to the model and return a string prediction.
Use this method when calling pure text generation models and only the top
candidate generation is needed.
Args:
text: String input to pass to the model.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Returns:
Top model prediction as a string.
"""
@abstractmethod
async def apredict_messages(
self,
messages: List[BaseMessage],
*,
stop: Optional[Sequence[str]] = None,
**kwargs: Any,
) -> BaseMessage:
"""Asynchronously pass messages to the model and return a message prediction.
Use this method when calling chat models and only the top
candidate generation is needed.
Args:
messages: A sequence of chat messages corresponding to a single model input.
stop: Stop words to use when generating. Model output is cut off at the
first occurrence of any of these substrings.
**kwargs: Arbitrary additional keyword arguments. These are usually passed
to the model provider API call.
Returns:
Top model prediction as a message.
"""
def get_token_ids(self, text: str) -> List[int]:
"""Return the ordered ids of the tokens in a text.
Args:
text: The string input to tokenize.
Returns:
A list of ids corresponding to the tokens in the text, in order they occur
in the text.
"""
return _get_token_ids_default_method(text)
def get_num_tokens(self, text: str) -> int:
"""Get the number of tokens present in the text.
Useful for checking if an input will fit in a model's context window.
Args:
text: The string input to tokenize.
Returns:
The integer number of tokens in the text.
"""
return len(self.get_token_ids(text))
def get_num_tokens_from_messages(self, messages: List[BaseMessage]) -> int:
"""Get the number of tokens in the messages.
Useful for checking if an input will fit in a model's context window.
Args:
messages: The message inputs to tokenize.
Returns:
The sum of the number of tokens across the messages.
"""
return sum([self.get_num_tokens(get_buffer_string([m])) for m in messages])
@classmethod
def _all_required_field_names(cls) -> Set:
"""DEPRECATED: Kept for backwards compatibility.
Use get_pydantic_field_names.
"""
return get_pydantic_field_names(cls)

View File

@@ -0,0 +1,742 @@
from __future__ import annotations
import asyncio
import inspect
import warnings
from abc import ABC, abstractmethod
from functools import partial
from typing import (
TYPE_CHECKING,
Any,
AsyncIterator,
Dict,
Iterator,
List,
Optional,
Sequence,
cast,
)
from langchain_core.callbacks import (
AsyncCallbackManager,
AsyncCallbackManagerForLLMRun,
BaseCallbackManager,
CallbackManager,
CallbackManagerForLLMRun,
Callbacks,
)
from langchain_core.globals import get_llm_cache
from langchain_core.language_models.base import BaseLanguageModel, LanguageModelInput
from langchain_core.load import dumpd, dumps
from langchain_core.messages import (
AIMessage,
AnyMessage,
BaseMessage,
BaseMessageChunk,
HumanMessage,
)
from langchain_core.outputs import (
ChatGeneration,
ChatGenerationChunk,
ChatResult,
LLMResult,
RunInfo,
)
from langchain_core.prompt_values import ChatPromptValue, PromptValue, StringPromptValue
from langchain_core.pydantic_v1 import Field, root_validator
if TYPE_CHECKING:
from langchain_core.runnables import RunnableConfig
def _get_verbosity() -> bool:
from langchain_core.globals import get_verbose
return get_verbose()
def _generate_from_stream(stream: Iterator[ChatGenerationChunk]) -> ChatResult:
generation: Optional[ChatGenerationChunk] = None
for chunk in stream:
if generation is None:
generation = chunk
else:
generation += chunk
assert generation is not None
return ChatResult(generations=[generation])
async def _agenerate_from_stream(
stream: AsyncIterator[ChatGenerationChunk],
) -> ChatResult:
generation: Optional[ChatGenerationChunk] = None
async for chunk in stream:
if generation is None:
generation = chunk
else:
generation += chunk
assert generation is not None
return ChatResult(generations=[generation])
class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
"""Base class for Chat models."""
cache: Optional[bool] = None
"""Whether to cache the response."""
verbose: bool = Field(default_factory=_get_verbosity)
"""Whether to print out response text."""
callbacks: Callbacks = Field(default=None, exclude=True)
"""Callbacks to add to the run trace."""
callback_manager: Optional[BaseCallbackManager] = Field(default=None, exclude=True)
"""Callback manager to add to the run trace."""
tags: Optional[List[str]] = Field(default=None, exclude=True)
"""Tags to add to the run trace."""
metadata: Optional[Dict[str, Any]] = Field(default=None, exclude=True)
"""Metadata to add to the run trace."""
@root_validator()
def raise_deprecation(cls, values: Dict) -> Dict:
"""Raise deprecation warning if callback_manager is used."""
if values.get("callback_manager") is not None:
warnings.warn(
"callback_manager is deprecated. Please use callbacks instead.",
DeprecationWarning,
)
values["callbacks"] = values.pop("callback_manager", None)
return values
class Config:
"""Configuration for this pydantic object."""
arbitrary_types_allowed = True
# --- Runnable methods ---
@property
def OutputType(self) -> Any:
"""Get the output type for this runnable."""
return AnyMessage
def _convert_input(self, input: LanguageModelInput) -> PromptValue:
if isinstance(input, PromptValue):
return input
elif isinstance(input, str):
return StringPromptValue(text=input)
elif isinstance(input, list):
return ChatPromptValue(messages=input)
else:
raise ValueError(
f"Invalid input type {type(input)}. "
"Must be a PromptValue, str, or list of BaseMessages."
)
def invoke(
self,
input: LanguageModelInput,
config: Optional[RunnableConfig] = None,
*,
stop: Optional[List[str]] = None,
**kwargs: Any,
) -> BaseMessage:
config = config or {}
return cast(
ChatGeneration,
self.generate_prompt(
[self._convert_input(input)],
stop=stop,
callbacks=config.get("callbacks"),
tags=config.get("tags"),
metadata=config.get("metadata"),
run_name=config.get("run_name"),
**kwargs,
).generations[0][0],
).message
async def ainvoke(
self,
input: LanguageModelInput,
config: Optional[RunnableConfig] = None,
*,
stop: Optional[List[str]] = None,
**kwargs: Any,
) -> BaseMessage:
config = config or {}
llm_result = await self.agenerate_prompt(
[self._convert_input(input)],
stop=stop,
callbacks=config.get("callbacks"),
tags=config.get("tags"),
metadata=config.get("metadata"),
run_name=config.get("run_name"),
**kwargs,
)
return cast(ChatGeneration, llm_result.generations[0][0]).message
def stream(
self,
input: LanguageModelInput,
config: Optional[RunnableConfig] = None,
*,
stop: Optional[List[str]] = None,
**kwargs: Any,
) -> Iterator[BaseMessageChunk]:
if type(self)._stream == BaseChatModel._stream:
# model doesn't implement streaming, so use default implementation
yield cast(
BaseMessageChunk, self.invoke(input, config=config, stop=stop, **kwargs)
)
else:
config = config or {}
messages = self._convert_input(input).to_messages()
params = self._get_invocation_params(stop=stop, **kwargs)
options = {"stop": stop, **kwargs}
callback_manager = CallbackManager.configure(
config.get("callbacks"),
self.callbacks,
self.verbose,
config.get("tags"),
self.tags,
config.get("metadata"),
self.metadata,
)
(run_manager,) = callback_manager.on_chat_model_start(
dumpd(self),
[messages],
invocation_params=params,
options=options,
name=config.get("run_name"),
batch_size=1,
)
try:
generation: Optional[ChatGenerationChunk] = None
for chunk in self._stream(
messages, stop=stop, run_manager=run_manager, **kwargs
):
yield chunk.message
if generation is None:
generation = chunk
else:
generation += chunk
assert generation is not None
except BaseException as e:
run_manager.on_llm_error(e)
raise e
else:
run_manager.on_llm_end(
LLMResult(generations=[[generation]]),
)
async def astream(
self,
input: LanguageModelInput,
config: Optional[RunnableConfig] = None,
*,
stop: Optional[List[str]] = None,
**kwargs: Any,
) -> AsyncIterator[BaseMessageChunk]:
if type(self)._astream == BaseChatModel._astream:
# model doesn't implement streaming, so use default implementation
yield cast(
BaseMessageChunk, self.invoke(input, config=config, stop=stop, **kwargs)
)
else:
config = config or {}
messages = self._convert_input(input).to_messages()
params = self._get_invocation_params(stop=stop, **kwargs)
options = {"stop": stop, **kwargs}
callback_manager = AsyncCallbackManager.configure(
config.get("callbacks"),
self.callbacks,
self.verbose,
config.get("tags"),
self.tags,
config.get("metadata"),
self.metadata,
)
(run_manager,) = await callback_manager.on_chat_model_start(
dumpd(self),
[messages],
invocation_params=params,
options=options,
name=config.get("run_name"),
batch_size=1,
)
try:
generation: Optional[ChatGenerationChunk] = None
async for chunk in self._astream(
messages, stop=stop, run_manager=run_manager, **kwargs
):
yield chunk.message
if generation is None:
generation = chunk
else:
generation += chunk
assert generation is not None
except BaseException as e:
await run_manager.on_llm_error(e)
raise e
else:
await run_manager.on_llm_end(
LLMResult(generations=[[generation]]),
)
# --- Custom methods ---
def _combine_llm_outputs(self, llm_outputs: List[Optional[dict]]) -> dict:
return {}
def _get_invocation_params(
self,
stop: Optional[List[str]] = None,
**kwargs: Any,
) -> dict:
params = self.dict()
params["stop"] = stop
return {**params, **kwargs}
def _get_llm_string(self, stop: Optional[List[str]] = None, **kwargs: Any) -> str:
if self.is_lc_serializable():
params = {**kwargs, **{"stop": stop}}
param_string = str(sorted([(k, v) for k, v in params.items()]))
llm_string = dumps(self)
return llm_string + "---" + param_string
else:
params = self._get_invocation_params(stop=stop, **kwargs)
params = {**params, **kwargs}
return str(sorted([(k, v) for k, v in params.items()]))
def generate(
self,
messages: List[List[BaseMessage]],
stop: Optional[List[str]] = None,
callbacks: Callbacks = None,
*,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
run_name: Optional[str] = None,
**kwargs: Any,
) -> LLMResult:
"""Top Level call"""
params = self._get_invocation_params(stop=stop, **kwargs)
options = {"stop": stop}
callback_manager = CallbackManager.configure(
callbacks,
self.callbacks,
self.verbose,
tags,
self.tags,
metadata,
self.metadata,
)
run_managers = callback_manager.on_chat_model_start(
dumpd(self),
messages,
invocation_params=params,
options=options,
name=run_name,
batch_size=len(messages),
)
results = []
for i, m in enumerate(messages):
try:
results.append(
self._generate_with_cache(
m,
stop=stop,
run_manager=run_managers[i] if run_managers else None,
**kwargs,
)
)
except BaseException as e:
if run_managers:
run_managers[i].on_llm_error(e)
raise e
flattened_outputs = [
LLMResult(generations=[res.generations], llm_output=res.llm_output)
for res in results
]
llm_output = self._combine_llm_outputs([res.llm_output for res in results])
generations = [res.generations for res in results]
output = LLMResult(generations=generations, llm_output=llm_output)
if run_managers:
run_infos = []
for manager, flattened_output in zip(run_managers, flattened_outputs):
manager.on_llm_end(flattened_output)
run_infos.append(RunInfo(run_id=manager.run_id))
output.run = run_infos
return output
async def agenerate(
self,
messages: List[List[BaseMessage]],
stop: Optional[List[str]] = None,
callbacks: Callbacks = None,
*,
tags: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None,
run_name: Optional[str] = None,
**kwargs: Any,
) -> LLMResult:
"""Top Level call"""
params = self._get_invocation_params(stop=stop, **kwargs)
options = {"stop": stop}
callback_manager = AsyncCallbackManager.configure(
callbacks,
self.callbacks,
self.verbose,
tags,
self.tags,
metadata,
self.metadata,
)
run_managers = await callback_manager.on_chat_model_start(
dumpd(self),
messages,
invocation_params=params,
options=options,
name=run_name,
batch_size=len(messages),
)
results = await asyncio.gather(
*[
self._agenerate_with_cache(
m,
stop=stop,
run_manager=run_managers[i] if run_managers else None,
**kwargs,
)
for i, m in enumerate(messages)
],
return_exceptions=True,
)
exceptions = []
for i, res in enumerate(results):
if isinstance(res, BaseException):
if run_managers:
await run_managers[i].on_llm_error(res)
exceptions.append(res)
if exceptions:
if run_managers:
await asyncio.gather(
*[
run_manager.on_llm_end(
LLMResult(
generations=[res.generations], llm_output=res.llm_output
)
)
for run_manager, res in zip(run_managers, results)
if not isinstance(res, Exception)
]
)
raise exceptions[0]
flattened_outputs = [
LLMResult(generations=[res.generations], llm_output=res.llm_output)
for res in results
]
llm_output = self._combine_llm_outputs([res.llm_output for res in results])
generations = [res.generations for res in results]
output = LLMResult(generations=generations, llm_output=llm_output)
await asyncio.gather(
*[
run_manager.on_llm_end(flattened_output)
for run_manager, flattened_output in zip(
run_managers, flattened_outputs
)
]
)
if run_managers:
output.run = [
RunInfo(run_id=run_manager.run_id) for run_manager in run_managers
]
return output
def generate_prompt(
self,
prompts: List[PromptValue],
stop: Optional[List[str]] = None,
callbacks: Callbacks = None,
**kwargs: Any,
) -> LLMResult:
prompt_messages = [p.to_messages() for p in prompts]
return self.generate(prompt_messages, stop=stop, callbacks=callbacks, **kwargs)
async def agenerate_prompt(
self,
prompts: List[PromptValue],
stop: Optional[List[str]] = None,
callbacks: Callbacks = None,
**kwargs: Any,
) -> LLMResult:
prompt_messages = [p.to_messages() for p in prompts]
return await self.agenerate(
prompt_messages, stop=stop, callbacks=callbacks, **kwargs
)
def _generate_with_cache(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> ChatResult:
new_arg_supported = inspect.signature(self._generate).parameters.get(
"run_manager"
)
disregard_cache = self.cache is not None and not self.cache
llm_cache = get_llm_cache()
if llm_cache is None or disregard_cache:
# This happens when langchain.cache is None, but self.cache is True
if self.cache is not None and self.cache:
raise ValueError(
"Asked to cache, but no cache found at `langchain.cache`."
)
if new_arg_supported:
return self._generate(
messages, stop=stop, run_manager=run_manager, **kwargs
)
else:
return self._generate(messages, stop=stop, **kwargs)
else:
llm_string = self._get_llm_string(stop=stop, **kwargs)
prompt = dumps(messages)
cache_val = llm_cache.lookup(prompt, llm_string)
if isinstance(cache_val, list):
return ChatResult(generations=cache_val)
else:
if new_arg_supported:
result = self._generate(
messages, stop=stop, run_manager=run_manager, **kwargs
)
else:
result = self._generate(messages, stop=stop, **kwargs)
llm_cache.update(prompt, llm_string, result.generations)
return result
async def _agenerate_with_cache(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> ChatResult:
new_arg_supported = inspect.signature(self._agenerate).parameters.get(
"run_manager"
)
disregard_cache = self.cache is not None and not self.cache
llm_cache = get_llm_cache()
if llm_cache is None or disregard_cache:
# This happens when langchain.cache is None, but self.cache is True
if self.cache is not None and self.cache:
raise ValueError(
"Asked to cache, but no cache found at `langchain.cache`."
)
if new_arg_supported:
return await self._agenerate(
messages, stop=stop, run_manager=run_manager, **kwargs
)
else:
return await self._agenerate(messages, stop=stop, **kwargs)
else:
llm_string = self._get_llm_string(stop=stop, **kwargs)
prompt = dumps(messages)
cache_val = llm_cache.lookup(prompt, llm_string)
if isinstance(cache_val, list):
return ChatResult(generations=cache_val)
else:
if new_arg_supported:
result = await self._agenerate(
messages, stop=stop, run_manager=run_manager, **kwargs
)
else:
result = await self._agenerate(messages, stop=stop, **kwargs)
llm_cache.update(prompt, llm_string, result.generations)
return result
@abstractmethod
def _generate(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> ChatResult:
"""Top Level call"""
async def _agenerate(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> ChatResult:
"""Top Level call"""
return await asyncio.get_running_loop().run_in_executor(
None, partial(self._generate, **kwargs), messages, stop, run_manager
)
def _stream(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> Iterator[ChatGenerationChunk]:
raise NotImplementedError()
def _astream(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> AsyncIterator[ChatGenerationChunk]:
raise NotImplementedError()
def __call__(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
callbacks: Callbacks = None,
**kwargs: Any,
) -> BaseMessage:
generation = self.generate(
[messages], stop=stop, callbacks=callbacks, **kwargs
).generations[0][0]
if isinstance(generation, ChatGeneration):
return generation.message
else:
raise ValueError("Unexpected generation type")
async def _call_async(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
callbacks: Callbacks = None,
**kwargs: Any,
) -> BaseMessage:
result = await self.agenerate(
[messages], stop=stop, callbacks=callbacks, **kwargs
)
generation = result.generations[0][0]
if isinstance(generation, ChatGeneration):
return generation.message
else:
raise ValueError("Unexpected generation type")
def call_as_llm(
self, message: str, stop: Optional[List[str]] = None, **kwargs: Any
) -> str:
return self.predict(message, stop=stop, **kwargs)
def predict(
self, text: str, *, stop: Optional[Sequence[str]] = None, **kwargs: Any
) -> str:
if stop is None:
_stop = None
else:
_stop = list(stop)
result = self([HumanMessage(content=text)], stop=_stop, **kwargs)
if isinstance(result.content, str):
return result.content
else:
raise ValueError("Cannot use predict when output is not a string.")
def predict_messages(
self,
messages: List[BaseMessage],
*,
stop: Optional[Sequence[str]] = None,
**kwargs: Any,
) -> BaseMessage:
if stop is None:
_stop = None
else:
_stop = list(stop)
return self(messages, stop=_stop, **kwargs)
async def apredict(
self, text: str, *, stop: Optional[Sequence[str]] = None, **kwargs: Any
) -> str:
if stop is None:
_stop = None
else:
_stop = list(stop)
result = await self._call_async(
[HumanMessage(content=text)], stop=_stop, **kwargs
)
if isinstance(result.content, str):
return result.content
else:
raise ValueError("Cannot use predict when output is not a string.")
async def apredict_messages(
self,
messages: List[BaseMessage],
*,
stop: Optional[Sequence[str]] = None,
**kwargs: Any,
) -> BaseMessage:
if stop is None:
_stop = None
else:
_stop = list(stop)
return await self._call_async(messages, stop=_stop, **kwargs)
@property
def _identifying_params(self) -> Dict[str, Any]:
"""Get the identifying parameters."""
return {}
@property
@abstractmethod
def _llm_type(self) -> str:
"""Return type of chat model."""
def dict(self, **kwargs: Any) -> Dict:
"""Return a dictionary of the LLM."""
starter_dict = dict(self._identifying_params)
starter_dict["_type"] = self._llm_type
return starter_dict
class SimpleChatModel(BaseChatModel):
"""Simple Chat Model."""
def _generate(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> ChatResult:
output_str = self._call(messages, stop=stop, run_manager=run_manager, **kwargs)
message = AIMessage(content=output_str)
generation = ChatGeneration(message=message)
return ChatResult(generations=[generation])
@abstractmethod
def _call(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> str:
"""Simpler interface."""
async def _agenerate(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> ChatResult:
func = partial(
self._generate, messages, stop=stop, run_manager=run_manager, **kwargs
)
return await asyncio.get_event_loop().run_in_executor(None, func)
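Editor's note: as a quick orientation aid, a minimal sketch of satisfying the SimpleChatModel interface defined above. The import path and the EchoChatModel name are assumptions for illustration, not part of this diff.

from typing import Any, List, Optional

from langchain_core.chat_models import SimpleChatModel  # assumed module path in this snapshot
from langchain_core.messages import BaseMessage


class EchoChatModel(SimpleChatModel):
    """Toy chat model that echoes the latest message back."""

    def _call(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        # The simpler interface only has to return the reply text;
        # SimpleChatModel._generate above wraps it in an AIMessage/ChatResult.
        return str(messages[-1].content)

    @property
    def _llm_type(self) -> str:
        return "echo-chat"


# EchoChatModel().predict("hello")  # -> "hello"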

File diff suppressed because it is too large


@@ -0,0 +1,6 @@
"""Serialization and deserialization."""
from langchain_core.load.dump import dumpd, dumps
from langchain_core.load.load import load, loads
from langchain_core.load.serializable import Serializable
__all__ = ["dumpd", "dumps", "load", "loads", "Serializable"]


@@ -0,0 +1,26 @@
import json
from typing import Any, Dict
from langchain_core.load.serializable import Serializable, to_json_not_implemented
def default(obj: Any) -> Any:
"""Return a default value for a Serializable object or
a SerializedNotImplemented object."""
if isinstance(obj, Serializable):
return obj.to_json()
else:
return to_json_not_implemented(obj)
def dumps(obj: Any, *, pretty: bool = False) -> str:
"""Return a json string representation of an object."""
if pretty:
return json.dumps(obj, default=default, indent=2)
else:
return json.dumps(obj, default=default)
def dumpd(obj: Any) -> Dict[str, Any]:
"""Return a json dict representation of an object."""
return json.loads(dumps(obj))
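Editor's note: a brief usage sketch of dumps/dumpd; the HumanMessage class comes from the messages package added later in this compare.

from langchain_core.load.dump import dumpd, dumps
from langchain_core.messages import HumanMessage

msg = HumanMessage(content="hello")
dumps(msg, pretty=True)  # indented JSON string: {"lc": 1, "type": "constructor", ...}
dumpd(msg)               # the same structure as a plain dict, via a json round-trip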


@@ -0,0 +1,130 @@
import importlib
import json
import os
from typing import Any, Dict, List, Optional
from langchain_core.load.serializable import Serializable
DEFAULT_NAMESPACES = ["langchain", "langchain_core"]
class Reviver:
"""Reviver for JSON objects."""
def __init__(
self,
secrets_map: Optional[Dict[str, str]] = None,
valid_namespaces: Optional[List[str]] = None,
) -> None:
self.secrets_map = secrets_map or dict()
# By default only support langchain, but user can pass in additional namespaces
self.valid_namespaces = (
[*DEFAULT_NAMESPACES, *valid_namespaces]
if valid_namespaces
else DEFAULT_NAMESPACES
)
def __call__(self, value: Dict[str, Any]) -> Any:
if (
value.get("lc", None) == 1
and value.get("type", None) == "secret"
and value.get("id", None) is not None
):
[key] = value["id"]
if key in self.secrets_map:
return self.secrets_map[key]
else:
if key in os.environ and os.environ[key]:
return os.environ[key]
raise KeyError(f'Missing key "{key}" in load(secrets_map)')
if (
value.get("lc", None) == 1
and value.get("type", None) == "not_implemented"
and value.get("id", None) is not None
):
raise NotImplementedError(
"Trying to load an object that doesn't implement "
f"serialization: {value}"
)
if (
value.get("lc", None) == 1
and value.get("type", None) == "constructor"
and value.get("id", None) is not None
):
[*namespace, name] = value["id"]
if namespace[0] not in self.valid_namespaces:
raise ValueError(f"Invalid namespace: {value}")
# The root namespace "langchain" is not a valid identifier.
if len(namespace) == 1 and namespace[0] == "langchain":
raise ValueError(f"Invalid namespace: {value}")
mod = importlib.import_module(".".join(namespace))
cls = getattr(mod, name)
# The class must be a subclass of Serializable.
if not issubclass(cls, Serializable):
raise ValueError(f"Invalid namespace: {value}")
# We don't need to recurse on kwargs
# as json.loads will do that for us.
kwargs = value.get("kwargs", dict())
return cls(**kwargs)
return value
def loads(
text: str,
*,
secrets_map: Optional[Dict[str, str]] = None,
valid_namespaces: Optional[List[str]] = None,
) -> Any:
"""Revive a LangChain class from a JSON string.
Equivalent to `load(json.loads(text))`.
Args:
text: The string to load.
secrets_map: A map of secrets to load.
valid_namespaces: A list of additional namespaces (modules)
to allow to be deserialized.
Returns:
Revived LangChain objects.
"""
return json.loads(text, object_hook=Reviver(secrets_map, valid_namespaces))
def load(
obj: Any,
*,
secrets_map: Optional[Dict[str, str]] = None,
valid_namespaces: Optional[List[str]] = None,
) -> Any:
"""Revive a LangChain class from a JSON object. Use this if you already
have a parsed JSON object, e.g. from `json.load` or `orjson.loads`.
Args:
obj: The object to load.
secrets_map: A map of secrets to load.
valid_namespaces: A list of additional namespaces (modules)
to allow to be deserialized.
Returns:
Revived LangChain objects.
"""
reviver = Reviver(secrets_map, valid_namespaces)
def _load(obj: Any) -> Any:
if isinstance(obj, dict):
# Need to revive leaf nodes before reviving this node
loaded_obj = {k: _load(v) for k, v in obj.items()}
return reviver(loaded_obj)
if isinstance(obj, list):
return [_load(o) for o in obj]
return obj
return _load(obj)
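Editor's note: the inverse direction as a small round-trip sketch, based on the code shown above; secrets_map is only needed when the serialized form contains secret references.

from langchain_core.load import dumps, loads
from langchain_core.messages import AIMessage

original = AIMessage(content="hi there")
revived = loads(dumps(original))  # Reviver imports langchain_core.messages.ai and rebuilds the object
assert revived == original        # pydantic field-wise equality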


@@ -0,0 +1,207 @@
from abc import ABC
from typing import Any, Dict, List, Literal, Optional, TypedDict, Union, cast
from langchain_core.pydantic_v1 import BaseModel, PrivateAttr
class BaseSerialized(TypedDict):
"""Base class for serialized objects."""
lc: int
id: List[str]
class SerializedConstructor(BaseSerialized):
"""Serialized constructor."""
type: Literal["constructor"]
kwargs: Dict[str, Any]
class SerializedSecret(BaseSerialized):
"""Serialized secret."""
type: Literal["secret"]
class SerializedNotImplemented(BaseSerialized):
"""Serialized not implemented."""
type: Literal["not_implemented"]
repr: Optional[str]
def try_neq_default(value: Any, key: str, model: BaseModel) -> bool:
try:
return model.__fields__[key].get_default() != value
except Exception:
return True
class Serializable(BaseModel, ABC):
"""Serializable base class."""
@classmethod
def is_lc_serializable(cls) -> bool:
"""Is this class serializable?"""
return False
@classmethod
def get_lc_namespace(cls) -> List[str]:
"""Get the namespace of the langchain object.
For example, if the class is `langchain.llms.openai.OpenAI`, then the
namespace is ["langchain", "llms", "openai"]
"""
return cls.__module__.split(".")
@property
def lc_secrets(self) -> Dict[str, str]:
"""A map of constructor argument names to secret ids.
For example,
{"openai_api_key": "OPENAI_API_KEY"}
"""
return dict()
@property
def lc_attributes(self) -> Dict:
"""List of attribute names that should be included in the serialized kwargs.
These attributes must be accepted by the constructor.
"""
return {}
@classmethod
def lc_id(cls) -> List[str]:
"""A unique identifier for this class for serialization purposes.
The unique identifier is a list of strings that describes the path
to the object.
"""
return [*cls.get_lc_namespace(), cls.__name__]
class Config:
extra = "ignore"
def __repr_args__(self) -> Any:
return [
(k, v)
for k, v in super().__repr_args__()
if (k not in self.__fields__ or try_neq_default(v, k, self))
]
_lc_kwargs = PrivateAttr(default_factory=dict)
def __init__(self, **kwargs: Any) -> None:
super().__init__(**kwargs)
self._lc_kwargs = kwargs
def to_json(self) -> Union[SerializedConstructor, SerializedNotImplemented]:
if not self.is_lc_serializable():
return self.to_json_not_implemented()
secrets = dict()
# Get latest values for kwargs if there is an attribute with same name
lc_kwargs = {
k: getattr(self, k, v)
for k, v in self._lc_kwargs.items()
if not (self.__exclude_fields__ or {}).get(k, False) # type: ignore
}
# Merge the lc_secrets and lc_attributes from every class in the MRO
for cls in [None, *self.__class__.mro()]:
# Once we get to Serializable, we're done
if cls is Serializable:
break
if cls:
deprecated_attributes = [
"lc_namespace",
"lc_serializable",
]
for attr in deprecated_attributes:
if hasattr(cls, attr):
raise ValueError(
f"Class {self.__class__} has a deprecated "
f"attribute {attr}. Please use the corresponding "
f"classmethod instead."
)
# Get a reference to self bound to each class in the MRO
this = cast(Serializable, self if cls is None else super(cls, self))
secrets.update(this.lc_secrets)
lc_kwargs.update(this.lc_attributes)
# include all secrets, even if not specified in kwargs
# as these secrets may be passed as an environment variable instead
for key in secrets.keys():
secret_value = getattr(self, key, None) or lc_kwargs.get(key)
if secret_value is not None:
lc_kwargs.update({key: secret_value})
return {
"lc": 1,
"type": "constructor",
"id": self.lc_id(),
"kwargs": lc_kwargs
if not secrets
else _replace_secrets(lc_kwargs, secrets),
}
def to_json_not_implemented(self) -> SerializedNotImplemented:
return to_json_not_implemented(self)
def _replace_secrets(
root: Dict[Any, Any], secrets_map: Dict[str, str]
) -> Dict[Any, Any]:
result = root.copy()
for path, secret_id in secrets_map.items():
[*parts, last] = path.split(".")
current = result
for part in parts:
if part not in current:
break
current[part] = current[part].copy()
current = current[part]
if last in current:
current[last] = {
"lc": 1,
"type": "secret",
"id": [secret_id],
}
return result
def to_json_not_implemented(obj: object) -> SerializedNotImplemented:
"""Serialize a "not implemented" object.
Args:
obj: object to serialize
Returns:
SerializedNotImplemented
"""
_id: List[str] = []
try:
if hasattr(obj, "__name__"):
_id = [*obj.__module__.split("."), obj.__name__]
elif hasattr(obj, "__class__"):
_id = [*obj.__class__.__module__.split("."), obj.__class__.__name__]
except Exception:
pass
result: SerializedNotImplemented = {
"lc": 1,
"type": "not_implemented",
"id": _id,
"repr": None,
}
try:
result["repr"] = repr(obj)
except Exception:
pass
return result
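Editor's note: a hedged sketch of opting a class into serialization and marking a constructor argument as a secret; MyClient, api_key, and MY_API_KEY are illustrative names, not part of this diff.

from typing import Dict

from langchain_core.load.serializable import Serializable


class MyClient(Serializable):
    api_key: str
    endpoint: str = "https://example.invalid"

    @classmethod
    def is_lc_serializable(cls) -> bool:
        return True

    @property
    def lc_secrets(self) -> Dict[str, str]:
        # Serialize api_key as a reference to the MY_API_KEY secret id.
        return {"api_key": "MY_API_KEY"}


# MyClient(api_key="sk-123").to_json() replaces the key in the emitted kwargs with
# {"lc": 1, "type": "secret", "id": ["MY_API_KEY"]}.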


@@ -0,0 +1,59 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any, Dict, List
from langchain_core.load.serializable import Serializable
class BaseMemory(Serializable, ABC):
"""Abstract base class for memory in Chains.
Memory refers to state in Chains. Memory can be used to store information about
past executions of a Chain and inject that information into the inputs of
future executions of the Chain. For example, for conversational Chains Memory
can be used to store conversations and automatically add them to future model
prompts so that the model has the necessary context to respond coherently to
the latest input.
Example:
.. code-block:: python
class SimpleMemory(BaseMemory):
memories: Dict[str, Any] = dict()
@property
def memory_variables(self) -> List[str]:
return list(self.memories.keys())
def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, str]:
return self.memories
def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
pass
def clear(self) -> None:
pass
""" # noqa: E501
class Config:
"""Configuration for this pydantic object."""
arbitrary_types_allowed = True
@property
@abstractmethod
def memory_variables(self) -> List[str]:
"""The string keys this memory class will add to chain inputs."""
@abstractmethod
def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
"""Return key-value pairs given the text input to the chain."""
@abstractmethod
def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
"""Save the context of this chain run to memory."""
@abstractmethod
def clear(self) -> None:
"""Clear memory contents."""


@@ -0,0 +1,122 @@
from typing import List, Sequence, Union
from langchain_core.messages.ai import AIMessage, AIMessageChunk
from langchain_core.messages.base import (
BaseMessage,
BaseMessageChunk,
merge_content,
message_to_dict,
messages_to_dict,
)
from langchain_core.messages.chat import ChatMessage, ChatMessageChunk
from langchain_core.messages.function import FunctionMessage, FunctionMessageChunk
from langchain_core.messages.human import HumanMessage, HumanMessageChunk
from langchain_core.messages.system import SystemMessage, SystemMessageChunk
from langchain_core.messages.tool import ToolMessage, ToolMessageChunk
AnyMessage = Union[
AIMessage, HumanMessage, ChatMessage, SystemMessage, FunctionMessage, ToolMessage
]
def get_buffer_string(
messages: Sequence[BaseMessage], human_prefix: str = "Human", ai_prefix: str = "AI"
) -> str:
"""Convert sequence of Messages to strings and concatenate them into one string.
Args:
messages: Messages to be converted to strings.
human_prefix: The prefix to prepend to contents of HumanMessages.
ai_prefix: The prefix to prepend to contents of AIMessages.
Returns:
A single string concatenation of all input messages.
Example:
.. code-block:: python
from langchain_core import AIMessage, HumanMessage
messages = [
HumanMessage(content="Hi, how are you?"),
AIMessage(content="Good, how are you?"),
]
get_buffer_string(messages)
# -> "Human: Hi, how are you?\nAI: Good, how are you?"
"""
string_messages = []
for m in messages:
if isinstance(m, HumanMessage):
role = human_prefix
elif isinstance(m, AIMessage):
role = ai_prefix
elif isinstance(m, SystemMessage):
role = "System"
elif isinstance(m, FunctionMessage):
role = "Function"
elif isinstance(m, ToolMessage):
role = "Tool"
elif isinstance(m, ChatMessage):
role = m.role
else:
raise ValueError(f"Got unsupported message type: {m}")
message = f"{role}: {m.content}"
if isinstance(m, AIMessage) and "function_call" in m.additional_kwargs:
message += f"{m.additional_kwargs['function_call']}"
string_messages.append(message)
return "\n".join(string_messages)
def _message_from_dict(message: dict) -> BaseMessage:
_type = message["type"]
if _type == "human":
return HumanMessage(**message["data"])
elif _type == "ai":
return AIMessage(**message["data"])
elif _type == "system":
return SystemMessage(**message["data"])
elif _type == "chat":
return ChatMessage(**message["data"])
elif _type == "function":
return FunctionMessage(**message["data"])
elif _type == "tool":
return ToolMessage(**message["data"])
else:
raise ValueError(f"Got unexpected message type: {_type}")
def messages_from_dict(messages: Sequence[dict]) -> List[BaseMessage]:
"""Convert a sequence of messages from dicts to Message objects.
Args:
messages: Sequence of messages (as dicts) to convert.
Returns:
List of messages (BaseMessages).
"""
return [_message_from_dict(m) for m in messages]
__all__ = [
"AIMessage",
"AIMessageChunk",
"AnyMessage",
"BaseMessage",
"BaseMessageChunk",
"ChatMessage",
"ChatMessageChunk",
"FunctionMessage",
"FunctionMessageChunk",
"HumanMessage",
"HumanMessageChunk",
"SystemMessage",
"SystemMessageChunk",
"ToolMessage",
"ToolMessageChunk",
"get_buffer_string",
"messages_from_dict",
"messages_to_dict",
"message_to_dict",
"merge_content",
]
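Editor's note: a short round-trip sketch with the helpers exported above.

from langchain_core.messages import (
    AIMessage,
    HumanMessage,
    get_buffer_string,
    messages_from_dict,
    messages_to_dict,
)

msgs = [HumanMessage(content="Hi"), AIMessage(content="Hello!")]
get_buffer_string(msgs)  # -> "Human: Hi\nAI: Hello!"
restored = messages_from_dict(messages_to_dict(msgs))
assert restored == msgs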


@@ -0,0 +1,47 @@
from typing import Any, Literal
from langchain_core.messages.base import (
BaseMessage,
BaseMessageChunk,
merge_content,
)
class AIMessage(BaseMessage):
"""A Message from an AI."""
example: bool = False
"""Whether this Message is being passed in to the model as part of an example
conversation.
"""
type: Literal["ai"] = "ai"
AIMessage.update_forward_refs()
class AIMessageChunk(AIMessage, BaseMessageChunk):
"""A Message chunk from an AI."""
# Ignoring mypy re-assignment here since we're overriding the value
# to make sure that the chunk variant can be discriminated from the
# non-chunk variant.
type: Literal["AIMessageChunk"] = "AIMessageChunk" # type: ignore[assignment] # noqa: E501
def __add__(self, other: Any) -> BaseMessageChunk: # type: ignore
if isinstance(other, AIMessageChunk):
if self.example != other.example:
raise ValueError(
"Cannot concatenate AIMessageChunks with different example values."
)
return self.__class__(
example=self.example,
content=merge_content(self.content, other.content),
additional_kwargs=self._merge_kwargs_dict(
self.additional_kwargs, other.additional_kwargs
),
)
return super().__add__(other)
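Editor's note: in practice this __add__ is what lets streamed chunks be accumulated with "+", as in this small sketch.

from langchain_core.messages import AIMessageChunk

chunk = AIMessageChunk(content="Hello, ") + AIMessageChunk(content="world")
chunk.content  # -> "Hello, world"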


@@ -0,0 +1,126 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Dict, List, Sequence, Union
from langchain_core.load.serializable import Serializable
from langchain_core.pydantic_v1 import Extra, Field
if TYPE_CHECKING:
from langchain_core.prompts.chat import ChatPromptTemplate
class BaseMessage(Serializable):
"""The base abstract Message class.
Messages are the inputs and outputs of ChatModels.
"""
content: Union[str, List[Union[str, Dict]]]
"""The string contents of the message."""
additional_kwargs: dict = Field(default_factory=dict)
"""Any additional information."""
type: str
class Config:
extra = Extra.allow
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return whether this class is serializable."""
return True
def __add__(self, other: Any) -> ChatPromptTemplate:
from langchain_core.prompts.chat import ChatPromptTemplate
prompt = ChatPromptTemplate(messages=[self])
return prompt + other
def merge_content(
first_content: Union[str, List[Union[str, Dict]]],
second_content: Union[str, List[Union[str, Dict]]],
) -> Union[str, List[Union[str, Dict]]]:
# If first chunk is a string
if isinstance(first_content, str):
# If the second chunk is also a string, then merge them naively
if isinstance(second_content, str):
return first_content + second_content
# If the second chunk is a list, add the first chunk to the start of the list
else:
return_list: List[Union[str, Dict]] = [first_content]
return return_list + second_content
# If both are lists, merge them naively
elif isinstance(second_content, List):
return first_content + second_content
# If the first content is a list, and the second content is a string
else:
# If the last element of the first content is a string
# Add the second content to the last element
if isinstance(first_content[-1], str):
return first_content[:-1] + [first_content[-1] + second_content]
else:
# Otherwise, add the second content as a new element of the list
return first_content + [second_content]
class BaseMessageChunk(BaseMessage):
"""A Message chunk, which can be concatenated with other Message chunks."""
def _merge_kwargs_dict(
self, left: Dict[str, Any], right: Dict[str, Any]
) -> Dict[str, Any]:
"""Merge additional_kwargs from another BaseMessageChunk into this one."""
merged = left.copy()
for k, v in right.items():
if k not in merged:
merged[k] = v
elif type(merged[k]) != type(v):
raise ValueError(
f'additional_kwargs["{k}"] already exists in this message,'
" but with a different type."
)
elif isinstance(merged[k], str):
merged[k] += v
elif isinstance(merged[k], dict):
merged[k] = self._merge_kwargs_dict(merged[k], v)
else:
raise ValueError(
f"Additional kwargs key {k} already exists in this message."
)
return merged
def __add__(self, other: Any) -> BaseMessageChunk: # type: ignore
if isinstance(other, BaseMessageChunk):
# If both are (subclasses of) BaseMessageChunk,
# concat into a single BaseMessageChunk
return self.__class__(
content=merge_content(self.content, other.content),
additional_kwargs=self._merge_kwargs_dict(
self.additional_kwargs, other.additional_kwargs
),
)
else:
raise TypeError(
'unsupported operand type(s) for +: "'
f"{self.__class__.__name__}"
f'" and "{other.__class__.__name__}"'
)
def message_to_dict(message: BaseMessage) -> dict:
return {"type": message.type, "data": message.dict()}
def messages_to_dict(messages: Sequence[BaseMessage]) -> List[dict]:
"""Convert a sequence of Messages to a list of dictionaries.
Args:
messages: Sequence of messages (as BaseMessages) to convert.
Returns:
List of messages as dicts.
"""
return [message_to_dict(m) for m in messages]
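Editor's note: a small sketch of the __add__ shortcut on BaseMessage shown above, which composes plain messages into a ChatPromptTemplate; prompt behavior beyond construction is not part of this diff.

from langchain_core.messages import HumanMessage, SystemMessage

prompt = SystemMessage(content="You are terse.") + HumanMessage(content="Hello")
# prompt is a ChatPromptTemplate holding both messages;
# prompt.format_messages() yields them back as a two-message list.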

Some files were not shown because too many files have changed in this diff