Mirror of https://github.com/hwchase17/langchain.git, synced 2026-02-04 08:10:25 +00:00.
Comparing commits: langchain-... → eugene/why (308 commits)
.github/ISSUE_TEMPLATE/bug-report.yml (26 changes)

@@ -96,25 +96,21 @@ body:
       attributes:
         label: System Info
         description: |
-          Please share your system info with us.
+          Please share your system info with us. Do NOT skip this step and please don't trim
+          the output. Most users don't include enough information here and it makes it harder
+          for us to help you.
 
-          "pip freeze | grep langchain"
-          platform (windows / linux / mac)
-          python version
-
-          OR if you're on a recent version of langchain-core you can paste the output of:
+          Run the following command in your terminal and paste the output here:
 
           python -m langchain_core.sys_info
 
+          or if you have an existing python interpreter running:
+
+          from langchain_core import sys_info
+          sys_info.print_sys_info()
+
+          alternatively, put the entire output of `pip freeze` here.
       placeholder: |
         "pip freeze | grep langchain"
         platform
         python version
 
         Alternatively, if you're on a recent version of langchain-core you can paste the output of:
 
         python -m langchain_core.sys_info
 
+        These will only surface LangChain packages, don't forget to include any other relevant
+        packages you're using (if you're not sure what's relevant, you can paste the entire output of `pip freeze`).
       validations:
         required: true
.github/scripts/check_diff.py (109 changes)

@@ -2,10 +2,12 @@ import glob
 import json
 import os
 import sys
+import tomllib
 from collections import defaultdict
 from typing import Dict, List, Set
 from pathlib import Path
-import tomllib
+
+from get_min_versions import get_min_version_from_toml
 
 
 LANGCHAIN_DIRS = [
@@ -16,6 +18,12 @@ LANGCHAIN_DIRS = [
     "libs/experimental",
 ]
 
+# when set to True, we are ignoring core dependents
+# in order to be able to get CI to pass for each individual
+# package that depends on core
+# e.g. if you touch core, we don't then add textsplitters/etc to CI
+IGNORE_CORE_DEPENDENTS = False
+
 # ignored partners are removed from dependents
 # but still run if directly edited
 IGNORED_PARTNERS = [
@@ -23,9 +31,6 @@ IGNORED_PARTNERS = [
     # specifically in huggingface jobs
     # https://github.com/langchain-ai/langchain/issues/25558
     "huggingface",
-    # remove ai21 because of breaking changes in sdk version 2.14.0
-    # that have not been fixed yet
-    "ai21",
 ]
 
 
@@ -102,44 +107,96 @@ def add_dependents(dirs_to_eval: Set[str], dependents: dict) -> List[str]:
 
 
 def _get_configs_for_single_dir(job: str, dir_: str) -> List[Dict[str, str]]:
-    if dir_ == "libs/core":
-        return [
-            {"working-directory": dir_, "python-version": f"3.{v}"}
-            for v in range(8, 13)
-        ]
-    min_python = "3.8"
-    max_python = "3.12"
+    if job == "test-pydantic":
+        return _get_pydantic_test_configs(dir_)
 
+    if dir_ == "libs/core":
+        py_versions = ["3.9", "3.10", "3.11", "3.12"]
     # custom logic for specific directories
-    if dir_ == "libs/partners/milvus":
+    elif dir_ == "libs/partners/milvus":
         # milvus poetry doesn't allow 3.12 because they
         # declare deps in funny way
-        max_python = "3.11"
+        py_versions = ["3.9", "3.11"]
 
-    if dir_ in ["libs/community", "libs/langchain"] and job == "extended-tests":
+    elif dir_ in ["libs/community", "libs/langchain"] and job == "extended-tests":
         # community extended test resolution in 3.12 is slow
         # even in uv
-        max_python = "3.11"
+        py_versions = ["3.9", "3.11"]
 
-    if dir_ == "libs/community" and job == "compile-integration-tests":
+    elif dir_ == "libs/community" and job == "compile-integration-tests":
         # community integration deps are slow in 3.12
-        max_python = "3.11"
+        py_versions = ["3.9", "3.11"]
+    else:
+        py_versions = ["3.9", "3.12"]
 
-    return [
-        {"working-directory": dir_, "python-version": min_python},
-        {"working-directory": dir_, "python-version": max_python},
-    ]
+    return [{"working-directory": dir_, "python-version": py_v} for py_v in py_versions]
 
 
+def _get_pydantic_test_configs(
+    dir_: str, *, python_version: str = "3.11"
+) -> List[Dict[str, str]]:
+    with open("./libs/core/poetry.lock", "rb") as f:
+        core_poetry_lock_data = tomllib.load(f)
+    for package in core_poetry_lock_data["package"]:
+        if package["name"] == "pydantic":
+            core_max_pydantic_minor = package["version"].split(".")[1]
+            break
+
+    with open(f"./{dir_}/poetry.lock", "rb") as f:
+        dir_poetry_lock_data = tomllib.load(f)
+
+    for package in dir_poetry_lock_data["package"]:
+        if package["name"] == "pydantic":
+            dir_max_pydantic_minor = package["version"].split(".")[1]
+            break
+
+    core_min_pydantic_version = get_min_version_from_toml(
+        "./libs/core/pyproject.toml", "release", python_version, include=["pydantic"]
+    )["pydantic"]
+    core_min_pydantic_minor = (
+        core_min_pydantic_version.split(".")[1]
+        if "." in core_min_pydantic_version
+        else "0"
+    )
+    dir_min_pydantic_version = (
+        get_min_version_from_toml(
+            f"./{dir_}/pyproject.toml", "release", python_version, include=["pydantic"]
+        )
+        .get("pydantic", "0.0.0")
+    )
+    dir_min_pydantic_minor = (
+        dir_min_pydantic_version.split(".")[1]
+        if "." in dir_min_pydantic_version
+        else "0"
+    )
+
+    custom_mins = {
+        # depends on pydantic-settings 2.4 which requires pydantic 2.7
+        "libs/community": 7,
+    }
+
+    max_pydantic_minor = min(
+        int(dir_max_pydantic_minor),
+        int(core_max_pydantic_minor),
+    )
+    min_pydantic_minor = max(
+        int(dir_min_pydantic_minor),
+        int(core_min_pydantic_minor),
+        custom_mins.get(dir_, 0),
+    )
+
+    configs = [
+        {
+            "working-directory": dir_,
+            "pydantic-version": f"2.{v}.0",
+            "python-version": python_version,
+        }
+        for v in range(min_pydantic_minor, max_pydantic_minor + 1)
+    ]
+    return configs
 
 
 def _get_configs_for_multi_dirs(
-    job: str, dirs_to_run: List[str], dependents: dict
+    job: str, dirs_to_run: Dict[str, Set[str]], dependents: dict
 ) -> List[Dict[str, str]]:
     if job == "lint":
         dirs = add_dependents(
             dirs_to_run["lint"] | dirs_to_run["test"] | dirs_to_run["extended-test"],
             dependents,
         )
-    elif job in ["test", "compile-integration-tests", "dependencies"]:
+    elif job in ["test", "compile-integration-tests", "dependencies", "test-pydantic"]:
         dirs = add_dependents(
             dirs_to_run["test"] | dirs_to_run["extended-test"], dependents
         )
@@ -168,6 +225,7 @@ if __name__ == "__main__":
         dirs_to_run["lint"] = all_package_dirs()
         dirs_to_run["test"] = all_package_dirs()
         dirs_to_run["extended-test"] = set(LANGCHAIN_DIRS)
+
     for file in files:
         if any(
             file.startswith(dir_)
@@ -185,8 +243,12 @@ if __name__ == "__main__":
         if any(file.startswith(dir_) for dir_ in LANGCHAIN_DIRS):
             # add that dir and all dirs after in LANGCHAIN_DIRS
             # for extended testing
+
             found = False
             for dir_ in LANGCHAIN_DIRS:
+                if dir_ == "libs/core" and IGNORE_CORE_DEPENDENTS:
+                    dirs_to_run["extended-test"].add(dir_)
+                    continue
                 if file.startswith(dir_):
                     found = True
                 if found:
@@ -198,7 +260,6 @@ if __name__ == "__main__":
             dirs_to_run["test"].add("libs/partners/mistralai")
             dirs_to_run["test"].add("libs/partners/openai")
             dirs_to_run["test"].add("libs/partners/anthropic")
-            dirs_to_run["test"].add("libs/partners/ai21")
             dirs_to_run["test"].add("libs/partners/fireworks")
             dirs_to_run["test"].add("libs/partners/groq")
@@ -228,7 +289,6 @@ if __name__ == "__main__":
 
     # we now have dirs_by_job
     # todo: clean this up
-
     map_job_to_configs = {
         job: _get_configs_for_multi_dirs(job, dirs_to_run, dependents)
         for job in [
@@ -237,6 +297,7 @@ if __name__ == "__main__":
             "extended-tests",
             "compile-integration-tests",
             "dependencies",
+            "test-pydantic",
         ]
     }
     map_job_to_configs["test-doc-imports"] = (
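For orientation, the matrix math in `_get_pydantic_test_configs` reduces to intersecting the pydantic 2.x minor range that `libs/core` tolerates with the range the target package tolerates, with an optional per-package floor. A minimal sketch of that computation, with invented bounds standing in for the values the real script reads from `poetry.lock` and `pyproject.toml`:

```python
# A minimal sketch (not the script itself) of how the test-pydantic matrix
# above is derived; the numeric bounds here are invented for illustration.
from typing import Dict, List, Tuple


def pydantic_matrix(
    dir_: str,
    *,
    core_bounds: Tuple[int, int] = (5, 8),  # min/max pydantic 2.x minor for libs/core
    dir_bounds: Tuple[int, int] = (5, 8),   # same, for the package under test
    custom_min: int = 0,                    # e.g. 7 for libs/community (pydantic-settings 2.4)
    python_version: str = "3.11",
) -> List[Dict[str, str]]:
    # Test every 2.<minor>.0 that both core and the package can accept.
    lo = max(core_bounds[0], dir_bounds[0], custom_min)
    hi = min(core_bounds[1], dir_bounds[1])
    return [
        {
            "working-directory": dir_,
            "pydantic-version": f"2.{v}.0",
            "python-version": python_version,
        }
        for v in range(lo, hi + 1)
    ]


print(pydantic_matrix("libs/community", custom_min=7))
# -> configs for pydantic 2.7.0 and 2.8.0 only
```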
@@ -11,7 +11,7 @@ if __name__ == "__main__":
 
     # see if we're releasing an rc
     version = toml_data["tool"]["poetry"]["version"]
-    releasing_rc = "rc" in version
+    releasing_rc = "rc" in version or "dev" in version
 
     # if not, iterate through dependencies and make sure none allow prereleases
     if not releasing_rc:
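Taken on its own, the widened check simply treats any version string containing `rc` or `dev` as a prerelease; the version strings below are illustrative, only the membership test comes from the diff:

```python
# Illustrative version strings; only the substring test comes from the diff.
for version in ["0.2.1", "0.2.2rc1", "0.3.0.dev1"]:
    releasing_rc = "rc" in version or "dev" in version
    print(version, "->", releasing_rc)
# 0.2.1 -> False
# 0.2.2rc1 -> True
# 0.3.0.dev1 -> True
```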
.github/scripts/get_min_versions.py (53 changes)

@@ -1,4 +1,5 @@
 import sys
+from typing import Optional
 
 if sys.version_info >= (3, 11):
     import tomllib
@@ -7,6 +8,9 @@ else:
     import tomli as tomllib
 
 from packaging.version import parse as parse_version
+from packaging.specifiers import SpecifierSet
+from packaging.version import Version
+
 import re
 
 MIN_VERSION_LIBS = [
@@ -17,7 +21,14 @@ MIN_VERSION_LIBS = [
     "SQLAlchemy",
 ]
 
-SKIP_IF_PULL_REQUEST = ["langchain-core"]
+# some libs only get checked on release because of simultaneous changes in
+# multiple libs
+SKIP_IF_PULL_REQUEST = [
+    "langchain-core",
+    "langchain-text-splitters",
+    "langchain",
+    "langchain-community",
+]
 
 
 def get_min_version(version: str) -> str:
@@ -45,7 +56,13 @@ def get_min_version(version: str) -> str:
     raise ValueError(f"Unrecognized version format: {version}")
 
 
-def get_min_version_from_toml(toml_path: str, versions_for: str):
+def get_min_version_from_toml(
+    toml_path: str,
+    versions_for: str,
+    python_version: str,
+    *,
+    include: Optional[list] = None,
+):
     # Parse the TOML file
     with open(toml_path, "rb") as file:
         toml_data = tomllib.load(file)
@@ -57,18 +74,26 @@ def get_min_version_from_toml(toml_path: str, versions_for: str):
     min_versions = {}
 
     # Iterate over the libs in MIN_VERSION_LIBS
-    for lib in MIN_VERSION_LIBS:
+    for lib in set(MIN_VERSION_LIBS + (include or [])):
         if versions_for == "pull_request" and lib in SKIP_IF_PULL_REQUEST:
             # some libs only get checked on release because of simultaneous
-            # changes
+            # changes in multiple libs
             continue
         # Check if the lib is present in the dependencies
         if lib in dependencies:
+            if include and lib not in include:
+                continue
             # Get the version string
             version_string = dependencies[lib]
 
             if isinstance(version_string, dict):
                 version_string = version_string["version"]
+            if isinstance(version_string, list):
+                version_string = [
+                    vs
+                    for vs in version_string
+                    if check_python_version(python_version, vs["python"])
+                ][0]["version"]
 
             # Use parse_version to get the minimum supported version from version_string
             min_version = get_min_version(version_string)
@@ -79,13 +104,31 @@ def get_min_version_from_toml(toml_path: str, versions_for: str):
     return min_versions
 
 
+def check_python_version(version_string, constraint_string):
+    """
+    Check if the given Python version matches the given constraints.
+
+    :param version_string: A string representing the Python version (e.g. "3.8.5").
+    :param constraint_string: A string representing the package's Python version constraints (e.g. ">=3.6, <4.0").
+    :return: True if the version matches the constraints, False otherwise.
+    """
+    try:
+        version = Version(version_string)
+        constraints = SpecifierSet(constraint_string)
+        return version in constraints
+    except Exception as e:
+        print(f"Error: {e}")
+        return False
+
+
 if __name__ == "__main__":
     # Get the TOML file path from the command line argument
     toml_file = sys.argv[1]
     versions_for = sys.argv[2]
+    python_version = sys.argv[3]
     assert versions_for in ["release", "pull_request"]
 
     # Call the function to get the minimum versions
-    min_versions = get_min_version_from_toml(toml_file, versions_for)
+    min_versions = get_min_version_from_toml(toml_file, versions_for, python_version)
 
     print(" ".join([f"{lib}=={version}" for lib, version in min_versions.items()]))
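The new `check_python_version` helper leans entirely on `packaging`'s public `Version`/`SpecifierSet` API. A quick sketch of the behavior it relies on, plus the kind of list-valued poetry dependency it was added to handle (the constraint strings below are examples, not taken from a real lockfile):

```python
from packaging.specifiers import SpecifierSet
from packaging.version import Version

# The membership test the helper wraps:
print(Version("3.8.5") in SpecifierSet(">=3.6,<4.0"))    # True
print(Version("3.12.0") in SpecifierSet(">=3.8,<3.12"))  # False

# It exists so the script can pick the right entry when a poetry dependency
# is a list of {"version": ..., "python": ...} dicts, e.g. (hypothetical):
dependency = [
    {"version": ">=1,<3", "python": "<3.12"},
    {"version": ">=2,<3", "python": ">=3.12"},
]
python_version = "3.11"
version_string = [
    vs
    for vs in dependency
    if Version(python_version) in SpecifierSet(vs["python"])
][0]["version"]
print(version_string)  # >=1,<3
```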
.github/workflows/_dependencies.yml (114 changes, file deleted)

@@ -1,114 +0,0 @@
-name: dependencies
-
-on:
-  workflow_call:
-    inputs:
-      working-directory:
-        required: true
-        type: string
-        description: "From which folder this pipeline executes"
-      langchain-location:
-        required: false
-        type: string
-        description: "Relative path to the langchain library folder"
-      python-version:
-        required: true
-        type: string
-        description: "Python version to use"
-
-env:
-  POETRY_VERSION: "1.7.1"
-
-jobs:
-  build:
-    defaults:
-      run:
-        working-directory: ${{ inputs.working-directory }}
-    runs-on: ubuntu-latest
-    name: dependency checks ${{ inputs.python-version }}
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Set up Python ${{ inputs.python-version }} + Poetry ${{ env.POETRY_VERSION }}
-        uses: "./.github/actions/poetry_setup"
-        with:
-          python-version: ${{ inputs.python-version }}
-          poetry-version: ${{ env.POETRY_VERSION }}
-          working-directory: ${{ inputs.working-directory }}
-          cache-key: pydantic-cross-compat
-
-      - name: Install dependencies
-        shell: bash
-        run: poetry install
-
-      - name: Check imports with base dependencies
-        shell: bash
-        run: poetry run make check_imports
-
-      - name: Install test dependencies
-        shell: bash
-        run: poetry install --with test
-
-      - name: Install langchain editable
-        working-directory: ${{ inputs.working-directory }}
-        if: ${{ inputs.langchain-location }}
-        env:
-          LANGCHAIN_LOCATION: ${{ inputs.langchain-location }}
-        run: |
-          poetry run pip install -e "$LANGCHAIN_LOCATION"
-
-      - name: Install the opposite major version of pydantic
-        # If normal tests use pydantic v1, here we'll use v2, and vice versa.
-        shell: bash
-        # airbyte currently doesn't support pydantic v2
-        if: ${{ !startsWith(inputs.working-directory, 'libs/partners/airbyte') }}
-        run: |
-          # Determine the major part of pydantic version
-          REGULAR_VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)
-
-          if [[ "$REGULAR_VERSION" == "1" ]]; then
-            PYDANTIC_DEP=">=2.1,<3"
-            TEST_WITH_VERSION="2"
-          elif [[ "$REGULAR_VERSION" == "2" ]]; then
-            PYDANTIC_DEP="<2"
-            TEST_WITH_VERSION="1"
-          else
-            echo "Unexpected pydantic major version '$REGULAR_VERSION', cannot determine which version to use for cross-compatibility test."
-            exit 1
-          fi
-
-          # Install via `pip` instead of `poetry add` to avoid changing lockfile,
-          # which would prevent caching from working: the cache would get saved
-          # to a different key than where it gets loaded from.
-          poetry run pip install "pydantic${PYDANTIC_DEP}"
-
-          # Ensure that the correct pydantic is installed now.
-          echo "Checking pydantic version... Expecting ${TEST_WITH_VERSION}"
-
-          # Determine the major part of pydantic version
-          CURRENT_VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)
-
-          # Check that the major part of pydantic version is as expected, if not
-          # raise an error
-          if [[ "$CURRENT_VERSION" != "$TEST_WITH_VERSION" ]]; then
-            echo "Error: expected pydantic version ${CURRENT_VERSION} to have been installed, but found: ${TEST_WITH_VERSION}"
-            exit 1
-          fi
-          echo "Found pydantic version ${CURRENT_VERSION}, as expected"
-
-      - name: Run pydantic compatibility tests
-        # airbyte currently doesn't support pydantic v2
-        if: ${{ !startsWith(inputs.working-directory, 'libs/partners/airbyte') }}
-        shell: bash
-        run: make test
-
-      - name: Ensure the tests did not create any additional files
-        shell: bash
-        run: |
-          set -eu
-
-          STATUS="$(git status)"
-          echo "$STATUS"
-
-          # grep will exit non-zero if the target message isn't found,
-          # and `set -e` above will cause the step to fail.
-          echo "$STATUS" | grep 'nothing to commit, working tree clean'
.github/workflows/_integration_test.yml (1 change)

@@ -67,6 +67,7 @@ jobs:
       NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
       GOOGLE_SEARCH_API_KEY: ${{ secrets.GOOGLE_SEARCH_API_KEY }}
      GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
+      HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
       EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
       NOMIC_API_KEY: ${{ secrets.NOMIC_API_KEY }}
       WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }}
.github/workflows/_lint.yml (12 changes)

@@ -7,10 +7,6 @@ on:
         required: true
         type: string
         description: "From which folder this pipeline executes"
-      langchain-location:
-        required: false
-        type: string
-        description: "Relative path to the langchain library folder"
       python-version:
         required: true
         type: string
@@ -63,14 +59,6 @@ jobs:
         run: |
           poetry install --with lint,typing
 
-      - name: Install langchain editable
-        working-directory: ${{ inputs.working-directory }}
-        if: ${{ inputs.langchain-location }}
-        env:
-          LANGCHAIN_LOCATION: ${{ inputs.langchain-location }}
-        run: |
-          poetry run pip install -e "$LANGCHAIN_LOCATION"
-
       - name: Get .mypy_cache to speed up mypy
         uses: actions/cache@v4
         env:
.github/workflows/_release.yml (7 changes)

@@ -85,7 +85,7 @@ jobs:
           path: langchain
           sparse-checkout: | # this only grabs files for relevant dir
             ${{ inputs.working-directory }}
-          ref: master # this scopes to just master branch
+          ref: ${{ github.ref }} # this scopes to just ref'd branch
           fetch-depth: 0 # this fetches entire commit history
       - name: Check Tags
         id: check-tags
@@ -164,6 +164,7 @@ jobs:
 
       - name: Set up Python + Poetry ${{ env.POETRY_VERSION }}
         uses: "./.github/actions/poetry_setup"
+        id: setup-python
         with:
           python-version: ${{ env.PYTHON_VERSION }}
           poetry-version: ${{ env.POETRY_VERSION }}
@@ -231,7 +232,8 @@ jobs:
         id: min-version
         run: |
           poetry run pip install packaging
-          min_versions="$(poetry run python $GITHUB_WORKSPACE/.github/scripts/get_min_versions.py pyproject.toml release)"
+          python_version="$(poetry run python --version | awk '{print $2}')"
+          min_versions="$(poetry run python $GITHUB_WORKSPACE/.github/scripts/get_min_versions.py pyproject.toml release $python_version)"
           echo "min-versions=$min_versions" >> "$GITHUB_OUTPUT"
           echo "min-versions=$min_versions"
@@ -273,6 +275,7 @@ jobs:
           GOOGLE_SEARCH_API_KEY: ${{ secrets.GOOGLE_SEARCH_API_KEY }}
           GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
           GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
+          HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
           EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
           NOMIC_API_KEY: ${{ secrets.NOMIC_API_KEY }}
           WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }}
.github/workflows/_test.yml (51 changes)

@@ -7,10 +7,6 @@ on:
         required: true
         type: string
         description: "From which folder this pipeline executes"
-      langchain-location:
-        required: false
-        type: string
-        description: "Relative path to the langchain library folder"
       python-version:
         required: true
         type: string
@@ -31,29 +27,41 @@ jobs:
 
       - name: Set up Python ${{ inputs.python-version }} + Poetry ${{ env.POETRY_VERSION }}
         uses: "./.github/actions/poetry_setup"
+        id: setup-python
         with:
           python-version: ${{ inputs.python-version }}
           poetry-version: ${{ env.POETRY_VERSION }}
           working-directory: ${{ inputs.working-directory }}
           cache-key: core
 
       - name: Install dependencies
         shell: bash
         run: poetry install --with test
 
-      - name: Install langchain editable
-        working-directory: ${{ inputs.working-directory }}
-        if: ${{ inputs.langchain-location }}
-        env:
-          LANGCHAIN_LOCATION: ${{ inputs.langchain-location }}
-        run: |
-          poetry run pip install -e "$LANGCHAIN_LOCATION"
-
       - name: Run core tests
         shell: bash
         run: |
           make test
 
+      - name: Get minimum versions
+        working-directory: ${{ inputs.working-directory }}
+        id: min-version
+        shell: bash
+        run: |
+          poetry run pip install packaging tomli
+          python_version="$(poetry run python --version | awk '{print $2}')"
+          min_versions="$(poetry run python $GITHUB_WORKSPACE/.github/scripts/get_min_versions.py pyproject.toml pull_request $python_version)"
+          echo "min-versions=$min_versions" >> "$GITHUB_OUTPUT"
+          echo "min-versions=$min_versions"
+
+      - name: Run unit tests with minimum dependency versions
+        if: ${{ steps.min-version.outputs.min-versions != '' }}
+        env:
+          MIN_VERSIONS: ${{ steps.min-version.outputs.min-versions }}
+        run: |
+          poetry run pip install $MIN_VERSIONS
+          make tests
+        working-directory: ${{ inputs.working-directory }}
+
       - name: Ensure the tests did not create any additional files
         shell: bash
         run: |
@@ -66,20 +74,3 @@ jobs:
           # and `set -e` above will cause the step to fail.
           echo "$STATUS" | grep 'nothing to commit, working tree clean'
 
-      - name: Get minimum versions
-        working-directory: ${{ inputs.working-directory }}
-        id: min-version
-        run: |
-          poetry run pip install packaging tomli
-          min_versions="$(poetry run python $GITHUB_WORKSPACE/.github/scripts/get_min_versions.py pyproject.toml pull_request)"
-          echo "min-versions=$min_versions" >> "$GITHUB_OUTPUT"
-          echo "min-versions=$min_versions"
-
-      - name: Run unit tests with minimum dependency versions
-        if: ${{ steps.min-version.outputs.min-versions != '' }}
-        env:
-          MIN_VERSIONS: ${{ steps.min-version.outputs.min-versions }}
-        run: |
-          poetry run pip install --force-reinstall $MIN_VERSIONS --editable .
-          make tests
-        working-directory: ${{ inputs.working-directory }}
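The contract between the relocated "Get minimum versions" step and the `pip install $MIN_VERSIONS` line is just the script's stdout: a single space-joined line of `lib==version` pins, per the `print` at the end of get_min_versions.py. A sketch with placeholder pins:

```python
# What get_min_versions.py prints and the workflow captures into $MIN_VERSIONS;
# the pins here are placeholders, not real computed minimums.
min_versions = {"langchain-core": "0.2.27", "SQLAlchemy": "1.4"}
print(" ".join(f"{lib}=={version}" for lib, version in min_versions.items()))
# langchain-core==0.2.27 SQLAlchemy==1.4
```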
.github/workflows/_test_pydantic.yml (new file, 64 lines)

@@ -0,0 +1,64 @@
+name: test pydantic intermediate versions
+
+on:
+  workflow_call:
+    inputs:
+      working-directory:
+        required: true
+        type: string
+        description: "From which folder this pipeline executes"
+      python-version:
+        required: false
+        type: string
+        description: "Python version to use"
+        default: "3.11"
+      pydantic-version:
+        required: true
+        type: string
+        description: "Pydantic version to test."
+
+env:
+  POETRY_VERSION: "1.7.1"
+
+jobs:
+  build:
+    defaults:
+      run:
+        working-directory: ${{ inputs.working-directory }}
+    runs-on: ubuntu-latest
+    name: "make test # pydantic: ~=${{ inputs.pydantic-version }}, python: ${{ inputs.python-version }}, "
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python ${{ inputs.python-version }} + Poetry ${{ env.POETRY_VERSION }}
+        uses: "./.github/actions/poetry_setup"
+        with:
+          python-version: ${{ inputs.python-version }}
+          poetry-version: ${{ env.POETRY_VERSION }}
+          working-directory: ${{ inputs.working-directory }}
+          cache-key: core
+
+      - name: Install dependencies
+        shell: bash
+        run: poetry install --with test
+
+      - name: Overwrite pydantic version
+        shell: bash
+        run: poetry run pip install pydantic~=${{ inputs.pydantic-version }}
+
+      - name: Run core tests
+        shell: bash
+        run: |
+          make test
+
+      - name: Ensure the tests did not create any additional files
+        shell: bash
+        run: |
+          set -eu
+
+          STATUS="$(git status)"
+          echo "$STATUS"
+
+          # grep will exit non-zero if the target message isn't found,
+          # and `set -e` above will cause the step to fail.
+          echo "$STATUS" | grep 'nothing to commit, working tree clean'
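The `pip install pydantic~=${{ inputs.pydantic-version }}` step uses a compatible-release specifier, so a matrix entry like `2.6.0` pins the job to the 2.6.x series. A sketch of what `~=2.6.0` admits, using `packaging` (the candidate versions are examples):

```python
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("~=2.6.0")  # same operator the workflow passes to pip
for candidate in ["2.6.0", "2.6.4", "2.7.0"]:
    print(candidate, "->", candidate in spec)
# 2.6.0 -> True
# 2.6.4 -> True
# 2.7.0 -> False
```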
.github/workflows/check_diffs.yml (36 changes)

@@ -31,6 +31,7 @@ jobs:
         uses: Ana06/get-changed-files@v2.2.0
       - id: set-matrix
         run: |
+          python -m pip install packaging
           python .github/scripts/check_diff.py ${{ steps.files.outputs.all }} >> $GITHUB_OUTPUT
     outputs:
       lint: ${{ steps.set-matrix.outputs.lint }}
@@ -39,6 +40,7 @@ jobs:
       compile-integration-tests: ${{ steps.set-matrix.outputs.compile-integration-tests }}
       dependencies: ${{ steps.set-matrix.outputs.dependencies }}
       test-doc-imports: ${{ steps.set-matrix.outputs.test-doc-imports }}
+      test-pydantic: ${{ steps.set-matrix.outputs.test-pydantic }}
   lint:
     name: cd ${{ matrix.job-configs.working-directory }}
     needs: [ build ]
@@ -46,6 +48,7 @@ jobs:
     strategy:
       matrix:
         job-configs: ${{ fromJson(needs.build.outputs.lint) }}
+      fail-fast: false
     uses: ./.github/workflows/_lint.yml
     with:
       working-directory: ${{ matrix.job-configs.working-directory }}
@@ -59,18 +62,34 @@ jobs:
     strategy:
       matrix:
         job-configs: ${{ fromJson(needs.build.outputs.test) }}
+      fail-fast: false
     uses: ./.github/workflows/_test.yml
     with:
       working-directory: ${{ matrix.job-configs.working-directory }}
       python-version: ${{ matrix.job-configs.python-version }}
     secrets: inherit
 
+  test-pydantic:
+    name: cd ${{ matrix.job-configs.working-directory }}
+    needs: [ build ]
+    if: ${{ needs.build.outputs.test-pydantic != '[]' }}
+    strategy:
+      matrix:
+        job-configs: ${{ fromJson(needs.build.outputs.test-pydantic) }}
+      fail-fast: false
+    uses: ./.github/workflows/_test_pydantic.yml
+    with:
+      working-directory: ${{ matrix.job-configs.working-directory }}
+      pydantic-version: ${{ matrix.job-configs.pydantic-version }}
+    secrets: inherit
+
   test-doc-imports:
     needs: [ build ]
     if: ${{ needs.build.outputs.test-doc-imports != '[]' }}
     strategy:
       matrix:
         job-configs: ${{ fromJson(needs.build.outputs.test-doc-imports) }}
+      fail-fast: false
     uses: ./.github/workflows/_test_doc_imports.yml
     secrets: inherit
     with:
@@ -83,25 +102,13 @@ jobs:
     strategy:
       matrix:
         job-configs: ${{ fromJson(needs.build.outputs.compile-integration-tests) }}
+      fail-fast: false
     uses: ./.github/workflows/_compile_integration_test.yml
     with:
       working-directory: ${{ matrix.job-configs.working-directory }}
       python-version: ${{ matrix.job-configs.python-version }}
     secrets: inherit
 
-  dependencies:
-    name: cd ${{ matrix.job-configs.working-directory }}
-    needs: [ build ]
-    if: ${{ needs.build.outputs.dependencies != '[]' }}
-    strategy:
-      matrix:
-        job-configs: ${{ fromJson(needs.build.outputs.dependencies) }}
-    uses: ./.github/workflows/_dependencies.yml
-    with:
-      working-directory: ${{ matrix.job-configs.working-directory }}
-      python-version: ${{ matrix.job-configs.python-version }}
-    secrets: inherit
-
   extended-tests:
     name: "cd ${{ matrix.job-configs.working-directory }} / make extended_tests #${{ matrix.job-configs.python-version }}"
     needs: [ build ]
@@ -110,6 +117,7 @@ jobs:
       matrix:
         # note different variable for extended test dirs
         job-configs: ${{ fromJson(needs.build.outputs.extended-tests) }}
+      fail-fast: false
     runs-on: ubuntu-latest
     defaults:
       run:
@@ -149,7 +157,7 @@ jobs:
           echo "$STATUS" | grep 'nothing to commit, working tree clean'
   ci_success:
     name: "CI Success"
-    needs: [build, lint, test, compile-integration-tests, dependencies, extended-tests, test-doc-imports]
+    needs: [build, lint, test, compile-integration-tests, extended-tests, test-doc-imports, test-pydantic]
     if: |
       always()
     runs-on: ubuntu-latest
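The `>> $GITHUB_OUTPUT` redirection above works because check_diff.py emits one `key=<json>` line per job, which GitHub Actions exposes as `needs.build.outputs.<key>` and `fromJson(...)` re-parses into each job's matrix. A sketch of that output shape, with an illustrative directory and versions:

```python
import json

# Shape of check_diff.py's stdout; the entries are illustrative.
map_job_to_configs = {
    "test-pydantic": [
        {
            "working-directory": "libs/core",
            "pydantic-version": "2.7.0",
            "python-version": "3.11",
        }
    ],
}
for key, value in map_job_to_configs.items():
    print(f"{key}={json.dumps(value)}")
# test-pydantic=[{"working-directory": "libs/core", "pydantic-version": "2.7.0", ...}]
```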
.github/workflows/codespell.yml (3 changes)

@@ -3,9 +3,8 @@ name: CI / cd . / make spell_check
 
 on:
   push:
-    branches: [master, v0.1]
+    branches: [master, v0.1, v0.2]
   pull_request:
-    branches: [master, v0.1]
 
 permissions:
   contents: read
.github/workflows/scheduled_test.yml (7 changes)

@@ -17,16 +17,14 @@ jobs:
     fail-fast: false
     matrix:
       python-version:
-        - "3.8"
         - "3.9"
         - "3.11"
       working-directory:
         - "libs/partners/openai"
         - "libs/partners/anthropic"
-        - "libs/partners/ai21"
         - "libs/partners/fireworks"
         - "libs/partners/groq"
         - "libs/partners/mistralai"
         - "libs/partners/together"
         - "libs/partners/google-vertexai"
         - "libs/partners/google-genai"
         - "libs/partners/aws"
@@ -90,11 +88,10 @@ jobs:
       AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_CHAT_DEPLOYMENT_NAME }}
       AZURE_OPENAI_LLM_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LLM_DEPLOYMENT_NAME }}
       AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
-      AI21_API_KEY: ${{ secrets.AI21_API_KEY }}
       FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
       GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
+      HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}
       MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
       TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
       COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
       NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
       GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
Makefile (1 change)

@@ -36,7 +36,6 @@ api_docs_build:
 
 API_PKG ?= text-splitters
 
 api_docs_quick_preview:
-	poetry run pip install "pydantic<2"
 	poetry run python docs/api_reference/create_api_rst.py $(API_PKG)
 	cd docs/api_reference && poetry run make html
 	poetry run python docs/api_reference/scripts/custom_formatter.py docs/api_reference/_build/html/
@@ -39,7 +39,7 @@ conda install langchain -c conda-forge
 
 For these applications, LangChain simplifies the entire application lifecycle:
 
 - **Open-source libraries**: Build your applications using LangChain's open-source [building blocks](https://python.langchain.com/v0.2/docs/concepts#langchain-expression-language-lcel), [components](https://python.langchain.com/v0.2/docs/concepts), and [third-party integrations](https://python.langchain.com/v0.2/docs/integrations/platforms/).
-Use [LangGraph](/docs/concepts/#langgraph) to build stateful agents with first-class streaming and human-in-the-loop support.
+Use [LangGraph](https://langchain-ai.github.io/langgraph/) to build stateful agents with first-class streaming and human-in-the-loop support.
 - **Productionization**: Inspect, monitor, and evaluate your apps with [LangSmith](https://docs.smith.langchain.com/) so that you can constantly optimize and deploy with confidence.
 - **Deployment**: Turn your LangGraph applications into production-ready APIs and Assistants with [LangGraph Cloud](https://langchain-ai.github.io/langgraph/cloud/).
 
@@ -49,7 +49,7 @@ For these applications, LangChain simplifies the entire application lifecycle:
 - **`langchain-community`**: Third party integrations.
   - Some integrations have been further split into **partner packages** that only rely on **`langchain-core`**. Examples include **`langchain_openai`** and **`langchain_anthropic`**.
 - **`langchain`**: Chains, agents, and retrieval strategies that make up an application's cognitive architecture.
-- **[`LangGraph`](https://langchain-ai.github.io/langgraph/)**: A library for building robust and stateful multi-actor applications with LLMs by modeling steps as edges and nodes in a graph. Integrates smoothly with LangChain, but can be used without it.
+- **[`LangGraph`](https://langchain-ai.github.io/langgraph/)**: A library for building robust and stateful multi-actor applications with LLMs by modeling steps as edges and nodes in a graph. Integrates smoothly with LangChain, but can be used without it. To learn more about LangGraph, check out our first LangChain Academy course, *Introduction to LangGraph*, available [here](https://academy.langchain.com/courses/intro-to-langgraph).
 
 ### Productionization:
@@ -4,6 +4,8 @@ Example code for building applications with LangChain, with an emphasis on more
 
 Notebook | Description
 :- | :-
+[agent_fireworks_ai_langchain_mongodb.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/agent_fireworks_ai_langchain_mongodb.ipynb) | Build an AI Agent With Memory Using MongoDB, LangChain and FireWorksAI.
+[mongodb-langchain-cache-memory.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/mongodb-langchain-cache-memory.ipynb) | Build a RAG Application with Semantic Cache Using MongoDB and LangChain.
 [LLaMA2_sql_chat.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/LLaMA2_sql_chat.ipynb) | Build a chat application that interacts with a SQL database using an open source llm (llama2), specifically demonstrated on an SQLite database containing rosters.
 [Semi_Structured_RAG.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/Semi_Structured_RAG.ipynb) | Perform retrieval-augmented generation (rag) on documents with semi-structured data, including text and tables, using unstructured for parsing, multi-vector retriever for storing, and lcel for implementing chains.
 [Semi_structured_and_multi_moda...](https://github.com/langchain-ai/langchain/tree/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb) | Perform retrieval-augmented generation (rag) on documents with semi-structured data and images, using unstructured for parsing, multi-vector retriever for storage and retrieval, and lcel for implementing chains.
cookbook/agent_fireworks_ai_langchain_mongodb.ipynb (new file, 1593 lines)
File diff suppressed because one or more lines are too long
@@ -90,7 +90,8 @@
     "import os\n",
     "from getpass import getpass\n",
     "\n",
-    "os.environ[\"OPENAI_API_KEY\"] = getpass()\n",
+    "if \"OPENAI_API_KEY\" not in os.environ:\n",
+    "    os.environ[\"OPENAI_API_KEY\"] = getpass()\n",
     "# Please manually enter OpenAI Key"
    ]
   },
@@ -38,7 +38,7 @@
    "source": [
     "Connection is via `cassio` using `auto=True` parameter, and the notebook uses OpenAI. You should create a `.env` file accordingly.\n",
     "\n",
-    "For Casssandra, set:\n",
+    "For Cassandra, set:\n",
     "```bash\n",
     "CASSANDRA_CONTACT_POINTS\n",
     "CASSANDRA_USERNAME\n",
@@ -33,8 +33,8 @@ install-py-deps:
 	python3 -m venv .venv
 	$(PYTHON) -m pip install --upgrade pip
 	$(PYTHON) -m pip install --upgrade uv
-	$(PYTHON) -m uv pip install -r vercel_requirements.txt
-	$(PYTHON) -m uv pip install --editable $(PARTNER_DEPS_LIST)
+	$(PYTHON) -m uv pip install --pre -r vercel_requirements.txt
+	$(PYTHON) -m uv pip install --pre --editable $(PARTNER_DEPS_LIST)
 
 generate-files:
 	mkdir -p $(INTERMEDIATE_DIR)
@@ -73,6 +73,8 @@ append-related:
 
 generate-references:
 	$(PYTHON) scripts/generate_api_reference_links.py --docs_dir $(OUTPUT_NEW_DOCS_DIR)
 
+update-md: generate-files md-sync
+
 build: install-py-deps generate-files copy-infra render md-sync append-related
 
 vercel-build: install-vercel-deps build generate-references
@@ -84,10 +86,6 @@ vercel-build: install-vercel-deps build generate-references
 	mv langchain-api-docs-build/api_reference_build/html/* static/api_reference/
 	rm -rf langchain-api-docs-build
 	NODE_OPTIONS="--max-old-space-size=5000" yarn run docusaurus build
-	mv build v0.2
-	mkdir build
-	mv v0.2 build
-	mv build/v0.2/404.html build
 
 start:
 	cd $(OUTPUT_NEW_DIR) && yarn && yarn start --port=$(PORT)
File diff suppressed because one or more lines are too long
@@ -1,5 +1,5 @@
-autodoc_pydantic>=1,<2
-sphinx<=7
+autodoc_pydantic>=2,<3
+sphinx>=8,<9
 myst-parser>=3
 sphinx-autobuild>=2024
 pydata-sphinx-theme>=0.15
@@ -8,4 +8,4 @@ myst-nb>=1.1.1
 pyyaml
 sphinx-design
 sphinx-copybutton
-beautifulsoup4
+beautifulsoup4
@@ -17,7 +17,10 @@ def process_toc_h3_elements(html_content: str) -> str:
 
     # Process each element
     for element in toc_h3_elements:
-        element = element.a.code.span
+        try:
+            element = element.a.code.span
+        except Exception:
+            continue
         # Get the text content of the element
         content = element.get_text()
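The try/except guards against TOC entries that don't follow the `<a><code><span>` nesting: `element.a.code` comes back `None` for a plain link, and the chained `.span` then raises `AttributeError`. A small sketch with hypothetical HTML:

```python
from bs4 import BeautifulSoup

html = """
<li><a><code><span>MyClass.method</span></code></a></li>
<li><a>Plain section title</a></li>
"""
for li in BeautifulSoup(html, "html.parser").find_all("li"):
    try:
        span = li.a.code.span  # raises AttributeError when <code> is absent
    except Exception:
        continue  # skip entries that aren't code references
    print(span.get_text())
# MyClass.method
```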
@@ -15,7 +15,7 @@
    :member-order: groupwise
    :show-inheritance: True
    :special-members: __call__
-   :exclude-members: construct, copy, dict, from_orm, parse_file, parse_obj, parse_raw, schema, schema_json, update_forward_refs, validate, json, is_lc_serializable, to_json, to_json_not_implemented, lc_secrets, lc_attributes, lc_id, get_lc_namespace
+   :exclude-members: construct, copy, dict, from_orm, parse_file, parse_obj, parse_raw, schema, schema_json, update_forward_refs, validate, json, is_lc_serializable, to_json, to_json_not_implemented, lc_secrets, lc_attributes, lc_id, get_lc_namespace, model_construct, model_copy, model_dump, model_dump_json, model_parametrized_name, model_post_init, model_rebuild, model_validate, model_validate_json, model_validate_strings, model_extra, model_fields_set, model_json_schema
 
 {% block attributes %}
@@ -15,7 +15,7 @@
    :member-order: groupwise
    :show-inheritance: True
    :special-members: __call__
-   :exclude-members: construct, copy, dict, from_orm, parse_file, parse_obj, parse_raw, schema, schema_json, update_forward_refs, validate, json, is_lc_serializable, to_json_not_implemented, lc_secrets, lc_attributes, lc_id, get_lc_namespace, astream_log, transform, atransform, get_output_schema, get_prompts, config_schema, map, pick, pipe, with_listeners, with_alisteners, with_config, with_fallbacks, with_types, with_retry, InputType, OutputType, config_specs, output_schema, get_input_schema, get_graph, get_name, input_schema, name, bind, assign, as_tool
+   :exclude-members: construct, copy, dict, from_orm, parse_file, parse_obj, parse_raw, schema, schema_json, update_forward_refs, validate, json, is_lc_serializable, to_json_not_implemented, lc_secrets, lc_attributes, lc_id, get_lc_namespace, astream_log, transform, atransform, get_output_schema, get_prompts, config_schema, map, pick, pipe, InputType, OutputType, config_specs, output_schema, get_input_schema, get_graph, get_name, input_schema, name, assign, as_tool, get_config_jsonschema, get_input_jsonschema, get_output_jsonschema, model_construct, model_copy, model_dump, model_dump_json, model_parametrized_name, model_post_init, model_rebuild, model_validate, model_validate_json, model_validate_strings, to_json, model_extra, model_fields_set, model_json_schema, predict, apredict, predict_messages, apredict_messages, generate, generate_prompt, agenerate, agenerate_prompt, call_as_llm
 
 .. NOTE:: {{objname}} implements the standard :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>`. 🏃
docs/data/people.yml (1451 changes)
File diff suppressed because it is too large
@@ -5,51 +5,89 @@ This page contains `arXiv` papers referenced in the LangChain Documentation, API
 Templates, and Cookbooks.
 
-From the opposite direction, scientists use `LangChain` in research and reference it in the research papers.
-Here you find papers that reference:
-- [LangChain](https://arxiv.org/search/?query=langchain&searchtype=all&source=header)
-- [LangGraph](https://arxiv.org/search/?query=langgraph&searchtype=all&source=header)
-- [LangSmith](https://arxiv.org/search/?query=langsmith&searchtype=all&source=header)
+`arXiv` papers with references to:
+[LangChain](https://arxiv.org/search/?query=langchain&searchtype=all&source=header) | [LangGraph](https://arxiv.org/search/?query=langgraph&searchtype=all&source=header) | [LangSmith](https://arxiv.org/search/?query=langsmith&searchtype=all&source=header)
 
 ## Summary
 
 | arXiv id / Title | Authors | Published date 🔻 | LangChain Documentation|
 |------------------|---------|-------------------|------------------------|
 | `2402.03620v1` [Self-Discover: Large Language Models Self-Compose Reasoning Structures](http://arxiv.org/abs/2402.03620v1) | Pei Zhou, Jay Pujara, Xiang Ren, et al. | 2024-02-06 | `Cookbook:` [self-discover](https://github.com/langchain-ai/langchain/blob/master/cookbook/self-discover.ipynb)
 | `2401.18059v1` [RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval](http://arxiv.org/abs/2401.18059v1) | Parth Sarthi, Salman Abdullah, Aditi Tuli, et al. | 2024-01-31 | `Cookbook:` [RAPTOR](https://github.com/langchain-ai/langchain/blob/master/cookbook/RAPTOR.ipynb)
 | `2401.15884v2` [Corrective Retrieval Augmented Generation](http://arxiv.org/abs/2401.15884v2) | Shi-Qi Yan, Jia-Chen Gu, Yun Zhu, et al. | 2024-01-29 | `Cookbook:` [langgraph_crag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_crag.ipynb)
 | `2401.04088v1` [Mixtral of Experts](http://arxiv.org/abs/2401.04088v1) | Albert Q. Jiang, Alexandre Sablayrolles, Antoine Roux, et al. | 2024-01-08 | `Cookbook:` [together_ai](https://github.com/langchain-ai/langchain/blob/master/cookbook/together_ai.ipynb)
 | `2312.06648v2` [Dense X Retrieval: What Retrieval Granularity Should We Use?](http://arxiv.org/abs/2312.06648v2) | Tong Chen, Hongwei Wang, Sihao Chen, et al. | 2023-12-11 | `Template:` [propositional-retrieval](https://python.langchain.com/docs/templates/propositional-retrieval)
 | `2311.09210v1` [Chain-of-Note: Enhancing Robustness in Retrieval-Augmented Language Models](http://arxiv.org/abs/2311.09210v1) | Wenhao Yu, Hongming Zhang, Xiaoman Pan, et al. | 2023-11-15 | `Template:` [chain-of-note-wiki](https://python.langchain.com/docs/templates/chain-of-note-wiki)
 | `2310.11511v1` [Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection](http://arxiv.org/abs/2310.11511v1) | Akari Asai, Zeqiu Wu, Yizhong Wang, et al. | 2023-10-17 | `Cookbook:` [langgraph_self_rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_self_rag.ipynb)
 | `2310.06117v2` [Take a Step Back: Evoking Reasoning via Abstraction in Large Language Models](http://arxiv.org/abs/2310.06117v2) | Huaixiu Steven Zheng, Swaroop Mishra, Xinyun Chen, et al. | 2023-10-09 | `Template:` [stepback-qa-prompting](https://python.langchain.com/docs/templates/stepback-qa-prompting), `Cookbook:` [stepback-qa](https://github.com/langchain-ai/langchain/blob/master/cookbook/stepback-qa.ipynb)
 | `2307.09288v2` [Llama 2: Open Foundation and Fine-Tuned Chat Models](http://arxiv.org/abs/2307.09288v2) | Hugo Touvron, Louis Martin, Kevin Stone, et al. | 2023-07-18 | `Cookbook:` [Semi_Structured_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_Structured_RAG.ipynb)
 | `2305.14283v3` [Query Rewriting for Retrieval-Augmented Large Language Models](http://arxiv.org/abs/2305.14283v3) | Xinbei Ma, Yeyun Gong, Pengcheng He, et al. | 2023-05-23 | `Template:` [rewrite-retrieve-read](https://python.langchain.com/docs/templates/rewrite-retrieve-read), `Cookbook:` [rewrite](https://github.com/langchain-ai/langchain/blob/master/cookbook/rewrite.ipynb)
 | `2305.08291v1` [Large Language Model Guided Tree-of-Thought](http://arxiv.org/abs/2305.08291v1) | Jieyi Long | 2023-05-15 | `API:` [langchain_experimental.tot](https://python.langchain.com/v0.2/api_reference/experimental/index.html#module-langchain_experimental.tot), `Cookbook:` [tree_of_thought](https://github.com/langchain-ai/langchain/blob/master/cookbook/tree_of_thought.ipynb)
 | `2305.04091v3` [Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models](http://arxiv.org/abs/2305.04091v3) | Lei Wang, Wanyu Xu, Yihuai Lan, et al. | 2023-05-06 | `Cookbook:` [plan_and_execute_agent](https://github.com/langchain-ai/langchain/blob/master/cookbook/plan_and_execute_agent.ipynb)
 | `2305.02156v1` [Zero-Shot Listwise Document Reranking with a Large Language Model](http://arxiv.org/abs/2305.02156v1) | Xueguang Ma, Xinyu Zhang, Ronak Pradeep, et al. | 2023-05-03 | `API:` [langchain...LLMListwiseRerank](https://python.langchain.com/v0.2/api_reference/langchain/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html#langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank)
 | `2304.08485v2` [Visual Instruction Tuning](http://arxiv.org/abs/2304.08485v2) | Haotian Liu, Chunyuan Li, Qingyang Wu, et al. | 2023-04-17 | `Cookbook:` [Semi_structured_and_multi_modal_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb), [Semi_structured_multi_modal_RAG_LLaMA2](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb)
 | `2304.03442v2` [Generative Agents: Interactive Simulacra of Human Behavior](http://arxiv.org/abs/2304.03442v2) | Joon Sung Park, Joseph C. O'Brien, Carrie J. Cai, et al. | 2023-04-07 | `Cookbook:` [multiagent_bidding](https://github.com/langchain-ai/langchain/blob/master/cookbook/multiagent_bidding.ipynb), [generative_agents_interactive_simulacra_of_human_behavior](https://github.com/langchain-ai/langchain/blob/master/cookbook/generative_agents_interactive_simulacra_of_human_behavior.ipynb)
 | `2303.17760v2` [CAMEL: Communicative Agents for "Mind" Exploration of Large Language Model Society](http://arxiv.org/abs/2303.17760v2) | Guohao Li, Hasan Abed Al Kader Hammoud, Hani Itani, et al. | 2023-03-31 | `Cookbook:` [camel_role_playing](https://github.com/langchain-ai/langchain/blob/master/cookbook/camel_role_playing.ipynb)
 | `2303.17580v4` [HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face](http://arxiv.org/abs/2303.17580v4) | Yongliang Shen, Kaitao Song, Xu Tan, et al. | 2023-03-30 | `API:` [langchain_experimental.autonomous_agents](https://python.langchain.com/v0.2/api_reference/experimental/index.html#module-langchain_experimental.autonomous_agents), `Cookbook:` [hugginggpt](https://github.com/langchain-ai/langchain/blob/master/cookbook/hugginggpt.ipynb)
 | `2301.10226v4` [A Watermark for Large Language Models](http://arxiv.org/abs/2301.10226v4) | John Kirchenbauer, Jonas Geiping, Yuxin Wen, et al. | 2023-01-24 | `API:` [langchain_community...OCIModelDeploymentTGI](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html#langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI), [langchain_huggingface...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/huggingface/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/langchain_community/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
 | `2212.10496v1` [Precise Zero-Shot Dense Retrieval without Relevance Labels](http://arxiv.org/abs/2212.10496v1) | Luyu Gao, Xueguang Ma, Jimmy Lin, et al. | 2022-12-20 | `API:` [langchain...HypotheticalDocumentEmbedder](https://python.langchain.com/v0.2/api_reference/langchain/chains/langchain.chains.hyde.base.HypotheticalDocumentEmbedder.html#langchain.chains.hyde.base.HypotheticalDocumentEmbedder), `Template:` [hyde](https://python.langchain.com/docs/templates/hyde), `Cookbook:` [hypothetical_document_embeddings](https://github.com/langchain-ai/langchain/blob/master/cookbook/hypothetical_document_embeddings.ipynb)
 | `2212.07425v3` [Robust and Explainable Identification of Logical Fallacies in Natural Language Arguments](http://arxiv.org/abs/2212.07425v3) | Zhivar Sourati, Vishnu Priya Prasanna Venkatesh, Darshan Deshpande, et al. | 2022-12-12 | `API:` [langchain_experimental.fallacy_removal](https://python.langchain.com/v0.2/api_reference//arxiv/experimental_api_reference.html#module-langchain_experimental.fallacy_removal)
 | `2211.13892v2` [Complementary Explanations for Effective In-Context Learning](http://arxiv.org/abs/2211.13892v2) | Xi Ye, Srinivasan Iyer, Asli Celikyilmaz, et al. | 2022-11-25 | `API:` [langchain_core...MaxMarginalRelevanceExampleSelector](https://python.langchain.com/v0.2/api_reference/core/example_selectors/langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector.html#langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector)
 | `2211.10435v2` [PAL: Program-aided Language Models](http://arxiv.org/abs/2211.10435v2) | Luyu Gao, Aman Madaan, Shuyan Zhou, et al. | 2022-11-18 | `API:` [langchain_experimental.pal_chain](https://python.langchain.com/v0.2/api_reference//python/experimental_api_reference.html#module-langchain_experimental.pal_chain), [langchain_experimental...PALChain](https://python.langchain.com/v0.2/api_reference/experimental/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain), `Cookbook:` [program_aided_language_model](https://github.com/langchain-ai/langchain/blob/master/cookbook/program_aided_language_model.ipynb)
 | `2210.03629v3` [ReAct: Synergizing Reasoning and Acting in Language Models](http://arxiv.org/abs/2210.03629v3) | Shunyu Yao, Jeffrey Zhao, Dian Yu, et al. | 2022-10-06 | `Docs:` [docs/integrations/providers/cohere](https://python.langchain.com/docs/integrations/providers/cohere), [docs/integrations/tools/ionic_shopping](https://python.langchain.com/docs/integrations/tools/ionic_shopping), `API:` [langchain...TrajectoryEvalChain](https://python.langchain.com/v0.2/api_reference/langchain/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain), [langchain...create_react_agent](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.react.agent.create_react_agent.html#langchain.agents.react.agent.create_react_agent)
 | `2209.10785v2` [Deep Lake: a Lakehouse for Deep Learning](http://arxiv.org/abs/2209.10785v2) | Sasun Hambardzumyan, Abhinav Tuli, Levon Ghukasyan, et al. | 2022-09-22 | `Docs:` [docs/integrations/providers/activeloop_deeplake](https://python.langchain.com/docs/integrations/providers/activeloop_deeplake)
 | `2205.13147v4` [Matryoshka Representation Learning](http://arxiv.org/abs/2205.13147v4) | Aditya Kusupati, Gantavya Bhatt, Aniket Rege, et al. | 2022-05-26 | `Docs:` [docs/integrations/providers/snowflake](https://python.langchain.com/docs/integrations/providers/snowflake)
 | `2205.12654v1` [Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages](http://arxiv.org/abs/2205.12654v1) | Kevin Heffernan, Onur Çelebi, Holger Schwenk | 2022-05-25 | `API:` [langchain_community...LaserEmbeddings](https://python.langchain.com/v0.2/api_reference/community/embeddings/langchain_community.embeddings.laser.LaserEmbeddings.html#langchain_community.embeddings.laser.LaserEmbeddings)
 | `2204.00498v1` [Evaluating the Text-to-SQL Capabilities of Large Language Models](http://arxiv.org/abs/2204.00498v1) | Nitarshan Rajkumar, Raymond Li, Dzmitry Bahdanau | 2022-03-15 | `API:` [langchain_community...SQLDatabase](https://python.langchain.com/v0.2/api_reference/community/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), [langchain_community...SparkSQL](https://python.langchain.com/v0.2/api_reference/community/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL)
|
||||
| `2202.00666v5` [Locally Typical Sampling](http://arxiv.org/abs/2202.00666v5) | Clara Meister, Tiago Pimentel, Gian Wiher, et al. | 2022-02-01 | `API:` [langchain_huggingface...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/huggingface/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
| `2103.00020v1` [Learning Transferable Visual Models From Natural Language Supervision](http://arxiv.org/abs/2103.00020v1) | Alec Radford, Jong Wook Kim, Chris Hallacy, et al. | 2021-02-26 | `API:` [langchain_experimental.open_clip](https://python.langchain.com/v0.2/api_reference//arxiv/experimental_api_reference.html#module-langchain_experimental.open_clip)
|
||||
| `1909.05858v2` [CTRL: A Conditional Transformer Language Model for Controllable Generation](http://arxiv.org/abs/1909.05858v2) | Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney, et al. | 2019-09-11 | `API:` [langchain_huggingface...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/huggingface/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
| `2403.14403v2` [Adaptive-RAG: Learning to Adapt Retrieval-Augmented Large Language Models through Question Complexity](http://arxiv.org/abs/2403.14403v2) | Soyeong Jeong, Jinheon Baek, Sukmin Cho, et al. | 2024‑03‑21 | `Docs:` [docs/concepts](https://python.langchain.com/docs/concepts)
| `2402.03620v1` [Self-Discover: Large Language Models Self-Compose Reasoning Structures](http://arxiv.org/abs/2402.03620v1) | Pei Zhou, Jay Pujara, Xiang Ren, et al. | 2024‑02‑06 | `Cookbook:` [Self-Discover](https://github.com/langchain-ai/langchain/blob/master/cookbook/self-discover.ipynb)
| `2402.03367v2` [RAG-Fusion: a New Take on Retrieval-Augmented Generation](http://arxiv.org/abs/2402.03367v2) | Zackary Rackauckas | 2024‑01‑31 | `Docs:` [docs/concepts](https://python.langchain.com/docs/concepts)
| `2401.18059v1` [RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval](http://arxiv.org/abs/2401.18059v1) | Parth Sarthi, Salman Abdullah, Aditi Tuli, et al. | 2024‑01‑31 | `Cookbook:` [Raptor](https://github.com/langchain-ai/langchain/blob/master/cookbook/RAPTOR.ipynb)
| `2401.15884v2` [Corrective Retrieval Augmented Generation](http://arxiv.org/abs/2401.15884v2) | Shi-Qi Yan, Jia-Chen Gu, Yun Zhu, et al. | 2024‑01‑29 | `Docs:` [docs/concepts](https://python.langchain.com/docs/concepts), `Cookbook:` [Langgraph Crag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_crag.ipynb)
| `2401.08500v1` [Code Generation with AlphaCodium: From Prompt Engineering to Flow Engineering](http://arxiv.org/abs/2401.08500v1) | Tal Ridnik, Dedy Kredo, Itamar Friedman | 2024‑01‑16 | `Docs:` [docs/concepts](https://python.langchain.com/docs/concepts)
| `2401.04088v1` [Mixtral of Experts](http://arxiv.org/abs/2401.04088v1) | Albert Q. Jiang, Alexandre Sablayrolles, Antoine Roux, et al. | 2024‑01‑08 | `Cookbook:` [Together Ai](https://github.com/langchain-ai/langchain/blob/master/cookbook/together_ai.ipynb)
| `2312.06648v2` [Dense X Retrieval: What Retrieval Granularity Should We Use?](http://arxiv.org/abs/2312.06648v2) | Tong Chen, Hongwei Wang, Sihao Chen, et al. | 2023‑12‑11 | `Template:` [propositional-retrieval](https://python.langchain.com/docs/templates/propositional-retrieval)
| `2311.09210v1` [Chain-of-Note: Enhancing Robustness in Retrieval-Augmented Language Models](http://arxiv.org/abs/2311.09210v1) | Wenhao Yu, Hongming Zhang, Xiaoman Pan, et al. | 2023‑11‑15 | `Template:` [chain-of-note-wiki](https://python.langchain.com/docs/templates/chain-of-note-wiki)
| `2310.11511v1` [Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection](http://arxiv.org/abs/2310.11511v1) | Akari Asai, Zeqiu Wu, Yizhong Wang, et al. | 2023‑10‑17 | `Docs:` [docs/concepts](https://python.langchain.com/docs/concepts), `Cookbook:` [Langgraph Self Rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_self_rag.ipynb)
| `2310.06117v2` [Take a Step Back: Evoking Reasoning via Abstraction in Large Language Models](http://arxiv.org/abs/2310.06117v2) | Huaixiu Steven Zheng, Swaroop Mishra, Xinyun Chen, et al. | 2023‑10‑09 | `Docs:` [docs/concepts](https://python.langchain.com/docs/concepts), `Template:` [stepback-qa-prompting](https://python.langchain.com/docs/templates/stepback-qa-prompting), `Cookbook:` [Stepback-Qa](https://github.com/langchain-ai/langchain/blob/master/cookbook/stepback-qa.ipynb)
| `2307.15337v3` [Skeleton-of-Thought: Prompting LLMs for Efficient Parallel Generation](http://arxiv.org/abs/2307.15337v3) | Xuefei Ning, Zinan Lin, Zixuan Zhou, et al. | 2023‑07‑28 | `Template:` [skeleton-of-thought](https://python.langchain.com/docs/templates/skeleton-of-thought)
| `2307.09288v2` [Llama 2: Open Foundation and Fine-Tuned Chat Models](http://arxiv.org/abs/2307.09288v2) | Hugo Touvron, Louis Martin, Kevin Stone, et al. | 2023‑07‑18 | `Cookbook:` [Semi Structured Rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_Structured_RAG.ipynb)
| `2307.03172v3` [Lost in the Middle: How Language Models Use Long Contexts](http://arxiv.org/abs/2307.03172v3) | Nelson F. Liu, Kevin Lin, John Hewitt, et al. | 2023‑07‑06 | `Docs:` [docs/how_to/long_context_reorder](https://python.langchain.com/docs/how_to/long_context_reorder)
| `2305.14283v3` [Query Rewriting for Retrieval-Augmented Large Language Models](http://arxiv.org/abs/2305.14283v3) | Xinbei Ma, Yeyun Gong, Pengcheng He, et al. | 2023‑05‑23 | `Template:` [rewrite-retrieve-read](https://python.langchain.com/docs/templates/rewrite-retrieve-read), `Cookbook:` [Rewrite](https://github.com/langchain-ai/langchain/blob/master/cookbook/rewrite.ipynb)
| `2305.08291v1` [Large Language Model Guided Tree-of-Thought](http://arxiv.org/abs/2305.08291v1) | Jieyi Long | 2023‑05‑15 | `API:` [langchain_experimental.tot](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.tot), `Cookbook:` [Tree Of Thought](https://github.com/langchain-ai/langchain/blob/master/cookbook/tree_of_thought.ipynb)
| `2305.04091v3` [Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models](http://arxiv.org/abs/2305.04091v3) | Lei Wang, Wanyu Xu, Yihuai Lan, et al. | 2023‑05‑06 | `Cookbook:` [Plan And Execute Agent](https://github.com/langchain-ai/langchain/blob/master/cookbook/plan_and_execute_agent.ipynb)
| `2305.02156v1` [Zero-Shot Listwise Document Reranking with a Large Language Model](http://arxiv.org/abs/2305.02156v1) | Xueguang Ma, Xinyu Zhang, Ronak Pradeep, et al. | 2023‑05‑03 | `Docs:` [docs/how_to/contextual_compression](https://python.langchain.com/docs/how_to/contextual_compression), `API:` [langchain...LLMListwiseRerank](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html#langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank)
| `2304.08485v2` [Visual Instruction Tuning](http://arxiv.org/abs/2304.08485v2) | Haotian Liu, Chunyuan Li, Qingyang Wu, et al. | 2023‑04‑17 | `Cookbook:` [Semi Structured Multi Modal Rag Llama2](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb), [Semi Structured And Multi Modal Rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb)
| `2304.03442v2` [Generative Agents: Interactive Simulacra of Human Behavior](http://arxiv.org/abs/2304.03442v2) | Joon Sung Park, Joseph C. O'Brien, Carrie J. Cai, et al. | 2023‑04‑07 | `Cookbook:` [Generative Agents Interactive Simulacra Of Human Behavior](https://github.com/langchain-ai/langchain/blob/master/cookbook/generative_agents_interactive_simulacra_of_human_behavior.ipynb), [Multiagent Bidding](https://github.com/langchain-ai/langchain/blob/master/cookbook/multiagent_bidding.ipynb)
| `2303.17760v2` [CAMEL: Communicative Agents for "Mind" Exploration of Large Language Model Society](http://arxiv.org/abs/2303.17760v2) | Guohao Li, Hasan Abed Al Kader Hammoud, Hani Itani, et al. | 2023‑03‑31 | `Cookbook:` [Camel Role Playing](https://github.com/langchain-ai/langchain/blob/master/cookbook/camel_role_playing.ipynb)
| `2303.17580v4` [HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face](http://arxiv.org/abs/2303.17580v4) | Yongliang Shen, Kaitao Song, Xu Tan, et al. | 2023‑03‑30 | `API:` [langchain_experimental.autonomous_agents](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.autonomous_agents), `Cookbook:` [Hugginggpt](https://github.com/langchain-ai/langchain/blob/master/cookbook/hugginggpt.ipynb)
| `2301.10226v4` [A Watermark for Large Language Models](http://arxiv.org/abs/2301.10226v4) | John Kirchenbauer, Jonas Geiping, Yuxin Wen, et al. | 2023‑01‑24 | `API:` [langchain_community...OCIModelDeploymentTGI](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html#langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
| `2212.10496v1` [Precise Zero-Shot Dense Retrieval without Relevance Labels](http://arxiv.org/abs/2212.10496v1) | Luyu Gao, Xueguang Ma, Jimmy Lin, et al. | 2022‑12‑20 | `Docs:` [docs/concepts](https://python.langchain.com/docs/concepts), `API:` [langchain...HypotheticalDocumentEmbedder](https://api.python.langchain.com/en/latest/chains/langchain.chains.hyde.base.HypotheticalDocumentEmbedder.html#langchain.chains.hyde.base.HypotheticalDocumentEmbedder), `Template:` [hyde](https://python.langchain.com/docs/templates/hyde), `Cookbook:` [Hypothetical Document Embeddings](https://github.com/langchain-ai/langchain/blob/master/cookbook/hypothetical_document_embeddings.ipynb)
| `2212.08073v1` [Constitutional AI: Harmlessness from AI Feedback](http://arxiv.org/abs/2212.08073v1) | Yuntao Bai, Saurav Kadavath, Sandipan Kundu, et al. | 2022‑12‑15 | `Docs:` [docs/versions/migrating_chains/constitutional_chain](https://python.langchain.com/docs/versions/migrating_chains/constitutional_chain)
| `2212.07425v3` [Robust and Explainable Identification of Logical Fallacies in Natural Language Arguments](http://arxiv.org/abs/2212.07425v3) | Zhivar Sourati, Vishnu Priya Prasanna Venkatesh, Darshan Deshpande, et al. | 2022‑12‑12 | `API:` [langchain_experimental.fallacy_removal](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.fallacy_removal)
| `2211.13892v2` [Complementary Explanations for Effective In-Context Learning](http://arxiv.org/abs/2211.13892v2) | Xi Ye, Srinivasan Iyer, Asli Celikyilmaz, et al. | 2022‑11‑25 | `API:` [langchain_core...MaxMarginalRelevanceExampleSelector](https://api.python.langchain.com/en/latest/example_selectors/langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector.html#langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector)
| `2211.10435v2` [PAL: Program-aided Language Models](http://arxiv.org/abs/2211.10435v2) | Luyu Gao, Aman Madaan, Shuyan Zhou, et al. | 2022‑11‑18 | `API:` [langchain_experimental.pal_chain](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.pal_chain), [langchain_experimental...PALChain](https://api.python.langchain.com/en/latest/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain), `Cookbook:` [Program Aided Language Model](https://github.com/langchain-ai/langchain/blob/master/cookbook/program_aided_language_model.ipynb)
| `2210.11934v2` [An Analysis of Fusion Functions for Hybrid Retrieval](http://arxiv.org/abs/2210.11934v2) | Sebastian Bruch, Siyu Gai, Amir Ingber | 2022‑10‑21 | `Docs:` [docs/concepts](https://python.langchain.com/docs/concepts)
| `2210.03629v3` [ReAct: Synergizing Reasoning and Acting in Language Models](http://arxiv.org/abs/2210.03629v3) | Shunyu Yao, Jeffrey Zhao, Dian Yu, et al. | 2022‑10‑06 | `Docs:` [docs/integrations/tools/ionic_shopping](https://python.langchain.com/docs/integrations/tools/ionic_shopping), [docs/integrations/providers/cohere](https://python.langchain.com/docs/integrations/providers/cohere), [docs/concepts](https://python.langchain.com/docs/concepts), `API:` [langchain...create_react_agent](https://api.python.langchain.com/en/latest/agents/langchain.agents.react.agent.create_react_agent.html#langchain.agents.react.agent.create_react_agent), [langchain...TrajectoryEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain)
| `2209.10785v2` [Deep Lake: a Lakehouse for Deep Learning](http://arxiv.org/abs/2209.10785v2) | Sasun Hambardzumyan, Abhinav Tuli, Levon Ghukasyan, et al. | 2022‑09‑22 | `Docs:` [docs/integrations/providers/activeloop_deeplake](https://python.langchain.com/docs/integrations/providers/activeloop_deeplake)
| `2205.13147v4` [Matryoshka Representation Learning](http://arxiv.org/abs/2205.13147v4) | Aditya Kusupati, Gantavya Bhatt, Aniket Rege, et al. | 2022‑05‑26 | `Docs:` [docs/integrations/providers/snowflake](https://python.langchain.com/docs/integrations/providers/snowflake)
| `2205.12654v1` [Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages](http://arxiv.org/abs/2205.12654v1) | Kevin Heffernan, Onur Çelebi, Holger Schwenk | 2022‑05‑25 | `API:` [langchain_community...LaserEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.laser.LaserEmbeddings.html#langchain_community.embeddings.laser.LaserEmbeddings)
| `2204.00498v1` [Evaluating the Text-to-SQL Capabilities of Large Language Models](http://arxiv.org/abs/2204.00498v1) | Nitarshan Rajkumar, Raymond Li, Dzmitry Bahdanau | 2022‑03‑15 | `Docs:` [docs/tutorials/sql_qa](https://python.langchain.com/docs/tutorials/sql_qa), `API:` [langchain_community...SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), [langchain_community...SparkSQL](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL)
| `2202.00666v5` [Locally Typical Sampling](http://arxiv.org/abs/2202.00666v5) | Clara Meister, Tiago Pimentel, Gian Wiher, et al. | 2022‑02‑01 | `API:` [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
| `2112.01488v3` [ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction](http://arxiv.org/abs/2112.01488v3) | Keshav Santhanam, Omar Khattab, Jon Saad-Falcon, et al. | 2021‑12‑02 | `Docs:` [docs/integrations/retrievers/ragatouille](https://python.langchain.com/docs/integrations/retrievers/ragatouille), [docs/integrations/providers/ragatouille](https://python.langchain.com/docs/integrations/providers/ragatouille), [docs/concepts](https://python.langchain.com/docs/concepts), [docs/integrations/providers/dspy](https://python.langchain.com/docs/integrations/providers/dspy)
| `2103.00020v1` [Learning Transferable Visual Models From Natural Language Supervision](http://arxiv.org/abs/2103.00020v1) | Alec Radford, Jong Wook Kim, Chris Hallacy, et al. | 2021‑02‑26 | `API:` [langchain_experimental.open_clip](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.open_clip)
| `2005.14165v4` [Language Models are Few-Shot Learners](http://arxiv.org/abs/2005.14165v4) | Tom B. Brown, Benjamin Mann, Nick Ryder, et al. | 2020‑05‑28 | `Docs:` [docs/concepts](https://python.langchain.com/docs/concepts)
| `2005.11401v4` [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](http://arxiv.org/abs/2005.11401v4) | Patrick Lewis, Ethan Perez, Aleksandra Piktus, et al. | 2020‑05‑22 | `Docs:` [docs/concepts](https://python.langchain.com/docs/concepts)
| `1909.05858v2` [CTRL: A Conditional Transformer Language Model for Controllable Generation](http://arxiv.org/abs/1909.05858v2) | Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney, et al. | 2019‑09‑11 | `API:` [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
## Adaptive-RAG: Learning to Adapt Retrieval-Augmented Large Language Models through Question Complexity

- **Authors:** Soyeong Jeong, Jinheon Baek, Sukmin Cho, et al.
- **arXiv id:** [2403.14403v2](http://arxiv.org/abs/2403.14403v2) **Published Date:** 2024-03-21
- **LangChain:**

  - **Documentation:** [docs/concepts](https://python.langchain.com/docs/concepts)

**Abstract:** Retrieval-Augmented Large Language Models (LLMs), which incorporate the
non-parametric knowledge from external knowledge bases into LLMs, have emerged
as a promising approach to enhancing response accuracy in several tasks, such
as Question-Answering (QA). However, even though there are various approaches
dealing with queries of different complexities, they either handle simple
queries with unnecessary computational overhead or fail to adequately address
complex multi-step queries; yet, not all user requests fall into only one of
the simple or complex categories. In this work, we propose a novel adaptive QA
framework, that can dynamically select the most suitable strategy for
(retrieval-augmented) LLMs from the simplest to the most sophisticated ones
based on the query complexity. Also, this selection process is operationalized
with a classifier, which is a smaller LM trained to predict the complexity
level of incoming queries with automatically collected labels, obtained from
actual predicted outcomes of models and inherent inductive biases in datasets.
This approach offers a balanced strategy, seamlessly adapting between the
iterative and single-step retrieval-augmented LLMs, as well as the no-retrieval
methods, in response to a range of query complexities. We validate our model on
a set of open-domain QA datasets, covering multiple query complexities, and
show that ours enhances the overall efficiency and accuracy of QA systems,
compared to relevant baselines including the adaptive retrieval approaches.
Code is available at: https://github.com/starsuzi/Adaptive-RAG.

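The routing idea is easy to prototype with off-the-shelf LangChain primitives. Below is a minimal sketch, assuming an `OPENAI_API_KEY` and an existing `retriever`; the three-way labels, prompts, and model name are illustrative stand-ins for the paper's trained complexity classifier.

```python
# Sketch only: prompts, labels, and model name are assumptions, not the
# paper's trained classifier. Requires OPENAI_API_KEY and a `retriever`.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# A small LM call stands in for the trained complexity classifier.
classify = (
    ChatPromptTemplate.from_template(
        "Classify the question as A (answerable without retrieval), "
        "B (single-step retrieval) or C (multi-step retrieval). "
        "Reply with one letter.\nQuestion: {question}"
    )
    | llm
    | StrOutputParser()
)

def adaptive_answer(question: str, retriever) -> str:
    label = classify.invoke({"question": question}).strip().upper()
    if label.startswith("A"):            # simple query: skip retrieval entirely
        return llm.invoke(question).content
    docs = retriever.invoke(question)    # B and C retrieve at least once
    if label.startswith("C"):            # complex query: one extra iterative hop
        follow_up = f"{question}\nKnown so far: {docs[0].page_content[:500]}"
        docs += retriever.invoke(follow_up)
    context = "\n\n".join(d.page_content for d in docs)
    return llm.invoke(f"Context:\n{context}\n\nQuestion: {question}").content
```

A production version would swap the prompt-based classifier for the paper's fine-tuned small LM.
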
## Self-Discover: Large Language Models Self-Compose Reasoning Structures

- **Authors:** Pei Zhou, Jay Pujara, Xiang Ren, et al.
- **arXiv id:** [2402.03620v1](http://arxiv.org/abs/2402.03620v1) **Published Date:** 2024-02-06
- **LangChain:**

  - **Cookbook:** [self-discover](https://github.com/langchain-ai/langchain/blob/master/cookbook/self-discover.ipynb)

[…] the self-discovered reasoning structures are universally applicable across
model families: from PaLM 2-L to GPT-4, and from GPT-4 to Llama2, and share
commonalities with human reasoning patterns.

## RAG-Fusion: a New Take on Retrieval-Augmented Generation

- **Authors:** Zackary Rackauckas
- **arXiv id:** [2402.03367v2](http://arxiv.org/abs/2402.03367v2) **Published Date:** 2024-01-31
- **LangChain:**

  - **Documentation:** [docs/concepts](https://python.langchain.com/docs/concepts)

**Abstract:** Infineon has identified a need for engineers, account managers, and customers
to rapidly obtain product information. This problem is traditionally addressed
with retrieval-augmented generation (RAG) chatbots, but in this study, I
evaluated the use of the newly popularized RAG-Fusion method. RAG-Fusion
combines RAG and reciprocal rank fusion (RRF) by generating multiple queries,
reranking them with reciprocal scores and fusing the documents and scores.
Through manually evaluating answers on accuracy, relevance, and
comprehensiveness, I found that RAG-Fusion was able to provide accurate and
comprehensive answers due to the generated queries contextualizing the original
query from various perspectives. However, some answers strayed off topic when
the generated queries' relevance to the original query is insufficient. This
research marks significant progress in artificial intelligence (AI) and natural
language processing (NLP) applications and demonstrates transformations in a
global and multi-industry context.

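Reciprocal rank fusion itself is only a few lines of pure Python: a document's fused score is the sum of `1 / (k + rank)` over every ranked list it appears in. A self-contained sketch (`k=60` is the conventional smoothing constant):

```python
# Reciprocal rank fusion: each document scores sum(1 / (k + rank)) over the
# ranked lists it appears in; higher fused score means earlier final position.
from collections import defaultdict

def reciprocal_rank_fusion(rankings: list[list[str]], k: int = 60) -> list[str]:
    scores: defaultdict[str, float] = defaultdict(float)
    for ranking in rankings:
        for rank, doc_id in enumerate(ranking, start=1):
            scores[doc_id] += 1.0 / (k + rank)
    return sorted(scores, key=scores.__getitem__, reverse=True)

# Three generated queries produced three rankings; d2 wins on fused score.
print(reciprocal_rank_fusion([["d1", "d2"], ["d2", "d3"], ["d2", "d1"]]))
```
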
## RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval

- **Authors:** Parth Sarthi, Salman Abdullah, Aditi Tuli, et al.
- **arXiv id:** [2401.18059v1](http://arxiv.org/abs/2401.18059v1) **Published Date:** 2024-01-31
- **LangChain:**

  - **Cookbook:** [RAPTOR](https://github.com/langchain-ai/langchain/blob/master/cookbook/RAPTOR.ipynb)

[…] benchmark by 20% in absolute accuracy.

## Corrective Retrieval Augmented Generation

- **Authors:** Shi-Qi Yan, Jia-Chen Gu, Yun Zhu, et al.
- **arXiv id:** [2401.15884v2](http://arxiv.org/abs/2401.15884v2) **Published Date:** 2024-01-29
- **LangChain:**

  - **Documentation:** [docs/concepts](https://python.langchain.com/docs/concepts)
  - **Cookbook:** [langgraph_crag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_crag.ipynb)

**Abstract:** Large language models (LLMs) inevitably exhibit hallucinations since the
[…] RAG-based approaches. Experiments on four datasets covering short- and
long-form generation tasks show that CRAG can significantly improve the
performance of RAG-based approaches.

## Code Generation with AlphaCodium: From Prompt Engineering to Flow Engineering

- **Authors:** Tal Ridnik, Dedy Kredo, Itamar Friedman
- **arXiv id:** [2401.08500v1](http://arxiv.org/abs/2401.08500v1) **Published Date:** 2024-01-16
- **LangChain:**

  - **Documentation:** [docs/concepts](https://python.langchain.com/docs/concepts)

**Abstract:** Code generation problems differ from common natural language problems - they
require matching the exact syntax of the target language, identifying happy
paths and edge cases, paying attention to numerous small details in the problem
spec, and addressing other code-specific issues and requirements. Hence, many
of the optimizations and tricks that have been successful in natural language
generation may not be effective for code tasks. In this work, we propose a new
approach to code generation by LLMs, which we call AlphaCodium - a test-based,
multi-stage, code-oriented iterative flow, that improves the performances of
LLMs on code problems. We tested AlphaCodium on a challenging code generation
dataset called CodeContests, which includes competitive programming problems
from platforms such as Codeforces. The proposed flow consistently and
significantly improves results. On the validation set, for example, GPT-4
accuracy (pass@5) increased from 19% with a single well-designed direct prompt
to 44% with the AlphaCodium flow. Many of the principles and best practices
acquired in this work, we believe, are broadly applicable to general code
generation tasks. Full implementation is available at:
https://github.com/Codium-ai/AlphaCodium

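A toy version of the test-driven repair loop is sketched below, assuming an `OPENAI_API_KEY`; `run_tests` is a hypothetical stand-in for the problem's test harness, and the real AlphaCodium flow adds many more stages (reflection, test generation, candidate ranking).

```python
# Toy generate-test-repair loop; the model name and prompts are assumptions.
import subprocess
import sys
import tempfile

from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

def run_tests(code: str, test_code: str) -> str:
    """Execute candidate code plus tests; return '' on success, stderr on failure."""
    with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False) as f:
        f.write(code + "\n\n" + test_code)
        path = f.name
    proc = subprocess.run(
        [sys.executable, path], capture_output=True, text=True, timeout=30
    )
    return "" if proc.returncode == 0 else proc.stderr[-2000:]

def solve(problem: str, test_code: str, max_rounds: int = 3) -> str:
    code = llm.invoke(f"Write a plain Python solution (code only):\n{problem}").content
    for _ in range(max_rounds):
        failure = run_tests(code, test_code)
        if not failure:
            break  # all tests pass
        code = llm.invoke(  # feed the failure back and ask for a fix
            f"Problem:\n{problem}\n\nCode:\n{code}\n\n"
            f"Test failure:\n{failure}\n\nReturn the fixed code only."
        ).content
    return code
```
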
## Mixtral of Experts

- **Authors:** Albert Q. Jiang, Alexandre Sablayrolles, Antoine Roux, et al.
- **arXiv id:** [2401.04088v1](http://arxiv.org/abs/2401.04088v1) **Published Date:** 2024-01-08
- **LangChain:**

  - **Cookbook:** [together_ai](https://github.com/langchain-ai/langchain/blob/master/cookbook/together_ai.ipynb)

[…] the base and instruct models are released under the Apache 2.0 license.

## Dense X Retrieval: What Retrieval Granularity Should We Use?

- **Authors:** Tong Chen, Hongwei Wang, Sihao Chen, et al.
- **arXiv id:** [2312.06648v2](http://arxiv.org/abs/2312.06648v2) **Published Date:** 2023-12-11
- **LangChain:**

  - **Template:** [propositional-retrieval](https://python.langchain.com/docs/templates/propositional-retrieval)

[…] information.

## Chain-of-Note: Enhancing Robustness in Retrieval-Augmented Language Models

- **Authors:** Wenhao Yu, Hongming Zhang, Xiaoman Pan, et al.
- **arXiv id:** [2311.09210v1](http://arxiv.org/abs/2311.09210v1) **Published Date:** 2023-11-15
- **LangChain:**

  - **Template:** [chain-of-note-wiki](https://python.langchain.com/docs/templates/chain-of-note-wiki)

[…] outside the pre-training knowledge scope.

## Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection

- **Authors:** Akari Asai, Zeqiu Wu, Yizhong Wang, et al.
- **arXiv id:** [2310.11511v1](http://arxiv.org/abs/2310.11511v1) **Published Date:** 2023-10-17
- **LangChain:**

  - **Documentation:** [docs/concepts](https://python.langchain.com/docs/concepts)
  - **Cookbook:** [langgraph_self_rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_self_rag.ipynb)

**Abstract:** Despite their remarkable capabilities, large language models (LLMs) often
[…] to these models.

## Take a Step Back: Evoking Reasoning via Abstraction in Large Language Models

- **Authors:** Huaixiu Steven Zheng, Swaroop Mishra, Xinyun Chen, et al.
- **arXiv id:** [2310.06117v2](http://arxiv.org/abs/2310.06117v2) **Published Date:** 2023-10-09
- **LangChain:**

  - **Documentation:** [docs/concepts](https://python.langchain.com/docs/concepts)
  - **Template:** [stepback-qa-prompting](https://python.langchain.com/docs/templates/stepback-qa-prompting)
  - **Cookbook:** [stepback-qa](https://github.com/langchain-ai/langchain/blob/master/cookbook/stepback-qa.ipynb)

[…] including STEM, Knowledge QA, and Multi-Hop Reasoning. For instance, Step-Back
Prompting improves PaLM-2L performance on MMLU (Physics and Chemistry) by 7%
and 11% respectively, TimeQA by 27%, and MuSiQue by 7%.

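In the spirit of the `stepback-qa-prompting` template, here is a minimal sketch: derive the more abstract "step-back" question first, then retrieve for both questions and answer over the combined context. It assumes an `OPENAI_API_KEY` and an existing `retriever`; the prompt wording is illustrative.

```python
# Sketch only: assumes OPENAI_API_KEY and a `retriever`; prompts are illustrative.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

step_back = (
    ChatPromptTemplate.from_template(
        "Rewrite this as a more generic, higher-level question:\n{question}"
    )
    | llm
    | StrOutputParser()
)

def stepback_answer(question: str, retriever) -> str:
    abstract_question = step_back.invoke({"question": question})
    # Ground the answer in evidence for both the original and abstract questions.
    docs = retriever.invoke(question) + retriever.invoke(abstract_question)
    context = "\n\n".join(d.page_content for d in docs)
    return llm.invoke(f"Context:\n{context}\n\nQuestion: {question}").content
```
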
## Skeleton-of-Thought: Prompting LLMs for Efficient Parallel Generation

- **Authors:** Xuefei Ning, Zinan Lin, Zixuan Zhou, et al.
- **arXiv id:** [2307.15337v3](http://arxiv.org/abs/2307.15337v3) **Published Date:** 2023-07-28
- **LangChain:**

  - **Template:** [skeleton-of-thought](https://python.langchain.com/docs/templates/skeleton-of-thought)

**Abstract:** This work aims at decreasing the end-to-end generation latency of large
language models (LLMs). One of the major causes of the high generation latency
is the sequential decoding approach adopted by almost all state-of-the-art
LLMs. In this work, motivated by the thinking and writing process of humans, we
propose Skeleton-of-Thought (SoT), which first guides LLMs to generate the
skeleton of the answer, and then conducts parallel API calls or batched
decoding to complete the contents of each skeleton point in parallel. Not only
does SoT provide considerable speed-ups across 12 LLMs, but it can also
potentially improve the answer quality on several question categories. SoT is
an initial attempt at data-centric optimization for inference efficiency, and
showcases the potential of eliciting high-quality answers by explicitly
planning the answer structure in language.

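SoT's skeleton-then-expand pattern maps naturally onto `Runnable.batch`, which fans the expansion calls out concurrently. A minimal sketch, assuming an `OPENAI_API_KEY`; the prompts and the naive bullet parsing are illustrative:

```python
# Sketch only: assumes OPENAI_API_KEY; prompts and bullet parsing are naive.
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

def sot_answer(question: str) -> str:
    skeleton = llm.invoke(
        f"Give 3-5 short bullet points (skeleton only, no detail) answering: {question}"
    ).content
    points = [line.strip("-*• ") for line in skeleton.splitlines() if line.strip()]
    # batch() expands every skeleton point concurrently instead of one long decode.
    expansions = llm.batch(
        [f"Question: {question}\nExpand this point in 2-3 sentences: {p}" for p in points]
    )
    return "\n\n".join(message.content for message in expansions)
```
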
## Llama 2: Open Foundation and Fine-Tuned Chat Models

- **Authors:** Hugo Touvron, Louis Martin, Kevin Stone, et al.
- **arXiv id:** [2307.09288v2](http://arxiv.org/abs/2307.09288v2) **Published Date:** 2023-07-18
- **LangChain:**

  - **Cookbook:** [Semi_Structured_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_Structured_RAG.ipynb)

[…] detailed description of our approach to fine-tuning and safety improvements of
Llama 2-Chat in order to enable the community to build on our work and
contribute to the responsible development of LLMs.

## Lost in the Middle: How Language Models Use Long Contexts

- **Authors:** Nelson F. Liu, Kevin Lin, John Hewitt, et al.
- **arXiv id:** [2307.03172v3](http://arxiv.org/abs/2307.03172v3) **Published Date:** 2023-07-06
- **LangChain:**

  - **Documentation:** [docs/how_to/long_context_reorder](https://python.langchain.com/docs/how_to/long_context_reorder)

**Abstract:** While recent language models have the ability to take long contexts as input,
relatively little is known about how well they use longer context. We analyze
the performance of language models on two tasks that require identifying
relevant information in their input contexts: multi-document question answering
and key-value retrieval. We find that performance can degrade significantly
when changing the position of relevant information, indicating that current
language models do not robustly make use of information in long input contexts.
In particular, we observe that performance is often highest when relevant
information occurs at the beginning or end of the input context, and
significantly degrades when models must access relevant information in the
middle of long contexts, even for explicitly long-context models. Our analysis
provides a better understanding of how language models use their input context
and provides new evaluation protocols for future long-context language models.

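LangChain's mitigation for this effect is the `LongContextReorder` document transformer covered in the how-to guide above: given a relevance-sorted result list, it moves the strongest documents to the beginning and end of the prompt so the weakest land in the middle. A small self-contained example:

```python
from langchain_community.document_transformers import LongContextReorder
from langchain_core.documents import Document

# Pretend these came back from a retriever, sorted most-relevant first.
docs = [Document(page_content=f"doc {i}") for i in range(6)]

# Most relevant docs are moved to the edges; least relevant end up in the middle.
reordered = LongContextReorder().transform_documents(docs)
print([d.page_content for d in reordered])
```
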
## Query Rewriting for Retrieval-Augmented Large Language Models

- **Authors:** Xinbei Ma, Yeyun Gong, Pengcheng He, et al.
- **arXiv id:** [2305.14283v3](http://arxiv.org/abs/2305.14283v3) **Published Date:** 2023-05-23
- **LangChain:**

  - **Template:** [rewrite-retrieve-read](https://python.langchain.com/docs/templates/rewrite-retrieve-read)

[…] for retrieval-augmented LLM.

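The rewrite-retrieve-read pipeline behind the template is compact. A minimal sketch, assuming an `OPENAI_API_KEY` and an existing `retriever` (the rewrite prompt is illustrative):

```python
# Sketch only: assumes OPENAI_API_KEY and a `retriever`.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

rewrite = (
    ChatPromptTemplate.from_template(
        "Rewrite the user question as a concise search query:\n{question}"
    )
    | llm
    | StrOutputParser()
)

def rewrite_retrieve_read(question: str, retriever) -> str:
    # Rewrite, then retrieve with the improved query...
    docs = retriever.invoke(rewrite.invoke({"question": question}))
    context = "\n\n".join(d.page_content for d in docs)
    # ...then "read": answer over the retrieved context.
    return llm.invoke(f"Context:\n{context}\n\nQuestion: {question}").content
```
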
## Large Language Model Guided Tree-of-Thought

- **Authors:** Jieyi Long
- **arXiv id:** [2305.08291v1](http://arxiv.org/abs/2305.08291v1) **Published Date:** 2023-05-15
- **LangChain:**

  - **API Reference:** [langchain_experimental.tot](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.tot)
  - **Cookbook:** [tree_of_thought](https://github.com/langchain-ai/langchain/blob/master/cookbook/tree_of_thought.ipynb)

**Abstract:** In this paper, we introduce the Tree-of-Thought (ToT) framework, a novel
[…] implementation of the ToT-based Sudoku solver is available on GitHub:

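`langchain_experimental.tot` ships a full chain with pluggable checkers; the skeleton below is only a hand-rolled illustration of the underlying search loop (propose candidate thoughts, score them, keep a small beam), assuming an `OPENAI_API_KEY` and illustrative prompts rather than that module's API.

```python
# Hand-rolled beam search over LLM "thoughts"; prompts, scoring, and model
# name are assumptions, not the langchain_experimental.tot API.
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.7)

def propose(problem: str, partial: str, n: int = 3) -> list[str]:
    out = llm.invoke(
        f"Problem: {problem}\nPartial solution:\n{partial or '(none)'}\n"
        f"Propose {n} distinct next steps, one per line."
    ).content
    return [step.strip() for step in out.splitlines() if step.strip()][:n]

def score(problem: str, partial: str) -> float:
    out = llm.invoke(
        f"Problem: {problem}\nPartial solution:\n{partial}\n"
        "Rate progress toward a correct solution from 0 to 10. Reply with a number."
    ).content
    try:
        return float(out.split()[0])
    except ValueError:
        return 0.0  # unparseable rating counts as a dead branch

def tot_search(problem: str, depth: int = 3, beam: int = 2) -> str:
    frontier = [""]
    for _ in range(depth):
        # Expand every branch, then keep only the `beam` highest-scoring ones.
        children = [p + "\n" + s for p in frontier for s in propose(problem, p)]
        frontier = sorted(children, key=lambda c: score(problem, c), reverse=True)[:beam]
    return frontier[0]
```
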
## Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models

- **Authors:** Lei Wang, Wanyu Xu, Yihuai Lan, et al.
- **arXiv id:** [2305.04091v3](http://arxiv.org/abs/2305.04091v3) **Published Date:** 2023-05-06
- **LangChain:**

  - **Cookbook:** [plan_and_execute_agent](https://github.com/langchain-ai/langchain/blob/master/cookbook/plan_and_execute_agent.ipynb)

[…] https://github.com/AGI-Edgerunners/Plan-and-Solve-Prompting.

## Zero-Shot Listwise Document Reranking with a Large Language Model

- **Authors:** Xueguang Ma, Xinyu Zhang, Ronak Pradeep, et al.
- **arXiv id:** [2305.02156v1](http://arxiv.org/abs/2305.02156v1) **Published Date:** 2023-05-03
- **LangChain:**

  - **Documentation:** [docs/how_to/contextual_compression](https://python.langchain.com/docs/how_to/contextual_compression)
  - **API Reference:** [langchain...LLMListwiseRerank](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html#langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank)

**Abstract:** Supervised ranking methods based on bi-encoder or cross-encoder architectures
have shown success in multi-stage text ranking tasks, but they require large
[…] with results showing its potential to generalize across different languages.

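`LLMListwiseRerank` is LangChain's implementation of this zero-shot listwise approach. A usage sketch, assuming an `OPENAI_API_KEY`, an existing `retriever`, and a chat model that supports structured output:

```python
# Assumes OPENAI_API_KEY and an existing `retriever`.
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMListwiseRerank
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# The compressor shows the LLM the whole candidate list and keeps the top 3.
reranker = LLMListwiseRerank.from_llm(llm, top_n=3)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=reranker, base_retriever=retriever
)
docs = compression_retriever.invoke("What did the president say about inflation?")
```
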
## Visual Instruction Tuning

- **Authors:** Haotian Liu, Chunyuan Li, Qingyang Wu, et al.
- **arXiv id:** [2304.08485v2](http://arxiv.org/abs/2304.08485v2) **Published Date:** 2023-04-17
- **LangChain:**

  - **Cookbook:** [Semi_structured_multi_modal_RAG_LLaMA2](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb), [Semi_structured_and_multi_modal_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb)

**Abstract:** Instruction tuning large language models (LLMs) using machine-generated
instruction-following data has improved zero-shot capabilities on new tasks,
[…] publicly available.

## Generative Agents: Interactive Simulacra of Human Behavior

- **Authors:** Joon Sung Park, Joseph C. O'Brien, Carrie J. Cai, et al.
- **arXiv id:** [2304.03442v2](http://arxiv.org/abs/2304.03442v2) **Published Date:** 2023-04-07
- **LangChain:**

  - **Cookbook:** [generative_agents_interactive_simulacra_of_human_behavior](https://github.com/langchain-ai/langchain/blob/master/cookbook/generative_agents_interactive_simulacra_of_human_behavior.ipynb), [multiagent_bidding](https://github.com/langchain-ai/langchain/blob/master/cookbook/multiagent_bidding.ipynb)

**Abstract:** Believable proxies of human behavior can empower interactive applications
ranging from immersive environments to rehearsal spaces for interpersonal
[…] interaction patterns for enabling believable simulations of human behavior.

## CAMEL: Communicative Agents for "Mind" Exploration of Large Language Model Society

- **Authors:** Guohao Li, Hasan Abed Al Kader Hammoud, Hani Itani, et al.
- **arXiv id:** [2303.17760v2](http://arxiv.org/abs/2303.17760v2) **Published Date:** 2023-03-31
- **LangChain:**

  - **Cookbook:** [camel_role_playing](https://github.com/langchain-ai/langchain/blob/master/cookbook/camel_role_playing.ipynb)

[…] agents and beyond: https://github.com/camel-ai/camel.

## HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face

- **Authors:** Yongliang Shen, Kaitao Song, Xu Tan, et al.
- **arXiv id:** [2303.17580v4](http://arxiv.org/abs/2303.17580v4) **Published Date:** 2023-03-30
- **LangChain:**

  - **API Reference:** [langchain_experimental.autonomous_agents](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.autonomous_agents)
  - **Cookbook:** [hugginggpt](https://github.com/langchain-ai/langchain/blob/master/cookbook/hugginggpt.ipynb)

**Abstract:** Solving complicated AI tasks with different domains and modalities is a key
[…] realization of artificial general intelligence.

## A Watermark for Large Language Models

- **Authors:** John Kirchenbauer, Jonas Geiping, Yuxin Wen, et al.
- **arXiv id:** [2301.10226v4](http://arxiv.org/abs/2301.10226v4) **Published Date:** 2023-01-24
- **LangChain:**

  - **API Reference:** [langchain_community...OCIModelDeploymentTGI](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html#langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)

**Abstract:** Potential harms of large language models can be mitigated by watermarking
model output, i.e., embedding signals into generated text that are invisible to
[…] family, and discuss robustness and security.

## Precise Zero-Shot Dense Retrieval without Relevance Labels

- **Authors:** Luyu Gao, Xueguang Ma, Jimmy Lin, et al.
- **arXiv id:** [2212.10496v1](http://arxiv.org/abs/2212.10496v1) **Published Date:** 2022-12-20
- **LangChain:**

  - **Documentation:** [docs/concepts](https://python.langchain.com/docs/concepts)
  - **API Reference:** [langchain...HypotheticalDocumentEmbedder](https://api.python.langchain.com/en/latest/chains/langchain.chains.hyde.base.HypotheticalDocumentEmbedder.html#langchain.chains.hyde.base.HypotheticalDocumentEmbedder)
  - **Template:** [hyde](https://python.langchain.com/docs/templates/hyde)
  - **Cookbook:** [hypothetical_document_embeddings](https://github.com/langchain-ai/langchain/blob/master/cookbook/hypothetical_document_embeddings.ipynb)

[…] state-of-the-art unsupervised dense retriever Contriever and shows strong
performance comparable to fine-tuned retrievers, across various tasks (e.g. web
search, QA, fact verification) and languages (e.g. sw, ko, ja).

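The `HypotheticalDocumentEmbedder` referenced above implements HyDE directly: it asks an LLM for a hypothetical answer and embeds that text in place of the raw query. A usage sketch, assuming OpenAI credentials (`"web_search"` selects one of the bundled prompt variants):

```python
# Assumes OPENAI_API_KEY; "web_search" picks one of the bundled HyDE prompts.
from langchain.chains.hyde.base import HypotheticalDocumentEmbedder
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

embedder = HypotheticalDocumentEmbedder.from_llm(
    llm=ChatOpenAI(model="gpt-4o-mini", temperature=0),
    base_embeddings=OpenAIEmbeddings(),
    prompt_key="web_search",
)
# The LLM writes a hypothetical document for the query; its embedding is
# what gets matched against the real corpus.
vector = embedder.embed_query("What items are allowed through airport security?")
```
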
## Constitutional AI: Harmlessness from AI Feedback

- **Authors:** Yuntao Bai, Saurav Kadavath, Sandipan Kundu, et al.
- **arXiv id:** [2212.08073v1](http://arxiv.org/abs/2212.08073v1) **Published Date:** 2022-12-15
- **LangChain:**

  - **Documentation:** [docs/versions/migrating_chains/constitutional_chain](https://python.langchain.com/docs/versions/migrating_chains/constitutional_chain)

**Abstract:** As AI systems become more capable, we would like to enlist their help to
supervise other AIs. We experiment with methods for training a harmless AI
assistant through self-improvement, without any human labels identifying
harmful outputs. The only human oversight is provided through a list of rules
or principles, and so we refer to the method as 'Constitutional AI'. The
process involves both a supervised learning and a reinforcement learning phase.
In the supervised phase we sample from an initial model, then generate
self-critiques and revisions, and then finetune the original model on revised
responses. In the RL phase, we sample from the finetuned model, use a model to
evaluate which of the two samples is better, and then train a preference model
from this dataset of AI preferences. We then train with RL using the preference
model as the reward signal, i.e. we use 'RL from AI Feedback' (RLAIF). As a
result we are able to train a harmless but non-evasive AI assistant that
engages with harmful queries by explaining its objections to them. Both the SL
and RL methods can leverage chain-of-thought style reasoning to improve the
human-judged performance and transparency of AI decision making. These methods
make it possible to control AI behavior more precisely and with far fewer human
labels.

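The supervised half of the recipe (critique a draft against a written principle, then revise it) is straightforward to sketch; LangChain's `ConstitutionalChain`, covered by the migration guide above, wrapped the same loop. The sketch assumes an `OPENAI_API_KEY`, and the single principle is illustrative:

```python
# Sketch of the critique-and-revision loop; assumes OPENAI_API_KEY.
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
PRINCIPLE = "Be helpful, but refuse to assist with harmful or illegal acts."

def constitutional_reply(user_input: str) -> str:
    draft = llm.invoke(user_input).content
    critique = llm.invoke(
        f"Principle: {PRINCIPLE}\nResponse: {draft}\n"
        "Point out any way the response violates the principle."
    ).content
    return llm.invoke(  # revise the draft in light of the self-critique
        f"Principle: {PRINCIPLE}\nResponse: {draft}\nCritique: {critique}\n"
        "Rewrite the response so it satisfies the principle."
    ).content
```
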
## Robust and Explainable Identification of Logical Fallacies in Natural Language Arguments

- **Authors:** Zhivar Sourati, Vishnu Priya Prasanna Venkatesh, Darshan Deshpande, et al.
- **arXiv id:** [2212.07425v3](http://arxiv.org/abs/2212.07425v3) **Published Date:** 2022-12-12
- **LangChain:**

  - **API Reference:** [langchain_experimental.fallacy_removal](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.fallacy_removal)

**Abstract:** The spread of misinformation, propaganda, and flawed argumentation has been
amplified in the Internet era. Given the volume of data and the subtlety of
[…] further work on logical fallacy identification.

## Complementary Explanations for Effective In-Context Learning
|
||||
|
||||
- **arXiv id:** [2211.13892v2](http://arxiv.org/abs/2211.13892v2) **Published Date:** 2022-11-25
|
||||
- **Title:** Complementary Explanations for Effective In-Context Learning
|
||||
- **Authors:** Xi Ye, Srinivasan Iyer, Asli Celikyilmaz, et al.
|
||||
- **arXiv id:** [2211.13892v2](http://arxiv.org/abs/2211.13892v2) **Published Date:** 2022-11-25
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_core...MaxMarginalRelevanceExampleSelector](https://python.langchain.com/v0.2/api_reference/core/example_selectors/langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector.html#langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector)
|
||||
- **API Reference:** [langchain_core...MaxMarginalRelevanceExampleSelector](https://api.python.langchain.com/en/latest/example_selectors/langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector.html#langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector)
|
||||
|
||||
**Abstract:** Large language models (LLMs) have exhibited remarkable capabilities in
|
||||
learning from explanations in prompts, but there has been limited understanding
|
||||
@@ -614,12 +756,11 @@ performance across three real-world tasks on multiple LLMs.

## PAL: Program-aided Language Models

- **arXiv id:** [2211.10435v2](http://arxiv.org/abs/2211.10435v2) **Published Date:** 2022-11-18
- **Title:** PAL: Program-aided Language Models
- **Authors:** Luyu Gao, Aman Madaan, Shuyan Zhou, et al.
- **arXiv id:** [2211.10435v2](http://arxiv.org/abs/2211.10435v2) **Published Date:** 2022-11-18
- **LangChain:**

- **API Reference:** [langchain_experimental.pal_chain](https://python.langchain.com/v0.2/api_reference//python/experimental_api_reference.html#module-langchain_experimental.pal_chain), [langchain_experimental...PALChain](https://python.langchain.com/v0.2/api_reference/experimental/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain)
- **API Reference:** [langchain_experimental.pal_chain](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.pal_chain), [langchain_experimental...PALChain](https://api.python.langchain.com/en/latest/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain)
- **Cookbook:** [program_aided_language_model](https://github.com/langchain-ai/langchain/blob/master/cookbook/program_aided_language_model.ipynb)

**Abstract:** Large language models (LLMs) have recently demonstrated an impressive ability
@@ -645,15 +786,33 @@ accuracy on the GSM8K benchmark of math word problems, surpassing PaLM-540B
which uses chain-of-thought by absolute 15% top-1. Our code and data are publicly available at http://reasonwithpal.com/ .

## ReAct: Synergizing Reasoning and Acting in Language Models
## An Analysis of Fusion Functions for Hybrid Retrieval

- **arXiv id:** [2210.03629v3](http://arxiv.org/abs/2210.03629v3) **Published Date:** 2022-10-06
- **Title:** ReAct: Synergizing Reasoning and Acting in Language Models
- **Authors:** Shunyu Yao, Jeffrey Zhao, Dian Yu, et al.
- **Authors:** Sebastian Bruch, Siyu Gai, Amir Ingber
- **arXiv id:** [2210.11934v2](http://arxiv.org/abs/2210.11934v2) **Published Date:** 2022-10-21
- **LangChain:**

- **Documentation:** [docs/integrations/providers/cohere](https://python.langchain.com/docs/integrations/providers/cohere), [docs/integrations/tools/ionic_shopping](https://python.langchain.com/docs/integrations/tools/ionic_shopping)
- **API Reference:** [langchain...TrajectoryEvalChain](https://python.langchain.com/v0.2/api_reference/langchain/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain), [langchain...create_react_agent](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.react.agent.create_react_agent.html#langchain.agents.react.agent.create_react_agent)
- **Documentation:** [docs/concepts](https://python.langchain.com/docs/concepts)

**Abstract:** We study hybrid search in text retrieval where lexical and semantic search are fused together with the intuition that the two are complementary in how they model relevance. In particular, we examine fusion by a convex combination (CC) of lexical and semantic scores, as well as the Reciprocal Rank Fusion (RRF) method, and identify their advantages and potential pitfalls. Contrary to existing studies, we find RRF to be sensitive to its parameters; that the learning of a CC fusion is generally agnostic to the choice of score normalization; that CC outperforms RRF in in-domain and out-of-domain settings; and finally, that CC is sample efficient, requiring only a small set of training examples to tune its only parameter to a target domain.
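
To make the two fusion functions contrasted in this abstract concrete, here is a minimal, illustrative sketch (not taken from the paper or from LangChain; the function names and the `k = 60` default are assumptions):

```python
def reciprocal_rank_fusion(ranks: list[int], k: int = 60) -> float:
    """RRF: each system contributes 1 / (k + rank); k dampens top-rank dominance."""
    return sum(1.0 / (k + rank) for rank in ranks)


def convex_combination(lexical: float, semantic: float, alpha: float = 0.5) -> float:
    """CC: a weighted blend of lexical and semantic scores (assumes both are normalized)."""
    return alpha * lexical + (1.0 - alpha) * semantic


# A document ranked 1st by lexical search and 3rd by semantic search:
print(reciprocal_rank_fusion([1, 3]))            # ~0.0323
print(convex_combination(0.9, 0.4, alpha=0.3))   # 0.55
```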

## ReAct: Synergizing Reasoning and Acting in Language Models

- **Authors:** Shunyu Yao, Jeffrey Zhao, Dian Yu, et al.
- **arXiv id:** [2210.03629v3](http://arxiv.org/abs/2210.03629v3) **Published Date:** 2022-10-06
- **LangChain:**

- **Documentation:** [docs/integrations/tools/ionic_shopping](https://python.langchain.com/docs/integrations/tools/ionic_shopping), [docs/integrations/providers/cohere](https://python.langchain.com/docs/integrations/providers/cohere), [docs/concepts](https://python.langchain.com/docs/concepts)
- **API Reference:** [langchain...create_react_agent](https://api.python.langchain.com/en/latest/agents/langchain.agents.react.agent.create_react_agent.html#langchain.agents.react.agent.create_react_agent), [langchain...TrajectoryEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain)

**Abstract:** While large language models (LLMs) have demonstrated impressive capabilities across tasks in language understanding and interactive decision making, their
@@ -680,9 +839,8 @@ Project site with code: https://react-lm.github.io

## Deep Lake: a Lakehouse for Deep Learning

- **arXiv id:** [2209.10785v2](http://arxiv.org/abs/2209.10785v2) **Published Date:** 2022-09-22
- **Title:** Deep Lake: a Lakehouse for Deep Learning
- **Authors:** Sasun Hambardzumyan, Abhinav Tuli, Levon Ghukasyan, et al.
- **arXiv id:** [2209.10785v2](http://arxiv.org/abs/2209.10785v2) **Published Date:** 2022-09-22
- **LangChain:**

- **Documentation:** [docs/integrations/providers/activeloop_deeplake](https://python.langchain.com/docs/integrations/providers/activeloop_deeplake)
@@ -706,9 +864,8 @@ TensorFlow, JAX, and integrate with numerous MLOps tools.

## Matryoshka Representation Learning

- **arXiv id:** [2205.13147v4](http://arxiv.org/abs/2205.13147v4) **Published Date:** 2022-05-26
- **Title:** Matryoshka Representation Learning
- **Authors:** Aditya Kusupati, Gantavya Bhatt, Aniket Rege, et al.
- **arXiv id:** [2205.13147v4](http://arxiv.org/abs/2205.13147v4) **Published Date:** 2022-05-26
- **LangChain:**

- **Documentation:** [docs/integrations/providers/snowflake](https://python.langchain.com/docs/integrations/providers/snowflake)
@@ -738,12 +895,11 @@ are open-sourced at https://github.com/RAIVNLab/MRL.

## Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages

- **arXiv id:** [2205.12654v1](http://arxiv.org/abs/2205.12654v1) **Published Date:** 2022-05-25
- **Title:** Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages
- **Authors:** Kevin Heffernan, Onur Çelebi, Holger Schwenk
- **arXiv id:** [2205.12654v1](http://arxiv.org/abs/2205.12654v1) **Published Date:** 2022-05-25
- **LangChain:**

- **API Reference:** [langchain_community...LaserEmbeddings](https://python.langchain.com/v0.2/api_reference/community/embeddings/langchain_community.embeddings.laser.LaserEmbeddings.html#langchain_community.embeddings.laser.LaserEmbeddings)
- **API Reference:** [langchain_community...LaserEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.laser.LaserEmbeddings.html#langchain_community.embeddings.laser.LaserEmbeddings)

**Abstract:** Scaling multilingual representation learning beyond the hundred most frequent languages is challenging, in particular to cover the long tail of low-resource
@@ -765,12 +921,12 @@ encoders, mine bitexts, and validate the bitexts by training NMT systems.

## Evaluating the Text-to-SQL Capabilities of Large Language Models

- **arXiv id:** [2204.00498v1](http://arxiv.org/abs/2204.00498v1) **Published Date:** 2022-03-15
- **Title:** Evaluating the Text-to-SQL Capabilities of Large Language Models
- **Authors:** Nitarshan Rajkumar, Raymond Li, Dzmitry Bahdanau
- **arXiv id:** [2204.00498v1](http://arxiv.org/abs/2204.00498v1) **Published Date:** 2022-03-15
- **LangChain:**

- **API Reference:** [langchain_community...SQLDatabase](https://python.langchain.com/v0.2/api_reference/community/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), [langchain_community...SparkSQL](https://python.langchain.com/v0.2/api_reference/community/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL)
- **Documentation:** [docs/tutorials/sql_qa](https://python.langchain.com/docs/tutorials/sql_qa)
- **API Reference:** [langchain_community...SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), [langchain_community...SparkSQL](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL)

**Abstract:** We perform an empirical evaluation of Text-to-SQL capabilities of the Codex language model. We find that, without any finetuning, Codex is a strong
@@ -782,12 +938,11 @@ few-shot examples.

## Locally Typical Sampling

- **arXiv id:** [2202.00666v5](http://arxiv.org/abs/2202.00666v5) **Published Date:** 2022-02-01
- **Title:** Locally Typical Sampling
- **Authors:** Clara Meister, Tiago Pimentel, Gian Wiher, et al.
- **arXiv id:** [2202.00666v5](http://arxiv.org/abs/2202.00666v5) **Published Date:** 2022-02-01
- **LangChain:**

- **API Reference:** [langchain_huggingface...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/huggingface/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
- **API Reference:** [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)

**Abstract:** Today's probabilistic language generators fall short when it comes to producing coherent and fluent text despite the fact that the underlying models
@@ -810,14 +965,35 @@ locally typical sampling offers competitive performance (in both abstractive
summarization and story generation) in terms of quality while consistently reducing degenerate repetitions.

## Learning Transferable Visual Models From Natural Language Supervision
## ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction

- **arXiv id:** [2103.00020v1](http://arxiv.org/abs/2103.00020v1) **Published Date:** 2021-02-26
- **Title:** Learning Transferable Visual Models From Natural Language Supervision
- **Authors:** Alec Radford, Jong Wook Kim, Chris Hallacy, et al.
- **Authors:** Keshav Santhanam, Omar Khattab, Jon Saad-Falcon, et al.
- **arXiv id:** [2112.01488v3](http://arxiv.org/abs/2112.01488v3) **Published Date:** 2021-12-02
- **LangChain:**

- **API Reference:** [langchain_experimental.open_clip](https://python.langchain.com/v0.2/api_reference/experimental/index.html#module-langchain_experimental.open_clip)
- **Documentation:** [docs/integrations/retrievers/ragatouille](https://python.langchain.com/docs/integrations/retrievers/ragatouille), [docs/integrations/providers/ragatouille](https://python.langchain.com/docs/integrations/providers/ragatouille), [docs/concepts](https://python.langchain.com/docs/concepts), [docs/integrations/providers/dspy](https://python.langchain.com/docs/integrations/providers/dspy)

**Abstract:** Neural information retrieval (IR) has greatly advanced search and other knowledge-intensive language tasks. While many neural IR methods encode queries and documents into single-vector representations, late interaction models produce multi-vector representations at the granularity of each token and decompose relevance modeling into scalable token-level computations. This decomposition has been shown to make late interaction more effective, but it inflates the space footprint of these models by an order of magnitude. In this work, we introduce ColBERTv2, a retriever that couples an aggressive residual compression mechanism with a denoised supervision strategy to simultaneously improve the quality and space footprint of late interaction. We evaluate ColBERTv2 across a wide range of benchmarks, establishing state-of-the-art quality within and outside the training domain while reducing the space footprint of late interaction models by 6--10$\times$.

## Learning Transferable Visual Models From Natural Language Supervision

- **Authors:** Alec Radford, Jong Wook Kim, Chris Hallacy, et al.
- **arXiv id:** [2103.00020v1](http://arxiv.org/abs/2103.00020v1) **Published Date:** 2021-02-26
- **LangChain:**

- **API Reference:** [langchain_experimental.open_clip](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.open_clip)

**Abstract:** State-of-the-art computer vision systems are trained to predict a fixed set of predetermined object categories. This restricted form of supervision limits
@@ -840,14 +1016,77 @@ zero-shot without needing to use any of the 1.28 million training examples it
was trained on. We release our code and pre-trained model weights at https://github.com/OpenAI/CLIP.

## CTRL: A Conditional Transformer Language Model for Controllable Generation
## Language Models are Few-Shot Learners

- **arXiv id:** [1909.05858v2](http://arxiv.org/abs/1909.05858v2) **Published Date:** 2019-09-11
- **Title:** CTRL: A Conditional Transformer Language Model for Controllable Generation
- **Authors:** Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney, et al.
- **Authors:** Tom B. Brown, Benjamin Mann, Nick Ryder, et al.
- **arXiv id:** [2005.14165v4](http://arxiv.org/abs/2005.14165v4) **Published Date:** 2020-05-28
- **LangChain:**

- **API Reference:** [langchain_huggingface...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/huggingface/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://python.langchain.com/v0.2/api_reference/langchain_community/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
- **Documentation:** [docs/concepts](https://python.langchain.com/docs/concepts)

**Abstract:** Recent work has demonstrated substantial gains on many NLP tasks and benchmarks by pre-training on a large corpus of text followed by fine-tuning on a specific task. While typically task-agnostic in architecture, this method still requires task-specific fine-tuning datasets of thousands or tens of thousands of examples. By contrast, humans can generally perform a new language task from only a few examples or from simple instructions - something which current NLP systems still largely struggle to do. Here we show that scaling up language models greatly improves task-agnostic, few-shot performance, sometimes even reaching competitiveness with prior state-of-the-art fine-tuning approaches. Specifically, we train GPT-3, an autoregressive language model with 175 billion parameters, 10x more than any previous non-sparse language model, and test its performance in the few-shot setting. For all tasks, GPT-3 is applied without any gradient updates or fine-tuning, with tasks and few-shot demonstrations specified purely via text interaction with the model. GPT-3 achieves strong performance on many NLP datasets, including translation, question-answering, and cloze tasks, as well as several tasks that require on-the-fly reasoning or domain adaptation, such as unscrambling words, using a novel word in a sentence, or performing 3-digit arithmetic. At the same time, we also identify some datasets where GPT-3's few-shot learning still struggles, as well as some datasets where GPT-3 faces methodological issues related to training on large web corpora. Finally, we find that GPT-3 can generate samples of news articles which human evaluators have difficulty distinguishing from articles written by humans. We discuss broader societal impacts of this finding and of GPT-3 in general.

## Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks

- **Authors:** Patrick Lewis, Ethan Perez, Aleksandra Piktus, et al.
- **arXiv id:** [2005.11401v4](http://arxiv.org/abs/2005.11401v4) **Published Date:** 2020-05-22
- **LangChain:**

- **Documentation:** [docs/concepts](https://python.langchain.com/docs/concepts)

**Abstract:** Large pre-trained language models have been shown to store factual knowledge in their parameters, and achieve state-of-the-art results when fine-tuned on downstream NLP tasks. However, their ability to access and precisely manipulate knowledge is still limited, and hence on knowledge-intensive tasks, their performance lags behind task-specific architectures. Additionally, providing provenance for their decisions and updating their world knowledge remain open research problems. Pre-trained models with a differentiable access mechanism to explicit non-parametric memory can overcome this issue, but have so far been only investigated for extractive downstream tasks. We explore a general-purpose fine-tuning recipe for retrieval-augmented generation (RAG) -- models which combine pre-trained parametric and non-parametric memory for language generation. We introduce RAG models where the parametric memory is a pre-trained seq2seq model and the non-parametric memory is a dense vector index of Wikipedia, accessed with a pre-trained neural retriever. We compare two RAG formulations, one which conditions on the same retrieved passages across the whole generated sequence, the other can use different passages per token. We fine-tune and evaluate our models on a wide range of knowledge-intensive NLP tasks and set the state-of-the-art on three open domain QA tasks, outperforming parametric seq2seq models and task-specific retrieve-and-extract architectures. For language generation tasks, we find that RAG models generate more specific, diverse and factual language than a state-of-the-art parametric-only seq2seq baseline.

## CTRL: A Conditional Transformer Language Model for Controllable Generation

- **Authors:** Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney, et al.
- **arXiv id:** [1909.05858v2](http://arxiv.org/abs/1909.05858v2) **Published Date:** 2019-09-11
- **LangChain:**

- **API Reference:** [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)

**Abstract:** Large-scale language models show promising text generation capabilities, but users cannot easily control particular aspects of the generated text. We

@@ -15,11 +15,6 @@ The interfaces for core components like LLMs, vector stores, retrievers and more
No third party integrations are defined here.
The dependencies are kept purposefully very lightweight.

### Partner packages

While the long tail of integrations are in `langchain-community`, we split popular integrations into their own packages (e.g. `langchain-openai`, `langchain-anthropic`, etc).
This was done in order to improve support for these important integrations.

### `langchain`

The main `langchain` package contains chains, agents, and retrieval strategies that make up an application's cognitive architecture.
@@ -33,6 +28,11 @@ Key partner packages are separated out (see below).
This contains all integrations for various components (LLMs, vector stores, retrievers).
All dependencies in this package are optional to keep the package as lightweight as possible.

### Partner packages

While the long tail of integrations is in `langchain-community`, we split popular integrations into their own packages (e.g. `langchain-openai`, `langchain-anthropic`, etc).
This was done in order to improve support for these important integrations.

### [`langgraph`](https://langchain-ai.github.io/langgraph)

`langgraph` is an extension of `langchain` aimed at
@@ -61,28 +61,28 @@ A developer platform that lets you debug, test, evaluate, and monitor LLM applic
## LangChain Expression Language (LCEL)
<span data-heading-keywords="lcel"></span>

LangChain Expression Language, or LCEL, is a declarative way to chain LangChain components.
`LangChain Expression Language`, or `LCEL`, is a declarative way to chain LangChain components.
LCEL was designed from day 1 to **support putting prototypes in production, with no code changes**, from the simplest “prompt + LLM” chain to the most complex chains (we’ve seen folks successfully run LCEL chains with 100s of steps in production). To highlight a few of the reasons you might want to use LCEL:

**First-class streaming support**
- **First-class streaming support:**
When you build your chains with LCEL you get the best possible time-to-first-token (time elapsed until the first chunk of output comes out). For some chains this means, e.g., that we stream tokens straight from an LLM to a streaming output parser, and you get back parsed, incremental chunks of output at the same rate as the LLM provider outputs the raw tokens.

**Async support**
- **Async support:**
Any chain built with LCEL can be called both with the synchronous API (e.g. in your Jupyter notebook while prototyping) as well as with the asynchronous API (e.g. in a [LangServe](/docs/langserve/) server). This enables using the same code for prototypes and in production, with great performance, and the ability to handle many concurrent requests in the same server.

**Optimized parallel execution**
- **Optimized parallel execution:**
Whenever your LCEL chains have steps that can be executed in parallel (e.g. if you fetch documents from multiple retrievers) we automatically do it, both in the sync and the async interfaces, for the smallest possible latency.

**Retries and fallbacks**
- **Retries and fallbacks:**
Configure retries and fallbacks for any part of your LCEL chain. This is a great way to make your chains more reliable at scale. We’re currently working on adding streaming support for retries/fallbacks, so you can get the added reliability without any latency cost.

**Access intermediate results**
- **Access intermediate results:**
For more complex chains it’s often very useful to access the results of intermediate steps even before the final output is produced. This can be used to let end-users know something is happening, or even just to debug your chain. You can stream intermediate results, and it’s available on every [LangServe](/docs/langserve) server.

**Input and output schemas**
- **Input and output schemas:**
Input and output schemas give every LCEL chain Pydantic and JSONSchema schemas inferred from the structure of your chain. This can be used for validation of inputs and outputs, and is an integral part of LangServe.

[**Seamless LangSmith tracing**](https://docs.smith.langchain.com)
- [**Seamless LangSmith tracing**](https://docs.smith.langchain.com)
As your chains get more and more complex, it becomes increasingly important to understand what exactly is happening at every step.
With LCEL, **all** steps are automatically logged to [LangSmith](https://docs.smith.langchain.com/) for maximum observability and debuggability.
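
As a quick illustration of what chaining with LCEL looks like, here is a minimal sketch (the model name is an assumption; any chat model works):

```python
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")
# The | operator composes Runnables into a single chain
chain = prompt | ChatOpenAI(model="gpt-4o-mini") | StrOutputParser()

print(chain.invoke({"topic": "bears"}))
```
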
@@ -97,7 +97,7 @@ For guides on how to do specific tasks with LCEL, check out [the relevant how-to
### Runnable interface
<span data-heading-keywords="invoke,runnable"></span>

To make it as easy as possible to create custom chains, we've implemented a ["Runnable"](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable) protocol. Many LangChain components implement the `Runnable` protocol, including chat models, LLMs, output parsers, retrievers, prompt templates, and more. There are also several useful primitives for working with runnables, which you can read about below.
To make it as easy as possible to create custom chains, we've implemented a ["Runnable"](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable) protocol. Many LangChain components implement the `Runnable` protocol, including chat models, LLMs, output parsers, retrievers, prompt templates, and more. There are also several useful primitives for working with runnables, which you can read about below.

This is a standard interface, which makes it easy to define custom chains as well as invoke them in a standard way.
The standard interface includes:
@@ -186,7 +186,7 @@ For a full list of LangChain model providers with multimodal models, [check out
<span data-heading-keywords="llm,llms"></span>

:::caution
Pure text-in/text-out LLMs tend to be older or lower-level. Many popular models are best used as [chat completion models](/docs/concepts/#chat-models),
Pure text-in/text-out LLMs tend to be older or lower-level. Many newer popular models are best used as [chat completion models](/docs/concepts/#chat-models),
even for non-chat use cases.

You are probably looking for [the section above instead](/docs/concepts/#chat-models).
@@ -201,7 +201,7 @@ When messages are passed in as input, they will be formatted into a string under

LangChain does not host any LLMs; rather, we rely on third-party integrations.

For specifics on how to use LLMs, see the [relevant how-to guides here](/docs/how_to/#llms).
For specifics on how to use LLMs, see the [how-to guides](/docs/how_to/#llms).

### Messages

@@ -215,7 +215,7 @@ LangChain has different message classes for different roles.
The `content` property describes the content of the message.
This can be a few different things:

- A string (most models deal this type of content)
- A string (most models deal with this type of content)
- A List of dictionaries (this is used for multimodal input, where the dictionary contains information about that input type and that input location)

Optionally, messages can have a `name` property which allows for differentiating between multiple speakers with the same role.
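
As a sketch of both content shapes (the image URL is a hypothetical placeholder):

```python
from langchain_core.messages import HumanMessage

# String content, with the optional `name` to distinguish speakers
text_msg = HumanMessage(content="Hello!", name="alice")

# List-of-dictionaries content for multimodal input
multimodal_msg = HumanMessage(
    content=[
        {"type": "text", "text": "Describe this image."},
        {"type": "image_url", "image_url": {"url": "https://example.com/photo.png"}},
    ]
)
```
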
@@ -365,38 +365,32 @@ See documentation for that [here](/docs/concepts/#function-tool-calling).

:::

Responsible for taking the output of a model and transforming it to a more suitable format for downstream tasks.
`Output parser` is responsible for taking the output of a model and transforming it to a more suitable format for downstream tasks.
Useful when you are using LLMs to generate structured data, or to normalize output from chat models and LLMs.

LangChain has lots of different types of output parsers. This is a list of output parsers LangChain supports. The table below has various pieces of information:

**Name**: The name of the output parser

**Supports Streaming**: Whether the output parser supports streaming.

**Has Format Instructions**: Whether the output parser has format instructions. This is generally available except when (a) the desired schema is not specified in the prompt but rather in other parameters (like OpenAI function calling), or (b) when the OutputParser wraps another OutputParser.

**Calls LLM**: Whether this output parser itself calls an LLM. This is usually only done by output parsers that attempt to correct misformatted output.

**Input Type**: Expected input type. Most output parsers work on both strings and messages, but some (like OpenAI Functions) need a message with specific kwargs.

**Output Type**: The output type of the object returned by the parser.

**Description**: Our commentary on this output parser and when to use it.
- **Name**: The name of the output parser
- **Supports Streaming**: Whether the output parser supports streaming.
- **Has Format Instructions**: Whether the output parser has format instructions. This is generally available except when (a) the desired schema is not specified in the prompt but rather in other parameters (like OpenAI function calling), or (b) when the OutputParser wraps another OutputParser.
- **Calls LLM**: Whether this output parser itself calls an LLM. This is usually only done by output parsers that attempt to correct misformatted output.
- **Input Type**: Expected input type. Most output parsers work on both strings and messages, but some (like OpenAI Functions) need a message with specific kwargs.
- **Output Type**: The output type of the object returned by the parser.
- **Description**: Our commentary on this output parser and when to use it.

| Name | Supports Streaming | Has Format Instructions | Calls LLM | Input Type | Output Type | Description |
|-----------------|--------------------|-------------------------------|-----------|----------------------------------|----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| [JSON](https://python.langchain.com/v0.2/api_reference/core/output_parsers/langchain_core.output_parsers.json.JsonOutputParser.html#langchain_core.output_parsers.json.JsonOutputParser) | ✅ | ✅ | | `str` \| `Message` | JSON object | Returns a JSON object as specified. You can specify a Pydantic model and it will return JSON for that model. Probably the most reliable output parser for getting structured data that does NOT use function calling. |
| [XML](https://python.langchain.com/v0.2/api_reference/core/output_parsers/langchain_core.output_parsers.xml.XMLOutputParser.html#langchain_core.output_parsers.xml.XMLOutputParser) | ✅ | ✅ | | `str` \| `Message` | `dict` | Returns a dictionary of tags. Use when XML output is needed. Use with models that are good at writing XML (like Anthropic's). |
| [CSV](https://python.langchain.com/v0.2/api_reference/core/output_parsers/langchain_core.output_parsers.list.CommaSeparatedListOutputParser.html#langchain_core.output_parsers.list.CommaSeparatedListOutputParser) | ✅ | ✅ | | `str` \| `Message` | `List[str]` | Returns a list of comma separated values. |
| [OutputFixing](https://python.langchain.com/v0.2/api_reference/langchain/output_parsers/langchain.output_parsers.fix.OutputFixingParser.html#langchain.output_parsers.fix.OutputFixingParser) | | | ✅ | `str` \| `Message` | | Wraps another output parser. If that output parser errors, then this will pass the error message and the bad output to an LLM and ask it to fix the output. |
| [RetryWithError](https://python.langchain.com/v0.2/api_reference/langchain/output_parsers/langchain.output_parsers.retry.RetryWithErrorOutputParser.html#langchain.output_parsers.retry.RetryWithErrorOutputParser) | | | ✅ | `str` \| `Message` | | Wraps another output parser. If that output parser errors, then this will pass the original inputs, the bad output, and the error message to an LLM and ask it to fix it. Compared to OutputFixingParser, this one also sends the original instructions. |
| [Pydantic](https://python.langchain.com/v0.2/api_reference/core/output_parsers/langchain_core.output_parsers.pydantic.PydanticOutputParser.html#langchain_core.output_parsers.pydantic.PydanticOutputParser) | | ✅ | | `str` \| `Message` | `pydantic.BaseModel` | Takes a user defined Pydantic model and returns data in that format. |
| [YAML](https://python.langchain.com/v0.2/api_reference/langchain/output_parsers/langchain.output_parsers.yaml.YamlOutputParser.html#langchain.output_parsers.yaml.YamlOutputParser) | | ✅ | | `str` \| `Message` | `pydantic.BaseModel` | Takes a user defined Pydantic model and returns data in that format. Uses YAML to encode it. |
| [PandasDataFrame](https://python.langchain.com/v0.2/api_reference/langchain/output_parsers/langchain.output_parsers.pandas_dataframe.PandasDataFrameOutputParser.html#langchain.output_parsers.pandas_dataframe.PandasDataFrameOutputParser) | | ✅ | | `str` \| `Message` | `dict` | Useful for doing operations with pandas DataFrames. |
| [Enum](https://python.langchain.com/v0.2/api_reference/langchain/output_parsers/langchain.output_parsers.enum.EnumOutputParser.html#langchain.output_parsers.enum.EnumOutputParser) | | ✅ | | `str` \| `Message` | `Enum` | Parses response into one of the provided enum values. |
| [Datetime](https://python.langchain.com/v0.2/api_reference/langchain/output_parsers/langchain.output_parsers.datetime.DatetimeOutputParser.html#langchain.output_parsers.datetime.DatetimeOutputParser) | | ✅ | | `str` \| `Message` | `datetime.datetime` | Parses response into a datetime string. |
| [Structured](https://python.langchain.com/v0.2/api_reference/langchain/output_parsers/langchain.output_parsers.structured.StructuredOutputParser.html#langchain.output_parsers.structured.StructuredOutputParser) | | ✅ | | `str` \| `Message` | `Dict[str, str]` | An output parser that returns structured information. It is less powerful than other output parsers since it only allows for fields to be strings. This can be useful when you are working with smaller LLMs. |
| [JSON](https://python.langchain.com/api_reference/core/output_parsers/langchain_core.output_parsers.json.JsonOutputParser.html#langchain_core.output_parsers.json.JsonOutputParser) | ✅ | ✅ | | `str` \| `Message` | JSON object | Returns a JSON object as specified. You can specify a Pydantic model and it will return JSON for that model. Probably the most reliable output parser for getting structured data that does NOT use function calling. |
| [XML](https://python.langchain.com/api_reference/core/output_parsers/langchain_core.output_parsers.xml.XMLOutputParser.html#langchain_core.output_parsers.xml.XMLOutputParser) | ✅ | ✅ | | `str` \| `Message` | `dict` | Returns a dictionary of tags. Use when XML output is needed. Use with models that are good at writing XML (like Anthropic's). |
| [CSV](https://python.langchain.com/api_reference/core/output_parsers/langchain_core.output_parsers.list.CommaSeparatedListOutputParser.html#langchain_core.output_parsers.list.CommaSeparatedListOutputParser) | ✅ | ✅ | | `str` \| `Message` | `List[str]` | Returns a list of comma separated values. |
| [OutputFixing](https://python.langchain.com/api_reference/langchain/output_parsers/langchain.output_parsers.fix.OutputFixingParser.html#langchain.output_parsers.fix.OutputFixingParser) | | | ✅ | `str` \| `Message` | | Wraps another output parser. If that output parser errors, then this will pass the error message and the bad output to an LLM and ask it to fix the output. |
| [RetryWithError](https://python.langchain.com/api_reference/langchain/output_parsers/langchain.output_parsers.retry.RetryWithErrorOutputParser.html#langchain.output_parsers.retry.RetryWithErrorOutputParser) | | | ✅ | `str` \| `Message` | | Wraps another output parser. If that output parser errors, then this will pass the original inputs, the bad output, and the error message to an LLM and ask it to fix it. Compared to OutputFixingParser, this one also sends the original instructions. |
| [Pydantic](https://python.langchain.com/api_reference/core/output_parsers/langchain_core.output_parsers.pydantic.PydanticOutputParser.html#langchain_core.output_parsers.pydantic.PydanticOutputParser) | | ✅ | | `str` \| `Message` | `pydantic.BaseModel` | Takes a user defined Pydantic model and returns data in that format. |
| [YAML](https://python.langchain.com/api_reference/langchain/output_parsers/langchain.output_parsers.yaml.YamlOutputParser.html#langchain.output_parsers.yaml.YamlOutputParser) | | ✅ | | `str` \| `Message` | `pydantic.BaseModel` | Takes a user defined Pydantic model and returns data in that format. Uses YAML to encode it. |
| [PandasDataFrame](https://python.langchain.com/api_reference/langchain/output_parsers/langchain.output_parsers.pandas_dataframe.PandasDataFrameOutputParser.html#langchain.output_parsers.pandas_dataframe.PandasDataFrameOutputParser) | | ✅ | | `str` \| `Message` | `dict` | Useful for doing operations with pandas DataFrames. |
| [Enum](https://python.langchain.com/api_reference/langchain/output_parsers/langchain.output_parsers.enum.EnumOutputParser.html#langchain.output_parsers.enum.EnumOutputParser) | | ✅ | | `str` \| `Message` | `Enum` | Parses response into one of the provided enum values. |
| [Datetime](https://python.langchain.com/api_reference/langchain/output_parsers/langchain.output_parsers.datetime.DatetimeOutputParser.html#langchain.output_parsers.datetime.DatetimeOutputParser) | | ✅ | | `str` \| `Message` | `datetime.datetime` | Parses response into a datetime string. |
| [Structured](https://python.langchain.com/api_reference/langchain/output_parsers/langchain.output_parsers.structured.StructuredOutputParser.html#langchain.output_parsers.structured.StructuredOutputParser) | | ✅ | | `str` \| `Message` | `Dict[str, str]` | An output parser that returns structured information. It is less powerful than other output parsers since it only allows for fields to be strings. This can be useful when you are working with smaller LLMs. |

For specifics on how to use output parsers, see the [relevant how-to guides here](/docs/how_to/#output-parsers).

@@ -507,7 +501,7 @@ For specifics on how to use retrievers, see the [relevant how-to guides here](/d
For some techniques, such as [indexing and retrieval with multiple vectors per document](/docs/how_to/multi_vector/) or
[caching embeddings](/docs/how_to/caching_embeddings/), having a form of key-value (KV) storage is helpful.

LangChain includes a [`BaseStore`](https://python.langchain.com/v0.2/api_reference/core/stores/langchain_core.stores.BaseStore.html) interface,
LangChain includes a [`BaseStore`](https://python.langchain.com/api_reference/core/stores/langchain_core.stores.BaseStore.html) interface,
which allows for storage of arbitrary data. However, LangChain components that require KV-storage accept a
more specific `BaseStore[str, bytes]` instance that stores binary data (referred to as a `ByteStore`), and internally take care of
encoding and decoding data for their specific needs.
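
As a quick sketch of the `ByteStore` idea (assuming the in-memory implementation that ships in `langchain_core.stores`):

```python
from langchain_core.stores import InMemoryByteStore

store = InMemoryByteStore()
# Set and get multiple key-value pairs at once
store.mset([("user:1", b"alice"), ("user:2", b"bob")])
print(store.mget(["user:1", "user:3"]))  # [b'alice', None]
```
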
@@ -516,7 +510,7 @@ This means that as a user, you only need to think about one type of store rather

#### Interface

All [`BaseStores`](https://python.langchain.com/v0.2/api_reference/core/stores/langchain_core.stores.BaseStore.html) support the following interface. Note that the interface allows
All [`BaseStores`](https://python.langchain.com/api_reference/core/stores/langchain_core.stores.BaseStore.html) support the following interface. Note that the interface allows
for modifying **multiple** key-value pairs at once:

- `mget(key: Sequence[str]) -> List[Optional[bytes]]`: get the contents of multiple keys, returning `None` if the key does not exist
@@ -534,10 +528,10 @@ Tools are needed whenever you want a model to control parts of your code or call

A tool consists of:

1. The name of the tool.
2. A description of what the tool does.
3. A JSON schema defining the inputs to the tool.
4. A function (and, optionally, an async variant of the function).
1. The `name` of the tool.
2. A `description` of what the tool does.
3. A `JSON schema` defining the inputs to the tool.
4. A `function` (and, optionally, an async variant of the function).

When a tool is bound to a model, the name, description and JSON schema are provided as context to the model.
Given a list of tools and a set of instructions, a model can request to call one or more tools with specific inputs.
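
For example, a minimal sketch of defining a tool with the `@tool` decorator (the `multiply` function is purely illustrative):

```python
from langchain_core.tools import tool


@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b


# The decorator infers the name, description, and input schema
print(multiply.name)         # multiply
print(multiply.description)  # Multiply two integers.
print(multiply.args)         # {'a': {...}, 'b': {...}}
```
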
@@ -601,10 +595,10 @@ tool_call = ai_msg.tool_calls[0]
# -> ToolCall(args={...}, id=..., ...)
tool_message = tool.invoke(tool_call)
# -> ToolMessage(
content="tool result foobar...",
tool_call_id=...,
name="tool_name"
)
# content="tool result foobar...",
# tool_call_id=...,
# name="tool_name"
# )
```

If you are invoking the tool this way and want to include an [artifact](/docs/concepts/#toolmessage) for the ToolMessage, you will need to have the tool return two things.
@@ -650,14 +644,14 @@ The results of those actions can then be fed back into the agent and it determin
[LangGraph](https://github.com/langchain-ai/langgraph) is an extension of LangChain specifically aimed at creating highly controllable and customizable agents.
Please check out that documentation for a more in-depth overview of agent concepts.

There is a legacy agent concept in LangChain that we are moving towards deprecating: `AgentExecutor`.
There is a legacy `agent` concept in LangChain that we are moving towards deprecating: `AgentExecutor`.
AgentExecutor was essentially a runtime for agents.
It was a great place to get started; however, it was not flexible enough as you started to have more customized agents.
In order to solve that, we built LangGraph to be this flexible, highly-controllable runtime.

If you are still using AgentExecutor, do not fear: we still have a guide on [how to use AgentExecutor](/docs/how_to/agent_executor).
It is recommended, however, that you start to transition to LangGraph.
In order to assist in this we have put together a [transition guide on how to do so](/docs/how_to/migrate_agent).
In order to assist in this, we have put together a [transition guide on how to do so](/docs/how_to/migrate_agent).

#### ReAct agents
<span data-heading-keywords="react,react agent"></span>
@@ -714,17 +708,15 @@ You can subscribe to these events by using the `callbacks` argument available th

Callback handlers can either be `sync` or `async`:

* Sync callback handlers implement the [BaseCallbackHandler](https://python.langchain.com/v0.2/api_reference/core/callbacks/langchain_core.callbacks.base.BaseCallbackHandler.html) interface.
* Async callback handlers implement the [AsyncCallbackHandler](https://python.langchain.com/v0.2/api_reference/core/callbacks/langchain_core.callbacks.base.AsyncCallbackHandler.html) interface.
* Sync callback handlers implement the [BaseCallbackHandler](https://python.langchain.com/api_reference/core/callbacks/langchain_core.callbacks.base.BaseCallbackHandler.html) interface.
* Async callback handlers implement the [AsyncCallbackHandler](https://python.langchain.com/api_reference/core/callbacks/langchain_core.callbacks.base.AsyncCallbackHandler.html) interface.

During run-time LangChain configures an appropriate callback manager (e.g., [CallbackManager](https://python.langchain.com/v0.2/api_reference/core/callbacks/langchain_core.callbacks.manager.CallbackManager.html) or [AsyncCallbackManager](https://python.langchain.com/v0.2/api_reference/core/callbacks/langchain_core.callbacks.manager.AsyncCallbackManager.html)) which will be responsible for calling the appropriate method on each "registered" callback handler when the event is triggered.
During run-time LangChain configures an appropriate callback manager (e.g., [CallbackManager](https://python.langchain.com/api_reference/core/callbacks/langchain_core.callbacks.manager.CallbackManager.html) or [AsyncCallbackManager](https://python.langchain.com/api_reference/core/callbacks/langchain_core.callbacks.manager.AsyncCallbackManager.html)) which will be responsible for calling the appropriate method on each "registered" callback handler when the event is triggered.

#### Passing callbacks

The `callbacks` property is available on most objects throughout the API (Models, Tools, Agents, etc.) in two different places:

The callbacks are available on most objects throughout the API (Models, Tools, Agents, etc.) in two different places:

- **Request time callbacks**: Passed at the time of the request in addition to the input data.
  Available on all standard `Runnable` objects. These callbacks are INHERITED by all children
  of the object they are defined on. For example, `chain.invoke({"number": 25}, {"callbacks": [handler]})`.
@@ -743,7 +735,7 @@ callbacks to any child objects.
:::important Async in Python<=3.10

Any `RunnableLambda`, `RunnableGenerator`, or `Tool` that invokes other runnables
and is running async in python<=3.10, will have to propagate callbacks to child
and is running `async` in python<=3.10, will have to propagate callbacks to child
objects manually. This is because LangChain cannot automatically propagate
callbacks to child objects in this case.

@@ -785,7 +777,7 @@ For models (or other components) that don't support streaming natively, this ite
you could still use the same general pattern when calling them. Using `.stream()` will also automatically call the model in streaming mode
without the need to provide additional config.

The type of each outputted chunk depends on the type of component - for example, chat models yield [`AIMessageChunks`](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessageChunk.html).
The type of each outputted chunk depends on the type of component - for example, chat models yield [`AIMessageChunks`](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.AIMessageChunk.html).
Because this method is part of [LangChain Expression Language](/docs/concepts/#langchain-expression-language-lcel),
you can handle formatting differences from different outputs using an [output parser](/docs/concepts/#output-parsers) to transform
each yielded chunk.
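
A minimal sketch of the pattern (the model name is an assumption; any chat model works):

```python
from langchain_openai import ChatOpenAI

model = ChatOpenAI(model="gpt-4o-mini")
for chunk in model.stream("Why do parrots talk?"):
    # Each chunk is an AIMessageChunk; print tokens as they arrive
    print(chunk.content, end="", flush=True)
```
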
@@ -833,10 +825,10 @@ including a table listing available events.
#### Callbacks

The lowest level way to stream outputs from LLMs in LangChain is via the [callbacks](/docs/concepts/#callbacks) system. You can pass a
callback handler that handles the [`on_llm_new_token`](https://python.langchain.com/v0.2/api_reference/langchain/callbacks/langchain.callbacks.streaming_aiter.AsyncIteratorCallbackHandler.html#langchain.callbacks.streaming_aiter.AsyncIteratorCallbackHandler.on_llm_new_token) event into LangChain components. When that component is invoked, any
callback handler that handles the [`on_llm_new_token`](https://python.langchain.com/api_reference/langchain/callbacks/langchain.callbacks.streaming_aiter.AsyncIteratorCallbackHandler.html#langchain.callbacks.streaming_aiter.AsyncIteratorCallbackHandler.on_llm_new_token) event into LangChain components. When that component is invoked, any
[LLM](/docs/concepts/#llms) or [chat model](/docs/concepts/#chat-models) contained in the component calls
the callback with the generated token. Within the callback, you could pipe the tokens into some other destination, e.g. an HTTP response.
You can also handle the [`on_llm_end`](https://python.langchain.com/v0.2/api_reference/langchain/callbacks/langchain.callbacks.streaming_aiter.AsyncIteratorCallbackHandler.html#langchain.callbacks.streaming_aiter.AsyncIteratorCallbackHandler.on_llm_end) event to perform any necessary cleanup.
You can also handle the [`on_llm_end`](https://python.langchain.com/api_reference/langchain/callbacks/langchain.callbacks.streaming_aiter.AsyncIteratorCallbackHandler.html#langchain.callbacks.streaming_aiter.AsyncIteratorCallbackHandler.on_llm_end) event to perform any necessary cleanup.

You can see [this how-to section](/docs/how_to/#callbacks) for more specifics on using callbacks.
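
As an illustrative sketch of such a handler (the class name is an assumption):

```python
from langchain_core.callbacks import BaseCallbackHandler


class TokenPrinter(BaseCallbackHandler):
    def on_llm_new_token(self, token: str, **kwargs) -> None:
        # Pipe each generated token somewhere as soon as it arrives
        print(token, end="", flush=True)


# model.invoke("Hello!", config={"callbacks": [TokenPrinter()]})
```
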
@@ -873,7 +865,7 @@ Furthermore, using tokens can also improve efficiency, since the model processes
### Function/tool calling

:::info
We use the term tool calling interchangeably with function calling. Although
We use the term `tool calling` interchangeably with `function calling`. Although
function calling is sometimes meant to refer to invocations of a single function,
we treat all models as though they can return multiple tool or function calls in
each message.
@@ -951,7 +943,7 @@ Here's an example:
```python
from typing import Optional

from langchain_core.pydantic_v1 import BaseModel, Field
from pydantic import BaseModel, Field


class Joke(BaseModel):
@@ -968,7 +960,6 @@ structured_llm.invoke("Tell me a joke about cats")

```
Joke(setup='Why was the cat sitting on the computer?', punchline='To keep an eye on the mouse!', rating=None)

```

We recommend this method as a starting point when working with structured output:
@@ -1069,7 +1060,7 @@ a `tool_calls` field containing `args` that match the desired shape.
There are several acceptable formats you can use to bind tools to a model in LangChain. Here's one example:

```python
from langchain_core.pydantic_v1 import BaseModel, Field
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI

class ResponseFormatter(BaseModel):
@@ -1107,7 +1098,11 @@ For a full list of model providers that support tool calling, [see this table](/

### Few-shot prompting

One of the most effective ways to improve model performance is to give a model examples of what you want it to do. The technique of adding example inputs and expected outputs to a model prompt is known as "few-shot prompting". There are a few things to think about when doing few-shot prompting:
One of the most effective ways to improve model performance is to give a model examples of
what you want it to do. The technique of adding example inputs and expected outputs
to a model prompt is known as "few-shot prompting". The technique is based on the
[Language Models are Few-Shot Learners](https://arxiv.org/abs/2005.14165) paper.
There are a few things to think about when doing few-shot prompting:

1. How are examples generated?
2. How many examples are in each prompt?
@@ -1182,8 +1177,10 @@ You can see a case study of how Anthropic and OpenAI respond to different few-sh

### Retrieval

LLMs are trained on a large but fixed dataset, limiting their ability to reason over private or recent information. Fine-tuning an LLM with specific facts is one way to mitigate this, but is often [poorly suited for factual recall](https://www.anyscale.com/blog/fine-tuning-is-for-form-not-facts) and [can be costly](https://www.glean.com/blog/how-to-build-an-ai-assistant-for-the-enterprise).
Retrieval is the process of providing relevant information to an LLM to improve its response for a given input. Retrieval augmented generation (RAG) is the process of grounding the LLM generation (output) using the retrieved information.
LLMs are trained on a large but fixed dataset, limiting their ability to reason over private or recent information.
Fine-tuning an LLM with specific facts is one way to mitigate this, but is often [poorly suited for factual recall](https://www.anyscale.com/blog/fine-tuning-is-for-form-not-facts) and [can be costly](https://www.glean.com/blog/how-to-build-an-ai-assistant-for-the-enterprise).
`Retrieval` is the process of providing relevant information to an LLM to improve its response for a given input.
`Retrieval augmented generation` (`RAG`) ([paper](https://arxiv.org/abs/2005.11401)) is the process of grounding the LLM generation (output) using the retrieved information.

:::tip
@@ -1203,12 +1200,12 @@ First, consider the user input(s) to your RAG system. Ideally, a RAG system can
**Using an LLM to review and optionally modify the input is the central idea behind query translation.** This serves as a general buffer, optimizing raw user inputs for your retrieval system.
For example, this can be as simple as extracting keywords or as complex as generating multiple sub-questions for a complex query.

| Name | When to use | Description |
|---------------|-------------|-------------|
| Name | When to use | Description |
|---------------|-------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| [Multi-query](/docs/how_to/MultiQueryRetriever/) | When you need to cover multiple perspectives of a question. | Rewrite the user question from multiple perspectives, retrieve documents for each rewritten question, return the unique documents for all queries. |
|
||||
| [Decomposition](https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb) | When a question can be broken down into smaller subproblems. | Decompose a question into a set of subproblems / questions, which can either be solved sequentially (use the answer from first + retrieval to answer the second) or in parallel (consolidate each answer into final answer). |
|
||||
| [Step-back](https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb) | When a higher-level conceptual understanding is required. | First prompt the LLM to ask a generic step-back question about higher-level concepts or principles, and retrieve relevant facts about them. Use this grounding to help answer the user question. |
|
||||
| [HyDE](https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb) | If you have challenges retrieving relevant documents using the raw user inputs. | Use an LLM to convert questions into hypothetical documents that answer the question. Use the embedded hypothetical documents to retrieve real documents with the premise that doc-doc similarity search can produce more relevant matches. |
|
||||
| [Decomposition](https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb) | When a question can be broken down into smaller subproblems. | Decompose a question into a set of subproblems / questions, which can either be solved sequentially (use the answer from first + retrieval to answer the second) or in parallel (consolidate each answer into final answer). |
|
||||
| [Step-back](https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb) | When a higher-level conceptual understanding is required. | First prompt the LLM to ask a generic step-back question about higher-level concepts or principles, and retrieve relevant facts about them. Use this grounding to help answer the user question. [Paper](https://arxiv.org/pdf/2310.06117). |
|
||||
| [HyDE](https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb) | If you have challenges retrieving relevant documents using the raw user inputs. | Use an LLM to convert questions into hypothetical documents that answer the question. Use the embedded hypothetical documents to retrieve real documents with the premise that doc-doc similarity search can produce more relevant matches. [Paper](https://arxiv.org/abs/2212.10496). |
|
||||
|
||||
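For example, a rough sketch of multi-query rewriting (assuming `langchain-openai`; `retriever` stands in for any LangChain retriever):

```python
# Use an LLM to rewrite one question from several perspectives, then
# retrieve per rewrite and deduplicate the results.
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

rewrite_chain = (
    ChatPromptTemplate.from_template(
        "Rewrite this question from three different perspectives, "
        "one per line:\n{question}"
    )
    | ChatOpenAI(model="gpt-4o-mini")
)
rewrites = rewrite_chain.invoke(
    {"question": "How do agents use tools?"}
).content.splitlines()
# unique_docs = {d.page_content: d for q in rewrites for d in retriever.invoke(q)}
```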
:::tip

@@ -1282,11 +1279,11 @@ Fifth, consider ways to improve the quality of your similarity search itself. Em

There are some additional tricks to improve the quality of your retrieval. Embeddings excel at capturing semantic information, but may struggle with keyword-based queries. Many [vector stores](/docs/integrations/retrievers/pinecone_hybrid_search/) offer built-in [hybrid-search](https://docs.pinecone.io/guides/data/understanding-hybrid-search) to combine keyword and semantic similarity, which marries the benefits of both approaches. Furthermore, many vector stores have [maximal marginal relevance](https://python.langchain.com/v0.1/docs/modules/model_io/prompts/example_selectors/mmr/), which attempts to diversify the results of a search to avoid returning similar and redundant documents.

| Name | When to use | Description |
|-------------------|----------------------------------------------------------|-------------|
| [ColBERT](/docs/integrations/providers/ragatouille/#using-colbert-as-a-reranker) | When higher granularity embeddings are needed. | ColBERT uses contextually influenced embeddings for each token in the document and query to get a granular query-document similarity score. |
| [Hybrid search](/docs/integrations/retrievers/pinecone_hybrid_search/) | When combining keyword-based and semantic similarity. | Hybrid search combines keyword and semantic similarity, marrying the benefits of both approaches. |
| [Maximal Marginal Relevance (MMR)](/docs/integrations/vectorstores/pinecone/#maximal-marginal-relevance-searches) | When needing to diversify search results. | MMR attempts to diversify the results of a search to avoid returning similar and redundant documents. |
| Name | When to use | Description |
|-------------------|----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| [ColBERT](/docs/integrations/providers/ragatouille/#using-colbert-as-a-reranker) | When higher granularity embeddings are needed. | ColBERT uses contextually influenced embeddings for each token in the document and query to get a granular query-document similarity score. [Paper](https://arxiv.org/abs/2112.01488). |
| [Hybrid search](/docs/integrations/retrievers/pinecone_hybrid_search/) | When combining keyword-based and semantic similarity. | Hybrid search combines keyword and semantic similarity, marrying the benefits of both approaches. [Paper](https://arxiv.org/abs/2210.11934). |
| [Maximal Marginal Relevance (MMR)](/docs/integrations/vectorstores/pinecone/#maximal-marginal-relevance-searches) | When needing to diversify search results. | MMR attempts to diversify the results of a search to avoid returning similar and redundant documents. |

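A small MMR sketch, assuming a vector store that implements `max_marginal_relevance_search` (many do, including the in-memory store used here):

```python
# MMR re-ranks similarity hits to penalize near-duplicates, trading pure
# relevance for diversity in the returned set.
from langchain_core.documents import Document
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import OpenAIEmbeddings

store = InMemoryVectorStore.from_documents(
    [
        Document(page_content="Agents choose tools at runtime."),
        Document(page_content="Agents select tools while running."),  # near-duplicate
        Document(page_content="Chains are fixed sequences of steps."),
    ],
    OpenAIEmbeddings(),
)
# fetch_k candidates are retrieved first, then k diverse ones are kept.
store.max_marginal_relevance_search("How do agents work?", k=2, fetch_k=3)
```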
:::tip

@@ -1306,7 +1303,7 @@ Sixth, consider ways to filter or rank retrieved documents. This is very useful

:::tip

See our RAG from Scratch video on [RAG-Fusion](https://youtu.be/77qELPbNgxA?feature=shared), an approach for post-processing across multiple queries: Rewrite the user question from multiple perspectives, retrieve documents for each rewritten question, and combine the ranks of multiple search result lists to produce a single, unified ranking with [Reciprocal Rank Fusion (RRF)](https://towardsdatascience.com/forget-rag-the-future-is-rag-fusion-1147298d8ad1).
See our RAG from Scratch video on [RAG-Fusion](https://youtu.be/77qELPbNgxA?feature=shared) ([paper](https://arxiv.org/abs/2402.03367)), an approach for post-processing across multiple queries: Rewrite the user question from multiple perspectives, retrieve documents for each rewritten question, and combine the ranks of multiple search result lists to produce a single, unified ranking with [Reciprocal Rank Fusion (RRF)](https://towardsdatascience.com/forget-rag-the-future-is-rag-fusion-1147298d8ad1).

:::

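For reference, a small self-contained sketch of Reciprocal Rank Fusion itself (plain Python; no LangChain API assumed):

```python
def reciprocal_rank_fusion(result_lists, k: int = 60):
    """Fuse several ranked lists of doc ids into one ranking.

    Each document scores sum(1 / (k + rank)) over the lists it appears in,
    so items ranked well in multiple lists rise to the top.
    """
    scores: dict[str, float] = {}
    for results in result_lists:
        for rank, doc_id in enumerate(results, start=1):
            scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (k + rank)
    return sorted(scores, key=scores.get, reverse=True)

# Rankings from two rewritten queries; "b" wins by appearing high in both.
reciprocal_rank_fusion([["a", "b", "c"], ["b", "c", "d"]])
```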
@@ -12,7 +12,7 @@ It covers a wide array of topics, including tutorials, use cases, integrations,
and more, offering extensive guidance on building with LangChain.
The content for this documentation lives in the `/docs` directory of the monorepo.
2. In-code Documentation: This is documentation of the codebase itself, which is also
used to generate the externally facing [API Reference](https://python.langchain.com/v0.2/api_reference/langchain/index.html).
used to generate the externally facing [API Reference](https://python.langchain.com/api_reference/langchain/index.html).
The content for the API reference is autogenerated by scanning the docstrings in the codebase. For this reason, we ask that
developers document their code well.

@@ -24,3 +24,16 @@ for more information.
Notably, GitHub doesn't allow this setting to be enabled for forks in **organizations** ([issue](https://github.com/orgs/community/discussions/5634)).
If you are working in an organization, we recommend submitting your PR from a personal
fork in order to enable this setting.

### Why hasn't my PR been reviewed?

Please reference our [Review Process](/docs/contributing/review_process/).

### Why was my PR closed?

Please reference our [Review Process](/docs/contributing/review_process/).

### I think my PR was closed in a way that didn't follow the review process. What should I do?

Tag `@efriis` in the PR comments referencing the portion of the review
process that you believe was not followed. We'll take a look!

@@ -50,7 +50,7 @@ There are other files in the root directory level, but their presence should be
## Documentation

The `/docs` directory contains the content for the documentation that is shown
at https://python.langchain.com/ and the associated API Reference https://python.langchain.com/v0.2/api_reference/langchain/index.html.
at https://python.langchain.com/ and the associated API Reference https://python.langchain.com/api_reference/langchain/index.html.

See the [documentation](/docs/contributing/documentation/) guidelines to learn how to contribute to the documentation.

docs/docs/contributing/review_process.mdx (new file, 95 lines)
@@ -0,0 +1,95 @@
# Review Process

## Overview

This document outlines the process used by the LangChain maintainers for reviewing pull requests (PRs). The primary objective of this process is to enhance the LangChain developer experience.

## Review Statuses

We categorize PRs using three main statuses, which are marked as project item statuses in the right sidebar and can be viewed in detail [here](https://github.com/orgs/langchain-ai/projects/12/views/1).

- **Triage**:
  - Initial status for all newly submitted PRs.
  - Requires a maintainer to categorize it into one of the other statuses.

- **Needs Support**:
  - PRs that require community feedback or additional input before moving forward.
  - Automatically promoted to the backlog if it receives 5 upvotes.
  - An auto-comment is generated when this status is applied, explaining the flow and the upvote requirement.
  - If the PR remains in this status for 25 days, it will be marked as “stale” via auto-comment.
  - PRs will be auto-closed after 30 days if no further action is taken.

- **In Review**:
  - PRs that are actively under review by our team.
  - These are regularly reviewed and monitored.

**Note:** A PR may only have one status at a time.

**Note:** You may notice 3 additional statuses of Done, Closed, and Internal that
are external to this lifecycle. Done and Closed PRs have been merged or closed,
respectively. Internal is for PRs submitted by core maintainers, and these PRs are owned
by the submitter.

## Review Guidelines

1. **PRs that touch /libs/core**:
   - PRs that directly impact core code and are likely to affect end users.
   - **Triage Guideline**: most PRs should either go straight to `In Review` or be closed.
   - These PRs are given top priority and are reviewed the fastest.
   - PRs that don't have a **concise** description of their motivation (either in the PR summary or in a linked issue) are likely to be closed without an in-depth review. Please do not generate verbose PR descriptions with an LLM.
   - PRs that don't have unit tests are likely to be closed.
   - Feature requests should first be opened as a GitHub issue and discussed with the LangChain maintainers. Large PRs submitted without prior discussion are likely to be closed.

2. **PRs that touch /libs/langchain**:
   - High-impact PRs that are closely related to core PRs but slightly lower in priority.
   - **Triage Guideline**: most PRs should either go straight to `In Review` or be closed.
   - These are reviewed and closed aggressively, similar to core PRs.
   - New feature requests should be discussed with the core maintainer team beforehand in an issue.

3. **PRs that touch /libs/partners/\***:
   - PRs involving integration packages.
   - **Triage Guideline**: most PRs should either go straight to `In Review` or be closed.
   - The review may be conducted by our team or handed off to the partner's development team, depending on the PR's content.
   - We maintain communication lines with most partner dev teams to facilitate this process.

4. **Community PRs**:
   - Most community PRs will get an initial status of "needs support".
   - **Triage Guideline**: most PRs should go to `Needs Support`. Bugfixes on high-traffic integrations should go straight to `In Review`.
   - **Triage Guideline**: all new features and integrations should go to `Needs Support` and will be closed if they do not get enough support (measured by upvotes or comments).
   - PRs in the `Needs Support` status for 20 days are marked as “stale” and will be closed after 30 days if no action is taken.

5. **Documentation PRs**:
   - PRs that touch the documentation content in docs/docs.
   - **Triage Guideline**:
     - PRs that fix typos or small errors in a single file and pass CI should go straight to `In Review`.
     - PRs that make changes that have been discussed and agreed upon in an issue should go straight to `In Review`.
     - PRs that add new pages or change the structure of the documentation should go to `Needs Support`.
   - We strive to standardize documentation formats to streamline the review process.
   - CI jobs run against documentation to ensure adherence to standards, automating much of the review.

6. **PRs must be in English**:
   - PRs that are not in English will be closed without review.
   - This is to ensure that all maintainers can review the PRs effectively.

## How to see a PR's status

See screenshot:

![PR status screenshot](https://langchain-723568446.imgix.net/pr_status.png)

*To see the status of all open PRs, please visit the [LangChain Project Board](https://github.com/orgs/langchain-ai/projects/12/views/2).*

## Review Prioritization

Our goal is to provide the best possible development experience by focusing on making software that:

- Works: Works as intended (is bug-free).
- Is useful: Improves LLM app development with components that work off-the-shelf and runtimes that simplify app building.
- Is easy: Is intuitive to use and well-documented.

We believe this process reflects our priorities and are open to feedback if you feel it does not.

## GitHub Discussion

We welcome your feedback on this process. Please feel free to add a comment in
[this GitHub Discussion](https://github.com/langchain-ai/langchain/discussions/25920).

@@ -13,7 +13,7 @@
"# How to split by HTML header \n",
"## Description and motivation\n",
"\n",
"[HTMLHeaderTextSplitter](https://python.langchain.com/v0.2/api_reference/text_splitters/html/langchain_text_splitters.html.HTMLHeaderTextSplitter.html) is a \"structure-aware\" chunker that splits text at the HTML element level and adds metadata for each header \"relevant\" to any given chunk. It can return chunks element by element or combine elements with the same metadata, with the objectives of (a) keeping related text grouped (more or less) semantically and (b) preserving context-rich information encoded in document structures. It can be used with other text splitters as part of a chunking pipeline.\n",
"[HTMLHeaderTextSplitter](https://python.langchain.com/api_reference/text_splitters/html/langchain_text_splitters.html.HTMLHeaderTextSplitter.html) is a \"structure-aware\" chunker that splits text at the HTML element level and adds metadata for each header \"relevant\" to any given chunk. It can return chunks element by element or combine elements with the same metadata, with the objectives of (a) keeping related text grouped (more or less) semantically and (b) preserving context-rich information encoded in document structures. It can be used with other text splitters as part of a chunking pipeline.\n",
"\n",
"It is analogous to the [MarkdownHeaderTextSplitter](/docs/how_to/markdown_header_metadata_splitter) for markdown files.\n",
"\n",

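A small sketch of the splitter in use (assuming `langchain-text-splitters` is installed; the HTML snippet is invented):

```python
from langchain_text_splitters import HTMLHeaderTextSplitter

html = "<h1>Intro</h1><p>Hello.</p><h2>Details</h2><p>More text.</p>"
splitter = HTMLHeaderTextSplitter(
    headers_to_split_on=[("h1", "Header 1"), ("h2", "Header 2")]
)
for doc in splitter.split_text(html):
    # Each chunk carries the headers it falls under as metadata.
    print(doc.metadata, doc.page_content)
```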
@@ -9,7 +9,7 @@
"\n",
"Distance-based vector database retrieval embeds (represents) queries in high-dimensional space and finds similar embedded documents based on a distance metric. But, retrieval may produce different results with subtle changes in query wording, or if the embeddings do not capture the semantics of the data well. Prompt engineering / tuning is sometimes done to manually address these problems, but can be tedious.\n",
"\n",
"The [MultiQueryRetriever](https://python.langchain.com/v0.2/api_reference/langchain/retrievers/langchain.retrievers.multi_query.MultiQueryRetriever.html) automates the process of prompt tuning by using an LLM to generate multiple queries from different perspectives for a given user input query. For each query, it retrieves a set of relevant documents and takes the unique union across all queries to get a larger set of potentially relevant documents. By generating multiple perspectives on the same question, the `MultiQueryRetriever` can mitigate some of the limitations of the distance-based retrieval and get a richer set of results.\n",
"The [MultiQueryRetriever](https://python.langchain.com/api_reference/langchain/retrievers/langchain.retrievers.multi_query.MultiQueryRetriever.html) automates the process of prompt tuning by using an LLM to generate multiple queries from different perspectives for a given user input query. For each query, it retrieves a set of relevant documents and takes the unique union across all queries to get a larger set of potentially relevant documents. By generating multiple perspectives on the same question, the `MultiQueryRetriever` can mitigate some of the limitations of the distance-based retrieval and get a richer set of results.\n",
"\n",
"Let's build a vectorstore using the [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/) blog post by Lilian Weng from the [RAG tutorial](/docs/tutorials/rag):"
]
@@ -18,8 +18,23 @@
"cell_type": "code",
"execution_count": 1,
"id": "994d6c74",
"metadata": {},
"outputs": [],
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:08:00.190093Z",
"iopub.status.busy": "2024-09-10T20:08:00.189665Z",
"iopub.status.idle": "2024-09-10T20:08:05.438015Z",
"shell.execute_reply": "2024-09-10T20:08:05.437685Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"USER_AGENT environment variable not set, consider setting it to identify your requests.\n"
]
}
],
"source": [
"# Build a sample vectorDB\n",
"from langchain_chroma import Chroma\n",
@@ -54,7 +69,14 @@
"cell_type": "code",
"execution_count": 2,
"id": "edbca101",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:08:05.439930Z",
"iopub.status.busy": "2024-09-10T20:08:05.439810Z",
"iopub.status.idle": "2024-09-10T20:08:05.553766Z",
"shell.execute_reply": "2024-09-10T20:08:05.553520Z"
}
},
"outputs": [],
"source": [
"from langchain.retrievers.multi_query import MultiQueryRetriever\n",
@@ -71,7 +93,14 @@
"cell_type": "code",
"execution_count": 3,
"id": "9e6d3b69",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:08:05.555359Z",
"iopub.status.busy": "2024-09-10T20:08:05.555262Z",
"iopub.status.idle": "2024-09-10T20:08:05.557046Z",
"shell.execute_reply": "2024-09-10T20:08:05.556825Z"
}
},
"outputs": [],
"source": [
"# Set logging for the queries\n",
@@ -85,13 +114,20 @@
"cell_type": "code",
"execution_count": 4,
"id": "bc93dc2b-9407-48b0-9f9a-338247e7eb69",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:08:05.558176Z",
"iopub.status.busy": "2024-09-10T20:08:05.558100Z",
"iopub.status.idle": "2024-09-10T20:08:07.250342Z",
"shell.execute_reply": "2024-09-10T20:08:07.249711Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:langchain.retrievers.multi_query:Generated queries: ['1. How can Task Decomposition be achieved through different methods?', '2. What strategies are commonly used for Task Decomposition?', '3. What are the various techniques for breaking down tasks in Task Decomposition?']\n"
"INFO:langchain.retrievers.multi_query:Generated queries: ['1. How can Task Decomposition be achieved through different methods?', '2. What strategies are commonly used for Task Decomposition?', '3. What are the various ways to break down tasks in Task Decomposition?']\n"
]
},
{
@@ -125,9 +161,9 @@
"source": [
"#### Supplying your own prompt\n",
"\n",
"Under the hood, `MultiQueryRetriever` generates queries using a specific [prompt](https://python.langchain.com/v0.2/api_reference/langchain/retrievers/langchain.retrievers.multi_query.MultiQueryRetriever.html). To customize this prompt:\n",
"Under the hood, `MultiQueryRetriever` generates queries using a specific [prompt](https://python.langchain.com/api_reference/langchain/retrievers/langchain.retrievers.multi_query.MultiQueryRetriever.html). To customize this prompt:\n",
"\n",
"1. Make a [PromptTemplate](https://python.langchain.com/v0.2/api_reference/core/prompts/langchain_core.prompts.prompt.PromptTemplate.html) with an input variable for the question;\n",
"1. Make a [PromptTemplate](https://python.langchain.com/api_reference/core/prompts/langchain_core.prompts.prompt.PromptTemplate.html) with an input variable for the question;\n",
"2. Implement an [output parser](/docs/concepts#output-parsers) like the one below to split the result into a list of queries.\n",
"\n",
"The prompt and output parser together must support the generation of a list of queries."
@@ -137,14 +173,21 @@
"cell_type": "code",
"execution_count": 5,
"id": "d9afb0ca",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:08:07.253875Z",
"iopub.status.busy": "2024-09-10T20:08:07.253600Z",
"iopub.status.idle": "2024-09-10T20:08:07.277848Z",
"shell.execute_reply": "2024-09-10T20:08:07.277487Z"
}
},
"outputs": [],
"source": [
"from typing import List\n",
"\n",
"from langchain_core.output_parsers import BaseOutputParser\n",
"from langchain_core.prompts import PromptTemplate\n",
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
"from pydantic import BaseModel, Field\n",
"\n",
"\n",
"# Output parser will split the LLM result into a list of queries\n",
@@ -180,13 +223,20 @@
"cell_type": "code",
"execution_count": 6,
"id": "59c75c56-dbd7-4887-b9ba-0b5b21069f51",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:08:07.280001Z",
"iopub.status.busy": "2024-09-10T20:08:07.279861Z",
"iopub.status.idle": "2024-09-10T20:08:09.579525Z",
"shell.execute_reply": "2024-09-10T20:08:09.578837Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:langchain.retrievers.multi_query:Generated queries: ['1. Can you provide insights on regression from the course material?', '2. How is regression discussed in the course content?', '3. What information does the course offer about regression?', '4. In what way is regression covered in the course?', '5. What are the teachings of the course regarding regression?']\n"
"INFO:langchain.retrievers.multi_query:Generated queries: ['1. Can you provide insights on regression from the course material?', '2. How is regression discussed in the course content?', '3. What information does the course offer regarding regression?', '4. In what way is regression covered in the course?', \"5. What are the course's teachings on regression?\"]\n"
]
},
{
@@ -228,7 +278,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
"version": "3.11.9"
}
},
"nbformat": 4,

@@ -7,7 +7,7 @@
"source": [
"# How to add scores to retriever results\n",
"\n",
"Retrievers will return sequences of [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html) objects, which by default include no information about the process that retrieved them (e.g., a similarity score against a query). Here we demonstrate how to add retrieval scores to the `.metadata` of documents:\n",
"Retrievers will return sequences of [Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html) objects, which by default include no information about the process that retrieved them (e.g., a similarity score against a query). Here we demonstrate how to add retrieval scores to the `.metadata` of documents:\n",
"1. From [vectorstore retrievers](/docs/how_to/vectorstore_retriever);\n",
"2. From higher-order LangChain retrievers, such as [SelfQueryRetriever](/docs/how_to/self_query) or [MultiVectorRetriever](/docs/how_to/multi_vector).\n",
"\n",
@@ -15,7 +15,7 @@
"\n",
"## Create vector store\n",
"\n",
"First we populate a vector store with some data. We will use a [PineconeVectorStore](https://python.langchain.com/v0.2/api_reference/pinecone/vectorstores/langchain_pinecone.vectorstores.PineconeVectorStore.html), but this guide is compatible with any LangChain vector store that implements a `.similarity_search_with_score` method."
"First we populate a vector store with some data. We will use a [PineconeVectorStore](https://python.langchain.com/api_reference/pinecone/vectorstores/langchain_pinecone.vectorstores.PineconeVectorStore.html), but this guide is compatible with any LangChain vector store that implements a `.similarity_search_with_score` method."
]
},
{
@@ -206,7 +206,7 @@
" ) -> List[Document]:\n",
" \"\"\"Get docs, adding score information.\"\"\"\n",
" docs, scores = zip(\n",
" *vectorstore.similarity_search_with_score(query, **search_kwargs)\n",
" *self.vectorstore.similarity_search_with_score(query, **search_kwargs)\n",
" )\n",
" for doc, score in zip(docs, scores):\n",
" doc.metadata[\"score\"] = score\n",
@@ -263,7 +263,7 @@
"\n",
"To propagate similarity scores through this retriever, we can again subclass `MultiVectorRetriever` and override a method. This time we will override `_get_relevant_documents`.\n",
"\n",
"First, we prepare some fake data. We generate fake \"whole documents\" and store them in a document store; here we will use a simple [InMemoryStore](https://python.langchain.com/v0.2/api_reference/core/stores/langchain_core.stores.InMemoryBaseStore.html)."
"First, we prepare some fake data. We generate fake \"whole documents\" and store them in a document store; here we will use a simple [InMemoryStore](https://python.langchain.com/api_reference/core/stores/langchain_core.stores.InMemoryBaseStore.html)."
]
},
{

@@ -461,7 +461,7 @@
"id": "f8014c9d",
"metadata": {},
"source": [
"Now, we can initalize the agent with the LLM, the prompt, and the tools. The agent is responsible for taking in input and deciding what actions to take. Crucially, the Agent does not execute those actions - that is done by the AgentExecutor (next step). For more information about how to think about these components, see our [conceptual guide](/docs/concepts/#agents).\n",
"Now, we can initialize the agent with the LLM, the prompt, and the tools. The agent is responsible for taking in input and deciding what actions to take. Crucially, the Agent does not execute those actions - that is done by the AgentExecutor (next step). For more information about how to think about these components, see our [conceptual guide](/docs/concepts/#agents).\n",
"\n",
"Note that we are passing in the `model`, not `model_with_tools`. That is because `create_tool_calling_agent` will call `.bind_tools` for us under the hood."
]

@@ -27,7 +27,7 @@
"\n",
":::\n",
"\n",
"An alternate way of [passing data through](/docs/how_to/passthrough) steps of a chain is to leave the current values of the chain state unchanged while assigning a new value under a given key. The [`RunnablePassthrough.assign()`](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html#langchain_core.runnables.passthrough.RunnablePassthrough.assign) static method takes an input value and adds the extra arguments passed to the assign function.\n",
"An alternate way of [passing data through](/docs/how_to/passthrough) steps of a chain is to leave the current values of the chain state unchanged while assigning a new value under a given key. The [`RunnablePassthrough.assign()`](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html#langchain_core.runnables.passthrough.RunnablePassthrough.assign) static method takes an input value and adds the extra arguments passed to the assign function.\n",
"\n",
"This is useful in the common [LangChain Expression Language](/docs/concepts/#langchain-expression-language) pattern of additively creating a dictionary to use as input to a later step.\n",
"\n",
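In essence (a toy sketch, independent of this notebook's chain):

```python
from langchain_core.runnables import RunnablePassthrough

# Keep the input dict as-is and add a computed key alongside it.
chain = RunnablePassthrough.assign(doubled=lambda x: x["num"] * 2)
chain.invoke({"num": 3})  # -> {'num': 3, 'doubled': 6}
```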
@@ -45,7 +45,8 @@
"import os\n",
"from getpass import getpass\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass()"
"if \"OPENAI_API_KEY\" not in os.environ:\n",
" os.environ[\"OPENAI_API_KEY\"] = getpass()"
]
},
{

@@ -27,7 +27,7 @@
"\n",
":::\n",
"\n",
"Sometimes we want to invoke a [`Runnable`](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html) within a [RunnableSequence](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.RunnableSequence.html) with constant arguments that are not part of the output of the preceding Runnable in the sequence, and which are not part of the user input. We can use the [`Runnable.bind()`](https://python.langchain.com/v0.2/api_reference/langchain_core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.bind) method to set these arguments ahead of time.\n",
"Sometimes we want to invoke a [`Runnable`](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html) within a [RunnableSequence](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.RunnableSequence.html) with constant arguments that are not part of the output of the preceding Runnable in the sequence, and which are not part of the user input. We can use the [`Runnable.bind()`](https://python.langchain.com/api_reference/langchain_core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.bind) method to set these arguments ahead of time.\n",
"\n",
"## Binding stop sequences\n",
"\n",
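The core pattern, as a short sketch (assuming `langchain-openai`):

```python
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")
# Pre-bind a constant kwarg so callers don't pass it on every invoke.
llm_with_stop = llm.bind(stop=["SOLUTION"])
llm_with_stop.invoke("Write EQUATION: ... then SOLUTION: ...")
```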
@@ -49,7 +49,8 @@
"import os\n",
"from getpass import getpass\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass()"
"if \"OPENAI_API_KEY\" not in os.environ:\n",
" os.environ[\"OPENAI_API_KEY\"] = getpass()"
]
},
{
@@ -183,7 +184,7 @@
{
"data": {
"text/plain": [
"AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_z0OU2CytqENVrRTI6T8DkI3u', 'function': {'arguments': '{\"location\": \"San Francisco, CA\", \"unit\": \"celsius\"}', 'name': 'get_current_weather'}, 'type': 'function'}, {'id': 'call_ft96IJBh0cMKkQWrZjNg4bsw', 'function': {'arguments': '{\"location\": \"New York, NY\", \"unit\": \"celsius\"}', 'name': 'get_current_weather'}, 'type': 'function'}, {'id': 'call_tfbtGgCLmuBuWgZLvpPwvUMH', 'function': {'arguments': '{\"location\": \"Los Angeles, CA\", \"unit\": \"celsius\"}', 'name': 'get_current_weather'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 84, 'prompt_tokens': 85, 'total_tokens': 169}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': 'fp_77a673219d', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-d57ad5fa-b52a-4822-bc3e-74f838697e18-0', tool_calls=[{'name': 'get_current_weather', 'args': {'location': 'San Francisco, CA', 'unit': 'celsius'}, 'id': 'call_z0OU2CytqENVrRTI6T8DkI3u'}, {'name': 'get_current_weather', 'args': {'location': 'New York, NY', 'unit': 'celsius'}, 'id': 'call_ft96IJBh0cMKkQWrZjNg4bsw'}, {'name': 'get_current_weather', 'args': {'location': 'Los Angeles, CA', 'unit': 'celsius'}, 'id': 'call_tfbtGgCLmuBuWgZLvpPwvUMH'}])"
"AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_z0OU2CytqENVrRTI6T8DkI3u', 'function': {'arguments': '{\"location\": \"San Francisco, CA\", \"unit\": \"celsius\"}', 'name': 'get_current_weather'}, 'type': 'function'}, {'id': 'call_ft96IJBh0cMKkQWrZjNg4bsw', 'function': {'arguments': '{\"location\": \"New York, NY\", \"unit\": \"celsius\"}', 'name': 'get_current_weather'}, 'type': 'function'}, {'id': 'call_tfbtGgCLmuBuWgZLvpPwvUMH', 'function': {'arguments': '{\"location\": \"Los Angeles, CA\", \"unit\": \"celsius\"}', 'name': 'get_current_weather'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 84, 'prompt_tokens': 85, 'total_tokens': 169}, 'model_name': 'gpt-4o-mini', 'system_fingerprint': 'fp_77a673219d', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-d57ad5fa-b52a-4822-bc3e-74f838697e18-0', tool_calls=[{'name': 'get_current_weather', 'args': {'location': 'San Francisco, CA', 'unit': 'celsius'}, 'id': 'call_z0OU2CytqENVrRTI6T8DkI3u'}, {'name': 'get_current_weather', 'args': {'location': 'New York, NY', 'unit': 'celsius'}, 'id': 'call_ft96IJBh0cMKkQWrZjNg4bsw'}, {'name': 'get_current_weather', 'args': {'location': 'Los Angeles, CA', 'unit': 'celsius'}, 'id': 'call_tfbtGgCLmuBuWgZLvpPwvUMH'}])"
]
},
"execution_count": 5,
@@ -192,7 +193,7 @@
}
],
"source": [
"model = ChatOpenAI(model=\"gpt-3.5-turbo-1106\").bind(tools=tools)\n",
"model = ChatOpenAI(model=\"gpt-4o-mini\").bind(tools=tools)\n",
"model.invoke(\"What's the weather in SF, NYC and LA?\")"
]
},

@@ -14,7 +14,7 @@
"- [Custom callback handlers](/docs/how_to/custom_callbacks)\n",
":::\n",
"\n",
"If you are planning to use the async APIs, it is recommended to use and extend [`AsyncCallbackHandler`](https://python.langchain.com/v0.2/api_reference/core/callbacks/langchain_core.callbacks.base.AsyncCallbackHandler.html) to avoid blocking the event.\n",
"If you are planning to use the async APIs, it is recommended to use and extend [`AsyncCallbackHandler`](https://python.langchain.com/api_reference/core/callbacks/langchain_core.callbacks.base.AsyncCallbackHandler.html) to avoid blocking the event.\n",
"\n",
"\n",
":::{.callout-warning}\n",

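As a rough sketch (the handler and model names are illustrative):

```python
import asyncio

from langchain_core.callbacks import AsyncCallbackHandler
from langchain_openai import ChatOpenAI


class TokenPrinter(AsyncCallbackHandler):
    async def on_llm_new_token(self, token: str, **kwargs) -> None:
        # Fires per streamed token without blocking the event loop.
        print(token, end="", flush=True)


async def main() -> None:
    llm = ChatOpenAI(model="gpt-4o-mini", streaming=True, callbacks=[TokenPrinter()])
    await llm.ainvoke("Count to three.")

asyncio.run(main())
```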
@@ -17,7 +17,7 @@
"\n",
":::\n",
"\n",
"If you are composing a chain of runnables and want to reuse callbacks across multiple executions, you can attach callbacks with the [`.with_config()`](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_config) method. This saves you the need to pass callbacks in each time you invoke the chain.\n",
"If you are composing a chain of runnables and want to reuse callbacks across multiple executions, you can attach callbacks with the [`.with_config()`](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_config) method. This saves you the need to pass callbacks in each time you invoke the chain.\n",
":::{.callout-important}\n",
"\n",

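A minimal sketch of the pattern (assuming `langchain-openai`; the handler is a toy example):

```python
from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI


class LoggingHandler(BaseCallbackHandler):
    def on_chain_start(self, serialized, inputs, **kwargs):
        print("Chain started with:", inputs)


chain = (
    ChatPromptTemplate.from_template("Tell me a joke about {topic}")
    | ChatOpenAI(model="gpt-4o-mini")
)
# Attach once; every subsequent invoke reuses the same callbacks.
chain_with_callbacks = chain.with_config(callbacks=[LoggingHandler()])
chain_with_callbacks.invoke({"topic": "bears"})
```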
@@ -15,7 +15,7 @@
"\n",
":::\n",
"\n",
"In many cases, it is advantageous to pass in handlers instead when running the object. When we pass through [`CallbackHandlers`](https://python.langchain.com/v0.2/api_reference/core/callbacks/langchain_core.callbacks.base.BaseCallbackHandler.html#langchain-core-callbacks-base-basecallbackhandler) using the `callbacks` keyword arg when executing a run, those callbacks will be issued by all nested objects involved in the execution. For example, when a handler is passed through to an Agent, it will be used for all callbacks related to the agent and all the objects involved in the agent's execution, in this case, the Tools and LLM.\n",
"In many cases, it is advantageous to pass in handlers instead when running the object. When we pass through [`CallbackHandlers`](https://python.langchain.com/api_reference/core/callbacks/langchain_core.callbacks.base.BaseCallbackHandler.html#langchain-core-callbacks-base-basecallbackhandler) using the `callbacks` keyword arg when executing a run, those callbacks will be issued by all nested objects involved in the execution. For example, when a handler is passed through to an Agent, it will be used for all callbacks related to the agent and all the objects involved in the agent's execution, in this case, the Tools and LLM.\n",
"\n",
"This prevents us from having to manually attach the handlers to each individual nested object. Here's an example:"
]

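As a one-off alternative sketch, the handler can instead be passed at invocation time so it is scoped to a single call but still reaches every nested object (reusing the `LoggingHandler` sketched above):

```python
# Callbacks supplied per-invoke propagate to all nested runs of this call.
chain.invoke({"topic": "otters"}, config={"callbacks": [LoggingHandler()]})
```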
@@ -28,7 +28,7 @@
"\n",
"To obtain the string content directly, use `.split_text`.\n",
"\n",
"To create LangChain [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html) objects (e.g., for use in downstream tasks), use `.create_documents`."
"To create LangChain [Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html) objects (e.g., for use in downstream tasks), use `.create_documents`."
]
},
{

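For instance (using `RecursiveCharacterTextSplitter` as a stand-in, since the guide's splitter class sits outside this excerpt):

```python
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20)
text = "Some long document text ..."
chunks = splitter.split_text(text)        # -> list of strings
docs = splitter.create_documents([text])  # -> list of Document objects
```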
@@ -50,7 +50,8 @@
"\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass()\n",
"if \"OPENAI_API_KEY\" not in os.environ:\n",
" os.environ[\"OPENAI_API_KEY\"] = getpass()\n",
"\n",
"llm = ChatOpenAI()"
]

@@ -11,16 +11,10 @@
"\n",
":::tip Supported models\n",
"\n",
"See the [init_chat_model()](https://python.langchain.com/v0.2/api_reference/langchain/chat_models/langchain.chat_models.base.init_chat_model.html) API reference for a full list of supported integrations.\n",
"See the [init_chat_model()](https://python.langchain.com/api_reference/langchain/chat_models/langchain.chat_models.base.init_chat_model.html) API reference for a full list of supported integrations.\n",
"\n",
"Make sure you have the integration packages installed for any model providers you want to support. E.g. you should have `langchain-openai` installed to init an OpenAI model.\n",
"\n",
":::\n",
"\n",
":::info Requires ``langchain >= 0.2.8``\n",
"\n",
"This functionality was added in ``langchain-core == 0.2.8``. Please make sure your package is up to date.\n",
"\n",
":::"
]
},
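The basic call looks roughly like this (a sketch; provider packages such as `langchain-openai` and `langchain-anthropic` must be installed):

```python
from langchain.chat_models import init_chat_model

# Explicit provider; returns the provider's chat model under the hood.
gpt_4o = init_chat_model("gpt-4o", model_provider="openai", temperature=0)
# Provider can often be inferred from a distinctive model name.
claude = init_chat_model("claude-3-5-sonnet-20240620", model_provider="anthropic")
gpt_4o.invoke("what's your name")
```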
@@ -44,19 +38,48 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 2,
"id": "79e14913-803c-4382-9009-5c6af3d75d35",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:22:33.015729Z",
"iopub.status.busy": "2024-09-10T20:22:33.015241Z",
"iopub.status.idle": "2024-09-10T20:22:39.391716Z",
"shell.execute_reply": "2024-09-10T20:22:39.390438Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/4j/2rz3865x6qg07tx43146py8h0000gn/T/ipykernel_95293/571506279.py:4: LangChainBetaWarning: The function `init_chat_model` is in beta. It is actively being worked on, so the API may change.\n",
" gpt_4o = init_chat_model(\"gpt-4o\", model_provider=\"openai\", temperature=0)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"GPT-4o: I'm an AI created by OpenAI, and I don't have a personal name. How can I assist you today?\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"GPT-4o: I'm an AI created by OpenAI, and I don't have a personal name. You can call me Assistant! How can I help you today?\n",
"\n",
"Claude Opus: My name is Claude. It's nice to meet you!\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Gemini 1.5: I am a large language model, trained by Google. \n",
"\n",
"Gemini 1.5: I am a large language model, trained by Google. I do not have a name. \n",
"I don't have a name like a person does. You can call me Bard if you like! 😊 \n",
"\n",
"\n"
]
@@ -89,14 +112,21 @@
"source": [
"## Inferring model provider\n",
"\n",
"For common and distinct model names `init_chat_model()` will attempt to infer the model provider. See the [API reference](https://python.langchain.com/v0.2/api_reference/langchain/chat_models/langchain.chat_models.base.init_chat_model.html) for a full list of inference behavior. E.g. any model that starts with `gpt-3...` or `gpt-4...` will be inferred as using model provider `openai`."
"For common and distinct model names `init_chat_model()` will attempt to infer the model provider. See the [API reference](https://python.langchain.com/api_reference/langchain/chat_models/langchain.chat_models.base.init_chat_model.html) for a full list of inference behavior. E.g. any model that starts with `gpt-3...` or `gpt-4...` will be inferred as using model provider `openai`."
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"id": "0378ccc6-95bc-4d50-be50-fccc193f0a71",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:22:39.396908Z",
"iopub.status.busy": "2024-09-10T20:22:39.396563Z",
"iopub.status.idle": "2024-09-10T20:22:39.444959Z",
"shell.execute_reply": "2024-09-10T20:22:39.444646Z"
}
},
"outputs": [],
"source": [
"gpt_4o = init_chat_model(\"gpt-4o\", temperature=0)\n",
@@ -116,17 +146,24 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"id": "6c037f27-12d7-4e83-811e-4245c0e3ba58",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:22:39.446901Z",
"iopub.status.busy": "2024-09-10T20:22:39.446773Z",
"iopub.status.idle": "2024-09-10T20:22:40.301906Z",
"shell.execute_reply": "2024-09-10T20:22:40.300918Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\"I'm an AI language model created by OpenAI, and I don't have a personal name. You can call me Assistant or any other name you prefer! How can I assist you today?\", response_metadata={'token_usage': {'completion_tokens': 37, 'prompt_tokens': 11, 'total_tokens': 48}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_d576307f90', 'finish_reason': 'stop', 'logprobs': None}, id='run-5428ab5c-b5c0-46de-9946-5d4ca40dbdc8-0', usage_metadata={'input_tokens': 11, 'output_tokens': 37, 'total_tokens': 48})"
"AIMessage(content=\"I'm an AI created by OpenAI, and I don't have a personal name. How can I assist you today?\", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 23, 'prompt_tokens': 11, 'total_tokens': 34}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_25624ae3a5', 'finish_reason': 'stop', 'logprobs': None}, id='run-b41df187-4627-490d-af3c-1c96282d3eb0-0', usage_metadata={'input_tokens': 11, 'output_tokens': 23, 'total_tokens': 34})"
]
},
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -141,17 +178,24 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"id": "321e3036-abd2-4e1f-bcc6-606efd036954",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:22:40.316030Z",
"iopub.status.busy": "2024-09-10T20:22:40.315628Z",
"iopub.status.idle": "2024-09-10T20:22:41.199134Z",
"shell.execute_reply": "2024-09-10T20:22:41.198173Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\"My name is Claude. It's nice to meet you!\", response_metadata={'id': 'msg_012XvotUJ3kGLXJUWKBVxJUi', 'model': 'claude-3-5-sonnet-20240620', 'stop_reason': 'end_turn', 'stop_sequence': None, 'usage': {'input_tokens': 11, 'output_tokens': 15}}, id='run-1ad1eefe-f1c6-4244-8bc6-90e2cb7ee554-0', usage_metadata={'input_tokens': 11, 'output_tokens': 15, 'total_tokens': 26})"
"AIMessage(content=\"My name is Claude. It's nice to meet you!\", additional_kwargs={}, response_metadata={'id': 'msg_01Fx9P74A7syoFkwE73CdMMY', 'model': 'claude-3-5-sonnet-20240620', 'stop_reason': 'end_turn', 'stop_sequence': None, 'usage': {'input_tokens': 11, 'output_tokens': 15}}, id='run-a0fd2bbd-3b7e-46bf-8d69-a48c7e60b03c-0', usage_metadata={'input_tokens': 11, 'output_tokens': 15, 'total_tokens': 26})"
]
},
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -174,17 +218,24 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 6,
"id": "814a2289-d0db-401e-b555-d5116112b413",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:22:41.203346Z",
"iopub.status.busy": "2024-09-10T20:22:41.203004Z",
"iopub.status.idle": "2024-09-10T20:22:41.891450Z",
"shell.execute_reply": "2024-09-10T20:22:41.890539Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\"I'm an AI language model created by OpenAI, and I don't have a personal name. You can call me Assistant or any other name you prefer! How can I assist you today?\", response_metadata={'token_usage': {'completion_tokens': 37, 'prompt_tokens': 11, 'total_tokens': 48}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_ce0793330f', 'finish_reason': 'stop', 'logprobs': None}, id='run-3923e328-7715-4cd6-b215-98e4b6bf7c9d-0', usage_metadata={'input_tokens': 11, 'output_tokens': 37, 'total_tokens': 48})"
"AIMessage(content=\"I'm an AI created by OpenAI, and I don't have a personal name. How can I assist you today?\", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 23, 'prompt_tokens': 11, 'total_tokens': 34}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_25624ae3a5', 'finish_reason': 'stop', 'logprobs': None}, id='run-3380f977-4b89-4f44-bc02-b64043b3166f-0', usage_metadata={'input_tokens': 11, 'output_tokens': 23, 'total_tokens': 34})"
]
},
"execution_count": 9,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -202,17 +253,24 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 7,
"id": "6c8755ba-c001-4f5a-a497-be3f1db83244",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:22:41.896413Z",
"iopub.status.busy": "2024-09-10T20:22:41.895967Z",
"iopub.status.idle": "2024-09-10T20:22:42.767565Z",
"shell.execute_reply": "2024-09-10T20:22:42.766619Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\"My name is Claude. It's nice to meet you!\", response_metadata={'id': 'msg_01RyYR64DoMPNCfHeNnroMXm', 'model': 'claude-3-5-sonnet-20240620', 'stop_reason': 'end_turn', 'stop_sequence': None, 'usage': {'input_tokens': 11, 'output_tokens': 15}}, id='run-22446159-3723-43e6-88df-b84797e7751d-0', usage_metadata={'input_tokens': 11, 'output_tokens': 15, 'total_tokens': 26})"
"AIMessage(content=\"My name is Claude. It's nice to meet you!\", additional_kwargs={}, response_metadata={'id': 'msg_01EFKSWpmsn2PSYPQa4cNHWb', 'model': 'claude-3-5-sonnet-20240620', 'stop_reason': 'end_turn', 'stop_sequence': None, 'usage': {'input_tokens': 11, 'output_tokens': 15}}, id='run-3c58f47c-41b9-4e56-92e7-fb9602e3787c-0', usage_metadata={'input_tokens': 11, 'output_tokens': 15, 'total_tokens': 26})"
]
},
"execution_count": 10,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -242,28 +300,37 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"id": "067dabee-1050-4110-ae24-c48eba01e13b",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:22:42.771941Z",
"iopub.status.busy": "2024-09-10T20:22:42.771606Z",
"iopub.status.idle": "2024-09-10T20:22:43.909206Z",
"shell.execute_reply": "2024-09-10T20:22:43.908496Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"[{'name': 'GetPopulation',\n",
" 'args': {'location': 'Los Angeles, CA'},\n",
" 'id': 'call_sYT3PFMufHGWJD32Hi2CTNUP'},\n",
" 'id': 'call_Ga9m8FAArIyEjItHmztPYA22',\n",
" 'type': 'tool_call'},\n",
" {'name': 'GetPopulation',\n",
" 'args': {'location': 'New York, NY'},\n",
" 'id': 'call_j1qjhxRnD3ffQmRyqjlI1Lnk'}]"
" 'id': 'call_jh2dEvBaAHRaw5JUDthOs7rt',\n",
" 'type': 'tool_call'}]"
]
},
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
"from pydantic import BaseModel, Field\n",
"\n",
"\n",
"class GetWeather(BaseModel):\n",
@@ -288,22 +355,31 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"id": "e57dfe9f-cd24-4e37-9ce9-ccf8daf78f89",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:22:43.912746Z",
"iopub.status.busy": "2024-09-10T20:22:43.912447Z",
"iopub.status.idle": "2024-09-10T20:22:46.437049Z",
"shell.execute_reply": "2024-09-10T20:22:46.436093Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"[{'name': 'GetPopulation',\n",
" 'args': {'location': 'Los Angeles, CA'},\n",
" 'id': 'toolu_01CxEHxKtVbLBrvzFS7GQ5xR'},\n",
" 'id': 'toolu_01JMufPf4F4t2zLj7miFeqXp',\n",
" 'type': 'tool_call'},\n",
" {'name': 'GetPopulation',\n",
" 'args': {'location': 'New York City, NY'},\n",
" 'id': 'toolu_013A79qt5toWSsKunFBDZd5S'}]"
" 'id': 'toolu_01RQBHcE8kEEbYTuuS8WqY1u',\n",
" 'type': 'tool_call'}]"
]
},
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}

@@ -18,7 +18,7 @@
"# How to stream chat model responses\n",
"\n",
"\n",
"All [chat models](https://python.langchain.com/v0.2/api_reference/core/language_models/langchain_core.language_models.chat_models.BaseChatModel.html) implement the [Runnable interface](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable), which comes with a **default** implementation of standard runnable methods (i.e. `ainvoke`, `batch`, `abatch`, `stream`, `astream`, `astream_events`).\n",
"All [chat models](https://python.langchain.com/api_reference/core/language_models/langchain_core.language_models.chat_models.BaseChatModel.html) implement the [Runnable interface](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable), which comes with a **default** implementation of standard runnable methods (i.e. `ainvoke`, `batch`, `abatch`, `stream`, `astream`, `astream_events`).\n",
"\n",
"The **default** streaming implementation provides an `Iterator` (or `AsyncIterator` for asynchronous streaming) that yields a single value: the final output from the underlying chat model provider.\n",
"\n",
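In practice, a minimal streaming sketch (assuming `langchain-openai`):

```python
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")
# Each chunk is an AIMessageChunk; printing them as they arrive streams
# the response token by token.
for chunk in llm.stream("Write a haiku about the sea"):
    print(chunk.content, end="", flush=True)
```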
@@ -120,7 +120,7 @@
"source": [
"## Astream events\n",
"\n",
"Chat models also support the standard [astream events](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.astream_events) method.\n",
"Chat models also support the standard [astream events](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.astream_events) method.\n",
"\n",
"This method is useful if you're streaming output from a larger LLM application that contains multiple steps (e.g., an LLM chain composed of a prompt, llm and parser)."
]

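A rough async sketch (must run inside an event loop; reuses the `llm` from the streaming sketch above):

```python
# Filter the event stream down to just the chat model's token chunks.
async for event in llm.astream_events("hello", version="v2"):
    if event["event"] == "on_chat_model_stream":
        print(event["data"]["chunk"].content, end="")
```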
@@ -42,7 +42,7 @@
"\n",
"A number of model providers return token usage information as part of the chat generation response. When available, this information will be included on the `AIMessage` objects produced by the corresponding model.\n",
"\n",
"LangChain `AIMessage` objects include a [usage_metadata](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage.usage_metadata) attribute. When populated, this attribute will be a [UsageMetadata](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html) dictionary with standard keys (e.g., `\"input_tokens\"` and `\"output_tokens\"`).\n",
"LangChain `AIMessage` objects include a [usage_metadata](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage.usage_metadata) attribute. When populated, this attribute will be a [UsageMetadata](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html) dictionary with standard keys (e.g., `\"input_tokens\"` and `\"output_tokens\"`).\n",
"\n",
"Examples:\n",
"\n",
@@ -71,7 +71,7 @@
|
||||
"\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\")\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-4o-mini\")\n",
|
||||
"openai_response = llm.invoke(\"hello\")\n",
|
||||
"openai_response.usage_metadata"
|
||||
]
|
||||
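A minimal sketch of reading the standardized counts, assuming an OpenAI key is set:

```python
# Sketch: standardized token counts off an AIMessage.
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")
response = llm.invoke("hello")

usage = response.usage_metadata  # UsageMetadata dict, or None if unavailable
if usage is not None:
    print(usage["input_tokens"], usage["output_tokens"], usage["total_tokens"])
```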
@@ -118,7 +118,7 @@
"source": [
"### Using AIMessage.response_metadata\n",
"\n",
"Metadata from the model response is also included in the AIMessage [response_metadata](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage.response_metadata) attribute. These data are typically not standardized. Note that different providers adopt different conventions for representing token counts:"
"Metadata from the model response is also included in the AIMessage [response_metadata](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage.response_metadata) attribute. These data are typically not standardized. Note that different providers adopt different conventions for representing token counts:"
]
},
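A hedged sketch comparing two providers; the exact metadata keys (`token_usage` for OpenAI, `usage` for Anthropic) reflect current conventions and may change between releases:

```python
# Sketch: provider-specific token-count keys in response_metadata.
from langchain_anthropic import ChatAnthropic
from langchain_openai import ChatOpenAI

openai_response = ChatOpenAI(model="gpt-4o-mini").invoke("hello")
anthropic_response = ChatAnthropic(model="claude-3-5-sonnet-20240620").invoke("hello")

# Keys are assumed, not standardized; inspect the full dict for your provider.
print(openai_response.response_metadata.get("token_usage"))
print(anthropic_response.response_metadata.get("usage"))
```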
{
@@ -153,7 +153,7 @@
"\n",
"#### OpenAI\n",
"\n",
"For example, OpenAI will return a message [chunk](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessageChunk.html) at the end of a stream with token usage information. This behavior is supported by `langchain-openai >= 0.1.9` and can be enabled by setting `stream_usage=True`. This attribute can also be set when `ChatOpenAI` is instantiated.\n",
"For example, OpenAI will return a message [chunk](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.AIMessageChunk.html) at the end of a stream with token usage information. This behavior is supported by `langchain-openai >= 0.1.9` and can be enabled by setting `stream_usage=True`. This attribute can also be set when `ChatOpenAI` is instantiated.\n",
"\n",
"```{=mdx}\n",
":::note\n",
@@ -182,13 +182,13 @@
"content=' you' id='run-adb20c31-60c7-43a2-99b2-d4a53ca5f623'\n",
"content=' today' id='run-adb20c31-60c7-43a2-99b2-d4a53ca5f623'\n",
"content='?' id='run-adb20c31-60c7-43a2-99b2-d4a53ca5f623'\n",
"content='' response_metadata={'finish_reason': 'stop', 'model_name': 'gpt-3.5-turbo-0125'} id='run-adb20c31-60c7-43a2-99b2-d4a53ca5f623'\n",
"content='' response_metadata={'finish_reason': 'stop', 'model_name': 'gpt-4o-mini'} id='run-adb20c31-60c7-43a2-99b2-d4a53ca5f623'\n",
"content='' id='run-adb20c31-60c7-43a2-99b2-d4a53ca5f623' usage_metadata={'input_tokens': 8, 'output_tokens': 9, 'total_tokens': 17}\n"
]
}
],
"source": [
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\")\n",
"llm = ChatOpenAI(model=\"gpt-4o-mini\")\n",
"\n",
"aggregate = None\n",
"for chunk in llm.stream(\"hello\", stream_usage=True):\n",
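The aggregation idiom the truncated cell above relies on, as a minimal sketch: `AIMessageChunk` supports `+`, which merges content and metadata, so the final aggregate carries `usage_metadata`:

```python
# Sketch: aggregating streamed chunks so the final message carries
# usage_metadata (requires langchain-openai >= 0.1.9).
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", stream_usage=True)

aggregate = None
for chunk in llm.stream("hello"):
    # "+" on chunks merges content and metadata into one AIMessageChunk.
    aggregate = chunk if aggregate is None else aggregate + chunk

print(aggregate.content)
print(aggregate.usage_metadata)
```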
@@ -252,7 +252,7 @@
"content=' you' id='run-8e758550-94b0-4cca-a298-57482793c25d'\n",
"content=' today' id='run-8e758550-94b0-4cca-a298-57482793c25d'\n",
"content='?' id='run-8e758550-94b0-4cca-a298-57482793c25d'\n",
"content='' response_metadata={'finish_reason': 'stop', 'model_name': 'gpt-3.5-turbo-0125'} id='run-8e758550-94b0-4cca-a298-57482793c25d'\n"
"content='' response_metadata={'finish_reason': 'stop', 'model_name': 'gpt-4o-mini'} id='run-8e758550-94b0-4cca-a298-57482793c25d'\n"
]
}
],
@@ -289,7 +289,7 @@
}
],
"source": [
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
"from pydantic import BaseModel, Field\n",
"\n",
"\n",
"class Joke(BaseModel):\n",
@@ -300,7 +300,7 @@
"\n",
"\n",
"llm = ChatOpenAI(\n",
" model=\"gpt-3.5-turbo-0125\",\n",
" model=\"gpt-4o-mini\",\n",
" stream_usage=True,\n",
")\n",
"# Under the hood, .with_structured_output binds tools to the\n",
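The comment in the cell above is cut off by the diff; as context, a hedged sketch of how `.with_structured_output` is typically used with the `Joke` schema (the field definitions here are illustrative assumptions, not the notebook's own):

```python
# Sketch: structured output with usage tracking. The Joke fields are assumed.
from pydantic import BaseModel, Field

from langchain_openai import ChatOpenAI


class Joke(BaseModel):
    """Joke to tell user."""

    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")


llm = ChatOpenAI(model="gpt-4o-mini", stream_usage=True)
# Under the hood, .with_structured_output binds a tool built from the schema
# to the model and parses the resulting tool call back into a Joke instance.
structured_llm = llm.with_structured_output(Joke)

print(structured_llm.invoke("Tell me a joke about cats"))
```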
@@ -362,7 +362,7 @@
"from langchain_community.callbacks.manager import get_openai_callback\n",
"\n",
"llm = ChatOpenAI(\n",
" model=\"gpt-3.5-turbo-0125\",\n",
" model=\"gpt-4o-mini\",\n",
" temperature=0,\n",
" stream_usage=True,\n",
")\n",

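A minimal sketch of the callback pattern this cell sets up: `get_openai_callback` accumulates usage across every call made inside the context manager:

```python
# Sketch: cumulative token accounting with the OpenAI callback handler.
from langchain_community.callbacks.manager import get_openai_callback
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, stream_usage=True)

with get_openai_callback() as cb:
    llm.invoke("hello")
    llm.invoke("hello again")
    # cb totals span all calls made inside the block.
    print(cb.total_tokens, cb.prompt_tokens, cb.completion_tokens)
```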
@@ -23,6 +23,14 @@
"\n",
"We'll go into more detail on a few techniques below!\n",
"\n",
":::{.callout-note}\n",
"\n",
"This how-to guide previously built a chatbot using [RunnableWithMessageHistory](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.history.RunnableWithMessageHistory.html). You can access this version of the tutorial in the [v0.2 docs](https://python.langchain.com/v0.2/docs/how_to/chatbots_memory/).\n",
"\n",
"The LangGraph implementation offers a number of advantages over `RunnableWithMessageHistory`, including the ability to persist arbitrary components of an application's state (instead of only messages).\n",
"\n",
":::\n",
"\n",
"## Setup\n",
"\n",
"You'll need to install a few packages, and have your OpenAI API key set as an environment variable named `OPENAI_API_KEY`:"
@@ -33,15 +41,6 @@
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[33mWARNING: You are using pip version 22.0.4; however, version 23.3.2 is available.\n",
"You should consider upgrading via the '/Users/jacoblee/.pyenv/versions/3.10.5/bin/python -m pip install --upgrade pip' command.\u001b[0m\u001b[33m\n",
"\u001b[0mNote: you may need to restart the kernel to use updated packages.\n"
]
},
{
"data": {
"text/plain": [
@@ -54,12 +53,13 @@
}
],
"source": [
"%pip install --upgrade --quiet langchain langchain-openai\n",
"%pip install --upgrade --quiet langchain langchain-openai langgraph\n",
"\n",
"# Set env var OPENAI_API_KEY or load from a .env file:\n",
"import dotenv\n",
"import getpass\n",
"import os\n",
"\n",
"dotenv.load_dotenv()"
"if not os.environ.get(\"OPENAI_API_KEY\"):\n",
" os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
]
},
{
@@ -71,13 +71,13 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from langchain_openai import ChatOpenAI\n",
"\n",
"chat = ChatOpenAI(model=\"gpt-3.5-turbo-0125\")"
"model = ChatOpenAI(model=\"gpt-4o-mini\")"
]
},
{
@@ -98,34 +98,33 @@
"name": "stdout",
"output_type": "stream",
"text": [
"I said \"J'adore la programmation,\" which means \"I love programming\" in French.\n"
"I translated the sentence \"I love programming\" into French, which is \"J'adore la programmation.\"\n"
]
}
],
"source": [
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_core.messages import AIMessage, HumanMessage, SystemMessage\n",
"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
"\n",
"prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\n",
" \"system\",\n",
" \"You are a helpful assistant. Answer all questions to the best of your ability.\",\n",
" SystemMessage(\n",
" content=\"You are a helpful assistant. Answer all questions to the best of your ability.\"\n",
" ),\n",
" (\"placeholder\", \"{messages}\"),\n",
" MessagesPlaceholder(variable_name=\"messages\"),\n",
" ]\n",
")\n",
"\n",
"chain = prompt | chat\n",
"chain = prompt | model\n",
"\n",
"ai_msg = chain.invoke(\n",
" {\n",
" \"messages\": [\n",
" (\n",
" \"human\",\n",
" \"Translate this sentence from English to French: I love programming.\",\n",
" HumanMessage(\n",
" content=\"Translate this sentence from English to French: I love programming.\"\n",
" ),\n",
" (\"ai\", \"J'adore la programmation.\"),\n",
" (\"human\", \"What did you just say?\"),\n",
" AIMessage(content=\"J'adore la programmation.\"),\n",
" HumanMessage(content=\"What did you just say?\"),\n",
" ],\n",
" }\n",
")\n",
@@ -136,51 +135,57 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"We can see that by passing the previous conversation into a chain, it can use it as context to answer questions. This is the basic concept underpinning chatbot memory - the rest of the guide will demonstrate convenient techniques for passing or reformatting messages.\n",
"\n",
"## Chat history\n",
"\n",
"It's perfectly fine to store and pass messages directly as an array, but we can use LangChain's built-in [message history class](https://python.langchain.com/v0.2/api_reference/langchain/index.html#module-langchain.memory) to store and load messages as well. Instances of this class are responsible for storing and loading chat messages from persistent storage. LangChain integrates with many providers - you can see a [list of integrations here](/docs/integrations/memory) - but for this demo we will use an ephemeral demo class.\n",
"\n",
"Here's an example of the API:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[HumanMessage(content='Translate this sentence from English to French: I love programming.'),\n",
" AIMessage(content=\"J'adore la programmation.\")]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_community.chat_message_histories import ChatMessageHistory\n",
"\n",
"demo_ephemeral_chat_history = ChatMessageHistory()\n",
"\n",
"demo_ephemeral_chat_history.add_user_message(\n",
" \"Translate this sentence from English to French: I love programming.\"\n",
")\n",
"\n",
"demo_ephemeral_chat_history.add_ai_message(\"J'adore la programmation.\")\n",
"\n",
"demo_ephemeral_chat_history.messages"
"We can see that by passing the previous conversation into a chain, it can use it as context to answer questions. This is the basic concept underpinning chatbot memory - the rest of the guide will demonstrate convenient techniques for passing or reformatting messages."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can use it directly to store conversation turns for our chain:"
"## Automatic history management\n",
"\n",
"The previous examples pass messages to the chain (and model) explicitly. This is a completely acceptable approach, but it does require external management of new messages. LangChain also provides a way to build applications that have memory using LangGraph's [persistence](https://langchain-ai.github.io/langgraph/concepts/persistence/). You can [enable persistence](https://langchain-ai.github.io/langgraph/how-tos/persistence/) in LangGraph applications by providing a `checkpointer` when compiling the graph."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from langgraph.checkpoint.memory import MemorySaver\n",
"from langgraph.graph import START, MessagesState, StateGraph\n",
"\n",
"workflow = StateGraph(state_schema=MessagesState)\n",
"\n",
"\n",
"# Define the function that calls the model\n",
"def call_model(state: MessagesState):\n",
" system_prompt = (\n",
" \"You are a helpful assistant. \"\n",
" \"Answer all questions to the best of your ability.\"\n",
" )\n",
" messages = [SystemMessage(content=system_prompt)] + state[\"messages\"]\n",
" response = model.invoke(messages)\n",
" return {\"messages\": response}\n",
"\n",
"\n",
"# Define the node and edge\n",
"workflow.add_node(\"model\", call_model)\n",
"workflow.add_edge(START, \"model\")\n",
"\n",
"# Add simple in-memory checkpointer\n",
"# highlight-start\n",
"memory = MemorySaver()\n",
"app = workflow.compile(checkpointer=memory)\n",
"# highlight-end"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We'll pass the latest input to the conversation here and let LangGraph keep track of the conversation history using the checkpointer:"
]
},
{
@@ -191,7 +196,8 @@
{
"data": {
"text/plain": [
"AIMessage(content='You just asked me to translate the sentence \"I love programming\" from English to French.', response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 61, 'total_tokens': 79}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-5cbb21c2-9c30-4031-8ea8-bfc497989535-0', usage_metadata={'input_tokens': 61, 'output_tokens': 18, 'total_tokens': 79})"
"{'messages': [HumanMessage(content='Translate this sentence from English to French: I love programming.', additional_kwargs={}, response_metadata={}, id='200f88bb-936a-4877-990c-8b4112d82cfe'),\n",
" AIMessage(content='The translation of \"I love programming\" in French is \"J\\'aime programmer.\"', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 39, 'total_tokens': 55, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_1bb46167f9', 'finish_reason': 'stop', 'logprobs': None}, id='run-d4ebcdcf-9a60-4471-ad8d-96169f614ada-0', usage_metadata={'input_tokens': 39, 'output_tokens': 16, 'total_tokens': 55})]}"
]
},
"execution_count": 5,
@@ -200,159 +206,35 @@
}
],
"source": [
"demo_ephemeral_chat_history = ChatMessageHistory()\n",
"\n",
"input1 = \"Translate this sentence from English to French: I love programming.\"\n",
"\n",
"demo_ephemeral_chat_history.add_user_message(input1)\n",
"\n",
"response = chain.invoke(\n",
" {\n",
" \"messages\": demo_ephemeral_chat_history.messages,\n",
" }\n",
")\n",
"\n",
"demo_ephemeral_chat_history.add_ai_message(response)\n",
"\n",
"input2 = \"What did I just ask you?\"\n",
"\n",
"demo_ephemeral_chat_history.add_user_message(input2)\n",
"\n",
"chain.invoke(\n",
" {\n",
" \"messages\": demo_ephemeral_chat_history.messages,\n",
" }\n",
"app.invoke(\n",
" {\"messages\": [HumanMessage(content=\"Translate to French: I love programming.\")]},\n",
" config={\"configurable\": {\"thread_id\": \"1\"}},\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Automatic history management\n",
"\n",
"The previous examples pass messages to the chain explicitly. This is a completely acceptable approach, but it does require external management of new messages. LangChain also includes an wrapper for LCEL chains that can handle this process automatically called `RunnableWithMessageHistory`.\n",
"\n",
"To show how it works, let's slightly modify the above prompt to take a final `input` variable that populates a `HumanMessage` template after the chat history. This means that we will expect a `chat_history` parameter that contains all messages BEFORE the current messages instead of all messages:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\n",
" \"system\",\n",
" \"You are a helpful assistant. Answer all questions to the best of your ability.\",\n",
" ),\n",
" (\"placeholder\", \"{chat_history}\"),\n",
" (\"human\", \"{input}\"),\n",
" ]\n",
")\n",
"\n",
"chain = prompt | chat"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" We'll pass the latest input to the conversation here and let the `RunnableWithMessageHistory` class wrap our chain and do the work of appending that `input` variable to the chat history.\n",
" \n",
" Next, let's declare our wrapped chain:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.runnables.history import RunnableWithMessageHistory\n",
"\n",
"demo_ephemeral_chat_history_for_chain = ChatMessageHistory()\n",
"\n",
"chain_with_message_history = RunnableWithMessageHistory(\n",
" chain,\n",
" lambda session_id: demo_ephemeral_chat_history_for_chain,\n",
" input_messages_key=\"input\",\n",
" history_messages_key=\"chat_history\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This class takes a few parameters in addition to the chain that we want to wrap:\n",
"\n",
"- A factory function that returns a message history for a given session id. This allows your chain to handle multiple users at once by loading different messages for different conversations.\n",
"- An `input_messages_key` that specifies which part of the input should be tracked and stored in the chat history. In this example, we want to track the string passed in as `input`.\n",
"- A `history_messages_key` that specifies what the previous messages should be injected into the prompt as. Our prompt has a `MessagesPlaceholder` named `chat_history`, so we specify this property to match.\n",
"- (For chains with multiple outputs) an `output_messages_key` which specifies which output to store as history. This is the inverse of `input_messages_key`.\n",
"\n",
"We can invoke this new chain as normal, with an additional `configurable` field that specifies the particular `session_id` to pass to the factory function. This is unused for the demo, but in real-world chains, you'll want to return a chat history corresponding to the passed session:"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Parent run dc4e2f79-4bcd-4a36-9506-55ace9040588 not found for run 34b5773e-3ced-46a6-8daf-4d464c15c940. Treating as a root run.\n"
]
},
{
"data": {
"text/plain": [
"AIMessage(content='\"J\\'adore la programmation.\"', response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 39, 'total_tokens': 48}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-648b0822-b0bb-47a2-8e7d-7d34744be8f2-0', usage_metadata={'input_tokens': 39, 'output_tokens': 9, 'total_tokens': 48})"
"{'messages': [HumanMessage(content='Translate this sentence from English to French: I love programming.', additional_kwargs={}, response_metadata={}, id='200f88bb-936a-4877-990c-8b4112d82cfe'),\n",
" AIMessage(content='The translation of \"I love programming\" in French is \"J\\'aime programmer.\"', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 39, 'total_tokens': 55, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_1bb46167f9', 'finish_reason': 'stop', 'logprobs': None}, id='run-d4ebcdcf-9a60-4471-ad8d-96169f614ada-0', usage_metadata={'input_tokens': 39, 'output_tokens': 16, 'total_tokens': 55}),\n",
" HumanMessage(content='What did I just ask you?', additional_kwargs={}, response_metadata={}, id='df32f0a6-38fe-418a-98fe-7a5f17d0b812'),\n",
" AIMessage(content='You asked me to translate the sentence \"I love programming\" from English to French.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 70, 'total_tokens': 87, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_1bb46167f9', 'finish_reason': 'stop', 'logprobs': None}, id='run-1ee8ad67-d7f0-4bb9-adff-e632be6e2825-0', usage_metadata={'input_tokens': 70, 'output_tokens': 17, 'total_tokens': 87})]}"
]
},
"execution_count": 8,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain_with_message_history.invoke(\n",
" {\"input\": \"Translate this sentence from English to French: I love programming.\"},\n",
" {\"configurable\": {\"session_id\": \"unused\"}},\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Parent run cc14b9d8-c59e-40db-a523-d6ab3fc2fa4f not found for run 5b75e25c-131e-46ee-9982-68569db04330. Treating as a root run.\n"
]
},
{
"data": {
"text/plain": [
"AIMessage(content='You asked me to translate the sentence \"I love programming\" from English to French.', response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 63, 'total_tokens': 80}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-5950435c-1dc2-43a6-836f-f989fd62c95e-0', usage_metadata={'input_tokens': 63, 'output_tokens': 17, 'total_tokens': 80})"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain_with_message_history.invoke(\n",
" {\"input\": \"What did I just ask you?\"}, {\"configurable\": {\"session_id\": \"unused\"}}\n",
"app.invoke(\n",
" {\"messages\": [HumanMessage(content=\"What did I just ask you?\")]},\n",
" config={\"configurable\": {\"thread_id\": \"1\"}},\n",
")"
]
},
@@ -366,80 +248,44 @@
"\n",
"### Trimming messages\n",
"\n",
"LLMs and chat models have limited context windows, and even if you're not directly hitting limits, you may want to limit the amount of distraction the model has to deal with. One solution is trim the historic messages before passing them to the model. Let's use an example history with some preloaded messages:"
"LLMs and chat models have limited context windows, and even if you're not directly hitting limits, you may want to limit the amount of distraction the model has to deal with. One solution is to trim the history messages before passing them to the model. Let's use an example history with the `app` we declared above:"
]
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[HumanMessage(content=\"Hey there! I'm Nemo.\"),\n",
" AIMessage(content='Hello!'),\n",
" HumanMessage(content='How are you today?'),\n",
" AIMessage(content='Fine thanks!')]"
"{'messages': [HumanMessage(content=\"Hey there! I'm Nemo.\", additional_kwargs={}, response_metadata={}, id='99321048-3390-4da6-919b-4ad933c4913b'),\n",
" AIMessage(content='Hello!', additional_kwargs={}, response_metadata={}, id='1c3eaf4a-b698-4bc6-a7a6-549290c3fc7e'),\n",
" HumanMessage(content='How are you today?', additional_kwargs={}, response_metadata={}, id='6f96db9d-ac30-4b4a-9ebc-bc11ae87646b'),\n",
" AIMessage(content='Fine thanks!', additional_kwargs={}, response_metadata={}, id='e783fbb6-2892-42ea-9859-ae449e4cfdf6'),\n",
" HumanMessage(content=\"What's my name?\", additional_kwargs={}, response_metadata={}, id='854065c4-09a0-4c2a-9f2c-eb7182dcc9d5'),\n",
" AIMessage(content='Your name is Nemo.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 63, 'total_tokens': 68, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_1bb46167f9', 'finish_reason': 'stop', 'logprobs': None}, id='run-eed15b83-b215-47a3-b374-404d6a05ab94-0', usage_metadata={'input_tokens': 63, 'output_tokens': 5, 'total_tokens': 68})]}"
]
},
"execution_count": 21,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"demo_ephemeral_chat_history = ChatMessageHistory()\n",
"demo_ephemeral_chat_history = [\n",
" HumanMessage(content=\"Hey there! I'm Nemo.\"),\n",
" AIMessage(content=\"Hello!\"),\n",
" HumanMessage(content=\"How are you today?\"),\n",
" AIMessage(content=\"Fine thanks!\"),\n",
"]\n",
"\n",
"demo_ephemeral_chat_history.add_user_message(\"Hey there! I'm Nemo.\")\n",
"demo_ephemeral_chat_history.add_ai_message(\"Hello!\")\n",
"demo_ephemeral_chat_history.add_user_message(\"How are you today?\")\n",
"demo_ephemeral_chat_history.add_ai_message(\"Fine thanks!\")\n",
"\n",
"demo_ephemeral_chat_history.messages"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's use this message history with the `RunnableWithMessageHistory` chain we declared above:"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Parent run 7ff2d8ec-65e2-4f67-8961-e498e2c4a591 not found for run 3881e990-6596-4326-84f6-2b76949e0657. Treating as a root run.\n"
]
},
{
"data": {
"text/plain": [
"AIMessage(content='Your name is Nemo.', response_metadata={'token_usage': {'completion_tokens': 6, 'prompt_tokens': 66, 'total_tokens': 72}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-f8aabef8-631a-4238-a39b-701e881fbe47-0', usage_metadata={'input_tokens': 66, 'output_tokens': 6, 'total_tokens': 72})"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain_with_message_history = RunnableWithMessageHistory(\n",
" chain,\n",
" lambda session_id: demo_ephemeral_chat_history,\n",
" input_messages_key=\"input\",\n",
" history_messages_key=\"chat_history\",\n",
")\n",
"\n",
"chain_with_message_history.invoke(\n",
" {\"input\": \"What's my name?\"},\n",
" {\"configurable\": {\"session_id\": \"unused\"}},\n",
"app.invoke(\n",
" {\n",
" \"messages\": demo_ephemeral_chat_history\n",
" + [HumanMessage(content=\"What's my name?\")]\n",
" },\n",
" config={\"configurable\": {\"thread_id\": \"2\"}},\n",
")"
]
},
@@ -447,35 +293,88 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"We can see the chain remembers the preloaded name.\n",
"We can see the app remembers the preloaded name.\n",
"\n",
"But let's say we have a very small context window, and we want to trim the number of messages passed to the chain to only the 2 most recent ones. We can use the built in [trim_messages](/docs/how_to/trim_messages/) util to trim messages based on their token count before they reach our prompt. In this case we'll count each message as 1 \"token\" and keep only the last two messages:"
"But let's say we have a very small context window, and we want to trim the number of messages passed to the model to only the 2 most recent ones. We can use the built-in [trim_messages](/docs/how_to/trim_messages/) util to trim messages based on their token count before they reach our prompt. In this case we'll count each message as 1 \"token\" and keep only the last two messages:"
]
},
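A minimal sketch of the trimmer in isolation, using the notebook's own settings (`token_counter=len` counts each message as one "token"):

```python
# Sketch: trim_messages keeps only the last two messages under these settings.
from langchain_core.messages import AIMessage, HumanMessage, trim_messages

trimmer = trim_messages(strategy="last", max_tokens=2, token_counter=len)

messages = [
    HumanMessage(content="Hey there! I'm Nemo."),
    AIMessage(content="Hello!"),
    HumanMessage(content="How are you today?"),
    AIMessage(content="Fine thanks!"),
]

# Returns just the final HumanMessage/AIMessage pair.
print(trimmer.invoke(messages))
```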
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"from operator import itemgetter\n",
"\n",
"from langchain_core.messages import trim_messages\n",
"from langchain_core.runnables import RunnablePassthrough\n",
"from langgraph.checkpoint.memory import MemorySaver\n",
"from langgraph.graph import START, MessagesState, StateGraph\n",
"\n",
"# Define trimmer\n",
"# highlight-start\n",
"# count each message as 1 \"token\" (token_counter=len) and keep only the last two messages\n",
"trimmer = trim_messages(strategy=\"last\", max_tokens=2, token_counter=len)\n",
"# highlight-end\n",
"\n",
"chain_with_trimming = (\n",
" RunnablePassthrough.assign(chat_history=itemgetter(\"chat_history\") | trimmer)\n",
" | prompt\n",
" | chat\n",
")\n",
"workflow = StateGraph(state_schema=MessagesState)\n",
"\n",
"chain_with_trimmed_history = RunnableWithMessageHistory(\n",
" chain_with_trimming,\n",
" lambda session_id: demo_ephemeral_chat_history,\n",
" input_messages_key=\"input\",\n",
" history_messages_key=\"chat_history\",\n",
"\n",
"# Define the function that calls the model\n",
"def call_model(state: MessagesState):\n",
" # highlight-start\n",
" trimmed_messages = trimmer.invoke(state[\"messages\"])\n",
" system_prompt = (\n",
" \"You are a helpful assistant. \"\n",
" \"Answer all questions to the best of your ability.\"\n",
" )\n",
" messages = [SystemMessage(content=system_prompt)] + trimmed_messages\n",
" # highlight-end\n",
" response = model.invoke(messages)\n",
" return {\"messages\": response}\n",
"\n",
"\n",
"# Define the node and edge\n",
"workflow.add_node(\"model\", call_model)\n",
"workflow.add_edge(START, \"model\")\n",
"\n",
"# Add simple in-memory checkpointer\n",
"memory = MemorySaver()\n",
"app = workflow.compile(checkpointer=memory)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's call this new app and check the response:"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'messages': [HumanMessage(content=\"Hey there! I'm Nemo.\", additional_kwargs={}, response_metadata={}, id='99321048-3390-4da6-919b-4ad933c4913b'),\n",
" AIMessage(content='Hello!', additional_kwargs={}, response_metadata={}, id='1c3eaf4a-b698-4bc6-a7a6-549290c3fc7e'),\n",
" HumanMessage(content='How are you today?', additional_kwargs={}, response_metadata={}, id='6f96db9d-ac30-4b4a-9ebc-bc11ae87646b'),\n",
" AIMessage(content='Fine thanks!', additional_kwargs={}, response_metadata={}, id='e783fbb6-2892-42ea-9859-ae449e4cfdf6'),\n",
" HumanMessage(content='What is my name?', additional_kwargs={}, response_metadata={}, id='c8ba5e90-89cb-4b34-ad4c-11c0478422d8'),\n",
" AIMessage(content=\"I'm sorry, but I don't know your name. How can I assist you today?\", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 39, 'total_tokens': 56, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_1bb46167f9', 'finish_reason': 'stop', 'logprobs': None}, id='run-aa86d3f8-898e-4146-aa3c-2c424934b0f5-0', usage_metadata={'input_tokens': 39, 'output_tokens': 17, 'total_tokens': 56})]}"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"app.invoke(\n",
" {\n",
" \"messages\": demo_ephemeral_chat_history\n",
" + [HumanMessage(content=\"What is my name?\")]\n",
" },\n",
" config={\"configurable\": {\"thread_id\": \"3\"}},\n",
")"
]
},
@@ -483,101 +382,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's call this new chain and check the messages afterwards:"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Parent run 775cde65-8d22-4c44-80bb-f0b9811c32ca not found for run 5cf71d0e-4663-41cd-8dbe-e9752689cfac. Treating as a root run.\n"
]
},
{
"data": {
"text/plain": [
"AIMessage(content='P. Sherman is a fictional character from the animated movie \"Finding Nemo\" who lives at 42 Wallaby Way, Sydney.', response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 53, 'total_tokens': 80}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-5642ef3a-fdbe-43cf-a575-d1785976a1b9-0', usage_metadata={'input_tokens': 53, 'output_tokens': 27, 'total_tokens': 80})"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain_with_trimmed_history.invoke(\n",
" {\"input\": \"Where does P. Sherman live?\"},\n",
" {\"configurable\": {\"session_id\": \"unused\"}},\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[HumanMessage(content=\"Hey there! I'm Nemo.\"),\n",
" AIMessage(content='Hello!'),\n",
" HumanMessage(content='How are you today?'),\n",
" AIMessage(content='Fine thanks!'),\n",
" HumanMessage(content=\"What's my name?\"),\n",
" AIMessage(content='Your name is Nemo.', response_metadata={'token_usage': {'completion_tokens': 6, 'prompt_tokens': 66, 'total_tokens': 72}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-f8aabef8-631a-4238-a39b-701e881fbe47-0', usage_metadata={'input_tokens': 66, 'output_tokens': 6, 'total_tokens': 72}),\n",
" HumanMessage(content='Where does P. Sherman live?'),\n",
" AIMessage(content='P. Sherman is a fictional character from the animated movie \"Finding Nemo\" who lives at 42 Wallaby Way, Sydney.', response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 53, 'total_tokens': 80}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-5642ef3a-fdbe-43cf-a575-d1785976a1b9-0', usage_metadata={'input_tokens': 53, 'output_tokens': 27, 'total_tokens': 80})]"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"demo_ephemeral_chat_history.messages"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"And we can see that our history has removed the two oldest messages while still adding the most recent conversation at the end. The next time the chain is called, `trim_messages` will be called again, and only the two most recent messages will be passed to the model. In this case, this means that the model will forget the name we gave it the next time we invoke it:"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Parent run fde7123f-6fd3-421a-a3fc-2fb37dead119 not found for run 061a4563-2394-470d-a3ed-9bf1388ca431. Treating as a root run.\n"
]
},
{
"data": {
"text/plain": [
"AIMessage(content=\"I'm sorry, but I don't have access to your personal information, so I don't know your name. How else may I assist you today?\", response_metadata={'token_usage': {'completion_tokens': 31, 'prompt_tokens': 74, 'total_tokens': 105}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-0ab03495-1f7c-4151-9070-56d2d1c565ff-0', usage_metadata={'input_tokens': 74, 'output_tokens': 31, 'total_tokens': 105})"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain_with_trimmed_history.invoke(\n",
" {\"input\": \"What is my name?\"},\n",
" {\"configurable\": {\"session_id\": \"unused\"}},\n",
")"
"We can see that `trim_messages` was called and only the two most recent messages will be passed to the model. In this case, this means that the model forgot the name we gave it."
]
},
{
@@ -593,114 +398,82 @@
"source": [
"### Summary memory\n",
"\n",
"We can use this same pattern in other ways too. For example, we could use an additional LLM call to generate a summary of the conversation before calling our chain. Let's recreate our chat history and chatbot chain:"
"We can use this same pattern in other ways too. For example, we could use an additional LLM call to generate a summary of the conversation before calling our app. Let's recreate our chat history:"
]
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[HumanMessage(content=\"Hey there! I'm Nemo.\"),\n",
" AIMessage(content='Hello!'),\n",
" HumanMessage(content='How are you today?'),\n",
" AIMessage(content='Fine thanks!')]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"demo_ephemeral_chat_history = ChatMessageHistory()\n",
"\n",
"demo_ephemeral_chat_history.add_user_message(\"Hey there! I'm Nemo.\")\n",
"demo_ephemeral_chat_history.add_ai_message(\"Hello!\")\n",
"demo_ephemeral_chat_history.add_user_message(\"How are you today?\")\n",
"demo_ephemeral_chat_history.add_ai_message(\"Fine thanks!\")\n",
"\n",
"demo_ephemeral_chat_history.messages"
"demo_ephemeral_chat_history = [\n",
" HumanMessage(content=\"Hey there! I'm Nemo.\"),\n",
" AIMessage(content=\"Hello!\"),\n",
" HumanMessage(content=\"How are you today?\"),\n",
" AIMessage(content=\"Fine thanks!\"),\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We'll slightly modify the prompt to make the LLM aware that will receive a condensed summary instead of a chat history:"
"And now, let's update the model-calling function to distill previous interactions into a summary:"
]
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\n",
" \"system\",\n",
" \"You are a helpful assistant. Answer all questions to the best of your ability. The provided chat history includes facts about the user you are speaking with.\",\n",
" ),\n",
" (\"placeholder\", \"{chat_history}\"),\n",
" (\"user\", \"{input}\"),\n",
" ]\n",
")\n",
"from langchain_core.messages import HumanMessage, RemoveMessage\n",
"from langgraph.checkpoint.memory import MemorySaver\n",
"from langgraph.graph import START, MessagesState, StateGraph\n",
"\n",
"chain = prompt | chat\n",
"workflow = StateGraph(state_schema=MessagesState)\n",
"\n",
"chain_with_message_history = RunnableWithMessageHistory(\n",
" chain,\n",
" lambda session_id: demo_ephemeral_chat_history,\n",
" input_messages_key=\"input\",\n",
" history_messages_key=\"chat_history\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"And now, let's create a function that will distill previous interactions into a summary. We can add this one to the front of the chain too:"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"def summarize_messages(chain_input):\n",
" stored_messages = demo_ephemeral_chat_history.messages\n",
" if len(stored_messages) == 0:\n",
" return False\n",
" summarization_prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\"placeholder\", \"{chat_history}\"),\n",
" (\n",
" \"user\",\n",
" \"Distill the above chat messages into a single summary message. Include as many specific details as you can.\",\n",
" ),\n",
" ]\n",
"\n",
"# Define the function that calls the model\n",
"def call_model(state: MessagesState):\n",
" system_prompt = (\n",
" \"You are a helpful assistant. \"\n",
" \"Answer all questions to the best of your ability. \"\n",
" \"The provided chat history includes a summary of the earlier conversation.\"\n",
" )\n",
" summarization_chain = summarization_prompt | chat\n",
" system_message = SystemMessage(content=system_prompt)\n",
" # Summarize the messages\n",
" if len(state[\"messages\"]) > 1:\n",
" *message_history, last_human_message = state[\"messages\"]\n",
" # Invoke the model to generate conversation summary\n",
" summary_prompt = (\n",
" \"Distill the above chat messages into a single summary message. \"\n",
" \"Include as many specific details as you can.\"\n",
" )\n",
" summary_message = model.invoke(\n",
" message_history + [HumanMessage(content=summary_prompt)]\n",
" )\n",
" # Delete messages that we no longer want to show up\n",
" delete_messages = [RemoveMessage(id=m.id) for m in state[\"messages\"]]\n",
" # Re-add user message\n",
" human_message = HumanMessage(content=last_human_message.content)\n",
" # Call the model with summary & response\n",
" response = model.invoke([system_message, summary_message, human_message])\n",
" message_updates = [summary_message, human_message, response] + delete_messages\n",
" else:\n",
" message_updates = model.invoke([system_message] + state[\"messages\"])\n",
"\n",
" summary_message = summarization_chain.invoke({\"chat_history\": stored_messages})\n",
"\n",
" demo_ephemeral_chat_history.clear()\n",
"\n",
" demo_ephemeral_chat_history.add_message(summary_message)\n",
"\n",
" return True\n",
" return {\"messages\": message_updates}\n",
"\n",
"\n",
"chain_with_summarization = (\n",
" RunnablePassthrough.assign(messages_summarized=summarize_messages)\n",
" | chain_with_message_history\n",
")"
"# Define the node and edge\n",
"workflow.add_node(\"model\", call_model)\n",
"workflow.add_edge(START, \"model\")\n",
"\n",
"# Add simple in-memory checkpointer\n",
"memory = MemorySaver()\n",
"app = workflow.compile(checkpointer=memory)"
]
},
{
@@ -712,54 +485,37 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content='You introduced yourself as Nemo. How can I assist you today, Nemo?')"
"{'messages': [AIMessage(content='Nemo greeted me, and I responded positively, indicating that I am doing fine.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 60, 'total_tokens': 77, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_1bb46167f9', 'finish_reason': 'stop', 'logprobs': None}, id='run-94df0e9f-6b1c-4e68-858c-5b23058b16d8-0', usage_metadata={'input_tokens': 60, 'output_tokens': 17, 'total_tokens': 77}),\n",
" HumanMessage(content='What did I say my name was?', additional_kwargs={}, response_metadata={}, id='d3f57f56-dd1a-45f9-add2-146f54c1180c'),\n",
" AIMessage(content='You mentioned that your name is Nemo.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 8, 'prompt_tokens': 68, 'total_tokens': 76, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_1bb46167f9', 'finish_reason': 'stop', 'logprobs': None}, id='run-ea144209-5d37-4bb5-8529-be235626fc74-0', usage_metadata={'input_tokens': 68, 'output_tokens': 8, 'total_tokens': 76})]}"
]
},
"execution_count": 20,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain_with_summarization.invoke(\n",
" {\"input\": \"What did I say my name was?\"},\n",
" {\"configurable\": {\"session_id\": \"unused\"}},\n",
"app.invoke(\n",
" {\n",
" \"messages\": demo_ephemeral_chat_history\n",
" + [HumanMessage(\"What did I say my name was?\")]\n",
" },\n",
" config={\"configurable\": {\"thread_id\": \"4\"}},\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[AIMessage(content='The conversation is between Nemo and an AI. Nemo introduces himself and the AI responds with a greeting. Nemo then asks the AI how it is doing, and the AI responds that it is fine.'),\n",
" HumanMessage(content='What did I say my name was?'),\n",
" AIMessage(content='You introduced yourself as Nemo. How can I assist you today, Nemo?')]"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"demo_ephemeral_chat_history.messages"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that invoking the chain again will generate another summary generated from the initial summary plus new messages and so on. You could also design a hybrid approach where a certain number of messages are retained in chat history while others are summarized."
"Note that invoking the app again will generate another summary from the initial summary plus new messages, and so on. You could also design a hybrid approach where a certain number of messages are retained in chat history while others are summarized."
]
}
],
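A hedged sketch of that hybrid approach; the cutoff of four retained messages and the helper's name are assumptions, not the notebook's own code:

```python
# Sketch: keep the most recent messages raw and summarize only the older ones.
from langchain_core.messages import HumanMessage, SystemMessage


def summarize_older_messages(messages, model, keep_last: int = 4):
    """Summarize everything except the `keep_last` most recent messages."""
    if len(messages) <= keep_last:
        return messages
    older, recent = messages[:-keep_last], messages[-keep_last:]
    summary = model.invoke(
        older
        + [
            HumanMessage(
                content=(
                    "Distill the above chat messages into a single summary "
                    "message. Include as many specific details as you can."
                )
            )
        ]
    )
    # Older turns collapse into one system message; recent turns stay verbatim.
    return [SystemMessage(content=f"Conversation summary: {summary.content}")] + recent
```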
@@ -779,7 +535,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
"version": "3.12.3"
}
},
"nbformat": 4,

@@ -71,7 +71,7 @@
"source": [
"from langchain_openai import ChatOpenAI\n",
"\n",
"chat = ChatOpenAI(model=\"gpt-3.5-turbo-1106\", temperature=0.2)"
"chat = ChatOpenAI(model=\"gpt-4o-mini\", temperature=0.2)"
]
},
{

File diff suppressed because one or more lines are too long
@@ -7,7 +7,7 @@
"source": [
"# How to split code\n",
"\n",
"[RecursiveCharacterTextSplitter](https://python.langchain.com/v0.2/api_reference/text_splitters/character/langchain_text_splitters.character.RecursiveCharacterTextSplitter.html) includes pre-built lists of separators that are useful for splitting text in a specific programming language.\n",
"[RecursiveCharacterTextSplitter](https://python.langchain.com/api_reference/text_splitters/character/langchain_text_splitters.character.RecursiveCharacterTextSplitter.html) includes pre-built lists of separators that are useful for splitting text in a specific programming language.\n",
"\n",
"Supported languages are stored in the `langchain_text_splitters.Language` enum. They include:\n",
"\n",

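A minimal sketch of language-aware splitting with `from_language`; the sample code string and chunk size are illustrative:

```python
# Sketch: splitting Python source with language-specific separators.
from langchain_text_splitters import Language, RecursiveCharacterTextSplitter

PYTHON_CODE = """
def hello_world():
    print("Hello, World!")

hello_world()
"""

python_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON, chunk_size=50, chunk_overlap=0
)
# Splits preferentially on Python constructs (class/def) before whitespace.
print(python_splitter.create_documents([PYTHON_CODE]))
```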
@@ -58,7 +58,8 @@
"import os\n",
"from getpass import getpass\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass()"
"if \"OPENAI_API_KEY\" not in os.environ:\n",
" os.environ[\"OPENAI_API_KEY\"] = getpass()"
]
},
{
@@ -99,7 +100,7 @@
"id": "b0f74589",
"metadata": {},
"source": [
"Above, we defined `temperature` as a [`ConfigurableField`](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.utils.ConfigurableField.html#langchain_core.runnables.utils.ConfigurableField) that we can set at runtime. To do so, we use the [`with_config`](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_config) method like this:"
"Above, we defined `temperature` as a [`ConfigurableField`](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.utils.ConfigurableField.html#langchain_core.runnables.utils.ConfigurableField) that we can set at runtime. To do so, we use the [`with_config`](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_config) method like this:"
]
},
{
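A hedged sketch of the pattern being described; the field id `llm_temperature` mirrors the doc's convention but is an assumption here:

```python
# Sketch: declaring a runtime-configurable field and overriding it per call.
from langchain_core.runnables import ConfigurableField
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0).configurable_fields(
    temperature=ConfigurableField(
        id="llm_temperature",
        name="LLM Temperature",
        description="The temperature of the LLM",
    )
)

# Override temperature for a single call without rebuilding the model.
print(llm.with_config(configurable={"llm_temperature": 0.9}).invoke("Pick a random number"))
```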
@@ -281,7 +282,8 @@
"import os\n",
"from getpass import getpass\n",
"\n",
"os.environ[\"ANTHROPIC_API_KEY\"] = getpass()"
"if \"ANTHROPIC_API_KEY\" not in os.environ:\n",
" os.environ[\"ANTHROPIC_API_KEY\"] = getpass()"
]
},
{

@@ -227,7 +227,7 @@
"source": [
"### `LLMListwiseRerank`\n",
"\n",
"[LLMListwiseRerank](https://python.langchain.com/v0.2/api_reference/langchain/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html) uses [zero-shot listwise document reranking](https://arxiv.org/pdf/2305.02156) and functions similarly to `LLMChainFilter` as a robust but more expensive option. It is recommended to use a more powerful LLM.\n",
"[LLMListwiseRerank](https://python.langchain.com/api_reference/langchain/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html) uses [zero-shot listwise document reranking](https://arxiv.org/pdf/2305.02156) and functions similarly to `LLMChainFilter` as a robust but more expensive option. It is recommended to use a more powerful LLM.\n",
"\n",
"Note that `LLMListwiseRerank` requires a model with the [with_structured_output](/docs/integrations/chat/) method implemented."
]
@@ -258,7 +258,7 @@
"from langchain.retrievers.document_compressors import LLMListwiseRerank\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)\n",
"llm = ChatOpenAI(model=\"gpt-4o-mini\", temperature=0)\n",
"\n",
"_filter = LLMListwiseRerank.from_llm(llm, top_n=1)\n",
"compression_retriever = ContextualCompressionRetriever(\n",

@@ -42,13 +42,13 @@
"source": [
"LangChain [tools](/docs/concepts#tools) are interfaces that an agent, chain, or chat model can use to interact with the world. See [here](/docs/how_to/#tools) for how-to guides covering tool-calling, built-in tools, custom tools, and more information.\n",
"\n",
"LangChain tools-- instances of [BaseTool](https://python.langchain.com/v0.2/api_reference/core/tools/langchain_core.tools.BaseTool.html)-- are [Runnables](/docs/concepts/#runnable-interface) with additional constraints that enable them to be invoked effectively by language models:\n",
"LangChain tools-- instances of [BaseTool](https://python.langchain.com/api_reference/core/tools/langchain_core.tools.BaseTool.html)-- are [Runnables](/docs/concepts/#runnable-interface) with additional constraints that enable them to be invoked effectively by language models:\n",
"\n",
"- Their inputs are constrained to be serializable, specifically strings and Python `dict` objects;\n",
"- They contain names and descriptions indicating how and when they should be used;\n",
"- They may contain a detailed [args_schema](https://python.langchain.com/v0.2/docs/how_to/custom_tools/) for their arguments. That is, while a tool (as a `Runnable`) might accept a single `dict` input, the specific keys and type information needed to populate a dict should be specified in the `args_schema`.\n",
"- They may contain a detailed [args_schema](https://python.langchain.com/docs/how_to/custom_tools/) for their arguments. That is, while a tool (as a `Runnable`) might accept a single `dict` input, the specific keys and type information needed to populate a dict should be specified in the `args_schema`.\n",
"\n",
"Runnables that accept string or `dict` input can be converted to tools using the [as_tool](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.as_tool) method, which allows for the specification of names, descriptions, and additional schema information for arguments."
"Runnables that accept string or `dict` input can be converted to tools using the [as_tool](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.as_tool) method, which allows for the specification of names, descriptions, and additional schema information for arguments."
]
},
{
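A hedged sketch of `as_tool` on a dict-input runnable; the runnable, tool name, and `arg_types` are illustrative assumptions:

```python
# Sketch: converting a dict-input Runnable into a named, typed tool.
from langchain_core.runnables import RunnableLambda


def format_joke_request(inputs: dict) -> str:
    return f"Tell a {inputs['style']} joke about {inputs['topic']}."


joke_tool = RunnableLambda(format_joke_request).as_tool(
    name="joke_request_formatter",
    description="Format a joke request given a topic and a style.",
    # arg_types supplies key/type info since no args_schema is passed.
    arg_types={"topic": str, "style": str},
)

print(joke_tool.name)
print(joke_tool.invoke({"topic": "cats", "style": "dry"}))
```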
@@ -180,7 +180,7 @@
"id": "32b1a992-8997-4c98-8eb2-c9fe9431b799",
"metadata": {},
"source": [
"Alternatively, the schema can be fully specified by directly passing the desired [args_schema](https://python.langchain.com/v0.2/api_reference/core/tools/langchain_core.tools.BaseTool.html#langchain_core.tools.BaseTool.args_schema) for the tool:"
"Alternatively, the schema can be fully specified by directly passing the desired [args_schema](https://python.langchain.com/api_reference/core/tools/langchain_core.tools.BaseTool.html#langchain_core.tools.BaseTool.args_schema) for the tool:"
]
},
{
@@ -190,7 +190,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class GSchema(BaseModel):\n",
|
||||
@@ -285,7 +285,7 @@
"\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)"
"llm = ChatOpenAI(model=\"gpt-4o-mini\", temperature=0)"
]
},
{
@@ -331,7 +331,7 @@
"id": "9ba737ac-43a2-4a6f-b855-5bd0305017f1",
"metadata": {},
"source": [
"We next create a simple pre-built [LangGraph agent](https://python.langchain.com/v0.2/docs/tutorials/agents/) and provide it the tool:"
"We next create a simple pre-built [LangGraph agent](https://python.langchain.com/docs/tutorials/agents/) and provide it the tool:"
]
},
{
@@ -362,11 +362,11 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_W8cnfOjwqEn4cFcg19LN9mYD', 'function': {'arguments': '{\"__arg1\":\"dogs\"}', 'name': 'pet_info_retriever'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 60, 'total_tokens': 79}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-d7f81de9-1fb7-4caf-81ed-16dcdb0b2ab4-0', tool_calls=[{'name': 'pet_info_retriever', 'args': {'__arg1': 'dogs'}, 'id': 'call_W8cnfOjwqEn4cFcg19LN9mYD'}], usage_metadata={'input_tokens': 60, 'output_tokens': 19, 'total_tokens': 79})]}}\n",
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_W8cnfOjwqEn4cFcg19LN9mYD', 'function': {'arguments': '{\"__arg1\":\"dogs\"}', 'name': 'pet_info_retriever'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 60, 'total_tokens': 79}, 'model_name': 'gpt-4o-mini', 'system_fingerprint': None, 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-d7f81de9-1fb7-4caf-81ed-16dcdb0b2ab4-0', tool_calls=[{'name': 'pet_info_retriever', 'args': {'__arg1': 'dogs'}, 'id': 'call_W8cnfOjwqEn4cFcg19LN9mYD'}], usage_metadata={'input_tokens': 60, 'output_tokens': 19, 'total_tokens': 79})]}}\n",
"----\n",
"{'tools': {'messages': [ToolMessage(content=\"[Document(id='86f835fe-4bbe-4ec6-aeb4-489a8b541707', page_content='Dogs are great companions, known for their loyalty and friendliness.')]\", name='pet_info_retriever', tool_call_id='call_W8cnfOjwqEn4cFcg19LN9mYD')]}}\n",
"----\n",
"{'agent': {'messages': [AIMessage(content='Dogs are known for being great companions, known for their loyalty and friendliness.', response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 134, 'total_tokens': 152}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-9ca5847a-a5eb-44c0-a774-84cc2c5bbc5b-0', usage_metadata={'input_tokens': 134, 'output_tokens': 18, 'total_tokens': 152})]}}\n",
"{'agent': {'messages': [AIMessage(content='Dogs are known for being great companions, known for their loyalty and friendliness.', response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 134, 'total_tokens': 152}, 'model_name': 'gpt-4o-mini', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-9ca5847a-a5eb-44c0-a774-84cc2c5bbc5b-0', usage_metadata={'input_tokens': 134, 'output_tokens': 18, 'total_tokens': 152})]}}\n",
"----\n"
]
}
@@ -497,11 +497,11 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_17iLPWvOD23zqwd1QVQ00Y63', 'function': {'arguments': '{\"question\":\"What are dogs known for according to pirates?\",\"answer_style\":\"quote\"}', 'name': 'pet_expert'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 28, 'prompt_tokens': 59, 'total_tokens': 87}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-7fef44f3-7bba-4e63-8c51-2ad9c5e65e2e-0', tool_calls=[{'name': 'pet_expert', 'args': {'question': 'What are dogs known for according to pirates?', 'answer_style': 'quote'}, 'id': 'call_17iLPWvOD23zqwd1QVQ00Y63'}], usage_metadata={'input_tokens': 59, 'output_tokens': 28, 'total_tokens': 87})]}}\n",
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_17iLPWvOD23zqwd1QVQ00Y63', 'function': {'arguments': '{\"question\":\"What are dogs known for according to pirates?\",\"answer_style\":\"quote\"}', 'name': 'pet_expert'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 28, 'prompt_tokens': 59, 'total_tokens': 87}, 'model_name': 'gpt-4o-mini', 'system_fingerprint': None, 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-7fef44f3-7bba-4e63-8c51-2ad9c5e65e2e-0', tool_calls=[{'name': 'pet_expert', 'args': {'question': 'What are dogs known for according to pirates?', 'answer_style': 'quote'}, 'id': 'call_17iLPWvOD23zqwd1QVQ00Y63'}], usage_metadata={'input_tokens': 59, 'output_tokens': 28, 'total_tokens': 87})]}}\n",
"----\n",
"{'tools': {'messages': [ToolMessage(content='\"Dogs are known for their loyalty and friendliness, making them great companions for pirates on long sea voyages.\"', name='pet_expert', tool_call_id='call_17iLPWvOD23zqwd1QVQ00Y63')]}}\n",
"----\n",
"{'agent': {'messages': [AIMessage(content='According to pirates, dogs are known for their loyalty and friendliness, making them great companions for pirates on long sea voyages.', response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 119, 'total_tokens': 146}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-5a30edc3-7be0-4743-b980-ca2f8cad9b8d-0', usage_metadata={'input_tokens': 119, 'output_tokens': 27, 'total_tokens': 146})]}}\n",
"{'agent': {'messages': [AIMessage(content='According to pirates, dogs are known for their loyalty and friendliness, making them great companions for pirates on long sea voyages.', response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 119, 'total_tokens': 146}, 'model_name': 'gpt-4o-mini', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-5a30edc3-7be0-4743-b980-ca2f8cad9b8d-0', usage_metadata={'input_tokens': 119, 'output_tokens': 27, 'total_tokens': 146})]}}\n",
"----\n"
]
}
@@ -16,7 +16,7 @@
"\n",
"LangChain has some built-in callback handlers, but you will often want to create your own handlers with custom logic.\n",
"\n",
"To create a custom callback handler, we need to determine the [event(s)](https://python.langchain.com/v0.2/api_reference/core/callbacks/langchain_core.callbacks.base.BaseCallbackHandler.html#langchain-core-callbacks-base-basecallbackhandler) we want our callback handler to handle as well as what we want our callback handler to do when the event is triggered. Then all we need to do is attach the callback handler to the object, for example via [the constructor](/docs/how_to/callbacks_constructor) or [at runtime](/docs/how_to/callbacks_runtime).\n",
"To create a custom callback handler, we need to determine the [event(s)](https://python.langchain.com/api_reference/core/callbacks/langchain_core.callbacks.base.BaseCallbackHandler.html#langchain-core-callbacks-base-basecallbackhandler) we want our callback handler to handle as well as what we want our callback handler to do when the event is triggered. Then all we need to do is attach the callback handler to the object, for example via [the constructor](/docs/how_to/callbacks_constructor) or [at runtime](/docs/how_to/callbacks_runtime).\n",
"\n",
"In the example below, we'll implement streaming with a custom handler.\n",
"\n",
@@ -107,7 +107,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"You can see [this reference page](https://python.langchain.com/v0.2/api_reference/core/callbacks/langchain_core.callbacks.base.BaseCallbackHandler.html#langchain-core-callbacks-base-basecallbackhandler) for a list of events you can handle. Note that the `handle_chain_*` events run for most LCEL runnables.\n",
"You can see [this reference page](https://python.langchain.com/api_reference/core/callbacks/langchain_core.callbacks.base.BaseCallbackHandler.html#langchain-core-callbacks-base-basecallbackhandler) for a list of events you can handle. Note that the `handle_chain_*` events run for most LCEL runnables.\n",
"\n",
"## Next steps\n",
"\n",
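The handler implementation itself falls outside this hunk; as a minimal sketch of the streaming pattern the guide describes (the class name and print format are illustrative assumptions), `on_llm_new_token` is the `BaseCallbackHandler` hook fired for each streamed token:

```python
from langchain_core.callbacks import BaseCallbackHandler


class MyCustomHandler(BaseCallbackHandler):
    """Illustrative handler: print each token as the model streams it."""

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        print(f"My custom handler, token: {token}")
```

It would then be attached to a streaming-enabled chat model via `callbacks=[MyCustomHandler()]`, either in the constructor or at invocation time.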
@@ -16,7 +16,7 @@
"\n",
"In this guide, we'll learn how to create a custom chat model using LangChain abstractions.\n",
"\n",
"Wrapping your LLM with the standard [`BaseChatModel`](https://python.langchain.com/v0.2/api_reference/core/language_models/langchain_core.language_models.chat_models.BaseChatModel.html) interface allows you to use your LLM in existing LangChain programs with minimal code modifications!\n",
"Wrapping your LLM with the standard [`BaseChatModel`](https://python.langchain.com/api_reference/core/language_models/langchain_core.language_models.chat_models.BaseChatModel.html) interface allows you to use your LLM in existing LangChain programs with minimal code modifications!\n",
"\n",
"As a bonus, your LLM will automatically become a LangChain `Runnable` and will benefit from some optimizations out of the box (e.g., batch via a threadpool), async support, the `astream_events` API, etc.\n",
"\n",
@@ -503,7 +503,7 @@
"\n",
"Documentation:\n",
"\n",
"* The model contains doc-strings for all initialization arguments, as these will be surfaced in the [API Reference](https://python.langchain.com/v0.2/api_reference/langchain/index.html).\n",
"* The model contains doc-strings for all initialization arguments, as these will be surfaced in the [API Reference](https://python.langchain.com/api_reference/langchain/index.html).\n",
"* The class doc-string for the model contains a link to the model API if the model is powered by a service.\n",
"\n",
"Tests:\n",
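As a rough, hedged sketch of what such a wrapper involves — the parroting behavior is invented, but `_generate` and `_llm_type` are the members `BaseChatModel` requires:

```python
from typing import Any, List, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import AIMessage, BaseMessage
from langchain_core.outputs import ChatGeneration, ChatResult


class ParrotChatModel(BaseChatModel):
    """Toy model that parrots back the last message it receives (assumption)."""

    @property
    def _llm_type(self) -> str:
        return "parrot-chat-model"

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        # Echo the content of the final input message.
        message = AIMessage(content=messages[-1].content)
        return ChatResult(generations=[ChatGeneration(message=message)])
```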
@@ -402,7 +402,7 @@
"\n",
"Documentation:\n",
"\n",
"* The model contains doc-strings for all initialization arguments, as these will be surfaced in the [API Reference](https://python.langchain.com/v0.2/api_reference/langchain/index.html).\n",
"* The model contains doc-strings for all initialization arguments, as these will be surfaced in the [API Reference](https://python.langchain.com/api_reference/langchain/index.html).\n",
"* The class doc-string for the model contains a link to the model API if the model is powered by a service.\n",
"\n",
"Tests:\n",
@@ -270,7 +270,7 @@
"\n",
"Documentation:\n",
"\n",
"* The retriever contains doc-strings for all initialization arguments, as these will be surfaced in the [API Reference](https://python.langchain.com/v0.2/api_reference/langchain/index.html).\n",
"* The retriever contains doc-strings for all initialization arguments, as these will be surfaced in the [API Reference](https://python.langchain.com/api_reference/langchain/index.html).\n",
"* The class doc-string for the model contains a link to any relevant APIs used for the retriever (e.g., if the retriever is retrieving from wikipedia, it'll be good to link to the wikipedia API!)\n",
"\n",
"Tests:\n",
@@ -9,20 +9,20 @@
"\n",
"When constructing an agent, you will need to provide it with a list of `Tool`s that it can use. Besides the actual function that is called, the Tool consists of several components:\n",
"\n",
"| Attribute | Type | Description |\n",
"|-----------------|---------------------------|------------------------------------------------------------------------------------------------------------------|\n",
"| name | str | Must be unique within a set of tools provided to an LLM or agent. |\n",
"| description | str | Describes what the tool does. Used as context by the LLM or agent. |\n",
"| args_schema | Pydantic BaseModel | Optional but recommended, can be used to provide more information (e.g., few-shot examples) or validation for expected parameters |\n",
"| return_direct | boolean | Only relevant for agents. When True, after invoking the given tool, the agent will stop and return the result directly to the user. |\n",
"| Attribute | Type | Description |\n",
"|---------------|---------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n",
"| name | str | Must be unique within a set of tools provided to an LLM or agent. |\n",
"| description | str | Describes what the tool does. Used as context by the LLM or agent. |\n",
"| args_schema | pydantic.BaseModel | Optional but recommended, and required if using callback handlers. It can be used to provide more information (e.g., few-shot examples) or validation for expected parameters. |\n",
"| return_direct | boolean | Only relevant for agents. When True, after invoking the given tool, the agent will stop and return the result directly to the user. |\n",
"\n",
"LangChain supports the creation of tools from:\n",
"\n",
"1. Functions;\n",
"2. LangChain [Runnables](/docs/concepts#runnable-interface);\n",
"3. By sub-classing from [BaseTool](https://python.langchain.com/v0.2/api_reference/core/tools/langchain_core.tools.BaseTool.html) -- This is the most flexible method; it provides the largest degree of control, at the expense of more effort and code.\n",
"3. By sub-classing from [BaseTool](https://python.langchain.com/api_reference/core/tools/langchain_core.tools.BaseTool.html) -- This is the most flexible method; it provides the largest degree of control, at the expense of more effort and code.\n",
"\n",
"Creating tools from functions may be sufficient for most use cases, and can be done via a simple [@tool decorator](https://python.langchain.com/v0.2/api_reference/core/tools/langchain_core.tools.tool.html#langchain_core.tools.tool). If more configuration is needed-- e.g., specification of both sync and async implementations-- one can also use the [StructuredTool.from_function](https://python.langchain.com/v0.2/api_reference/core/tools/langchain_core.tools.StructuredTool.html#langchain_core.tools.StructuredTool.from_function) class method.\n",
"Creating tools from functions may be sufficient for most use cases, and can be done via a simple [@tool decorator](https://python.langchain.com/api_reference/core/tools/langchain_core.tools.tool.html#langchain_core.tools.tool). If more configuration is needed-- e.g., specification of both sync and async implementations-- one can also use the [StructuredTool.from_function](https://python.langchain.com/api_reference/core/tools/langchain_core.tools.StructuredTool.html#langchain_core.tools.StructuredTool.from_function) class method.\n",
"\n",
"In this guide we provide an overview of these methods.\n",
"\n",
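A quick illustration of the two lighter-weight creation methods named above, with invented functions:

```python
from langchain_core.tools import StructuredTool, tool


@tool
def multiply(a: int, b: int) -> int:
    """Multiply two numbers."""
    return a * b


def add(a: int, b: int) -> int:
    """Add two numbers."""
    return a + b


# Equivalent construction when more configuration is needed.
adder = StructuredTool.from_function(func=add, name="add", description="Add two numbers.")

print(multiply.invoke({"a": 2, "b": 3}))  # 6
print(adder.invoke({"a": 2, "b": 3}))  # 5
```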
@@ -48,7 +48,14 @@
"cell_type": "code",
"execution_count": 1,
"id": "cc7005cd-072f-4d37-8453-6297468e5192",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:25:52.645451Z",
"iopub.status.busy": "2024-09-10T20:25:52.645081Z",
"iopub.status.idle": "2024-09-10T20:25:53.030958Z",
"shell.execute_reply": "2024-09-10T20:25:53.030669Z"
}
},
"outputs": [
{
"name": "stdout",
@@ -88,7 +95,14 @@
"cell_type": "code",
"execution_count": 2,
"id": "0c0991db-b997-4611-be37-4346e660506b",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:25:53.032544Z",
"iopub.status.busy": "2024-09-10T20:25:53.032420Z",
"iopub.status.idle": "2024-09-10T20:25:53.035349Z",
"shell.execute_reply": "2024-09-10T20:25:53.035123Z"
}
},
"outputs": [],
"source": [
"from langchain_core.tools import tool\n",
@@ -112,22 +126,29 @@
"cell_type": "code",
"execution_count": 3,
"id": "5626423f-053e-4a66-adca-1d794d835397",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:25:53.036658Z",
"iopub.status.busy": "2024-09-10T20:25:53.036574Z",
"iopub.status.idle": "2024-09-10T20:25:53.041154Z",
"shell.execute_reply": "2024-09-10T20:25:53.040964Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"{'title': 'multiply_by_maxSchema',\n",
" 'description': 'Multiply a by the maximum of b.',\n",
" 'type': 'object',\n",
" 'properties': {'a': {'title': 'A',\n",
" 'description': 'scale factor',\n",
"{'description': 'Multiply a by the maximum of b.',\n",
" 'properties': {'a': {'description': 'scale factor',\n",
" 'title': 'A',\n",
" 'type': 'string'},\n",
" 'b': {'title': 'B',\n",
" 'description': 'list of ints over which to take maximum',\n",
" 'type': 'array',\n",
" 'items': {'type': 'integer'}}},\n",
" 'required': ['a', 'b']}"
" 'b': {'description': 'list of ints over which to take maximum',\n",
" 'items': {'type': 'integer'},\n",
" 'title': 'B',\n",
" 'type': 'array'}},\n",
" 'required': ['a', 'b'],\n",
" 'title': 'multiply_by_maxSchema',\n",
" 'type': 'object'}"
]
},
"execution_count": 3,
@@ -163,7 +184,14 @@
"cell_type": "code",
"execution_count": 4,
"id": "9216d03a-f6ea-4216-b7e1-0661823a4c0b",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:25:53.042516Z",
"iopub.status.busy": "2024-09-10T20:25:53.042427Z",
"iopub.status.idle": "2024-09-10T20:25:53.045217Z",
"shell.execute_reply": "2024-09-10T20:25:53.045010Z"
}
},
"outputs": [
{
"name": "stdout",
@@ -171,13 +199,13 @@
"text": [
"multiplication-tool\n",
"Multiply two numbers.\n",
"{'a': {'title': 'A', 'description': 'first number', 'type': 'integer'}, 'b': {'title': 'B', 'description': 'second number', 'type': 'integer'}}\n",
"{'a': {'description': 'first number', 'title': 'A', 'type': 'integer'}, 'b': {'description': 'second number', 'title': 'B', 'type': 'integer'}}\n",
"True\n"
]
}
],
"source": [
"from langchain.pydantic_v1 import BaseModel, Field\n",
"from pydantic import BaseModel, Field\n",
"\n",
"\n",
"class CalculatorInput(BaseModel):\n",
@@ -218,19 +246,26 @@
"cell_type": "code",
"execution_count": 5,
"id": "336f5538-956e-47d5-9bde-b732559f9e61",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:25:53.046526Z",
"iopub.status.busy": "2024-09-10T20:25:53.046456Z",
"iopub.status.idle": "2024-09-10T20:25:53.050045Z",
"shell.execute_reply": "2024-09-10T20:25:53.049836Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"{'title': 'fooSchema',\n",
" 'description': 'The foo.',\n",
" 'type': 'object',\n",
" 'properties': {'bar': {'title': 'Bar',\n",
" 'description': 'The bar.',\n",
"{'description': 'The foo.',\n",
" 'properties': {'bar': {'description': 'The bar.',\n",
" 'title': 'Bar',\n",
" 'type': 'string'},\n",
" 'baz': {'title': 'Baz', 'description': 'The baz.', 'type': 'integer'}},\n",
" 'required': ['bar', 'baz']}"
" 'baz': {'description': 'The baz.', 'title': 'Baz', 'type': 'integer'}},\n",
" 'required': ['bar', 'baz'],\n",
" 'title': 'fooSchema',\n",
" 'type': 'object'}"
]
},
"execution_count": 5,
@@ -259,7 +294,7 @@
"metadata": {},
"source": [
":::{.callout-caution}\n",
"By default, `@tool(parse_docstring=True)` will raise `ValueError` if the docstring does not parse correctly. See [API Reference](https://python.langchain.com/v0.2/api_reference/core/tools/langchain_core.tools.tool.html) for detail and examples.\n",
"By default, `@tool(parse_docstring=True)` will raise `ValueError` if the docstring does not parse correctly. See [API Reference](https://python.langchain.com/api_reference/core/tools/langchain_core.tools.tool.html) for detail and examples.\n",
":::"
]
},
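For context, a small sketch of the feature this caution refers to — with `parse_docstring=True` the decorator infers the tool description and per-argument descriptions from a Google-style docstring (the `greet` function is invented):

```python
from langchain_core.tools import tool


@tool(parse_docstring=True)
def greet(name: str, excited: bool = False) -> str:
    """Greet a user by name.

    Args:
        name: The name to greet.
        excited: Whether to add an exclamation mark.
    """
    return f"Hello, {name}{'!' if excited else '.'}"


# The parsed descriptions end up in the tool's args schema.
print(greet.args)
```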
@@ -277,7 +312,14 @@
"cell_type": "code",
"execution_count": 6,
"id": "564fbe6f-11df-402d-b135-ef6ff25e1e63",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:25:53.051302Z",
"iopub.status.busy": "2024-09-10T20:25:53.051218Z",
"iopub.status.idle": "2024-09-10T20:25:53.059704Z",
"shell.execute_reply": "2024-09-10T20:25:53.059490Z"
}
},
"outputs": [
{
"name": "stdout",
@@ -320,7 +362,14 @@
"cell_type": "code",
"execution_count": 7,
"id": "6bc055d4-1fbe-4db5-8881-9c382eba6b1b",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:25:53.060971Z",
"iopub.status.busy": "2024-09-10T20:25:53.060883Z",
"iopub.status.idle": "2024-09-10T20:25:53.064615Z",
"shell.execute_reply": "2024-09-10T20:25:53.064408Z"
}
},
"outputs": [
{
"name": "stdout",
@@ -329,7 +378,7 @@
"6\n",
"Calculator\n",
"multiply numbers\n",
"{'a': {'title': 'A', 'description': 'first number', 'type': 'integer'}, 'b': {'title': 'B', 'description': 'second number', 'type': 'integer'}}\n"
"{'a': {'description': 'first number', 'title': 'A', 'type': 'integer'}, 'b': {'description': 'second number', 'title': 'B', 'type': 'integer'}}\n"
]
}
],
@@ -366,24 +415,39 @@
"source": [
"## Creating tools from Runnables\n",
"\n",
"LangChain [Runnables](/docs/concepts#runnable-interface) that accept string or `dict` input can be converted to tools using the [as_tool](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.as_tool) method, which allows for the specification of names, descriptions, and additional schema information for arguments.\n",
"LangChain [Runnables](/docs/concepts#runnable-interface) that accept string or `dict` input can be converted to tools using the [as_tool](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.as_tool) method, which allows for the specification of names, descriptions, and additional schema information for arguments.\n",
"\n",
"Example usage:"
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 8,
"id": "8ef593c5-cf72-4c10-bfc9-7d21874a0c24",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:25:53.065797Z",
"iopub.status.busy": "2024-09-10T20:25:53.065733Z",
"iopub.status.idle": "2024-09-10T20:25:53.130458Z",
"shell.execute_reply": "2024-09-10T20:25:53.130229Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/4j/2rz3865x6qg07tx43146py8h0000gn/T/ipykernel_95770/2548361071.py:14: LangChainBetaWarning: This API is in beta and may change in the future.\n",
"  as_tool = chain.as_tool(\n"
]
},
{
"data": {
"text/plain": [
"{'answer_style': {'title': 'Answer Style', 'type': 'string'}}"
]
},
"execution_count": 9,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
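The chain behind this output is elided by the diff; a plausible minimal reconstruction of the `as_tool` call follows — the runnable body, tool name, and description are assumptions, but the printed schema matches the output above:

```python
from langchain_core.runnables import RunnableLambda

# Any runnable that accepts string or dict input can become a tool.
runnable = RunnableLambda(lambda args: args["answer_style"].upper())

as_tool = runnable.as_tool(
    name="style_responder",
    description="Answer a question in the requested style.",
    arg_types={"answer_style": str},  # supplies type info for the dict input
)

print(as_tool.args)  # -> {'answer_style': {'title': 'Answer Style', 'type': 'string'}}
```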
@@ -428,19 +492,26 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 9,
"id": "1dad8f8e",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:25:53.131904Z",
"iopub.status.busy": "2024-09-10T20:25:53.131803Z",
"iopub.status.idle": "2024-09-10T20:25:53.136797Z",
"shell.execute_reply": "2024-09-10T20:25:53.136563Z"
}
},
"outputs": [],
"source": [
"from typing import Optional, Type\n",
"\n",
"from langchain.pydantic_v1 import BaseModel\n",
"from langchain_core.callbacks import (\n",
" AsyncCallbackManagerForToolRun,\n",
" CallbackManagerForToolRun,\n",
")\n",
"from langchain_core.tools import BaseTool\n",
"from pydantic import BaseModel\n",
"\n",
"\n",
"class CalculatorInput(BaseModel):\n",
@@ -448,9 +519,11 @@
" b: int = Field(description=\"second number\")\n",
"\n",
"\n",
"# Note: It's important that every field has type hints. BaseTool is a\n",
"# Pydantic class and not having type hints can lead to unexpected behavior.\n",
"class CustomCalculatorTool(BaseTool):\n",
" name = \"Calculator\"\n",
" description = \"useful for when you need to answer questions about math\"\n",
" name: str = \"Calculator\"\n",
" description: str = \"useful for when you need to answer questions about math\"\n",
" args_schema: Type[BaseModel] = CalculatorInput\n",
" return_direct: bool = True\n",
"\n",
@@ -477,9 +550,16 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 10,
"id": "bb551c33",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:25:53.138074Z",
"iopub.status.busy": "2024-09-10T20:25:53.138007Z",
"iopub.status.idle": "2024-09-10T20:25:53.141360Z",
"shell.execute_reply": "2024-09-10T20:25:53.141158Z"
}
},
"outputs": [
{
"name": "stdout",
@@ -487,7 +567,7 @@
"text": [
"Calculator\n",
"useful for when you need to answer questions about math\n",
"{'a': {'title': 'A', 'description': 'first number', 'type': 'integer'}, 'b': {'title': 'B', 'description': 'second number', 'type': 'integer'}}\n",
"{'a': {'description': 'first number', 'title': 'A', 'type': 'integer'}, 'b': {'description': 'second number', 'title': 'B', 'type': 'integer'}}\n",
"True\n",
"6\n",
"6\n"
@@ -512,7 +592,7 @@
"source": [
"## How to create async tools\n",
"\n",
"LangChain Tools implement the [Runnable interface 🏃](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html).\n",
"LangChain Tools implement the [Runnable interface 🏃](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html).\n",
"\n",
"All Runnables expose the `invoke` and `ainvoke` methods (as well as other methods like `batch`, `abatch`, `astream` etc).\n",
"\n",
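A minimal sketch of the pattern: an `async def` function can be decorated with `@tool` directly, and the resulting tool exposes `ainvoke` (the function is invented):

```python
import asyncio

from langchain_core.tools import tool


@tool
async def amultiply(a: int, b: int) -> int:
    """Multiply two numbers."""
    return a * b


print(asyncio.run(amultiply.ainvoke({"a": 2, "b": 3})))  # 6
```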
@@ -528,9 +608,16 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 11,
"id": "6615cb77-fd4c-4676-8965-f92cc71d4944",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:25:53.142587Z",
"iopub.status.busy": "2024-09-10T20:25:53.142504Z",
"iopub.status.idle": "2024-09-10T20:25:53.147205Z",
"shell.execute_reply": "2024-09-10T20:25:53.146995Z"
}
},
"outputs": [
{
"name": "stdout",
@@ -560,9 +647,16 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 12,
"id": "bb2af583-eadd-41f4-a645-bf8748bd3dcd",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:25:53.148383Z",
"iopub.status.busy": "2024-09-10T20:25:53.148307Z",
"iopub.status.idle": "2024-09-10T20:25:53.152684Z",
"shell.execute_reply": "2024-09-10T20:25:53.152486Z"
}
},
"outputs": [
{
"name": "stdout",
@@ -605,9 +699,16 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 13,
"id": "4ad0932c-8610-4278-8c57-f9218f654c8a",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:25:53.153849Z",
"iopub.status.busy": "2024-09-10T20:25:53.153773Z",
"iopub.status.idle": "2024-09-10T20:25:53.158312Z",
"shell.execute_reply": "2024-09-10T20:25:53.158130Z"
}
},
"outputs": [
{
"name": "stdout",
@@ -650,9 +751,16 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 14,
"id": "7094c0e8-6192-4870-a942-aad5b5ae48fd",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:25:53.159440Z",
"iopub.status.busy": "2024-09-10T20:25:53.159364Z",
"iopub.status.idle": "2024-09-10T20:25:53.160922Z",
"shell.execute_reply": "2024-09-10T20:25:53.160712Z"
}
},
"outputs": [],
"source": [
"from langchain_core.tools import ToolException\n",
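The tool body is not visible here; a hedged sketch of the error-handling pattern this section exercises — raising `ToolException` inside the function and letting `handle_tool_error=True` hand the message back to the agent instead of raising. The weather function is illustrative, though the error string mirrors the output below:

```python
from langchain_core.tools import StructuredTool, ToolException


def get_weather(city: str) -> str:
    """Illustrative tool body that signals a tool-level error."""
    raise ToolException(f"Error: There is no city by the name of {city}.")


# With handle_tool_error=True, the exception text becomes the tool's
# output instead of propagating as a Python error.
get_weather_tool = StructuredTool.from_function(
    func=get_weather,
    handle_tool_error=True,
)

print(get_weather_tool.invoke({"city": "foobar"}))
# -> 'Error: There is no city by the name of foobar.'
```

`handle_tool_error` can also be a fallback string or a callable, which is what produces the alternative outputs further down.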
@@ -673,9 +781,16 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 15,
"id": "b4d22022-b105-4ccc-a15b-412cb9ea3097",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:25:53.162046Z",
"iopub.status.busy": "2024-09-10T20:25:53.161968Z",
"iopub.status.idle": "2024-09-10T20:25:53.165236Z",
"shell.execute_reply": "2024-09-10T20:25:53.165052Z"
}
},
"outputs": [
{
"data": {
@@ -683,7 +798,7 @@
"'Error: There is no city by the name of foobar.'"
]
},
"execution_count": 16,
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -707,9 +822,16 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 16,
"id": "3fad1728-d367-4e1b-9b54-3172981271cf",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:25:53.166372Z",
"iopub.status.busy": "2024-09-10T20:25:53.166294Z",
"iopub.status.idle": "2024-09-10T20:25:53.169739Z",
"shell.execute_reply": "2024-09-10T20:25:53.169553Z"
}
},
"outputs": [
{
"data": {
@@ -717,7 +839,7 @@
"\"There is no such city, but it's probably above 0K there!\""
]
},
"execution_count": 17,
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -741,9 +863,16 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 17,
"id": "ebfe7c1f-318d-4e58-99e1-f31e69473c46",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:25:53.170937Z",
"iopub.status.busy": "2024-09-10T20:25:53.170859Z",
"iopub.status.idle": "2024-09-10T20:25:53.174498Z",
"shell.execute_reply": "2024-09-10T20:25:53.174304Z"
}
},
"outputs": [
{
"data": {
@@ -751,7 +880,7 @@
"'The following errors occurred during tool execution: `Error: There is no city by the name of foobar.`'"
]
},
"execution_count": 18,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@@ -778,7 +907,7 @@
"\n",
"Sometimes there are artifacts of a tool's execution that we want to make accessible to downstream components in our chain or agent, but that we don't want to expose to the model itself. For example, if a tool returns custom objects like Documents, we may want to pass some view or metadata about this output to the model without passing the raw output to the model. At the same time, we may want to be able to access this full output elsewhere, for example in downstream tools.\n",
"\n",
"The Tool and [ToolMessage](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.tool.ToolMessage.html) interfaces make it possible to distinguish between the parts of the tool output meant for the model (this is the ToolMessage.content) and those parts which are meant for use outside the model (ToolMessage.artifact).\n",
"The Tool and [ToolMessage](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.tool.ToolMessage.html) interfaces make it possible to distinguish between the parts of the tool output meant for the model (this is the ToolMessage.content) and those parts which are meant for use outside the model (ToolMessage.artifact).\n",
"\n",
":::info Requires ``langchain-core >= 0.2.19``\n",
"\n",
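A sketch of the `content_and_artifact` pattern the following cells exercise, reconstructed to be consistent with the visible outputs (`generate_random_ints` and its message string appear in them): the tool returns a `(content, artifact)` tuple, and invoking it with a full `ToolCall` yields a `ToolMessage` carrying both parts.

```python
import random
from typing import List, Tuple

from langchain_core.tools import tool


@tool(response_format="content_and_artifact")
def generate_random_ints(min: int, max: int, size: int) -> Tuple[str, List[int]]:
    """Generate size random ints in the range [min, max]."""
    array = [random.randint(min, max) for _ in range(size)]
    content = f"Successfully generated array of {size} random ints in [{min}, {max}]."
    return content, array


# Invoking with a ToolCall (rather than a bare args dict) returns a ToolMessage
# whose .content goes to the model and whose .artifact stays available downstream.
message = generate_random_ints.invoke(
    {
        "name": "generate_random_ints",
        "args": {"min": 0, "max": 9, "size": 10},
        "id": "123",
        "type": "tool_call",
    }
)
print(message.content, message.artifact)
```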
@@ -791,9 +920,16 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 18,
"id": "14905425-0334-43a0-9de9-5bcf622ede0e",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:25:53.175683Z",
"iopub.status.busy": "2024-09-10T20:25:53.175605Z",
"iopub.status.idle": "2024-09-10T20:25:53.178798Z",
"shell.execute_reply": "2024-09-10T20:25:53.178601Z"
}
},
"outputs": [],
"source": [
"import random\n",
@@ -820,9 +956,16 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 19,
"id": "0f2e1528-404b-46e6-b87c-f0957c4b9217",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:25:53.179881Z",
"iopub.status.busy": "2024-09-10T20:25:53.179807Z",
"iopub.status.idle": "2024-09-10T20:25:53.182100Z",
"shell.execute_reply": "2024-09-10T20:25:53.181940Z"
}
},
"outputs": [
{
"data": {
@@ -830,7 +973,7 @@
"'Successfully generated array of 10 random ints in [0, 9].'"
]
},
"execution_count": 9,
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
@@ -849,17 +992,24 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 20,
"id": "cc197777-26eb-46b3-a83b-c2ce116c6311",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:25:53.183238Z",
"iopub.status.busy": "2024-09-10T20:25:53.183170Z",
"iopub.status.idle": "2024-09-10T20:25:53.185752Z",
"shell.execute_reply": "2024-09-10T20:25:53.185567Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"ToolMessage(content='Successfully generated array of 10 random ints in [0, 9].', name='generate_random_ints', tool_call_id='123', artifact=[1, 4, 2, 5, 3, 9, 0, 4, 7, 7])"
"ToolMessage(content='Successfully generated array of 10 random ints in [0, 9].', name='generate_random_ints', tool_call_id='123', artifact=[4, 8, 2, 4, 1, 0, 9, 5, 8, 1])"
]
},
"execution_count": 3,
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
@@ -885,9 +1035,16 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 21,
"id": "fe1a09d1-378b-4b91-bb5e-0697c3d7eb92",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:25:53.186884Z",
"iopub.status.busy": "2024-09-10T20:25:53.186803Z",
"iopub.status.idle": "2024-09-10T20:25:53.190718Z",
"shell.execute_reply": "2024-09-10T20:25:53.190494Z"
}
},
"outputs": [],
"source": [
"from langchain_core.tools import BaseTool\n",
@@ -917,17 +1074,24 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 22,
"id": "8c3d16f6-1c4a-48ab-b05a-38547c592e79",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:25:53.191872Z",
"iopub.status.busy": "2024-09-10T20:25:53.191794Z",
"iopub.status.idle": "2024-09-10T20:25:53.194396Z",
"shell.execute_reply": "2024-09-10T20:25:53.194184Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"ToolMessage(content='Generated 3 floats in [0.1, 3.3333], rounded to 4 decimals.', name='generate_random_floats', tool_call_id='123', artifact=[1.4277, 0.7578, 2.4871])"
"ToolMessage(content='Generated 3 floats in [0.1, 3.3333], rounded to 4 decimals.', name='generate_random_floats', tool_call_id='123', artifact=[1.5566, 0.5134, 2.7914])"
]
},
"execution_count": 8,
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
@@ -9,7 +9,7 @@
"\n",
"A [comma-separated values (CSV)](https://en.wikipedia.org/wiki/Comma-separated_values) file is a delimited text file that uses a comma to separate values. Each line of the file is a data record. Each record consists of one or more fields, separated by commas.\n",
"\n",
"LangChain implements a [CSV Loader](https://python.langchain.com/v0.2/api_reference/community/document_loaders/langchain_community.document_loaders.csv_loader.CSVLoader.html) that will load CSV files into a sequence of [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) objects. Each row of the CSV file is translated to one document."
"LangChain implements a [CSV Loader](https://python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.csv_loader.CSVLoader.html) that will load CSV files into a sequence of [Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) objects. Each row of the CSV file is translated to one document."
]
},
{
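A minimal usage sketch of the loader this page documents (the file path is a placeholder):

```python
from langchain_community.document_loaders.csv_loader import CSVLoader

# The path is a placeholder; each row becomes one Document.
loader = CSVLoader(file_path="./example_data/mlb_teams_2012.csv")
docs = loader.load()

print(docs[0].page_content)
print(docs[0].metadata)  # includes the file path as 'source' and the row number
```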
@@ -88,7 +88,7 @@
"source": [
"## Specify a column to identify the document source\n",
"\n",
"The `\"source\"` key on [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) metadata can be set using a column of the CSV. Use the `source_column` argument to specify a source for the document created from each row. Otherwise `file_path` will be used as the source for all documents created from the CSV file.\n",
"The `\"source\"` key on [Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) metadata can be set using a column of the CSV. Use the `source_column` argument to specify a source for the document created from each row. Otherwise `file_path` will be used as the source for all documents created from the CSV file.\n",
"\n",
"This is useful when using documents loaded from CSV files for chains that answer questions using sources."
]
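Continuing the sketch above, `source_column` points the metadata `source` at a named CSV column (the column name is assumed):

```python
from langchain_community.document_loaders.csv_loader import CSVLoader

# "Team" is an assumed column name; substitute one from your file.
loader = CSVLoader(
    file_path="./example_data/mlb_teams_2012.csv",
    source_column="Team",
)
docs = loader.load()
print(docs[0].metadata["source"])  # value of the "Team" column for that row
```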
@@ -7,7 +7,7 @@
"source": [
"# How to load documents from a directory\n",
"\n",
"LangChain's [DirectoryLoader](https://python.langchain.com/v0.2/api_reference/community/document_loaders/langchain_community.document_loaders.directory.DirectoryLoader.html) implements functionality for reading files from disk into LangChain [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) objects. Here we demonstrate:\n",
"LangChain's [DirectoryLoader](https://python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.directory.DirectoryLoader.html) implements functionality for reading files from disk into LangChain [Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) objects. Here we demonstrate:\n",
"\n",
"- How to load from a filesystem, including use of wildcard patterns;\n",
"- How to use multithreading for file I/O;\n",
@@ -134,7 +134,7 @@
"metadata": {},
"source": [
"## Change loader class\n",
"By default, this uses the `UnstructuredLoader` class. To customize the loader, specify the loader class in the `loader_cls` kwarg. Below we show an example using [TextLoader](https://python.langchain.com/v0.2/api_reference/community/document_loaders/langchain_community.document_loaders.text.TextLoader.html):"
"By default, this uses the `UnstructuredLoader` class. To customize the loader, specify the loader class in the `loader_cls` kwarg. Below we show an example using [TextLoader](https://python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.text.TextLoader.html):"
]
},
{
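A sketch combining the options listed above — a wildcard pattern, multithreaded file I/O, and a custom loader class (the directory path is a placeholder):

```python
from langchain_community.document_loaders import DirectoryLoader, TextLoader

loader = DirectoryLoader(
    "../",                    # root directory (placeholder)
    glob="**/*.md",           # wildcard pattern for which files to load
    use_multithreading=True,  # parallelize file I/O
    loader_cls=TextLoader,    # override the default UnstructuredLoader
)
docs = loader.load()
print(len(docs))
```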
@@ -9,7 +9,7 @@
"\n",
"The HyperText Markup Language or [HTML](https://en.wikipedia.org/wiki/HTML) is the standard markup language for documents designed to be displayed in a web browser.\n",
"\n",
"This covers how to load `HTML` documents into LangChain [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) objects that we can use downstream.\n",
"This covers how to load `HTML` documents into LangChain [Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) objects that we can use downstream.\n",
"\n",
"Parsing HTML files often requires specialized tools. Here we demonstrate parsing via [Unstructured](https://unstructured-io.github.io/unstructured/) and [BeautifulSoup4](https://beautiful-soup-4.readthedocs.io/en/latest/), which can be installed via pip. Head over to the integrations page to find integrations with additional services, such as [Azure AI Document Intelligence](/docs/integrations/document_loaders/azure_document_intelligence) or [FireCrawl](/docs/integrations/document_loaders/firecrawl).\n",
"\n",
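For instance, a BeautifulSoup4-based load might look like this sketch (the file path is assumed):

```python
from langchain_community.document_loaders import BSHTMLLoader

loader = BSHTMLLoader("./example_data/fake-content.html")
docs = loader.load()

print(docs[0].page_content[:100])
print(docs[0].metadata)  # includes the page title and source path
```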
@@ -4,8 +4,8 @@

[JSON Lines](https://jsonlines.org/) is a file format where each line is a valid JSON value.

LangChain implements a [JSONLoader](https://python.langchain.com/v0.2/api_reference/community/document_loaders/langchain_community.document_loaders.json_loader.JSONLoader.html)
to convert JSON and JSONL data into LangChain [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document)
LangChain implements a [JSONLoader](https://python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.json_loader.JSONLoader.html)
to convert JSON and JSONL data into LangChain [Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document)
objects. It uses a specified [jq schema](https://en.wikipedia.org/wiki/Jq_(programming_language)) to parse the JSON files, allowing for the extraction of specific fields into the content
and metadata of the LangChain Document.
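A usage sketch assuming a JSON file with a `messages[].content` field (both the path and the jq expression are illustrative):

```python
from langchain_community.document_loaders import JSONLoader

loader = JSONLoader(
    file_path="./example_data/chat.json",
    jq_schema=".messages[].content",  # jq expression selecting the text field
    text_content=False,
)
docs = loader.load()
print(docs[0].page_content)
```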
@@ -9,14 +9,14 @@
"\n",
"[Markdown](https://en.wikipedia.org/wiki/Markdown) is a lightweight markup language for creating formatted text using a plain-text editor.\n",
"\n",
"Here we cover how to load `Markdown` documents into LangChain [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) objects that we can use downstream.\n",
"Here we cover how to load `Markdown` documents into LangChain [Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) objects that we can use downstream.\n",
"\n",
"We will cover:\n",
"\n",
"- Basic usage;\n",
"- Parsing of Markdown into elements such as titles, list items, and text.\n",
"\n",
"LangChain implements an [UnstructuredMarkdownLoader](https://python.langchain.com/v0.2/api_reference/community/document_loaders/langchain_community.document_loaders.markdown.UnstructuredMarkdownLoader.html) object which requires the [Unstructured](https://unstructured-io.github.io/unstructured/) package. First we install it:"
"LangChain implements an [UnstructuredMarkdownLoader](https://python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.markdown.UnstructuredMarkdownLoader.html) object which requires the [Unstructured](https://unstructured-io.github.io/unstructured/) package. First we install it:"
]
},
{
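A sketch of the two modes described above (the path is assumed); `mode="elements"` yields one `Document` per structural element rather than one per file:

```python
from langchain_community.document_loaders import UnstructuredMarkdownLoader

# Basic usage: the whole file becomes a single Document.
loader = UnstructuredMarkdownLoader("./example_data/README.md")
docs = loader.load()

# Element mode: titles, list items, and text become separate Documents.
loader = UnstructuredMarkdownLoader("./example_data/README.md", mode="elements")
elements = loader.load()
print(elements[0].metadata.get("category"))  # e.g. 'Title'
```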
@@ -3,7 +3,7 @@
The [Microsoft Office](https://www.office.com/) suite of productivity software includes Microsoft Word, Microsoft Excel, Microsoft PowerPoint, Microsoft Outlook, and Microsoft OneNote. It is available for Microsoft Windows and macOS operating systems. It is also available on Android and iOS.

This covers how to load commonly used file formats including `DOCX`, `XLSX` and `PPTX` documents into a LangChain
[Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document)
[Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document)
object that we can use downstream.
File diff suppressed because one or more lines are too long
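As one concrete possibility — the loader choice and path here are assumptions, since the page covers several options — a Word document can be loaded like so:

```python
from langchain_community.document_loaders import Docx2txtLoader

# Assumed loader and placeholder path; requires the docx2txt package.
loader = Docx2txtLoader("./example_data/fake.docx")
docs = loader.load()
print(docs[0].page_content[:100])
```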
@@ -8,7 +8,7 @@ The Embeddings class is a class designed for interfacing with text embedding mod

Embeddings create a vector representation of a piece of text. This is useful because it means we can think about text in the vector space, and do things like semantic search where we look for pieces of text that are most similar in the vector space.

The base Embeddings class in LangChain provides two methods: one for embedding documents and one for embedding a query. The former, `.embed_documents`, takes as input multiple texts, while the latter, `.embed_query`, takes a single text. The reason for having these as two separate methods is that some embedding providers have different embedding methods for documents (to be searched over) vs queries (the search query itself).
The base Embeddings class in LangChain provides two methods: one for embedding documents and one for embedding a query. The former, `.embed_documents`, takes as input multiple texts, while the latter, `.embed_query`, takes a single text. The reason for having these as two separate methods is that some embedding providers have different embedding methods for documents (to be searched over) vs queries (the search query itself).
`.embed_query` will return a list of floats, whereas `.embed_documents` returns a list of lists of floats.

## Get started
@@ -94,15 +94,6 @@ from langchain_huggingface import HuggingFaceEmbeddings

embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
```

You can also leave the `model_name` blank to use the default [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) model.

```python
from langchain_huggingface import HuggingFaceEmbeddings

embeddings_model = HuggingFaceEmbeddings()
```

</TabItem>
</Tabs>
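A short sketch of the two methods contrasted above, reusing the HuggingFace model from the snippet (the texts are placeholders):

```python
from langchain_huggingface import HuggingFaceEmbeddings

embeddings_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

# .embed_documents: many texts in, one vector per text out.
doc_vectors = embeddings_model.embed_documents(
    ["Hi there!", "Oh, hello!", "What's your name?"]
)
print(len(doc_vectors), len(doc_vectors[0]))  # (num_texts, embedding_dim)

# .embed_query: a single text in, a single vector out.
query_vector = embeddings_model.embed_query("What was the name mentioned?")
print(len(query_vector))
```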
@@ -6,7 +6,7 @@
"source": [
"# How to combine results from multiple retrievers\n",
"\n",
"The [EnsembleRetriever](https://python.langchain.com/v0.2/api_reference/langchain/retrievers/langchain.retrievers.ensemble.EnsembleRetriever.html) supports ensembling of results from multiple retrievers. It is initialized with a list of [BaseRetriever](https://python.langchain.com/v0.2/api_reference/core/retrievers/langchain_core.retrievers.BaseRetriever.html) objects. EnsembleRetrievers rerank the results of the constituent retrievers based on the [Reciprocal Rank Fusion](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) algorithm.\n",
"The [EnsembleRetriever](https://python.langchain.com/api_reference/langchain/retrievers/langchain.retrievers.ensemble.EnsembleRetriever.html) supports ensembling of results from multiple retrievers. It is initialized with a list of [BaseRetriever](https://python.langchain.com/api_reference/core/retrievers/langchain_core.retrievers.BaseRetriever.html) objects. EnsembleRetrievers rerank the results of the constituent retrievers based on the [Reciprocal Rank Fusion](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) algorithm.\n",
"\n",
"By leveraging the strengths of different algorithms, the `EnsembleRetriever` can achieve better performance than any single algorithm. \n",
"\n",
@@ -14,7 +14,7 @@
"\n",
"## Basic usage\n",
"\n",
"Below we demonstrate ensembling of a [BM25Retriever](https://python.langchain.com/v0.2/api_reference/community/retrievers/langchain_community.retrievers.bm25.BM25Retriever.html) with a retriever derived from the [FAISS vector store](https://python.langchain.com/v0.2/api_reference/community/vectorstores/langchain_community.vectorstores.faiss.FAISS.html)."
"Below we demonstrate ensembling of a [BM25Retriever](https://python.langchain.com/api_reference/community/retrievers/langchain_community.retrievers.bm25.BM25Retriever.html) with a retriever derived from the [FAISS vector store](https://python.langchain.com/api_reference/community/vectorstores/langchain_community.vectorstores.faiss.FAISS.html)."
]
},
{
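A condensed sketch of that basic usage (the toy documents and equal weights are illustrative):

```python
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

doc_list = ["I like apples", "Apples and oranges are fruits"]

bm25_retriever = BM25Retriever.from_texts(doc_list)
bm25_retriever.k = 2

faiss_vectorstore = FAISS.from_texts(doc_list, OpenAIEmbeddings())
faiss_retriever = faiss_vectorstore.as_retriever(search_kwargs={"k": 2})

# Results are fused with Reciprocal Rank Fusion, weighted per retriever.
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, faiss_retriever], weights=[0.5, 0.5]
)
docs = ensemble_retriever.invoke("apples")
```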
@@ -16,11 +16,11 @@
"also with JSON mode or prompt based techniques.\n",
":::\n",
"\n",
"LangChain implements a [tool-call attribute](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage.tool_calls) on messages from LLMs that include tool calls. See our [how-to guide on tool calling](/docs/how_to/tool_calling) for more detail. To build reference examples for data extraction, we build a chat history containing a sequence of: \n",
"LangChain implements a [tool-call attribute](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage.tool_calls) on messages from LLMs that include tool calls. See our [how-to guide on tool calling](/docs/how_to/tool_calling) for more detail. To build reference examples for data extraction, we build a chat history containing a sequence of: \n",
"\n",
"- [HumanMessage](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.human.HumanMessage.html) containing example inputs;\n",
"- [AIMessage](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html) containing example tool calls;\n",
"- [ToolMessage](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.tool.ToolMessage.html) containing example tool outputs.\n",
"- [HumanMessage](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.human.HumanMessage.html) containing example inputs;\n",
"- [AIMessage](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html) containing example tool calls;\n",
"- [ToolMessage](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.tool.ToolMessage.html) containing example tool outputs.\n",
"\n",
"LangChain adopts this convention for structuring tool calls into a conversation across LLM providers.\n",
"\n",
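Concretely, one reference example then looks like this sketch, consistent with the message dump at the end of this hunk (the ID is shortened and the values illustrative):

```python
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage

example_messages = [
    HumanMessage(content="Fiona traveled far from France to Spain."),
    AIMessage(
        content="",
        tool_calls=[
            {
                "name": "Person",
                "args": {"name": "Fiona", "hair_color": None, "height_in_meters": None},
                "id": "tool-call-1",
            }
        ],
    ),
    # A stand-in tool output confirms the call for the few-shot history.
    ToolMessage(content="You have correctly called this tool.", tool_call_id="tool-call-1"),
]
```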
@@ -29,9 +29,16 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"id": "89579144-bcb3-490a-8036-86a0a6bcd56b",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:26:41.780410Z",
"iopub.status.busy": "2024-09-10T20:26:41.780102Z",
"iopub.status.idle": "2024-09-10T20:26:42.147112Z",
"shell.execute_reply": "2024-09-10T20:26:42.146838Z"
}
},
"outputs": [],
"source": [
"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
@@ -67,17 +74,24 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"id": "610c3025-ea63-4cd7-88bd-c8cbcb4d8a3f",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:26:42.148746Z",
"iopub.status.busy": "2024-09-10T20:26:42.148621Z",
"iopub.status.idle": "2024-09-10T20:26:42.162044Z",
"shell.execute_reply": "2024-09-10T20:26:42.161794Z"
}
},
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"ChatPromptValue(messages=[SystemMessage(content=\"You are an expert extraction algorithm. Only extract relevant information from the text. If you do not know the value of an attribute asked to extract, return null for the attribute's value.\"), HumanMessage(content='testing 1 2 3'), HumanMessage(content='this is some text')])"
"ChatPromptValue(messages=[SystemMessage(content=\"You are an expert extraction algorithm. Only extract relevant information from the text. If you do not know the value of an attribute asked to extract, return null for the attribute's value.\", additional_kwargs={}, response_metadata={}), HumanMessage(content='testing 1 2 3', additional_kwargs={}, response_metadata={}), HumanMessage(content='this is some text', additional_kwargs={}, response_metadata={})])"
]
},
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
@@ -104,15 +118,22 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"id": "d875a49a-d2cb-4b9e-b5bf-41073bc3905c",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:26:42.163477Z",
"iopub.status.busy": "2024-09-10T20:26:42.163391Z",
"iopub.status.idle": "2024-09-10T20:26:42.324449Z",
"shell.execute_reply": "2024-09-10T20:26:42.324206Z"
}
},
"outputs": [],
"source": [
"from typing import List, Optional\n",
"\n",
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
"from langchain_openai import ChatOpenAI\n",
"from pydantic import BaseModel, Field\n",
"\n",
"\n",
"class Person(BaseModel):\n",
@@ -162,9 +183,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"id": "08356810-77ce-4e68-99d9-faa0326f2cee",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2024-09-10T20:26:42.326100Z",
|
||||
"iopub.status.busy": "2024-09-10T20:26:42.326016Z",
|
||||
"iopub.status.idle": "2024-09-10T20:26:42.329260Z",
|
||||
"shell.execute_reply": "2024-09-10T20:26:42.329014Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import uuid\n",
|
||||
@@ -177,7 +205,7 @@
|
||||
" SystemMessage,\n",
|
||||
" ToolMessage,\n",
|
||||
")\n",
|
||||
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class Example(TypedDict):\n",
|
||||
@@ -238,9 +266,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"id": "7f59a745-5c81-4011-a4c5-a33ec1eca7ef",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2024-09-10T20:26:42.330580Z",
|
||||
"iopub.status.busy": "2024-09-10T20:26:42.330488Z",
|
||||
"iopub.status.idle": "2024-09-10T20:26:42.332813Z",
|
||||
"shell.execute_reply": "2024-09-10T20:26:42.332598Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"examples = [\n",
|
||||
@@ -273,22 +308,29 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 6,
|
||||
"id": "976bb7b8-09c4-4a3e-80df-49a483705c08",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2024-09-10T20:26:42.333955Z",
|
||||
"iopub.status.busy": "2024-09-10T20:26:42.333876Z",
|
||||
"iopub.status.idle": "2024-09-10T20:26:42.336841Z",
|
||||
"shell.execute_reply": "2024-09-10T20:26:42.336635Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"system: content=\"You are an expert extraction algorithm. Only extract relevant information from the text. If you do not know the value of an attribute asked to extract, return null for the attribute's value.\"\n",
|
||||
"human: content=\"The ocean is vast and blue. It's more than 20,000 feet deep. There are many fish in it.\"\n",
|
||||
"ai: content='' tool_calls=[{'name': 'Person', 'args': {'name': None, 'hair_color': None, 'height_in_meters': None}, 'id': 'b843ba77-4c9c-48ef-92a4-54e534f24521'}]\n",
|
||||
"tool: content='You have correctly called this tool.' tool_call_id='b843ba77-4c9c-48ef-92a4-54e534f24521'\n",
|
||||
"human: content='Fiona traveled far from France to Spain.'\n",
|
||||
"ai: content='' tool_calls=[{'name': 'Person', 'args': {'name': 'Fiona', 'hair_color': None, 'height_in_meters': None}, 'id': '46f00d6b-50e5-4482-9406-b07bb10340f6'}]\n",
|
||||
"tool: content='You have correctly called this tool.' tool_call_id='46f00d6b-50e5-4482-9406-b07bb10340f6'\n",
|
||||
"human: content='this is some text'\n"
|
||||
"system: content=\"You are an expert extraction algorithm. Only extract relevant information from the text. If you do not know the value of an attribute asked to extract, return null for the attribute's value.\" additional_kwargs={} response_metadata={}\n",
|
||||
"human: content=\"The ocean is vast and blue. It's more than 20,000 feet deep. There are many fish in it.\" additional_kwargs={} response_metadata={}\n",
|
||||
"ai: content='' additional_kwargs={} response_metadata={} tool_calls=[{'name': 'Data', 'args': {'people': []}, 'id': '240159b1-1405-4107-a07c-3c6b91b3d5b7', 'type': 'tool_call'}]\n",
|
||||
"tool: content='You have correctly called this tool.' tool_call_id='240159b1-1405-4107-a07c-3c6b91b3d5b7'\n",
|
||||
"human: content='Fiona traveled far from France to Spain.' additional_kwargs={} response_metadata={}\n",
|
||||
"ai: content='' additional_kwargs={} response_metadata={} tool_calls=[{'name': 'Data', 'args': {'people': [{'name': 'Fiona', 'hair_color': None, 'height_in_meters': None}]}, 'id': '3fc521e4-d1d2-4c20-bf40-e3d72f1068da', 'type': 'tool_call'}]\n",
|
||||
"tool: content='You have correctly called this tool.' tool_call_id='3fc521e4-d1d2-4c20-bf40-e3d72f1068da'\n",
|
||||
"human: content='this is some text' additional_kwargs={} response_metadata={}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -320,9 +362,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"id": "df2e1ee1-69e8-4c4d-b349-95f2e320317b",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2024-09-10T20:26:42.338001Z",
|
||||
"iopub.status.busy": "2024-09-10T20:26:42.337915Z",
|
||||
"iopub.status.idle": "2024-09-10T20:26:42.349121Z",
|
||||
"shell.execute_reply": "2024-09-10T20:26:42.348908Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
@@ -343,9 +392,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 8,
|
||||
"id": "dbfea43d-769b-42e9-a76f-ce722f7d6f93",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2024-09-10T20:26:42.350335Z",
|
||||
"iopub.status.busy": "2024-09-10T20:26:42.350264Z",
|
||||
"iopub.status.idle": "2024-09-10T20:26:42.424894Z",
|
||||
"shell.execute_reply": "2024-09-10T20:26:42.424623Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"runnable = prompt | llm.with_structured_output(\n",
|
||||
@@ -367,18 +423,49 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 9,
|
||||
"id": "66545cab-af2a-40a4-9dc9-b4110458b7d3",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2024-09-10T20:26:42.426258Z",
|
||||
"iopub.status.busy": "2024-09-10T20:26:42.426187Z",
|
||||
"iopub.status.idle": "2024-09-10T20:26:46.151633Z",
|
||||
"shell.execute_reply": "2024-09-10T20:26:46.150690Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"people=[Person(name='earth', hair_color='null', height_in_meters='null')]\n",
|
||||
"people=[Person(name='earth', hair_color='null', height_in_meters='null')]\n",
|
||||
"people=[]\n",
|
||||
"people=[Person(name='earth', hair_color='null', height_in_meters='null')]\n",
|
||||
"people=[]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"people=[]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"people=[]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"people=[]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"people=[]\n"
|
||||
]
|
||||
}
|
||||
@@ -401,18 +488,49 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 10,
|
||||
"id": "1c09d805-ec16-4123-aef9-6a5b59499b5c",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2024-09-10T20:26:46.155346Z",
|
||||
"iopub.status.busy": "2024-09-10T20:26:46.155110Z",
|
||||
"iopub.status.idle": "2024-09-10T20:26:51.810359Z",
|
||||
"shell.execute_reply": "2024-09-10T20:26:51.809636Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"people=[]\n",
|
||||
"people=[]\n",
|
||||
"people=[]\n",
|
||||
"people=[]\n",
|
||||
"people=[]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"people=[]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"people=[]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"people=[]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"people=[]\n"
|
||||
]
|
||||
}
|
||||
@@ -435,9 +553,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 11,
|
||||
"id": "a9b7a762-1b75-4f9f-b9d9-6732dd05802c",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2024-09-10T20:26:51.813309Z",
|
||||
"iopub.status.busy": "2024-09-10T20:26:51.813150Z",
|
||||
"iopub.status.idle": "2024-09-10T20:26:53.474153Z",
|
||||
"shell.execute_reply": "2024-09-10T20:26:53.473522Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@@ -445,7 +570,7 @@
|
||||
"Data(people=[Person(name='Harrison', hair_color='black', height_in_meters=None)])"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -476,7 +601,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -23,16 +23,56 @@
"id": "57969139-ad0a-487e-97d8-cb30e2af9742",
"metadata": {},
"source": [
"## Set up\n",
"## Setup\n",
"\n",
"We need some example data! Let's download an article about [cars from wikipedia](https://en.wikipedia.org/wiki/Car) and load it as a LangChain [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html)."
"First we'll install the dependencies needed for this guide:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "84460db2-36e1-4037-bfa6-2a11883c2ba5",
"id": "a3b4d838-5be4-4207-8a4a-9ef5624c48f2",
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:35:19.850767Z",
"iopub.status.busy": "2024-09-10T20:35:19.850427Z",
"iopub.status.idle": "2024-09-10T20:35:21.432233Z",
"shell.execute_reply": "2024-09-10T20:35:21.431606Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install -qU langchain-community lxml faiss-cpu langchain-openai"
]
},
{
"cell_type": "markdown",
"id": "ac000b03-33fc-414f-8f2c-3850df621a35",
"metadata": {},
"source": [
"Now we need some example data! Let's download an article about [cars from wikipedia](https://en.wikipedia.org/wiki/Car) and load it as a LangChain [Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html)."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "84460db2-36e1-4037-bfa6-2a11883c2ba5",
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:35:21.434882Z",
"iopub.status.busy": "2024-09-10T20:35:21.434571Z",
"iopub.status.idle": "2024-09-10T20:35:22.214545Z",
"shell.execute_reply": "2024-09-10T20:35:22.214253Z"
}
},
"outputs": [],
"source": [
"import re\n",
@@ -55,15 +95,22 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"id": "fcb6917b-123d-4630-a0ce-ed8b293d482d",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:35:22.216143Z",
"iopub.status.busy": "2024-09-10T20:35:22.216039Z",
"iopub.status.idle": "2024-09-10T20:35:22.218117Z",
"shell.execute_reply": "2024-09-10T20:35:22.217854Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"79174\n"
"80427\n"
]
}
],
@@ -87,13 +134,20 @@
"cell_type": "code",
"execution_count": 4,
"id": "a3b288ed-87a6-4af0-aac8-20921dc370d4",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:35:22.219468Z",
"iopub.status.busy": "2024-09-10T20:35:22.219395Z",
"iopub.status.idle": "2024-09-10T20:35:22.340594Z",
"shell.execute_reply": "2024-09-10T20:35:22.340319Z"
}
},
"outputs": [],
"source": [
"from typing import List, Optional\n",
"\n",
"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
"from pydantic import BaseModel, Field\n",
"\n",
"\n",
"class KeyDevelopment(BaseModel):\n",
@@ -156,7 +210,14 @@
"cell_type": "code",
"execution_count": 5,
"id": "109f4f05-d0ff-431d-93d9-8f5aa34979a6",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:35:22.342277Z",
"iopub.status.busy": "2024-09-10T20:35:22.342171Z",
"iopub.status.idle": "2024-09-10T20:35:22.532302Z",
"shell.execute_reply": "2024-09-10T20:35:22.532034Z"
}
},
"outputs": [],
"source": [
"# | output: false\n",
@@ -171,7 +232,14 @@
"cell_type": "code",
"execution_count": 6,
"id": "aa4ae224-6d3d-4fe2-b210-7db19a9fe580",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:35:22.533795Z",
"iopub.status.busy": "2024-09-10T20:35:22.533708Z",
"iopub.status.idle": "2024-09-10T20:35:22.610573Z",
"shell.execute_reply": "2024-09-10T20:35:22.610307Z"
}
},
"outputs": [],
"source": [
"extractor = prompt | llm.with_structured_output(\n",
@@ -194,7 +262,14 @@
"cell_type": "code",
"execution_count": 7,
"id": "27b8a373-14b3-45ea-8bf5-9749122ad927",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:35:22.612123Z",
"iopub.status.busy": "2024-09-10T20:35:22.612052Z",
"iopub.status.idle": "2024-09-10T20:35:22.753493Z",
"shell.execute_reply": "2024-09-10T20:35:22.753179Z"
}
},
"outputs": [],
"source": [
"from langchain_text_splitters import TokenTextSplitter\n",
@@ -214,7 +289,7 @@
"id": "5b43d7e0-3c85-4d97-86c7-e8c984b60b0a",
"metadata": {},
"source": [
"Use [batch](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html) functionality to run the extraction in **parallel** across each chunk! \n",
"Use [batch](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html) functionality to run the extraction in **parallel** across each chunk! \n",
"\n",
":::{.callout-tip}\n",
"You can often use .batch() to parallelize the extractions! `.batch` uses a threadpool under the hood to help you parallelize workloads.\n",
@@ -227,7 +302,14 @@
"cell_type": "code",
"execution_count": 8,
"id": "6ba766b5-8d6c-48e6-8d69-f391a66b65d2",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:35:22.755067Z",
"iopub.status.busy": "2024-09-10T20:35:22.754987Z",
"iopub.status.idle": "2024-09-10T20:35:36.691130Z",
"shell.execute_reply": "2024-09-10T20:35:36.690500Z"
}
},
"outputs": [],
"source": [
"# Limit just to the first 3 chunks\n",
@@ -254,21 +336,27 @@
"cell_type": "code",
"execution_count": 9,
"id": "c3f77470-ce6c-477f-8957-650913218632",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:35:36.694799Z",
"iopub.status.busy": "2024-09-10T20:35:36.694458Z",
"iopub.status.idle": "2024-09-10T20:35:36.701416Z",
"shell.execute_reply": "2024-09-10T20:35:36.700993Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"[KeyDevelopment(year=1966, description='The Toyota Corolla began production, becoming the best-selling series of automobile in history.', evidence='The Toyota Corolla, which has been in production since 1966, is the best-selling series of automobile in history.'),\n",
" KeyDevelopment(year=1769, description='Nicolas-Joseph Cugnot built the first steam-powered road vehicle.', evidence='The French inventor Nicolas-Joseph Cugnot built the first steam-powered road vehicle in 1769.'),\n",
" KeyDevelopment(year=1808, description='François Isaac de Rivaz designed and constructed the first internal combustion-powered automobile.', evidence='the Swiss inventor François Isaac de Rivaz designed and constructed the first internal combustion-powered automobile in 1808.'),\n",
" KeyDevelopment(year=1886, description='Carl Benz patented his Benz Patent-Motorwagen, inventing the modern car.', evidence='The modern car—a practical, marketable automobile for everyday use—was invented in 1886, when the German inventor Carl Benz patented his Benz Patent-Motorwagen.'),\n",
" KeyDevelopment(year=1908, description='Ford Model T, one of the first cars affordable by the masses, began production.', evidence='One of the first cars affordable by the masses was the Ford Model T, begun in 1908, an American car manufactured by the Ford Motor Company.'),\n",
" KeyDevelopment(year=1888, description=\"Bertha Benz undertook the first road trip by car to prove the road-worthiness of her husband's invention.\", evidence=\"In August 1888, Bertha Benz, the wife of Carl Benz, undertook the first road trip by car, to prove the road-worthiness of her husband's invention.\"),\n",
"[KeyDevelopment(year=1769, description='Nicolas-Joseph Cugnot built the first full-scale, self-propelled mechanical vehicle, a steam-powered tricycle.', evidence='Nicolas-Joseph Cugnot is widely credited with building the first full-scale, self-propelled mechanical vehicle in about 1769; he created a steam-powered tricycle.'),\n",
" KeyDevelopment(year=1807, description=\"Nicéphore Niépce and his brother Claude created what was probably the world's first internal combustion engine.\", evidence=\"In 1807, Nicéphore Niépce and his brother Claude created what was probably the world's first internal combustion engine (which they called a Pyréolophore), but installed it in a boat on the river Saone in France.\"),\n",
" KeyDevelopment(year=1886, description='Carl Benz patented the Benz Patent-Motorwagen, marking the birth of the modern car.', evidence='In November 1881, French inventor Gustave Trouvé demonstrated a three-wheeled car powered by electricity at the International Exposition of Electricity. Although several other German engineers (including Gottlieb Daimler, Wilhelm Maybach, and Siegfried Marcus) were working on cars at about the same time, the year 1886 is regarded as the birth year of the modern car—a practical, marketable automobile for everyday use—when the German Carl Benz patented his Benz Patent-Motorwagen; he is generally acknowledged as the inventor of the car.'),\n",
" KeyDevelopment(year=1886, description='Carl Benz began promotion of his vehicle, marking the introduction of the first commercially available automobile.', evidence='Benz began promotion of the vehicle on 3 July 1886.'),\n",
" KeyDevelopment(year=1888, description=\"Bertha Benz undertook the first road trip by car to prove the road-worthiness of her husband's invention.\", evidence=\"In August 1888, Bertha Benz, the wife and business partner of Carl Benz, undertook the first road trip by car, to prove the road-worthiness of her husband's invention.\"),\n",
" KeyDevelopment(year=1896, description='Benz designed and patented the first internal-combustion flat engine, called boxermotor.', evidence='In 1896, Benz designed and patented the first internal-combustion flat engine, called boxermotor.'),\n",
" KeyDevelopment(year=1897, description='Nesselsdorfer Wagenbau produced the Präsident automobil, one of the first factory-made cars in the world.', evidence='The first motor car in central Europe and one of the first factory-made cars in the world, was produced by Czech company Nesselsdorfer Wagenbau (later renamed to Tatra) in 1897, the Präsident automobil.'),\n",
" KeyDevelopment(year=1890, description='Daimler Motoren Gesellschaft (DMG) was founded by Daimler and Maybach in Cannstatt.', evidence='Daimler and Maybach founded Daimler Motoren Gesellschaft (DMG) in Cannstatt in 1890.'),\n",
" KeyDevelopment(year=1891, description='Auguste Doriot and Louis Rigoulot completed the longest trip by a petrol-driven vehicle with a Daimler powered Peugeot Type 3.', evidence='In 1891, Auguste Doriot and his Peugeot colleague Louis Rigoulot completed the longest trip by a petrol-driven vehicle when their self-designed and built Daimler powered Peugeot Type 3 completed 2,100 kilometres (1,300 mi) from Valentigney to Paris and Brest and back again.')]"
" KeyDevelopment(year=1897, description='The first motor car in central Europe and one of the first factory-made cars in the world, the Präsident automobil, was produced by Nesselsdorfer Wagenbau.', evidence='The first motor car in central Europe and one of the first factory-made cars in the world, was produced by Czech company Nesselsdorfer Wagenbau (later renamed to Tatra) in 1897, the Präsident automobil.'),\n",
" KeyDevelopment(year=1901, description='Ransom Olds started large-scale, production-line manufacturing of affordable cars at his Oldsmobile factory in Lansing, Michigan.', evidence='Large-scale, production-line manufacturing of affordable cars was started by Ransom Olds in 1901 at his Oldsmobile factory in Lansing, Michigan.'),\n",
" KeyDevelopment(year=1913, description=\"Henry Ford introduced the world's first moving assembly line for cars at the Highland Park Ford Plant.\", evidence=\"This concept was greatly expanded by Henry Ford, beginning in 1913 with the world's first moving assembly line for cars at the Highland Park Ford Plant.\")]"
]
},
"execution_count": 9,
@@ -315,7 +403,14 @@
"cell_type": "code",
"execution_count": 10,
"id": "aaf37c82-625b-4fa1-8e88-73303f08ac16",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:35:36.703897Z",
"iopub.status.busy": "2024-09-10T20:35:36.703718Z",
"iopub.status.idle": "2024-09-10T20:35:38.451523Z",
"shell.execute_reply": "2024-09-10T20:35:38.450925Z"
}
},
"outputs": [],
"source": [
"from langchain_community.vectorstores import FAISS\n",
@@ -344,7 +439,14 @@
"cell_type": "code",
"execution_count": 11,
"id": "47aad00b-7013-4f7f-a1b0-02ef269093bf",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:35:38.455094Z",
"iopub.status.busy": "2024-09-10T20:35:38.454851Z",
"iopub.status.idle": "2024-09-10T20:35:38.458315Z",
"shell.execute_reply": "2024-09-10T20:35:38.457940Z"
}
},
"outputs": [],
"source": [
"rag_extractor = {\n",
@@ -356,7 +458,14 @@
"cell_type": "code",
"execution_count": 12,
"id": "68f2de01-0cd8-456e-a959-db236189d41b",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:35:38.460115Z",
"iopub.status.busy": "2024-09-10T20:35:38.459949Z",
"iopub.status.idle": "2024-09-10T20:35:43.195532Z",
"shell.execute_reply": "2024-09-10T20:35:43.194254Z"
}
},
"outputs": [],
"source": [
"results = rag_extractor.invoke(\"Key developments associated with cars\")"
@@ -366,15 +475,21 @@
"cell_type": "code",
"execution_count": 13,
"id": "1788e2d6-77bb-417f-827c-eb96c035164e",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:35:43.200497Z",
"iopub.status.busy": "2024-09-10T20:35:43.200037Z",
"iopub.status.idle": "2024-09-10T20:35:43.206773Z",
"shell.execute_reply": "2024-09-10T20:35:43.205426Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"year=1869 description='Mary Ward became one of the first documented car fatalities in Parsonstown, Ireland.' evidence='Mary Ward became one of the first documented car fatalities in 1869 in Parsonstown, Ireland,'\n",
"year=1899 description=\"Henry Bliss one of the US's first pedestrian car casualties in New York City.\" evidence=\"Henry Bliss one of the US's first pedestrian car casualties in 1899 in New York City.\"\n",
"year=2030 description='All fossil fuel vehicles will be banned in Amsterdam.' evidence='all fossil fuel vehicles will be banned in Amsterdam from 2030.'\n"
"year=2006 description='Car-sharing services in the US experienced double-digit growth in revenue and membership.' evidence='in the US, some car-sharing services have experienced double-digit growth in revenue and membership growth between 2006 and 2007.'\n",
"year=2020 description='56 million cars were manufactured worldwide, with China producing the most.' evidence='In 2020, there were 56 million cars manufactured worldwide, down from 67 million the previous year. The automotive industry in China produces by far the most (20 million in 2020).'\n"
]
}
],
@@ -416,7 +531,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
"version": "3.11.9"
}
},
"nbformat": 4,

@@ -27,9 +27,16 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"id": "25487939-8713-4ec7-b774-e4a761ac8298",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:35:44.442501Z",
"iopub.status.busy": "2024-09-10T20:35:44.442044Z",
"iopub.status.idle": "2024-09-10T20:35:44.872217Z",
"shell.execute_reply": "2024-09-10T20:35:44.871897Z"
}
},
"outputs": [],
"source": [
"# | output: false\n",
@@ -62,16 +69,23 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"id": "497eb023-c043-443d-ac62-2d4ea85fe1b0",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:35:44.873979Z",
"iopub.status.busy": "2024-09-10T20:35:44.873840Z",
"iopub.status.idle": "2024-09-10T20:35:44.878966Z",
"shell.execute_reply": "2024-09-10T20:35:44.878718Z"
}
},
"outputs": [],
"source": [
"from typing import List, Optional\n",
"\n",
"from langchain_core.output_parsers import PydanticOutputParser\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_core.pydantic_v1 import BaseModel, Field, validator\n",
"from pydantic import BaseModel, Field, validator\n",
"\n",
"\n",
"class Person(BaseModel):\n",
@@ -114,9 +128,16 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"id": "20b99ffb-a114-49a9-a7be-154c525f8ada",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:35:44.880355Z",
"iopub.status.busy": "2024-09-10T20:35:44.880277Z",
"iopub.status.idle": "2024-09-10T20:35:44.881834Z",
"shell.execute_reply": "2024-09-10T20:35:44.881601Z"
}
},
"outputs": [],
"source": [
"query = \"Anna is 23 years old and she is 6 feet tall\""
@@ -124,9 +145,16 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"id": "4f3a66ce-de19-4571-9e54-67504ae3fba7",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:35:44.883138Z",
"iopub.status.busy": "2024-09-10T20:35:44.883049Z",
"iopub.status.idle": "2024-09-10T20:35:44.885139Z",
"shell.execute_reply": "2024-09-10T20:35:44.884801Z"
}
},
"outputs": [
{
"name": "stdout",
@@ -140,7 +168,7 @@
"\n",
"Here is the output schema:\n",
"```\n",
"{\"description\": \"Identifying information about all people in a text.\", \"properties\": {\"people\": {\"title\": \"People\", \"type\": \"array\", \"items\": {\"$ref\": \"#/definitions/Person\"}}}, \"required\": [\"people\"], \"definitions\": {\"Person\": {\"title\": \"Person\", \"description\": \"Information about a person.\", \"type\": \"object\", \"properties\": {\"name\": {\"title\": \"Name\", \"description\": \"The name of the person\", \"type\": \"string\"}, \"height_in_meters\": {\"title\": \"Height In Meters\", \"description\": \"The height of the person expressed in meters.\", \"type\": \"number\"}}, \"required\": [\"name\", \"height_in_meters\"]}}}\n",
"{\"$defs\": {\"Person\": {\"description\": \"Information about a person.\", \"properties\": {\"name\": {\"description\": \"The name of the person\", \"title\": \"Name\", \"type\": \"string\"}, \"height_in_meters\": {\"description\": \"The height of the person expressed in meters.\", \"title\": \"Height In Meters\", \"type\": \"number\"}}, \"required\": [\"name\", \"height_in_meters\"], \"title\": \"Person\", \"type\": \"object\"}}, \"description\": \"Identifying information about all people in a text.\", \"properties\": {\"people\": {\"items\": {\"$ref\": \"#/$defs/Person\"}, \"title\": \"People\", \"type\": \"array\"}}, \"required\": [\"people\"]}\n",
"```\n",
"Human: Anna is 23 years old and she is 6 feet tall\n"
]
@@ -160,9 +188,16 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"id": "7e0041eb-37dc-4384-9fe3-6dd8c356371e",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:35:44.886765Z",
"iopub.status.busy": "2024-09-10T20:35:44.886675Z",
"iopub.status.idle": "2024-09-10T20:35:46.835960Z",
"shell.execute_reply": "2024-09-10T20:35:46.835282Z"
}
},
"outputs": [
{
"data": {
@@ -170,7 +205,7 @@
"People(people=[Person(name='Anna', height_in_meters=1.83)])"
]
},
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -202,16 +237,23 @@
"\n",
"If desired, it's easy to create a custom prompt and parser with `LangChain` and `LCEL`.\n",
"\n",
"To create a custom parser, define a function to parse the output from the model (typically an [AIMessage](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html)) into an object of your choice.\n",
"To create a custom parser, define a function to parse the output from the model (typically an [AIMessage](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html)) into an object of your choice.\n",
"\n",
"See below for a simple implementation of a JSON parser."
]
},
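The diff below only touches this cell's imports, so the parser itself never appears here. For context, a minimal sketch of such a parser, assuming the model is prompted to wrap its answer in ```json fences (the prompt printed further down asks for exactly that):

```python
import json
import re
from typing import List

from langchain_core.messages import AIMessage


def extract_json(message: AIMessage) -> List[dict]:
    """Parse every ```json ... ``` block in the message into a dict."""
    matches = re.findall(r"```json(.*?)```", message.content, re.DOTALL)
    try:
        return [json.loads(match.strip()) for match in matches]
    except json.JSONDecodeError as exc:
        raise ValueError(f"Failed to parse: {message.content}") from exc
```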
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"id": "b1f11912-c1bb-4a2a-a482-79bf3996961f",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:35:46.839577Z",
"iopub.status.busy": "2024-09-10T20:35:46.839233Z",
"iopub.status.idle": "2024-09-10T20:35:46.849663Z",
"shell.execute_reply": "2024-09-10T20:35:46.849177Z"
}
},
"outputs": [],
"source": [
"import json\n",
@@ -221,7 +263,7 @@
"from langchain_anthropic.chat_models import ChatAnthropic\n",
"from langchain_core.messages import AIMessage\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_core.pydantic_v1 import BaseModel, Field, validator\n",
"from pydantic import BaseModel, Field, validator\n",
"\n",
"\n",
"class Person(BaseModel):\n",
@@ -279,16 +321,23 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 7,
"id": "9260d5e8-3b6c-4639-9f3b-fb2f90239e4b",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:35:46.851870Z",
"iopub.status.busy": "2024-09-10T20:35:46.851698Z",
"iopub.status.idle": "2024-09-10T20:35:46.854786Z",
"shell.execute_reply": "2024-09-10T20:35:46.854424Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"System: Answer the user query. Output your answer as JSON that matches the given schema: ```json\n",
"{'title': 'People', 'description': 'Identifying information about all people in a text.', 'type': 'object', 'properties': {'people': {'title': 'People', 'type': 'array', 'items': {'$ref': '#/definitions/Person'}}}, 'required': ['people'], 'definitions': {'Person': {'title': 'Person', 'description': 'Information about a person.', 'type': 'object', 'properties': {'name': {'title': 'Name', 'description': 'The name of the person', 'type': 'string'}, 'height_in_meters': {'title': 'Height In Meters', 'description': 'The height of the person expressed in meters.', 'type': 'number'}}, 'required': ['name', 'height_in_meters']}}}\n",
"{'$defs': {'Person': {'description': 'Information about a person.', 'properties': {'name': {'description': 'The name of the person', 'title': 'Name', 'type': 'string'}, 'height_in_meters': {'description': 'The height of the person expressed in meters.', 'title': 'Height In Meters', 'type': 'number'}}, 'required': ['name', 'height_in_meters'], 'title': 'Person', 'type': 'object'}}, 'description': 'Identifying information about all people in a text.', 'properties': {'people': {'items': {'$ref': '#/$defs/Person'}, 'title': 'People', 'type': 'array'}}, 'required': ['people'], 'title': 'People', 'type': 'object'}\n",
"```. Make sure to wrap the answer in ```json and ``` tags\n",
"Human: Anna is 23 years old and she is 6 feet tall\n"
]
@@ -301,17 +350,32 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 8,
"id": "c523301d-ae0e-45e3-b195-7fd28c67a5c4",
"metadata": {},
"metadata": {
"execution": {
"iopub.execute_input": "2024-09-10T20:35:46.856945Z",
"iopub.status.busy": "2024-09-10T20:35:46.856769Z",
"iopub.status.idle": "2024-09-10T20:35:48.373728Z",
"shell.execute_reply": "2024-09-10T20:35:48.373079Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/bagatur/langchain/.venv/lib/python3.11/site-packages/pydantic/_internal/_fields.py:201: UserWarning: Field name \"schema\" in \"PromptInput\" shadows an attribute in parent \"BaseModel\"\n",
" warnings.warn(\n"
]
},
{
"data": {
"text/plain": [
"[{'people': [{'name': 'Anna', 'height_in_meters': 1.83}]}]"
]
},
"execution_count": 9,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -349,7 +413,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
"version": "3.11.9"
}
},
"nbformat": 4,

@@ -90,7 +90,7 @@
"outputs": [],
"source": [
"# Note that we set max_retries = 0 to avoid retrying on RateLimits, etc\n",
"openai_llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", max_retries=0)\n",
"openai_llm = ChatOpenAI(model=\"gpt-4o-mini\", max_retries=0)\n",
"anthropic_llm = ChatAnthropic(model=\"claude-3-haiku-20240307\")\n",
"llm = openai_llm.with_fallbacks([anthropic_llm])"
]

@@ -29,7 +29,7 @@
"\n",
"In this guide, we'll learn how to create a simple prompt template that provides the model with example inputs and outputs when generating. Providing the LLM with a few such examples is called few-shotting, and is a simple yet powerful way to guide generation and in some cases drastically improve model performance.\n",
"\n",
"A few-shot prompt template can be constructed from either a set of examples, or from an [Example Selector](https://python.langchain.com/v0.2/api_reference/core/example_selectors/langchain_core.example_selectors.base.BaseExampleSelector.html) class responsible for choosing a subset of examples from the defined set.\n",
"A few-shot prompt template can be constructed from either a set of examples, or from an [Example Selector](https://python.langchain.com/api_reference/core/example_selectors/langchain_core.example_selectors.base.BaseExampleSelector.html) class responsible for choosing a subset of examples from the defined set.\n",
"\n",
"This guide will cover few-shotting with string prompt templates. For a guide on few-shotting with chat messages for chat models, see [here](/docs/how_to/few_shot_examples_chat/).\n",
"\n",
@@ -160,7 +160,7 @@
"source": [
"### Pass the examples and formatter to `FewShotPromptTemplate`\n",
"\n",
"Finally, create a [`FewShotPromptTemplate`](https://python.langchain.com/v0.2/api_reference/core/prompts/langchain_core.prompts.few_shot.FewShotPromptTemplate.html) object. This object takes in the few-shot examples and the formatter for the few-shot examples. When this `FewShotPromptTemplate` is formatted, it formats the passed examples using the `example_prompt`, then adds them to the final prompt before `suffix`:"
"Finally, create a [`FewShotPromptTemplate`](https://python.langchain.com/api_reference/core/prompts/langchain_core.prompts.few_shot.FewShotPromptTemplate.html) object. This object takes in the few-shot examples and the formatter for the few-shot examples. When this `FewShotPromptTemplate` is formatted, it formats the passed examples using the `example_prompt`, then adds them to the final prompt before `suffix`:"
]
},
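To make that assembly concrete, a hedged sketch of the object described above (the `examples` list and the rendering template are assumed to be the ones defined earlier in this guide):

```python
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate

# How each individual example dict is rendered into the prompt.
example_prompt = PromptTemplate.from_template("Question: {question}\n{answer}")

prompt = FewShotPromptTemplate(
    examples=examples,              # the example dicts defined above
    example_prompt=example_prompt,  # formatter applied to each example
    suffix="Question: {input}",     # appended after the formatted examples
    input_variables=["input"],
)

print(prompt.invoke({"input": "Who was the father of Mary Ball Washington?"}).to_string())
```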
{
@@ -251,7 +251,7 @@
"source": [
"## Using an example selector\n",
"\n",
"We will reuse the example set and the formatter from the previous section. However, instead of feeding the examples directly into the `FewShotPromptTemplate` object, we will feed them into an implementation of `ExampleSelector` called [`SemanticSimilarityExampleSelector`](https://python.langchain.com/v0.2/api_reference/core/example_selectors/langchain_core.example_selectors.semantic_similarity.SemanticSimilarityExampleSelector.html). This class selects few-shot examples from the initial set based on their similarity to the input. It uses an embedding model to compute the similarity between the input and the few-shot examples, as well as a vector store to perform the nearest neighbor search.\n",
"We will reuse the example set and the formatter from the previous section. However, instead of feeding the examples directly into the `FewShotPromptTemplate` object, we will feed them into an implementation of `ExampleSelector` called [`SemanticSimilarityExampleSelector`](https://python.langchain.com/api_reference/core/example_selectors/langchain_core.example_selectors.semantic_similarity.SemanticSimilarityExampleSelector.html). This class selects few-shot examples from the initial set based on their similarity to the input. It uses an embedding model to compute the similarity between the input and the few-shot examples, as well as a vector store to perform the nearest neighbor search.\n",
"\n",
"To show what it looks like, let's initialize an instance and call it in isolation:"
]

@@ -29,7 +29,7 @@
"\n",
"This guide covers how to prompt a chat model with example inputs and outputs. Providing the model with a few such examples is called few-shotting, and is a simple yet powerful way to guide generation and in some cases drastically improve model performance.\n",
"\n",
"There does not appear to be solid consensus on how best to do few-shot prompting, and the optimal prompt compilation will likely vary by model. Because of this, we provide few-shot prompt templates like the [FewShotChatMessagePromptTemplate](https://python.langchain.com/v0.2/api_reference/core/prompts/langchain_core.prompts.few_shot.FewShotChatMessagePromptTemplate.html?highlight=fewshot#langchain_core.prompts.few_shot.FewShotChatMessagePromptTemplate) as a flexible starting point, and you can modify or replace them as you see fit.\n",
"There does not appear to be solid consensus on how best to do few-shot prompting, and the optimal prompt compilation will likely vary by model. Because of this, we provide few-shot prompt templates like the [FewShotChatMessagePromptTemplate](https://python.langchain.com/api_reference/core/prompts/langchain_core.prompts.few_shot.FewShotChatMessagePromptTemplate.html?highlight=fewshot#langchain_core.prompts.few_shot.FewShotChatMessagePromptTemplate) as a flexible starting point, and you can modify or replace them as you see fit.\n",
"\n",
"The goal of few-shot prompt templates is to dynamically select examples based on an input, and then format the examples in a final prompt to provide for the model.\n",
"\n",
@@ -49,7 +49,7 @@
"\n",
"The basic components of the template are:\n",
"- `examples`: A list of dictionary examples to include in the final prompt.\n",
"- `example_prompt`: converts each example into 1 or more messages through its [`format_messages`](https://python.langchain.com/v0.2/api_reference/core/prompts/langchain_core.prompts.chat.ChatPromptTemplate.html?highlight=format_messages#langchain_core.prompts.chat.ChatPromptTemplate.format_messages) method. A common example would be to convert each example into one human message and one AI message response, or a human message followed by a function call message.\n",
"- `example_prompt`: converts each example into 1 or more messages through its [`format_messages`](https://python.langchain.com/api_reference/core/prompts/langchain_core.prompts.chat.ChatPromptTemplate.html?highlight=format_messages#langchain_core.prompts.chat.ChatPromptTemplate.format_messages) method. A common example would be to convert each example into one human message and one AI message response, or a human message followed by a function call message.\n",
"\n",
"Below is a simple demonstration. First, define the examples you'd like to include. Let's give the LLM an unfamiliar mathematical operator, denoted by the \"🦜\" emoji:"
]
@@ -66,7 +66,8 @@
"import os\n",
"from getpass import getpass\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass()"
"if \"OPENAI_API_KEY\" not in os.environ:\n",
" os.environ[\"OPENAI_API_KEY\"] = getpass()"
]
},
{
@@ -86,7 +87,7 @@
{
"data": {
"text/plain": [
"AIMessage(content='The expression \"2 🦜 9\" is not a standard mathematical operation or equation. It appears to be a combination of the number 2 and the parrot emoji 🦜 followed by the number 9. It does not have a specific mathematical meaning.', response_metadata={'token_usage': {'completion_tokens': 54, 'prompt_tokens': 17, 'total_tokens': 71}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-aad12dda-5c47-4a1e-9949-6fe94e03242a-0', usage_metadata={'input_tokens': 17, 'output_tokens': 54, 'total_tokens': 71})"
"AIMessage(content='The expression \"2 🦜 9\" is not a standard mathematical operation or equation. It appears to be a combination of the number 2 and the parrot emoji 🦜 followed by the number 9. It does not have a specific mathematical meaning.', response_metadata={'token_usage': {'completion_tokens': 54, 'prompt_tokens': 17, 'total_tokens': 71}, 'model_name': 'gpt-4o-mini', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-aad12dda-5c47-4a1e-9949-6fe94e03242a-0', usage_metadata={'input_tokens': 17, 'output_tokens': 54, 'total_tokens': 71})"
]
},
"execution_count": 4,
@@ -97,7 +98,7 @@
"source": [
"from langchain_openai import ChatOpenAI\n",
"\n",
"model = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0.0)\n",
"model = ChatOpenAI(model=\"gpt-4o-mini\", temperature=0.0)\n",
"\n",
"model.invoke(\"What is 2 🦜 9?\")"
]
@@ -212,7 +213,7 @@
{
"data": {
"text/plain": [
"AIMessage(content='11', response_metadata={'token_usage': {'completion_tokens': 1, 'prompt_tokens': 60, 'total_tokens': 61}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-5ec4e051-262f-408e-ad00-3f2ebeb561c3-0', usage_metadata={'input_tokens': 60, 'output_tokens': 1, 'total_tokens': 61})"
"AIMessage(content='11', response_metadata={'token_usage': {'completion_tokens': 1, 'prompt_tokens': 60, 'total_tokens': 61}, 'model_name': 'gpt-4o-mini', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-5ec4e051-262f-408e-ad00-3f2ebeb561c3-0', usage_metadata={'input_tokens': 60, 'output_tokens': 1, 'total_tokens': 61})"
]
},
"execution_count": 8,
@@ -239,8 +240,8 @@
"\n",
"Sometimes you may want to select only a few examples from your overall set to show based on the input. For this, you can replace the `examples` passed into `FewShotChatMessagePromptTemplate` with an `example_selector`. The other components remain the same as above! Our dynamic few-shot prompt template would look like:\n",
"\n",
"- `example_selector`: responsible for selecting few-shot examples (and the order in which they are returned) for a given input. These implement the [BaseExampleSelector](https://python.langchain.com/v0.2/api_reference/core/example_selectors/langchain_core.example_selectors.base.BaseExampleSelector.html?highlight=baseexampleselector#langchain_core.example_selectors.base.BaseExampleSelector) interface. A common example is the vectorstore-backed [SemanticSimilarityExampleSelector](https://python.langchain.com/v0.2/api_reference/core/example_selectors/langchain_core.example_selectors.semantic_similarity.SemanticSimilarityExampleSelector.html?highlight=semanticsimilarityexampleselector#langchain_core.example_selectors.semantic_similarity.SemanticSimilarityExampleSelector)\n",
"- `example_prompt`: convert each example into 1 or more messages through its [`format_messages`](https://python.langchain.com/v0.2/api_reference/core/prompts/langchain_core.prompts.chat.ChatPromptTemplate.html?highlight=chatprompttemplate#langchain_core.prompts.chat.ChatPromptTemplate.format_messages) method. A common example would be to convert each example into one human message and one AI message response, or a human message followed by a function call message.\n",
"- `example_selector`: responsible for selecting few-shot examples (and the order in which they are returned) for a given input. These implement the [BaseExampleSelector](https://python.langchain.com/api_reference/core/example_selectors/langchain_core.example_selectors.base.BaseExampleSelector.html?highlight=baseexampleselector#langchain_core.example_selectors.base.BaseExampleSelector) interface. A common example is the vectorstore-backed [SemanticSimilarityExampleSelector](https://python.langchain.com/api_reference/core/example_selectors/langchain_core.example_selectors.semantic_similarity.SemanticSimilarityExampleSelector.html?highlight=semanticsimilarityexampleselector#langchain_core.example_selectors.semantic_similarity.SemanticSimilarityExampleSelector)\n",
"- `example_prompt`: convert each example into 1 or more messages through its [`format_messages`](https://python.langchain.com/api_reference/core/prompts/langchain_core.prompts.chat.ChatPromptTemplate.html?highlight=chatprompttemplate#langchain_core.prompts.chat.ChatPromptTemplate.format_messages) method. A common example would be to convert each example into one human message and one AI message response, or a human message followed by a function call message.\n",
"\n",
"These once again can be composed with other messages and chat templates to assemble your final prompt.\n",
"\n",
@@ -418,7 +419,7 @@
{
"data": {
"text/plain": [
"AIMessage(content='6', response_metadata={'token_usage': {'completion_tokens': 1, 'prompt_tokens': 60, 'total_tokens': 61}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-d1863e5e-17cd-4e9d-bf7a-b9f118747a65-0', usage_metadata={'input_tokens': 60, 'output_tokens': 1, 'total_tokens': 61})"
"AIMessage(content='6', response_metadata={'token_usage': {'completion_tokens': 1, 'prompt_tokens': 60, 'total_tokens': 61}, 'model_name': 'gpt-4o-mini', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-d1863e5e-17cd-4e9d-bf7a-b9f118747a65-0', usage_metadata={'input_tokens': 60, 'output_tokens': 1, 'total_tokens': 61})"
]
},
"execution_count": 13,
@@ -427,7 +428,7 @@
}
],
"source": [
"chain = final_prompt | ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0.0)\n",
"chain = final_prompt | ChatOpenAI(model=\"gpt-4o-mini\", temperature=0.0)\n",
"\n",
"chain.invoke({\"input\": \"What's 3 🦜 3?\"})"
]

@@ -175,7 +175,7 @@
"source": [
"## API reference\n",
"\n",
"For a complete description of all arguments head to the API reference: https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.utils.filter_messages.html"
"For a complete description of all arguments head to the API reference: https://python.langchain.com/api_reference/core/messages/langchain_core.messages.utils.filter_messages.html"
]
}
],

@@ -88,7 +88,7 @@
"## Passing tools to LLMs\n",
"\n",
"Chat models supporting tool calling features implement a `.bind_tools` method, which \n",
"receives a list of LangChain [tool objects](https://python.langchain.com/v0.2/api_reference/core/tools/langchain_core.tools.BaseTool.html#langchain_core.tools.BaseTool) \n",
"receives a list of LangChain [tool objects](https://python.langchain.com/api_reference/core/tools/langchain_core.tools.BaseTool.html#langchain_core.tools.BaseTool) \n",
"and binds them to the chat model in its expected format. Subsequent invocations of the \n",
"chat model will include tool schemas in its calls to the LLM.\n",
"\n",
@@ -136,7 +136,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
"from pydantic import BaseModel, Field\n",
"\n",
"\n",
"# Note that the docstrings here are crucial, as they will be passed along\n",
@@ -191,7 +191,7 @@
"\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)"
"llm = ChatOpenAI(model=\"gpt-4o-mini\", temperature=0)"
]
},
{
@@ -212,9 +212,9 @@
"## Tool calls\n",
"\n",
"If tool calls are included in an LLM response, they are attached to the corresponding \n",
"[message](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage) \n",
"or [message chunk](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessageChunk.html#langchain_core.messages.ai.AIMessageChunk) \n",
"as a list of [tool call](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.tool.ToolCall.html#langchain_core.messages.tool.ToolCall) \n",
"[message](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage) \n",
"or [message chunk](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.AIMessageChunk.html#langchain_core.messages.ai.AIMessageChunk) \n",
"as a list of [tool call](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.tool.ToolCall.html#langchain_core.messages.tool.ToolCall) \n",
"objects in the `.tool_calls` attribute. A `ToolCall` is a typed dict that includes a \n",
"tool name, dict of argument values, and (optionally) an identifier. Messages with no \n",
"tool calls default to an empty list for this attribute.\n",
@@ -258,7 +258,7 @@
"The `.tool_calls` attribute should contain valid tool calls. Note that on occasion, \n",
"model providers may output malformed tool calls (e.g., arguments that are not \n",
"valid JSON). When parsing fails in these cases, instances \n",
"of [InvalidToolCall](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.tool.InvalidToolCall.html#langchain_core.messages.tool.InvalidToolCall) \n",
"of [InvalidToolCall](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.tool.InvalidToolCall.html#langchain_core.messages.tool.InvalidToolCall) \n",
"are populated in the `.invalid_tool_calls` attribute. An `InvalidToolCall` can have \n",
"a name, string arguments, identifier, and error message.\n",
"\n",
@@ -298,8 +298,8 @@
"### Streaming\n",
"\n",
"When tools are called in a streaming context, \n",
"[message chunks](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessageChunk.html#langchain_core.messages.ai.AIMessageChunk) \n",
"will be populated with [tool call chunk](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.tool.ToolCallChunk.html#langchain_core.messages.tool.ToolCallChunk) \n",
"[message chunks](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.AIMessageChunk.html#langchain_core.messages.ai.AIMessageChunk) \n",
"will be populated with [tool call chunk](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.tool.ToolCallChunk.html#langchain_core.messages.tool.ToolCallChunk) \n",
"objects in a list via the `.tool_call_chunks` attribute. A `ToolCallChunk` includes \n",
"optional string fields for the tool `name`, `args`, and `id`, and includes an optional \n",
"integer field `index` that can be used to join chunks together. Fields are optional \n",
@@ -307,7 +307,7 @@
"that includes a substring of the arguments may have null values for the tool name and id).\n",
"\n",
"Because message chunks inherit from their parent message class, an \n",
"[AIMessageChunk](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessageChunk.html#langchain_core.messages.ai.AIMessageChunk) \n",
"[AIMessageChunk](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.AIMessageChunk.html#langchain_core.messages.ai.AIMessageChunk) \n",
"with tool call chunks will also include `.tool_calls` and `.invalid_tool_calls` fields. \n",
"These fields are parsed best-effort from the message's tool call chunks.\n",
"\n",
|
||||
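A short sketch of accumulating tool call chunks in a streaming context, under the same assumptions as above (the `add` tool is hypothetical):

```python
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI


@tool
def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b


llm_with_tools = ChatOpenAI(model="gpt-4o-mini").bind_tools([add])

acc = None
for chunk in llm_with_tools.stream("What is 2 + 3?"):
    # AIMessageChunk supports `+`, which joins tool_call_chunks on their `index`.
    acc = chunk if acc is None else acc + chunk

print(acc.tool_call_chunks)  # merged ToolCallChunks
print(acc.tool_calls)        # best-effort parsed ToolCalls
```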
@@ -696,7 +696,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
"\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"You can use arbitrary functions as [Runnables](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable). This is useful for formatting or when you need functionality not provided by other LangChain components, and custom functions used as Runnables are called [`RunnableLambdas`](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.RunnableLambda.html).\n",
|
||||
"You can use arbitrary functions as [Runnables](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable). This is useful for formatting or when you need functionality not provided by other LangChain components, and custom functions used as Runnables are called [`RunnableLambdas`](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.RunnableLambda.html).\n",
|
||||
"\n",
|
||||
"Note that all inputs to these functions need to be a SINGLE argument. If you have a function that accepts multiple arguments, you should write a wrapper that accepts a single dict input and unpacks it into multiple arguments.\n",
|
||||
"\n",
|
||||
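A minimal sketch of the single-argument wrapper pattern described above, with a hypothetical two-argument `multiply`:

```python
from langchain_core.runnables import RunnableLambda


def multiply(a: int, b: int) -> int:
    return a * b


# Runnables take a single input, so accept one dict and unpack it.
runnable = RunnableLambda(lambda d: multiply(d["a"], d["b"]))

print(runnable.invoke({"a": 3, "b": 4}))  # -> 12
```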
@@ -54,7 +54,8 @@
|
||||
"import os\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass()"
|
||||
"if \"OPENAI_API_KEY\" not in os.environ:\n",
|
||||
" os.environ[\"OPENAI_API_KEY\"] = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -210,7 +211,7 @@
|
||||
"\n",
|
||||
"## Passing run metadata\n",
|
||||
"\n",
|
||||
"Runnable lambdas can optionally accept a [RunnableConfig](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.config.RunnableConfig.html#langchain_core.runnables.config.RunnableConfig) parameter, which they can use to pass callbacks, tags, and other configuration information to nested runs."
|
||||
"Runnable lambdas can optionally accept a [RunnableConfig](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.config.RunnableConfig.html#langchain_core.runnables.config.RunnableConfig) parameter, which they can use to pass callbacks, tags, and other configuration information to nested runs."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -303,7 +304,7 @@
|
||||
"## Streaming\n",
|
||||
"\n",
|
||||
":::{.callout-note}\n",
|
||||
"[RunnableLambda](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.RunnableLambda.html) is best suited for code that does not need to support streaming. If you need to support streaming (i.e., be able to operate on chunks of inputs and yield chunks of outputs), use [RunnableGenerator](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.RunnableGenerator.html) instead as in the example below.\n",
|
||||
"[RunnableLambda](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.RunnableLambda.html) is best suited for code that does not need to support streaming. If you need to support streaming (i.e., be able to operate on chunks of inputs and yield chunks of outputs), use [RunnableGenerator](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.RunnableGenerator.html) instead as in the example below.\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"You can use generator functions (ie. functions that use the `yield` keyword, and behave like iterators) in a chain.\n",
|
||||
|
||||
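A hedged sketch of the `RunnableGenerator` approach the note recommends for streaming; `streaming_upper` is an illustrative stand-in for real per-chunk logic:

```python
from typing import Iterator

from langchain_core.runnables import RunnableGenerator


def streaming_upper(chunks: Iterator[str]) -> Iterator[str]:
    # Operate on each incoming chunk and yield output chunks as they arrive.
    for chunk in chunks:
        yield chunk.upper()


runnable = RunnableGenerator(streaming_upper)

for piece in runnable.stream("hello"):
    print(piece)  # -> HELLO
```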
@@ -24,7 +24,7 @@
|
||||
"\n",
|
||||
"## Architecture\n",
|
||||
"\n",
|
||||
"At a high-level, the steps of constructing a knowledge are from text are:\n",
|
||||
"At a high-level, the steps of constructing a knowledge graph from text are:\n",
|
||||
"\n",
|
||||
"1. **Extracting structured information from text**: Model is used to extract structured graph information from text.\n",
|
||||
"2. **Storing into graph database**: Storing the extracted structured graph information into a graph database enables downstream RAG applications\n",
|
||||
|
||||
@@ -163,8 +163,8 @@
|
||||
"from typing import List, Optional\n",
|
||||
"\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n",
|
||||
"\n",
|
||||
|
||||
@@ -347,7 +347,7 @@
|
||||
"\n",
|
||||
"If we have enough examples, we may want to only include the most relevant ones in the prompt, either because they don't fit in the model's context window or because the long tail of examples distracts the model. And specifically, given any input we want to include the examples most relevant to that input.\n",
|
||||
"\n",
|
||||
"We can do just this using an ExampleSelector. In this case we'll use a [SemanticSimilarityExampleSelector](https://python.langchain.com/v0.2/api_reference/core/example_selectors/langchain_core.example_selectors.semantic_similarity.SemanticSimilarityExampleSelector.html), which will store the examples in the vector database of our choosing. At runtime it will perform a similarity search between the input and our examples, and return the most semantically similar ones: "
|
||||
"We can do just this using an ExampleSelector. In this case we'll use a [SemanticSimilarityExampleSelector](https://python.langchain.com/api_reference/core/example_selectors/langchain_core.example_selectors.semantic_similarity.SemanticSimilarityExampleSelector.html), which will store the examples in the vector database of our choosing. At runtime it will perform a similarity search between the input and our examples, and return the most semantically similar ones: "
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
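A minimal sketch of the `SemanticSimilarityExampleSelector` usage just described, assuming the `langchain-chroma` and `langchain-openai` packages are installed and `OPENAI_API_KEY` is set; the two examples are made up:

```python
from langchain_chroma import Chroma
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_openai import OpenAIEmbeddings

examples = [
    {"input": "2 + 2", "output": "4"},
    {"input": "What is the capital of France?", "output": "Paris"},
]

# Embeds the examples into the chosen vector store (Chroma here).
selector = SemanticSimilarityExampleSelector.from_examples(
    examples, OpenAIEmbeddings(), Chroma, k=1
)

# Returns the example(s) most semantically similar to the input.
print(selector.select_examples({"input": "3 + 3"}))
```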
@@ -177,14 +177,15 @@
|
||||
"source": [
|
||||
"from typing import Optional, Type\n",
|
||||
"\n",
|
||||
"# Import things that are needed generically\n",
|
||||
"from langchain.pydantic_v1 import BaseModel, Field\n",
|
||||
"from langchain_core.callbacks import (\n",
|
||||
" AsyncCallbackManagerForToolRun,\n",
|
||||
" CallbackManagerForToolRun,\n",
|
||||
")\n",
|
||||
"from langchain_core.tools import BaseTool\n",
|
||||
"\n",
|
||||
"# Import things that are needed generically\n",
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"\n",
|
||||
"description_query = \"\"\"\n",
|
||||
"MATCH (m:Movie|Person)\n",
|
||||
"WHERE m.title CONTAINS $candidate OR m.name CONTAINS $candidate\n",
|
||||
@@ -226,14 +227,15 @@
|
||||
"source": [
|
||||
"from typing import Optional, Type\n",
|
||||
"\n",
|
||||
"# Import things that are needed generically\n",
|
||||
"from langchain.pydantic_v1 import BaseModel, Field\n",
|
||||
"from langchain_core.callbacks import (\n",
|
||||
" AsyncCallbackManagerForToolRun,\n",
|
||||
" CallbackManagerForToolRun,\n",
|
||||
")\n",
|
||||
"from langchain_core.tools import BaseTool\n",
|
||||
"\n",
|
||||
"# Import things that are needed generically\n",
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class InformationInput(BaseModel):\n",
|
||||
" entity: str = Field(description=\"movie or a person mentioned in the question\")\n",
|
||||
|
||||
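A hedged sketch of how the imports above typically come together in a `BaseTool` subclass; the `_run` body here is a stub standing in for the Neo4j `description_query` the notebook actually runs:

```python
from typing import Optional, Type

from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.tools import BaseTool
from pydantic import BaseModel, Field


class InformationInput(BaseModel):
    entity: str = Field(description="movie or a person mentioned in the question")


class InformationTool(BaseTool):
    name: str = "Information"
    description: str = "useful for answering questions about movies and people"
    args_schema: Type[BaseModel] = InformationInput

    def _run(
        self,
        entity: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        # Stub: the real tool would run description_query against the graph database.
        return f"(stub) information about {entity}"
```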
@@ -9,7 +9,7 @@ Here you’ll find answers to “How do I….?” types of questions.
|
||||
These guides are *goal-oriented* and *concrete*; they're meant to help you complete a specific task.
|
||||
For conceptual explanations see the [Conceptual guide](/docs/concepts/).
|
||||
For end-to-end walkthroughs see [Tutorials](/docs/tutorials).
|
||||
For comprehensive descriptions of every class and function see the [API Reference](https://python.langchain.com/v0.2/api_reference/).
|
||||
For comprehensive descriptions of every class and function see the [API Reference](https://python.langchain.com/api_reference/).
|
||||
|
||||
## Installation
|
||||
|
||||
@@ -27,7 +27,7 @@ This highlights functionality that is core to using LangChain.
|
||||
|
||||
## LangChain Expression Language (LCEL)
|
||||
|
||||
[LangChain Expression Language](/docs/concepts/#langchain-expression-language-lcel) is a way to create arbitrary custom chains. It is built on the [Runnable](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html) protocol.
|
||||
[LangChain Expression Language](/docs/concepts/#langchain-expression-language-lcel) is a way to create arbitrary custom chains. It is built on the [Runnable](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html) protocol.
|
||||
|
||||
[**LCEL cheatsheet**](/docs/how_to/lcel_cheatsheet/): For a quick overview of how to use the main LCEL primitives.
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ functionality to install.
|
||||
|
||||
## Official release
|
||||
|
||||
To install the main LangChain package, run:
|
||||
To install the main `langchain` package, run:
|
||||
|
||||
import Tabs from '@theme/Tabs';
|
||||
import TabItem from '@theme/TabItem';
|
||||
@@ -26,8 +26,7 @@ import CodeBlock from "@theme/CodeBlock";
|
||||
|
||||
While this package acts as a sane starting point to using LangChain,
|
||||
much of the value of LangChain comes when integrating it with various model providers, datastores, etc.
|
||||
By default, the dependencies needed to do that are NOT installed. You will need to install the dependencies for specific integrations separately.
|
||||
We'll show how to do that in the next sections of this guide.
|
||||
By default, the dependencies needed to do that are NOT installed. You will need to install the dependencies for specific integrations separately, which we show below.
|
||||
|
||||
## Ecosystem packages
|
||||
|
||||
@@ -41,14 +40,6 @@ When installing a package, you do not need to explicitly install that package's
|
||||
However, you may choose to if you are using a feature only available in a certain version of that dependency.
|
||||
If you do, you should make sure that the installed or pinned version is compatible with any other integration packages you use.
|
||||
|
||||
### From source
|
||||
|
||||
If you want to install from source, you can do so by cloning the repo and be sure that the directory is `PATH/TO/REPO/langchain/libs/langchain` running:
|
||||
|
||||
```bash
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
### LangChain core
|
||||
The `langchain-core` package contains base abstractions that the rest of the LangChain ecosystem uses, along with the LangChain Expression Language. It is automatically installed by `langchain`, but can also be used separately. Install with:
|
||||
|
||||
@@ -56,8 +47,18 @@ The `langchain-core` package contains base abstractions that the rest of the Lan
|
||||
pip install langchain-core
|
||||
```
|
||||
|
||||
### LangChain community
|
||||
The `langchain-community` package contains third-party integrations. Install with:
|
||||
### Integration packages
|
||||
|
||||
Certain integrations like OpenAI and Anthropic have their own packages.
|
||||
Any integrations that require their own package will be documented as such in the [Integration docs](/docs/integrations/platforms/).
|
||||
You can see a list of all integration packages in the [API reference](https://api.python.langchain.com) under the "Partner libs" dropdown.
|
||||
To install one of these run:
|
||||
|
||||
```bash
|
||||
pip install langchain-openai
|
||||
```
|
||||
|
||||
Any integrations that haven't been split out into their own packages will live in the `langchain-community` package. Install with:
|
||||
|
||||
```bash
|
||||
pip install langchain-community
|
||||
@@ -89,7 +90,7 @@ pip install "langserve[all]"
|
||||
```
|
||||
for both client and server dependencies. Or `pip install "langserve[client]"` for client code, and `pip install "langserve[server]"` for server code.
|
||||
|
||||
## LangChain CLI
|
||||
### LangChain CLI
|
||||
The LangChain CLI is useful for working with LangChain templates and other LangServe projects.
|
||||
Install with:
|
||||
|
||||
@@ -105,3 +106,13 @@ If you are not using LangChain, you can install it with:
|
||||
```bash
|
||||
pip install langsmith
|
||||
```
|
||||
|
||||
### From source
|
||||
|
||||
If you want to install a package from source, you can do so by cloning the [main LangChain repo](https://github.com/langchain-ai/langchain), entering the directory of the package you want to install (`PATH/TO/REPO/langchain/libs/{package}`), and running:
|
||||
|
||||
```bash
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
LangGraph, LangSmith SDK, and certain integration packages live outside the main LangChain repo. You can see [all repos here](https://github.com/langchain-ai).
|
||||
31
docs/docs/how_to/langgraph_persistence.md
Normal file
@@ -0,0 +1,31 @@
|
||||
# How to upgrade to LangGraph persistence
|
||||
|
||||
As of the v0.3 release of LangChain, we recommend that LangChain users take advantage of [LangGraph persistence](https://langchain-ai.github.io/langgraph/concepts/persistence/) to incorporate `memory` into their LangChain application.
|
||||
|
||||
## Evolution of memory in LangChain
|
||||
|
||||
The concept of memory has evolved significantly in LangChain since its initial release.
|
||||
|
||||
In LangChain 0.0.x, memory was based on the [BaseMemory](https://api.python.langchain.com/en/latest/memory/langchain_core.memory.BaseMemory.html) interface and the [BaseChatMessageHistory](https://api.python.langchain.com/en/latest/chat_history/langchain_core.chat_history.BaseChatMessageHistory.html) interface.
|
||||
|
||||
There were a number of useful [memory implementations](https://python.langchain.com/api_reference/langchain/memory.html) based
|
||||
on the `BaseMemory` interface (e.g., [ConversationBufferMemory](https://python.langchain.com/api_reference/langchain/memory/langchain.memory.buffer.ConversationBufferMemory.html), [ConversationBufferWindowMemory](https://python.langchain.com/api_reference/langchain/memory/langchain.memory.buffer_window.ConversationBufferWindowMemory.html)); however, these lacked built-in support for multi-user, multi-conversation scenarios, which are essential for practical conversational AI systems.
|
||||
|
||||
:::note
|
||||
If you are relying on any deprecated memory abstractions in LangChain 0.0.x, we recommend that you follow
|
||||
the migration guide below to upgrade to the new LangGraph persistence feature in LangChain 0.3.x:
|
||||
https://python.langchain.com/docs/versions/migrating_memory/
|
||||
:::
|
||||
|
||||
As of LangChain v0.1, we started recommending that users rely primarily on [BaseChatMessageHistory](https://python.langchain.com/api_reference/core/chat_history/langchain_core.chat_history.BaseChatMessageHistory.html). `BaseChatMessageHistory` is a simple persistence layer for a chat history that can be used to store and retrieve messages in a conversation. At this time, the only option for orchestrating LangChain chains was via [LCEL](https://python.langchain.com/docs/how_to/#langchain-expression-language-lcel). When using `LCEL`, memory can be added using the [RunnableWithMessageHistory](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.history.RunnableWithMessageHistory.html#langchain_core.runnables.history.RunnableWithMessageHistory) interface. While this option is sufficient for building a simple chat application, many users found the API to be unintuitive and difficult to work with.
|
||||
|
||||
As of LangChain v0.3, we recommend that new code rely on LangGraph for both orchestration and persistence.
|
||||
|
||||
Specifically, for orchestration, instead of writing `LCEL` code, users can define LangGraph [graphs](https://langchain-ai.github.io/langgraph/concepts/low_level/). This allows users to keep using `LCEL` within individual nodes when `LCEL` is needed, while
|
||||
making it easy to define complex orchestration logic that is more readable and maintainable.
|
||||
|
||||
For persistence, users can use LangGraph's [persistence](https://langchain-ai.github.io/langgraph/concepts/persistence/) feature to store and retrieve the state of a graph. LangGraph persistence is extremely flexible and can support a much wider range of use cases than the `RunnableWithMessageHistory` interface.
|
||||
|
||||
:::important
|
||||
If you have been using `RunnableWithMessageHistory` or `BaseChatMessageHistory`, you do not need to make any changes. We do not plan on deprecating either functionality in the near future. This functionality is sufficient for simple chat applications and any code that uses `RunnableWithMessageHistory` will continue to work as expected.
|
||||
:::
|
||||
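As a minimal sketch of the LangGraph persistence approach this page recommends — assuming `langgraph` and `langchain-openai` are installed and `OPENAI_API_KEY` is set; the `thread_id` value is arbitrary:

```python
from langchain_openai import ChatOpenAI
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import create_react_agent

agent = create_react_agent(
    ChatOpenAI(model="gpt-4o-mini"),
    tools=[],
    checkpointer=MemorySaver(),  # keeps per-thread state in memory
)

# The thread_id scopes the conversation, enabling multi-user, multi-conversation use.
config = {"configurable": {"thread_id": "conversation-1"}}
agent.invoke({"messages": [("human", "hi, I'm Polly")]}, config)
result = agent.invoke({"messages": [("human", "what's my name?")]}, config)
print(result["messages"][-1].content)
```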
@@ -7,10 +7,10 @@
|
||||
"source": [
|
||||
"# LangChain Expression Language Cheatsheet\n",
|
||||
"\n",
|
||||
"This is a quick reference for all the most important LCEL primitives. For more advanced usage see the [LCEL how-to guides](/docs/how_to/#langchain-expression-language-lcel) and the [full API reference](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html).\n",
|
||||
"This is a quick reference for all the most important LCEL primitives. For more advanced usage see the [LCEL how-to guides](/docs/how_to/#langchain-expression-language-lcel) and the [full API reference](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html).\n",
|
||||
"\n",
|
||||
"### Invoke a runnable\n",
|
||||
"#### [Runnable.invoke()](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.invoke) / [Runnable.ainvoke()](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.ainvoke)"
|
||||
"#### [Runnable.invoke()](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.invoke) / [Runnable.ainvoke()](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.ainvoke)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -46,7 +46,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Batch a runnable\n",
|
||||
"#### [Runnable.batch()](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.batch) / [Runnable.abatch()](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.abatch)"
|
||||
"#### [Runnable.batch()](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.batch) / [Runnable.abatch()](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.abatch)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -82,7 +82,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Stream a runnable\n",
|
||||
"#### [Runnable.stream()](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.stream) / [Runnable.astream()](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.astream)"
|
||||
"#### [Runnable.stream()](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.stream) / [Runnable.astream()](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.astream)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -165,7 +165,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Invoke runnables in parallel\n",
|
||||
"#### [RunnableParallel](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.RunnableParallel.html)"
|
||||
"#### [RunnableParallel](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.RunnableParallel.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -202,7 +202,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Turn any function into a runnable\n",
|
||||
"#### [RunnableLambda](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.RunnableLambda.html)"
|
||||
"#### [RunnableLambda](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.RunnableLambda.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -240,7 +240,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Merge input and output dicts\n",
|
||||
"#### [RunnablePassthrough.assign](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html)"
|
||||
"#### [RunnablePassthrough.assign](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -276,7 +276,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Include input dict in output dict\n",
|
||||
"#### [RunnablePassthrough](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html)"
|
||||
"#### [RunnablePassthrough](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -316,7 +316,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Add default invocation args\n",
|
||||
"#### [Runnable.bind](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.bind)"
|
||||
"#### [Runnable.bind](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.bind)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -360,7 +360,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Add fallbacks\n",
|
||||
"#### [Runnable.with_fallbacks](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_fallbacks)"
|
||||
"#### [Runnable.with_fallbacks](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_fallbacks)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -397,7 +397,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Add retries\n",
|
||||
"#### [Runnable.with_retry](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_retry)"
|
||||
"#### [Runnable.with_retry](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_retry)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -449,7 +449,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Configure runnable execution\n",
|
||||
"#### [RunnableConfig](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.config.RunnableConfig.html)"
|
||||
"#### [RunnableConfig](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.config.RunnableConfig.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -487,7 +487,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Add default config to runnable\n",
|
||||
"#### [Runnable.with_config](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_config)"
|
||||
"#### [Runnable.with_config](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -526,7 +526,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Make runnable attributes configurable\n",
|
||||
"#### [Runnable.with_configurable_fields](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.RunnableSerializable.html#langchain_core.runnables.base.RunnableSerializable.configurable_fields)"
|
||||
"#### [Runnable.with_configurable_fields](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.RunnableSerializable.html#langchain_core.runnables.base.RunnableSerializable.configurable_fields)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -605,7 +605,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Make chain components configurable\n",
|
||||
"#### [Runnable.with_configurable_alternatives](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.RunnableSerializable.html#langchain_core.runnables.base.RunnableSerializable.configurable_alternatives)"
|
||||
"#### [Runnable.with_configurable_alternatives](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.RunnableSerializable.html#langchain_core.runnables.base.RunnableSerializable.configurable_alternatives)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -745,7 +745,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Generate a stream of events\n",
|
||||
"#### [Runnable.astream_events](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.astream_events)"
|
||||
"#### [Runnable.astream_events](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.astream_events)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -817,7 +817,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Yield batched outputs as they complete\n",
|
||||
"#### [Runnable.batch_as_completed](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.batch_as_completed) / [Runnable.abatch_as_completed](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.abatch_as_completed)"
|
||||
"#### [Runnable.batch_as_completed](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.batch_as_completed) / [Runnable.abatch_as_completed](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.abatch_as_completed)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -858,7 +858,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Return subset of output dict\n",
|
||||
"#### [Runnable.pick](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.pick)"
|
||||
"#### [Runnable.pick](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.pick)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -893,7 +893,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Declaratively make a batched version of a runnable\n",
|
||||
"#### [Runnable.map](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.map)"
|
||||
"#### [Runnable.map](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.map)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -930,7 +930,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Get a graph representation of a runnable\n",
|
||||
"#### [Runnable.get_graph](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.get_graph)"
|
||||
"#### [Runnable.get_graph](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.get_graph)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -991,7 +991,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Get all prompts in a chain\n",
|
||||
"#### [Runnable.get_prompts](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.get_prompts)"
|
||||
"#### [Runnable.get_prompts](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.get_prompts)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1071,7 +1071,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Add lifecycle listeners\n",
|
||||
"#### [Runnable.with_listeners](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_listeners)"
|
||||
"#### [Runnable.with_listeners](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_listeners)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -25,7 +25,8 @@
|
||||
"import os\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass()\n",
|
||||
"if \"OPENAI_API_KEY\" not in os.environ:\n",
|
||||
" os.environ[\"OPENAI_API_KEY\"] = getpass()\n",
|
||||
"# Please manually enter OpenAI Key"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
"\n",
|
||||
"There are some API-specific callback context managers that allow you to track token usage across multiple calls. You'll need to check whether such an integration is available for your particular model.\n",
|
||||
"\n",
|
||||
"If such an integration is not available for your model, you can create a custom callback manager by adapting the implementation of the [OpenAI callback manager](https://python.langchain.com/v0.2/api_reference/community/callbacks/langchain_community.callbacks.openai_info.OpenAICallbackHandler.html).\n",
|
||||
"If such an integration is not available for your model, you can create a custom callback manager by adapting the implementation of the [OpenAI callback manager](https://python.langchain.com/api_reference/community/callbacks/langchain_community.callbacks.openai_info.OpenAICallbackHandler.html).\n",
|
||||
"\n",
|
||||
"### OpenAI\n",
|
||||
"\n",
|
||||
|
||||
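A minimal sketch of the OpenAI callback context manager mentioned above, assuming `langchain-community` and `langchain-openai` are installed:

```python
from langchain_community.callbacks import get_openai_callback
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")

# Token counts accumulate across every call made inside the context manager.
with get_openai_callback() as cb:
    llm.invoke("Tell me a joke")
    llm.invoke("Tell me another one")

print(cb.prompt_tokens, cb.completion_tokens, cb.total_tokens, cb.total_cost)
```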
@@ -244,7 +244,7 @@
|
||||
"\n",
|
||||
"* E.g., for Llama 2 7b: `ollama pull llama2` will download the most basic version of the model (e.g., smallest # parameters and 4 bit quantization)\n",
|
||||
"* We can also specify a particular version from the [model list](https://github.com/jmorganca/ollama?tab=readme-ov-file#model-library), e.g., `ollama pull llama2:13b`\n",
|
||||
"* See the full set of parameters on the [API reference page](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.ollama.Ollama.html)"
|
||||
"* See the full set of parameters on the [API reference page](https://python.langchain.com/api_reference/community/llms/langchain_community.llms.ollama.Ollama.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
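Following the Ollama notes above, a minimal sketch assuming a local Ollama server is running and the model has already been pulled:

```python
from langchain_community.llms import Ollama

# Assumes `ollama pull llama2` has been run against a local Ollama server.
llm = Ollama(model="llama2")
print(llm.invoke("The first man on the moon was ..."))
```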
@@ -280,9 +280,9 @@
|
||||
"\n",
|
||||
"For example, below we run inference on `llama2-13b` with 4 bit quantization downloaded from [HuggingFace](https://huggingface.co/TheBloke/Llama-2-13B-GGML/tree/main).\n",
|
||||
"\n",
|
||||
"As noted above, see the [API reference](https://python.langchain.com/v0.2/api_reference/langchain/llms/langchain.llms.llamacpp.LlamaCpp.html?highlight=llamacpp#langchain.llms.llamacpp.LlamaCpp) for the full set of parameters. \n",
|
||||
"As noted above, see the [API reference](https://python.langchain.com/api_reference/langchain/llms/langchain.llms.llamacpp.LlamaCpp.html?highlight=llamacpp#langchain.llms.llamacpp.LlamaCpp) for the full set of parameters. \n",
|
||||
"\n",
|
||||
"From the [llama.cpp API reference docs](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.llamacpp.LlamaCpp.html), a few are worth commenting on:\n",
|
||||
"From the [llama.cpp API reference docs](https://python.langchain.com/api_reference/community/llms/langchain_community.llms.llamacpp.LlamaCpp.html), a few are worth commenting on:\n",
|
||||
"\n",
|
||||
"`n_gpu_layers`: number of layers to be loaded into GPU memory\n",
|
||||
"\n",
|
||||
@@ -416,7 +416,7 @@
|
||||
"\n",
|
||||
"We can use model weights downloaded from [GPT4All](/docs/integrations/llms/gpt4all) model explorer.\n",
|
||||
"\n",
|
||||
"Similar to what is shown above, we can run inference and use [the API reference](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.gpt4all.GPT4All.html) to set parameters of interest."
|
||||
"Similar to what is shown above, we can run inference and use [the API reference](https://python.langchain.com/api_reference/community/llms/langchain_community.llms.gpt4all.GPT4All.html) to set parameters of interest."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -55,7 +55,7 @@
|
||||
"id": "f88ffa0d-f4a7-482c-88de-cbec501a79b1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For the OpenAI API to return log probabilities we need to configure the `logprobs=True` param. Then, the logprobs are included on each output [`AIMessage`](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html) as part of the `response_metadata`:"
|
||||
"For the OpenAI API to return log probabilities we need to configure the `logprobs=True` param. Then, the logprobs are included on each output [`AIMessage`](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html) as part of the `response_metadata`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -94,7 +94,7 @@
|
||||
"source": [
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\").bind(logprobs=True)\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-4o-mini\").bind(logprobs=True)\n",
|
||||
"\n",
|
||||
"msg = llm.invoke((\"human\", \"how are you today\"))\n",
|
||||
"\n",
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
"\n",
|
||||
"To mitigate the [\"lost in the middle\"](https://arxiv.org/abs/2307.03172) effect, you can re-order documents after retrieval such that the most relevant documents are positioned at extrema (e.g., the first and last pieces of context), and the least relevant documents are positioned in the middle. In some cases this can help surface the most relevant information to LLMs.\n",
|
||||
"\n",
|
||||
"The [LongContextReorder](https://python.langchain.com/v0.2/api_reference/community/document_transformers/langchain_community.document_transformers.long_context_reorder.LongContextReorder.html) document transformer implements this re-ordering procedure. Below we demonstrate an example."
|
||||
"The [LongContextReorder](https://python.langchain.com/api_reference/community/document_transformers/langchain_community.document_transformers.long_context_reorder.LongContextReorder.html) document transformer implements this re-ordering procedure. Below we demonstrate an example."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
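A minimal sketch of the re-ordering procedure described above; the documents are stand-ins, assumed to arrive most-relevant-first as a retriever would return them:

```python
from langchain_community.document_transformers import LongContextReorder
from langchain_core.documents import Document

docs = [Document(page_content=f"doc {i}") for i in range(6)]

# Most relevant documents end up first and last; least relevant land in the middle.
reordered = LongContextReorder().transform_documents(docs)
print([d.page_content for d in reordered])
```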
@@ -17,7 +17,7 @@
|
||||
"When a full paragraph or document is embedded, the embedding process considers both the overall context and the relationships between the sentences and phrases within the text. This can result in a more comprehensive vector representation that captures the broader meaning and themes of the text.\n",
|
||||
"```\n",
|
||||
" \n",
|
||||
"As mentioned, chunking often aims to keep text with common context together. With this in mind, we might want to specifically honor the structure of the document itself. For example, a markdown file is organized by headers. Creating chunks within specific header groups is an intuitive idea. To address this challenge, we can use [MarkdownHeaderTextSplitter](https://python.langchain.com/v0.2/api_reference/text_splitters/markdown/langchain_text_splitters.markdown.MarkdownHeaderTextSplitter.html). This will split a markdown file by a specified set of headers. \n",
|
||||
"As mentioned, chunking often aims to keep text with common context together. With this in mind, we might want to specifically honor the structure of the document itself. For example, a markdown file is organized by headers. Creating chunks within specific header groups is an intuitive idea. To address this challenge, we can use [MarkdownHeaderTextSplitter](https://python.langchain.com/api_reference/text_splitters/markdown/langchain_text_splitters.markdown.MarkdownHeaderTextSplitter.html). This will split a markdown file by a specified set of headers. \n",
|
||||
"\n",
|
||||
"For example, if we want to split this markdown:\n",
|
||||
"```\n",
|
||||
|
||||
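A minimal sketch of the header-based splitting described above, with a made-up markdown snippet standing in for the notebook's example:

```python
from langchain_text_splitters import MarkdownHeaderTextSplitter

markdown = "# Title\n\nIntro text\n\n## Section A\n\nDetails about A"

splitter = MarkdownHeaderTextSplitter(
    headers_to_split_on=[("#", "Header 1"), ("##", "Header 2")]
)
for doc in splitter.split_text(markdown):
    # Each chunk keeps its header lineage in metadata.
    print(doc.metadata, "->", doc.page_content)
```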
@@ -11,12 +11,30 @@
|
||||
"\n",
|
||||
"The `merge_message_runs` utility makes it easy to merge consecutive messages of the same type.\n",
|
||||
"\n",
|
||||
"### Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "198ce37f-4466-45a2-8878-d75cd01a5d23",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-core langchain-anthropic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b5c3ca6e-e5b3-4151-8307-9101713a20ae",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Basic usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 8,
|
||||
"id": "1a215bbb-c05c-40b0-a6fd-d94884d517df",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -24,11 +42,11 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"SystemMessage(content=\"you're a good assistant.\\nyou always respond with a joke.\")\n",
|
||||
"SystemMessage(content=\"you're a good assistant.\\nyou always respond with a joke.\", additional_kwargs={}, response_metadata={})\n",
|
||||
"\n",
|
||||
"HumanMessage(content=[{'type': 'text', 'text': \"i wonder why it's called langchain\"}, 'and who is harrison chasing anyways'])\n",
|
||||
"HumanMessage(content=[{'type': 'text', 'text': \"i wonder why it's called langchain\"}, 'and who is harrison chasing anyways'], additional_kwargs={}, response_metadata={})\n",
|
||||
"\n",
|
||||
"AIMessage(content='Well, I guess they thought \"WordRope\" and \"SentenceString\" just didn\\'t have the same ring to it!\\nWhy, he\\'s probably chasing after the last cup of coffee in the office!')\n"
|
||||
"AIMessage(content='Well, I guess they thought \"WordRope\" and \"SentenceString\" just didn\\'t have the same ring to it!\\nWhy, he\\'s probably chasing after the last cup of coffee in the office!', additional_kwargs={}, response_metadata={})\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -63,38 +81,6 @@
|
||||
"Notice that if the contents of one of the messages to merge is a list of content blocks then the merged message will have a list of content blocks. And if both messages to merge have string contents then those are concatenated with a newline character."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "11f7e8d3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The `merge_message_runs` utility also works with messages composed together using the overloaded `+` operation:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b51855c5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"messages = (\n",
|
||||
" SystemMessage(\"you're a good assistant.\")\n",
|
||||
" + SystemMessage(\"you always respond with a joke.\")\n",
|
||||
" + HumanMessage([{\"type\": \"text\", \"text\": \"i wonder why it's called langchain\"}])\n",
|
||||
" + HumanMessage(\"and who is harrison chasing anyways\")\n",
|
||||
" + AIMessage(\n",
|
||||
" 'Well, I guess they thought \"WordRope\" and \"SentenceString\" just didn\\'t have the same ring to it!'\n",
|
||||
" )\n",
|
||||
" + AIMessage(\n",
|
||||
" \"Why, he's probably chasing after the last cup of coffee in the office!\"\n",
|
||||
" )\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"merged = merge_message_runs(messages)\n",
|
||||
"print(\"\\n\\n\".join([repr(x) for x in merged]))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b2eee74-71c8-4168-b968-bca580c25d18",
|
||||
@@ -107,23 +93,30 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 9,
|
||||
"id": "6d5a0283-11f8-435b-b27b-7b18f7693592",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=[], response_metadata={'id': 'msg_01D6R8Naum57q8qBau9vLBUX', 'model': 'claude-3-sonnet-20240229', 'stop_reason': 'end_turn', 'stop_sequence': None, 'usage': {'input_tokens': 84, 'output_tokens': 3}}, id='run-ac0c465b-b54f-4b8b-9295-e5951250d653-0', usage_metadata={'input_tokens': 84, 'output_tokens': 3, 'total_tokens': 87})"
|
||||
"AIMessage(content=[], additional_kwargs={}, response_metadata={'id': 'msg_01KNGUMTuzBVfwNouLDpUMwf', 'model': 'claude-3-sonnet-20240229', 'stop_reason': 'end_turn', 'stop_sequence': None, 'usage': {'input_tokens': 84, 'output_tokens': 3}}, id='run-b908b198-9c24-450b-9749-9d4a8182937b-0', usage_metadata={'input_tokens': 84, 'output_tokens': 3, 'total_tokens': 87})"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# pip install -U langchain-anthropic\n",
|
||||
"%pip install -qU langchain-anthropic\n",
|
||||
"from langchain_anthropic import ChatAnthropic\n",
|
||||
"\n",
|
||||
"llm = ChatAnthropic(model=\"claude-3-sonnet-20240229\", temperature=0)\n",
|
||||
@@ -146,19 +139,19 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 10,
|
||||
"id": "460817a6-c327-429d-958e-181a8c46059c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[SystemMessage(content=\"you're a good assistant.\\nyou always respond with a joke.\"),\n",
|
||||
" HumanMessage(content=[{'type': 'text', 'text': \"i wonder why it's called langchain\"}, 'and who is harrison chasing anyways']),\n",
|
||||
" AIMessage(content='Well, I guess they thought \"WordRope\" and \"SentenceString\" just didn\\'t have the same ring to it!\\nWhy, he\\'s probably chasing after the last cup of coffee in the office!')]"
|
||||
"[SystemMessage(content=\"you're a good assistant.\\nyou always respond with a joke.\", additional_kwargs={}, response_metadata={}),\n",
|
||||
" HumanMessage(content=[{'type': 'text', 'text': \"i wonder why it's called langchain\"}, 'and who is harrison chasing anyways'], additional_kwargs={}, response_metadata={}),\n",
|
||||
" AIMessage(content='Well, I guess they thought \"WordRope\" and \"SentenceString\" just didn\\'t have the same ring to it!\\nWhy, he\\'s probably chasing after the last cup of coffee in the office!', additional_kwargs={}, response_metadata={})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -167,6 +160,53 @@
|
||||
"merger.invoke(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4178837d-b155-492d-9404-d567accc1fa0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"`merge_message_runs` can also be placed after a prompt:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "620530ab-ed05-4899-b984-bfa4cd738465",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='A convergent series is an infinite series whose partial sums approach a finite value as more terms are added. In other words, the sequence of partial sums has a limit.\\n\\nMore formally, an infinite series Σ an (where an are the terms of the series) is said to be convergent if the sequence of partial sums:\\n\\nS1 = a1\\nS2 = a1 + a2 \\nS3 = a1 + a2 + a3\\n...\\nSn = a1 + a2 + a3 + ... + an\\n...\\n\\nconverges to some finite number S as n goes to infinity. We write:\\n\\nlim n→∞ Sn = S\\n\\nThe finite number S is called the sum of the convergent infinite series.\\n\\nIf the sequence of partial sums does not approach any finite limit, the infinite series is said to be divergent.\\n\\nSome key properties:\\n- A series converges if and only if the sequence of its partial sums is a Cauchy sequence.\\n- Absolute/conditional convergence criteria help determine if a given series converges.\\n- Convergent series have many important applications in mathematics, physics, engineering etc.', additional_kwargs={}, response_metadata={'id': 'msg_01MfV6y2hep7ZNvDz24A36U4', 'model': 'claude-3-sonnet-20240229', 'stop_reason': 'end_turn', 'stop_sequence': None, 'usage': {'input_tokens': 29, 'output_tokens': 267}}, id='run-9d925f58-021e-4bd0-94fc-f8f5e91010a4-0', usage_metadata={'input_tokens': 29, 'output_tokens': 267, 'total_tokens': 296})"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"You're great a {skill}\"),\n",
|
||||
" (\"system\", \"You're also great at explaining things\"),\n",
|
||||
" (\"human\", \"{query}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"chain = prompt | merger | llm\n",
|
||||
"chain.invoke({\"skill\": \"math\", \"query\": \"what's the definition of a convergent series\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "51ba533a-43c7-4e5f-bd91-a4ec23ceeb34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"LangSmith Trace: https://smith.langchain.com/public/432150b6-9909-40a7-8ae7-944b7e657438/r/f4ad5fb2-4d38-42a6-b780-25f62617d53f"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4548d916-ce21-4dc6-8f19-eedb8003ace6",
|
||||
@@ -174,7 +214,7 @@
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For a complete description of all arguments head to the API reference: https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.utils.merge_message_runs.html"
|
||||
"For a complete description of all arguments head to the API reference: https://python.langchain.com/api_reference/core/messages/langchain_core.messages.utils.merge_message_runs.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -194,7 +234,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -32,7 +32,7 @@
|
||||
"\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"Passing conversation state into and out a chain is vital when building a chatbot. The [`RunnableWithMessageHistory`](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.history.RunnableWithMessageHistory.html#langchain_core.runnables.history.RunnableWithMessageHistory) class lets us add message history to certain types of chains. It wraps another Runnable and manages the chat message history for it. Specifically, it loads previous messages in the conversation BEFORE passing it to the Runnable, and it saves the generated response as a message AFTER calling the runnable. This class also enables multiple conversations by saving each conversation with a `session_id` - it then expects a `session_id` to be passed in the config when calling the runnable, and uses that to look up the relevant conversation history.\n",
|
||||
"Passing conversation state into and out a chain is vital when building a chatbot. The [`RunnableWithMessageHistory`](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.history.RunnableWithMessageHistory.html#langchain_core.runnables.history.RunnableWithMessageHistory) class lets us add message history to certain types of chains. It wraps another Runnable and manages the chat message history for it. Specifically, it loads previous messages in the conversation BEFORE passing it to the Runnable, and it saves the generated response as a message AFTER calling the runnable. This class also enables multiple conversations by saving each conversation with a `session_id` - it then expects a `session_id` to be passed in the config when calling the runnable, and uses that to look up the relevant conversation history.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
|
||||
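A hedged sketch of the `RunnableWithMessageHistory` pattern just described, using an in-memory chat history and assuming `langchain-community` and `langchain-openai` are installed; the `store` dict and `session_id` value are illustrative:

```python
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.messages import HumanMessage
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_openai import ChatOpenAI

store = {}


def get_session_history(session_id: str) -> ChatMessageHistory:
    # One history object per session_id, looked up from the config at call time.
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]


chain = RunnableWithMessageHistory(ChatOpenAI(model="gpt-4o-mini"), get_session_history)

config = {"configurable": {"session_id": "abc"}}
chain.invoke([HumanMessage("Hi, I'm Bob.")], config=config)
print(chain.invoke([HumanMessage("What's my name?")], config=config).content)
```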
@@ -31,7 +31,7 @@
|
||||
":::\n",
|
||||
"\n",
|
||||
"Here we focus on how to move from legacy LangChain agents to more flexible [LangGraph](https://langchain-ai.github.io/langgraph/) agents.\n",
|
||||
"LangChain agents (the [AgentExecutor](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor) in particular) have multiple configuration parameters.\n",
|
||||
"LangChain agents (the [AgentExecutor](https://python.langchain.com/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor) in particular) have multiple configuration parameters.\n",
|
||||
"In this notebook we will show how those parameters map to the LangGraph react agent executor using the [create_react_agent](https://langchain-ai.github.io/langgraph/reference/prebuilt/#create_react_agent) prebuilt helper method.\n",
|
||||
"\n",
|
||||
"#### Prerequisites\n",
|
||||
@@ -65,9 +65,11 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"sk-...\""
|
||||
"if \"OPENAI_API_KEY\" not in os.environ:\n",
|
||||
" os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API key:\\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -82,7 +84,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"id": "1e425fea-2796-4b99-bee6-9a6ffe73f756",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -110,12 +112,12 @@
|
||||
"id": "af002033-fe51-4d14-b47c-3e9b483c8395",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For the LangChain [AgentExecutor](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor), we define a prompt with a placeholder for the agent's scratchpad. The agent can be invoked as follows:"
|
||||
"For the LangChain [AgentExecutor](https://python.langchain.com/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor), we define a prompt with a placeholder for the agent's scratchpad. The agent can be invoked as follows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"id": "03ea357c-9c36-4464-b2cc-27bd150e1554",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -126,7 +128,7 @@
|
||||
" 'output': 'The value of `magic_function(3)` is 5.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -162,7 +164,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"id": "53a3737a-d167-4255-89bf-20ac37f89a3e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -173,7 +175,7 @@
|
||||
" 'output': 'The value of `magic_function(3)` is 5.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -193,7 +195,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"id": "74ecebe3-512e-409c-a661-bdd5b0a2b782",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -201,10 +203,10 @@
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'Pardon?',\n",
|
||||
" 'output': 'The value you get when you apply `magic_function` to the input 3 is 5.'}"
|
||||
" 'output': 'The value returned by `magic_function` when the input is 3 is 5.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -243,7 +245,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"id": "a9a11ccd-75e2-4c11-844d-a34870b0ff91",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -254,7 +256,7 @@
|
||||
" 'output': 'El valor de `magic_function(3)` es 5.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -295,7 +297,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 8,
|
||||
"id": "a9486805-676a-4d19-a5c4-08b41b172989",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -324,7 +326,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 9,
|
||||
"id": "d369ab45-0c82-45f4-9d3e-8efb8dd47e2c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -332,7 +334,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'input': 'what is the value of magic_function(3)?', 'output': 'El valor de magic_function(3) es 5. ¡Pandamonium!'}\n"
|
||||
"{'input': 'what is the value of magic_function(3)?', 'output': 'The value of magic_function(3) is 5. ¡Pandamonium!'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -381,12 +383,12 @@
|
||||
"source": [
|
||||
"### In LangChain\n",
|
||||
"\n",
|
||||
"With LangChain's [AgentExecutor](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.iter), you could add chat [Memory](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.memory) so it can engage in a multi-turn conversation."
|
||||
"With LangChain's [AgentExecutor](https://python.langchain.com/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.iter), you could add chat [Memory](https://python.langchain.com/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.memory) so it can engage in a multi-turn conversation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 10,
|
||||
"id": "b97beba5-8f74-430c-9399-91b77c8fa15c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -394,11 +396,11 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Hi Polly! The output of the magic function for the input 3 is 5.\n",
|
||||
"Hi Polly! The output of applying the magic function to the input 3 is 5.\n",
|
||||
"---\n",
|
||||
"Yes, your name is Polly!\n",
|
||||
"Yes, you mentioned your name is Polly.\n",
|
||||
"---\n",
|
||||
"The output of the magic function for the input 3 is 5.\n"
|
||||
"The output of applying the magic function to the input 3 is 5.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -476,7 +478,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 11,
|
||||
"id": "baca3dc6-678b-4509-9275-2fd653102898",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -484,16 +486,16 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Hi Polly! The output of the magic_function for the input of 3 is 5.\n",
|
||||
"Hi Polly! The output of applying the magic function to the input 3 is 5.\n",
|
||||
"---\n",
|
||||
"Yes, your name is Polly!\n",
|
||||
"---\n",
|
||||
"The output of the magic_function for the input of 3 was 5.\n"
|
||||
"The output of applying the magic function to the input 3 was 5.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langgraph.checkpoint import MemorySaver # an in-memory checkpointer\n",
|
||||
"from langgraph.checkpoint.memory import MemorySaver # an in-memory checkpointer\n",
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"system_message = \"You are a helpful assistant.\"\n",
|
||||
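The hunk above moves `MemorySaver` to its new import path. For orientation, a minimal sketch of how the migrated multi-turn agent is typically wired together, assuming `model` and `tools` are the chat model and tool list defined earlier in this notebook:

```python
from langgraph.checkpoint.memory import MemorySaver  # new import location
from langgraph.prebuilt import create_react_agent

memory = MemorySaver()
app = create_react_agent(model, tools, checkpointer=memory)

# Each thread_id keeps its own message history across invocations,
# which is what produces the "Yes, your name is Polly" follow-up above.
config = {"configurable": {"thread_id": "test-thread"}}
for text in ["Hi, I'm Polly! What's the output of magic_function of 3?", "Remember my name?"]:
    result = app.invoke({"messages": [("human", text)]}, config)
    print(result["messages"][-1].content)
```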
@@ -539,12 +541,12 @@
"\n",
"### In LangChain\n",
"\n",
-"With LangChain's [AgentExecutor](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.iter), you could iterate over the steps using the [stream](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.stream) (or async `astream`) methods or the [iter](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.iter) method. LangGraph supports stepwise iteration using [stream](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.stream) "
+"With LangChain's [AgentExecutor](https://python.langchain.com/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.iter), you could iterate over the steps using the [stream](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.stream) (or async `astream`) methods or the [iter](https://python.langchain.com/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.iter) method. LangGraph supports stepwise iteration using [stream](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.stream) "
]
},
{
"cell_type": "code",
-"execution_count": 11,
+"execution_count": 12,
"id": "e62843c4-1107-41f0-a50b-aea256e28053",
"metadata": {},
"outputs": [
@@ -552,8 +554,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
-"{'actions': [ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518'}, id='run-5664e138-7085-4da7-a49e-5656a87b8d78', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_1exy0rScfPmo4fy27FbQ5qJ2')], 'messages': [AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518'}, id='run-5664e138-7085-4da7-a49e-5656a87b8d78', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'index': 0, 'type': 'tool_call_chunk'}])]}\n",
-"{'steps': [AgentStep(action=ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518'}, id='run-5664e138-7085-4da7-a49e-5656a87b8d78', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_1exy0rScfPmo4fy27FbQ5qJ2'), observation=5)], 'messages': [FunctionMessage(content='5', name='magic_function')]}\n",
+"{'actions': [ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c9aa9c0491'}, id='run-dc7ce17d-02fd-4fdb-be82-7c902410b6b7', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_gNzQT96XWoyZqVl1jI1yMnjy')], 'messages': [AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c9aa9c0491'}, id='run-dc7ce17d-02fd-4fdb-be82-7c902410b6b7', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'index': 0, 'type': 'tool_call_chunk'}])]}\n",
+"{'steps': [AgentStep(action=ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c9aa9c0491'}, id='run-dc7ce17d-02fd-4fdb-be82-7c902410b6b7', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_gNzQT96XWoyZqVl1jI1yMnjy'), observation=5)], 'messages': [FunctionMessage(content='5', name='magic_function')]}\n",
"{'output': 'The value of `magic_function(3)` is 5.', 'messages': [AIMessage(content='The value of `magic_function(3)` is 5.')]}\n"
]
}
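For reference, a hedged sketch of the loop that yields chunks like the ones above; `agent_executor` and `query` are assumed from earlier cells of this notebook:

```python
# Each chunk is a dict holding either pending 'actions', executed 'steps',
# or the final 'output', matching the three lines printed above.
for step in agent_executor.stream({"input": query}):
    print(step)
```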
@@ -604,7 +606,7 @@
},
{
"cell_type": "code",
-"execution_count": 12,
+"execution_count": 13,
"id": "076ebc85-f804-4093-a25a-a16334c9898e",
"metadata": {},
"outputs": [
@@ -612,9 +614,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
-"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_my9rzFSKR4T1yYKwCsfbZB8A', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 61, 'total_tokens': 75}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_bc2a86f5f5', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-dd705555-8fae-4fb1-a033-5d99a23e3c22-0', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_my9rzFSKR4T1yYKwCsfbZB8A', 'type': 'tool_call'}], usage_metadata={'input_tokens': 61, 'output_tokens': 14, 'total_tokens': 75})]}}\n",
-"{'tools': {'messages': [ToolMessage(content='5', name='magic_function', tool_call_id='call_my9rzFSKR4T1yYKwCsfbZB8A')]}}\n",
-"{'agent': {'messages': [AIMessage(content='The value of `magic_function(3)` is 5.', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 84, 'total_tokens': 98}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'stop', 'logprobs': None}, id='run-698cad05-8cb2-4d08-8c2a-881e354f6cc7-0', usage_metadata={'input_tokens': 84, 'output_tokens': 14, 'total_tokens': 98})]}}\n"
+"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_I0nztlIcc0e9ry5dn53YLZUM', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 61, 'total_tokens': 75}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-5f9bd87d-3692-4d13-8d27-1859e13e2156-0', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_I0nztlIcc0e9ry5dn53YLZUM', 'type': 'tool_call'}], usage_metadata={'input_tokens': 61, 'output_tokens': 14, 'total_tokens': 75})]}}\n",
+"{'tools': {'messages': [ToolMessage(content='5', name='magic_function', tool_call_id='call_I0nztlIcc0e9ry5dn53YLZUM')]}}\n",
+"{'agent': {'messages': [AIMessage(content='The value of `magic_function(3)` is 5.', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 84, 'total_tokens': 98}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'stop', 'logprobs': None}, id='run-f6015ca6-93e5-45e8-8b28-b3f0a8d203dc-0', usage_metadata={'input_tokens': 84, 'output_tokens': 14, 'total_tokens': 98})]}}\n"
]
}
],
@@ -654,7 +656,7 @@
},
{
"cell_type": "code",
-"execution_count": 12,
+"execution_count": 14,
"id": "a2f720f3-c121-4be2-b498-92c16bb44b0a",
"metadata": {},
"outputs": [
@@ -662,7 +664,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
-"[(ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_uPZ2D1Bo5mdED3gwgaeWURrf', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518'}, id='run-a792db4a-278d-4090-82ae-904a30eada93', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_uPZ2D1Bo5mdED3gwgaeWURrf', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_uPZ2D1Bo5mdED3gwgaeWURrf', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_uPZ2D1Bo5mdED3gwgaeWURrf'), 5)]\n"
+"[(ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_wjaAyTjI2LSYOq7C8QZYSxEs', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c9aa9c0491'}, id='run-99e06b70-1ef6-4761-834b-87b6c5252e20', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_wjaAyTjI2LSYOq7C8QZYSxEs', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_wjaAyTjI2LSYOq7C8QZYSxEs', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_wjaAyTjI2LSYOq7C8QZYSxEs'), 5)]\n"
]
}
],
@@ -684,20 +686,20 @@
},
{
"cell_type": "code",
-"execution_count": 13,
+"execution_count": 15,
"id": "ef23117a-5ccb-42ce-80c3-ea49a9d3a942",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
-"{'messages': [HumanMessage(content='what is the value of magic_function(3)?', id='cd7d0f49-a0e0-425a-b2b0-603a716058ed'),\n",
-" AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_VfZ9287DuybOSrBsQH5X12xf', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-a1e965cd-bf61-44f9-aec1-8aaecb80955f-0', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_VfZ9287DuybOSrBsQH5X12xf', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}),\n",
-" ToolMessage(content='5', name='magic_function', id='20d5c2fe-a5d8-47fa-9e04-5282642e2039', tool_call_id='call_VfZ9287DuybOSrBsQH5X12xf'),\n",
-" AIMessage(content='The value of `magic_function(3)` is 5.', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 78, 'total_tokens': 92}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'stop', 'logprobs': None}, id='run-abf9341c-ef41-4157-935d-a3be5dfa2f41-0', usage_metadata={'input_tokens': 78, 'output_tokens': 14, 'total_tokens': 92})]}"
+"{'messages': [HumanMessage(content='what is the value of magic_function(3)?', id='2d369331-8052-4167-bd85-9f6d8ad021ae'),\n",
+" AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_oXiSQSe6WeWj7XIKXxZrO2IC', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-297e7fc9-726f-46a0-8c67-dc28ed1724d0-0', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_oXiSQSe6WeWj7XIKXxZrO2IC', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}),\n",
+" ToolMessage(content='5', name='magic_function', id='46370faf-9598-423c-b94b-aca8cb4f035d', tool_call_id='call_oXiSQSe6WeWj7XIKXxZrO2IC'),\n",
+" AIMessage(content='The value of `magic_function(3)` is 5.', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 78, 'total_tokens': 92}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'stop', 'logprobs': None}, id='run-f48efaff-0c2c-4632-bbf9-7ee626f73d02-0', usage_metadata={'input_tokens': 78, 'output_tokens': 14, 'total_tokens': 92})]}"
]
},
-"execution_count": 13,
+"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -757,7 +759,7 @@
"Invoking: `magic_function` with `{'input': '3'}`\n",
"\n",
"\n",
-"\u001b[0m\u001b[36;1m\u001b[1;3mSorry, there was an error. Please try again.\u001b[0m\u001b[32;1m\u001b[1;3mParece que hubo un error al intentar calcular el valor de la función mágica. ¿Te gustaría que lo intente de nuevo?\u001b[0m\n",
+"\u001b[0m\u001b[36;1m\u001b[1;3mSorry, there was an error. Please try again.\u001b[0m\u001b[32;1m\u001b[1;3mHubo un error al intentar obtener el valor de `magic_function(3)`. ¿Podrías intentarlo de nuevo o proporcionar más detalles?\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
@@ -766,7 +768,7 @@
"data": {
"text/plain": [
"{'input': 'what is the value of magic_function(3)?',\n",
-" 'output': 'Parece que hubo un error al intentar calcular el valor de la función mágica. ¿Te gustaría que lo intente de nuevo?'}"
+" 'output': 'Hubo un error al intentar obtener el valor de `magic_function(3)`. ¿Podrías intentarlo de nuevo o proporcionar más detalles?'}"
]
},
"execution_count": 17,
@@ -819,12 +821,15 @@
"name": "stdout",
"output_type": "stream",
"text": [
-"content='what is the value of magic_function(3)?' id='74e2d5e8-2b59-4820-979c-8d11ecfc14c2'\n",
-"content='' additional_kwargs={'tool_calls': [{'id': 'call_ihtrH6IG95pDXpKluIwAgi3J', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-5a35e465-8a08-43dd-ac8b-4a76dcace305-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_ihtrH6IG95pDXpKluIwAgi3J', 'type': 'tool_call'}] usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}\n",
-"content='Sorry, there was an error. Please try again.' name='magic_function' id='8c37c19b-3586-46b1-aab9-a045786801a2' tool_call_id='call_ihtrH6IG95pDXpKluIwAgi3J'\n",
-"content='It seems there was an error in processing the request. Let me try again.' additional_kwargs={'tool_calls': [{'id': 'call_iF0vYWAd6rfely0cXSqdMOnF', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 31, 'prompt_tokens': 88, 'total_tokens': 119}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-eb88ec77-d492-43a5-a5dd-4cefef9a6920-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_iF0vYWAd6rfely0cXSqdMOnF', 'type': 'tool_call'}] usage_metadata={'input_tokens': 88, 'output_tokens': 31, 'total_tokens': 119}\n",
-"content='Sorry, there was an error. Please try again.' name='magic_function' id='c9ff261f-a0f1-4c92-a9f2-cd749f62d911' tool_call_id='call_iF0vYWAd6rfely0cXSqdMOnF'\n",
-"content='I am currently unable to process the request with the input \"3\" for the `magic_function`. If you have any other questions or need assistance with something else, please let me know!' response_metadata={'token_usage': {'completion_tokens': 39, 'prompt_tokens': 141, 'total_tokens': 180}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'stop', 'logprobs': None} id='run-d42508aa-f286-4b57-80fb-f8a76736d470-0' usage_metadata={'input_tokens': 141, 'output_tokens': 39, 'total_tokens': 180}\n",
+"content='what is the value of magic_function(3)?' id='fe74bb30-45b8-4a40-a5ed-fd6678da5428'\n",
+"content='' additional_kwargs={'tool_calls': [{'id': 'call_TNKfNy6fgZNdJAvHUMXwtp8f', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-dad8bfc1-477c-40d2-9016-243d25c0dd13-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_TNKfNy6fgZNdJAvHUMXwtp8f', 'type': 'tool_call'}] usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}\n",
+"content='Sorry, there was an error. Please try again.' name='magic_function' id='653226e0-3187-40be-a774-4c7c2612239e' tool_call_id='call_TNKfNy6fgZNdJAvHUMXwtp8f'\n",
+"content='It looks like there was an issue with processing the request. Let me try that again.' additional_kwargs={'tool_calls': [{'id': 'call_K0wJ8fQLYGv8fYXY1Uo5U5sG', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 33, 'prompt_tokens': 88, 'total_tokens': 121}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-d4c85437-6625-4e57-81f9-86de6842be7b-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_K0wJ8fQLYGv8fYXY1Uo5U5sG', 'type': 'tool_call'}] usage_metadata={'input_tokens': 88, 'output_tokens': 33, 'total_tokens': 121}\n",
+"content='Sorry, there was an error. Please try again.' name='magic_function' id='9b530d03-95df-401e-bb4f-5cada1195033' tool_call_id='call_K0wJ8fQLYGv8fYXY1Uo5U5sG'\n",
+"content='It seems that there is a persistent issue with processing the request. Let me attempt it one more time.' additional_kwargs={'tool_calls': [{'id': 'call_7ECwwNBDo4SH56oczErZJVRT', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 36, 'prompt_tokens': 143, 'total_tokens': 179}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-9f3f651e-a641-4112-99ed-d1ac11169582-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_7ECwwNBDo4SH56oczErZJVRT', 'type': 'tool_call'}] usage_metadata={'input_tokens': 143, 'output_tokens': 36, 'total_tokens': 179}\n",
+"content='Sorry, there was an error. Please try again.' name='magic_function' id='e4cd152b-4eb1-47df-ac76-f88e79adbe19' tool_call_id='call_7ECwwNBDo4SH56oczErZJVRT'\n",
+"content=\"It seems there is a consistent issue with processing the request for the magic function. Let's try using a different approach to resolve this.\" additional_kwargs={'tool_calls': [{'id': 'call_DMAL0UwBRijzuPjCTSwR2r17', 'function': {'arguments': '{\"input\":\"three\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 41, 'prompt_tokens': 201, 'total_tokens': 242}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c9aa9c0491', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-cd9f4e5c-f881-462c-abe3-890e73f46a01-0' tool_calls=[{'name': 'magic_function', 'args': {'input': 'three'}, 'id': 'call_DMAL0UwBRijzuPjCTSwR2r17', 'type': 'tool_call'}] usage_metadata={'input_tokens': 201, 'output_tokens': 41, 'total_tokens': 242}\n",
"{'input': 'what is the value of magic_function(3)?', 'output': 'Agent stopped due to max iterations.'}\n"
]
}
],
@@ -939,9 +944,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
-"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_FKiTkTd0Ffd4rkYSzERprf1M', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-b842f7b6-ec10-40f8-8c0e-baa220b77e91-0', tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_FKiTkTd0Ffd4rkYSzERprf1M', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69})]}}\n",
+"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_o8Ym0u9UfzArhIm1lV7O0CXF', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-d9faf125-1ff8-4de2-a75b-97e07d28dc4d-0', tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_o8Ym0u9UfzArhIm1lV7O0CXF', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69})]}}\n",
"------\n",
-"{'input': 'what is the value of magic_function(3)?', 'output': 'Agent stopped due to max iterations.'}\n"
+"{'input': 'what is the value of magic_function(3)?', 'output': 'Agent stopped due to a step timeout.'}\n"
]
}
],
@@ -957,7 +962,7 @@
" print(chunk)\n",
" print(\"------\")\n",
"except TimeoutError:\n",
-" print({\"input\": query, \"output\": \"Agent stopped due to max iterations.\"})"
+" print({\"input\": query, \"output\": \"Agent stopped due to a step timeout.\"})"
]
},
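The `except TimeoutError` handler above pairs with a per-step limit on the compiled graph. A sketch, assuming the `step_timeout` attribute documented for LangGraph's compiled graphs and the `model`, `tools`, and `query` names from earlier cells:

```python
from langgraph.prebuilt import create_react_agent

app = create_react_agent(model, tools)
app.step_timeout = 2  # seconds allowed for each step (assumption: attribute name)

try:
    for chunk in app.stream({"messages": [("human", query)]}):
        print(chunk)
        print("------")
except TimeoutError:
    print({"input": query, "output": "Agent stopped due to a step timeout."})
```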
{
@@ -978,7 +983,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
-"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_WoOB8juagB08xrP38twYlYKR', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-73dee47e-30ab-42c9-bb0c-6f227cac96cd-0', tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_WoOB8juagB08xrP38twYlYKR', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69})]}}\n",
+"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_gsGzyhyvR25iNV6W9VR2TIdQ', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-9ad8f834-06c5-41cf-9eec-6b7e0f5e777e-0', tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_gsGzyhyvR25iNV6W9VR2TIdQ', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69})]}}\n",
"------\n",
"Task Cancelled.\n"
]
@@ -1014,7 +1019,7 @@
"\n",
"### In LangChain\n",
"\n",
-"With LangChain's [AgentExecutor](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.iter), you could configure an [early_stopping_method](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.early_stopping_method) to either return a string saying \"Agent stopped due to iteration limit or time limit.\" (`\"force\"`) or prompt the LLM a final time to respond (`\"generate\"`)."
+"With LangChain's [AgentExecutor](https://python.langchain.com/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.iter), you could configure an [early_stopping_method](https://python.langchain.com/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.early_stopping_method) to either return a string saying \"Agent stopped due to iteration limit or time limit.\" (`\"force\"`) or prompt the LLM a final time to respond (`\"generate\"`)."
]
},
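A minimal sketch of the AgentExecutor configuration this paragraph describes, with `agent` and `tools` assumed from earlier cells:

```python
from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    max_iterations=3,
    early_stopping_method="force",  # "generate" would prompt the LLM one final time
)
agent_executor.invoke({"input": "what is the value of magic_function(3)?"})
```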
{
@@ -1089,10 +1094,10 @@
"name": "stdout",
"output_type": "stream",
"text": [
-"content='what is the value of magic_function(3)?' id='4fa7fbe5-758c-47a3-9268-717665d10680'\n",
-"content='' additional_kwargs={'tool_calls': [{'id': 'call_ujE0IQBbIQnxcF9gsZXQfdhF', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-65d689aa-baee-4342-a5d2-048feefab418-0' tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_ujE0IQBbIQnxcF9gsZXQfdhF', 'type': 'tool_call'}] usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}\n",
-"content='Sorry there was an error, please try again.' name='magic_function' id='ef8ddf1d-9ad7-4ac0-b784-b673c4d94bbd' tool_call_id='call_ujE0IQBbIQnxcF9gsZXQfdhF'\n",
-"content='It seems there was an issue with the previous attempt. Let me try that again.' additional_kwargs={'tool_calls': [{'id': 'call_GcsAfCFUHJ50BN2IOWnwTbQ7', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 32, 'prompt_tokens': 87, 'total_tokens': 119}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-54527c4b-8ff0-4ee8-8abf-224886bd222e-0' tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_GcsAfCFUHJ50BN2IOWnwTbQ7', 'type': 'tool_call'}] usage_metadata={'input_tokens': 87, 'output_tokens': 32, 'total_tokens': 119}\n",
+"content='what is the value of magic_function(3)?' id='6487a942-0a9a-4e8a-9556-553a45fa9c5a'\n",
+"content='' additional_kwargs={'tool_calls': [{'id': 'call_pe5KVY5No9iT4JWqrm5MwL1D', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-04147325-fb72-462a-a1d9-6aa4e86e3d8a-0' tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_pe5KVY5No9iT4JWqrm5MwL1D', 'type': 'tool_call'}] usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}\n",
+"content='Sorry there was an error, please try again.' name='magic_function' id='bc0bf58f-7c6c-42ed-a96d-a2afa79f16a9' tool_call_id='call_pe5KVY5No9iT4JWqrm5MwL1D'\n",
+"content=\"It seems there was an issue with processing the request. I'll try again.\" additional_kwargs={'tool_calls': [{'id': 'call_5rV7k3g7oW38bD9KUTsSxK8l', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 30, 'prompt_tokens': 87, 'total_tokens': 117}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-6e43ffd4-fb6f-4222-8503-a50ae268c0be-0' tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_5rV7k3g7oW38bD9KUTsSxK8l', 'type': 'tool_call'}] usage_metadata={'input_tokens': 87, 'output_tokens': 30, 'total_tokens': 117}\n",
"{'input': 'what is the value of magic_function(3)?', 'output': 'Agent stopped due to max iterations.'}\n"
]
}
@@ -1125,7 +1130,7 @@
"\n",
"### In LangChain\n",
"\n",
-"With LangChain's [AgentExecutor](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor), you could trim the intermediate steps of long-running agents using [trim_intermediate_steps](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.trim_intermediate_steps), which is either an integer (indicating the agent should keep the last N steps) or a custom function.\n",
+"With LangChain's [AgentExecutor](https://python.langchain.com/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor), you could trim the intermediate steps of long-running agents using [trim_intermediate_steps](https://python.langchain.com/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.trim_intermediate_steps), which is either an integer (indicating the agent should keep the last N steps) or a custom function.\n",
"\n",
"For instance, we could trim the value so the agent only sees the most recent intermediate step."
]
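A sketch of both forms the paragraph mentions, again assuming `agent` and `tools` from earlier cells:

```python
from langchain.agents import AgentExecutor

# Integer form: keep only the last N intermediate steps.
agent_executor = AgentExecutor(agent=agent, tools=tools, trim_intermediate_steps=1)

# Callable form: arbitrary logic over the (action, observation) pairs.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    trim_intermediate_steps=lambda steps: steps[-1:],
)
```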
@@ -1322,7 +1327,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
-"version": "3.10.4"
+"version": "3.11.9"
}
},
"nbformat": 4,

@@ -9,17 +9,17 @@
"\n",
"It can often be useful to store multiple vectors per document. There are multiple use cases where this is beneficial. For example, we can embed multiple chunks of a document and associate those embeddings with the parent document, allowing retriever hits on the chunks to return the larger document.\n",
"\n",
-"LangChain implements a base [MultiVectorRetriever](https://python.langchain.com/v0.2/api_reference/langchain/retrievers/langchain.retrievers.multi_vector.MultiVectorRetriever.html), which simplifies this process. Much of the complexity lies in how to create the multiple vectors per document. This notebook covers some of the common ways to create those vectors and use the `MultiVectorRetriever`.\n",
+"LangChain implements a base [MultiVectorRetriever](https://python.langchain.com/api_reference/langchain/retrievers/langchain.retrievers.multi_vector.MultiVectorRetriever.html), which simplifies this process. Much of the complexity lies in how to create the multiple vectors per document. This notebook covers some of the common ways to create those vectors and use the `MultiVectorRetriever`.\n",
"\n",
"The methods to create multiple vectors per document include:\n",
"\n",
-"- Smaller chunks: split a document into smaller chunks, and embed those (this is [ParentDocumentRetriever](https://python.langchain.com/v0.2/api_reference/langchain/retrievers/langchain.retrievers.parent_document_retriever.ParentDocumentRetriever.html)).\n",
+"- Smaller chunks: split a document into smaller chunks, and embed those (this is [ParentDocumentRetriever](https://python.langchain.com/api_reference/langchain/retrievers/langchain.retrievers.parent_document_retriever.ParentDocumentRetriever.html)).\n",
"- Summary: create a summary for each document, embed that along with (or instead of) the document.\n",
"- Hypothetical questions: create hypothetical questions that each document would be appropriate to answer, embed those along with (or instead of) the document.\n",
"\n",
"Note that this also enables another method of adding embeddings - manually. This is useful because you can explicitly add questions or queries that should lead to a document being recovered, giving you more control.\n",
"\n",
-"Below we walk through an example. First we instantiate some documents. We will index them in an (in-memory) [Chroma](/docs/integrations/providers/chroma/) vector store using [OpenAI](https://python.langchain.com/v0.2/docs/integrations/text_embedding/openai/) embeddings, but any LangChain vector store or embeddings model will suffice."
+"Below we walk through an example. First we instantiate some documents. We will index them in an (in-memory) [Chroma](/docs/integrations/providers/chroma/) vector store using [OpenAI](https://python.langchain.com/docs/integrations/text_embedding/openai/) embeddings, but any LangChain vector store or embeddings model will suffice."
]
},
{
@@ -68,7 +68,7 @@
"source": [
"## Smaller chunks\n",
"\n",
-"Often times it can be useful to retrieve larger chunks of information, but embed smaller chunks. This allows for embeddings to capture the semantic meaning as closely as possible, but for as much context as possible to be passed downstream. Note that this is what the [ParentDocumentRetriever](https://python.langchain.com/v0.2/api_reference/langchain/retrievers/langchain.retrievers.parent_document_retriever.ParentDocumentRetriever.html) does. Here we show what is going on under the hood.\n",
+"Oftentimes it can be useful to retrieve larger chunks of information, but embed smaller chunks. This allows for embeddings to capture the semantic meaning as closely as possible, but for as much context as possible to be passed downstream. Note that this is what the [ParentDocumentRetriever](https://python.langchain.com/api_reference/langchain/retrievers/langchain.retrievers.parent_document_retriever.ParentDocumentRetriever.html) does. Here we show what is going on under the hood.\n",
"\n",
"We will make a distinction between the vector store, which indexes embeddings of the (sub) documents, and the document store, which houses the \"parent\" documents and associates them with an identifier."
]
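A condensed sketch of the under-the-hood wiring this section goes on to build. The store and splitter choices here are illustrative assumptions; any vector store, byte store, and splitter would do:

```python
import uuid

from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.storage import InMemoryByteStore
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

vectorstore = Chroma(collection_name="full_documents", embedding_function=OpenAIEmbeddings())
store = InMemoryByteStore()  # holds the "parent" documents
id_key = "doc_id"
retriever = MultiVectorRetriever(vectorstore=vectorstore, byte_store=store, id_key=id_key)

# Split each parent document and tag every chunk with its parent's identifier.
doc_ids = [str(uuid.uuid4()) for _ in docs]  # `docs` assumed from an earlier cell
child_splitter = RecursiveCharacterTextSplitter(chunk_size=400)
sub_docs = []
for doc_id, doc in zip(doc_ids, docs):
    for chunk in child_splitter.split_documents([doc]):
        chunk.metadata[id_key] = doc_id
        sub_docs.append(chunk)

retriever.vectorstore.add_documents(sub_docs)  # small chunks get embedded
retriever.docstore.mset(list(zip(doc_ids, docs)))  # parents are returned on a hit
```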
@@ -103,7 +103,7 @@
"id": "d4feded4-856a-4282-91c3-53aabc62e6ff",
"metadata": {},
"source": [
-"We next generate the \"sub\" documents by splitting the original documents. Note that we store the document identifier in the `metadata` of the corresponding [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html) object."
+"We next generate the \"sub\" documents by splitting the original documents. Note that we store the document identifier in the `metadata` of the corresponding [Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html) object."
]
},
{
@@ -207,7 +207,7 @@
"id": "cdef8339-f9fa-4b3b-955f-ad9dbdf2734f",
"metadata": {},
"source": [
-"The default search type the retriever performs on the vector database is a similarity search. LangChain vector stores also support searching via [Max Marginal Relevance](https://python.langchain.com/v0.2/api_reference/core/vectorstores/langchain_core.vectorstores.VectorStore.html#langchain_core.vectorstores.VectorStore.max_marginal_relevance_search). This can be controlled via the `search_type` parameter of the retriever:"
+"The default search type the retriever performs on the vector database is a similarity search. LangChain vector stores also support searching via [Max Marginal Relevance](https://python.langchain.com/api_reference/core/vectorstores/langchain_core.vectorstores.VectorStore.html#langchain_core.vectorstores.VectorStore.max_marginal_relevance_search). This can be controlled via the `search_type` parameter of the retriever:"
]
},
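A two-line sketch of the switch described above, assuming the `retriever` from the preceding cells:

```python
from langchain.retrievers.multi_vector import SearchType

retriever.search_type = SearchType.mmr
retriever.invoke("example query")[0]  # now ranked by max marginal relevance
```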
{
@@ -244,7 +244,7 @@
"\n",
"A summary may be able to distill more accurately what a chunk is about, leading to better retrieval. Here we show how to create summaries, and then embed those.\n",
"\n",
-"We construct a simple [chain](/docs/how_to/sequence) that will receive an input [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html) object and generate a summary using a LLM.\n",
+"We construct a simple [chain](/docs/how_to/sequence) that will receive an input [Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html) object and generate a summary using an LLM.\n",
"\n",
"```{=mdx}\n",
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
@@ -294,7 +294,7 @@
"id": "3faa9fde-1b09-4849-a815-8b2e89c30a02",
"metadata": {},
"source": [
-"Note that we can [batch](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable) the chain accross documents:"
+"Note that we can [batch](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable) the chain across documents:"
]
},
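Putting the two markdown cells above together, a sketch of a summary chain batched across documents; the prompt wording and concurrency limit are assumptions:

```python
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

chain = (
    {"doc": lambda x: x.page_content}  # pull the text out of each Document
    | ChatPromptTemplate.from_template("Summarize the following document:\n\n{doc}")
    | ChatOpenAI(max_retries=0)
    | StrOutputParser()
)

summaries = chain.batch(docs, {"max_concurrency": 5})  # `docs` assumed from earlier
```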
{
@@ -440,7 +440,7 @@
"source": [
"from typing import List\n",
"\n",
-"from langchain_core.pydantic_v1 import BaseModel, Field\n",
+"from pydantic import BaseModel, Field\n",
"\n",
"\n",
"class HypotheticalQuestions(BaseModel):\n",

@@ -24,8 +24,8 @@
"from typing import List\n",
"\n",
"from langchain_core.output_parsers import PydanticOutputParser\n",
-"from langchain_core.pydantic_v1 import BaseModel, Field\n",
-"from langchain_openai import ChatOpenAI"
+"from langchain_openai import ChatOpenAI\n",
+"from pydantic import BaseModel, Field"
]
},
{
@@ -131,7 +131,7 @@
"id": "84498e02",
"metadata": {},
"source": [
-"Find out api documentation for [OutputFixingParser](https://python.langchain.com/v0.2/api_reference/langchain/output_parsers/langchain.output_parsers.fix.OutputFixingParser.html#langchain.output_parsers.fix.OutputFixingParser)."
+"See the API documentation for [OutputFixingParser](https://python.langchain.com/api_reference/langchain/output_parsers/langchain.output_parsers.fix.OutputFixingParser.html#langchain.output_parsers.fix.OutputFixingParser)."
]
},
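For context on the file being edited, a sketch of the parser this notebook demonstrates, using the post-migration `pydantic` import; the `Actor` schema is illustrative:

```python
from typing import List

from langchain.output_parsers import OutputFixingParser
from langchain_core.output_parsers import PydanticOutputParser
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field


class Actor(BaseModel):
    name: str = Field(description="name of an actor")
    film_names: List[str] = Field(description="list of films they starred in")


parser = PydanticOutputParser(pydantic_object=Actor)
fixing_parser = OutputFixingParser.from_llm(parser=parser, llm=ChatOpenAI())

# Single quotes make this invalid JSON; the wrapped LLM is asked to repair it.
misformatted = "{'name': 'Tom Hanks', 'film_names': ['Forrest Gump']}"
fixing_parser.parse(misformatted)
```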
{

@@ -30,7 +30,7 @@
"id": "ae909b7a",
"metadata": {},
"source": [
-"The [`JsonOutputParser`](https://python.langchain.com/v0.2/api_reference/core/output_parsers/langchain_core.output_parsers.json.JsonOutputParser.html) is one built-in option for prompting for and then parsing JSON output. While it is similar in functionality to the [`PydanticOutputParser`](https://python.langchain.com/v0.2/api_reference/core/output_parsers/langchain_core.output_parsers.pydantic.PydanticOutputParser.html), it also supports streaming back partial JSON objects.\n",
+"The [`JsonOutputParser`](https://python.langchain.com/api_reference/core/output_parsers/langchain_core.output_parsers.json.JsonOutputParser.html) is one built-in option for prompting for and then parsing JSON output. While it is similar in functionality to the [`PydanticOutputParser`](https://python.langchain.com/api_reference/core/output_parsers/langchain_core.output_parsers.pydantic.PydanticOutputParser.html), it also supports streaming back partial JSON objects.\n",
"\n",
"Here's an example of how it can be used alongside [Pydantic](https://docs.pydantic.dev/) to conveniently declare the expected schema:"
]
@@ -47,7 +47,8 @@
"import os\n",
"from getpass import getpass\n",
"\n",
-"os.environ[\"OPENAI_API_KEY\"] = getpass()"
+"if \"OPENAI_API_KEY\" not in os.environ:\n",
+" os.environ[\"OPENAI_API_KEY\"] = getpass()"
]
},
{
@@ -71,8 +72,8 @@
"source": [
"from langchain_core.output_parsers import JsonOutputParser\n",
"from langchain_core.prompts import PromptTemplate\n",
-"from langchain_core.pydantic_v1 import BaseModel, Field\n",
"from langchain_openai import ChatOpenAI\n",
+"from pydantic import BaseModel, Field\n",
"\n",
"model = ChatOpenAI(temperature=0)\n",
"\n",
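The hunk cuts off after `model` is created. A sketch of how the cell typically continues, with the `Joke` schema as an assumption in the spirit of the notebook:

```python
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field


class Joke(BaseModel):
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")


model = ChatOpenAI(temperature=0)
parser = JsonOutputParser(pydantic_object=Joke)

prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

chain = prompt | model | parser
chain.invoke({"query": "Tell me a joke."})  # can also stream partial JSON objects
```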
@@ -20,8 +20,8 @@
"from langchain.output_parsers import OutputFixingParser\n",
"from langchain_core.output_parsers import PydanticOutputParser\n",
"from langchain_core.prompts import PromptTemplate\n",
-"from langchain_core.pydantic_v1 import BaseModel, Field\n",
-"from langchain_openai import ChatOpenAI, OpenAI"
+"from langchain_openai import ChatOpenAI, OpenAI\n",
+"from pydantic import BaseModel, Field"
]
},
{
@@ -244,7 +244,7 @@
"id": "e3a2513a",
"metadata": {},
"source": [
-"Find out api documentation for [RetryOutputParser](https://python.langchain.com/v0.2/api_reference/langchain/output_parsers/langchain.output_parsers.retry.RetryOutputParser.html#langchain.output_parsers.retry.RetryOutputParser)."
+"See the API documentation for [RetryOutputParser](https://python.langchain.com/api_reference/langchain/output_parsers/langchain.output_parsers.retry.RetryOutputParser.html#langchain.output_parsers.retry.RetryOutputParser)."
]
},
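A sketch of the retry flow this notebook covers, using the migrated imports above; the `Action` schema and query are illustrative:

```python
from langchain.output_parsers import RetryOutputParser
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import OpenAI
from pydantic import BaseModel, Field


class Action(BaseModel):
    action: str = Field(description="action to take")
    action_input: str = Field(description="input to the action")


parser = PydanticOutputParser(pydantic_object=Action)
prompt = PromptTemplate.from_template("Answer the user query.\n{query}\n")
retry_parser = RetryOutputParser.from_llm(parser=parser, llm=OpenAI(temperature=0))

# Unlike OutputFixingParser, retrying re-sends the original prompt so the model
# can regenerate a complete answer, not just patch the formatting.
bad_response = '{"action": "search"}'  # valid JSON, but missing action_input
retry_parser.parse_with_prompt(bad_response, prompt.format_prompt(query="who is leo di caprios gf?"))
```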
{

@@ -35,17 +35,17 @@
},
{
"cell_type": "code",
-"execution_count": 6,
+"execution_count": 1,
"id": "1594b2bf-2a6f-47bb-9a81-38930f8e606b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
-"Joke(setup='Why did the chicken cross the road?', punchline='To get to the other side!')"
+"Joke(setup='Why did the tomato turn red?', punchline='Because it saw the salad dressing!')"
]
},
-"execution_count": 6,
+"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
@@ -53,8 +53,8 @@
"source": [
"from langchain_core.output_parsers import PydanticOutputParser\n",
"from langchain_core.prompts import PromptTemplate\n",
-"from langchain_core.pydantic_v1 import BaseModel, Field, validator\n",
"from langchain_openai import OpenAI\n",
+"from pydantic import BaseModel, Field, model_validator\n",
"\n",
"model = OpenAI(model_name=\"gpt-3.5-turbo-instruct\", temperature=0.0)\n",
"\n",
@@ -65,11 +65,13 @@
" punchline: str = Field(description=\"answer to resolve the joke\")\n",
"\n",
" # You can add custom validation logic easily with Pydantic.\n",
-" @validator(\"setup\")\n",
-" def question_ends_with_question_mark(cls, field):\n",
-" if field[-1] != \"?\":\n",
+" @model_validator(mode=\"before\")\n",
+" @classmethod\n",
+" def question_ends_with_question_mark(cls, values: dict) -> dict:\n",
+" setup = values[\"setup\"]\n",
+" if setup[-1] != \"?\":\n",
" raise ValueError(\"Badly formed question!\")\n",
-" return field\n",
+" return values\n",
"\n",
"\n",
"# Set up a parser + inject instructions into the prompt template.\n",
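The hunk above rewrites a Pydantic v1 `@validator` as a v2 `@model_validator`. The same check can also be expressed per-field in v2; a sketch using `field_validator`:

```python
from pydantic import BaseModel, Field, field_validator


class Joke(BaseModel):
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")

    @field_validator("setup")
    @classmethod
    def question_ends_with_question_mark(cls, value: str) -> str:
        if not value.endswith("?"):
            raise ValueError("Badly formed question!")
        return value
```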
@@ -239,9 +241,9 @@
],
"metadata": {
"kernelspec": {
-"display_name": "Python 3 (ipykernel)",
+"display_name": "poetry-venv-311",
"language": "python",
-"name": "python3"
+"name": "poetry-venv-311"
},
"language_info": {
"codemirror_mode": {
@@ -253,7 +255,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
-"version": "3.10.1"
+"version": "3.11.9"
}
},
"nbformat": 4,

@@ -20,7 +20,7 @@
"\n",
"LLMs from different providers often have different strengths depending on the specific data they are trained on. This also means that some may be \"better\" and more reliable at generating output in formats other than JSON.\n",
"\n",
-"This guide shows you how to use the [`XMLOutputParser`](https://python.langchain.com/v0.2/api_reference/core/output_parsers/langchain_core.output_parsers.xml.XMLOutputParser.html) to prompt models for XML output, then and parse that output into a usable format.\n",
+"This guide shows you how to use the [`XMLOutputParser`](https://python.langchain.com/api_reference/core/output_parsers/langchain_core.output_parsers.xml.XMLOutputParser.html) to prompt models for XML output, and then parse that output into a usable format.\n",
"\n",
":::{.callout-note}\n",
"Keep in mind that large language models are leaky abstractions! You'll have to use an LLM with sufficient capacity to generate well-formed XML.\n",
@@ -41,7 +41,8 @@
"import os\n",
"from getpass import getpass\n",
"\n",
-"os.environ[\"ANTHROPIC_API_KEY\"] = getpass()"
+"if \"ANTHROPIC_API_KEY\" not in os.environ:\n",
+" os.environ[\"ANTHROPIC_API_KEY\"] = getpass()"
]
},
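For orientation, a sketch of the XML parsing flow this notebook demonstrates; the Anthropic model name is an assumption, and `tags` is optional:

```python
from langchain_anthropic import ChatAnthropic
from langchain_core.output_parsers import XMLOutputParser
from langchain_core.prompts import PromptTemplate

parser = XMLOutputParser()  # or XMLOutputParser(tags=[...]) to constrain the schema

prompt = PromptTemplate(
    template="{query}\n{format_instructions}",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

model = ChatAnthropic(model="claude-3-5-sonnet-20240620")  # assumption: model name
chain = prompt | model | parser
chain.invoke({"query": "Generate the shortened filmography for Tom Hanks."})
```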
{

@@ -39,7 +39,8 @@
"import os\n",
"from getpass import getpass\n",
"\n",
-"os.environ[\"OPENAI_API_KEY\"] = getpass()"
+"if \"OPENAI_API_KEY\" not in os.environ:\n",
+" os.environ[\"OPENAI_API_KEY\"] = getpass()"
]
},
{
@@ -47,7 +48,7 @@
"id": "cc479f3a",
"metadata": {},
"source": [
-"We use [Pydantic](https://docs.pydantic.dev) with the [`YamlOutputParser`](https://python.langchain.com/v0.2/api_reference/langchain/output_parsers/langchain.output_parsers.yaml.YamlOutputParser.html#langchain.output_parsers.yaml.YamlOutputParser) to declare our data model and give the model more context as to what type of YAML it should generate:"
+"We use [Pydantic](https://docs.pydantic.dev) with the [`YamlOutputParser`](https://python.langchain.com/api_reference/langchain/output_parsers/langchain.output_parsers.yaml.YamlOutputParser.html#langchain.output_parsers.yaml.YamlOutputParser) to declare our data model and give the model more context as to what type of YAML it should generate:"
]
},
{
@@ -70,8 +71,8 @@
"source": [
"from langchain.output_parsers import YamlOutputParser\n",
"from langchain_core.prompts import PromptTemplate\n",
-"from langchain_core.pydantic_v1 import BaseModel, Field\n",
"from langchain_openai import ChatOpenAI\n",
+"from pydantic import BaseModel, Field\n",
"\n",
"\n",
"# Define your desired data structure.\n",
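The hunk stops at the data-structure comment. A sketch of how the cell plausibly continues, with the `Joke` schema assumed:

```python
from langchain.output_parsers import YamlOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field


# Define your desired data structure.
class Joke(BaseModel):
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")


model = ChatOpenAI(temperature=0)
parser = YamlOutputParser(pydantic_object=Joke)

prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

(prompt | model | parser).invoke({"query": "Tell me a joke."})
```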
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user