Mirror of https://github.com/hwchase17/langchain.git (synced 2026-02-04 08:10:25 +00:00)
Compare commits
397 Commits
24 .github/scripts/check_diff.py (vendored)
@@ -16,6 +16,19 @@ LANGCHAIN_DIRS = [
"libs/experimental",
]

# for 0.3rc, we are ignoring core dependents
# in order to be able to get CI to pass for individual PRs.
IGNORE_CORE_DEPENDENTS = True

# ignored partners are removed from dependents
# but still run if directly edited
IGNORED_PARTNERS = [
# remove huggingface from dependents because of CI instability
# specifically in huggingface jobs
# https://github.com/langchain-ai/langchain/issues/25558
"huggingface",
]


def all_package_dirs() -> Set[str]:
return {
@@ -68,6 +81,11 @@ def dependents_graph() -> dict:

if "langchain" in dep:
dependents[dep].add(pkg_dir)

for k in dependents:
for partner in IGNORED_PARTNERS:
if f"libs/partners/{partner}" in dependents[k]:
dependents[k].remove(f"libs/partners/{partner}")
return dependents


@@ -90,7 +108,7 @@ def _get_configs_for_single_dir(job: str, dir_: str) -> List[Dict[str, str]]:
{"working-directory": dir_, "python-version": f"3.{v}"}
for v in range(8, 13)
]
min_python = "3.8"
min_python = "3.9"
max_python = "3.12"

# custom logic for specific directories
@@ -170,6 +188,9 @@ if __name__ == "__main__":
# for extended testing
found = False
for dir_ in LANGCHAIN_DIRS:
if dir_ == "libs/core" and IGNORE_CORE_DEPENDENTS:
dirs_to_run["extended-test"].add(dir_)
continue
if file.startswith(dir_):
found = True
if found:
@@ -181,7 +202,6 @@ if __name__ == "__main__":
dirs_to_run["test"].add("libs/partners/mistralai")
dirs_to_run["test"].add("libs/partners/openai")
dirs_to_run["test"].add("libs/partners/anthropic")
dirs_to_run["test"].add("libs/partners/ai21")
dirs_to_run["test"].add("libs/partners/fireworks")
dirs_to_run["test"].add("libs/partners/groq")

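The hunks above prune ignored partner packages from the dependents graph before CI jobs are scheduled. As an illustrative sketch only (not part of the changeset), the pruning behaves roughly like this on a hand-built dependents mapping:

```python
# Illustrative sketch: mimics the IGNORED_PARTNERS pruning added to
# dependents_graph() in check_diff.py, using an invented example mapping.
IGNORED_PARTNERS = ["huggingface"]

dependents = {
    "libs/core": {
        "libs/langchain",
        "libs/partners/openai",
        "libs/partners/huggingface",  # pruned unless the partner dir is edited directly
    }
}

for k in dependents:
    for partner in IGNORED_PARTNERS:
        if f"libs/partners/{partner}" in dependents[k]:
            dependents[k].remove(f"libs/partners/{partner}")

print(sorted(dependents["libs/core"]))
# ['libs/langchain', 'libs/partners/openai']
```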
@@ -11,7 +11,7 @@ if __name__ == "__main__":

# see if we're releasing an rc
version = toml_data["tool"]["poetry"]["version"]
releasing_rc = "rc" in version
releasing_rc = "rc" in version or "dev" in version

# if not, iterate through dependencies and make sure none allow prereleases
if not releasing_rc:
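This hunk widens the prerelease check so that `dev` versions are treated like release candidates. A quick illustrative check (not part of the diff) of which version strings the updated expression accepts:

```python
# Illustrative sketch: version strings the updated check treats as prereleases.
for version in ["0.3.0rc1", "0.3.0.dev1", "0.2.16"]:
    releasing_rc = "rc" in version or "dev" in version
    print(version, "->", releasing_rc)
# 0.3.0rc1 -> True
# 0.3.0.dev1 -> True
# 0.2.16 -> False
```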
114 .github/workflows/_dependencies.yml (vendored)
@@ -1,114 +0,0 @@
name: dependencies

on:
workflow_call:
inputs:
working-directory:
required: true
type: string
description: "From which folder this pipeline executes"
langchain-location:
required: false
type: string
description: "Relative path to the langchain library folder"
python-version:
required: true
type: string
description: "Python version to use"

env:
POETRY_VERSION: "1.7.1"

jobs:
build:
defaults:
run:
working-directory: ${{ inputs.working-directory }}
runs-on: ubuntu-latest
name: dependency checks ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v4

- name: Set up Python ${{ inputs.python-version }} + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ inputs.python-version }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: ${{ inputs.working-directory }}
cache-key: pydantic-cross-compat

- name: Install dependencies
shell: bash
run: poetry install

- name: Check imports with base dependencies
shell: bash
run: poetry run make check_imports

- name: Install test dependencies
shell: bash
run: poetry install --with test

- name: Install langchain editable
working-directory: ${{ inputs.working-directory }}
if: ${{ inputs.langchain-location }}
env:
LANGCHAIN_LOCATION: ${{ inputs.langchain-location }}
run: |
poetry run pip install -e "$LANGCHAIN_LOCATION"

- name: Install the opposite major version of pydantic
# If normal tests use pydantic v1, here we'll use v2, and vice versa.
shell: bash
# airbyte currently doesn't support pydantic v2
if: ${{ !startsWith(inputs.working-directory, 'libs/partners/airbyte') }}
run: |
# Determine the major part of pydantic version
REGULAR_VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)

if [[ "$REGULAR_VERSION" == "1" ]]; then
PYDANTIC_DEP=">=2.1,<3"
TEST_WITH_VERSION="2"
elif [[ "$REGULAR_VERSION" == "2" ]]; then
PYDANTIC_DEP="<2"
TEST_WITH_VERSION="1"
else
echo "Unexpected pydantic major version '$REGULAR_VERSION', cannot determine which version to use for cross-compatibility test."
exit 1
fi

# Install via `pip` instead of `poetry add` to avoid changing lockfile,
# which would prevent caching from working: the cache would get saved
# to a different key than where it gets loaded from.
poetry run pip install "pydantic${PYDANTIC_DEP}"

# Ensure that the correct pydantic is installed now.
echo "Checking pydantic version... Expecting ${TEST_WITH_VERSION}"

# Determine the major part of pydantic version
CURRENT_VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)

# Check that the major part of pydantic version is as expected, if not
# raise an error
if [[ "$CURRENT_VERSION" != "$TEST_WITH_VERSION" ]]; then
echo "Error: expected pydantic version ${CURRENT_VERSION} to have been installed, but found: ${TEST_WITH_VERSION}"
exit 1
fi
echo "Found pydantic version ${CURRENT_VERSION}, as expected"
- name: Run pydantic compatibility tests
# airbyte currently doesn't support pydantic v2
if: ${{ !startsWith(inputs.working-directory, 'libs/partners/airbyte') }}
shell: bash
run: make test

- name: Ensure the tests did not create any additional files
shell: bash
run: |
set -eu

STATUS="$(git status)"
echo "$STATUS"

# grep will exit non-zero if the target message isn't found,
# and `set -e` above will cause the step to fail.
echo "$STATUS" | grep 'nothing to commit, working tree clean'
17 .github/workflows/_test.yml (vendored)
@@ -75,12 +75,11 @@ jobs:
echo "min-versions=$min_versions" >> "$GITHUB_OUTPUT"
echo "min-versions=$min_versions"

# Temporarily disabled until we can get the minimum versions working
# - name: Run unit tests with minimum dependency versions
# if: ${{ steps.min-version.outputs.min-versions != '' }}
# env:
# MIN_VERSIONS: ${{ steps.min-version.outputs.min-versions }}
# run: |
# poetry run pip install --force-reinstall $MIN_VERSIONS --editable .
# make tests
# working-directory: ${{ inputs.working-directory }}
- name: Run unit tests with minimum dependency versions
if: ${{ steps.min-version.outputs.min-versions != '' }}
env:
MIN_VERSIONS: ${{ steps.min-version.outputs.min-versions }}
run: |
poetry run pip install --force-reinstall $MIN_VERSIONS --editable .
make tests
working-directory: ${{ inputs.working-directory }}

15 .github/workflows/check_diffs.yml (vendored)
@@ -89,19 +89,6 @@ jobs:
python-version: ${{ matrix.job-configs.python-version }}
secrets: inherit

dependencies:
name: cd ${{ matrix.job-configs.working-directory }}
needs: [ build ]
if: ${{ needs.build.outputs.dependencies != '[]' }}
strategy:
matrix:
job-configs: ${{ fromJson(needs.build.outputs.dependencies) }}
uses: ./.github/workflows/_dependencies.yml
with:
working-directory: ${{ matrix.job-configs.working-directory }}
python-version: ${{ matrix.job-configs.python-version }}
secrets: inherit

extended-tests:
name: "cd ${{ matrix.job-configs.working-directory }} / make extended_tests #${{ matrix.job-configs.python-version }}"
needs: [ build ]
@@ -149,7 +136,7 @@
echo "$STATUS" | grep 'nothing to commit, working tree clean'
ci_success:
name: "CI Success"
needs: [build, lint, test, compile-integration-tests, dependencies, extended-tests, test-doc-imports]
needs: [build, lint, test, compile-integration-tests, extended-tests, test-doc-imports]
if: |
always()
runs-on: ubuntu-latest

6 .github/workflows/scheduled_test.yml (vendored)
@@ -17,16 +17,14 @@ jobs:
fail-fast: false
matrix:
python-version:
- "3.8"
- "3.9"
- "3.11"
working-directory:
- "libs/partners/openai"
- "libs/partners/anthropic"
- "libs/partners/ai21"
- "libs/partners/fireworks"
- "libs/partners/groq"
- "libs/partners/mistralai"
- "libs/partners/together"
- "libs/partners/google-vertexai"
- "libs/partners/google-genai"
- "libs/partners/aws"
@@ -90,11 +88,9 @@
AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_CHAT_DEPLOYMENT_NAME }}
AZURE_OPENAI_LLM_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_LLM_DEPLOYMENT_NAME }}
AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME: ${{ secrets.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME }}
AI21_API_KEY: ${{ secrets.AI21_API_KEY }}
FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}

1 .gitignore (vendored)
@@ -167,6 +167,7 @@ docs/.docusaurus/
docs/.cache-loader/
docs/_dist
docs/api_reference/*api_reference.rst
docs/api_reference/*.md
docs/api_reference/_build
docs/api_reference/*/
!docs/api_reference/_static/

20 README.md
@@ -14,18 +14,20 @@

Looking for the JS/TS library? Check out [LangChain.js](https://github.com/langchain-ai/langchainjs).

To help you ship LangChain apps to production faster, check out [LangSmith](https://smith.langchain.com).
[LangSmith](https://smith.langchain.com) is a unified developer platform for building, testing, and monitoring LLM applications.
To help you ship LangChain apps to production faster, check out [LangSmith](https://smith.langchain.com).
[LangSmith](https://smith.langchain.com) is a unified developer platform for building, testing, and monitoring LLM applications.
Fill out [this form](https://www.langchain.com/contact-sales) to speak with our sales team.

## Quick Install

With pip:

```bash
pip install langchain
```

With conda:

```bash
conda install langchain -c conda-forge
```
@@ -36,12 +38,13 @@ conda install langchain -c conda-forge

For these applications, LangChain simplifies the entire application lifecycle:

- **Open-source libraries**: Build your applications using LangChain's open-source [building blocks](https://python.langchain.com/v0.2/docs/concepts#langchain-expression-language-lcel), [components](https://python.langchain.com/v0.2/docs/concepts), and [third-party integrations](https://python.langchain.com/v0.2/docs/integrations/platforms/).
Use [LangGraph](/docs/concepts/#langgraph) to build stateful agents with first-class streaming and human-in-the-loop support.
- **Open-source libraries**: Build your applications using LangChain's open-source [building blocks](https://python.langchain.com/v0.2/docs/concepts#langchain-expression-language-lcel), [components](https://python.langchain.com/v0.2/docs/concepts), and [third-party integrations](https://python.langchain.com/v0.2/docs/integrations/platforms/).
Use [LangGraph](/docs/concepts/#langgraph) to build stateful agents with first-class streaming and human-in-the-loop support.
- **Productionization**: Inspect, monitor, and evaluate your apps with [LangSmith](https://docs.smith.langchain.com/) so that you can constantly optimize and deploy with confidence.
- **Deployment**: Turn your LangGraph applications into production-ready APIs and Assistants with [LangGraph Cloud](https://langchain-ai.github.io/langgraph/cloud/).

### Open-source libraries

- **`langchain-core`**: Base abstractions and LangChain Expression Language.
- **`langchain-community`**: Third party integrations.
- Some integrations have been further split into **partner packages** that only rely on **`langchain-core`**. Examples include **`langchain_openai`** and **`langchain_anthropic`**.
@@ -49,9 +52,11 @@ Use [LangGraph](/docs/concepts/#langgraph) to build stateful agents with first-c
- **[`LangGraph`](https://langchain-ai.github.io/langgraph/)**: A library for building robust and stateful multi-actor applications with LLMs by modeling steps as edges and nodes in a graph. Integrates smoothly with LangChain, but can be used without it.

### Productionization:

- **[LangSmith](https://docs.smith.langchain.com/)**: A developer platform that lets you debug, test, evaluate, and monitor chains built on any LLM framework and seamlessly integrates with LangChain.

### Deployment:

- **[LangGraph Cloud](https://langchain-ai.github.io/langgraph/cloud/)**: Turn your LangGraph applications into production-ready APIs and Assistants.

@@ -76,15 +81,17 @@ Use [LangGraph](/docs/concepts/#langgraph) to build stateful agents with first-c
And much more! Head to the [Tutorials](https://python.langchain.com/v0.2/docs/tutorials/) section of the docs for more.

## 🚀 How does LangChain help?

The main value props of the LangChain libraries are:

1. **Components**: composable building blocks, tools and integrations for working with language models. Components are modular and easy-to-use, whether you are using the rest of the LangChain framework or not
2. **Off-the-shelf chains**: built-in assemblages of components for accomplishing higher-level tasks

Off-the-shelf chains make it easy to get started. Components make it easy to customize existing chains and build new ones.
Off-the-shelf chains make it easy to get started. Components make it easy to customize existing chains and build new ones.

## LangChain Expression Language (LCEL)

LCEL is the foundation of many of LangChain's components, and is a declarative way to compose chains. LCEL was designed from day 1 to support putting prototypes in production, with no code changes, from the simplest “prompt + LLM” chain to the most complex chains.
LCEL is a key part of LangChain, allowing you to build and organize chains of processes in a straightforward, declarative manner. It was designed to support taking prototypes directly into production without needing to alter any code. This means you can use LCEL to set up everything from basic "prompt + LLM" setups to intricate, multi-step workflows.

- **[Overview](https://python.langchain.com/v0.2/docs/concepts/#langchain-expression-language-lcel)**: LCEL and its benefits
- **[Interface](https://python.langchain.com/v0.2/docs/concepts/#runnable-interface)**: The standard Runnable interface for LCEL objects
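For orientation only (this snippet is not part of the README diff): a minimal sketch of the kind of "prompt + LLM" chain the LCEL paragraph above describes, assuming `langchain-openai` is installed and `OPENAI_API_KEY` is set; the model name here is illustrative.

```python
# Illustrative LCEL sketch: compose a prompt, a chat model, and an output
# parser into one declarative chain, then invoke it.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")
chain = prompt | ChatOpenAI(model="gpt-4o-mini") | StrOutputParser()

print(chain.invoke({"topic": "bears"}))
```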
@@ -123,7 +130,6 @@ Please see [here](https://python.langchain.com) for full documentation, which in
- [🦜🕸️ LangGraph](https://langchain-ai.github.io/langgraph/): Create stateful, multi-actor applications with LLMs. Integrates smoothly with LangChain, but can be used without it.
- [🦜🏓 LangServe](https://python.langchain.com/docs/langserve): Deploy LangChain runnables and chains as REST APIs.

## 💁 Contributing

As an open-source project in a rapidly developing field, we are extremely open to contributions, whether it be in the form of a new feature, improved infrastructure, or better documentation.

@@ -445,7 +445,7 @@
"\n",
"\n",
"def plt_img_base64(img_base64):\n",
" \"\"\"Disply base64 encoded string as image\"\"\"\n",
" \"\"\"Display base64 encoded string as image\"\"\"\n",
" # Create an HTML img tag with the base64 string as the source\n",
" image_html = f'<img src=\"data:image/jpeg;base64,{img_base64}\" />'\n",
" # Display the image by rendering the HTML\n",

@@ -4,6 +4,8 @@ Example code for building applications with LangChain, with an emphasis on more

Notebook | Description
:- | :-
[agent_fireworks_ai_langchain_mongodb.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/agent_fireworks_ai_langchain_mongodb.ipynb) | Build an AI Agent With Memory Using MongoDB, LangChain and FireWorksAI.
[mongodb-langchain-cache-memory.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/mongodb-langchain-cache-memory.ipynb) | Build a RAG Application with Semantic Cache Using MongoDB and LangChain.
[LLaMA2_sql_chat.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/LLaMA2_sql_chat.ipynb) | Build a chat application that interacts with a SQL database using an open source llm (llama2), specifically demonstrated on an SQLite database containing rosters.
[Semi_Structured_RAG.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/Semi_Structured_RAG.ipynb) | Perform retrieval-augmented generation (rag) on documents with semi-structured data, including text and tables, using unstructured for parsing, multi-vector retriever for storing, and lcel for implementing chains.
[Semi_structured_and_multi_moda...](https://github.com/langchain-ai/langchain/tree/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb) | Perform retrieval-augmented generation (rag) on documents with semi-structured data and images, using unstructured for parsing, multi-vector retriever for storage and retrieval, and lcel for implementing chains.

1593 cookbook/agent_fireworks_ai_langchain_mongodb.ipynb (Normal file)
File diff suppressed because one or more lines are too long
@@ -135,7 +135,7 @@
"source": [
"## Instantiate model, DB, code interpreter\n",
"\n",
"We'll use the LangChain [SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase) interface to connect to our DB and query it. This works with any SQL database supported by [SQLAlchemy](https://www.sqlalchemy.org/)."
"We'll use the LangChain [SQLDatabase](https://python.langchain.com/v0.2/api_reference/community/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase) interface to connect to our DB and query it. This works with any SQL database supported by [SQLAlchemy](https://www.sqlalchemy.org/)."
]
},
{

@@ -38,7 +38,7 @@
"source": [
"Connection is via `cassio` using `auto=True` parameter, and the notebook uses OpenAI. You should create a `.env` file accordingly.\n",
"\n",
"For Casssandra, set:\n",
"For Cassandra, set:\n",
"```bash\n",
"CASSANDRA_CONTACT_POINTS\n",
"CASSANDRA_USERNAME\n",

@@ -336,7 +336,7 @@
" # Create a prompt template with format instructions and the query\n",
" prompt = PromptTemplate(\n",
" template=\"\"\"You are generating questions that is well optimized for retrieval. \\n \n",
" Look at the input and try to reason about the underlying sematic intent / meaning. \\n \n",
" Look at the input and try to reason about the underlying semantic intent / meaning. \\n \n",
" Here is the initial question:\n",
" \\n ------- \\n\n",
" {question} \n",
@@ -643,7 +643,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3.11.1 64-bit",
"language": "python",
"name": "python3"
},
@@ -657,7 +657,12 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
"version": "3.11.1"
},
"vscode": {
"interpreter": {
"hash": "1a1af0ee75eeea9e2e1ee996c87e7a2b11a0bebd85af04bb136d915cefc0abce"
}
}
},
"nbformat": 4,

@@ -647,7 +647,7 @@
"metadata": {},
"source": [
"**Now we see the results are correct as it is mentioned in earnings release.** <br>\n",
"**To further automate, we will create a chain that will take input as question and retriever so that we don't need to retrieve documents seperately**"
"**To further automate, we will create a chain that will take input as question and retriever so that we don't need to retrieve documents separately**"
]
},
{
@@ -734,9 +734,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "rag-on-intel",
"display_name": "Python 3.11.1 64-bit",
"language": "python",
"name": "rag-on-intel"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -748,7 +748,12 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
"version": "3.11.1"
},
"vscode": {
"interpreter": {
"hash": "1a1af0ee75eeea9e2e1ee996c87e7a2b11a0bebd85af04bb136d915cefc0abce"
}
}
},
"nbformat": 4,

@@ -18,7 +18,7 @@ for dir; do \
if find "$$dir" -maxdepth 1 -type f \( -name "pyproject.toml" -o -name "setup.py" \) | grep -q .; then \
echo "$$dir"; \
fi \
done' sh {} + | grep -vE "airbyte|ibm|couchbase" | tr '\n' ' ')
done' sh {} + | grep -vE "airbyte|ibm|couchbase|databricks" | tr '\n' ' ')

PORT ?= 3001

@@ -39,7 +39,6 @@ install-py-deps:
generate-files:
mkdir -p $(INTERMEDIATE_DIR)
cp -r $(SOURCE_DIR)/* $(INTERMEDIATE_DIR)
mkdir -p $(INTERMEDIATE_DIR)/templates

$(PYTHON) scripts/tool_feat_table.py $(INTERMEDIATE_DIR)

@@ -47,8 +46,6 @@ generate-files:

$(PYTHON) scripts/partner_pkg_table.py $(INTERMEDIATE_DIR)

$(PYTHON) scripts/copy_templates.py $(INTERMEDIATE_DIR)

wget -q https://raw.githubusercontent.com/langchain-ai/langserve/main/README.md -O $(INTERMEDIATE_DIR)/langserve.md
$(PYTHON) scripts/resolve_local_links.py $(INTERMEDIATE_DIR)/langserve.md https://github.com/langchain-ai/langserve/tree/main/

@@ -76,6 +73,8 @@ append-related:
generate-references:
$(PYTHON) scripts/generate_api_reference_links.py --docs_dir $(OUTPUT_NEW_DOCS_DIR)

update-md: generate-files md-sync

build: install-py-deps generate-files copy-infra render md-sync append-related

vercel-build: install-vercel-deps build generate-references

@@ -281,7 +281,7 @@ def _construct_doc(
module_doc = f"""\
.. currentmodule:: {package_namespace}

.. _{module}:
.. _{package_namespace}_{module}:
"""
_members = members_by_namespace[module]
classes = [
@@ -317,8 +317,8 @@ def _construct_doc(
"""

index_autosummary += f"""
:ref:`{module}`
{'^' * (len(module) + 5)}
:ref:`{package_namespace}_{module}`
{'^' * (len(package_namespace) + len(module) + 8)}
"""

if classes:
@@ -394,7 +394,7 @@ def _construct_doc(
"""

index_autosummary += """
**Deprecated classes*
**Deprecated classes**

.. autosummary::
"""
@@ -438,7 +438,7 @@ def _construct_doc(
{fstring}

"""
index_autosummary += """
index_autosummary += f"""
**Deprecated functions**

.. autosummary::
@@ -448,7 +448,6 @@ def _construct_doc(
"""
docs.append((f"{module}.rst", module_doc))
docs.append(("index.rst", index_doc + index_autosummary))

return docs


@@ -472,6 +471,14 @@ def _build_rst_file(package_name: str = "langchain") -> None:


def _package_namespace(package_name: str) -> str:
"""Returns the package name used.

Args:
package_name: Can be either "langchain" or "core" or "experimental".

Returns:
modified package_name: Can be either "langchain" or "langchain_{package_name}"
"""
return (
package_name
if package_name == "langchain"
@@ -530,47 +537,7 @@ def _build_index(dirs: List[str]) -> None:
ordered = ["core", "langchain", "text-splitters", "community", "experimental"]
main_ = [dir_ for dir_ in ordered if dir_ in dirs]
integrations = sorted(dir_ for dir_ in dirs if dir_ not in main_)
main_headers = [
" ".join(custom_names.get(x, x.title()) for x in dir_.split("-"))
for dir_ in main_
]
integration_headers = [
" ".join(
custom_names.get(x, x.title().replace("ai", "AI").replace("db", "DB"))
for x in dir_.split("-")
)
for dir_ in integrations
]
main_tree = "\n".join(
f"{header_name}<{dir_.replace('-', '_')}/index>"
for header_name, dir_ in zip(main_headers, main_)
)
main_grid = "\n".join(
f'- header: "**{header_name}**"\n content: "{_package_namespace(dir_).replace("_", "-")}: {_get_package_version(_package_dir(dir_))}"\n link: {dir_.replace("-", "_")}/index.html'
for header_name, dir_ in zip(main_headers, main_)
)
integration_tree = "\n".join(
f"{header_name}<{dir_.replace('-', '_')}/index>"
for header_name, dir_ in zip(integration_headers, integrations)
)

integration_grid = ""
integrations_to_show = [
"openai",
"anthropic",
"google-vertexai",
"aws",
"huggingface",
"mistralai",
]
for header_name, dir_ in sorted(
zip(integration_headers, integrations),
key=lambda h_d: integrations_to_show.index(h_d[1])
if h_d[1] in integrations_to_show
else len(integrations_to_show),
)[: len(integrations_to_show)]:
integration_grid += f'\n- header: "**{header_name}**"\n content: {_package_namespace(dir_).replace("_", "-")} {_get_package_version(_package_dir(dir_))}\n link: {dir_.replace("-", "_")}/index.html'
doc = f"""# LangChain Python API Reference
doc = """# LangChain Python API Reference

Welcome to the LangChain Python API reference. This is a reference for all
`langchain-x` packages.
@@ -578,8 +545,22 @@ Welcome to the LangChain Python API reference. This is a reference for all
For user guides see [https://python.langchain.com](https://python.langchain.com).

For the legacy API reference hosted on ReadTheDocs see [https://api.python.langchain.com/](https://api.python.langchain.com/).
"""

## Base packages
if main_:
main_headers = [
" ".join(custom_names.get(x, x.title()) for x in dir_.split("-"))
for dir_ in main_
]
main_tree = "\n".join(
f"{header_name}<{dir_.replace('-', '_')}/index>"
for header_name, dir_ in zip(main_headers, main_)
)
main_grid = "\n".join(
f'- header: "**{header_name}**"\n content: "{_package_namespace(dir_).replace("_", "-")}: {_get_package_version(_package_dir(dir_))}"\n link: {dir_.replace("-", "_")}/index.html'
for header_name, dir_ in zip(main_headers, main_)
)
doc += f"""## Base packages

```{{gallery-grid}}
:grid-columns: "1 2 2 3"
@@ -594,8 +575,37 @@ For the legacy API reference hosted on ReadTheDocs see [https://api.python.langc

{main_tree}
```
"""
if integrations:
integration_headers = [
" ".join(
custom_names.get(x, x.title().replace("ai", "AI").replace("db", "DB"))
for x in dir_.split("-")
)
for dir_ in integrations
]
integration_tree = "\n".join(
f"{header_name}<{dir_.replace('-', '_')}/index>"
for header_name, dir_ in zip(integration_headers, integrations)
)

## Integrations
integration_grid = ""
integrations_to_show = [
"openai",
"anthropic",
"google-vertexai",
"aws",
"huggingface",
"mistralai",
]
for header_name, dir_ in sorted(
zip(integration_headers, integrations),
key=lambda h_d: integrations_to_show.index(h_d[1])
if h_d[1] in integrations_to_show
else len(integrations_to_show),
)[: len(integrations_to_show)]:
integration_grid += f'\n- header: "**{header_name}**"\n content: {_package_namespace(dir_).replace("_", "-")} {_get_package_version(_package_dir(dir_))}\n link: {dir_.replace("-", "_")}/index.html'
doc += f"""## Integrations

```{{gallery-grid}}
:grid-columns: "1 2 2 3"
@@ -612,7 +622,6 @@ See the full list of integrations in the Section Navigation.

{integration_tree}
```

"""
with open(HERE / "reference.md", "w") as f:
f.write(doc)

File diff suppressed because one or more lines are too long
1451 docs/data/people.yml
File diff suppressed because it is too large
@@ -4,51 +4,90 @@ LangChain implements the latest research in the field of Natural Language Proces
This page contains `arXiv` papers referenced in the LangChain Documentation, API Reference,
Templates, and Cookbooks.

From the opposite direction, scientists use LangChain in research and reference LangChain in the research papers.
Here you find [such papers](https://arxiv.org/search/?query=langchain&searchtype=all&source=header).
From the opposite direction, scientists use `LangChain` in research and reference it in the research papers.

`arXiv` papers with references to:
[LangChain](https://arxiv.org/search/?query=langchain&searchtype=all&source=header) | [LangGraph](https://arxiv.org/search/?query=langgraph&searchtype=all&source=header) | [LangSmith](https://arxiv.org/search/?query=langsmith&searchtype=all&source=header)

## Summary

| arXiv id / Title | Authors | Published date 🔻 | LangChain Documentation|
|------------------|---------|-------------------|------------------------|
| `2402.03620v1` [Self-Discover: Large Language Models Self-Compose Reasoning Structures](http://arxiv.org/abs/2402.03620v1) | Pei Zhou, Jay Pujara, Xiang Ren, et al. | 2024-02-06 | `Cookbook:` [self-discover](https://github.com/langchain-ai/langchain/blob/master/cookbook/self-discover.ipynb)
| `2401.18059v1` [RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval](http://arxiv.org/abs/2401.18059v1) | Parth Sarthi, Salman Abdullah, Aditi Tuli, et al. | 2024-01-31 | `Cookbook:` [RAPTOR](https://github.com/langchain-ai/langchain/blob/master/cookbook/RAPTOR.ipynb)
| `2401.15884v2` [Corrective Retrieval Augmented Generation](http://arxiv.org/abs/2401.15884v2) | Shi-Qi Yan, Jia-Chen Gu, Yun Zhu, et al. | 2024-01-29 | `Cookbook:` [langgraph_crag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_crag.ipynb)
| `2401.04088v1` [Mixtral of Experts](http://arxiv.org/abs/2401.04088v1) | Albert Q. Jiang, Alexandre Sablayrolles, Antoine Roux, et al. | 2024-01-08 | `Cookbook:` [together_ai](https://github.com/langchain-ai/langchain/blob/master/cookbook/together_ai.ipynb)
| `2312.06648v2` [Dense X Retrieval: What Retrieval Granularity Should We Use?](http://arxiv.org/abs/2312.06648v2) | Tong Chen, Hongwei Wang, Sihao Chen, et al. | 2023-12-11 | `Template:` [propositional-retrieval](https://python.langchain.com/docs/templates/propositional-retrieval)
| `2311.09210v1` [Chain-of-Note: Enhancing Robustness in Retrieval-Augmented Language Models](http://arxiv.org/abs/2311.09210v1) | Wenhao Yu, Hongming Zhang, Xiaoman Pan, et al. | 2023-11-15 | `Template:` [chain-of-note-wiki](https://python.langchain.com/docs/templates/chain-of-note-wiki)
| `2310.11511v1` [Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection](http://arxiv.org/abs/2310.11511v1) | Akari Asai, Zeqiu Wu, Yizhong Wang, et al. | 2023-10-17 | `Cookbook:` [langgraph_self_rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_self_rag.ipynb)
| `2310.06117v2` [Take a Step Back: Evoking Reasoning via Abstraction in Large Language Models](http://arxiv.org/abs/2310.06117v2) | Huaixiu Steven Zheng, Swaroop Mishra, Xinyun Chen, et al. | 2023-10-09 | `Template:` [stepback-qa-prompting](https://python.langchain.com/docs/templates/stepback-qa-prompting), `Cookbook:` [stepback-qa](https://github.com/langchain-ai/langchain/blob/master/cookbook/stepback-qa.ipynb)
| `2307.09288v2` [Llama 2: Open Foundation and Fine-Tuned Chat Models](http://arxiv.org/abs/2307.09288v2) | Hugo Touvron, Louis Martin, Kevin Stone, et al. | 2023-07-18 | `Cookbook:` [Semi_Structured_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_Structured_RAG.ipynb)
| `2305.14283v3` [Query Rewriting for Retrieval-Augmented Large Language Models](http://arxiv.org/abs/2305.14283v3) | Xinbei Ma, Yeyun Gong, Pengcheng He, et al. | 2023-05-23 | `Template:` [rewrite-retrieve-read](https://python.langchain.com/docs/templates/rewrite-retrieve-read), `Cookbook:` [rewrite](https://github.com/langchain-ai/langchain/blob/master/cookbook/rewrite.ipynb)
| `2305.08291v1` [Large Language Model Guided Tree-of-Thought](http://arxiv.org/abs/2305.08291v1) | Jieyi Long | 2023-05-15 | `API:` [langchain_experimental.tot](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.tot), `Cookbook:` [tree_of_thought](https://github.com/langchain-ai/langchain/blob/master/cookbook/tree_of_thought.ipynb)
| `2305.04091v3` [Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models](http://arxiv.org/abs/2305.04091v3) | Lei Wang, Wanyu Xu, Yihuai Lan, et al. | 2023-05-06 | `Cookbook:` [plan_and_execute_agent](https://github.com/langchain-ai/langchain/blob/master/cookbook/plan_and_execute_agent.ipynb)
| `2304.08485v2` [Visual Instruction Tuning](http://arxiv.org/abs/2304.08485v2) | Haotian Liu, Chunyuan Li, Qingyang Wu, et al. | 2023-04-17 | `Cookbook:` [Semi_structured_and_multi_modal_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb), [Semi_structured_multi_modal_RAG_LLaMA2](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb)
| `2304.03442v2` [Generative Agents: Interactive Simulacra of Human Behavior](http://arxiv.org/abs/2304.03442v2) | Joon Sung Park, Joseph C. O'Brien, Carrie J. Cai, et al. | 2023-04-07 | `Cookbook:` [multiagent_bidding](https://github.com/langchain-ai/langchain/blob/master/cookbook/multiagent_bidding.ipynb), [generative_agents_interactive_simulacra_of_human_behavior](https://github.com/langchain-ai/langchain/blob/master/cookbook/generative_agents_interactive_simulacra_of_human_behavior.ipynb)
| `2303.17760v2` [CAMEL: Communicative Agents for "Mind" Exploration of Large Language Model Society](http://arxiv.org/abs/2303.17760v2) | Guohao Li, Hasan Abed Al Kader Hammoud, Hani Itani, et al. | 2023-03-31 | `Cookbook:` [camel_role_playing](https://github.com/langchain-ai/langchain/blob/master/cookbook/camel_role_playing.ipynb)
| `2303.17580v4` [HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face](http://arxiv.org/abs/2303.17580v4) | Yongliang Shen, Kaitao Song, Xu Tan, et al. | 2023-03-30 | `API:` [langchain_experimental.autonomous_agents](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.autonomous_agents), `Cookbook:` [hugginggpt](https://github.com/langchain-ai/langchain/blob/master/cookbook/hugginggpt.ipynb)
| `2303.08774v6` [GPT-4 Technical Report](http://arxiv.org/abs/2303.08774v6) | OpenAI, Josh Achiam, Steven Adler, et al. | 2023-03-15 | `Docs:` [docs/integrations/vectorstores/mongodb_atlas](https://python.langchain.com/docs/integrations/vectorstores/mongodb_atlas)
| `2301.10226v4` [A Watermark for Large Language Models](http://arxiv.org/abs/2301.10226v4) | John Kirchenbauer, Jonas Geiping, Yuxin Wen, et al. | 2023-01-24 | `API:` [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...OCIModelDeploymentTGI](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html#langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
| `2212.10496v1` [Precise Zero-Shot Dense Retrieval without Relevance Labels](http://arxiv.org/abs/2212.10496v1) | Luyu Gao, Xueguang Ma, Jimmy Lin, et al. | 2022-12-20 | `API:` [langchain...HypotheticalDocumentEmbedder](https://api.python.langchain.com/en/latest/chains/langchain.chains.hyde.base.HypotheticalDocumentEmbedder.html#langchain.chains.hyde.base.HypotheticalDocumentEmbedder), `Template:` [hyde](https://python.langchain.com/docs/templates/hyde), `Cookbook:` [hypothetical_document_embeddings](https://github.com/langchain-ai/langchain/blob/master/cookbook/hypothetical_document_embeddings.ipynb)
| `2212.07425v3` [Robust and Explainable Identification of Logical Fallacies in Natural Language Arguments](http://arxiv.org/abs/2212.07425v3) | Zhivar Sourati, Vishnu Priya Prasanna Venkatesh, Darshan Deshpande, et al. | 2022-12-12 | `API:` [langchain_experimental.fallacy_removal](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.fallacy_removal)
| `2211.13892v2` [Complementary Explanations for Effective In-Context Learning](http://arxiv.org/abs/2211.13892v2) | Xi Ye, Srinivasan Iyer, Asli Celikyilmaz, et al. | 2022-11-25 | `API:` [langchain_core...MaxMarginalRelevanceExampleSelector](https://api.python.langchain.com/en/latest/example_selectors/langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector.html#langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector)
| `2211.10435v2` [PAL: Program-aided Language Models](http://arxiv.org/abs/2211.10435v2) | Luyu Gao, Aman Madaan, Shuyan Zhou, et al. | 2022-11-18 | `API:` [langchain_experimental...PALChain](https://api.python.langchain.com/en/latest/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain), [langchain_experimental.pal_chain](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.pal_chain), `Cookbook:` [program_aided_language_model](https://github.com/langchain-ai/langchain/blob/master/cookbook/program_aided_language_model.ipynb)
| `2210.03629v3` [ReAct: Synergizing Reasoning and Acting in Language Models](http://arxiv.org/abs/2210.03629v3) | Shunyu Yao, Jeffrey Zhao, Dian Yu, et al. | 2022-10-06 | `Docs:` [docs/integrations/providers/cohere](https://python.langchain.com/docs/integrations/providers/cohere), [docs/integrations/chat/huggingface](https://python.langchain.com/docs/integrations/chat/huggingface), [docs/integrations/tools/ionic_shopping](https://python.langchain.com/docs/integrations/tools/ionic_shopping), `API:` [langchain...create_react_agent](https://api.python.langchain.com/en/latest/agents/langchain.agents.react.agent.create_react_agent.html#langchain.agents.react.agent.create_react_agent), [langchain...TrajectoryEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain)
| `2209.10785v2` [Deep Lake: a Lakehouse for Deep Learning](http://arxiv.org/abs/2209.10785v2) | Sasun Hambardzumyan, Abhinav Tuli, Levon Ghukasyan, et al. | 2022-09-22 | `Docs:` [docs/integrations/providers/activeloop_deeplake](https://python.langchain.com/docs/integrations/providers/activeloop_deeplake)
| `2205.12654v1` [Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages](http://arxiv.org/abs/2205.12654v1) | Kevin Heffernan, Onur Çelebi, Holger Schwenk | 2022-05-25 | `API:` [langchain_community...LaserEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.laser.LaserEmbeddings.html#langchain_community.embeddings.laser.LaserEmbeddings)
| `2204.00498v1` [Evaluating the Text-to-SQL Capabilities of Large Language Models](http://arxiv.org/abs/2204.00498v1) | Nitarshan Rajkumar, Raymond Li, Dzmitry Bahdanau | 2022-03-15 | `API:` [langchain_community...SparkSQL](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL), [langchain_community...SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase)
| `2202.00666v5` [Locally Typical Sampling](http://arxiv.org/abs/2202.00666v5) | Clara Meister, Tiago Pimentel, Gian Wiher, et al. | 2022-02-01 | `API:` [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
| `2103.00020v1` [Learning Transferable Visual Models From Natural Language Supervision](http://arxiv.org/abs/2103.00020v1) | Alec Radford, Jong Wook Kim, Chris Hallacy, et al. | 2021-02-26 | `API:` [langchain_experimental.open_clip](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.open_clip)
| `1909.05858v2` [CTRL: A Conditional Transformer Language Model for Controllable Generation](http://arxiv.org/abs/1909.05858v2) | Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney, et al. | 2019-09-11 | `API:` [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
| `1908.10084v1` [Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks](http://arxiv.org/abs/1908.10084v1) | Nils Reimers, Iryna Gurevych | 2019-08-27 | `Docs:` [docs/integrations/text_embedding/sentence_transformers](https://python.langchain.com/docs/integrations/text_embedding/sentence_transformers)
| `2403.14403v2` [Adaptive-RAG: Learning to Adapt Retrieval-Augmented Large Language Models through Question Complexity](http://arxiv.org/abs/2403.14403v2) | Soyeong Jeong, Jinheon Baek, Sukmin Cho, et al. | 2024‑03‑21 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
| `2402.03620v1` [Self-Discover: Large Language Models Self-Compose Reasoning Structures](http://arxiv.org/abs/2402.03620v1) | Pei Zhou, Jay Pujara, Xiang Ren, et al. | 2024‑02‑06 | `Cookbook:` [Self-Discover](https://github.com/langchain-ai/langchain/blob/master/cookbook/self-discover.ipynb)
| `2402.03367v2` [RAG-Fusion: a New Take on Retrieval-Augmented Generation](http://arxiv.org/abs/2402.03367v2) | Zackary Rackauckas | 2024‑01‑31 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
| `2401.18059v1` [RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval](http://arxiv.org/abs/2401.18059v1) | Parth Sarthi, Salman Abdullah, Aditi Tuli, et al. | 2024‑01‑31 | `Cookbook:` [Raptor](https://github.com/langchain-ai/langchain/blob/master/cookbook/RAPTOR.ipynb)
| `2401.15884v2` [Corrective Retrieval Augmented Generation](http://arxiv.org/abs/2401.15884v2) | Shi-Qi Yan, Jia-Chen Gu, Yun Zhu, et al. | 2024‑01‑29 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts), `Cookbook:` [Langgraph Crag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_crag.ipynb)
| `2401.08500v1` [Code Generation with AlphaCodium: From Prompt Engineering to Flow Engineering](http://arxiv.org/abs/2401.08500v1) | Tal Ridnik, Dedy Kredo, Itamar Friedman | 2024‑01‑16 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
| `2401.04088v1` [Mixtral of Experts](http://arxiv.org/abs/2401.04088v1) | Albert Q. Jiang, Alexandre Sablayrolles, Antoine Roux, et al. | 2024‑01‑08 | `Cookbook:` [Together Ai](https://github.com/langchain-ai/langchain/blob/master/cookbook/together_ai.ipynb)
| `2312.06648v2` [Dense X Retrieval: What Retrieval Granularity Should We Use?](http://arxiv.org/abs/2312.06648v2) | Tong Chen, Hongwei Wang, Sihao Chen, et al. | 2023‑12‑11 | `Template:` [propositional-retrieval](https://python.langchain.com/docs/templates/propositional-retrieval)
| `2311.09210v1` [Chain-of-Note: Enhancing Robustness in Retrieval-Augmented Language Models](http://arxiv.org/abs/2311.09210v1) | Wenhao Yu, Hongming Zhang, Xiaoman Pan, et al. | 2023‑11‑15 | `Template:` [chain-of-note-wiki](https://python.langchain.com/docs/templates/chain-of-note-wiki)
| `2310.11511v1` [Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection](http://arxiv.org/abs/2310.11511v1) | Akari Asai, Zeqiu Wu, Yizhong Wang, et al. | 2023‑10‑17 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts), `Cookbook:` [Langgraph Self Rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_self_rag.ipynb)
| `2310.06117v2` [Take a Step Back: Evoking Reasoning via Abstraction in Large Language Models](http://arxiv.org/abs/2310.06117v2) | Huaixiu Steven Zheng, Swaroop Mishra, Xinyun Chen, et al. | 2023‑10‑09 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts), `Template:` [stepback-qa-prompting](https://python.langchain.com/docs/templates/stepback-qa-prompting), `Cookbook:` [Stepback-Qa](https://github.com/langchain-ai/langchain/blob/master/cookbook/stepback-qa.ipynb)
| `2307.15337v3` [Skeleton-of-Thought: Prompting LLMs for Efficient Parallel Generation](http://arxiv.org/abs/2307.15337v3) | Xuefei Ning, Zinan Lin, Zixuan Zhou, et al. | 2023‑07‑28 | `Template:` [skeleton-of-thought](https://python.langchain.com/docs/templates/skeleton-of-thought)
| `2307.09288v2` [Llama 2: Open Foundation and Fine-Tuned Chat Models](http://arxiv.org/abs/2307.09288v2) | Hugo Touvron, Louis Martin, Kevin Stone, et al. | 2023‑07‑18 | `Cookbook:` [Semi Structured Rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_Structured_RAG.ipynb)
| `2307.03172v3` [Lost in the Middle: How Language Models Use Long Contexts](http://arxiv.org/abs/2307.03172v3) | Nelson F. Liu, Kevin Lin, John Hewitt, et al. | 2023‑07‑06 | `Docs:` [docs/how_to/long_context_reorder](https://python.langchain.com/v0.2/docs/how_to/long_context_reorder)
| `2305.14283v3` [Query Rewriting for Retrieval-Augmented Large Language Models](http://arxiv.org/abs/2305.14283v3) | Xinbei Ma, Yeyun Gong, Pengcheng He, et al. | 2023‑05‑23 | `Template:` [rewrite-retrieve-read](https://python.langchain.com/docs/templates/rewrite-retrieve-read), `Cookbook:` [Rewrite](https://github.com/langchain-ai/langchain/blob/master/cookbook/rewrite.ipynb)
| `2305.08291v1` [Large Language Model Guided Tree-of-Thought](http://arxiv.org/abs/2305.08291v1) | Jieyi Long | 2023‑05‑15 | `API:` [langchain_experimental.tot](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.tot), `Cookbook:` [Tree Of Thought](https://github.com/langchain-ai/langchain/blob/master/cookbook/tree_of_thought.ipynb)
| `2305.04091v3` [Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models](http://arxiv.org/abs/2305.04091v3) | Lei Wang, Wanyu Xu, Yihuai Lan, et al. | 2023‑05‑06 | `Cookbook:` [Plan And Execute Agent](https://github.com/langchain-ai/langchain/blob/master/cookbook/plan_and_execute_agent.ipynb)
| `2305.02156v1` [Zero-Shot Listwise Document Reranking with a Large Language Model](http://arxiv.org/abs/2305.02156v1) | Xueguang Ma, Xinyu Zhang, Ronak Pradeep, et al. | 2023‑05‑03 | `Docs:` [docs/how_to/contextual_compression](https://python.langchain.com/v0.2/docs/how_to/contextual_compression), `API:` [langchain...LLMListwiseRerank](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html#langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank)
| `2304.08485v2` [Visual Instruction Tuning](http://arxiv.org/abs/2304.08485v2) | Haotian Liu, Chunyuan Li, Qingyang Wu, et al. | 2023‑04‑17 | `Cookbook:` [Semi Structured Multi Modal Rag Llama2](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb), [Semi Structured And Multi Modal Rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb)
| `2304.03442v2` [Generative Agents: Interactive Simulacra of Human Behavior](http://arxiv.org/abs/2304.03442v2) | Joon Sung Park, Joseph C. O'Brien, Carrie J. Cai, et al. | 2023‑04‑07 | `Cookbook:` [Generative Agents Interactive Simulacra Of Human Behavior](https://github.com/langchain-ai/langchain/blob/master/cookbook/generative_agents_interactive_simulacra_of_human_behavior.ipynb), [Multiagent Bidding](https://github.com/langchain-ai/langchain/blob/master/cookbook/multiagent_bidding.ipynb)
| `2303.17760v2` [CAMEL: Communicative Agents for "Mind" Exploration of Large Language Model Society](http://arxiv.org/abs/2303.17760v2) | Guohao Li, Hasan Abed Al Kader Hammoud, Hani Itani, et al. | 2023‑03‑31 | `Cookbook:` [Camel Role Playing](https://github.com/langchain-ai/langchain/blob/master/cookbook/camel_role_playing.ipynb)
| `2303.17580v4` [HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face](http://arxiv.org/abs/2303.17580v4) | Yongliang Shen, Kaitao Song, Xu Tan, et al. | 2023‑03‑30 | `API:` [langchain_experimental.autonomous_agents](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.autonomous_agents), `Cookbook:` [Hugginggpt](https://github.com/langchain-ai/langchain/blob/master/cookbook/hugginggpt.ipynb)
| `2301.10226v4` [A Watermark for Large Language Models](http://arxiv.org/abs/2301.10226v4) | John Kirchenbauer, Jonas Geiping, Yuxin Wen, et al. | 2023‑01‑24 | `API:` [langchain_community...OCIModelDeploymentTGI](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html#langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
| `2212.10496v1` [Precise Zero-Shot Dense Retrieval without Relevance Labels](http://arxiv.org/abs/2212.10496v1) | Luyu Gao, Xueguang Ma, Jimmy Lin, et al. | 2022‑12‑20 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts), `API:` [langchain...HypotheticalDocumentEmbedder](https://api.python.langchain.com/en/latest/chains/langchain.chains.hyde.base.HypotheticalDocumentEmbedder.html#langchain.chains.hyde.base.HypotheticalDocumentEmbedder), `Template:` [hyde](https://python.langchain.com/docs/templates/hyde), `Cookbook:` [Hypothetical Document Embeddings](https://github.com/langchain-ai/langchain/blob/master/cookbook/hypothetical_document_embeddings.ipynb)
| `2212.08073v1` [Constitutional AI: Harmlessness from AI Feedback](http://arxiv.org/abs/2212.08073v1) | Yuntao Bai, Saurav Kadavath, Sandipan Kundu, et al. | 2022‑12‑15 | `Docs:` [docs/versions/migrating_chains/constitutional_chain](https://python.langchain.com/v0.2/docs/versions/migrating_chains/constitutional_chain)
| `2212.07425v3` [Robust and Explainable Identification of Logical Fallacies in Natural Language Arguments](http://arxiv.org/abs/2212.07425v3) | Zhivar Sourati, Vishnu Priya Prasanna Venkatesh, Darshan Deshpande, et al. | 2022‑12‑12 | `API:` [langchain_experimental.fallacy_removal](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.fallacy_removal)
| `2211.13892v2` [Complementary Explanations for Effective In-Context Learning](http://arxiv.org/abs/2211.13892v2) | Xi Ye, Srinivasan Iyer, Asli Celikyilmaz, et al. | 2022‑11‑25 | `API:` [langchain_core...MaxMarginalRelevanceExampleSelector](https://api.python.langchain.com/en/latest/example_selectors/langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector.html#langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector)
| `2211.10435v2` [PAL: Program-aided Language Models](http://arxiv.org/abs/2211.10435v2) | Luyu Gao, Aman Madaan, Shuyan Zhou, et al. | 2022‑11‑18 | `API:` [langchain_experimental.pal_chain](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.pal_chain), [langchain_experimental...PALChain](https://api.python.langchain.com/en/latest/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain), `Cookbook:` [Program Aided Language Model](https://github.com/langchain-ai/langchain/blob/master/cookbook/program_aided_language_model.ipynb)
| `2210.11934v2` [An Analysis of Fusion Functions for Hybrid Retrieval](http://arxiv.org/abs/2210.11934v2) | Sebastian Bruch, Siyu Gai, Amir Ingber | 2022‑10‑21 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
| `2210.03629v3` [ReAct: Synergizing Reasoning and Acting in Language Models](http://arxiv.org/abs/2210.03629v3) | Shunyu Yao, Jeffrey Zhao, Dian Yu, et al. | 2022‑10‑06 | `Docs:` [docs/integrations/tools/ionic_shopping](https://python.langchain.com/v0.2/docs/integrations/tools/ionic_shopping), [docs/integrations/providers/cohere](https://python.langchain.com/v0.2/docs/integrations/providers/cohere), [docs/concepts](https://python.langchain.com/v0.2/docs/concepts), `API:` [langchain...create_react_agent](https://api.python.langchain.com/en/latest/agents/langchain.agents.react.agent.create_react_agent.html#langchain.agents.react.agent.create_react_agent), [langchain...TrajectoryEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain)
| `2209.10785v2` [Deep Lake: a Lakehouse for Deep Learning](http://arxiv.org/abs/2209.10785v2) | Sasun Hambardzumyan, Abhinav Tuli, Levon Ghukasyan, et al. | 2022‑09‑22 | `Docs:` [docs/integrations/providers/activeloop_deeplake](https://python.langchain.com/v0.2/docs/integrations/providers/activeloop_deeplake)
| `2205.13147v4` [Matryoshka Representation Learning](http://arxiv.org/abs/2205.13147v4) | Aditya Kusupati, Gantavya Bhatt, Aniket Rege, et al. | 2022‑05‑26 | `Docs:` [docs/integrations/providers/snowflake](https://python.langchain.com/v0.2/docs/integrations/providers/snowflake)
| `2205.12654v1` [Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages](http://arxiv.org/abs/2205.12654v1) | Kevin Heffernan, Onur Çelebi, Holger Schwenk | 2022‑05‑25 | `API:` [langchain_community...LaserEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.laser.LaserEmbeddings.html#langchain_community.embeddings.laser.LaserEmbeddings)
| `2204.00498v1` [Evaluating the Text-to-SQL Capabilities of Large Language Models](http://arxiv.org/abs/2204.00498v1) | Nitarshan Rajkumar, Raymond Li, Dzmitry Bahdanau | 2022‑03‑15 | `Docs:` [docs/tutorials/sql_qa](https://python.langchain.com/v0.2/docs/tutorials/sql_qa), `API:` [langchain_community...SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), [langchain_community...SparkSQL](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL)
| `2202.00666v5` [Locally Typical Sampling](http://arxiv.org/abs/2202.00666v5) | Clara Meister, Tiago Pimentel, Gian Wiher, et al. | 2022‑02‑01 | `API:` [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
| `2112.01488v3` [ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction](http://arxiv.org/abs/2112.01488v3) | Keshav Santhanam, Omar Khattab, Jon Saad-Falcon, et al. | 2021‑12‑02 | `Docs:` [docs/integrations/retrievers/ragatouille](https://python.langchain.com/v0.2/docs/integrations/retrievers/ragatouille), [docs/integrations/providers/ragatouille](https://python.langchain.com/v0.2/docs/integrations/providers/ragatouille), [docs/concepts](https://python.langchain.com/v0.2/docs/concepts), [docs/integrations/providers/dspy](https://python.langchain.com/v0.2/docs/integrations/providers/dspy)
| `2103.00020v1` [Learning Transferable Visual Models From Natural Language Supervision](http://arxiv.org/abs/2103.00020v1) | Alec Radford, Jong Wook Kim, Chris Hallacy, et al. | 2021‑02‑26 | `API:` [langchain_experimental.open_clip](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.open_clip)
| `2005.14165v4` [Language Models are Few-Shot Learners](http://arxiv.org/abs/2005.14165v4) | Tom B. Brown, Benjamin Mann, Nick Ryder, et al. | 2020‑05‑28 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
| `2005.11401v4` [Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks](http://arxiv.org/abs/2005.11401v4) | Patrick Lewis, Ethan Perez, Aleksandra Piktus, et al. | 2020‑05‑22 | `Docs:` [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
| `1909.05858v2` [CTRL: A Conditional Transformer Language Model for Controllable Generation](http://arxiv.org/abs/1909.05858v2) | Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney, et al. | 2019‑09‑11 | `API:` [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
## Adaptive-RAG: Learning to Adapt Retrieval-Augmented Large Language Models through Question Complexity

- **Authors:** Soyeong Jeong, Jinheon Baek, Sukmin Cho, et al.
- **arXiv id:** [2403.14403v2](http://arxiv.org/abs/2403.14403v2) **Published Date:** 2024-03-21
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)

**Abstract:** Retrieval-Augmented Large Language Models (LLMs), which incorporate the
|
||||
non-parametric knowledge from external knowledge bases into LLMs, have emerged
|
||||
as a promising approach to enhancing response accuracy in several tasks, such
|
||||
as Question-Answering (QA). However, even though there are various approaches
|
||||
dealing with queries of different complexities, they either handle simple
|
||||
queries with unnecessary computational overhead or fail to adequately address
|
||||
complex multi-step queries; yet, not all user requests fall into only one of
|
||||
the simple or complex categories. In this work, we propose a novel adaptive QA
|
||||
framework, that can dynamically select the most suitable strategy for
|
||||
(retrieval-augmented) LLMs from the simplest to the most sophisticated ones
|
||||
based on the query complexity. Also, this selection process is operationalized
|
||||
with a classifier, which is a smaller LM trained to predict the complexity
|
||||
level of incoming queries with automatically collected labels, obtained from
|
||||
actual predicted outcomes of models and inherent inductive biases in datasets.
|
||||
This approach offers a balanced strategy, seamlessly adapting between the
|
||||
iterative and single-step retrieval-augmented LLMs, as well as the no-retrieval
|
||||
methods, in response to a range of query complexities. We validate our model on
|
||||
a set of open-domain QA datasets, covering multiple query complexities, and
|
||||
show that ours enhances the overall efficiency and accuracy of QA systems,
|
||||
compared to relevant baselines including the adaptive retrieval approaches.
|
||||
Code is available at: https://github.com/starsuzi/Adaptive-RAG.
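
The complexity-based routing described above can be prototyped with ordinary LangChain runnables. The sketch below is only an illustration under simplified assumptions: the paper trains a small LM classifier, whereas here a chat model is prompted for the label, the model name is a placeholder, and `rag_chain` stands for whatever retrieval-augmented chain your application already has.

```python
# Illustrative sketch of Adaptive-RAG-style routing (not the paper's trained classifier).
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")  # assumed model name; any chat model works

classify = (
    ChatPromptTemplate.from_template(
        "Classify this question as SIMPLE (answerable from memory) or COMPLEX "
        "(needs retrieval, possibly multiple steps). Reply with one word.\n\n{question}"
    )
    | llm
    | StrOutputParser()
)

direct = ChatPromptTemplate.from_template("{question}") | llm | StrOutputParser()

def answer(question: str, rag_chain) -> str:
    """Route to `rag_chain` (any retrieval-augmented chain you already built)
    only when the classifier deems the question complex."""
    label = classify.invoke({"question": question}).strip().upper()
    chain = rag_chain if label.startswith("COMPLEX") else direct
    return chain.invoke({"question": question})
```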
|
||||
|
||||
## Self-Discover: Large Language Models Self-Compose Reasoning Structures

- **Authors:** Pei Zhou, Jay Pujara, Xiang Ren, et al.
- **arXiv id:** [2402.03620v1](http://arxiv.org/abs/2402.03620v1) **Published Date:** 2024-02-06
- **LangChain:**
- **Cookbook:** [self-discover](https://github.com/langchain-ai/langchain/blob/master/cookbook/self-discover.ipynb)

**Abstract:** [...] the self-discovered reasoning structures are universally applicable across
|
||||
model families: from PaLM 2-L to GPT-4, and from GPT-4 to Llama2, and share
|
||||
commonalities with human reasoning patterns.
|
||||
|
||||
## RAG-Fusion: a New Take on Retrieval-Augmented Generation

- **Authors:** Zackary Rackauckas
- **arXiv id:** [2402.03367v2](http://arxiv.org/abs/2402.03367v2) **Published Date:** 2024-01-31
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)

**Abstract:** Infineon has identified a need for engineers, account managers, and customers
|
||||
to rapidly obtain product information. This problem is traditionally addressed
|
||||
with retrieval-augmented generation (RAG) chatbots, but in this study, I
|
||||
evaluated the use of the newly popularized RAG-Fusion method. RAG-Fusion
|
||||
combines RAG and reciprocal rank fusion (RRF) by generating multiple queries,
|
||||
reranking them with reciprocal scores and fusing the documents and scores.
|
||||
Through manually evaluating answers on accuracy, relevance, and
|
||||
comprehensiveness, I found that RAG-Fusion was able to provide accurate and
|
||||
comprehensive answers due to the generated queries contextualizing the original
|
||||
query from various perspectives. However, some answers strayed off topic when
|
||||
the generated queries' relevance to the original query is insufficient. This
|
||||
research marks significant progress in artificial intelligence (AI) and natural
|
||||
language processing (NLP) applications and demonstrates transformations in a
|
||||
global and multi-industry context.
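
The reciprocal rank fusion step at the heart of RAG-Fusion is only a few lines. The sketch below works on plain lists of document IDs and is just a hedged illustration of the scoring; producing one ranked list per generated query variant is assumed to happen elsewhere.

```python
from collections import defaultdict

def reciprocal_rank_fusion(ranked_lists: list[list[str]], k: int = 60) -> list[str]:
    """Fuse several ranked lists of document IDs with RRF.

    Each document scores the sum of 1 / (k + rank) over every list it appears in;
    k = 60 is the constant commonly used in the literature.
    """
    scores: dict[str, float] = defaultdict(float)
    for ranking in ranked_lists:
        for rank, doc_id in enumerate(ranking, start=1):
            scores[doc_id] += 1.0 / (k + rank)
    return sorted(scores, key=scores.get, reverse=True)

# Rankings returned for three generated query variants:
print(reciprocal_rank_fusion([["a", "b", "c"], ["b", "a", "d"], ["c", "b", "a"]]))
# -> ['b', 'a', 'c', 'd']  (documents that rank well across variants rise to the top)
```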
|
||||
|
||||
## RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval

- **Authors:** Parth Sarthi, Salman Abdullah, Aditi Tuli, et al.
- **arXiv id:** [2401.18059v1](http://arxiv.org/abs/2401.18059v1) **Published Date:** 2024-01-31
- **LangChain:**
- **Cookbook:** [RAPTOR](https://github.com/langchain-ai/langchain/blob/master/cookbook/RAPTOR.ipynb)

**Abstract:** [...] benchmark by 20% in absolute accuracy.
|
||||
|
||||
## Corrective Retrieval Augmented Generation

- **Authors:** Shi-Qi Yan, Jia-Chen Gu, Yun Zhu, et al.
- **arXiv id:** [2401.15884v2](http://arxiv.org/abs/2401.15884v2) **Published Date:** 2024-01-29
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
- **Cookbook:** [langgraph_crag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_crag.ipynb)

**Abstract:** Large language models (LLMs) inevitably exhibit hallucinations since the
|
||||
[...] RAG-based approaches. Experiments on four datasets covering short- and
|
||||
long-form generation tasks show that CRAG can significantly improve the
|
||||
performance of RAG-based approaches.
|
||||
|
||||
## Code Generation with AlphaCodium: From Prompt Engineering to Flow Engineering

- **Authors:** Tal Ridnik, Dedy Kredo, Itamar Friedman
- **arXiv id:** [2401.08500v1](http://arxiv.org/abs/2401.08500v1) **Published Date:** 2024-01-16
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)

**Abstract:** Code generation problems differ from common natural language problems - they
|
||||
require matching the exact syntax of the target language, identifying happy
|
||||
paths and edge cases, paying attention to numerous small details in the problem
|
||||
spec, and addressing other code-specific issues and requirements. Hence, many
|
||||
of the optimizations and tricks that have been successful in natural language
|
||||
generation may not be effective for code tasks. In this work, we propose a new
|
||||
approach to code generation by LLMs, which we call AlphaCodium - a test-based,
|
||||
multi-stage, code-oriented iterative flow, that improves the performances of
|
||||
LLMs on code problems. We tested AlphaCodium on a challenging code generation
|
||||
dataset called CodeContests, which includes competitive programming problems
|
||||
from platforms such as Codeforces. The proposed flow consistently and
|
||||
significantly improves results. On the validation set, for example, GPT-4
|
||||
accuracy (pass@5) increased from 19% with a single well-designed direct prompt
|
||||
to 44% with the AlphaCodium flow. Many of the principles and best practices
|
||||
acquired in this work, we believe, are broadly applicable to general code
|
||||
generation tasks. Full implementation is available at:
|
||||
https://github.com/Codium-ai/AlphaCodium
|
||||
|
||||
## Mixtral of Experts

- **Authors:** Albert Q. Jiang, Alexandre Sablayrolles, Antoine Roux, et al.
- **arXiv id:** [2401.04088v1](http://arxiv.org/abs/2401.04088v1) **Published Date:** 2024-01-08
- **LangChain:**
- **Cookbook:** [together_ai](https://github.com/langchain-ai/langchain/blob/master/cookbook/together_ai.ipynb)

**Abstract:** [...] the base and instruct models are released under the Apache 2.0 license.
|
||||
|
||||
## Dense X Retrieval: What Retrieval Granularity Should We Use?

- **Authors:** Tong Chen, Hongwei Wang, Sihao Chen, et al.
- **arXiv id:** [2312.06648v2](http://arxiv.org/abs/2312.06648v2) **Published Date:** 2023-12-11
- **LangChain:**
- **Template:** [propositional-retrieval](https://python.langchain.com/docs/templates/propositional-retrieval)

**Abstract:** [...] information.
|
||||
|
||||
## Chain-of-Note: Enhancing Robustness in Retrieval-Augmented Language Models

- **Authors:** Wenhao Yu, Hongming Zhang, Xiaoman Pan, et al.
- **arXiv id:** [2311.09210v1](http://arxiv.org/abs/2311.09210v1) **Published Date:** 2023-11-15
- **LangChain:**
- **Template:** [chain-of-note-wiki](https://python.langchain.com/docs/templates/chain-of-note-wiki)

**Abstract:** [...] outside the pre-training knowledge scope.
|
||||
|
||||
## Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection

- **Authors:** Akari Asai, Zeqiu Wu, Yizhong Wang, et al.
- **arXiv id:** [2310.11511v1](http://arxiv.org/abs/2310.11511v1) **Published Date:** 2023-10-17
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
- **Cookbook:** [langgraph_self_rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_self_rag.ipynb)

**Abstract:** Despite their remarkable capabilities, large language models (LLMs) often
|
||||
[...] to these models.
|
||||
|
||||
## Take a Step Back: Evoking Reasoning via Abstraction in Large Language Models

- **Authors:** Huaixiu Steven Zheng, Swaroop Mishra, Xinyun Chen, et al.
- **arXiv id:** [2310.06117v2](http://arxiv.org/abs/2310.06117v2) **Published Date:** 2023-10-09
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
- **Template:** [stepback-qa-prompting](https://python.langchain.com/docs/templates/stepback-qa-prompting)
- **Cookbook:** [stepback-qa](https://github.com/langchain-ai/langchain/blob/master/cookbook/stepback-qa.ipynb)

**Abstract:** [...] including STEM, Knowledge QA, and Multi-Hop Reasoning. For instance, Step-Back
|
||||
Prompting improves PaLM-2L performance on MMLU (Physics and Chemistry) by 7%
|
||||
and 11% respectively, TimeQA by 27%, and MuSiQue by 7%.
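
The stepback-qa-prompting template and cookbook linked above implement this pattern; a condensed, hedged sketch (an existing `retriever` is passed in, the model name is a placeholder, and the prompts are heavily simplified) looks like this:

```python
# Step-back sketch: derive a more generic question, retrieve for both the
# original and the step-back question, then answer over the combined context.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")  # assumed model name

step_back = (
    ChatPromptTemplate.from_template(
        "Rewrite the question as a more generic, higher-level question about "
        "the underlying principle:\n\n{question}"
    )
    | llm
    | StrOutputParser()
)

answer_prompt = ChatPromptTemplate.from_template(
    "Context:\n{normal_context}\n\nBackground context:\n{step_back_context}\n\n"
    "Question: {question}"
)

def step_back_answer(question: str, retriever) -> str:
    generic = step_back.invoke({"question": question})
    join = lambda docs: "\n\n".join(d.page_content for d in docs)
    chain = answer_prompt | llm | StrOutputParser()
    return chain.invoke({
        "question": question,
        "normal_context": join(retriever.invoke(question)),
        "step_back_context": join(retriever.invoke(generic)),
    })
```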
|
||||
|
||||
## Skeleton-of-Thought: Prompting LLMs for Efficient Parallel Generation

- **Authors:** Xuefei Ning, Zinan Lin, Zixuan Zhou, et al.
- **arXiv id:** [2307.15337v3](http://arxiv.org/abs/2307.15337v3) **Published Date:** 2023-07-28
- **LangChain:**
- **Template:** [skeleton-of-thought](https://python.langchain.com/docs/templates/skeleton-of-thought)

**Abstract:** This work aims at decreasing the end-to-end generation latency of large
|
||||
language models (LLMs). One of the major causes of the high generation latency
|
||||
is the sequential decoding approach adopted by almost all state-of-the-art
|
||||
LLMs. In this work, motivated by the thinking and writing process of humans, we
|
||||
propose Skeleton-of-Thought (SoT), which first guides LLMs to generate the
|
||||
skeleton of the answer, and then conducts parallel API calls or batched
|
||||
decoding to complete the contents of each skeleton point in parallel. Not only
|
||||
does SoT provide considerable speed-ups across 12 LLMs, but it can also
|
||||
potentially improve the answer quality on several question categories. SoT is
|
||||
an initial attempt at data-centric optimization for inference efficiency, and
|
||||
showcases the potential of eliciting high-quality answers by explicitly
|
||||
planning the answer structure in language.
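
The skeleton-of-thought template above follows the same two-phase recipe. A rough sketch under simplified assumptions (a placeholder chat model, naive newline splitting of the skeleton, and `.batch()` for the concurrent expansion step):

```python
# Two-phase sketch: draft a skeleton, then expand every point in parallel.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")  # assumed model name
parser = StrOutputParser()

skeleton_chain = (
    ChatPromptTemplate.from_template(
        "Give a numbered skeleton (3-8 short points, one per line) for answering:\n{question}"
    ) | llm | parser
)
expand_chain = (
    ChatPromptTemplate.from_template(
        "Question: {question}\nSkeleton point: {point}\nExpand this point in 1-2 sentences."
    ) | llm | parser
)

def skeleton_of_thought(question: str) -> str:
    points = [p for p in skeleton_chain.invoke({"question": question}).splitlines() if p.strip()]
    # .batch() issues the expansion calls concurrently, which is where the latency win comes from.
    expansions = expand_chain.batch([{"question": question, "point": p} for p in points])
    return "\n".join(expansions)
```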
|
||||
|
||||
## Llama 2: Open Foundation and Fine-Tuned Chat Models

- **Authors:** Hugo Touvron, Louis Martin, Kevin Stone, et al.
- **arXiv id:** [2307.09288v2](http://arxiv.org/abs/2307.09288v2) **Published Date:** 2023-07-18
- **LangChain:**
- **Cookbook:** [Semi_Structured_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_Structured_RAG.ipynb)

**Abstract:** [...] detailed description of our approach to fine-tuning and safety improvements of
|
||||
Llama 2-Chat in order to enable the community to build on our work and
|
||||
contribute to the responsible development of LLMs.
|
||||
|
||||
## Lost in the Middle: How Language Models Use Long Contexts

- **Authors:** Nelson F. Liu, Kevin Lin, John Hewitt, et al.
- **arXiv id:** [2307.03172v3](http://arxiv.org/abs/2307.03172v3) **Published Date:** 2023-07-06
- **LangChain:**
- **Documentation:** [docs/how_to/long_context_reorder](https://python.langchain.com/v0.2/docs/how_to/long_context_reorder)

**Abstract:** While recent language models have the ability to take long contexts as input,
|
||||
relatively little is known about how well they use longer context. We analyze
|
||||
the performance of language models on two tasks that require identifying
|
||||
relevant information in their input contexts: multi-document question answering
|
||||
and key-value retrieval. We find that performance can degrade significantly
|
||||
when changing the position of relevant information, indicating that current
|
||||
language models do not robustly make use of information in long input contexts.
|
||||
In particular, we observe that performance is often highest when relevant
|
||||
information occurs at the beginning or end of the input context, and
|
||||
significantly degrades when models must access relevant information in the
|
||||
middle of long contexts, even for explicitly long-context models. Our analysis
|
||||
provides a better understanding of how language models use their input context
|
||||
and provides new evaluation protocols for future long-context language models.
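
The how-to guide linked above addresses this with a document transformer. A minimal usage sketch, where `retriever` and the query are placeholders:

```python
# Reorder retrieved documents so the most relevant ones sit at the start and
# end of the prompt, where the paper finds models attend most reliably.
from langchain_community.document_transformers import LongContextReorder

docs = retriever.invoke("What did the author say about attention?")  # any retriever
reordered = LongContextReorder().transform_documents(docs)
# Pass `reordered` (not `docs`) to your stuff-documents chain.
```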
|
||||
|
||||
## Query Rewriting for Retrieval-Augmented Large Language Models

- **Authors:** Xinbei Ma, Yeyun Gong, Pengcheng He, et al.
- **arXiv id:** [2305.14283v3](http://arxiv.org/abs/2305.14283v3) **Published Date:** 2023-05-23
- **LangChain:**
- **Template:** [rewrite-retrieve-read](https://python.langchain.com/docs/templates/rewrite-retrieve-read)

**Abstract:** [...] for retrieval-augmented LLM.
|
||||
|
||||
## Large Language Model Guided Tree-of-Thought

- **Authors:** Jieyi Long
- **arXiv id:** [2305.08291v1](http://arxiv.org/abs/2305.08291v1) **Published Date:** 2023-05-15
- **LangChain:**
- **API Reference:** [langchain_experimental.tot](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.tot)

**Abstract:** [...] implementation of the ToT-based Sudoku solver is available on GitHub:
|
||||
|
||||
## Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models

- **Authors:** Lei Wang, Wanyu Xu, Yihuai Lan, et al.
- **arXiv id:** [2305.04091v3](http://arxiv.org/abs/2305.04091v3) **Published Date:** 2023-05-06
- **LangChain:**
- **Cookbook:** [plan_and_execute_agent](https://github.com/langchain-ai/langchain/blob/master/cookbook/plan_and_execute_agent.ipynb)

**Abstract:** [...] Prompting, and has comparable performance with 8-shot CoT prompting on the math
|
||||
reasoning problem. The code can be found at
|
||||
https://github.com/AGI-Edgerunners/Plan-and-Solve-Prompting.
|
||||
|
||||
## Zero-Shot Listwise Document Reranking with a Large Language Model

- **Authors:** Xueguang Ma, Xinyu Zhang, Ronak Pradeep, et al.
- **arXiv id:** [2305.02156v1](http://arxiv.org/abs/2305.02156v1) **Published Date:** 2023-05-03
- **LangChain:**
- **Documentation:** [docs/how_to/contextual_compression](https://python.langchain.com/v0.2/docs/how_to/contextual_compression)
- **API Reference:** [langchain...LLMListwiseRerank](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html#langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank)

**Abstract:** Supervised ranking methods based on bi-encoder or cross-encoder architectures
|
||||
have shown success in multi-stage text ranking tasks, but they require large
|
||||
amounts of relevance judgments as training data. In this work, we propose
|
||||
Listwise Reranker with a Large Language Model (LRL), which achieves strong
|
||||
reranking effectiveness without using any task-specific training data.
|
||||
Different from the existing pointwise ranking methods, where documents are
|
||||
scored independently and ranked according to the scores, LRL directly generates
|
||||
a reordered list of document identifiers given the candidate documents.
|
||||
Experiments on three TREC web search datasets demonstrate that LRL not only
|
||||
outperforms zero-shot pointwise methods when reranking first-stage retrieval
|
||||
results, but can also act as a final-stage reranker to improve the top-ranked
|
||||
results of a pointwise method for improved efficiency. Additionally, we apply
|
||||
our approach to subsets of MIRACL, a recent multilingual retrieval dataset,
|
||||
with results showing its potential to generalize across different languages.
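
The LLMListwiseRerank compressor referenced above applies this idea as a retriever post-processor. A brief sketch, assuming an existing vector-store `retriever` that over-fetches and a chat model that supports structured output:

```python
# Wrap an existing retriever with zero-shot listwise LLM reranking.
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMListwiseRerank
from langchain_openai import ChatOpenAI

reranker = LLMListwiseRerank.from_llm(ChatOpenAI(model="gpt-4o-mini"), top_n=3)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=reranker,
    base_retriever=retriever,  # assumed: any retriever configured to return ~10 candidates
)
docs = compression_retriever.invoke("What did the author say about attention?")
```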
|
||||
|
||||
## Visual Instruction Tuning

- **Authors:** Haotian Liu, Chunyuan Li, Qingyang Wu, et al.
- **arXiv id:** [2304.08485v2](http://arxiv.org/abs/2304.08485v2) **Published Date:** 2023-04-17
- **LangChain:**
- **Cookbook:** [Semi_structured_multi_modal_RAG_LLaMA2](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb), [Semi_structured_and_multi_modal_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb)

**Abstract:** Instruction tuning large language models (LLMs) using machine-generated
|
||||
instruction-following data has improved zero-shot capabilities on new tasks,
|
||||
[...] publicly available.
|
||||
|
||||
## Generative Agents: Interactive Simulacra of Human Behavior

- **Authors:** Joon Sung Park, Joseph C. O'Brien, Carrie J. Cai, et al.
- **arXiv id:** [2304.03442v2](http://arxiv.org/abs/2304.03442v2) **Published Date:** 2023-04-07
- **LangChain:**
- **Cookbook:** [generative_agents_interactive_simulacra_of_human_behavior](https://github.com/langchain-ai/langchain/blob/master/cookbook/generative_agents_interactive_simulacra_of_human_behavior.ipynb), [multiagent_bidding](https://github.com/langchain-ai/langchain/blob/master/cookbook/multiagent_bidding.ipynb)

**Abstract:** Believable proxies of human behavior can empower interactive applications
|
||||
ranging from immersive environments to rehearsal spaces for interpersonal
|
||||
[...] interaction patterns for enabling believable simulations of human behavior.
|
||||
|
||||
## CAMEL: Communicative Agents for "Mind" Exploration of Large Language Model Society

- **Authors:** Guohao Li, Hasan Abed Al Kader Hammoud, Hani Itani, et al.
- **arXiv id:** [2303.17760v2](http://arxiv.org/abs/2303.17760v2) **Published Date:** 2023-03-31
- **LangChain:**
- **Cookbook:** [camel_role_playing](https://github.com/langchain-ai/langchain/blob/master/cookbook/camel_role_playing.ipynb)

**Abstract:** [...] agents and beyond: https://github.com/camel-ai/camel.
|
||||
|
||||
## HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face

- **Authors:** Yongliang Shen, Kaitao Song, Xu Tan, et al.
- **arXiv id:** [2303.17580v4](http://arxiv.org/abs/2303.17580v4) **Published Date:** 2023-03-30
- **LangChain:**
- **API Reference:** [langchain_experimental.autonomous_agents](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.autonomous_agents)

**Abstract:** [...] modalities and domains and achieve impressive results in language, vision,
|
||||
speech, and other challenging tasks, which paves a new way towards the
|
||||
realization of artificial general intelligence.
|
||||
|
||||
## GPT-4 Technical Report

- **Authors:** OpenAI, Josh Achiam, Steven Adler, et al.
- **arXiv id:** [2303.08774v6](http://arxiv.org/abs/2303.08774v6) **Published Date:** 2023-03-15
- **LangChain:**
- **Documentation:** [docs/integrations/vectorstores/mongodb_atlas](https://python.langchain.com/docs/integrations/vectorstores/mongodb_atlas)

**Abstract:** We report the development of GPT-4, a large-scale, multimodal model which can
|
||||
accept image and text inputs and produce text outputs. While less capable than
|
||||
humans in many real-world scenarios, GPT-4 exhibits human-level performance on
|
||||
various professional and academic benchmarks, including passing a simulated bar
|
||||
exam with a score around the top 10% of test takers. GPT-4 is a
|
||||
Transformer-based model pre-trained to predict the next token in a document.
|
||||
The post-training alignment process results in improved performance on measures
|
||||
of factuality and adherence to desired behavior. A core component of this
|
||||
project was developing infrastructure and optimization methods that behave
|
||||
predictably across a wide range of scales. This allowed us to accurately
|
||||
predict some aspects of GPT-4's performance based on models trained with no
|
||||
more than 1/1,000th the compute of GPT-4.
|
||||
|
||||
## A Watermark for Large Language Models

- **Authors:** John Kirchenbauer, Jonas Geiping, Yuxin Wen, et al.
- **arXiv id:** [2301.10226v4](http://arxiv.org/abs/2301.10226v4) **Published Date:** 2023-01-24
- **LangChain:**
- **API Reference:** [langchain_community...OCIModelDeploymentTGI](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html#langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)

**Abstract:** Potential harms of large language models can be mitigated by watermarking
|
||||
model output, i.e., embedding signals into generated text that are invisible to
|
||||
[...] family, and discuss robustness and security.
|
||||
|
||||
## Precise Zero-Shot Dense Retrieval without Relevance Labels

- **Authors:** Luyu Gao, Xueguang Ma, Jimmy Lin, et al.
- **arXiv id:** [2212.10496v1](http://arxiv.org/abs/2212.10496v1) **Published Date:** 2022-12-20
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
- **API Reference:** [langchain...HypotheticalDocumentEmbedder](https://api.python.langchain.com/en/latest/chains/langchain.chains.hyde.base.HypotheticalDocumentEmbedder.html#langchain.chains.hyde.base.HypotheticalDocumentEmbedder)
- **Template:** [hyde](https://python.langchain.com/docs/templates/hyde)
- **Cookbook:** [hypothetical_document_embeddings](https://github.com/langchain-ai/langchain/blob/master/cookbook/hypothetical_document_embeddings.ipynb)

**Abstract:** [...] state-of-the-art unsupervised dense retriever Contriever and shows strong
|
||||
performance comparable to fine-tuned retrievers, across various tasks (e.g. web
|
||||
search, QA, fact verification) and languages~(e.g. sw, ko, ja).
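
HypotheticalDocumentEmbedder, linked above, packages this idea: it prompts an LLM for a hypothetical answer document and embeds that text instead of the raw query. A short sketch where the model names are placeholders and "web_search" selects one of the built-in HyDE prompt templates:

```python
from langchain.chains import HypotheticalDocumentEmbedder
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

base_embeddings = OpenAIEmbeddings()
llm = ChatOpenAI(model="gpt-4o-mini")  # assumed model name

hyde = HypotheticalDocumentEmbedder.from_llm(llm, base_embeddings, "web_search")

vector = hyde.embed_query("What items does LangChain provide for building agents?")
# Use `hyde` anywhere an Embeddings object is accepted, e.g. when building a vector store.
```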
|
||||
|
||||
## Constitutional AI: Harmlessness from AI Feedback

- **Authors:** Yuntao Bai, Saurav Kadavath, Sandipan Kundu, et al.
- **arXiv id:** [2212.08073v1](http://arxiv.org/abs/2212.08073v1) **Published Date:** 2022-12-15
- **LangChain:**
- **Documentation:** [docs/versions/migrating_chains/constitutional_chain](https://python.langchain.com/v0.2/docs/versions/migrating_chains/constitutional_chain)

**Abstract:** As AI systems become more capable, we would like to enlist their help to
|
||||
supervise other AIs. We experiment with methods for training a harmless AI
|
||||
assistant through self-improvement, without any human labels identifying
|
||||
harmful outputs. The only human oversight is provided through a list of rules
|
||||
or principles, and so we refer to the method as 'Constitutional AI'. The
|
||||
process involves both a supervised learning and a reinforcement learning phase.
|
||||
In the supervised phase we sample from an initial model, then generate
|
||||
self-critiques and revisions, and then finetune the original model on revised
|
||||
responses. In the RL phase, we sample from the finetuned model, use a model to
|
||||
evaluate which of the two samples is better, and then train a preference model
|
||||
from this dataset of AI preferences. We then train with RL using the preference
|
||||
model as the reward signal, i.e. we use 'RL from AI Feedback' (RLAIF). As a
|
||||
result we are able to train a harmless but non-evasive AI assistant that
|
||||
engages with harmful queries by explaining its objections to them. Both the SL
|
||||
and RL methods can leverage chain-of-thought style reasoning to improve the
|
||||
human-judged performance and transparency of AI decision making. These methods
|
||||
make it possible to control AI behavior more precisely and with far fewer human
|
||||
labels.
|
||||
|
||||
## Robust and Explainable Identification of Logical Fallacies in Natural Language Arguments

- **Authors:** Zhivar Sourati, Vishnu Priya Prasanna Venkatesh, Darshan Deshpande, et al.
- **arXiv id:** [2212.07425v3](http://arxiv.org/abs/2212.07425v3) **Published Date:** 2022-12-12
- **LangChain:**
- **API Reference:** [langchain_experimental.fallacy_removal](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.fallacy_removal)

**Abstract:** [...] further work on logical fallacy identification.
|
||||
|
||||
## Complementary Explanations for Effective In-Context Learning

- **Authors:** Xi Ye, Srinivasan Iyer, Asli Celikyilmaz, et al.
- **arXiv id:** [2211.13892v2](http://arxiv.org/abs/2211.13892v2) **Published Date:** 2022-11-25
- **LangChain:**
- **API Reference:** [langchain_core...MaxMarginalRelevanceExampleSelector](https://api.python.langchain.com/en/latest/example_selectors/langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector.html#langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector)

**Abstract:** [...] performance across three real-world tasks on multiple LLMs.
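
The MaxMarginalRelevanceExampleSelector referenced above is the LangChain hook for this kind of relevant-but-diverse demonstration selection. A small usage sketch; the toy examples and the FAISS/OpenAI choices are only placeholders (FAISS requires the `faiss-cpu` package):

```python
from langchain_community.vectorstores import FAISS
from langchain_core.example_selectors import MaxMarginalRelevanceExampleSelector
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate
from langchain_openai import OpenAIEmbeddings

examples = [
    {"input": "happy", "output": "sad"},
    {"input": "tall", "output": "short"},
    {"input": "sunny", "output": "gloomy"},
    {"input": "windy", "output": "calm"},
]

# Pick 2 demonstrations that are both similar to the query and diverse from each other.
selector = MaxMarginalRelevanceExampleSelector.from_examples(
    examples, OpenAIEmbeddings(), FAISS, k=2
)

prompt = FewShotPromptTemplate(
    example_selector=selector,
    example_prompt=PromptTemplate.from_template("Input: {input}\nOutput: {output}"),
    prefix="Give the antonym of every input.",
    suffix="Input: {adjective}\nOutput:",
    input_variables=["adjective"],
)
print(prompt.format(adjective="worried"))
```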
|
||||
|
||||
## PAL: Program-aided Language Models

- **Authors:** Luyu Gao, Aman Madaan, Shuyan Zhou, et al.
- **arXiv id:** [2211.10435v2](http://arxiv.org/abs/2211.10435v2) **Published Date:** 2022-11-18
- **LangChain:**
- **API Reference:** [langchain_experimental.pal_chain](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.pal_chain), [langchain_experimental...PALChain](https://api.python.langchain.com/en/latest/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain)
- **Cookbook:** [program_aided_language_model](https://github.com/langchain-ai/langchain/blob/master/cookbook/program_aided_language_model.ipynb)

**Abstract:** Large language models (LLMs) have recently demonstrated an impressive ability
|
||||
[...] accuracy on the GSM8K benchmark of math word problems, surpassing PaLM-540B
|
||||
which uses chain-of-thought by absolute 15% top-1. Our code and data are
|
||||
publicly available at http://reasonwithpal.com/ .
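
The experimental PALChain linked above wraps this program-generation loop. A minimal, hedged sketch: the model name is a placeholder, executing model-written Python is inherently risky, and recent langchain_experimental releases require explicitly opting in to code execution.

```python
from langchain_experimental.pal_chain import PALChain
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)  # assumed model name
# allow_dangerous_code=True acknowledges that the chain executes generated Python.
pal_chain = PALChain.from_math_prompt(llm, allow_dangerous_code=True, verbose=True)

question = (
    "Jan has three times the number of pets as Marcia. Marcia has two more pets "
    "than Cindy. If Cindy has four pets, how many total pets do the three have?"
)
print(pal_chain.invoke({"question": question}))  # the chain writes and runs a short program
```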
|
||||
|
||||
## An Analysis of Fusion Functions for Hybrid Retrieval

- **Authors:** Sebastian Bruch, Siyu Gai, Amir Ingber
- **arXiv id:** [2210.11934v2](http://arxiv.org/abs/2210.11934v2) **Published Date:** 2022-10-21
- **LangChain:**
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)

**Abstract:** We study hybrid search in text retrieval where lexical and semantic search
|
||||
are fused together with the intuition that the two are complementary in how
|
||||
they model relevance. In particular, we examine fusion by a convex combination
|
||||
(CC) of lexical and semantic scores, as well as the Reciprocal Rank Fusion
|
||||
(RRF) method, and identify their advantages and potential pitfalls. Contrary to
|
||||
existing studies, we find RRF to be sensitive to its parameters; that the
|
||||
learning of a CC fusion is generally agnostic to the choice of score
|
||||
normalization; that CC outperforms RRF in in-domain and out-of-domain settings;
|
||||
and finally, that CC is sample efficient, requiring only a small set of
|
||||
training examples to tune its only parameter to a target domain.
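
The two fusion schemes the paper compares are easy to state directly. A plain-Python sketch, using min-max normalization for the convex combination as one common choice:

```python
def convex_combination(lexical: dict[str, float], semantic: dict[str, float], alpha: float = 0.5):
    """Fuse min-max-normalized lexical and semantic scores: alpha*lex + (1-alpha)*sem."""
    def norm(scores):
        lo, hi = min(scores.values()), max(scores.values())
        return {d: (s - lo) / (hi - lo or 1.0) for d, s in scores.items()}
    lex, sem = norm(lexical), norm(semantic)
    docs = set(lex) | set(sem)
    fused = {d: alpha * lex.get(d, 0.0) + (1 - alpha) * sem.get(d, 0.0) for d in docs}
    return sorted(fused, key=fused.get, reverse=True)

def rrf(lexical_ranking: list[str], semantic_ranking: list[str], k: int = 60):
    """Reciprocal rank fusion of the two rankings (score-free, but sensitive to k)."""
    score: dict[str, float] = {}
    for ranking in (lexical_ranking, semantic_ranking):
        for rank, doc in enumerate(ranking, start=1):
            score[doc] = score.get(doc, 0.0) + 1.0 / (k + rank)
    return sorted(score, key=score.get, reverse=True)
```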
|
||||
|
||||
## ReAct: Synergizing Reasoning and Acting in Language Models

- **Authors:** Shunyu Yao, Jeffrey Zhao, Dian Yu, et al.
- **arXiv id:** [2210.03629v3](http://arxiv.org/abs/2210.03629v3) **Published Date:** 2022-10-06
- **LangChain:**
- **Documentation:** [docs/integrations/tools/ionic_shopping](https://python.langchain.com/v0.2/docs/integrations/tools/ionic_shopping), [docs/integrations/providers/cohere](https://python.langchain.com/v0.2/docs/integrations/providers/cohere), [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
- **API Reference:** [langchain...create_react_agent](https://api.python.langchain.com/en/latest/agents/langchain.agents.react.agent.create_react_agent.html#langchain.agents.react.agent.create_react_agent), [langchain...TrajectoryEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain)

**Abstract:** While large language models (LLMs) have demonstrated impressive capabilities
|
||||
[...] Project site with code: https://react-lm.github.io
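
create_react_agent, listed above, builds this reason-act loop from a prompt, a model, and a tool list. A compact usage sketch; the hub prompt id, the model name, and the search tool are assumptions (any tools work, and DuckDuckGoSearchRun needs the `duckduckgo-search` package):

```python
from langchain import hub
from langchain.agents import AgentExecutor, create_react_agent
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_openai import ChatOpenAI

tools = [DuckDuckGoSearchRun()]           # any tool list
prompt = hub.pull("hwchase17/react")      # a standard ReAct prompt pulled from LangChain Hub
agent = create_react_agent(ChatOpenAI(model="gpt-4o-mini"), tools, prompt)
executor = AgentExecutor(agent=agent, tools=tools, verbose=True, handle_parsing_errors=True)

executor.invoke({"input": "Who directed the highest-grossing film of 2023?"})
```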
|
||||
|
||||
## Deep Lake: a Lakehouse for Deep Learning

- **Authors:** Sasun Hambardzumyan, Abhinav Tuli, Levon Ghukasyan, et al.
- **arXiv id:** [2209.10785v2](http://arxiv.org/abs/2209.10785v2) **Published Date:** 2022-09-22
- **LangChain:**
- **Documentation:** [docs/integrations/providers/activeloop_deeplake](https://python.langchain.com/v0.2/docs/integrations/providers/activeloop_deeplake)

**Abstract:** Traditional data lakes provide critical data infrastructure for analytical
|
||||
workloads by enabling time travel, running SQL queries, ingesting data with
|
||||
[...] visualization engine, or (c) deep learning frameworks without sacrificing GPU
|
||||
utilization. Datasets stored in Deep Lake can be accessed from PyTorch,
|
||||
TensorFlow, JAX, and integrate with numerous MLOps tools.
|
||||
|
||||
## Matryoshka Representation Learning

- **Authors:** Aditya Kusupati, Gantavya Bhatt, Aniket Rege, et al.
- **arXiv id:** [2205.13147v4](http://arxiv.org/abs/2205.13147v4) **Published Date:** 2022-05-26
- **LangChain:**
- **Documentation:** [docs/integrations/providers/snowflake](https://python.langchain.com/v0.2/docs/integrations/providers/snowflake)

**Abstract:** Learned representations are a central component in modern ML systems, serving
|
||||
a multitude of downstream tasks. When training such representations, it is
|
||||
often the case that computational and statistical constraints for each
|
||||
downstream task are unknown. In this context rigid, fixed capacity
|
||||
representations can be either over or under-accommodating to the task at hand.
|
||||
This leads us to ask: can we design a flexible representation that can adapt to
|
||||
multiple downstream tasks with varying computational resources? Our main
|
||||
contribution is Matryoshka Representation Learning (MRL) which encodes
|
||||
information at different granularities and allows a single embedding to adapt
|
||||
to the computational constraints of downstream tasks. MRL minimally modifies
|
||||
existing representation learning pipelines and imposes no additional cost
|
||||
during inference and deployment. MRL learns coarse-to-fine representations that
|
||||
are at least as accurate and rich as independently trained low-dimensional
|
||||
representations. The flexibility within the learned Matryoshka Representations
|
||||
offer: (a) up to 14x smaller embedding size for ImageNet-1K classification at
|
||||
the same level of accuracy; (b) up to 14x real-world speed-ups for large-scale
|
||||
retrieval on ImageNet-1K and 4K; and (c) up to 2% accuracy improvements for
|
||||
long-tail few-shot classification, all while being as robust as the original
|
||||
representations. Finally, we show that MRL extends seamlessly to web-scale
|
||||
datasets (ImageNet, JFT) across various modalities -- vision (ViT, ResNet),
|
||||
vision + language (ALIGN) and language (BERT). MRL code and pretrained models
|
||||
are open-sourced at https://github.com/RAIVNLab/MRL.
|
||||
|
||||
## Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages
|
||||
|
||||
- **arXiv id:** 2205.12654v1
|
||||
- **Title:** Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages
|
||||
- **Authors:** Kevin Heffernan, Onur Çelebi, Holger Schwenk
|
||||
- **Published Date:** 2022-05-25
|
||||
- **URL:** http://arxiv.org/abs/2205.12654v1
|
||||
- **arXiv id:** [2205.12654v1](http://arxiv.org/abs/2205.12654v1) **Published Date:** 2022-05-25
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_community...LaserEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.laser.LaserEmbeddings.html#langchain_community.embeddings.laser.LaserEmbeddings)
|
||||
@@ -778,14 +921,12 @@ encoders, mine bitexts, and validate the bitexts by training NMT systems.
|
||||
|
||||
## Evaluating the Text-to-SQL Capabilities of Large Language Models
|
||||
|
||||
- **arXiv id:** 2204.00498v1
|
||||
- **Title:** Evaluating the Text-to-SQL Capabilities of Large Language Models
|
||||
- **Authors:** Nitarshan Rajkumar, Raymond Li, Dzmitry Bahdanau
|
||||
- **Published Date:** 2022-03-15
|
||||
- **URL:** http://arxiv.org/abs/2204.00498v1
|
||||
- **arXiv id:** [2204.00498v1](http://arxiv.org/abs/2204.00498v1) **Published Date:** 2022-03-15
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_community...SparkSQL](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL), [langchain_community...SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase)
|
||||
- **Documentation:** [docs/tutorials/sql_qa](https://python.langchain.com/v0.2/docs/tutorials/sql_qa)
|
||||
- **API Reference:** [langchain_community...SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), [langchain_community...SparkSQL](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL)
|
||||
|
||||
**Abstract:** We perform an empirical evaluation of Text-to-SQL capabilities of the Codex
|
||||
language model. We find that, without any finetuning, Codex is a strong
|
||||
@@ -797,14 +938,11 @@ few-shot examples.
|
||||
|
||||
## Locally Typical Sampling
|
||||
|
||||
- **arXiv id:** 2202.00666v5
|
||||
- **Title:** Locally Typical Sampling
|
||||
- **Authors:** Clara Meister, Tiago Pimentel, Gian Wiher, et al.
|
||||
- **Published Date:** 2022-02-01
|
||||
- **URL:** http://arxiv.org/abs/2202.00666v5
|
||||
- **arXiv id:** [2202.00666v5](http://arxiv.org/abs/2202.00666v5) **Published Date:** 2022-02-01
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
- **API Reference:** [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
|
||||
|
||||
**Abstract:** Today's probabilistic language generators fall short when it comes to
|
||||
producing coherent and fluent text despite the fact that the underlying models
|
||||
@@ -827,13 +965,32 @@ locally typical sampling offers competitive performance (in both abstractive
|
||||
summarization and story generation) in terms of quality while consistently
|
||||
reducing degenerate repetitions.
|
||||
|
||||
## ColBERTv2: Effective and Efficient Retrieval via Lightweight Late Interaction
|
||||
|
||||
- **Authors:** Keshav Santhanam, Omar Khattab, Jon Saad-Falcon, et al.
|
||||
- **arXiv id:** [2112.01488v3](http://arxiv.org/abs/2112.01488v3) **Published Date:** 2021-12-02
|
||||
- **LangChain:**
|
||||
|
||||
- **Documentation:** [docs/integrations/retrievers/ragatouille](https://python.langchain.com/v0.2/docs/integrations/retrievers/ragatouille), [docs/integrations/providers/ragatouille](https://python.langchain.com/v0.2/docs/integrations/providers/ragatouille), [docs/concepts](https://python.langchain.com/v0.2/docs/concepts), [docs/integrations/providers/dspy](https://python.langchain.com/v0.2/docs/integrations/providers/dspy)
|
||||
|
||||
**Abstract:** Neural information retrieval (IR) has greatly advanced search and other
|
||||
knowledge-intensive language tasks. While many neural IR methods encode queries
|
||||
and documents into single-vector representations, late interaction models
|
||||
produce multi-vector representations at the granularity of each token and
|
||||
decompose relevance modeling into scalable token-level computations. This
|
||||
decomposition has been shown to make late interaction more effective, but it
|
||||
inflates the space footprint of these models by an order of magnitude. In this
|
||||
work, we introduce ColBERTv2, a retriever that couples an aggressive residual
|
||||
compression mechanism with a denoised supervision strategy to simultaneously
|
||||
improve the quality and space footprint of late interaction. We evaluate
|
||||
ColBERTv2 across a wide range of benchmarks, establishing state-of-the-art
|
||||
quality within and outside the training domain while reducing the space
|
||||
footprint of late interaction models by 6--10$\times$.
|
||||
|
||||
## Learning Transferable Visual Models From Natural Language Supervision
|
||||
|
||||
- **arXiv id:** 2103.00020v1
|
||||
- **Title:** Learning Transferable Visual Models From Natural Language Supervision
|
||||
- **Authors:** Alec Radford, Jong Wook Kim, Chris Hallacy, et al.
|
||||
- **Published Date:** 2021-02-26
|
||||
- **URL:** http://arxiv.org/abs/2103.00020v1
|
||||
- **arXiv id:** [2103.00020v1](http://arxiv.org/abs/2103.00020v1) **Published Date:** 2021-02-26
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_experimental.open_clip](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.open_clip)
|
||||
@@ -859,16 +1016,77 @@ zero-shot without needing to use any of the 1.28 million training examples it
|
||||
was trained on. We release our code and pre-trained model weights at
|
||||
https://github.com/OpenAI/CLIP.
|
||||
|
||||
## CTRL: A Conditional Transformer Language Model for Controllable Generation
|
||||
## Language Models are Few-Shot Learners
|
||||
|
||||
- **arXiv id:** 1909.05858v2
|
||||
- **Title:** CTRL: A Conditional Transformer Language Model for Controllable Generation
|
||||
- **Authors:** Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney, et al.
|
||||
- **Published Date:** 2019-09-11
|
||||
- **URL:** http://arxiv.org/abs/1909.05858v2
|
||||
- **Authors:** Tom B. Brown, Benjamin Mann, Nick Ryder, et al.
|
||||
- **arXiv id:** [2005.14165v4](http://arxiv.org/abs/2005.14165v4) **Published Date:** 2020-05-28
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
|
||||
|
||||
**Abstract:** Recent work has demonstrated substantial gains on many NLP tasks and
|
||||
benchmarks by pre-training on a large corpus of text followed by fine-tuning on
|
||||
a specific task. While typically task-agnostic in architecture, this method
|
||||
still requires task-specific fine-tuning datasets of thousands or tens of
|
||||
thousands of examples. By contrast, humans can generally perform a new language
|
||||
task from only a few examples or from simple instructions - something which
|
||||
current NLP systems still largely struggle to do. Here we show that scaling up
|
||||
language models greatly improves task-agnostic, few-shot performance, sometimes
|
||||
even reaching competitiveness with prior state-of-the-art fine-tuning
|
||||
approaches. Specifically, we train GPT-3, an autoregressive language model with
|
||||
175 billion parameters, 10x more than any previous non-sparse language model,
|
||||
and test its performance in the few-shot setting. For all tasks, GPT-3 is
|
||||
applied without any gradient updates or fine-tuning, with tasks and few-shot
|
||||
demonstrations specified purely via text interaction with the model. GPT-3
|
||||
achieves strong performance on many NLP datasets, including translation,
|
||||
question-answering, and cloze tasks, as well as several tasks that require
|
||||
on-the-fly reasoning or domain adaptation, such as unscrambling words, using a
|
||||
novel word in a sentence, or performing 3-digit arithmetic. At the same time,
|
||||
we also identify some datasets where GPT-3's few-shot learning still struggles,
|
||||
as well as some datasets where GPT-3 faces methodological issues related to
|
||||
training on large web corpora. Finally, we find that GPT-3 can generate samples
|
||||
of news articles which human evaluators have difficulty distinguishing from
|
||||
articles written by humans. We discuss broader societal impacts of this finding
|
||||
and of GPT-3 in general.
|
||||
|
||||
## Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks
|
||||
|
||||
- **Authors:** Patrick Lewis, Ethan Perez, Aleksandra Piktus, et al.
|
||||
- **arXiv id:** [2005.11401v4](http://arxiv.org/abs/2005.11401v4) **Published Date:** 2020-05-22
|
||||
- **LangChain:**
|
||||
|
||||
- **Documentation:** [docs/concepts](https://python.langchain.com/v0.2/docs/concepts)
|
||||
|
||||
**Abstract:** Large pre-trained language models have been shown to store factual knowledge
|
||||
in their parameters, and achieve state-of-the-art results when fine-tuned on
|
||||
downstream NLP tasks. However, their ability to access and precisely manipulate
|
||||
knowledge is still limited, and hence on knowledge-intensive tasks, their
|
||||
performance lags behind task-specific architectures. Additionally, providing
|
||||
provenance for their decisions and updating their world knowledge remain open
|
||||
research problems. Pre-trained models with a differentiable access mechanism to
|
||||
explicit non-parametric memory can overcome this issue, but have so far been
|
||||
only investigated for extractive downstream tasks. We explore a general-purpose
|
||||
fine-tuning recipe for retrieval-augmented generation (RAG) -- models which
|
||||
combine pre-trained parametric and non-parametric memory for language
|
||||
generation. We introduce RAG models where the parametric memory is a
|
||||
pre-trained seq2seq model and the non-parametric memory is a dense vector index
|
||||
of Wikipedia, accessed with a pre-trained neural retriever. We compare two RAG
|
||||
formulations, one which conditions on the same retrieved passages across the
|
||||
whole generated sequence, the other can use different passages per token. We
|
||||
fine-tune and evaluate our models on a wide range of knowledge-intensive NLP
|
||||
tasks and set the state-of-the-art on three open domain QA tasks, outperforming
|
||||
parametric seq2seq models and task-specific retrieve-and-extract architectures.
|
||||
For language generation tasks, we find that RAG models generate more specific,
|
||||
diverse and factual language than a state-of-the-art parametric-only seq2seq
|
||||
baseline.
|
||||
|
||||
## CTRL: A Conditional Transformer Language Model for Controllable Generation
|
||||
|
||||
- **Authors:** Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney, et al.
|
||||
- **arXiv id:** [1909.05858v2](http://arxiv.org/abs/1909.05858v2) **Published Date:** 2019-09-11
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint)
|
||||
|
||||
**Abstract:** Large-scale language models show promising text generation capabilities, but
|
||||
users cannot easily control particular aspects of the generated text. We
|
||||
@@ -881,32 +1099,4 @@ codes also allow CTRL to predict which parts of the training data are most
|
||||
likely given a sequence. This provides a potential method for analyzing large
|
||||
amounts of data via model-based source attribution. We have released multiple
|
||||
full-sized, pretrained versions of CTRL at https://github.com/salesforce/ctrl.
|
||||
|
||||
## Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks
|
||||
|
||||
- **arXiv id:** 1908.10084v1
|
||||
- **Title:** Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks
|
||||
- **Authors:** Nils Reimers, Iryna Gurevych
|
||||
- **Published Date:** 2019-08-27
|
||||
- **URL:** http://arxiv.org/abs/1908.10084v1
|
||||
- **LangChain:**
|
||||
|
||||
- **Documentation:** [docs/integrations/text_embedding/sentence_transformers](https://python.langchain.com/docs/integrations/text_embedding/sentence_transformers)
|
||||
|
||||
**Abstract:** BERT (Devlin et al., 2018) and RoBERTa (Liu et al., 2019) has set a new
|
||||
state-of-the-art performance on sentence-pair regression tasks like semantic
|
||||
textual similarity (STS). However, it requires that both sentences are fed into
|
||||
the network, which causes a massive computational overhead: Finding the most
|
||||
similar pair in a collection of 10,000 sentences requires about 50 million
|
||||
inference computations (~65 hours) with BERT. The construction of BERT makes it
|
||||
unsuitable for semantic similarity search as well as for unsupervised tasks
|
||||
like clustering.
|
||||
In this publication, we present Sentence-BERT (SBERT), a modification of the
|
||||
pretrained BERT network that use siamese and triplet network structures to
|
||||
derive semantically meaningful sentence embeddings that can be compared using
|
||||
cosine-similarity. This reduces the effort for finding the most similar pair
|
||||
from 65 hours with BERT / RoBERTa to about 5 seconds with SBERT, while
|
||||
maintaining the accuracy from BERT.
|
||||
We evaluate SBERT and SRoBERTa on common STS tasks and transfer learning
|
||||
tasks, where it outperforms other state-of-the-art sentence embeddings methods.
|
||||
|
||||
@@ -15,11 +15,6 @@ The interfaces for core components like LLMs, vector stores, retrievers and more
|
||||
No third party integrations are defined here.
|
||||
The dependencies are kept purposefully very lightweight.
|
||||
|
||||
### Partner packages
|
||||
|
||||
While the long tail of integrations are in `langchain-community`, we split popular integrations into their own packages (e.g. `langchain-openai`, `langchain-anthropic`, etc).
|
||||
This was done in order to improve support for these important integrations.
|
||||
|
||||
### `langchain`
|
||||
|
||||
The main `langchain` package contains chains, agents, and retrieval strategies that make up an application's cognitive architecture.
|
||||
@@ -33,6 +28,11 @@ Key partner packages are separated out (see below).
|
||||
This contains all integrations for various components (LLMs, vector stores, retrievers).
|
||||
All dependencies in this package are optional to keep the package as lightweight as possible.
|
||||
|
||||
### Partner packages
|
||||
|
||||
While the long tail of integrations is in `langchain-community`, we split popular integrations into their own packages (e.g. `langchain-openai`, `langchain-anthropic`, etc).
|
||||
This was done in order to improve support for these important integrations.
|
||||
|
||||
### [`langgraph`](https://langchain-ai.github.io/langgraph)
|
||||
|
||||
`langgraph` is an extension of `langchain` aimed at
|
||||
@@ -61,28 +61,28 @@ A developer platform that lets you debug, test, evaluate, and monitor LLM applic
|
||||
## LangChain Expression Language (LCEL)
|
||||
<span data-heading-keywords="lcel"></span>
|
||||
|
||||
LangChain Expression Language, or LCEL, is a declarative way to chain LangChain components.
|
||||
`LangChain Expression Language`, or `LCEL`, is a declarative way to chain LangChain components.
|
||||
LCEL was designed from day 1 to **support putting prototypes in production, with no code changes**, from the simplest “prompt + LLM” chain to the most complex chains (we’ve seen folks successfully run LCEL chains with 100s of steps in production). To highlight a few of the reasons you might want to use LCEL:
|
||||
|
||||
**First-class streaming support**
|
||||
- **First-class streaming support:**
|
||||
When you build your chains with LCEL you get the best possible time-to-first-token (time elapsed until the first chunk of output comes out). For some chains this means eg. we stream tokens straight from an LLM to a streaming output parser, and you get back parsed, incremental chunks of output at the same rate as the LLM provider outputs the raw tokens.
|
||||
|
||||
**Async support**
|
||||
- **Async support:**
|
||||
Any chain built with LCEL can be called both with the synchronous API (eg. in your Jupyter notebook while prototyping) as well as with the asynchronous API (eg. in a [LangServe](/docs/langserve/) server). This enables using the same code for prototypes and in production, with great performance, and the ability to handle many concurrent requests in the same server.
|
||||
|
||||
**Optimized parallel execution**
|
||||
- **Optimized parallel execution:**
|
||||
Whenever your LCEL chains have steps that can be executed in parallel (eg if you fetch documents from multiple retrievers) we automatically do it, both in the sync and the async interfaces, for the smallest possible latency.
|
||||
|
||||
**Retries and fallbacks**
|
||||
- **Retries and fallbacks:**
|
||||
Configure retries and fallbacks for any part of your LCEL chain. This is a great way to make your chains more reliable at scale. We’re currently working on adding streaming support for retries/fallbacks, so you can get the added reliability without any latency cost.
|
||||
|
||||
**Access intermediate results**
|
||||
- **Access intermediate results:**
|
||||
For more complex chains it’s often very useful to access the results of intermediate steps even before the final output is produced. This can be used to let end-users know something is happening, or even just to debug your chain. You can stream intermediate results, and it’s available on every [LangServe](/docs/langserve) server.
|
||||
|
||||
**Input and output schemas**
|
||||
- **Input and output schemas**
|
||||
Input and output schemas give every LCEL chain Pydantic and JSONSchema schemas inferred from the structure of your chain. This can be used for validation of inputs and outputs, and is an integral part of LangServe.
|
||||
|
||||
[**Seamless LangSmith tracing**](https://docs.smith.langchain.com)
|
||||
- [**Seamless LangSmith tracing**](https://docs.smith.langchain.com)
|
||||
As your chains get more and more complex, it becomes increasingly important to understand what exactly is happening at every step.
|
||||
With LCEL, **all** steps are automatically logged to [LangSmith](https://docs.smith.langchain.com/) for maximum observability and debuggability.
|
||||
|
||||
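To make these points concrete, here is a minimal sketch of an LCEL chain; the `langchain-openai` package and the model name are assumptions, and any chat model integration can be substituted:

```python
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI  # assumption: any chat model integration works here

# Declarative composition with the | operator: prompt -> model -> output parser.
prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")
chain = prompt | ChatOpenAI(model="gpt-4o-mini") | StrOutputParser()

chain.invoke({"topic": "bears"})                      # single synchronous call
chain.batch([{"topic": "cats"}, {"topic": "dogs"}])   # inputs executed in parallel
# await chain.ainvoke({"topic": "bears"})             # same chain, async API
# chain.with_retry() / chain.with_fallbacks([...])    # add reliability without rewriting the chain
```

Because every step is a Runnable, the same chain also supports `.stream()` and automatic LangSmith tracing with no code changes.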
@@ -97,7 +97,7 @@ For guides on how to do specific tasks with LCEL, check out [the relevant how-to
|
||||
### Runnable interface
|
||||
<span data-heading-keywords="invoke,runnable"></span>
|
||||
|
||||
To make it as easy as possible to create custom chains, we've implemented a ["Runnable"](https://api.python.langchain.com/en/stable/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable) protocol. Many LangChain components implement the `Runnable` protocol, including chat models, LLMs, output parsers, retrievers, prompt templates, and more. There are also several useful primitives for working with runnables, which you can read about below.
|
||||
To make it as easy as possible to create custom chains, we've implemented a ["Runnable"](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable) protocol. Many LangChain components implement the `Runnable` protocol, including chat models, LLMs, output parsers, retrievers, prompt templates, and more. There are also several useful primitives for working with runnables, which you can read about below.
|
||||
|
||||
This is a standard interface, which makes it easy to define custom chains as well as invoke them in a standard way.
|
||||
The standard interface includes:
|
||||
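For illustration, a sketch of those methods on the simplest possible runnable (`RunnableLambda` from `langchain_core`; the wrapped function is arbitrary):

```python
from langchain_core.runnables import RunnableLambda

runnable = RunnableLambda(lambda x: x * 2)

runnable.invoke(3)         # -> 6
runnable.batch([1, 2, 3])  # -> [2, 4, 6]
list(runnable.stream(3))   # -> [6], a single chunk for this trivial runnable

# Async counterparts: await runnable.ainvoke(3), await runnable.abatch([1, 2, 3]),
# and `async for chunk in runnable.astream(3)`.
```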
@@ -186,7 +186,7 @@ For a full list of LangChain model providers with multimodal models, [check out
|
||||
<span data-heading-keywords="llm,llms"></span>
|
||||
|
||||
:::caution
|
||||
Pure text-in/text-out LLMs tend to be older or lower-level. Many popular models are best used as [chat completion models](/docs/concepts/#chat-models),
|
||||
Pure text-in/text-out LLMs tend to be older or lower-level. Many new popular models are best used as [chat completion models](/docs/concepts/#chat-models),
|
||||
even for non-chat use cases.
|
||||
|
||||
You are probably looking for [the section above instead](/docs/concepts/#chat-models).
|
||||
@@ -201,7 +201,7 @@ When messages are passed in as input, they will be formatted into a string under
|
||||
|
||||
LangChain does not host any LLMs, rather we rely on third party integrations.
|
||||
|
||||
For specifics on how to use LLMs, see the [relevant how-to guides here](/docs/how_to/#llms).
|
||||
For specifics on how to use LLMs, see the [how-to guides](/docs/how_to/#llms).
|
||||
|
||||
### Messages
|
||||
|
||||
@@ -209,22 +209,25 @@ Some language models take a list of messages as input and return a message.
|
||||
There are a few different types of messages.
|
||||
All messages have a `role`, `content`, and `response_metadata` property.
|
||||
|
||||
The `role` describes WHO is saying the message.
|
||||
The `role` describes WHO is saying the message. The standard roles are "user", "assistant", "system", and "tool".
|
||||
LangChain has different message classes for different roles.
|
||||
|
||||
The `content` property describes the content of the message.
|
||||
This can be a few different things:
|
||||
|
||||
- A string (most models deal this type of content)
|
||||
- A string (most models deal with this type of content)
|
||||
- A List of dictionaries (this is used for multimodal input, where the dictionary contains information about that input type and that input location)
|
||||
|
||||
Optionally, messages can have a `name` property which allows for differentiating between multiple speakers with the same role.
|
||||
For example, if there are two users in the chat history it can be useful to differentiate between them. Not all models support this.
|
||||
|
||||
#### HumanMessage
|
||||
|
||||
This represents a message from the user.
|
||||
This represents a message with role "user".
|
||||
|
||||
#### AIMessage
|
||||
|
||||
This represents a message from the model. In addition to the `content` property, these messages also have:
|
||||
This represents a message with role "assistant". In addition to the `content` property, these messages also have:
|
||||
|
||||
**`response_metadata`**
|
||||
|
||||
@@ -244,11 +247,11 @@ This property returns a list of `ToolCall`s. A `ToolCall` is a dictionary with t
|
||||
|
||||
#### SystemMessage
|
||||
|
||||
This represents a system message, which tells the model how to behave. Not every model provider supports this.
|
||||
This represents a message with role "system", which tells the model how to behave. Not every model provider supports this.
|
||||
|
||||
#### ToolMessage
|
||||
|
||||
This represents the result of a tool call. In addition to `role` and `content`, this message has:
|
||||
This represents a message with role "tool", which contains the result of calling a tool. In addition to `role` and `content`, this message has:
|
||||
|
||||
- a `tool_call_id` field which conveys the id of the call to the tool that was called to produce this result.
|
||||
- an `artifact` field which can be used to pass along arbitrary artifacts of the tool execution which are useful to track but which should not be sent to the model.
|
||||
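Putting these message types together, a minimal sketch of a tool-calling exchange (the tool name, arguments, and ids are illustrative):

```python
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage

history = [
    SystemMessage("You are a helpful assistant."),   # role "system": how the model should behave
    HumanMessage("What is 2 * 3?"),                  # role "user": input from the user
    AIMessage(                                       # role "assistant": the model requests a tool call
        content="",
        tool_calls=[{"name": "multiply", "args": {"a": 2, "b": 3}, "id": "call_1"}],
    ),
    ToolMessage("6", tool_call_id="call_1"),         # role "tool": the result of that tool call
]
```

Passing a history like this back to a tool-calling chat model lets it produce the final answer.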
@@ -343,6 +346,7 @@ For specifics on how to use prompt templates, see the [relevant how-to guides he
|
||||
|
||||
### Example selectors
|
||||
One common prompting technique for achieving better performance is to include examples as part of the prompt.
|
||||
This is known as [few-shot prompting](/docs/concepts/#few-shot-prompting).
|
||||
This gives the language model concrete examples of how it should behave.
|
||||
Sometimes these examples are hardcoded into the prompt, but for more advanced situations it may be nice to dynamically select them.
|
||||
Example Selectors are classes responsible for selecting and then formatting examples into prompts.
|
||||
@@ -361,38 +365,32 @@ See documentation for that [here](/docs/concepts/#function-tool-calling).
|
||||
|
||||
:::
|
||||
|
||||
Responsible for taking the output of a model and transforming it to a more suitable format for downstream tasks.
|
||||
`Output parser` is responsible for taking the output of a model and transforming it to a more suitable format for downstream tasks.
|
||||
Useful when you are using LLMs to generate structured data, or to normalize output from chat models and LLMs.
|
||||
|
||||
LangChain has lots of different types of output parsers. This is a list of output parsers LangChain supports. The table below has various pieces of information:
|
||||
|
||||
**Name**: The name of the output parser
|
||||
|
||||
**Supports Streaming**: Whether the output parser supports streaming.
|
||||
|
||||
**Has Format Instructions**: Whether the output parser has format instructions. This is generally available except when (a) the desired schema is not specified in the prompt but rather in other parameters (like OpenAI function calling), or (b) when the OutputParser wraps another OutputParser.
|
||||
|
||||
**Calls LLM**: Whether this output parser itself calls an LLM. This is usually only done by output parsers that attempt to correct misformatted output.
|
||||
|
||||
**Input Type**: Expected input type. Most output parsers work on both strings and messages, but some (like OpenAI Functions) need a message with specific kwargs.
|
||||
|
||||
**Output Type**: The output type of the object returned by the parser.
|
||||
|
||||
**Description**: Our commentary on this output parser and when to use it.
|
||||
- **Name**: The name of the output parser
|
||||
- **Supports Streaming**: Whether the output parser supports streaming.
|
||||
- **Has Format Instructions**: Whether the output parser has format instructions. This is generally available except when (a) the desired schema is not specified in the prompt but rather in other parameters (like OpenAI function calling), or (b) when the OutputParser wraps another OutputParser.
|
||||
- **Calls LLM**: Whether this output parser itself calls an LLM. This is usually only done by output parsers that attempt to correct misformatted output.
|
||||
- **Input Type**: Expected input type. Most output parsers work on both strings and messages, but some (like OpenAI Functions) need a message with specific kwargs.
|
||||
- **Output Type**: The output type of the object returned by the parser.
|
||||
- **Description**: Our commentary on this output parser and when to use it.
|
||||
|
||||
| Name | Supports Streaming | Has Format Instructions | Calls LLM | Input Type | Output Type | Description |
|
||||
|-----------------|--------------------|-------------------------------|-----------|----------------------------------|----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| [JSON](https://api.python.langchain.com/en/latest/output_parsers/langchain_core.output_parsers.json.JsonOutputParser.html#langchain_core.output_parsers.json.JsonOutputParser) | ✅ | ✅ | | `str` \| `Message` | JSON object | Returns a JSON object as specified. You can specify a Pydantic model and it will return JSON for that model. Probably the most reliable output parser for getting structured data that does NOT use function calling. |
|
||||
| [XML](https://api.python.langchain.com/en/latest/output_parsers/langchain_core.output_parsers.xml.XMLOutputParser.html#langchain_core.output_parsers.xml.XMLOutputParser) | ✅ | ✅ | | `str` \| `Message` | `dict` | Returns a dictionary of tags. Use when XML output is needed. Use with models that are good at writing XML (like Anthropic's). |
|
||||
| [CSV](https://api.python.langchain.com/en/latest/output_parsers/langchain_core.output_parsers.list.CommaSeparatedListOutputParser.html#langchain_core.output_parsers.list.CommaSeparatedListOutputParser) | ✅ | ✅ | | `str` \| `Message` | `List[str]` | Returns a list of comma separated values. |
|
||||
| [OutputFixing](https://api.python.langchain.com/en/latest/output_parsers/langchain.output_parsers.fix.OutputFixingParser.html#langchain.output_parsers.fix.OutputFixingParser) | | | ✅ | `str` \| `Message` | | Wraps another output parser. If that output parser errors, then this will pass the error message and the bad output to an LLM and ask it to fix the output. |
|
||||
| [RetryWithError](https://api.python.langchain.com/en/latest/output_parsers/langchain.output_parsers.retry.RetryWithErrorOutputParser.html#langchain.output_parsers.retry.RetryWithErrorOutputParser) | | | ✅ | `str` \| `Message` | | Wraps another output parser. If that output parser errors, then this will pass the original inputs, the bad output, and the error message to an LLM and ask it to fix it. Compared to OutputFixingParser, this one also sends the original instructions. |
|
||||
| [Pydantic](https://api.python.langchain.com/en/latest/output_parsers/langchain_core.output_parsers.pydantic.PydanticOutputParser.html#langchain_core.output_parsers.pydantic.PydanticOutputParser) | | ✅ | | `str` \| `Message` | `pydantic.BaseModel` | Takes a user defined Pydantic model and returns data in that format. |
|
||||
| [YAML](https://api.python.langchain.com/en/latest/output_parsers/langchain.output_parsers.yaml.YamlOutputParser.html#langchain.output_parsers.yaml.YamlOutputParser) | | ✅ | | `str` \| `Message` | `pydantic.BaseModel` | Takes a user defined Pydantic model and returns data in that format. Uses YAML to encode it. |
|
||||
| [PandasDataFrame](https://api.python.langchain.com/en/latest/output_parsers/langchain.output_parsers.pandas_dataframe.PandasDataFrameOutputParser.html#langchain.output_parsers.pandas_dataframe.PandasDataFrameOutputParser) | | ✅ | | `str` \| `Message` | `dict` | Useful for doing operations with pandas DataFrames. |
|
||||
| [Enum](https://api.python.langchain.com/en/latest/output_parsers/langchain.output_parsers.enum.EnumOutputParser.html#langchain.output_parsers.enum.EnumOutputParser) | | ✅ | | `str` \| `Message` | `Enum` | Parses response into one of the provided enum values. |
|
||||
| [Datetime](https://api.python.langchain.com/en/latest/output_parsers/langchain.output_parsers.datetime.DatetimeOutputParser.html#langchain.output_parsers.datetime.DatetimeOutputParser) | | ✅ | | `str` \| `Message` | `datetime.datetime` | Parses response into a datetime string. |
|
||||
| [Structured](https://api.python.langchain.com/en/latest/output_parsers/langchain.output_parsers.structured.StructuredOutputParser.html#langchain.output_parsers.structured.StructuredOutputParser) | | ✅ | | `str` \| `Message` | `Dict[str, str]` | An output parser that returns structured information. It is less powerful than other output parsers since it only allows for fields to be strings. This can be useful when you are working with smaller LLMs. |
|
||||
| [JSON](https://python.langchain.com/v0.2/api_reference/core/output_parsers/langchain_core.output_parsers.json.JsonOutputParser.html#langchain_core.output_parsers.json.JsonOutputParser) | ✅ | ✅ | | `str` \| `Message` | JSON object | Returns a JSON object as specified. You can specify a Pydantic model and it will return JSON for that model. Probably the most reliable output parser for getting structured data that does NOT use function calling. |
|
||||
| [XML](https://python.langchain.com/v0.2/api_reference/core/output_parsers/langchain_core.output_parsers.xml.XMLOutputParser.html#langchain_core.output_parsers.xml.XMLOutputParser) | ✅ | ✅ | | `str` \| `Message` | `dict` | Returns a dictionary of tags. Use when XML output is needed. Use with models that are good at writing XML (like Anthropic's). |
|
||||
| [CSV](https://python.langchain.com/v0.2/api_reference/core/output_parsers/langchain_core.output_parsers.list.CommaSeparatedListOutputParser.html#langchain_core.output_parsers.list.CommaSeparatedListOutputParser) | ✅ | ✅ | | `str` \| `Message` | `List[str]` | Returns a list of comma separated values. |
|
||||
| [OutputFixing](https://python.langchain.com/v0.2/api_reference/langchain/output_parsers/langchain.output_parsers.fix.OutputFixingParser.html#langchain.output_parsers.fix.OutputFixingParser) | | | ✅ | `str` \| `Message` | | Wraps another output parser. If that output parser errors, then this will pass the error message and the bad output to an LLM and ask it to fix the output. |
|
||||
| [RetryWithError](https://python.langchain.com/v0.2/api_reference/langchain/output_parsers/langchain.output_parsers.retry.RetryWithErrorOutputParser.html#langchain.output_parsers.retry.RetryWithErrorOutputParser) | | | ✅ | `str` \| `Message` | | Wraps another output parser. If that output parser errors, then this will pass the original inputs, the bad output, and the error message to an LLM and ask it to fix it. Compared to OutputFixingParser, this one also sends the original instructions. |
|
||||
| [Pydantic](https://python.langchain.com/v0.2/api_reference/core/output_parsers/langchain_core.output_parsers.pydantic.PydanticOutputParser.html#langchain_core.output_parsers.pydantic.PydanticOutputParser) | | ✅ | | `str` \| `Message` | `pydantic.BaseModel` | Takes a user defined Pydantic model and returns data in that format. |
|
||||
| [YAML](https://python.langchain.com/v0.2/api_reference/langchain/output_parsers/langchain.output_parsers.yaml.YamlOutputParser.html#langchain.output_parsers.yaml.YamlOutputParser) | | ✅ | | `str` \| `Message` | `pydantic.BaseModel` | Takes a user defined Pydantic model and returns data in that format. Uses YAML to encode it. |
|
||||
| [PandasDataFrame](https://python.langchain.com/v0.2/api_reference/langchain/output_parsers/langchain.output_parsers.pandas_dataframe.PandasDataFrameOutputParser.html#langchain.output_parsers.pandas_dataframe.PandasDataFrameOutputParser) | | ✅ | | `str` \| `Message` | `dict` | Useful for doing operations with pandas DataFrames. |
|
||||
| [Enum](https://python.langchain.com/v0.2/api_reference/langchain/output_parsers/langchain.output_parsers.enum.EnumOutputParser.html#langchain.output_parsers.enum.EnumOutputParser) | | ✅ | | `str` \| `Message` | `Enum` | Parses response into one of the provided enum values. |
|
||||
| [Datetime](https://python.langchain.com/v0.2/api_reference/langchain/output_parsers/langchain.output_parsers.datetime.DatetimeOutputParser.html#langchain.output_parsers.datetime.DatetimeOutputParser) | | ✅ | | `str` \| `Message` | `datetime.datetime` | Parses response into a datetime string. |
|
||||
| [Structured](https://python.langchain.com/v0.2/api_reference/langchain/output_parsers/langchain.output_parsers.structured.StructuredOutputParser.html#langchain.output_parsers.structured.StructuredOutputParser) | | ✅ | | `str` \| `Message` | `Dict[str, str]` | An output parser that returns structured information. It is less powerful than other output parsers since it only allows for fields to be strings. This can be useful when you are working with smaller LLMs. |
|
||||
|
||||
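For instance, a hedged sketch of the `PydanticOutputParser` row above (the schema is illustrative and the model call is omitted; on older LangChain versions the schema class may need to come from `langchain_core.pydantic_v1`):

```python
from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser

class Person(BaseModel):
    name: str = Field(description="The person's name")
    age: int = Field(description="The person's age in years")

parser = PydanticOutputParser(pydantic_object=Person)

# Format instructions are typically appended to the prompt sent to the model.
print(parser.get_format_instructions())

# Parsing a (hypothetical) model response back into the Pydantic object:
parser.parse('{"name": "Ada Lovelace", "age": 36}')  # -> Person(name='Ada Lovelace', age=36)
```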
For specifics on how to use output parsers, see the [relevant how-to guides here](/docs/how_to/#output-parsers).
|
||||
|
||||
@@ -503,7 +501,7 @@ For specifics on how to use retrievers, see the [relevant how-to guides here](/d
|
||||
For some techniques, such as [indexing and retrieval with multiple vectors per document](/docs/how_to/multi_vector/) or
|
||||
[caching embeddings](/docs/how_to/caching_embeddings/), having a form of key-value (KV) storage is helpful.
|
||||
|
||||
LangChain includes a [`BaseStore`](https://api.python.langchain.com/en/latest/stores/langchain_core.stores.BaseStore.html) interface,
|
||||
LangChain includes a [`BaseStore`](https://python.langchain.com/v0.2/api_reference/core/stores/langchain_core.stores.BaseStore.html) interface,
|
||||
which allows for storage of arbitrary data. However, LangChain components that require KV-storage accept a
|
||||
more specific `BaseStore[str, bytes]` instance that stores binary data (referred to as a `ByteStore`), and internally take care of
|
||||
encoding and decoding data for their specific needs.
|
||||
@@ -512,7 +510,7 @@ This means that as a user, you only need to think about one type of store rather
|
||||
|
||||
#### Interface
|
||||
|
||||
All [`BaseStores`](https://api.python.langchain.com/en/latest/stores/langchain_core.stores.BaseStore.html) support the following interface. Note that the interface allows
|
||||
All [`BaseStores`](https://python.langchain.com/v0.2/api_reference/core/stores/langchain_core.stores.BaseStore.html) support the following interface. Note that the interface allows
|
||||
for modifying **multiple** key-value pairs at once:
|
||||
|
||||
- `mget(key: Sequence[str]) -> List[Optional[bytes]]`: get the contents of multiple keys, returning `None` if the key does not exist
|
||||
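A minimal sketch of this interface using the in-memory implementation shipped with `langchain_core`:

```python
from langchain_core.stores import InMemoryByteStore

store = InMemoryByteStore()

store.mset([("doc_1", b"hello"), ("doc_2", b"world")])  # set multiple key-value pairs at once
store.mget(["doc_1", "doc_2", "missing"])               # -> [b'hello', b'world', None]
store.mdelete(["doc_2"])                                 # delete multiple keys at once
list(store.yield_keys())                                 # -> ['doc_1']
```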
@@ -530,10 +528,10 @@ Tools are needed whenever you want a model to control parts of your code or call
|
||||
|
||||
A tool consists of:
|
||||
|
||||
1. The name of the tool.
|
||||
2. A description of what the tool does.
|
||||
3. A JSON schema defining the inputs to the tool.
|
||||
4. A function (and, optionally, an async variant of the function).
|
||||
1. The `name` of the tool.
|
||||
2. A `description` of what the tool does.
|
||||
3. A `JSON schema` defining the inputs to the tool.
|
||||
4. A `function` (and, optionally, an async variant of the function).
|
||||
|
||||
When a tool is bound to a model, the name, description and JSON schema are provided as context to the model.
|
||||
Given a list of tools and a set of instructions, a model can request to call one or more tools with specific inputs.
|
||||
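A hedged sketch of those four pieces using the `@tool` decorator from `langchain_core.tools`; binding to a chat model is left as a comment because the model class and name are assumptions:

```python
from langchain_core.tools import tool

@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b

multiply.name                      # 1. the name: "multiply"
multiply.description               # 2. the description, derived from the docstring
multiply.args                      # 3. the input schema, inferred from the type hints
multiply.invoke({"a": 2, "b": 3})  # 4. the function itself -> 6

# llm_with_tools = ChatOpenAI(model="gpt-4o-mini").bind_tools([multiply])  # illustrative binding
```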
@@ -646,14 +644,14 @@ The results of those actions can then be fed back into the agent and it determin
|
||||
[LangGraph](https://github.com/langchain-ai/langgraph) is an extension of LangChain specifically aimed at creating highly controllable and customizable agents.
|
||||
Please check out that documentation for a more in depth overview of agent concepts.
|
||||
|
||||
There is a legacy agent concept in LangChain that we are moving towards deprecating: `AgentExecutor`.
|
||||
There is a legacy `agent` concept in LangChain that we are moving towards deprecating: `AgentExecutor`.
|
||||
AgentExecutor was essentially a runtime for agents.
|
||||
It was a great place to get started, however, it was not flexible enough as you started to have more customized agents.
|
||||
In order to solve that we built LangGraph to be this flexible, highly-controllable runtime.
|
||||
|
||||
If you are still using AgentExecutor, do not fear: we still have a guide on [how to use AgentExecutor](/docs/how_to/agent_executor).
|
||||
It is recommended, however, that you start to transition to LangGraph.
|
||||
In order to assist in this we have put together a [transition guide on how to do so](/docs/how_to/migrate_agent).
|
||||
In order to assist in this, we have put together a [transition guide on how to do so](/docs/how_to/migrate_agent).
|
||||
|
||||
#### ReAct agents
|
||||
<span data-heading-keywords="react,react agent"></span>
|
||||
@@ -710,10 +708,10 @@ You can subscribe to these events by using the `callbacks` argument available th
|
||||
|
||||
Callback handlers can either be `sync` or `async`:
|
||||
|
||||
* Sync callback handlers implement the [BaseCallbackHandler](https://api.python.langchain.com/en/latest/callbacks/langchain_core.callbacks.base.BaseCallbackHandler.html) interface.
|
||||
* Async callback handlers implement the [AsyncCallbackHandler](https://api.python.langchain.com/en/latest/callbacks/langchain_core.callbacks.base.AsyncCallbackHandler.html) interface.
|
||||
* Sync callback handlers implement the [BaseCallbackHandler](https://python.langchain.com/v0.2/api_reference/core/callbacks/langchain_core.callbacks.base.BaseCallbackHandler.html) interface.
|
||||
* Async callback handlers implement the [AsyncCallbackHandler](https://python.langchain.com/v0.2/api_reference/core/callbacks/langchain_core.callbacks.base.AsyncCallbackHandler.html) interface.
|
||||
|
||||
During run-time LangChain configures an appropriate callback manager (e.g., [CallbackManager](https://api.python.langchain.com/en/latest/callbacks/langchain_core.callbacks.manager.CallbackManager.html) or [AsyncCallbackManager](https://api.python.langchain.com/en/latest/callbacks/langchain_core.callbacks.manager.AsyncCallbackManager.html) which will be responsible for calling the appropriate method on each "registered" callback handler when the event is triggered.
|
||||
During run-time LangChain configures an appropriate callback manager (e.g., [CallbackManager](https://python.langchain.com/v0.2/api_reference/core/callbacks/langchain_core.callbacks.manager.CallbackManager.html) or [AsyncCallbackManager](https://python.langchain.com/v0.2/api_reference/core/callbacks/langchain_core.callbacks.manager.AsyncCallbackManager.html)), which will be responsible for calling the appropriate method on each "registered" callback handler when the event is triggered.
|
||||
|
||||
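For illustration, a minimal sketch of a sync handler that reacts to two of these events; how it is attached is shown as a comment, with the invoked component left abstract:

```python
from typing import Any
from langchain_core.callbacks import BaseCallbackHandler

class TokenPrinter(BaseCallbackHandler):
    """Prints each newly generated token and marks the end of the LLM run."""

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        print(token, end="", flush=True)

    def on_llm_end(self, response: Any, **kwargs: Any) -> None:
        print("\n-- llm run finished --")

# Attached at request time through the `callbacks` argument, e.g.:
# some_chain.invoke(inputs, config={"callbacks": [TokenPrinter()]})
```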
#### Passing callbacks
|
||||
|
||||
@@ -739,7 +737,7 @@ callbacks to any child objects.
|
||||
:::important Async in Python<=3.10
|
||||
|
||||
Any `RunnableLambda`, a `RunnableGenerator`, or `Tool` that invokes other runnables
|
||||
and is running async in python<=3.10, will have to propagate callbacks to child
|
||||
and is running `async` in python<=3.10, will have to propagate callbacks to child
|
||||
objects manually. This is because LangChain cannot automatically propagate
|
||||
callbacks to child objects in this case.
|
||||
|
||||
@@ -781,7 +779,7 @@ For models (or other components) that don't support streaming natively, this ite
|
||||
you could still use the same general pattern when calling them. Using `.stream()` will also automatically call the model in streaming mode
|
||||
without the need to provide additional config.
|
||||
|
||||
The type of each outputted chunk depends on the type of component - for example, chat models yield [`AIMessageChunks`](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.ai.AIMessageChunk.html).
|
||||
The type of each outputted chunk depends on the type of component - for example, chat models yield [`AIMessageChunks`](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessageChunk.html).
|
||||
Because this method is part of [LangChain Expression Language](/docs/concepts/#langchain-expression-language-lcel),
|
||||
you can handle formatting differences from different outputs using an [output parser](/docs/concepts/#output-parsers) to transform
|
||||
each yielded chunk.
|
||||
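A minimal sketch of that loop (the chat model class and model name are assumptions; any streaming-capable component works the same way):

```python
from langchain_openai import ChatOpenAI  # assumption: any chat model integration

model = ChatOpenAI(model="gpt-4o-mini")

for chunk in model.stream("Write a haiku about autumn"):
    # Each chunk is an AIMessageChunk; chunks can also be concatenated with `+`.
    print(chunk.content, end="", flush=True)
```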
@@ -829,10 +827,10 @@ including a table listing available events.
|
||||
#### Callbacks
|
||||
|
||||
The lowest level way to stream outputs from LLMs in LangChain is via the [callbacks](/docs/concepts/#callbacks) system. You can pass a
|
||||
callback handler that handles the [`on_llm_new_token`](https://api.python.langchain.com/en/latest/callbacks/langchain.callbacks.streaming_aiter.AsyncIteratorCallbackHandler.html#langchain.callbacks.streaming_aiter.AsyncIteratorCallbackHandler.on_llm_new_token) event into LangChain components. When that component is invoked, any
|
||||
callback handler that handles the [`on_llm_new_token`](https://python.langchain.com/v0.2/api_reference/langchain/callbacks/langchain.callbacks.streaming_aiter.AsyncIteratorCallbackHandler.html#langchain.callbacks.streaming_aiter.AsyncIteratorCallbackHandler.on_llm_new_token) event into LangChain components. When that component is invoked, any
|
||||
[LLM](/docs/concepts/#llms) or [chat model](/docs/concepts/#chat-models) contained in the component calls
|
||||
the callback with the generated token. Within the callback, you could pipe the tokens into some other destination, e.g. a HTTP response.
|
||||
You can also handle the [`on_llm_end`](https://api.python.langchain.com/en/latest/callbacks/langchain.callbacks.streaming_aiter.AsyncIteratorCallbackHandler.html#langchain.callbacks.streaming_aiter.AsyncIteratorCallbackHandler.on_llm_end) event to perform any necessary cleanup.
|
||||
You can also handle the [`on_llm_end`](https://python.langchain.com/v0.2/api_reference/langchain/callbacks/langchain.callbacks.streaming_aiter.AsyncIteratorCallbackHandler.html#langchain.callbacks.streaming_aiter.AsyncIteratorCallbackHandler.on_llm_end) event to perform any necessary cleanup.
|
||||
|
||||
You can see [this how-to section](/docs/how_to/#callbacks) for more specifics on using callbacks.
|
||||
|
||||
@@ -869,7 +867,7 @@ Furthermore, using tokens can also improve efficiency, since the model processes
|
||||
### Function/tool calling
|
||||
|
||||
:::info
|
||||
We use the term tool calling interchangeably with function calling. Although
|
||||
We use the term `tool calling` interchangeably with `function calling`. Although
|
||||
function calling is sometimes meant to refer to invocations of a single function,
|
||||
we treat all models as though they can return multiple tool or function calls in
|
||||
each message.
|
||||
@@ -964,7 +962,6 @@ structured_llm.invoke("Tell me a joke about cats")
|
||||
|
||||
```
|
||||
Joke(setup='Why was the cat sitting on the computer?', punchline='To keep an eye on the mouse!', rating=None)
|
||||
|
||||
```
|
||||
|
||||
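Filling in the setup around that snippet, a hedged sketch of the `.with_structured_output()` pattern (the `Joke` schema mirrors the output shown above; the chat model class and name are assumptions, and on older versions the schema may need to use `langchain_core.pydantic_v1`):

```python
from typing import Optional
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI  # assumption: any model with structured output support

class Joke(BaseModel):
    """Joke to tell the user."""
    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline of the joke")
    rating: Optional[int] = Field(default=None, description="How funny the joke is, from 1 to 10")

structured_llm = ChatOpenAI(model="gpt-4o-mini").with_structured_output(Joke)
structured_llm.invoke("Tell me a joke about cats")
```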
We recommend this method as a starting point when working with structured output:
|
||||
@@ -1101,10 +1098,91 @@ The following how-to guides are good practical resources for using function/tool
|
||||
|
||||
For a full list of model providers that support tool calling, [see this table](/docs/integrations/chat/#advanced-features).
|
||||
|
||||
### Few-shot prompting
|
||||
|
||||
One of the most effective ways to improve model performance is to give a model examples of
|
||||
what you want it to do. The technique of adding example inputs and expected outputs
|
||||
to a model prompt is known as "few-shot prompting". The technique is based on the
|
||||
[Language Models are Few-Shot Learners](https://arxiv.org/abs/2005.14165) paper.
|
||||
There are a few things to think about when doing few-shot prompting:
|
||||
|
||||
1. How are examples generated?
|
||||
2. How many examples are in each prompt?
|
||||
3. How are examples selected at runtime?
|
||||
4. How are examples formatted in the prompt?
|
||||
|
||||
Here are the considerations for each.
|
||||
|
||||
#### 1. Generating examples
|
||||
|
||||
The first and most important step of few-shot prompting is coming up with a good dataset of examples. Good examples should be relevant at runtime, clear, informative, and provide information that was not already known to the model.
|
||||
|
||||
At a high-level, the basic ways to generate examples are:
|
||||
- Manual: a person/people generates examples they think are useful.
|
||||
- Better model: a better (presumably more expensive/slower) model's responses are used as examples for a worse (presumably cheaper/faster) model.
|
||||
- User feedback: users (or labelers) leave feedback on interactions with the application and examples are generated based on that feedback (for example, all interactions with positive feedback could be turned into examples).
|
||||
- LLM feedback: same as user feedback but the process is automated by having models evaluate themselves.
|
||||
|
||||
Which approach is best depends on your task. For tasks where a small number of core principles need to be understood really well, it can be valuable to hand-craft a few really good examples.
|
||||
For tasks where the space of correct behaviors is broader and more nuanced, it can be useful to generate many examples in a more automated fashion so that there's a higher likelihood of there being some highly relevant examples for any runtime input.
|
||||
|
||||
**Single-turn v.s. multi-turn examples**
|
||||
|
||||
Another dimension to think about when generating examples is what the example is actually showing.
|
||||
|
||||
The simplest types of examples just have a user input and an expected model output. These are single-turn examples.
|
||||
|
||||
One more complex type of example is where the example is an entire conversation, usually in which a model initially responds incorrectly and a user then tells the model how to correct its answer.
|
||||
This is called a multi-turn example. Multi-turn examples can be useful for more nuanced tasks where it's useful to show common errors and spell out exactly why they're wrong and what should be done instead.
|
||||
|
||||
#### 2. Number of examples
|
||||
|
||||
Once we have a dataset of examples, we need to think about how many examples should be in each prompt.
|
||||
The key tradeoff is that more examples generally improve performance, but larger prompts increase costs and latency.
|
||||
And beyond some threshold having too many examples can start to confuse the model.
|
||||
Finding the right number of examples is highly dependent on the model, the task, the quality of the examples, and your cost and latency constraints.
|
||||
Anecdotally, the better the model is the fewer examples it needs to perform well and the more quickly you hit steeply diminishing returns on adding more examples.
|
||||
But, the best/only way to reliably answer this question is to run some experiments with different numbers of examples.
|
||||
|
||||
#### 3. Selecting examples
|
||||
|
||||
Assuming we are not adding our entire example dataset into each prompt, we need to have a way of selecting examples from our dataset based on a given input. We can do this:
|
||||
- Randomly
|
||||
- By (semantic or keyword-based) similarity of the inputs
|
||||
- Based on some other constraints, like token size
|
||||
|
||||
LangChain has a number of [`ExampleSelectors`](/docs/concepts/#example-selectors) which make it easy to use any of these techniques.
|
||||
|
||||
Generally, selecting by semantic similarity leads to the best model performance. But how important this is depends again on the model and task, and is something worth experimenting with.
|
||||
|
||||
#### 4. Formatting examples
|
||||
|
||||
Most state-of-the-art models these days are chat models, so we'll focus on formatting examples for those. Our basic options are to insert the examples:
|
||||
- In the system prompt as a string
|
||||
- As their own messages
|
||||
|
||||
If we insert our examples into the system prompt as a string, we'll need to make sure it's clear to the model where each example begins and which parts are the input versus output. Different models respond better to different syntaxes, like [ChatML](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/chat-markup-language), XML, TypeScript, etc.
|
||||
|
||||
If we insert our examples as messages, where each example is represented as a sequence of Human, AI messages, we might want to also assign [names](/docs/concepts/#messages) to our messages like `"example_user"` and `"example_assistant"` to make it clear that these messages correspond to different actors than the latest input message.
|
||||
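A minimal sketch of the messages option (the translation task and example contents are made up; `name` marks the example speakers as described above):

```python
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate

examples = [
    HumanMessage("cheese", name="example_user"),
    AIMessage("fromage", name="example_assistant"),
    HumanMessage("bread", name="example_user"),
    AIMessage("pain", name="example_assistant"),
]

prompt = ChatPromptTemplate.from_messages(
    [SystemMessage("Translate the user's word into French."), *examples, ("human", "{word}")]
)
prompt.invoke({"word": "apple"}).to_messages()
# chain = prompt | chat_model  # bind to any chat model that respects message names
```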
|
||||
**Formatting tool call examples**
|
||||
|
||||
One area where formatting examples as messages can be tricky is when our example outputs have tool calls. This is because different models have different constraints on what types of message sequences are allowed when any tool calls are generated.
|
||||
- Some models require that any AIMessage with tool calls be immediately followed by ToolMessages for every tool call,
|
||||
- Some models additionally require that any ToolMessages be immediately followed by an AIMessage before the next HumanMessage,
|
||||
- Some models require that tools are passed in to the model if there are any tool calls / ToolMessages in the chat history.
|
||||
|
||||
These requirements are model-specific and should be checked for the model you are using. If your model requires ToolMessages after tool calls and/or AIMessages after ToolMessages and your examples only include expected tool calls and not the actual tool outputs, you can try adding dummy ToolMessages / AIMessages to the end of each example with generic contents to satisfy the API constraints.
|
||||
In these cases it's especially worth experimenting with inserting your examples as strings versus messages, as having dummy messages can adversely affect certain models.
|
||||
|
||||
You can see a case study of how Anthropic and OpenAI respond to different few-shot prompting techniques on two different tool calling benchmarks [here](https://blog.langchain.dev/few-shot-prompting-to-improve-tool-calling-performance/).

### Retrieval

LLMs are trained on a large but fixed dataset, limiting their ability to reason over private or recent information.

Fine-tuning an LLM with specific facts is one way to mitigate this, but is often [poorly suited for factual recall](https://www.anyscale.com/blog/fine-tuning-is-for-form-not-facts) and [can be costly](https://www.glean.com/blog/how-to-build-an-ai-assistant-for-the-enterprise).

`Retrieval` is the process of providing relevant information to an LLM to improve its response for a given input.

`Retrieval augmented generation` (`RAG`) [paper](https://arxiv.org/abs/2005.11401) is the process of grounding the LLM generation (output) using the retrieved information.
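Stripped to its essentials, the pattern looks roughly like the sketch below; the `retriever` and chat `model` objects are assumed to exist already, and the prompt wording is arbitrary.

```python
# Minimal RAG sketch: retrieve documents relevant to the question, then ground the
# generation on them by placing their contents in the prompt.
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages([
    ("system", "Answer using only the provided context.\n\nContext:\n{context}"),
    ("human", "{question}"),
])

def answer(question: str) -> str:
    docs = retriever.invoke(question)  # retrieval
    context = "\n\n".join(doc.page_content for doc in docs)
    chain = prompt | model  # generation grounded on the retrieved context
    return chain.invoke({"context": context, "question": question}).content
```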

:::tip

@@ -1124,12 +1202,12 @@ First, consider the user input(s) to your RAG system. Ideally, a RAG system can

**Using an LLM to review and optionally modify the input is the central idea behind query translation.** This serves as a general buffer, optimizing raw user inputs for your retrieval system.

For example, this can be as simple as extracting keywords or as complex as generating multiple sub-questions for a complex query.

| Name | When to use | Description |
|---------------|-------------|-------------|
| [Multi-query](/docs/how_to/MultiQueryRetriever/) | When you need to cover multiple perspectives of a question. | Rewrite the user question from multiple perspectives, retrieve documents for each rewritten question, return the unique documents for all queries. |
| [Decomposition](https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb) | When a question can be broken down into smaller subproblems. | Decompose a question into a set of subproblems / questions, which can either be solved sequentially (use the answer from first + retrieval to answer the second) or in parallel (consolidate each answer into final answer). |
| [Step-back](https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb) | When a higher-level conceptual understanding is required. | First prompt the LLM to ask a generic step-back question about higher-level concepts or principles, and retrieve relevant facts about them. Use this grounding to help answer the user question. [Paper](https://arxiv.org/pdf/2310.06117). |
| [HyDE](https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb) | If you have challenges retrieving relevant documents using the raw user inputs. | Use an LLM to convert questions into hypothetical documents that answer the question. Use the embedded hypothetical documents to retrieve real documents with the premise that doc-doc similarity search can produce more relevant matches. [Paper](https://arxiv.org/abs/2212.10496). |
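A minimal sketch of the multi-query technique from the table above, assuming an existing vector store `vectorstore` and chat model `llm`:

```python
# Sketch: MultiQueryRetriever rewrites the question from several perspectives and
# returns the unique union of documents retrieved for each rewrite.
from langchain.retrievers.multi_query import MultiQueryRetriever

retriever = MultiQueryRetriever.from_llm(
    retriever=vectorstore.as_retriever(),
    llm=llm,
)
docs = retriever.invoke("How do agents decide which tool to call?")
```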

:::tip

@@ -1203,11 +1281,11 @@ Fifth, consider ways to improve the quality of your similarity search itself. Em

There are some additional tricks to improve the quality of your retrieval. Embeddings excel at capturing semantic information, but may struggle with keyword-based queries. Many [vector stores](/docs/integrations/retrievers/pinecone_hybrid_search/) offer built-in [hybrid-search](https://docs.pinecone.io/guides/data/understanding-hybrid-search) to combine keyword and semantic similarity, which marries the benefits of both approaches. Furthermore, many vector stores have [maximal marginal relevance](https://python.langchain.com/v0.1/docs/modules/model_io/prompts/example_selectors/mmr/), which attempts to diversify the results of a search to avoid returning similar and redundant documents.

| Name | When to use | Description |
|-------------------|----------------------------------------------------------|-------------|
| [ColBERT](/docs/integrations/providers/ragatouille/#using-colbert-as-a-reranker) | When higher granularity embeddings are needed. | ColBERT uses contextually influenced embeddings for each token in the document and query to get a granular query-document similarity score. [Paper](https://arxiv.org/abs/2112.01488). |
| [Hybrid search](/docs/integrations/retrievers/pinecone_hybrid_search/) | When combining keyword-based and semantic similarity. | Hybrid search combines keyword and semantic similarity, marrying the benefits of both approaches. [Paper](https://arxiv.org/abs/2210.11934). |
| [Maximal Marginal Relevance (MMR)](/docs/integrations/vectorstores/pinecone/#maximal-marginal-relevance-searches) | When needing to diversify search results. | MMR attempts to diversify the results of a search to avoid returning similar and redundant documents. |
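For instance, MMR can be requested directly from most LangChain vector stores; a sketch assuming an existing `vectorstore`:

```python
# Sketch: diversify results with maximal marginal relevance (MMR).
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 5, "fetch_k": 20},  # fetch 20 candidates, keep the 5 most diverse
)
docs = retriever.invoke("What does the paper say about evaluation?")
```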

:::tip

@@ -1227,7 +1305,7 @@ Sixth, consider ways to filter or rank retrieved documents. This is very useful

:::tip

See our RAG from Scratch video on [RAG-Fusion](https://youtu.be/77qELPbNgxA?feature=shared) ([paper](https://arxiv.org/abs/2402.03367)), an approach for post-processing across multiple queries: Rewrite the user question from multiple perspectives, retrieve documents for each rewritten question, and combine the ranks of multiple search result lists to produce a single, unified ranking with [Reciprocal Rank Fusion (RRF)](https://towardsdatascience.com/forget-rag-the-future-is-rag-fusion-1147298d8ad1).

:::
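Reciprocal Rank Fusion itself is a few lines of arithmetic; a self-contained sketch (the document ids and the conventional constant `k = 60` are illustrative):

```python
# Sketch of Reciprocal Rank Fusion (RRF): merge several ranked lists of document ids
# (one per rewritten query) into a single, unified ranking.
from collections import defaultdict

def reciprocal_rank_fusion(ranked_lists: list[list[str]], k: int = 60) -> list[str]:
    scores: dict[str, float] = defaultdict(float)
    for ranking in ranked_lists:
        for rank, doc_id in enumerate(ranking, start=1):
            scores[doc_id] += 1.0 / (k + rank)
    return sorted(scores, key=scores.get, reverse=True)

# "b" ranks highly in both lists, so it comes out on top: ['b', 'c', 'a', 'd']
print(reciprocal_rank_fusion([["a", "b", "c"], ["b", "c", "d"]]))
```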

@@ -12,7 +12,7 @@ It covers a wide array of topics, including tutorials, use cases, integrations,

and more, offering extensive guidance on building with LangChain.
The content for this documentation lives in the `/docs` directory of the monorepo.
2. In-code Documentation: This is documentation of the codebase itself, which is also
used to generate the externally facing [API Reference](https://python.langchain.com/v0.2/api_reference/langchain/index.html).
The content for the API reference is autogenerated by scanning the docstrings in the codebase. For this reason we ask that
developers document their code well.

@@ -24,3 +24,16 @@ for more information.

Notably, GitHub doesn't allow this setting to be enabled for forks in **organizations** ([issue](https://github.com/orgs/community/discussions/5634)).
If you are working in an organization, we recommend submitting your PR from a personal
fork in order to enable this setting.

### Why hasn't my PR been reviewed?

Please reference our [Review Process](/docs/contributing/review_process/).

### Why was my PR closed?

Please reference our [Review Process](/docs/contributing/review_process/).

### I think my PR was closed in a way that didn't follow the review process. What should I do?

Tag `@efriis` in the PR comments referencing the portion of the review
process that you believe was not followed. We'll take a look!

@@ -50,7 +50,7 @@ There are other files in the root directory level, but their presence should be

## Documentation

The `/docs` directory contains the content for the documentation that is shown
at https://python.langchain.com/ and the associated API Reference https://python.langchain.com/v0.2/api_reference/langchain/index.html.

See the [documentation](/docs/contributing/documentation/) guidelines to learn how to contribute to the documentation.

docs/docs/contributing/review_process.mdx (new file, 95 lines)
@@ -0,0 +1,95 @@

# Review Process

## Overview

This document outlines the process used by the LangChain maintainers for reviewing pull requests (PRs). The primary objective of this process is to enhance the LangChain developer experience.

## Review Statuses

We categorize PRs using three main statuses, which are marked as project item statuses in the right sidebar and can be viewed in detail [here](https://github.com/orgs/langchain-ai/projects/12/views/1).

- **Triage**:
  - Initial status for all newly submitted PRs.
  - Requires a maintainer to categorize it into one of the other statuses.

- **Needs Support**:
  - PRs that require community feedback or additional input before moving forward.
  - Automatically promoted to the backlog if it receives 5 upvotes.
  - An auto-comment is generated when this status is applied, explaining the flow and the upvote requirement.
  - If the PR remains in this status for 25 days, it will be marked as “stale” via auto-comment.
  - PRs will be auto-closed after 30 days if no further action is taken.

- **In Review**:
  - PRs that are actively under review by our team.
  - These are regularly reviewed and monitored.

**Note:** A PR may only have one status at a time.

**Note:** You may notice 3 additional statuses of Done, Closed, and Internal that
are external to this lifecycle. Done and Closed PRs have been merged or closed,
respectively. Internal is for PRs submitted by core maintainers, and these PRs are owned
by the submitter.

## Review Guidelines

1. **PRs that touch /libs/core**:
   - PRs that directly impact core code and are likely to affect end users.
   - **Triage Guideline**: most PRs should either go straight to `In Review` or be closed.
   - These PRs are given top priority and are reviewed the fastest.
   - PRs that don't have a **concise** description of their motivation (either in the PR summary or in a linked issue) are likely to be closed without an in-depth review. Please do not generate verbose PR descriptions with an LLM.
   - PRs that don't have unit tests are likely to be closed.
   - Feature requests should first be opened as a GitHub issue and discussed with the LangChain maintainers. Large PRs submitted without prior discussion are likely to be closed.

2. **PRs that touch /libs/langchain**:
   - High-impact PRs that are closely related to core PRs but slightly lower in priority.
   - **Triage Guideline**: most PRs should either go straight to `In Review` or be closed.
   - These are reviewed and closed aggressively, similar to core PRs.
   - New feature requests should be discussed with the core maintainer team beforehand in an issue.

3. **PRs that touch /libs/partners/****:
   - PRs involving integration packages.
   - **Triage Guideline**: most PRs should either go straight to `In Review` or be closed.
   - The review may be conducted by our team or handed off to the partner's development team, depending on the PR's content.
   - We maintain communication lines with most partner dev teams to facilitate this process.

4. **Community PRs**:
   - Most community PRs will get an initial status of `Needs Support`.
   - **Triage Guideline**: most PRs should go to `Needs Support`. Bugfixes on high-traffic integrations should go straight to `In Review`.
   - **Triage Guideline**: all new features and integrations should go to `Needs Support` and will be closed if they do not get enough support (measured by upvotes or comments).
   - PRs in the `Needs Support` status for 20 days are marked as “stale” and will be closed after 30 days if no action is taken.

5. **Documentation PRs**:
   - PRs that touch the documentation content in docs/docs.
   - **Triage Guideline**:
     - PRs that fix typos or small errors in a single file and pass CI should go straight to `In Review`.
     - PRs that make changes that have been discussed and agreed upon in an issue should go straight to `In Review`.
     - PRs that add new pages or change the structure of the documentation should go to `Needs Support`.
   - We strive to standardize documentation formats to streamline the review process.
   - CI jobs run against documentation to ensure adherence to standards, automating much of the review.

6. **PRs must be in English**:
   - PRs that are not in English will be closed without review.
   - This is to ensure that all maintainers can review the PRs effectively.

## How to see a PR's status

See screenshot:



*To see the status of all open PRs, please visit the [LangChain Project Board](https://github.com/orgs/langchain-ai/projects/12/views/2).*

## Review Prioritization

Our goal is to provide the best possible development experience by focusing on making software that:

- Works: Works as intended (is bug-free).
- Is useful: Improves LLM app development with components that work off-the-shelf and runtimes that simplify app building.
- Is easy: Is intuitive to use and well-documented.

We believe this process reflects our priorities and are open to feedback if you feel it does not.

## GitHub Discussion

We welcome your feedback on this process. Please feel free to add a comment in
[this GitHub Discussion](https://github.com/langchain-ai/langchain/discussions/25920).
@@ -13,7 +13,7 @@
|
||||
"# How to split by HTML header \n",
|
||||
"## Description and motivation\n",
|
||||
"\n",
|
||||
"[HTMLHeaderTextSplitter](https://api.python.langchain.com/en/latest/html/langchain_text_splitters.html.HTMLHeaderTextSplitter.html) is a \"structure-aware\" chunker that splits text at the HTML element level and adds metadata for each header \"relevant\" to any given chunk. It can return chunks element by element or combine elements with the same metadata, with the objectives of (a) keeping related text grouped (more or less) semantically and (b) preserving context-rich information encoded in document structures. It can be used with other text splitters as part of a chunking pipeline.\n",
|
||||
"[HTMLHeaderTextSplitter](https://python.langchain.com/v0.2/api_reference/text_splitters/html/langchain_text_splitters.html.HTMLHeaderTextSplitter.html) is a \"structure-aware\" chunker that splits text at the HTML element level and adds metadata for each header \"relevant\" to any given chunk. It can return chunks element by element or combine elements with the same metadata, with the objectives of (a) keeping related text grouped (more or less) semantically and (b) preserving context-rich information encoded in document structures. It can be used with other text splitters as part of a chunking pipeline.\n",
|
||||
"\n",
|
||||
"It is analogous to the [MarkdownHeaderTextSplitter](/docs/how_to/markdown_header_metadata_splitter) for markdown files.\n",
|
||||
"\n",
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"\n",
|
||||
"Distance-based vector database retrieval embeds (represents) queries in high-dimensional space and finds similar embedded documents based on a distance metric. But, retrieval may produce different results with subtle changes in query wording, or if the embeddings do not capture the semantics of the data well. Prompt engineering / tuning is sometimes done to manually address these problems, but can be tedious.\n",
|
||||
"\n",
|
||||
"The [MultiQueryRetriever](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.multi_query.MultiQueryRetriever.html) automates the process of prompt tuning by using an LLM to generate multiple queries from different perspectives for a given user input query. For each query, it retrieves a set of relevant documents and takes the unique union across all queries to get a larger set of potentially relevant documents. By generating multiple perspectives on the same question, the `MultiQueryRetriever` can mitigate some of the limitations of the distance-based retrieval and get a richer set of results.\n",
|
||||
"The [MultiQueryRetriever](https://python.langchain.com/v0.2/api_reference/langchain/retrievers/langchain.retrievers.multi_query.MultiQueryRetriever.html) automates the process of prompt tuning by using an LLM to generate multiple queries from different perspectives for a given user input query. For each query, it retrieves a set of relevant documents and takes the unique union across all queries to get a larger set of potentially relevant documents. By generating multiple perspectives on the same question, the `MultiQueryRetriever` can mitigate some of the limitations of the distance-based retrieval and get a richer set of results.\n",
|
||||
"\n",
|
||||
"Let's build a vectorstore using the [LLM Powered Autonomous Agents](https://lilianweng.github.io/posts/2023-06-23-agent/) blog post by Lilian Weng from the [RAG tutorial](/docs/tutorials/rag):"
|
||||
]
|
||||
@@ -125,9 +125,9 @@
|
||||
"source": [
|
||||
"#### Supplying your own prompt\n",
|
||||
"\n",
|
||||
"Under the hood, `MultiQueryRetriever` generates queries using a specific [prompt](https://api.python.langchain.com/en/latest/_modules/langchain/retrievers/multi_query.html#MultiQueryRetriever). To customize this prompt:\n",
|
||||
"Under the hood, `MultiQueryRetriever` generates queries using a specific [prompt](https://python.langchain.com/v0.2/api_reference/langchain/retrievers/langchain.retrievers.multi_query.MultiQueryRetriever.html). To customize this prompt:\n",
|
||||
"\n",
|
||||
"1. Make a [PromptTemplate](https://api.python.langchain.com/en/latest/prompts/langchain_core.prompts.prompt.PromptTemplate.html) with an input variable for the question;\n",
|
||||
"1. Make a [PromptTemplate](https://python.langchain.com/v0.2/api_reference/core/prompts/langchain_core.prompts.prompt.PromptTemplate.html) with an input variable for the question;\n",
|
||||
"2. Implement an [output parser](/docs/concepts#output-parsers) like the one below to split the result into a list of queries.\n",
|
||||
"\n",
|
||||
"The prompt and output parser together must support the generation of a list of queries."
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
"source": [
|
||||
"# How to add scores to retriever results\n",
|
||||
"\n",
|
||||
"Retrievers will return sequences of [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html) objects, which by default include no information about the process that retrieved them (e.g., a similarity score against a query). Here we demonstrate how to add retrieval scores to the `.metadata` of documents:\n",
|
||||
"Retrievers will return sequences of [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html) objects, which by default include no information about the process that retrieved them (e.g., a similarity score against a query). Here we demonstrate how to add retrieval scores to the `.metadata` of documents:\n",
|
||||
"1. From [vectorstore retrievers](/docs/how_to/vectorstore_retriever);\n",
|
||||
"2. From higher-order LangChain retrievers, such as [SelfQueryRetriever](/docs/how_to/self_query) or [MultiVectorRetriever](/docs/how_to/multi_vector).\n",
|
||||
"\n",
|
||||
@@ -15,7 +15,7 @@
|
||||
"\n",
|
||||
"## Create vector store\n",
|
||||
"\n",
|
||||
"First we populate a vector store with some data. We will use a [PineconeVectorStore](https://api.python.langchain.com/en/latest/vectorstores/langchain_pinecone.vectorstores.PineconeVectorStore.html), but this guide is compatible with any LangChain vector store that implements a `.similarity_search_with_score` method."
|
||||
"First we populate a vector store with some data. We will use a [PineconeVectorStore](https://python.langchain.com/v0.2/api_reference/pinecone/vectorstores/langchain_pinecone.vectorstores.PineconeVectorStore.html), but this guide is compatible with any LangChain vector store that implements a `.similarity_search_with_score` method."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -263,7 +263,7 @@
|
||||
"\n",
|
||||
"To propagate similarity scores through this retriever, we can again subclass `MultiVectorRetriever` and override a method. This time we will override `_get_relevant_documents`.\n",
|
||||
"\n",
|
||||
"First, we prepare some fake data. We generate fake \"whole documents\" and store them in a document store; here we will use a simple [InMemoryStore](https://api.python.langchain.com/en/latest/stores/langchain_core.stores.InMemoryBaseStore.html)."
|
||||
"First, we prepare some fake data. We generate fake \"whole documents\" and store them in a document store; here we will use a simple [InMemoryStore](https://python.langchain.com/v0.2/api_reference/core/stores/langchain_core.stores.InMemoryBaseStore.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
"\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"An alternate way of [passing data through](/docs/how_to/passthrough) steps of a chain is to leave the current values of the chain state unchanged while assigning a new value under a given key. The [`RunnablePassthrough.assign()`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html#langchain_core.runnables.passthrough.RunnablePassthrough.assign) static method takes an input value and adds the extra arguments passed to the assign function.\n",
|
||||
"An alternate way of [passing data through](/docs/how_to/passthrough) steps of a chain is to leave the current values of the chain state unchanged while assigning a new value under a given key. The [`RunnablePassthrough.assign()`](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html#langchain_core.runnables.passthrough.RunnablePassthrough.assign) static method takes an input value and adds the extra arguments passed to the assign function.\n",
|
||||
"\n",
|
||||
"This is useful in the common [LangChain Expression Language](/docs/concepts/#langchain-expression-language) pattern of additively creating a dictionary to use as input to a later step.\n",
|
||||
"\n",
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
"\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"Sometimes we want to invoke a [`Runnable`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html) within a [RunnableSequence](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableSequence.html) with constant arguments that are not part of the output of the preceding Runnable in the sequence, and which are not part of the user input. We can use the [`Runnable.bind()`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.bind) method to set these arguments ahead of time.\n",
|
||||
"Sometimes we want to invoke a [`Runnable`](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html) within a [RunnableSequence](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.RunnableSequence.html) with constant arguments that are not part of the output of the preceding Runnable in the sequence, and which are not part of the user input. We can use the [`Runnable.bind()`](https://python.langchain.com/v0.2/api_reference/langchain_core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.bind) method to set these arguments ahead of time.\n",
|
||||
"\n",
|
||||
"## Binding stop sequences\n",
|
||||
"\n",
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
"- [Custom callback handlers](/docs/how_to/custom_callbacks)\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"If you are planning to use the async APIs, it is recommended to use and extend [`AsyncCallbackHandler`](https://api.python.langchain.com/en/latest/callbacks/langchain_core.callbacks.base.AsyncCallbackHandler.html) to avoid blocking the event.\n",
|
||||
"If you are planning to use the async APIs, it is recommended to use and extend [`AsyncCallbackHandler`](https://python.langchain.com/v0.2/api_reference/core/callbacks/langchain_core.callbacks.base.AsyncCallbackHandler.html) to avoid blocking the event.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
":::{.callout-warning}\n",
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
"\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"If you are composing a chain of runnables and want to reuse callbacks across multiple executions, you can attach callbacks with the [`.with_config()`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_config) method. This saves you the need to pass callbacks in each time you invoke the chain.\n",
|
||||
"If you are composing a chain of runnables and want to reuse callbacks across multiple executions, you can attach callbacks with the [`.with_config()`](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_config) method. This saves you the need to pass callbacks in each time you invoke the chain.\n",
|
||||
"\n",
|
||||
":::{.callout-important}\n",
|
||||
"\n",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
"\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"In many cases, it is advantageous to pass in handlers instead when running the object. When we pass through [`CallbackHandlers`](https://api.python.langchain.com/en/latest/callbacks/langchain_core.callbacks.base.BaseCallbackHandler.html#langchain-core-callbacks-base-basecallbackhandler) using the `callbacks` keyword arg when executing an run, those callbacks will be issued by all nested objects involved in the execution. For example, when a handler is passed through to an Agent, it will be used for all callbacks related to the agent and all the objects involved in the agent's execution, in this case, the Tools and LLM.\n",
|
||||
"In many cases, it is advantageous to pass in handlers instead when running the object. When we pass through [`CallbackHandlers`](https://python.langchain.com/v0.2/api_reference/core/callbacks/langchain_core.callbacks.base.BaseCallbackHandler.html#langchain-core-callbacks-base-basecallbackhandler) using the `callbacks` keyword arg when executing an run, those callbacks will be issued by all nested objects involved in the execution. For example, when a handler is passed through to an Agent, it will be used for all callbacks related to the agent and all the objects involved in the agent's execution, in this case, the Tools and LLM.\n",
|
||||
"\n",
|
||||
"This prevents us from having to manually attach the handlers to each individual nested object. Here's an example:"
|
||||
]
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
"\n",
|
||||
"To obtain the string content directly, use `.split_text`.\n",
|
||||
"\n",
|
||||
"To create LangChain [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html) objects (e.g., for use in downstream tasks), use `.create_documents`."
|
||||
"To create LangChain [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html) objects (e.g., for use in downstream tasks), use `.create_documents`."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"\n",
|
||||
":::tip Supported models\n",
|
||||
"\n",
|
||||
"See the [init_chat_model()](https://api.python.langchain.com/en/latest/chat_models/langchain.chat_models.base.init_chat_model.html) API reference for a full list of supported integrations.\n",
|
||||
"See the [init_chat_model()](https://python.langchain.com/v0.2/api_reference/langchain/chat_models/langchain.chat_models.base.init_chat_model.html) API reference for a full list of supported integrations.\n",
|
||||
"\n",
|
||||
"Make sure you have the integration packages installed for any model providers you want to support. E.g. you should have `langchain-openai` installed to init an OpenAI model.\n",
|
||||
"\n",
|
||||
@@ -89,7 +89,7 @@
|
||||
"source": [
|
||||
"## Inferring model provider\n",
|
||||
"\n",
|
||||
"For common and distinct model names `init_chat_model()` will attempt to infer the model provider. See the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain.chat_models.base.init_chat_model.html) for a full list of inference behavior. E.g. any model that starts with `gpt-3...` or `gpt-4...` will be inferred as using model provider `openai`."
|
||||
"For common and distinct model names `init_chat_model()` will attempt to infer the model provider. See the [API reference](https://python.langchain.com/v0.2/api_reference/langchain/chat_models/langchain.chat_models.base.init_chat_model.html) for a full list of inference behavior. E.g. any model that starts with `gpt-3...` or `gpt-4...` will be inferred as using model provider `openai`."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
"# How to stream chat model responses\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"All [chat models](https://api.python.langchain.com/en/latest/language_models/langchain_core.language_models.chat_models.BaseChatModel.html) implement the [Runnable interface](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable), which comes with a **default** implementations of standard runnable methods (i.e. `ainvoke`, `batch`, `abatch`, `stream`, `astream`, `astream_events`).\n",
|
||||
"All [chat models](https://python.langchain.com/v0.2/api_reference/core/language_models/langchain_core.language_models.chat_models.BaseChatModel.html) implement the [Runnable interface](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable), which comes with a **default** implementations of standard runnable methods (i.e. `ainvoke`, `batch`, `abatch`, `stream`, `astream`, `astream_events`).\n",
|
||||
"\n",
|
||||
"The **default** streaming implementation provides an`Iterator` (or `AsyncIterator` for asynchronous streaming) that yields a single value: the final output from the underlying chat model provider.\n",
|
||||
"\n",
|
||||
@@ -120,7 +120,7 @@
|
||||
"source": [
|
||||
"## Astream events\n",
|
||||
"\n",
|
||||
"Chat models also support the standard [astream events](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.astream_events) method.\n",
|
||||
"Chat models also support the standard [astream events](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.astream_events) method.\n",
|
||||
"\n",
|
||||
"This method is useful if you're streaming output from a larger LLM application that contains multiple steps (e.g., an LLM chain composed of a prompt, llm and parser)."
|
||||
]
|
||||
|
||||
@@ -42,7 +42,7 @@
|
||||
"\n",
|
||||
"A number of model providers return token usage information as part of the chat generation response. When available, this information will be included on the `AIMessage` objects produced by the corresponding model.\n",
|
||||
"\n",
|
||||
"LangChain `AIMessage` objects include a [usage_metadata](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage.usage_metadata) attribute. When populated, this attribute will be a [UsageMetadata](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.ai.UsageMetadata.html) dictionary with standard keys (e.g., `\"input_tokens\"` and `\"output_tokens\"`).\n",
|
||||
"LangChain `AIMessage` objects include a [usage_metadata](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage.usage_metadata) attribute. When populated, this attribute will be a [UsageMetadata](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html) dictionary with standard keys (e.g., `\"input_tokens\"` and `\"output_tokens\"`).\n",
|
||||
"\n",
|
||||
"Examples:\n",
|
||||
"\n",
|
||||
@@ -118,7 +118,7 @@
|
||||
"source": [
|
||||
"### Using AIMessage.response_metadata\n",
|
||||
"\n",
|
||||
"Metadata from the model response is also included in the AIMessage [response_metadata](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage.response_metadata) attribute. These data are typically not standardized. Note that different providers adopt different conventions for representing token counts:"
|
||||
"Metadata from the model response is also included in the AIMessage [response_metadata](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage.response_metadata) attribute. These data are typically not standardized. Note that different providers adopt different conventions for representing token counts:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -153,7 +153,7 @@
|
||||
"\n",
|
||||
"#### OpenAI\n",
|
||||
"\n",
|
||||
"For example, OpenAI will return a message [chunk](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.ai.AIMessageChunk.html) at the end of a stream with token usage information. This behavior is supported by `langchain-openai >= 0.1.9` and can be enabled by setting `stream_usage=True`. This attribute can also be set when `ChatOpenAI` is instantiated.\n",
|
||||
"For example, OpenAI will return a message [chunk](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessageChunk.html) at the end of a stream with token usage information. This behavior is supported by `langchain-openai >= 0.1.9` and can be enabled by setting `stream_usage=True`. This attribute can also be set when `ChatOpenAI` is instantiated.\n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
":::note\n",
|
||||
|
||||
@@ -140,7 +140,7 @@
|
||||
"\n",
|
||||
"## Chat history\n",
|
||||
"\n",
|
||||
"It's perfectly fine to store and pass messages directly as an array, but we can use LangChain's built-in [message history class](https://api.python.langchain.com/en/latest/langchain_api_reference.html#module-langchain.memory) to store and load messages as well. Instances of this class are responsible for storing and loading chat messages from persistent storage. LangChain integrates with many providers - you can see a [list of integrations here](/docs/integrations/memory) - but for this demo we will use an ephemeral demo class.\n",
|
||||
"It's perfectly fine to store and pass messages directly as an array, but we can use LangChain's built-in [message history class](https://python.langchain.com/v0.2/api_reference/langchain/index.html#module-langchain.memory) to store and load messages as well. Instances of this class are responsible for storing and loading chat messages from persistent storage. LangChain integrates with many providers - you can see a [list of integrations here](/docs/integrations/memory) - but for this demo we will use an ephemeral demo class.\n",
|
||||
"\n",
|
||||
"Here's an example of the API:"
|
||||
]
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
"source": [
|
||||
"# How to split code\n",
|
||||
"\n",
|
||||
"[RecursiveCharacterTextSplitter](https://api.python.langchain.com/en/latest/character/langchain_text_splitters.character.RecursiveCharacterTextSplitter.html) includes pre-built lists of separators that are useful for splitting text in a specific programming language.\n",
|
||||
"[RecursiveCharacterTextSplitter](https://python.langchain.com/v0.2/api_reference/text_splitters/character/langchain_text_splitters.character.RecursiveCharacterTextSplitter.html) includes pre-built lists of separators that are useful for splitting text in a specific programming language.\n",
|
||||
"\n",
|
||||
"Supported languages are stored in the `langchain_text_splitters.Language` enum. They include:\n",
|
||||
"\n",
|
||||
|
||||
@@ -99,7 +99,7 @@
|
||||
"id": "b0f74589",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Above, we defined `temperature` as a [`ConfigurableField`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.utils.ConfigurableField.html#langchain_core.runnables.utils.ConfigurableField) that we can set at runtime. To do so, we use the [`with_config`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_config) method like this:"
|
||||
"Above, we defined `temperature` as a [`ConfigurableField`](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.utils.ConfigurableField.html#langchain_core.runnables.utils.ConfigurableField) that we can set at runtime. To do so, we use the [`with_config`](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_config) method like this:"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -227,7 +227,7 @@
|
||||
"source": [
|
||||
"### `LLMListwiseRerank`\n",
|
||||
"\n",
|
||||
"[LLMListwiseRerank](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html) uses [zero-shot listwise document reranking](https://arxiv.org/pdf/2305.02156) and functions similarly to `LLMChainFilter` as a robust but more expensive option. It is recommended to use a more powerful LLM.\n",
|
||||
"[LLMListwiseRerank](https://python.langchain.com/v0.2/api_reference/langchain/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html) uses [zero-shot listwise document reranking](https://arxiv.org/pdf/2305.02156) and functions similarly to `LLMChainFilter` as a robust but more expensive option. It is recommended to use a more powerful LLM.\n",
|
||||
"\n",
|
||||
"Note that `LLMListwiseRerank` requires a model with the [with_structured_output](/docs/integrations/chat/) method implemented."
|
||||
]
|
||||
|
||||
@@ -42,13 +42,13 @@
|
||||
"source": [
|
||||
"LangChain [tools](/docs/concepts#tools) are interfaces that an agent, chain, or chat model can use to interact with the world. See [here](/docs/how_to/#tools) for how-to guides covering tool-calling, built-in tools, custom tools, and more information.\n",
|
||||
"\n",
|
||||
"LangChain tools-- instances of [BaseTool](https://api.python.langchain.com/en/latest/tools/langchain_core.tools.BaseTool.html)-- are [Runnables](/docs/concepts/#runnable-interface) with additional constraints that enable them to be invoked effectively by language models:\n",
|
||||
"LangChain tools-- instances of [BaseTool](https://python.langchain.com/v0.2/api_reference/core/tools/langchain_core.tools.BaseTool.html)-- are [Runnables](/docs/concepts/#runnable-interface) with additional constraints that enable them to be invoked effectively by language models:\n",
|
||||
"\n",
|
||||
"- Their inputs are constrained to be serializable, specifically strings and Python `dict` objects;\n",
|
||||
"- They contain names and descriptions indicating how and when they should be used;\n",
|
||||
"- They may contain a detailed [args_schema](https://python.langchain.com/v0.2/docs/how_to/custom_tools/) for their arguments. That is, while a tool (as a `Runnable`) might accept a single `dict` input, the specific keys and type information needed to populate a dict should be specified in the `args_schema`.\n",
|
||||
"\n",
|
||||
"Runnables that accept string or `dict` input can be converted to tools using the [as_tool](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.as_tool) method, which allows for the specification of names, descriptions, and additional schema information for arguments."
|
||||
"Runnables that accept string or `dict` input can be converted to tools using the [as_tool](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.as_tool) method, which allows for the specification of names, descriptions, and additional schema information for arguments."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -180,7 +180,7 @@
|
||||
"id": "32b1a992-8997-4c98-8eb2-c9fe9431b799",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Alternatively, the schema can be fully specified by directly passing the desired [args_schema](https://api.python.langchain.com/en/latest/tools/langchain_core.tools.BaseTool.html#langchain_core.tools.BaseTool.args_schema) for the tool:"
|
||||
"Alternatively, the schema can be fully specified by directly passing the desired [args_schema](https://python.langchain.com/v0.2/api_reference/core/tools/langchain_core.tools.BaseTool.html#langchain_core.tools.BaseTool.args_schema) for the tool:"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
"\n",
|
||||
"LangChain has some built-in callback handlers, but you will often want to create your own handlers with custom logic.\n",
|
||||
"\n",
|
||||
"To create a custom callback handler, we need to determine the [event(s)](https://api.python.langchain.com/en/latest/callbacks/langchain_core.callbacks.base.BaseCallbackHandler.html#langchain-core-callbacks-base-basecallbackhandler) we want our callback handler to handle as well as what we want our callback handler to do when the event is triggered. Then all we need to do is attach the callback handler to the object, for example via [the constructor](/docs/how_to/callbacks_constructor) or [at runtime](/docs/how_to/callbacks_runtime).\n",
|
||||
"To create a custom callback handler, we need to determine the [event(s)](https://python.langchain.com/v0.2/api_reference/core/callbacks/langchain_core.callbacks.base.BaseCallbackHandler.html#langchain-core-callbacks-base-basecallbackhandler) we want our callback handler to handle as well as what we want our callback handler to do when the event is triggered. Then all we need to do is attach the callback handler to the object, for example via [the constructor](/docs/how_to/callbacks_constructor) or [at runtime](/docs/how_to/callbacks_runtime).\n",
|
||||
"\n",
|
||||
"In the example below, we'll implement streaming with a custom handler.\n",
|
||||
"\n",
|
||||
@@ -107,7 +107,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can see [this reference page](https://api.python.langchain.com/en/latest/callbacks/langchain_core.callbacks.base.BaseCallbackHandler.html#langchain-core-callbacks-base-basecallbackhandler) for a list of events you can handle. Note that the `handle_chain_*` events run for most LCEL runnables.\n",
|
||||
"You can see [this reference page](https://python.langchain.com/v0.2/api_reference/core/callbacks/langchain_core.callbacks.base.BaseCallbackHandler.html#langchain-core-callbacks-base-basecallbackhandler) for a list of events you can handle. Note that the `handle_chain_*` events run for most LCEL runnables.\n",
|
||||
"\n",
|
||||
"## Next steps\n",
|
||||
"\n",
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
"\n",
|
||||
"In this guide, we'll learn how to create a custom chat model using LangChain abstractions.\n",
|
||||
"\n",
|
||||
"Wrapping your LLM with the standard [`BaseChatModel`](https://api.python.langchain.com/en/latest/language_models/langchain_core.language_models.chat_models.BaseChatModel.html) interface allow you to use your LLM in existing LangChain programs with minimal code modifications!\n",
|
||||
"Wrapping your LLM with the standard [`BaseChatModel`](https://python.langchain.com/v0.2/api_reference/core/language_models/langchain_core.language_models.chat_models.BaseChatModel.html) interface allow you to use your LLM in existing LangChain programs with minimal code modifications!\n",
|
||||
"\n",
|
||||
"As an bonus, your LLM will automatically become a LangChain `Runnable` and will benefit from some optimizations out of the box (e.g., batch via a threadpool), async support, the `astream_events` API, etc.\n",
|
||||
"\n",
|
||||
@@ -503,7 +503,7 @@
|
||||
"\n",
|
||||
"Documentation:\n",
|
||||
"\n",
|
||||
"* The model contains doc-strings for all initialization arguments, as these will be surfaced in the [APIReference](https://api.python.langchain.com/en/stable/langchain_api_reference.html).\n",
|
||||
"* The model contains doc-strings for all initialization arguments, as these will be surfaced in the [APIReference](https://python.langchain.com/v0.2/api_reference/langchain/index.html).\n",
|
||||
"* The class doc-string for the model contains a link to the model API if the model is powered by a service.\n",
|
||||
"\n",
|
||||
"Tests:\n",
|
||||
|
||||
@@ -402,7 +402,7 @@
|
||||
"\n",
|
||||
"Documentation:\n",
|
||||
"\n",
|
||||
"* The model contains doc-strings for all initialization arguments, as these will be surfaced in the [APIReference](https://api.python.langchain.com/en/stable/langchain_api_reference.html).\n",
|
||||
"* The model contains doc-strings for all initialization arguments, as these will be surfaced in the [APIReference](https://python.langchain.com/v0.2/api_reference/langchain/index.html).\n",
|
||||
"* The class doc-string for the model contains a link to the model API if the model is powered by a service.\n",
|
||||
"\n",
|
||||
"Tests:\n",
|
||||
|
||||
@@ -270,7 +270,7 @@
|
||||
"\n",
|
||||
"Documentation:\n",
|
||||
"\n",
|
||||
"* The retriever contains doc-strings for all initialization arguments, as these will be surfaced in the [API Reference](https://api.python.langchain.com/en/stable/langchain_api_reference.html).\n",
|
||||
"* The retriever contains doc-strings for all initialization arguments, as these will be surfaced in the [API Reference](https://python.langchain.com/v0.2/api_reference/langchain/index.html).\n",
|
||||
"* The class doc-string for the model contains a link to any relevant APIs used for the retriever (e.g., if the retriever is retrieving from wikipedia, it'll be good to link to the wikipedia API!)\n",
|
||||
"\n",
|
||||
"Tests:\n",
|
||||
|
||||
@@ -9,20 +9,20 @@
|
||||
"\n",
|
||||
"When constructing an agent, you will need to provide it with a list of `Tool`s that it can use. Besides the actual function that is called, the Tool consists of several components:\n",
|
||||
"\n",
|
||||
"| Attribute | Type | Description |\n",
|
||||
"|-----------------|---------------------------|------------------------------------------------------------------------------------------------------------------|\n",
|
||||
"| name | str | Must be unique within a set of tools provided to an LLM or agent. |\n",
|
||||
"| description | str | Describes what the tool does. Used as context by the LLM or agent. |\n",
|
||||
"| args_schema | Pydantic BaseModel | Optional but recommended, can be used to provide more information (e.g., few-shot examples) or validation for expected parameters |\n",
|
||||
"| return_direct | boolean | Only relevant for agents. When True, after invoking the given tool, the agent will stop and return the result direcly to the user. |\n",
|
||||
"| Attribute | Type | Description |\n",
|
||||
"|---------------|---------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|\n",
|
||||
"| name | str | Must be unique within a set of tools provided to an LLM or agent. |\n",
|
||||
"| description | str | Describes what the tool does. Used as context by the LLM or agent. |\n",
|
||||
"| args_schema | langchain.pydantic_v1.BaseModel | Optional but recommended, and required if using callback handlers. It can be used to provide more information (e.g., few-shot examples) or validation for expected parameters. |\n",
|
||||
"| return_direct | boolean | Only relevant for agents. When True, after invoking the given tool, the agent will stop and return the result direcly to the user. |\n",
|
||||
"\n",
|
||||
"LangChain supports the creation of tools from:\n",
|
||||
"\n",
|
||||
"1. Functions;\n",
|
||||
"2. LangChain [Runnables](/docs/concepts#runnable-interface);\n",
|
||||
"3. By sub-classing from [BaseTool](https://api.python.langchain.com/en/latest/tools/langchain_core.tools.BaseTool.html) -- This is the most flexible method, it provides the largest degree of control, at the expense of more effort and code.\n",
|
||||
"3. By sub-classing from [BaseTool](https://python.langchain.com/v0.2/api_reference/core/tools/langchain_core.tools.BaseTool.html) -- This is the most flexible method, it provides the largest degree of control, at the expense of more effort and code.\n",
|
||||
"\n",
|
||||
"Creating tools from functions may be sufficient for most use cases, and can be done via a simple [@tool decorator](https://api.python.langchain.com/en/latest/tools/langchain_core.tools.tool.html#langchain_core.tools.tool). If more configuration is needed-- e.g., specification of both sync and async implementations-- one can also use the [StructuredTool.from_function](https://api.python.langchain.com/en/latest/tools/langchain_core.tools.StructuredTool.html#langchain_core.tools.StructuredTool.from_function) class method.\n",
|
||||
"Creating tools from functions may be sufficient for most use cases, and can be done via a simple [@tool decorator](https://python.langchain.com/v0.2/api_reference/core/tools/langchain_core.tools.tool.html#langchain_core.tools.tool). If more configuration is needed-- e.g., specification of both sync and async implementations-- one can also use the [StructuredTool.from_function](https://python.langchain.com/v0.2/api_reference/core/tools/langchain_core.tools.StructuredTool.html#langchain_core.tools.StructuredTool.from_function) class method.\n",
|
||||
"\n",
|
||||
"In this guide we provide an overview of these methods.\n",
|
||||
"\n",
|
||||
@@ -259,7 +259,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
":::{.callout-caution}\n",
|
||||
"By default, `@tool(parse_docstring=True)` will raise `ValueError` if the docstring does not parse correctly. See [API Reference](https://api.python.langchain.com/en/latest/tools/langchain_core.tools.tool.html) for detail and examples.\n",
|
||||
"By default, `@tool(parse_docstring=True)` will raise `ValueError` if the docstring does not parse correctly. See [API Reference](https://python.langchain.com/v0.2/api_reference/core/tools/langchain_core.tools.tool.html) for detail and examples.\n",
|
||||
":::"
|
||||
]
|
||||
},
|
||||
@@ -366,7 +366,7 @@
|
||||
"source": [
|
||||
"## Creating tools from Runnables\n",
|
||||
"\n",
|
||||
"LangChain [Runnables](/docs/concepts#runnable-interface) that accept string or `dict` input can be converted to tools using the [as_tool](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.as_tool) method, which allows for the specification of names, descriptions, and additional schema information for arguments.\n",
|
||||
"LangChain [Runnables](/docs/concepts#runnable-interface) that accept string or `dict` input can be converted to tools using the [as_tool](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.as_tool) method, which allows for the specification of names, descriptions, and additional schema information for arguments.\n",
|
||||
"\n",
|
||||
"Example usage:"
|
||||
]
|
||||
@@ -512,7 +512,7 @@
|
||||
"source": [
|
||||
"## How to create async tools\n",
|
||||
"\n",
|
||||
"LangChain Tools implement the [Runnable interface 🏃](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html).\n",
|
||||
"LangChain Tools implement the [Runnable interface 🏃](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html).\n",
|
||||
"\n",
|
||||
"All Runnables expose the `invoke` and `ainvoke` methods (as well as other methods like `batch`, `abatch`, `astream` etc).\n",
|
||||
"\n",
|
||||
@@ -778,7 +778,7 @@
|
||||
"\n",
|
||||
"Sometimes there are artifacts of a tool's execution that we want to make accessible to downstream components in our chain or agent, but that we don't want to expose to the model itself. For example if a tool returns custom objects like Documents, we may want to pass some view or metadata about this output to the model without passing the raw output to the model. At the same time, we may want to be able to access this full output elsewhere, for example in downstream tools.\n",
|
||||
"\n",
|
||||
"The Tool and [ToolMessage](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.tool.ToolMessage.html) interfaces make it possible to distinguish between the parts of the tool output meant for the model (this is the ToolMessage.content) and those parts which are meant for use outside the model (ToolMessage.artifact).\n",
|
||||
"The Tool and [ToolMessage](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.tool.ToolMessage.html) interfaces make it possible to distinguish between the parts of the tool output meant for the model (this is the ToolMessage.content) and those parts which are meant for use outside the model (ToolMessage.artifact).\n",
|
||||
"\n",
|
||||
":::info Requires ``langchain-core >= 0.2.19``\n",
|
||||
"\n",
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"\n",
|
||||
"A [comma-separated values (CSV)](https://en.wikipedia.org/wiki/Comma-separated_values) file is a delimited text file that uses a comma to separate values. Each line of the file is a data record. Each record consists of one or more fields, separated by commas.\n",
|
||||
"\n",
|
||||
"LangChain implements a [CSV Loader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.csv_loader.CSVLoader.html) that will load CSV files into a sequence of [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) objects. Each row of the CSV file is translated to one document."
|
||||
"LangChain implements a [CSV Loader](https://python.langchain.com/v0.2/api_reference/community/document_loaders/langchain_community.document_loaders.csv_loader.CSVLoader.html) that will load CSV files into a sequence of [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) objects. Each row of the CSV file is translated to one document."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -88,7 +88,7 @@
|
||||
"source": [
|
||||
"## Specify a column to identify the document source\n",
|
||||
"\n",
|
||||
"The `\"source\"` key on [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) metadata can be set using a column of the CSV. Use the `source_column` argument to specify a source for the document created from each row. Otherwise `file_path` will be used as the source for all documents created from the CSV file.\n",
|
||||
"The `\"source\"` key on [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) metadata can be set using a column of the CSV. Use the `source_column` argument to specify a source for the document created from each row. Otherwise `file_path` will be used as the source for all documents created from the CSV file.\n",
|
||||
"\n",
|
||||
"This is useful when using documents loaded from CSV files for chains that answer questions using sources."
|
||||
]
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
"source": [
|
||||
"# How to load documents from a directory\n",
|
||||
"\n",
|
||||
"LangChain's [DirectoryLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.directory.DirectoryLoader.html) implements functionality for reading files from disk into LangChain [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) objects. Here we demonstrate:\n",
|
||||
"LangChain's [DirectoryLoader](https://python.langchain.com/v0.2/api_reference/community/document_loaders/langchain_community.document_loaders.directory.DirectoryLoader.html) implements functionality for reading files from disk into LangChain [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) objects. Here we demonstrate:\n",
|
||||
"\n",
|
||||
"- How to load from a filesystem, including use of wildcard patterns;\n",
|
||||
"- How to use multithreading for file I/O;\n",
|
||||
@@ -134,7 +134,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Change loader class\n",
|
||||
"By default this uses the `UnstructuredLoader` class. To customize the loader, specify the loader class in the `loader_cls` kwarg. Below we show an example using [TextLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.text.TextLoader.html):"
|
||||
"By default this uses the `UnstructuredLoader` class. To customize the loader, specify the loader class in the `loader_cls` kwarg. Below we show an example using [TextLoader](https://python.langchain.com/v0.2/api_reference/community/document_loaders/langchain_community.document_loaders.text.TextLoader.html):"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"\n",
|
||||
"The HyperText Markup Language or [HTML](https://en.wikipedia.org/wiki/HTML) is the standard markup language for documents designed to be displayed in a web browser.\n",
|
||||
"\n",
|
||||
"This covers how to load `HTML` documents into a LangChain [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) objects that we can use downstream.\n",
|
||||
"This covers how to load `HTML` documents into a LangChain [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) objects that we can use downstream.\n",
|
||||
"\n",
|
||||
"Parsing HTML files often requires specialized tools. Here we demonstrate parsing via [Unstructured](https://unstructured-io.github.io/unstructured/) and [BeautifulSoup4](https://beautiful-soup-4.readthedocs.io/en/latest/), which can be installed via pip. Head over to the integrations page to find integrations with additional services, such as [Azure AI Document Intelligence](/docs/integrations/document_loaders/azure_document_intelligence) or [FireCrawl](/docs/integrations/document_loaders/firecrawl).\n",
|
||||
"\n",
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
|
||||
[JSON Lines](https://jsonlines.org/) is a file format where each line is a valid JSON value.
|
||||
|
||||
LangChain implements a [JSONLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.json_loader.JSONLoader.html)
|
||||
to convert JSON and JSONL data into LangChain [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document)
|
||||
LangChain implements a [JSONLoader](https://python.langchain.com/v0.2/api_reference/community/document_loaders/langchain_community.document_loaders.json_loader.JSONLoader.html)
|
||||
to convert JSON and JSONL data into LangChain [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document)
|
||||
objects. It uses a specified [jq schema](https://en.wikipedia.org/wiki/Jq_(programming_language)) to parse the JSON files, allowing for the extraction of specific fields into the content
|
||||
and metadata of the LangChain Document.
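
A minimal sketch (the file path and jq schema are placeholders; the `jq` Python package is required):

```python
from langchain_community.document_loaders import JSONLoader

loader = JSONLoader(
    file_path="./example_data/chat.json",
    jq_schema=".messages[].content",
    text_content=False,
)
docs = loader.load()
```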
|
||||
|
||||
@@ -182,7 +182,7 @@ pprint(data)
|
||||
</CodeOutputBlock>
|
||||
|
||||
|
||||
Another option is set `jq_schema='.'` and provide `content_key`:
|
||||
Another option is to set `jq_schema='.'` and provide `content_key`:
|
||||
|
||||
```python
|
||||
loader = JSONLoader(
|
||||
|
||||
@@ -9,14 +9,14 @@
|
||||
"\n",
|
||||
"[Markdown](https://en.wikipedia.org/wiki/Markdown) is a lightweight markup language for creating formatted text using a plain-text editor.\n",
|
||||
"\n",
|
||||
"Here we cover how to load `Markdown` documents into LangChain [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) objects that we can use downstream.\n",
|
||||
"Here we cover how to load `Markdown` documents into LangChain [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document) objects that we can use downstream.\n",
|
||||
"\n",
|
||||
"We will cover:\n",
|
||||
"\n",
|
||||
"- Basic usage;\n",
|
||||
"- Parsing of Markdown into elements such as titles, list items, and text.\n",
|
||||
"\n",
|
||||
"LangChain implements an [UnstructuredMarkdownLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.markdown.UnstructuredMarkdownLoader.html) object which requires the [Unstructured](https://unstructured-io.github.io/unstructured/) package. First we install it:"
|
||||
"LangChain implements an [UnstructuredMarkdownLoader](https://python.langchain.com/v0.2/api_reference/community/document_loaders/langchain_community.document_loaders.markdown.UnstructuredMarkdownLoader.html) object which requires the [Unstructured](https://unstructured-io.github.io/unstructured/) package. First we install it:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -26,7 +26,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install \"unstructured[md]\""
|
||||
"%pip install \"unstructured[md]\" nltk"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
The [Microsoft Office](https://www.office.com/) suite of productivity software includes Microsoft Word, Microsoft Excel, Microsoft PowerPoint, Microsoft Outlook, and Microsoft OneNote. It is available for Microsoft Windows and macOS operating systems. It is also available on Android and iOS.
|
||||
|
||||
This covers how to load commonly used file formats including `DOCX`, `XLSX` and `PPTX` documents into a LangChain
|
||||
[Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document)
|
||||
[Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html#langchain_core.documents.base.Document)
|
||||
object that we can use downstream.
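
For example, a Word document can be parsed with the Unstructured-based loader (one option among several; assumes the relevant `unstructured` extras are installed, and the file path is a placeholder):

```python
from langchain_community.document_loaders import UnstructuredWordDocumentLoader

loader = UnstructuredWordDocumentLoader("example_data/fake.docx")
docs = loader.load()
```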
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ The Embeddings class is a class designed for interfacing with text embedding mod
|
||||
|
||||
Embeddings create a vector representation of a piece of text. This is useful because it means we can think about text in the vector space, and do things like semantic search where we look for pieces of text that are most similar in the vector space.
|
||||
|
||||
The base Embeddings class in LangChain provides two methods: one for embedding documents and one for embedding a query. The former, `.embed_documents`, takes as input multiple texts, while the latter, `.embed_query`, takes a single text. The reason for having these as two separate methods is that some embedding providers have different embedding methods for documents (to be searched over) vs queries (the search query itself).
|
||||
The base Embeddings class in LangChain provides two methods: one for embedding documents and one for embedding a query. The former, `.embed_documents`, takes as input multiple texts, while the latter, `.embed_query`, takes a single text. The reason for having these as two separate methods is that some embedding providers have different embedding methods for documents (to be searched over) vs queries (the search query itself).
|
||||
`.embed_query` will return a list of floats, whereas `.embed_documents` returns a list of lists of floats.
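
For instance, with any concrete implementation (here `HuggingFaceEmbeddings`, which also appears below):

```python
from langchain_huggingface import HuggingFaceEmbeddings

embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

doc_vectors = embeddings_model.embed_documents(["Hello world", "Goodbye world"])
query_vector = embeddings_model.embed_query("What was said?")

len(doc_vectors), len(doc_vectors[0]), len(query_vector)  # (2, 768, 768) for this model
```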
|
||||
|
||||
## Get started
|
||||
@@ -94,15 +94,6 @@ from langchain_huggingface import HuggingFaceEmbeddings
|
||||
|
||||
embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
|
||||
```
|
||||
|
||||
You can also leave the `model_name` blank to use the default [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) model.
|
||||
|
||||
```python
|
||||
from langchain_huggingface import HuggingFaceEmbeddings
|
||||
|
||||
embeddings_model = HuggingFaceEmbeddings()
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
"source": [
|
||||
"# How to combine results from multiple retrievers\n",
|
||||
"\n",
|
||||
"The [EnsembleRetriever](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.ensemble.EnsembleRetriever.html) supports ensembling of results from multiple retrievers. It is initialized with a list of [BaseRetriever](https://api.python.langchain.com/en/latest/retrievers/langchain_core.retrievers.BaseRetriever.html) objects. EnsembleRetrievers rerank the results of the constituent retrievers based on the [Reciprocal Rank Fusion](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) algorithm.\n",
|
||||
"The [EnsembleRetriever](https://python.langchain.com/v0.2/api_reference/langchain/retrievers/langchain.retrievers.ensemble.EnsembleRetriever.html) supports ensembling of results from multiple retrievers. It is initialized with a list of [BaseRetriever](https://python.langchain.com/v0.2/api_reference/core/retrievers/langchain_core.retrievers.BaseRetriever.html) objects. EnsembleRetrievers rerank the results of the constituent retrievers based on the [Reciprocal Rank Fusion](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) algorithm.\n",
|
||||
"\n",
|
||||
"By leveraging the strengths of different algorithms, the `EnsembleRetriever` can achieve better performance than any single algorithm. \n",
|
||||
"\n",
|
||||
@@ -14,7 +14,7 @@
|
||||
"\n",
|
||||
"## Basic usage\n",
|
||||
"\n",
|
||||
"Below we demonstrate ensembling of a [BM25Retriever](https://api.python.langchain.com/en/latest/retrievers/langchain_community.retrievers.bm25.BM25Retriever.html) with a retriever derived from the [FAISS vector store](https://api.python.langchain.com/en/latest/vectorstores/langchain_community.vectorstores.faiss.FAISS.html)."
|
||||
"Below we demonstrate ensembling of a [BM25Retriever](https://python.langchain.com/v0.2/api_reference/community/retrievers/langchain_community.retrievers.bm25.BM25Retriever.html) with a retriever derived from the [FAISS vector store](https://python.langchain.com/v0.2/api_reference/community/vectorstores/langchain_community.vectorstores.faiss.FAISS.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
353
docs/docs/how_to/example_selectors_langsmith.ipynb
Normal file
@@ -0,0 +1,353 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4f7e423b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# How to select examples from a LangSmith dataset\n",
|
||||
"\n",
|
||||
"import Prerequisites from \"@theme/Prerequisites\";\n",
|
||||
"import Compatibility from \"@theme/Compatibility\";\n",
|
||||
"\n",
|
||||
"<Prerequisites titlesAndLinks={[\n",
|
||||
" [\"Chat models\", \"/docs/concepts/#chat-models\"],\n",
|
||||
" [\"Few-shot-prompting\", \"/docs/concepts/#few-shot-prompting\"],\n",
|
||||
" [\"LangSmith\", \"/docs/concepts/#langsmith\"],\n",
|
||||
"]} />\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"<Compatibility packagesAndVersions={[\n",
|
||||
" [\"langsmith\", \"0.1.101\"],\n",
|
||||
" [\"langchain-core\", \"0.2.34\"],\n",
|
||||
"]} />\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"LangSmith datasets have built-in support for similarity search, making them a great tool for building and querying few-shot examples.\n",
|
||||
"\n",
|
||||
"In this guide we'll see how to use an indexed LangSmith dataset as a few-shot example selector.\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"Before getting started make sure you've [created a LangSmith account](https://smith.langchain.com/) and set your credentials:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "85445e0e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Set LangSmith API key:\n",
|
||||
"\n",
|
||||
"········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.environ.get(\"LANGSMITH_API_KEY\"):\n",
|
||||
" os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Set LangSmith API key:\\n\\n\")\n",
|
||||
"\n",
|
||||
"os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ca899e29",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We'll need to install the `langsmith` SDK. In this example we'll also make use of `langchain`, `langchain-openai`, and `langchain-benchmarks`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b4fa7810",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU \"langsmith>=0.1.101\" \"langchain-core>=0.2.34\" langchain langchain-openai langchain-benchmarks"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fc716e12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now we'll clone a public dataset and turn on indexing for the dataset. We can also turn on indexing via the [LangSmith UI](https://docs.smith.langchain.com/how_to_guides/datasets/index_datasets_for_dynamic_few_shot_example_selection).\n",
|
||||
"\n",
|
||||
"We'll clone the [Multiverse math few shot example dataset](https://blog.langchain.dev/few-shot-prompting-to-improve-tool-calling-performance/).\n",
|
||||
"\n",
|
||||
"This enables searching over the dataset and will make sure that anytime we update/add examples they are also indexed."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cf53d280",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langsmith import Client as LangSmith\n",
|
||||
"\n",
|
||||
"ls_client = LangSmith()\n",
|
||||
"\n",
|
||||
"dataset_name = \"multiverse-math-few-shot-examples-v2\"\n",
|
||||
"dataset_public_url = (\n",
|
||||
" \"https://smith.langchain.com/public/620596ee-570b-4d2b-8c8f-f828adbe5242/d\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"ls_client.clone_public_dataset(dataset_public_url)\n",
|
||||
"\n",
|
||||
"dataset_id = ls_client.read_dataset(dataset_name=dataset_name).id\n",
|
||||
"\n",
|
||||
"ls_client.index_dataset(dataset_id=dataset_id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5767d171",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Querying dataset\n",
|
||||
"\n",
|
||||
"Indexing can take a few seconds. Once the dataset is indexed, we can search for similar examples. Note that the input to the `similar_examples` method must have the same schema as the examples inputs. In this case our example inputs are a dictionary with a \"question\" key:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "5013a56f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"3"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"examples = ls_client.similar_examples(\n",
|
||||
" {\"question\": \"whats the negation of the negation of the negation of 3\"},\n",
|
||||
" limit=3,\n",
|
||||
" dataset_id=dataset_id,\n",
|
||||
")\n",
|
||||
"len(examples)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "a142db06",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'evaluate the negation of -100'"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"examples[0].inputs[\"question\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d2627125",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For this dataset, the outputs are the conversation that followed the question in OpenAI message format:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "af5b9191",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'role': 'assistant',\n",
|
||||
" 'content': None,\n",
|
||||
" 'tool_calls': [{'id': 'toolu_01HTpq4cYNUac6F7omUc2Wz3',\n",
|
||||
" 'type': 'function',\n",
|
||||
" 'function': {'name': 'negate', 'arguments': '{\"a\": -100}'}}]},\n",
|
||||
" {'role': 'tool',\n",
|
||||
" 'content': '-100.0',\n",
|
||||
" 'tool_call_id': 'toolu_01HTpq4cYNUac6F7omUc2Wz3'},\n",
|
||||
" {'role': 'assistant', 'content': 'So the answer is 100.'},\n",
|
||||
" {'role': 'user',\n",
|
||||
" 'content': '100 is incorrect. Please refer to the output of your tool call.'},\n",
|
||||
" {'role': 'assistant',\n",
|
||||
" 'content': [{'text': \"You're right, my previous answer was incorrect. Let me re-evaluate using the tool output:\",\n",
|
||||
" 'type': 'text'}],\n",
|
||||
" 'tool_calls': [{'id': 'toolu_01XsJQboYghGDygQpPjJkeRq',\n",
|
||||
" 'type': 'function',\n",
|
||||
" 'function': {'name': 'negate', 'arguments': '{\"a\": -100}'}}]},\n",
|
||||
" {'role': 'tool',\n",
|
||||
" 'content': '-100.0',\n",
|
||||
" 'tool_call_id': 'toolu_01XsJQboYghGDygQpPjJkeRq'},\n",
|
||||
" {'role': 'assistant', 'content': 'The answer is -100.0'},\n",
|
||||
" {'role': 'user',\n",
|
||||
" 'content': 'You have the correct numerical answer but are returning additional text. Please only respond with the numerical answer.'},\n",
|
||||
" {'role': 'assistant', 'content': '-100.0'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"examples[0].outputs[\"conversation\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e852c8ef",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating dynamic few-shot prompts\n",
|
||||
"\n",
|
||||
"The search returns the examples whose inputs are most similar to the query input. We can use this for few-shot prompting a model like so:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "12cba1e1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import init_chat_model\n",
|
||||
"from langchain_benchmarks.tool_usage.tasks.multiverse_math import (\n",
|
||||
" add,\n",
|
||||
" cos,\n",
|
||||
" divide,\n",
|
||||
" log,\n",
|
||||
" multiply,\n",
|
||||
" negate,\n",
|
||||
" pi,\n",
|
||||
" power,\n",
|
||||
" sin,\n",
|
||||
" subtract,\n",
|
||||
")\n",
|
||||
"from langchain_core.runnables import RunnableLambda\n",
|
||||
"from langsmith import AsyncClient as AsyncLangSmith\n",
|
||||
"\n",
|
||||
"async_ls_client = AsyncLangSmith()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def similar_examples(input_: dict) -> dict:\n",
|
||||
" examples = ls_client.similar_examples(input_, limit=5, dataset_id=dataset_id)\n",
|
||||
" return {**input_, \"examples\": examples}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def asimilar_examples(input_: dict) -> dict:\n",
|
||||
" examples = await async_ls_client.similar_examples(\n",
|
||||
" input_, limit=5, dataset_id=dataset_id\n",
|
||||
" )\n",
|
||||
" return {**input_, \"examples\": examples}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def construct_prompt(input_: dict) -> list:\n",
|
||||
" instructions = \"\"\"You are great at using mathematical tools.\"\"\"\n",
|
||||
" examples = []\n",
|
||||
" for ex in input_[\"examples\"]:\n",
|
||||
" examples.append({\"role\": \"user\", \"content\": ex.inputs[\"question\"]})\n",
|
||||
" for msg in ex.outputs[\"conversation\"]:\n",
|
||||
" if msg[\"role\"] == \"assistant\":\n",
|
||||
" msg[\"name\"] = \"example_assistant\"\n",
|
||||
" if msg[\"role\"] == \"user\":\n",
|
||||
" msg[\"name\"] = \"example_user\"\n",
|
||||
" examples.append(msg)\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": instructions},\n",
|
||||
" *examples,\n",
|
||||
" {\"role\": \"user\", \"content\": input_[\"question\"]},\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"tools = [add, cos, divide, log, multiply, negate, pi, power, sin, subtract]\n",
|
||||
"llm = init_chat_model(\"gpt-4o-2024-08-06\")\n",
|
||||
"llm_with_tools = llm.bind_tools(tools)\n",
|
||||
"\n",
|
||||
"example_selector = RunnableLambda(func=similar_examples, afunc=asimilar_examples)\n",
|
||||
"\n",
|
||||
"chain = example_selector | construct_prompt | llm_with_tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "c423b367",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'name': 'negate',\n",
|
||||
" 'args': {'a': 3},\n",
|
||||
" 'id': 'call_uMSdoTl6ehfHh5a6JQUb2NoZ',\n",
|
||||
" 'type': 'tool_call'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ai_msg = await chain.ainvoke({\"question\": \"whats the negation of the negation of 3\"})\n",
|
||||
"ai_msg.tool_calls"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "94489b4a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Looking at the LangSmith trace, we can see that relevant examples were pulled in in the `similar_examples` step and passed as messages to ChatOpenAI: https://smith.langchain.com/public/9585e30f-765a-4ed9-b964-2211420cd2f8/r/fdea98d6-e90f-49d4-ac22-dfd012e9e0d9."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "poetry-venv-311",
|
||||
"language": "python",
|
||||
"name": "poetry-venv-311"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -16,11 +16,11 @@
|
||||
"also with JSON more or prompt based techniques.\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"LangChain implements a [tool-call attribute](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage.tool_calls) on messages from LLMs that include tool calls. See our [how-to guide on tool calling](/docs/how_to/tool_calling) for more detail. To build reference examples for data extraction, we build a chat history containing a sequence of: \n",
|
||||
"LangChain implements a [tool-call attribute](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage.tool_calls) on messages from LLMs that include tool calls. See our [how-to guide on tool calling](/docs/how_to/tool_calling) for more detail. To build reference examples for data extraction, we build a chat history containing a sequence of: \n",
|
||||
"\n",
|
||||
"- [HumanMessage](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.human.HumanMessage.html) containing example inputs;\n",
|
||||
"- [AIMessage](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.ai.AIMessage.html) containing example tool calls;\n",
|
||||
"- [ToolMessage](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.tool.ToolMessage.html) containing example tool outputs.\n",
|
||||
"- [HumanMessage](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.human.HumanMessage.html) containing example inputs;\n",
|
||||
"- [AIMessage](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html) containing example tool calls;\n",
|
||||
"- [ToolMessage](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.tool.ToolMessage.html) containing example tool outputs.\n",
|
||||
"\n",
|
||||
"LangChain adopts this convention for structuring tool calls into conversation across LLM model providers.\n",
|
||||
"\n",
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
"source": [
|
||||
"## Set up\n",
|
||||
"\n",
|
||||
"We need some example data! Let's download an article about [cars from wikipedia](https://en.wikipedia.org/wiki/Car) and load it as a LangChain [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html)."
|
||||
"We need some example data! Let's download an article about [cars from wikipedia](https://en.wikipedia.org/wiki/Car) and load it as a LangChain [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -214,7 +214,7 @@
|
||||
"id": "5b43d7e0-3c85-4d97-86c7-e8c984b60b0a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Use [batch](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html) functionality to run the extraction in **parallel** across each chunk! \n",
|
||||
"Use [batch](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html) functionality to run the extraction in **parallel** across each chunk! \n",
|
||||
"\n",
|
||||
":::{.callout-tip}\n",
|
||||
"You can often use .batch() to parallelize the extractions! `.batch` uses a threadpool under the hood to help you parallelize workloads.\n",
|
||||
|
||||
@@ -202,7 +202,7 @@
|
||||
"\n",
|
||||
"If desired, it's easy to create a custom prompt and parser with `LangChain` and `LCEL`.\n",
|
||||
"\n",
|
||||
"To create a custom parser, define a function to parse the output from the model (typically an [AIMessage](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.ai.AIMessage.html)) into an object of your choice.\n",
|
||||
"To create a custom parser, define a function to parse the output from the model (typically an [AIMessage](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html)) into an object of your choice.\n",
|
||||
"\n",
|
||||
"See below for a simple implementation of a JSON parser."
|
||||
]
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
"\n",
|
||||
"In this guide, we'll learn how to create a simple prompt template that provides the model with example inputs and outputs when generating. Providing the LLM with a few such examples is called few-shotting, and is a simple yet powerful way to guide generation and in some cases drastically improve model performance.\n",
|
||||
"\n",
|
||||
"A few-shot prompt template can be constructed from either a set of examples, or from an [Example Selector](https://api.python.langchain.com/en/latest/example_selectors/langchain_core.example_selectors.base.BaseExampleSelector.html) class responsible for choosing a subset of examples from the defined set.\n",
|
||||
"A few-shot prompt template can be constructed from either a set of examples, or from an [Example Selector](https://python.langchain.com/v0.2/api_reference/core/example_selectors/langchain_core.example_selectors.base.BaseExampleSelector.html) class responsible for choosing a subset of examples from the defined set.\n",
|
||||
"\n",
|
||||
"This guide will cover few-shotting with string prompt templates. For a guide on few-shotting with chat messages for chat models, see [here](/docs/how_to/few_shot_examples_chat/).\n",
|
||||
"\n",
|
||||
@@ -160,7 +160,7 @@
|
||||
"source": [
|
||||
"### Pass the examples and formatter to `FewShotPromptTemplate`\n",
|
||||
"\n",
|
||||
"Finally, create a [`FewShotPromptTemplate`](https://api.python.langchain.com/en/latest/prompts/langchain_core.prompts.few_shot.FewShotPromptTemplate.html) object. This object takes in the few-shot examples and the formatter for the few-shot examples. When this `FewShotPromptTemplate` is formatted, it formats the passed examples using the `example_prompt`, then and adds them to the final prompt before `suffix`:"
|
||||
"Finally, create a [`FewShotPromptTemplate`](https://python.langchain.com/v0.2/api_reference/core/prompts/langchain_core.prompts.few_shot.FewShotPromptTemplate.html) object. This object takes in the few-shot examples and the formatter for the few-shot examples. When this `FewShotPromptTemplate` is formatted, it formats the passed examples using the `example_prompt`, then and adds them to the final prompt before `suffix`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -251,7 +251,7 @@
|
||||
"source": [
|
||||
"## Using an example selector\n",
|
||||
"\n",
|
||||
"We will reuse the example set and the formatter from the previous section. However, instead of feeding the examples directly into the `FewShotPromptTemplate` object, we will feed them into an implementation of `ExampleSelector` called [`SemanticSimilarityExampleSelector`](https://api.python.langchain.com/en/latest/example_selectors/langchain_core.example_selectors.semantic_similarity.SemanticSimilarityExampleSelector.html) instance. This class selects few-shot examples from the initial set based on their similarity to the input. It uses an embedding model to compute the similarity between the input and the few-shot examples, as well as a vector store to perform the nearest neighbor search.\n",
|
||||
"We will reuse the example set and the formatter from the previous section. However, instead of feeding the examples directly into the `FewShotPromptTemplate` object, we will feed them into an implementation of `ExampleSelector` called [`SemanticSimilarityExampleSelector`](https://python.langchain.com/v0.2/api_reference/core/example_selectors/langchain_core.example_selectors.semantic_similarity.SemanticSimilarityExampleSelector.html) instance. This class selects few-shot examples from the initial set based on their similarity to the input. It uses an embedding model to compute the similarity between the input and the few-shot examples, as well as a vector store to perform the nearest neighbor search.\n",
|
||||
"\n",
|
||||
"To show what it looks like, let's initialize an instance and call it in isolation:"
|
||||
]
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
"\n",
|
||||
"This guide covers how to prompt a chat model with example inputs and outputs. Providing the model with a few such examples is called few-shotting, and is a simple yet powerful way to guide generation and in some cases drastically improve model performance.\n",
|
||||
"\n",
|
||||
"There does not appear to be solid consensus on how best to do few-shot prompting, and the optimal prompt compilation will likely vary by model. Because of this, we provide few-shot prompt templates like the [FewShotChatMessagePromptTemplate](https://api.python.langchain.com/en/latest/prompts/langchain_core.prompts.few_shot.FewShotChatMessagePromptTemplate.html?highlight=fewshot#langchain_core.prompts.few_shot.FewShotChatMessagePromptTemplate) as a flexible starting point, and you can modify or replace them as you see fit.\n",
|
||||
"There does not appear to be solid consensus on how best to do few-shot prompting, and the optimal prompt compilation will likely vary by model. Because of this, we provide few-shot prompt templates like the [FewShotChatMessagePromptTemplate](https://python.langchain.com/v0.2/api_reference/core/prompts/langchain_core.prompts.few_shot.FewShotChatMessagePromptTemplate.html?highlight=fewshot#langchain_core.prompts.few_shot.FewShotChatMessagePromptTemplate) as a flexible starting point, and you can modify or replace them as you see fit.\n",
|
||||
"\n",
|
||||
"The goal of few-shot prompt templates are to dynamically select examples based on an input, and then format the examples in a final prompt to provide for the model.\n",
|
||||
"\n",
|
||||
@@ -49,7 +49,7 @@
|
||||
"\n",
|
||||
"The basic components of the template are:\n",
|
||||
"- `examples`: A list of dictionary examples to include in the final prompt.\n",
|
||||
"- `example_prompt`: converts each example into 1 or more messages through its [`format_messages`](https://api.python.langchain.com/en/latest/prompts/langchain_core.prompts.chat.ChatPromptTemplate.html?highlight=format_messages#langchain_core.prompts.chat.ChatPromptTemplate.format_messages) method. A common example would be to convert each example into one human message and one AI message response, or a human message followed by a function call message.\n",
|
||||
"- `example_prompt`: converts each example into 1 or more messages through its [`format_messages`](https://python.langchain.com/v0.2/api_reference/core/prompts/langchain_core.prompts.chat.ChatPromptTemplate.html?highlight=format_messages#langchain_core.prompts.chat.ChatPromptTemplate.format_messages) method. A common example would be to convert each example into one human message and one AI message response, or a human message followed by a function call message.\n",
|
||||
"\n",
|
||||
"Below is a simple demonstration. First, define the examples you'd like to include. Let's give the LLM an unfamiliar mathematical operator, denoted by the \"🦜\" emoji:"
|
||||
]
|
||||
@@ -239,8 +239,8 @@
|
||||
"\n",
|
||||
"Sometimes you may want to select only a few examples from your overall set to show based on the input. For this, you can replace the `examples` passed into `FewShotChatMessagePromptTemplate` with an `example_selector`. The other components remain the same as above! Our dynamic few-shot prompt template would look like:\n",
|
||||
"\n",
|
||||
"- `example_selector`: responsible for selecting few-shot examples (and the order in which they are returned) for a given input. These implement the [BaseExampleSelector](https://api.python.langchain.com/en/latest/example_selectors/langchain_core.example_selectors.base.BaseExampleSelector.html?highlight=baseexampleselector#langchain_core.example_selectors.base.BaseExampleSelector) interface. A common example is the vectorstore-backed [SemanticSimilarityExampleSelector](https://api.python.langchain.com/en/latest/example_selectors/langchain_core.example_selectors.semantic_similarity.SemanticSimilarityExampleSelector.html?highlight=semanticsimilarityexampleselector#langchain_core.example_selectors.semantic_similarity.SemanticSimilarityExampleSelector)\n",
|
||||
"- `example_prompt`: convert each example into 1 or more messages through its [`format_messages`](https://api.python.langchain.com/en/latest/prompts/langchain_core.prompts.chat.ChatPromptTemplate.html?highlight=chatprompttemplate#langchain_core.prompts.chat.ChatPromptTemplate.format_messages) method. A common example would be to convert each example into one human message and one AI message response, or a human message followed by a function call message.\n",
|
||||
"- `example_selector`: responsible for selecting few-shot examples (and the order in which they are returned) for a given input. These implement the [BaseExampleSelector](https://python.langchain.com/v0.2/api_reference/core/example_selectors/langchain_core.example_selectors.base.BaseExampleSelector.html?highlight=baseexampleselector#langchain_core.example_selectors.base.BaseExampleSelector) interface. A common example is the vectorstore-backed [SemanticSimilarityExampleSelector](https://python.langchain.com/v0.2/api_reference/core/example_selectors/langchain_core.example_selectors.semantic_similarity.SemanticSimilarityExampleSelector.html?highlight=semanticsimilarityexampleselector#langchain_core.example_selectors.semantic_similarity.SemanticSimilarityExampleSelector)\n",
|
||||
"- `example_prompt`: convert each example into 1 or more messages through its [`format_messages`](https://python.langchain.com/v0.2/api_reference/core/prompts/langchain_core.prompts.chat.ChatPromptTemplate.html?highlight=chatprompttemplate#langchain_core.prompts.chat.ChatPromptTemplate.format_messages) method. A common example would be to convert each example into one human message and one AI message response, or a human message followed by a function call message.\n",
|
||||
"\n",
|
||||
"These once again can be composed with other messages and chat templates to assemble your final prompt.\n",
|
||||
"\n",
|
||||
|
||||
@@ -175,7 +175,7 @@
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For a complete description of all arguments head to the API reference: https://api.python.langchain.com/en/latest/messages/langchain_core.messages.utils.filter_messages.html"
|
||||
"For a complete description of all arguments head to the API reference: https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.utils.filter_messages.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -88,7 +88,7 @@
|
||||
"## Passing tools to LLMs\n",
|
||||
"\n",
|
||||
"Chat models supporting tool calling features implement a `.bind_tools` method, which \n",
|
||||
"receives a list of LangChain [tool objects](https://api.python.langchain.com/en/latest/tools/langchain_core.tools.BaseTool.html#langchain_core.tools.BaseTool) \n",
|
||||
"receives a list of LangChain [tool objects](https://python.langchain.com/v0.2/api_reference/core/tools/langchain_core.tools.BaseTool.html#langchain_core.tools.BaseTool) \n",
|
||||
"and binds them to the chat model in its expected format. Subsequent invocations of the \n",
|
||||
"chat model will include tool schemas in its calls to the LLM.\n",
|
||||
"\n",
|
||||
@@ -212,9 +212,9 @@
|
||||
"## Tool calls\n",
|
||||
"\n",
|
||||
"If tool calls are included in a LLM response, they are attached to the corresponding \n",
|
||||
"[message](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage) \n",
|
||||
"or [message chunk](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.ai.AIMessageChunk.html#langchain_core.messages.ai.AIMessageChunk) \n",
|
||||
"as a list of [tool call](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.tool.ToolCall.html#langchain_core.messages.tool.ToolCall) \n",
|
||||
"[message](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage) \n",
|
||||
"or [message chunk](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessageChunk.html#langchain_core.messages.ai.AIMessageChunk) \n",
|
||||
"as a list of [tool call](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.tool.ToolCall.html#langchain_core.messages.tool.ToolCall) \n",
|
||||
"objects in the `.tool_calls` attribute. A `ToolCall` is a typed dict that includes a \n",
|
||||
"tool name, dict of argument values, and (optionally) an identifier. Messages with no \n",
|
||||
"tool calls default to an empty list for this attribute.\n",
|
||||
@@ -258,7 +258,7 @@
|
||||
"The `.tool_calls` attribute should contain valid tool calls. Note that on occasion, \n",
|
||||
"model providers may output malformed tool calls (e.g., arguments that are not \n",
|
||||
"valid JSON). When parsing fails in these cases, instances \n",
|
||||
"of [InvalidToolCall](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.tool.InvalidToolCall.html#langchain_core.messages.tool.InvalidToolCall) \n",
|
||||
"of [InvalidToolCall](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.tool.InvalidToolCall.html#langchain_core.messages.tool.InvalidToolCall) \n",
|
||||
"are populated in the `.invalid_tool_calls` attribute. An `InvalidToolCall` can have \n",
|
||||
"a name, string arguments, identifier, and error message.\n",
|
||||
"\n",
|
||||
@@ -298,8 +298,8 @@
|
||||
"### Streaming\n",
|
||||
"\n",
|
||||
"When tools are called in a streaming context, \n",
|
||||
"[message chunks](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.ai.AIMessageChunk.html#langchain_core.messages.ai.AIMessageChunk) \n",
|
||||
"will be populated with [tool call chunk](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.tool.ToolCallChunk.html#langchain_core.messages.tool.ToolCallChunk) \n",
|
||||
"[message chunks](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessageChunk.html#langchain_core.messages.ai.AIMessageChunk) \n",
|
||||
"will be populated with [tool call chunk](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.tool.ToolCallChunk.html#langchain_core.messages.tool.ToolCallChunk) \n",
|
||||
"objects in a list via the `.tool_call_chunks` attribute. A `ToolCallChunk` includes \n",
|
||||
"optional string fields for the tool `name`, `args`, and `id`, and includes an optional \n",
|
||||
"integer field `index` that can be used to join chunks together. Fields are optional \n",
|
||||
@@ -307,7 +307,7 @@
|
||||
"that includes a substring of the arguments may have null values for the tool name and id).\n",
|
||||
"\n",
|
||||
"Because message chunks inherit from their parent message class, an \n",
|
||||
"[AIMessageChunk](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.ai.AIMessageChunk.html#langchain_core.messages.ai.AIMessageChunk) \n",
|
||||
"[AIMessageChunk](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessageChunk.html#langchain_core.messages.ai.AIMessageChunk) \n",
|
||||
"with tool call chunks will also include `.tool_calls` and `.invalid_tool_calls` fields. \n",
|
||||
"These fields are parsed best-effort from the message's tool call chunks.\n",
|
||||
"\n",
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
"\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"You can use arbitrary functions as [Runnables](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable). This is useful for formatting or when you need functionality not provided by other LangChain components, and custom functions used as Runnables are called [`RunnableLambdas`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableLambda.html).\n",
|
||||
"You can use arbitrary functions as [Runnables](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable). This is useful for formatting or when you need functionality not provided by other LangChain components, and custom functions used as Runnables are called [`RunnableLambdas`](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.RunnableLambda.html).\n",
|
||||
"\n",
|
||||
"Note that all inputs to these functions need to be a SINGLE argument. If you have a function that accepts multiple arguments, you should write a wrapper that accepts a single dict input and unpacks it into multiple arguments.\n",
|
||||
"\n",
|
||||
@@ -210,7 +210,7 @@
|
||||
"\n",
|
||||
"## Passing run metadata\n",
|
||||
"\n",
|
||||
"Runnable lambdas can optionally accept a [RunnableConfig](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.config.RunnableConfig.html#langchain_core.runnables.config.RunnableConfig) parameter, which they can use to pass callbacks, tags, and other configuration information to nested runs."
|
||||
"Runnable lambdas can optionally accept a [RunnableConfig](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.config.RunnableConfig.html#langchain_core.runnables.config.RunnableConfig) parameter, which they can use to pass callbacks, tags, and other configuration information to nested runs."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -303,7 +303,7 @@
|
||||
"## Streaming\n",
|
||||
"\n",
|
||||
":::{.callout-note}\n",
|
||||
"[RunnableLambda](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableLambda.html) is best suited for code that does not need to support streaming. If you need to support streaming (i.e., be able to operate on chunks of inputs and yield chunks of outputs), use [RunnableGenerator](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableGenerator.html) instead as in the example below.\n",
|
||||
"[RunnableLambda](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.RunnableLambda.html) is best suited for code that does not need to support streaming. If you need to support streaming (i.e., be able to operate on chunks of inputs and yield chunks of outputs), use [RunnableGenerator](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.RunnableGenerator.html) instead as in the example below.\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"You can use generator functions (ie. functions that use the `yield` keyword, and behave like iterators) in a chain.\n",
|
||||
|
||||
@@ -347,7 +347,7 @@
|
||||
"\n",
|
||||
"If we have enough examples, we may want to only include the most relevant ones in the prompt, either because they don't fit in the model's context window or because the long tail of examples distracts the model. And specifically, given any input we want to include the examples most relevant to that input.\n",
|
||||
"\n",
|
||||
"We can do just this using an ExampleSelector. In this case we'll use a [SemanticSimilarityExampleSelector](https://api.python.langchain.com/en/latest/example_selectors/langchain_core.example_selectors.semantic_similarity.SemanticSimilarityExampleSelector.html), which will store the examples in the vector database of our choosing. At runtime it will perform a similarity search between the input and our examples, and return the most semantically similar ones: "
|
||||
"We can do just this using an ExampleSelector. In this case we'll use a [SemanticSimilarityExampleSelector](https://python.langchain.com/v0.2/api_reference/core/example_selectors/langchain_core.example_selectors.semantic_similarity.SemanticSimilarityExampleSelector.html), which will store the examples in the vector database of our choosing. At runtime it will perform a similarity search between the input and our examples, and return the most semantically similar ones: "
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -9,7 +9,7 @@ Here you’ll find answers to “How do I….?” types of questions.
|
||||
These guides are *goal-oriented* and *concrete*; they're meant to help you complete a specific task.
|
||||
For conceptual explanations see the [Conceptual guide](/docs/concepts/).
|
||||
For end-to-end walkthroughs see [Tutorials](/docs/tutorials).
|
||||
For comprehensive descriptions of every class and function see the [API Reference](https://api.python.langchain.com/en/latest/).
|
||||
For comprehensive descriptions of every class and function see the [API Reference](https://python.langchain.com/v0.2/api_reference/).
|
||||
|
||||
## Installation
|
||||
|
||||
@@ -27,7 +27,7 @@ This highlights functionality that is core to using LangChain.
|
||||
|
||||
## LangChain Expression Language (LCEL)
|
||||
|
||||
[LangChain Expression Language](/docs/concepts/#langchain-expression-language-lcel) is a way to create arbitrary custom chains. It is built on the [Runnable](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html) protocol.
|
||||
[LangChain Expression Language](/docs/concepts/#langchain-expression-language-lcel) is a way to create arbitrary custom chains. It is built on the [Runnable](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html) protocol.
|
||||
|
||||
[**LCEL cheatsheet**](/docs/how_to/lcel_cheatsheet/): For a quick overview of how to use the main LCEL primitives.
|
||||
|
||||
@@ -69,6 +69,7 @@ These are the core building blocks you can use when building applications.
|
||||
- [How to: select examples by semantic similarity](/docs/how_to/example_selectors_similarity)
|
||||
- [How to: select examples by semantic ngram overlap](/docs/how_to/example_selectors_ngram)
|
||||
- [How to: select examples by maximal marginal relevance](/docs/how_to/example_selectors_mmr)
|
||||
- [How to: select examples from LangSmith few-shot datasets](/docs/how_to/example_selectors_langsmith/)
|
||||
|
||||
### Chat models
|
||||
|
||||
@@ -315,6 +316,15 @@ For a high-level tutorial, check out [this guide](/docs/tutorials/graph/).
|
||||
- [How to: improve results with prompting](/docs/how_to/graph_prompting)
|
||||
- [How to: construct knowledge graphs](/docs/how_to/graph_constructing)
|
||||
|
||||
### Summarization
|
||||
|
||||
LLMs can summarize and otherwise distill desired information from text, including
|
||||
large volumes of text. For a high-level tutorial, check out [this guide](/docs/tutorials/summarization).
|
||||
|
||||
- [How to: summarize text in a single LLM call](/docs/how_to/summarize_stuff)
|
||||
- [How to: summarize text through parallelization](/docs/how_to/summarize_map_reduce)
|
||||
- [How to: summarize text through iterative refinement](/docs/how_to/summarize_refine)
|
||||
|
||||
## [LangGraph](https://langchain-ai.github.io/langgraph)
|
||||
|
||||
LangGraph is an extension of LangChain aimed at
|
||||
|
||||
@@ -9,7 +9,7 @@ functionality to install.
|
||||
|
||||
## Official release
|
||||
|
||||
To install the main LangChain package, run:
|
||||
To install the main `langchain` package, run:
|
||||
|
||||
import Tabs from '@theme/Tabs';
|
||||
import TabItem from '@theme/TabItem';
|
||||
@@ -26,8 +26,7 @@ import CodeBlock from "@theme/CodeBlock";
|
||||
|
||||
While this package acts as a sane starting point to using LangChain,
|
||||
much of the value of LangChain comes when integrating it with various model providers, datastores, etc.
|
||||
By default, the dependencies needed to do that are NOT installed. You will need to install the dependencies for specific integrations separately.
|
||||
We'll show how to do that in the next sections of this guide.
|
||||
By default, the dependencies needed to do that are NOT installed. You will need to install the dependencies for specific integrations separately, which we show below.
|
||||
|
||||
## Ecosystem packages
|
||||
|
||||
@@ -41,14 +40,6 @@ When installing a package, you do not need to explicitly install that package's
|
||||
However, you may choose to if you are using a feature only available in a certain version of that dependency.
|
||||
If you do, you should make sure that the installed or pinned version is compatible with any other integration packages you use.
|
||||
|
||||
### From source
|
||||
|
||||
If you want to install from source, you can do so by cloning the repo and be sure that the directory is `PATH/TO/REPO/langchain/libs/langchain` running:
|
||||
|
||||
```bash
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
### LangChain core
|
||||
The `langchain-core` package contains base abstractions that the rest of the LangChain ecosystem uses, along with the LangChain Expression Language. It is automatically installed by `langchain`, but can also be used separately. Install with:
|
||||
|
||||
@@ -56,8 +47,18 @@ The `langchain-core` package contains base abstractions that the rest of the Lan
|
||||
pip install langchain-core
|
||||
```
|
||||
|
||||
### LangChain community
|
||||
The `langchain-community` package contains third-party integrations. Install with:
|
||||
### Integration packages
|
||||
|
||||
Certain integrations like OpenAI and Anthropic have their own packages.
|
||||
Any integrations that require their own package will be documented as such in the [Integration docs](/docs/integrations/platforms/).
|
||||
You can see a list of all integration packages in the [API reference](https://api.python.langchain.com) under the "Partner libs" dropdown.
|
||||
To install one of these run:
|
||||
|
||||
```bash
|
||||
pip install langchain-openai
|
||||
```
Any integrations that haven't been split out into their own packages will live in the `langchain-community` package. Install with:

```bash
pip install langchain-community
@@ -89,7 +90,7 @@ pip install "langserve[all]"
```
for both client and server dependencies. Or `pip install "langserve[client]"` for client code, and `pip install "langserve[server]"` for server code.

## LangChain CLI
### LangChain CLI
The LangChain CLI is useful for working with LangChain templates and other LangServe projects.
Install with:

@@ -105,3 +106,13 @@ If you are not using LangChain, you can install it with:
```bash
pip install langsmith
```

### From source
If you want to install a package from source, you can do so by cloning the [main LangChain repo](https://github.com/langchain-ai/langchain), entering the directory of the package you want to install, `PATH/TO/REPO/langchain/libs/{package}`, and running:
```bash
pip install -e .
```

LangGraph, LangSmith SDK, and certain integration packages live outside the main LangChain repo. You can see [all repos here](https://github.com/langchain-ai).
|
||||
@@ -7,10 +7,10 @@
|
||||
"source": [
|
||||
"# LangChain Expression Language Cheatsheet\n",
|
||||
"\n",
|
||||
"This is a quick reference for all the most important LCEL primitives. For more advanced usage see the [LCEL how-to guides](/docs/how_to/#langchain-expression-language-lcel) and the [full API reference](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html).\n",
|
||||
"This is a quick reference for all the most important LCEL primitives. For more advanced usage see the [LCEL how-to guides](/docs/how_to/#langchain-expression-language-lcel) and the [full API reference](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html).\n",
|
||||
"\n",
|
||||
"### Invoke a runnable\n",
|
||||
"#### [Runnable.invoke()](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.invoke) / [Runnable.ainvoke()](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.ainvoke)"
|
||||
"#### [Runnable.invoke()](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.invoke) / [Runnable.ainvoke()](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.ainvoke)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -46,7 +46,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Batch a runnable\n",
|
||||
"#### [Runnable.batch()](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.batch) / [Runnable.abatch()](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.abatch)"
|
||||
"#### [Runnable.batch()](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.batch) / [Runnable.abatch()](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.abatch)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -82,7 +82,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Stream a runnable\n",
|
||||
"#### [Runnable.stream()](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.stream) / [Runnable.astream()](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.astream)"
|
||||
"#### [Runnable.stream()](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.stream) / [Runnable.astream()](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.astream)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -165,7 +165,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Invoke runnables in parallel\n",
|
||||
"#### [RunnableParallel](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableParallel.html)"
|
||||
"#### [RunnableParallel](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.RunnableParallel.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -202,7 +202,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Turn any function into a runnable\n",
|
||||
"#### [RunnableLambda](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableLambda.html)"
|
||||
"#### [RunnableLambda](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.RunnableLambda.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -240,7 +240,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Merge input and output dicts\n",
|
||||
"#### [RunnablePassthrough.assign](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html)"
|
||||
"#### [RunnablePassthrough.assign](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -276,7 +276,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Include input dict in output dict\n",
|
||||
"#### [RunnablePassthrough](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html)"
|
||||
"#### [RunnablePassthrough](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -316,7 +316,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Add default invocation args\n",
|
||||
"#### [Runnable.bind](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.bind)"
|
||||
"#### [Runnable.bind](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.bind)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -360,7 +360,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Add fallbacks\n",
|
||||
"#### [Runnable.with_fallbacks](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_fallbacks)"
|
||||
"#### [Runnable.with_fallbacks](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_fallbacks)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -397,7 +397,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Add retries\n",
|
||||
"#### [Runnable.with_retry](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_retry)"
|
||||
"#### [Runnable.with_retry](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_retry)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -449,7 +449,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Configure runnable execution\n",
|
||||
"#### [RunnableConfig](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.config.RunnableConfig.html)"
|
||||
"#### [RunnableConfig](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.config.RunnableConfig.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -487,7 +487,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Add default config to runnable\n",
|
||||
"#### [Runnable.with_config](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_config)"
|
||||
"#### [Runnable.with_config](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -526,7 +526,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Make runnable attributes configurable\n",
|
||||
"#### [Runnable.with_configurable_fields](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableSerializable.html#langchain_core.runnables.base.RunnableSerializable.configurable_fields)"
|
||||
"#### [Runnable.with_configurable_fields](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.RunnableSerializable.html#langchain_core.runnables.base.RunnableSerializable.configurable_fields)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -605,7 +605,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Make chain components configurable\n",
|
||||
"#### [Runnable.with_configurable_alternatives](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableSerializable.html#langchain_core.runnables.base.RunnableSerializable.configurable_alternatives)"
|
||||
"#### [Runnable.with_configurable_alternatives](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.RunnableSerializable.html#langchain_core.runnables.base.RunnableSerializable.configurable_alternatives)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -745,7 +745,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Generate a stream of events\n",
|
||||
"#### [Runnable.astream_events](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.astream_events)"
|
||||
"#### [Runnable.astream_events](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.astream_events)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -817,7 +817,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Yield batched outputs as they complete\n",
|
||||
"#### [Runnable.batch_as_completed](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.batch_as_completed) / [Runnable.abatch_as_completed](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.abatch_as_completed)"
|
||||
"#### [Runnable.batch_as_completed](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.batch_as_completed) / [Runnable.abatch_as_completed](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.abatch_as_completed)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -858,7 +858,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Return subset of output dict\n",
|
||||
"#### [Runnable.pick](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.pick)"
|
||||
"#### [Runnable.pick](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.pick)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -893,7 +893,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Declaratively make a batched version of a runnable\n",
|
||||
"#### [Runnable.map](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.map)"
|
||||
"#### [Runnable.map](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.map)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -930,7 +930,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Get a graph representation of a runnable\n",
|
||||
"#### [Runnable.get_graph](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.get_graph)"
|
||||
"#### [Runnable.get_graph](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.get_graph)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -991,7 +991,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Get all prompts in a chain\n",
|
||||
"#### [Runnable.get_prompts](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.get_prompts)"
|
||||
"#### [Runnable.get_prompts](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.get_prompts)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1071,7 +1071,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Add lifecycle listeners\n",
|
||||
"#### [Runnable.with_listeners](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_listeners)"
|
||||
"#### [Runnable.with_listeners](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.with_listeners)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
"\n",
|
||||
"There are some API-specific callback context managers that allow you to track token usage across multiple calls. You'll need to check whether such an integration is available for your particular model.\n",
|
||||
"\n",
|
||||
"If such an integration is not available for your model, you can create a custom callback manager by adapting the implementation of the [OpenAI callback manager](https://api.python.langchain.com/en/latest/_modules/langchain_community/callbacks/openai_info.html#OpenAICallbackHandler).\n",
|
||||
"If such an integration is not available for your model, you can create a custom callback manager by adapting the implementation of the [OpenAI callback manager](https://python.langchain.com/v0.2/api_reference/community/callbacks/langchain_community.callbacks.openai_info.OpenAICallbackHandler.html).\n",
|
||||
"\n",
|
||||
"### OpenAI\n",
|
||||
"\n",
|
||||
|
||||
@@ -244,7 +244,7 @@
|
||||
"\n",
|
||||
"* E.g., for Llama 2 7b: `ollama pull llama2` will download the most basic version of the model (e.g., smallest # parameters and 4 bit quantization)\n",
|
||||
"* We can also specify a particular version from the [model list](https://github.com/jmorganca/ollama?tab=readme-ov-file#model-library), e.g., `ollama pull llama2:13b`\n",
|
||||
"* See the full set of parameters on the [API reference page](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.ollama.Ollama.html)"
|
||||
"* See the full set of parameters on the [API reference page](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.ollama.Ollama.html)"
|
||||
]
|
||||
},
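A minimal sketch of the community `Ollama` wrapper described above, assuming a local Ollama server is running and `ollama pull llama2` has completed.

```python
# Sketch: run a locally served Ollama model through the community wrapper.
# Assumes the Ollama server is running and `ollama pull llama2` has completed.
from langchain_community.llms import Ollama

llm = Ollama(model="llama2")
print(llm.invoke("The first man on the moon was ..."))
```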
|
||||
{
|
||||
@@ -280,9 +280,9 @@
|
||||
"\n",
|
||||
"For example, below we run inference on `llama2-13b` with 4 bit quantization downloaded from [HuggingFace](https://huggingface.co/TheBloke/Llama-2-13B-GGML/tree/main).\n",
|
||||
"\n",
|
||||
"As noted above, see the [API reference](https://api.python.langchain.com/en/latest/llms/langchain.llms.llamacpp.LlamaCpp.html?highlight=llamacpp#langchain.llms.llamacpp.LlamaCpp) for the full set of parameters. \n",
|
||||
"As noted above, see the [API reference](https://python.langchain.com/v0.2/api_reference/langchain/llms/langchain.llms.llamacpp.LlamaCpp.html?highlight=llamacpp#langchain.llms.llamacpp.LlamaCpp) for the full set of parameters. \n",
|
||||
"\n",
|
||||
"From the [llama.cpp API reference docs](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.llamacpp.LlamaCpp.htm), a few are worth commenting on:\n",
|
||||
"From the [llama.cpp API reference docs](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.llamacpp.LlamaCpp.html), a few are worth commenting on:\n",
|
||||
"\n",
|
||||
"`n_gpu_layers`: number of layers to be loaded into GPU memory\n",
|
||||
"\n",
|
||||
@@ -416,7 +416,7 @@
|
||||
"\n",
|
||||
"We can use model weights downloaded from [GPT4All](/docs/integrations/llms/gpt4all) model explorer.\n",
|
||||
"\n",
|
||||
"Similar to what is shown above, we can run inference and use [the API reference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.gpt4all.GPT4All.html) to set parameters of interest."
|
||||
"Similar to what is shown above, we can run inference and use [the API reference](https://python.langchain.com/v0.2/api_reference/community/llms/langchain_community.llms.gpt4all.GPT4All.html) to set parameters of interest."
|
||||
]
|
||||
},
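A sketch of the corresponding `GPT4All` usage; it assumes the `gpt4all` package is installed, and the path is a placeholder for weights downloaded from the model explorer.

```python
# Sketch: run inference with GPT4All weights. Assumes the gpt4all package is
# installed; the path is a placeholder for a downloaded model file.
from langchain_community.llms import GPT4All

llm = GPT4All(model="/path/to/ggml-gpt4all-model.bin")  # placeholder path
print(llm.invoke("The first man on the moon was ..."))
```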
|
||||
{
|
||||
|
||||
@@ -55,7 +55,7 @@
|
||||
"id": "f88ffa0d-f4a7-482c-88de-cbec501a79b1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For the OpenAI API to return log probabilities we need to configure the `logprobs=True` param. Then, the logprobs are included on each output [`AIMessage`](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.ai.AIMessage.html) as part of the `response_metadata`:"
|
||||
"For the OpenAI API to return log probabilities we need to configure the `logprobs=True` param. Then, the logprobs are included on each output [`AIMessage`](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html) as part of the `response_metadata`:"
|
||||
]
|
||||
},
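A sketch of what that configuration can look like, assuming `langchain-openai` is installed and `OPENAI_API_KEY` is set; the model name is illustrative.

```python
# Sketch: request log probabilities and read them back from response_metadata.
# Assumes OPENAI_API_KEY is set; the model name is illustrative.
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini").bind(logprobs=True)
msg = llm.invoke([("human", "how are you today")])
# Per-token logprobs are attached to the AIMessage's response_metadata.
print(msg.response_metadata["logprobs"]["content"][:2])
```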
|
||||
{
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
"\n",
|
||||
"To mitigate the [\"lost in the middle\"](https://arxiv.org/abs/2307.03172) effect, you can re-order documents after retrieval such that the most relevant documents are positioned at extrema (e.g., the first and last pieces of context), and the least relevant documents are positioned in the middle. In some cases this can help surface the most relevant information to LLMs.\n",
|
||||
"\n",
|
||||
"The [LongContextReorder](https://api.python.langchain.com/en/latest/document_transformers/langchain_community.document_transformers.long_context_reorder.LongContextReorder.html) document transformer implements this re-ordering procedure. Below we demonstrate an example."
|
||||
"The [LongContextReorder](https://python.langchain.com/v0.2/api_reference/community/document_transformers/langchain_community.document_transformers.long_context_reorder.LongContextReorder.html) document transformer implements this re-ordering procedure. Below we demonstrate an example."
|
||||
]
|
||||
},
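A minimal sketch of the transformer in isolation; the documents below are placeholders standing in for relevance-ordered retriever output.

```python
# Sketch: re-order a relevance-sorted document list so the most relevant items
# sit at the start and end. The documents here are placeholders.
from langchain_community.document_transformers import LongContextReorder
from langchain_core.documents import Document

docs = [Document(page_content=f"Document {i} (relevance rank {i})") for i in range(6)]

reordering = LongContextReorder()
reordered_docs = reordering.transform_documents(docs)
print([d.page_content for d in reordered_docs])
```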
|
||||
{
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
"When a full paragraph or document is embedded, the embedding process considers both the overall context and the relationships between the sentences and phrases within the text. This can result in a more comprehensive vector representation that captures the broader meaning and themes of the text.\n",
|
||||
"```\n",
|
||||
" \n",
|
||||
"As mentioned, chunking often aims to keep text with common context together. With this in mind, we might want to specifically honor the structure of the document itself. For example, a markdown file is organized by headers. Creating chunks within specific header groups is an intuitive idea. To address this challenge, we can use [MarkdownHeaderTextSplitter](https://api.python.langchain.com/en/latest/markdown/langchain_text_splitters.markdown.MarkdownHeaderTextSplitter.html). This will split a markdown file by a specified set of headers. \n",
|
||||
"As mentioned, chunking often aims to keep text with common context together. With this in mind, we might want to specifically honor the structure of the document itself. For example, a markdown file is organized by headers. Creating chunks within specific header groups is an intuitive idea. To address this challenge, we can use [MarkdownHeaderTextSplitter](https://python.langchain.com/v0.2/api_reference/text_splitters/markdown/langchain_text_splitters.markdown.MarkdownHeaderTextSplitter.html). This will split a markdown file by a specified set of headers. \n",
|
||||
"\n",
|
||||
"For example, if we want to split this markdown:\n",
|
||||
"```\n",
|
||||
|
||||
@@ -174,7 +174,7 @@
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For a complete description of all arguments head to the API reference: https://api.python.langchain.com/en/latest/messages/langchain_core.messages.utils.merge_message_runs.html"
|
||||
"For a complete description of all arguments head to the API reference: https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.utils.merge_message_runs.html"
|
||||
]
|
||||
}
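A short sketch of the utility in action; the messages are illustrative.

```python
# Sketch: merge consecutive messages of the same type into single messages.
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, merge_message_runs

messages = [
    SystemMessage("you're a good assistant."),
    HumanMessage("what's your favorite color"),
    HumanMessage("wait, your favorite ice cream flavor"),
    AIMessage("my favorite colour is blue"),
]

merged = merge_message_runs(messages)
# The two consecutive HumanMessages are merged into one HumanMessage.
print([type(m).__name__ for m in merged])
```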
|
||||
],
|
||||
|
||||
@@ -32,7 +32,7 @@
|
||||
"\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"Passing conversation state into and out a chain is vital when building a chatbot. The [`RunnableWithMessageHistory`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.history.RunnableWithMessageHistory.html#langchain_core.runnables.history.RunnableWithMessageHistory) class lets us add message history to certain types of chains. It wraps another Runnable and manages the chat message history for it. Specifically, it loads previous messages in the conversation BEFORE passing it to the Runnable, and it saves the generated response as a message AFTER calling the runnable. This class also enables multiple conversations by saving each conversation with a `session_id` - it then expects a `session_id` to be passed in the config when calling the runnable, and uses that to look up the relevant conversation history.\n",
|
||||
"Passing conversation state into and out a chain is vital when building a chatbot. The [`RunnableWithMessageHistory`](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.history.RunnableWithMessageHistory.html#langchain_core.runnables.history.RunnableWithMessageHistory) class lets us add message history to certain types of chains. It wraps another Runnable and manages the chat message history for it. Specifically, it loads previous messages in the conversation BEFORE passing it to the Runnable, and it saves the generated response as a message AFTER calling the runnable. This class also enables multiple conversations by saving each conversation with a `session_id` - it then expects a `session_id` to be passed in the config when calling the runnable, and uses that to look up the relevant conversation history.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
":::\n",
|
||||
"\n",
|
||||
"Here we focus on how to move from legacy LangChain agents to more flexible [LangGraph](https://langchain-ai.github.io/langgraph/) agents.\n",
|
||||
"LangChain agents (the [AgentExecutor](https://api.python.langchain.com/en/latest/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor) in particular) have multiple configuration parameters.\n",
|
||||
"LangChain agents (the [AgentExecutor](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor) in particular) have multiple configuration parameters.\n",
|
||||
"In this notebook we will show how those parameters map to the LangGraph react agent executor using the [create_react_agent](https://langchain-ai.github.io/langgraph/reference/prebuilt/#create_react_agent) prebuilt helper method.\n",
|
||||
"\n",
|
||||
"#### Prerequisites\n",
|
||||
@@ -82,7 +82,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"id": "1e425fea-2796-4b99-bee6-9a6ffe73f756",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -110,12 +110,12 @@
|
||||
"id": "af002033-fe51-4d14-b47c-3e9b483c8395",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For the LangChain [AgentExecutor](https://api.python.langchain.com/en/latest/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor), we define a prompt with a placeholder for the agent's scratchpad. The agent can be invoked as follows:"
|
||||
"For the LangChain [AgentExecutor](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor), we define a prompt with a placeholder for the agent's scratchpad. The agent can be invoked as follows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"id": "03ea357c-9c36-4464-b2cc-27bd150e1554",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -126,7 +126,7 @@
|
||||
" 'output': 'The value of `magic_function(3)` is 5.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -162,7 +162,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"id": "53a3737a-d167-4255-89bf-20ac37f89a3e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -173,7 +173,7 @@
|
||||
" 'output': 'The value of `magic_function(3)` is 5.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -193,7 +193,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"id": "74ecebe3-512e-409c-a661-bdd5b0a2b782",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -201,10 +201,10 @@
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'Pardon?',\n",
|
||||
" 'output': 'The value you get when you apply `magic_function` to the input 3 is 5.'}"
|
||||
" 'output': 'The value returned by `magic_function` when the input is 3 is 5.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -243,7 +243,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"id": "a9a11ccd-75e2-4c11-844d-a34870b0ff91",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -254,7 +254,7 @@
|
||||
" 'output': 'El valor de `magic_function(3)` es 5.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -295,7 +295,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 8,
|
||||
"id": "a9486805-676a-4d19-a5c4-08b41b172989",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -324,7 +324,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 9,
|
||||
"id": "d369ab45-0c82-45f4-9d3e-8efb8dd47e2c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -332,7 +332,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'input': 'what is the value of magic_function(3)?', 'output': 'El valor de magic_function(3) es 5. ¡Pandamonium!'}\n"
|
||||
"{'input': 'what is the value of magic_function(3)?', 'output': 'The value of magic_function(3) is 5. ¡Pandamonium!'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -381,12 +381,12 @@
|
||||
"source": [
|
||||
"### In LangChain\n",
|
||||
"\n",
|
||||
"With LangChain's [AgentExecutor](https://api.python.langchain.com/en/latest/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.iter), you could add chat [Memory](https://api.python.langchain.com/en/latest/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.memory) so it can engage in a multi-turn conversation."
|
||||
"With LangChain's [AgentExecutor](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.iter), you could add chat [Memory](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.memory) so it can engage in a multi-turn conversation."
|
||||
]
|
||||
},
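The LangGraph counterpart, shown later in this notebook, threads state through a checkpointer keyed by a `thread_id`; here is a compact, self-contained sketch of that pattern. The model, tool, and thread id are illustrative.

```python
# Sketch: multi-turn memory in LangGraph via a checkpointer and a thread_id.
# The model, tool, and thread_id are illustrative.
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import create_react_agent


@tool
def magic_function(input: int) -> int:
    """Applies a magic function to an input."""
    return input + 2


app = create_react_agent(
    ChatOpenAI(model="gpt-4o"), [magic_function], checkpointer=MemorySaver()
)
config = {"configurable": {"thread_id": "test-thread"}}

app.invoke({"messages": [("human", "Hi, I'm Polly! What's the output of magic_function of 3?")]}, config)
reply = app.invoke({"messages": [("human", "Do you remember my name?")]}, config)
print(reply["messages"][-1].content)
```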
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 10,
|
||||
"id": "b97beba5-8f74-430c-9399-91b77c8fa15c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -394,11 +394,11 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Hi Polly! The output of the magic function for the input 3 is 5.\n",
|
||||
"Hi Polly! The output of applying the magic function to the input 3 is 5.\n",
|
||||
"---\n",
|
||||
"Yes, your name is Polly!\n",
|
||||
"Yes, you mentioned your name is Polly.\n",
|
||||
"---\n",
|
||||
"The output of the magic function for the input 3 is 5.\n"
|
||||
"The output of applying the magic function to the input 3 is 5.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -476,7 +476,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 11,
|
||||
"id": "baca3dc6-678b-4509-9275-2fd653102898",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -484,16 +484,16 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Hi Polly! The output of the magic_function for the input of 3 is 5.\n",
|
||||
"Hi Polly! The output of applying the magic function to the input 3 is 5.\n",
|
||||
"---\n",
|
||||
"Yes, your name is Polly!\n",
|
||||
"---\n",
|
||||
"The output of the magic_function for the input of 3 was 5.\n"
|
||||
"The output of applying the magic function to the input 3 was 5.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langgraph.checkpoint import MemorySaver # an in-memory checkpointer\n",
|
||||
"from langgraph.checkpoint.memory import MemorySaver # an in-memory checkpointer\n",
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"system_message = \"You are a helpful assistant.\"\n",
|
||||
@@ -539,12 +539,12 @@
|
||||
"\n",
|
||||
"### In LangChain\n",
|
||||
"\n",
|
||||
"With LangChain's [AgentExecutor](https://api.python.langchain.com/en/latest/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.iter), you could iterate over the steps using the [stream](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.stream) (or async `astream`) methods or the [iter](https://api.python.langchain.com/en/latest/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.iter) method. LangGraph supports stepwise iteration using [stream](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.stream) "
|
||||
"With LangChain's [AgentExecutor](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.iter), you could iterate over the steps using the [stream](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.stream) (or async `astream`) methods or the [iter](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.iter) method. LangGraph supports stepwise iteration using [stream](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.stream) "
|
||||
]
|
||||
},
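A sketch of stepwise streaming on the LangGraph side; `app` here is assumed to be a compiled `create_react_agent` graph like the one sketched earlier.

```python
# Sketch: iterate over agent steps as they complete. `app` is assumed to be a
# compiled create_react_agent graph (see the earlier sketch).
for step in app.stream(
    {"messages": [("human", "what is the value of magic_function(3)?")]},
    stream_mode="updates",
):
    # Each update is keyed by the node that produced it ("agent" or "tools").
    print(step)
    print("---")
```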
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 12,
|
||||
"id": "e62843c4-1107-41f0-a50b-aea256e28053",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -552,8 +552,8 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'actions': [ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518'}, id='run-5664e138-7085-4da7-a49e-5656a87b8d78', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_1exy0rScfPmo4fy27FbQ5qJ2')], 'messages': [AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518'}, id='run-5664e138-7085-4da7-a49e-5656a87b8d78', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'index': 0, 'type': 'tool_call_chunk'}])]}\n",
|
||||
"{'steps': [AgentStep(action=ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518'}, id='run-5664e138-7085-4da7-a49e-5656a87b8d78', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_1exy0rScfPmo4fy27FbQ5qJ2', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_1exy0rScfPmo4fy27FbQ5qJ2'), observation=5)], 'messages': [FunctionMessage(content='5', name='magic_function')]}\n",
|
||||
"{'actions': [ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c9aa9c0491'}, id='run-dc7ce17d-02fd-4fdb-be82-7c902410b6b7', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_gNzQT96XWoyZqVl1jI1yMnjy')], 'messages': [AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c9aa9c0491'}, id='run-dc7ce17d-02fd-4fdb-be82-7c902410b6b7', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'index': 0, 'type': 'tool_call_chunk'}])]}\n",
|
||||
"{'steps': [AgentStep(action=ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c9aa9c0491'}, id='run-dc7ce17d-02fd-4fdb-be82-7c902410b6b7', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_gNzQT96XWoyZqVl1jI1yMnjy', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_gNzQT96XWoyZqVl1jI1yMnjy'), observation=5)], 'messages': [FunctionMessage(content='5', name='magic_function')]}\n",
|
||||
"{'output': 'The value of `magic_function(3)` is 5.', 'messages': [AIMessage(content='The value of `magic_function(3)` is 5.')]}\n"
|
||||
]
|
||||
}
|
||||
@@ -604,7 +604,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 13,
|
||||
"id": "076ebc85-f804-4093-a25a-a16334c9898e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -612,9 +612,9 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_my9rzFSKR4T1yYKwCsfbZB8A', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 61, 'total_tokens': 75}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_bc2a86f5f5', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-dd705555-8fae-4fb1-a033-5d99a23e3c22-0', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_my9rzFSKR4T1yYKwCsfbZB8A', 'type': 'tool_call'}], usage_metadata={'input_tokens': 61, 'output_tokens': 14, 'total_tokens': 75})]}}\n",
|
||||
"{'tools': {'messages': [ToolMessage(content='5', name='magic_function', tool_call_id='call_my9rzFSKR4T1yYKwCsfbZB8A')]}}\n",
|
||||
"{'agent': {'messages': [AIMessage(content='The value of `magic_function(3)` is 5.', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 84, 'total_tokens': 98}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'stop', 'logprobs': None}, id='run-698cad05-8cb2-4d08-8c2a-881e354f6cc7-0', usage_metadata={'input_tokens': 84, 'output_tokens': 14, 'total_tokens': 98})]}}\n"
|
||||
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_I0nztlIcc0e9ry5dn53YLZUM', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 61, 'total_tokens': 75}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-5f9bd87d-3692-4d13-8d27-1859e13e2156-0', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_I0nztlIcc0e9ry5dn53YLZUM', 'type': 'tool_call'}], usage_metadata={'input_tokens': 61, 'output_tokens': 14, 'total_tokens': 75})]}}\n",
|
||||
"{'tools': {'messages': [ToolMessage(content='5', name='magic_function', tool_call_id='call_I0nztlIcc0e9ry5dn53YLZUM')]}}\n",
|
||||
"{'agent': {'messages': [AIMessage(content='The value of `magic_function(3)` is 5.', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 84, 'total_tokens': 98}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'stop', 'logprobs': None}, id='run-f6015ca6-93e5-45e8-8b28-b3f0a8d203dc-0', usage_metadata={'input_tokens': 84, 'output_tokens': 14, 'total_tokens': 98})]}}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -654,7 +654,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 14,
|
||||
"id": "a2f720f3-c121-4be2-b498-92c16bb44b0a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -662,7 +662,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[(ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_uPZ2D1Bo5mdED3gwgaeWURrf', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518'}, id='run-a792db4a-278d-4090-82ae-904a30eada93', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_uPZ2D1Bo5mdED3gwgaeWURrf', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_uPZ2D1Bo5mdED3gwgaeWURrf', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_uPZ2D1Bo5mdED3gwgaeWURrf'), 5)]\n"
|
||||
"[(ToolAgentAction(tool='magic_function', tool_input={'input': 3}, log=\"\\nInvoking: `magic_function` with `{'input': 3}`\\n\\n\\n\", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_wjaAyTjI2LSYOq7C8QZYSxEs', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls', 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c9aa9c0491'}, id='run-99e06b70-1ef6-4761-834b-87b6c5252e20', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_wjaAyTjI2LSYOq7C8QZYSxEs', 'type': 'tool_call'}], tool_call_chunks=[{'name': 'magic_function', 'args': '{\"input\":3}', 'id': 'call_wjaAyTjI2LSYOq7C8QZYSxEs', 'index': 0, 'type': 'tool_call_chunk'}])], tool_call_id='call_wjaAyTjI2LSYOq7C8QZYSxEs'), 5)]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -684,20 +684,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 15,
|
||||
"id": "ef23117a-5ccb-42ce-80c3-ea49a9d3a942",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'messages': [HumanMessage(content='what is the value of magic_function(3)?', id='cd7d0f49-a0e0-425a-b2b0-603a716058ed'),\n",
|
||||
" AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_VfZ9287DuybOSrBsQH5X12xf', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-a1e965cd-bf61-44f9-aec1-8aaecb80955f-0', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_VfZ9287DuybOSrBsQH5X12xf', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}),\n",
|
||||
" ToolMessage(content='5', name='magic_function', id='20d5c2fe-a5d8-47fa-9e04-5282642e2039', tool_call_id='call_VfZ9287DuybOSrBsQH5X12xf'),\n",
|
||||
" AIMessage(content='The value of `magic_function(3)` is 5.', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 78, 'total_tokens': 92}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'stop', 'logprobs': None}, id='run-abf9341c-ef41-4157-935d-a3be5dfa2f41-0', usage_metadata={'input_tokens': 78, 'output_tokens': 14, 'total_tokens': 92})]}"
|
||||
"{'messages': [HumanMessage(content='what is the value of magic_function(3)?', id='2d369331-8052-4167-bd85-9f6d8ad021ae'),\n",
|
||||
" AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_oXiSQSe6WeWj7XIKXxZrO2IC', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-297e7fc9-726f-46a0-8c67-dc28ed1724d0-0', tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_oXiSQSe6WeWj7XIKXxZrO2IC', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}),\n",
|
||||
" ToolMessage(content='5', name='magic_function', id='46370faf-9598-423c-b94b-aca8cb4f035d', tool_call_id='call_oXiSQSe6WeWj7XIKXxZrO2IC'),\n",
|
||||
" AIMessage(content='The value of `magic_function(3)` is 5.', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 78, 'total_tokens': 92}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'stop', 'logprobs': None}, id='run-f48efaff-0c2c-4632-bbf9-7ee626f73d02-0', usage_metadata={'input_tokens': 78, 'output_tokens': 14, 'total_tokens': 92})]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -757,7 +757,7 @@
|
||||
"Invoking: `magic_function` with `{'input': '3'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3mSorry, there was an error. Please try again.\u001b[0m\u001b[32;1m\u001b[1;3mParece que hubo un error al intentar calcular el valor de la función mágica. ¿Te gustaría que lo intente de nuevo?\u001b[0m\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3mSorry, there was an error. Please try again.\u001b[0m\u001b[32;1m\u001b[1;3mHubo un error al intentar obtener el valor de `magic_function(3)`. ¿Podrías intentarlo de nuevo o proporcionar más detalles?\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -766,7 +766,7 @@
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'what is the value of magic_function(3)?',\n",
|
||||
" 'output': 'Parece que hubo un error al intentar calcular el valor de la función mágica. ¿Te gustaría que lo intente de nuevo?'}"
|
||||
" 'output': 'Hubo un error al intentar obtener el valor de `magic_function(3)`. ¿Podrías intentarlo de nuevo o proporcionar más detalles?'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
@@ -819,12 +819,15 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"content='what is the value of magic_function(3)?' id='74e2d5e8-2b59-4820-979c-8d11ecfc14c2'\n",
|
||||
"content='' additional_kwargs={'tool_calls': [{'id': 'call_ihtrH6IG95pDXpKluIwAgi3J', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-5a35e465-8a08-43dd-ac8b-4a76dcace305-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_ihtrH6IG95pDXpKluIwAgi3J', 'type': 'tool_call'}] usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}\n",
|
||||
"content='Sorry, there was an error. Please try again.' name='magic_function' id='8c37c19b-3586-46b1-aab9-a045786801a2' tool_call_id='call_ihtrH6IG95pDXpKluIwAgi3J'\n",
|
||||
"content='It seems there was an error in processing the request. Let me try again.' additional_kwargs={'tool_calls': [{'id': 'call_iF0vYWAd6rfely0cXSqdMOnF', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 31, 'prompt_tokens': 88, 'total_tokens': 119}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-eb88ec77-d492-43a5-a5dd-4cefef9a6920-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_iF0vYWAd6rfely0cXSqdMOnF', 'type': 'tool_call'}] usage_metadata={'input_tokens': 88, 'output_tokens': 31, 'total_tokens': 119}\n",
|
||||
"content='Sorry, there was an error. Please try again.' name='magic_function' id='c9ff261f-a0f1-4c92-a9f2-cd749f62d911' tool_call_id='call_iF0vYWAd6rfely0cXSqdMOnF'\n",
|
||||
"content='I am currently unable to process the request with the input \"3\" for the `magic_function`. If you have any other questions or need assistance with something else, please let me know!' response_metadata={'token_usage': {'completion_tokens': 39, 'prompt_tokens': 141, 'total_tokens': 180}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'stop', 'logprobs': None} id='run-d42508aa-f286-4b57-80fb-f8a76736d470-0' usage_metadata={'input_tokens': 141, 'output_tokens': 39, 'total_tokens': 180}\n"
|
||||
"content='what is the value of magic_function(3)?' id='fe74bb30-45b8-4a40-a5ed-fd6678da5428'\n",
|
||||
"content='' additional_kwargs={'tool_calls': [{'id': 'call_TNKfNy6fgZNdJAvHUMXwtp8f', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-dad8bfc1-477c-40d2-9016-243d25c0dd13-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_TNKfNy6fgZNdJAvHUMXwtp8f', 'type': 'tool_call'}] usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}\n",
|
||||
"content='Sorry, there was an error. Please try again.' name='magic_function' id='653226e0-3187-40be-a774-4c7c2612239e' tool_call_id='call_TNKfNy6fgZNdJAvHUMXwtp8f'\n",
|
||||
"content='It looks like there was an issue with processing the request. Let me try that again.' additional_kwargs={'tool_calls': [{'id': 'call_K0wJ8fQLYGv8fYXY1Uo5U5sG', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 33, 'prompt_tokens': 88, 'total_tokens': 121}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-d4c85437-6625-4e57-81f9-86de6842be7b-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_K0wJ8fQLYGv8fYXY1Uo5U5sG', 'type': 'tool_call'}] usage_metadata={'input_tokens': 88, 'output_tokens': 33, 'total_tokens': 121}\n",
|
||||
"content='Sorry, there was an error. Please try again.' name='magic_function' id='9b530d03-95df-401e-bb4f-5cada1195033' tool_call_id='call_K0wJ8fQLYGv8fYXY1Uo5U5sG'\n",
|
||||
"content='It seems that there is a persistent issue with processing the request. Let me attempt it one more time.' additional_kwargs={'tool_calls': [{'id': 'call_7ECwwNBDo4SH56oczErZJVRT', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 36, 'prompt_tokens': 143, 'total_tokens': 179}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-9f3f651e-a641-4112-99ed-d1ac11169582-0' tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_7ECwwNBDo4SH56oczErZJVRT', 'type': 'tool_call'}] usage_metadata={'input_tokens': 143, 'output_tokens': 36, 'total_tokens': 179}\n",
|
||||
"content='Sorry, there was an error. Please try again.' name='magic_function' id='e4cd152b-4eb1-47df-ac76-f88e79adbe19' tool_call_id='call_7ECwwNBDo4SH56oczErZJVRT'\n",
|
||||
"content=\"It seems there is a consistent issue with processing the request for the magic function. Let's try using a different approach to resolve this.\" additional_kwargs={'tool_calls': [{'id': 'call_DMAL0UwBRijzuPjCTSwR2r17', 'function': {'arguments': '{\"input\":\"three\"}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 41, 'prompt_tokens': 201, 'total_tokens': 242}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c9aa9c0491', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-cd9f4e5c-f881-462c-abe3-890e73f46a01-0' tool_calls=[{'name': 'magic_function', 'args': {'input': 'three'}, 'id': 'call_DMAL0UwBRijzuPjCTSwR2r17', 'type': 'tool_call'}] usage_metadata={'input_tokens': 201, 'output_tokens': 41, 'total_tokens': 242}\n",
|
||||
"{'input': 'what is the value of magic_function(3)?', 'output': 'Agent stopped due to max iterations.'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -939,9 +942,9 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_FKiTkTd0Ffd4rkYSzERprf1M', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-b842f7b6-ec10-40f8-8c0e-baa220b77e91-0', tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_FKiTkTd0Ffd4rkYSzERprf1M', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69})]}}\n",
|
||||
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_o8Ym0u9UfzArhIm1lV7O0CXF', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-d9faf125-1ff8-4de2-a75b-97e07d28dc4d-0', tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_o8Ym0u9UfzArhIm1lV7O0CXF', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69})]}}\n",
|
||||
"------\n",
|
||||
"{'input': 'what is the value of magic_function(3)?', 'output': 'Agent stopped due to max iterations.'}\n"
|
||||
"{'input': 'what is the value of magic_function(3)?', 'output': 'Agent stopped due to a step timeout.'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -957,7 +960,7 @@
|
||||
" print(chunk)\n",
|
||||
" print(\"------\")\n",
|
||||
"except TimeoutError:\n",
|
||||
" print({\"input\": query, \"output\": \"Agent stopped due to max iterations.\"})"
|
||||
" print({\"input\": query, \"output\": \"Agent stopped due to a step timeout.\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -978,7 +981,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_WoOB8juagB08xrP38twYlYKR', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-73dee47e-30ab-42c9-bb0c-6f227cac96cd-0', tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_WoOB8juagB08xrP38twYlYKR', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69})]}}\n",
|
||||
"{'agent': {'messages': [AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_gsGzyhyvR25iNV6W9VR2TIdQ', 'function': {'arguments': '{\"input\":\"3\"}', 'name': 'magic_function'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-9ad8f834-06c5-41cf-9eec-6b7e0f5e777e-0', tool_calls=[{'name': 'magic_function', 'args': {'input': '3'}, 'id': 'call_gsGzyhyvR25iNV6W9VR2TIdQ', 'type': 'tool_call'}], usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69})]}}\n",
|
||||
"------\n",
|
||||
"Task Cancelled.\n"
|
||||
]
|
||||
@@ -1014,7 +1017,7 @@
|
||||
"\n",
|
||||
"### In LangChain\n",
|
||||
"\n",
|
||||
"With LangChain's [AgentExecutor](https://api.python.langchain.com/en/latest/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.iter), you could configure an [early_stopping_method](https://api.python.langchain.com/en/latest/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.early_stopping_method) to either return a string saying \"Agent stopped due to iteration limit or time limit.\" (`\"force\"`) or prompt the LLM a final time to respond (`\"generate\"`)."
|
||||
"With LangChain's [AgentExecutor](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.iter), you could configure an [early_stopping_method](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.early_stopping_method) to either return a string saying \"Agent stopped due to iteration limit or time limit.\" (`\"force\"`) or prompt the LLM a final time to respond (`\"generate\"`)."
|
||||
]
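For reference, a minimal sketch of how those caps are configured on the LangChain side (hypothetical: `agent` and `tools` are assumed to come from earlier in this guide, so this is not runnable on its own):

```python
# Minimal sketch of the AgentExecutor settings described above; `agent` and
# `tools` are assumed to be the agent and tool list built earlier in the guide.
from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    max_iterations=3,               # cap the number of agent steps
    early_stopping_method="force",  # return the canned "stopped" string;
    # "generate" instead prompts the LLM one final time for an answer
    verbose=True,
)
agent_executor.invoke({"input": "what is the value of magic_function(3)?"})
```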
|
||||
},
|
||||
{
|
||||
@@ -1089,10 +1092,10 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"content='what is the value of magic_function(3)?' id='4fa7fbe5-758c-47a3-9268-717665d10680'\n",
|
||||
"content='' additional_kwargs={'tool_calls': [{'id': 'call_ujE0IQBbIQnxcF9gsZXQfdhF', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-65d689aa-baee-4342-a5d2-048feefab418-0' tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_ujE0IQBbIQnxcF9gsZXQfdhF', 'type': 'tool_call'}] usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}\n",
|
||||
"content='Sorry there was an error, please try again.' name='magic_function' id='ef8ddf1d-9ad7-4ac0-b784-b673c4d94bbd' tool_call_id='call_ujE0IQBbIQnxcF9gsZXQfdhF'\n",
|
||||
"content='It seems there was an issue with the previous attempt. Let me try that again.' additional_kwargs={'tool_calls': [{'id': 'call_GcsAfCFUHJ50BN2IOWnwTbQ7', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 32, 'prompt_tokens': 87, 'total_tokens': 119}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_4e2b2da518', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-54527c4b-8ff0-4ee8-8abf-224886bd222e-0' tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_GcsAfCFUHJ50BN2IOWnwTbQ7', 'type': 'tool_call'}] usage_metadata={'input_tokens': 87, 'output_tokens': 32, 'total_tokens': 119}\n",
|
||||
"content='what is the value of magic_function(3)?' id='6487a942-0a9a-4e8a-9556-553a45fa9c5a'\n",
|
||||
"content='' additional_kwargs={'tool_calls': [{'id': 'call_pe5KVY5No9iT4JWqrm5MwL1D', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 55, 'total_tokens': 69}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-04147325-fb72-462a-a1d9-6aa4e86e3d8a-0' tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_pe5KVY5No9iT4JWqrm5MwL1D', 'type': 'tool_call'}] usage_metadata={'input_tokens': 55, 'output_tokens': 14, 'total_tokens': 69}\n",
|
||||
"content='Sorry there was an error, please try again.' name='magic_function' id='bc0bf58f-7c6c-42ed-a96d-a2afa79f16a9' tool_call_id='call_pe5KVY5No9iT4JWqrm5MwL1D'\n",
|
||||
"content=\"It seems there was an issue with processing the request. I'll try again.\" additional_kwargs={'tool_calls': [{'id': 'call_5rV7k3g7oW38bD9KUTsSxK8l', 'function': {'arguments': '{\"input\":3}', 'name': 'magic_function'}, 'type': 'function'}]} response_metadata={'token_usage': {'completion_tokens': 30, 'prompt_tokens': 87, 'total_tokens': 117}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None} id='run-6e43ffd4-fb6f-4222-8503-a50ae268c0be-0' tool_calls=[{'name': 'magic_function', 'args': {'input': 3}, 'id': 'call_5rV7k3g7oW38bD9KUTsSxK8l', 'type': 'tool_call'}] usage_metadata={'input_tokens': 87, 'output_tokens': 30, 'total_tokens': 117}\n",
|
||||
"{'input': 'what is the value of magic_function(3)?', 'output': 'Agent stopped due to max iterations.'}\n"
|
||||
]
|
||||
}
|
||||
@@ -1125,7 +1128,7 @@
|
||||
"\n",
|
||||
"### In LangChain\n",
|
||||
"\n",
|
||||
"With LangChain's [AgentExecutor](https://api.python.langchain.com/en/latest/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor), you could trim the intermediate steps of long-running agents using [trim_intermediate_steps](https://api.python.langchain.com/en/latest/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.trim_intermediate_steps), which is either an integer (indicating the agent should keep the last N steps) or a custom function.\n",
|
||||
"With LangChain's [AgentExecutor](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor), you could trim the intermediate steps of long-running agents using [trim_intermediate_steps](https://python.langchain.com/v0.2/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor.trim_intermediate_steps), which is either an integer (indicating the agent should keep the last N steps) or a custom function.\n",
|
||||
"\n",
|
||||
"For instance, we could trim the value so the agent only sees the most recent intermediate step."
|
||||
]
|
||||
@@ -1322,7 +1325,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -9,11 +9,11 @@
|
||||
"\n",
|
||||
"It can often be useful to store multiple vectors per document. There are multiple use cases where this is beneficial. For example, we can embed multiple chunks of a document and associate those embeddings with the parent document, allowing retriever hits on the chunks to return the larger document.\n",
|
||||
"\n",
|
||||
"LangChain implements a base [MultiVectorRetriever](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.multi_vector.MultiVectorRetriever.html), which simplifies this process. Much of the complexity lies in how to create the multiple vectors per document. This notebook covers some of the common ways to create those vectors and use the `MultiVectorRetriever`.\n",
|
||||
"LangChain implements a base [MultiVectorRetriever](https://python.langchain.com/v0.2/api_reference/langchain/retrievers/langchain.retrievers.multi_vector.MultiVectorRetriever.html), which simplifies this process. Much of the complexity lies in how to create the multiple vectors per document. This notebook covers some of the common ways to create those vectors and use the `MultiVectorRetriever`.\n",
|
||||
"\n",
|
||||
"The methods to create multiple vectors per document include:\n",
|
||||
"\n",
|
||||
"- Smaller chunks: split a document into smaller chunks, and embed those (this is [ParentDocumentRetriever](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.parent_document_retriever.ParentDocumentRetriever.html)).\n",
|
||||
"- Smaller chunks: split a document into smaller chunks, and embed those (this is [ParentDocumentRetriever](https://python.langchain.com/v0.2/api_reference/langchain/retrievers/langchain.retrievers.parent_document_retriever.ParentDocumentRetriever.html)).\n",
|
||||
"- Summary: create a summary for each document, embed that along with (or instead of) the document.\n",
|
||||
"- Hypothetical questions: create hypothetical questions that each document would be appropriate to answer, embed those along with (or instead of) the document.\n",
|
||||
"\n",
|
||||
@@ -68,7 +68,7 @@
|
||||
"source": [
|
||||
"## Smaller chunks\n",
|
||||
"\n",
|
||||
"Often times it can be useful to retrieve larger chunks of information, but embed smaller chunks. This allows for embeddings to capture the semantic meaning as closely as possible, but for as much context as possible to be passed downstream. Note that this is what the [ParentDocumentRetriever](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.parent_document_retriever.ParentDocumentRetriever.html) does. Here we show what is going on under the hood.\n",
|
||||
"Often times it can be useful to retrieve larger chunks of information, but embed smaller chunks. This allows for embeddings to capture the semantic meaning as closely as possible, but for as much context as possible to be passed downstream. Note that this is what the [ParentDocumentRetriever](https://python.langchain.com/v0.2/api_reference/langchain/retrievers/langchain.retrievers.parent_document_retriever.ParentDocumentRetriever.html) does. Here we show what is going on under the hood.\n",
|
||||
"\n",
|
||||
"We will make a distinction between the vector store, which indexes embeddings of the (sub) documents, and the document store, which houses the \"parent\" documents and associates them with an identifier."
|
||||
]
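A minimal sketch of the pattern described above (assumes `langchain-chroma`, `langchain-openai`, and an OpenAI API key; the parent document text and collection name are placeholders):

```python
import uuid

from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.storage import InMemoryByteStore
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

parents = [Document(page_content="A long parent document about LangChain retrievers ...")]
doc_ids = [str(uuid.uuid4()) for _ in parents]

child_splitter = RecursiveCharacterTextSplitter(chunk_size=200)
children = []
for doc_id, parent in zip(doc_ids, parents):
    for chunk in child_splitter.split_documents([parent]):
        chunk.metadata["doc_id"] = doc_id  # link each sub-chunk back to its parent
        children.append(chunk)

retriever = MultiVectorRetriever(
    vectorstore=Chroma(collection_name="children", embedding_function=OpenAIEmbeddings()),
    byte_store=InMemoryByteStore(),  # the document store holding the parents
    id_key="doc_id",
)
retriever.vectorstore.add_documents(children)         # index the small chunks
retriever.docstore.mset(list(zip(doc_ids, parents)))  # store the parent documents

# A hit on a small chunk returns the larger parent document
print(retriever.invoke("retrievers")[0].page_content[:80])
```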
|
||||
@@ -103,7 +103,7 @@
|
||||
"id": "d4feded4-856a-4282-91c3-53aabc62e6ff",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We next generate the \"sub\" documents by splitting the original documents. Note that we store the document identifier in the `metadata` of the corresponding [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html) object."
|
||||
"We next generate the \"sub\" documents by splitting the original documents. Note that we store the document identifier in the `metadata` of the corresponding [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html) object."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -207,7 +207,7 @@
|
||||
"id": "cdef8339-f9fa-4b3b-955f-ad9dbdf2734f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The default search type the retriever performs on the vector database is a similarity search. LangChain vector stores also support searching via [Max Marginal Relevance](https://api.python.langchain.com/en/latest/vectorstores/langchain_core.vectorstores.VectorStore.html#langchain_core.vectorstores.VectorStore.max_marginal_relevance_search). This can be controlled via the `search_type` parameter of the retriever:"
|
||||
"The default search type the retriever performs on the vector database is a similarity search. LangChain vector stores also support searching via [Max Marginal Relevance](https://python.langchain.com/v0.2/api_reference/core/vectorstores/langchain_core.vectorstores.VectorStore.html#langchain_core.vectorstores.VectorStore.max_marginal_relevance_search). This can be controlled via the `search_type` parameter of the retriever:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -244,7 +244,7 @@
|
||||
"\n",
|
||||
"A summary may be able to distill more accurately what a chunk is about, leading to better retrieval. Here we show how to create summaries, and then embed those.\n",
|
||||
"\n",
|
||||
"We construct a simple [chain](/docs/how_to/sequence) that will receive an input [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html) object and generate a summary using a LLM.\n",
|
||||
"We construct a simple [chain](/docs/how_to/sequence) that will receive an input [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html) object and generate a summary using a LLM.\n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
|
||||
@@ -294,7 +294,7 @@
|
||||
"id": "3faa9fde-1b09-4849-a815-8b2e89c30a02",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Note that we can [batch](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable) the chain accross documents:"
|
||||
"Note that we can [batch](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable) the chain accross documents:"
|
||||
]
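As a rough illustration of `.batch`, a toy runnable stands in below for the summarization chain (the word-count lambda is only a placeholder):

```python
from langchain_core.runnables import RunnableLambda

# Toy stand-in for the summarization chain: any runnable exposes .batch()
chain = RunnableLambda(lambda text: f"{len(text.split())} words")
print(chain.batch(["one two three", "four five"], config={"max_concurrency": 5}))
# -> ['3 words', '2 words']
```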
|
||||
},
|
||||
{
|
||||
|
||||
@@ -131,7 +131,7 @@
|
||||
"id": "84498e02",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Find out api documentation for [OutputFixingParser](https://api.python.langchain.com/en/latest/output_parsers/langchain.output_parsers.fix.OutputFixingParser.html#langchain.output_parsers.fix.OutputFixingParser)."
|
||||
"Find out api documentation for [OutputFixingParser](https://python.langchain.com/v0.2/api_reference/langchain/output_parsers/langchain.output_parsers.fix.OutputFixingParser.html#langchain.output_parsers.fix.OutputFixingParser)."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
"id": "ae909b7a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The [`JsonOutputParser`](https://api.python.langchain.com/en/latest/output_parsers/langchain_core.output_parsers.json.JsonOutputParser.html) is one built-in option for prompting for and then parsing JSON output. While it is similar in functionality to the [`PydanticOutputParser`](https://api.python.langchain.com/en/latest/output_parsers/langchain_core.output_parsers.pydantic.PydanticOutputParser.html), it also supports streaming back partial JSON objects.\n",
|
||||
"The [`JsonOutputParser`](https://python.langchain.com/v0.2/api_reference/core/output_parsers/langchain_core.output_parsers.json.JsonOutputParser.html) is one built-in option for prompting for and then parsing JSON output. While it is similar in functionality to the [`PydanticOutputParser`](https://python.langchain.com/v0.2/api_reference/core/output_parsers/langchain_core.output_parsers.pydantic.PydanticOutputParser.html), it also supports streaming back partial JSON objects.\n",
|
||||
"\n",
|
||||
"Here's an example of how it can be used alongside [Pydantic](https://docs.pydantic.dev/) to conveniently declare the expected schema:"
|
||||
]
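A minimal sketch of that pattern (assumes `langchain-openai` and an OpenAI API key; the `Joke` schema is illustrative):

```python
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI


class Joke(BaseModel):
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")


parser = JsonOutputParser(pydantic_object=Joke)
prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

chain = prompt | ChatOpenAI(temperature=0) | parser
print(chain.invoke({"query": "Tell me a joke."}))  # a dict matching the Joke schema
```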
|
||||
|
||||
@@ -244,7 +244,7 @@
|
||||
"id": "e3a2513a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Find out api documentation for [RetryOutputParser](https://api.python.langchain.com/en/latest/output_parsers/langchain.output_parsers.retry.RetryOutputParser.html#langchain.output_parsers.retry.RetryOutputParser)."
|
||||
"Find out api documentation for [RetryOutputParser](https://python.langchain.com/v0.2/api_reference/langchain/output_parsers/langchain.output_parsers.retry.RetryOutputParser.html#langchain.output_parsers.retry.RetryOutputParser)."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
"\n",
|
||||
"LLMs from different providers often have different strengths depending on the specific data they are trianed on. This also means that some may be \"better\" and more reliable at generating output in formats other than JSON.\n",
|
||||
"\n",
|
||||
"This guide shows you how to use the [`XMLOutputParser`](https://api.python.langchain.com/en/latest/output_parsers/langchain_core.output_parsers.xml.XMLOutputParser.html) to prompt models for XML output, then and parse that output into a usable format.\n",
|
||||
"This guide shows you how to use the [`XMLOutputParser`](https://python.langchain.com/v0.2/api_reference/core/output_parsers/langchain_core.output_parsers.xml.XMLOutputParser.html) to prompt models for XML output, then and parse that output into a usable format.\n",
|
||||
"\n",
|
||||
":::{.callout-note}\n",
|
||||
"Keep in mind that large language models are leaky abstractions! You'll have to use an LLM with sufficient capacity to generate well-formed XML.\n",
|
||||
|
||||
@@ -47,7 +47,7 @@
|
||||
"id": "cc479f3a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We use [Pydantic](https://docs.pydantic.dev) with the [`YamlOutputParser`](https://api.python.langchain.com/en/latest/output_parsers/langchain.output_parsers.yaml.YamlOutputParser.html#langchain.output_parsers.yaml.YamlOutputParser) to declare our data model and give the model more context as to what type of YAML it should generate:"
|
||||
"We use [Pydantic](https://docs.pydantic.dev) with the [`YamlOutputParser`](https://python.langchain.com/v0.2/api_reference/langchain/output_parsers/langchain.output_parsers.yaml.YamlOutputParser.html#langchain.output_parsers.yaml.YamlOutputParser) to declare our data model and give the model more context as to what type of YAML it should generate:"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
"\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"The [`RunnableParallel`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableParallel.html) primitive is essentially a dict whose values are runnables (or things that can be coerced to runnables, like functions). It runs all of its values in parallel, and each value is called with the overall input of the `RunnableParallel`. The final return value is a dict with the results of each value under its appropriate key.\n",
|
||||
"The [`RunnableParallel`](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.RunnableParallel.html) primitive is essentially a dict whose values are runnables (or things that can be coerced to runnables, like functions). It runs all of its values in parallel, and each value is called with the overall input of the `RunnableParallel`. The final return value is a dict with the results of each value under its appropriate key.\n",
|
||||
"\n",
|
||||
"## Formatting with `RunnableParallels`\n",
|
||||
"\n",
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
":::\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"When composing chains with several steps, sometimes you will want to pass data from previous steps unchanged for use as input to a later step. The [`RunnablePassthrough`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html) class allows you to do just this, and is typically is used in conjuction with a [RunnableParallel](/docs/how_to/parallel/) to pass data through to a later step in your constructed chains.\n",
|
||||
"When composing chains with several steps, sometimes you will want to pass data from previous steps unchanged for use as input to a later step. The [`RunnablePassthrough`](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html) class allows you to do just this, and is typically is used in conjuction with a [RunnableParallel](/docs/how_to/parallel/) to pass data through to a later step in your constructed chains.\n",
|
||||
"\n",
|
||||
"See the example below:"
|
||||
]
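A small, self-contained sketch of pass-through behaviour:

```python
from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough

chain = RunnableParallel(
    passed=RunnablePassthrough(),                     # forwards the input unchanged
    modified=RunnableLambda(lambda x: x["num"] + 1),  # derives a new value from it
)
print(chain.invoke({"num": 1}))  # {'passed': {'num': 1}, 'modified': 2}
```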
|
||||
|
||||
@@ -102,7 +102,7 @@
|
||||
"source": [
|
||||
"A chat prompt is made up a of a list of messages. Similarly to the above example, we can concatenate chat prompt templates. Each new element is a new message in the final prompt.\n",
|
||||
"\n",
|
||||
"First, let's initialize the a [`ChatPromptTemplate`](https://api.python.langchain.com/en/latest/prompts/langchain_core.prompts.chat.ChatPromptTemplate.html) with a [`SystemMessage`](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.system.SystemMessage.html)."
|
||||
"First, let's initialize the a [`ChatPromptTemplate`](https://python.langchain.com/v0.2/api_reference/core/prompts/langchain_core.prompts.chat.ChatPromptTemplate.html) with a [`SystemMessage`](https://python.langchain.com/v0.2/api_reference/core/messages/langchain_core.messages.system.SystemMessage.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -123,7 +123,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can then easily create a pipeline combining it with other messages *or* message templates.\n",
|
||||
"Use a `Message` when there is no variables to be formatted, use a `MessageTemplate` when there are variables to be formatted. You can also use just a string (note: this will automatically get inferred as a [`HumanMessagePromptTemplate`](https://api.python.langchain.com/en/latest/prompts/langchain_core.prompts.chat.HumanMessagePromptTemplate.html).)"
|
||||
"Use a `Message` when there is no variables to be formatted, use a `MessageTemplate` when there are variables to be formatted. You can also use just a string (note: this will automatically get inferred as a [`HumanMessagePromptTemplate`](https://python.langchain.com/v0.2/api_reference/core/prompts/langchain_core.prompts.chat.HumanMessagePromptTemplate.html).)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -183,7 +183,7 @@
|
||||
"id": "0a5892f9-e4d8-4b7c-b6a5-4651539b9734",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"LangChain includes a class called [`PipelinePromptTemplate`](https://api.python.langchain.com/en/latest/prompts/langchain_core.prompts.pipeline.PipelinePromptTemplate.html), which can be useful when you want to reuse parts of prompts. A PipelinePrompt consists of two main parts:\n",
|
||||
"LangChain includes a class called [`PipelinePromptTemplate`](https://python.langchain.com/v0.2/api_reference/core/prompts/langchain_core.prompts.pipeline.PipelinePromptTemplate.html), which can be useful when you want to reuse parts of prompts. A PipelinePrompt consists of two main parts:\n",
|
||||
"\n",
|
||||
"- Final prompt: The final prompt that is returned\n",
|
||||
"- Pipeline prompts: A list of tuples, consisting of a string name and a prompt template. Each prompt template will be formatted and then passed to future prompt templates as a variable with the same name."
|
||||
|
||||
@@ -42,7 +42,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%capture --no-stderr\n",
|
||||
"%pip install --upgrade --quiet langchain langchain-community langchain-chroma bs4"
|
||||
"%pip install --upgrade --quiet langchain langchain-community langchain-chroma beautifulsoup4"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -102,7 +102,7 @@
|
||||
"source": [
|
||||
"## Chains {#chains}\n",
|
||||
"\n",
|
||||
"In a conversational RAG application, queries issued to the retriever should be informed by the context of the conversation. LangChain provides a [create_history_aware_retriever](https://api.python.langchain.com/en/latest/chains/langchain.chains.history_aware_retriever.create_history_aware_retriever.html) constructor to simplify this. It constructs a chain that accepts keys `input` and `chat_history` as input, and has the same output schema as a retriever. `create_history_aware_retriever` requires as inputs: \n",
|
||||
"In a conversational RAG application, queries issued to the retriever should be informed by the context of the conversation. LangChain provides a [create_history_aware_retriever](https://python.langchain.com/v0.2/api_reference/langchain/chains/langchain.chains.history_aware_retriever.create_history_aware_retriever.html) constructor to simplify this. It constructs a chain that accepts keys `input` and `chat_history` as input, and has the same output schema as a retriever. `create_history_aware_retriever` requires as inputs: \n",
|
||||
"\n",
|
||||
"1. LLM;\n",
|
||||
"2. Retriever;\n",
|
||||
@@ -155,7 +155,7 @@
|
||||
"id": "15f8ad59-19de-42e3-85a8-3ba95ee0bd43",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For the retriever, we will use [WebBaseLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.web_base.WebBaseLoader.html) to load the content of a web page. Here we instantiate a `Chroma` vectorstore and then use its [.as_retriever](https://api.python.langchain.com/en/latest/vectorstores/langchain_core.vectorstores.VectorStore.html#langchain_core.vectorstores.VectorStore.as_retriever) method to build a retriever that can be incorporated into [LCEL](/docs/concepts/#langchain-expression-language) chains."
|
||||
"For the retriever, we will use [WebBaseLoader](https://python.langchain.com/v0.2/api_reference/community/document_loaders/langchain_community.document_loaders.web_base.WebBaseLoader.html) to load the content of a web page. Here we instantiate a `Chroma` vectorstore and then use its [.as_retriever](https://python.langchain.com/v0.2/api_reference/core/vectorstores/langchain_core.vectorstores.VectorStore.html#langchain_core.vectorstores.VectorStore.as_retriever) method to build a retriever that can be incorporated into [LCEL](/docs/concepts/#langchain-expression-language) chains."
|
||||
]
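A minimal sketch of that setup (assumes `beautifulsoup4`, `langchain-chroma`, `langchain-openai`, and an OpenAI API key; the URL is just an example page):

```python
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

docs = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/").load()
splits = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200).split_documents(docs)

vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
retriever = vectorstore.as_retriever()
```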
|
||||
},
|
||||
{
|
||||
@@ -260,9 +260,9 @@
|
||||
"\n",
|
||||
"Now we can build our full QA chain.\n",
|
||||
"\n",
|
||||
"As in the [RAG tutorial](/docs/tutorials/rag), we will use [create_stuff_documents_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html) to generate a `question_answer_chain`, with input keys `context`, `chat_history`, and `input`-- it accepts the retrieved context alongside the conversation history and query to generate an answer.\n",
|
||||
"As in the [RAG tutorial](/docs/tutorials/rag), we will use [create_stuff_documents_chain](https://python.langchain.com/v0.2/api_reference/langchain/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html) to generate a `question_answer_chain`, with input keys `context`, `chat_history`, and `input`-- it accepts the retrieved context alongside the conversation history and query to generate an answer.\n",
|
||||
"\n",
|
||||
"We build our final `rag_chain` with [create_retrieval_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.retrieval.create_retrieval_chain.html). This chain applies the `history_aware_retriever` and `question_answer_chain` in sequence, retaining intermediate outputs such as the retrieved context for convenience. It has input keys `input` and `chat_history`, and includes `input`, `chat_history`, `context`, and `answer` in its output."
|
||||
"We build our final `rag_chain` with [create_retrieval_chain](https://python.langchain.com/v0.2/api_reference/langchain/chains/langchain.chains.retrieval.create_retrieval_chain.html). This chain applies the `history_aware_retriever` and `question_answer_chain` in sequence, retaining intermediate outputs such as the retrieved context for convenience. It has input keys `input` and `chat_history`, and includes `input`, `chat_history`, `context`, and `answer` in its output."
|
||||
]
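Putting those pieces together, a minimal sketch (assumes a `retriever` such as the one built above, plus an OpenAI API key; the prompt texts are placeholders):

```python
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

contextualize_prompt = ChatPromptTemplate.from_messages([
    ("system", "Reformulate the question so it can be understood without the chat history."),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])
history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_prompt)

qa_prompt = ChatPromptTemplate.from_messages([
    ("system", "Answer the question using the following context:\n\n{context}"),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
result = rag_chain.invoke({"input": "What is Task Decomposition?", "chat_history": []})
print(result["answer"])
```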
|
||||
},
|
||||
{
|
||||
@@ -305,7 +305,7 @@
|
||||
"1. An object for storing the chat history;\n",
|
||||
"2. An object that wraps our chain and manages updates to the chat history.\n",
|
||||
"\n",
|
||||
"For these we will use [BaseChatMessageHistory](https://api.python.langchain.com/en/latest/chat_history/langchain_core.chat_history.BaseChatMessageHistory.html) and [RunnableWithMessageHistory](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.history.RunnableWithMessageHistory.html). The latter is a wrapper for an LCEL chain and a `BaseChatMessageHistory` that handles injecting chat history into inputs and updating it after each invocation.\n",
|
||||
"For these we will use [BaseChatMessageHistory](https://python.langchain.com/v0.2/api_reference/core/chat_history/langchain_core.chat_history.BaseChatMessageHistory.html) and [RunnableWithMessageHistory](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.history.RunnableWithMessageHistory.html). The latter is a wrapper for an LCEL chain and a `BaseChatMessageHistory` that handles injecting chat history into inputs and updating it after each invocation.\n",
|
||||
"\n",
|
||||
"For a detailed walkthrough of how to use these classes together to create a stateful conversational chain, head to the [How to add message history (memory)](/docs/how_to/message_history/) LCEL how-to guide.\n",
|
||||
"\n",
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
"\n",
|
||||
"We generally suggest using the first item of the list that works for your use-case. That is, if your model supports tool-calling, try methods 1 or 2; otherwise, or if those fail, advance down the list.\n",
|
||||
"\n",
|
||||
"Let's first create a simple RAG chain. To start we'll just retrieve from Wikipedia using the [WikipediaRetriever](https://api.python.langchain.com/en/latest/retrievers/langchain_community.retrievers.wikipedia.WikipediaRetriever.html)."
|
||||
"Let's first create a simple RAG chain. To start we'll just retrieve from Wikipedia using the [WikipediaRetriever](https://python.langchain.com/v0.2/api_reference/community/retrievers/langchain_community.retrievers.wikipedia.WikipediaRetriever.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -253,7 +253,7 @@
|
||||
"source": [
|
||||
"## Function-calling\n",
|
||||
"\n",
|
||||
"If your LLM of choice implements a [tool-calling](/docs/concepts#functiontool-calling) feature, you can use it to make the model specify which of the provided documents it's referencing when generating its answer. LangChain tool-calling models implement a `.with_structured_output` method which will force generation adhering to a desired schema (see for example [here](https://api.python.langchain.com/en/latest/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html#langchain_openai.chat_models.base.ChatOpenAI.with_structured_output)).\n",
|
||||
"If your LLM of choice implements a [tool-calling](/docs/concepts#functiontool-calling) feature, you can use it to make the model specify which of the provided documents it's referencing when generating its answer. LangChain tool-calling models implement a `.with_structured_output` method which will force generation adhering to a desired schema (see for example [here](https://python.langchain.com/v0.2/api_reference/openai/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html#langchain_openai.chat_models.base.ChatOpenAI.with_structured_output)).\n",
|
||||
"\n",
|
||||
"### Cite documents\n",
|
||||
"\n",
|
||||
@@ -616,7 +616,7 @@
|
||||
"\n",
|
||||
"1. We update the formatting function to wrap the retrieved context in XML tags;\n",
|
||||
"2. We do not use `.with_structured_output` (e.g., because it does not exist for a model);\n",
|
||||
"3. We use [XMLOutputParser](https://api.python.langchain.com/en/latest/output_parsers/langchain_core.output_parsers.xml.XMLOutputParser.html) in place of `StrOutputParser` to parse the answer into a dict."
|
||||
"3. We use [XMLOutputParser](https://python.langchain.com/v0.2/api_reference/core/output_parsers/langchain_core.output_parsers.xml.XMLOutputParser.html) in place of `StrOutputParser` to parse the answer into a dict."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -711,7 +711,7 @@
|
||||
"source": [
|
||||
"## Retrieval post-processing\n",
|
||||
"\n",
|
||||
"Another approach is to post-process our retrieved documents to compress the content, so that the source content is already minimal enough that we don't need the model to cite specific sources or spans. For example, we could break up each document into a sentence or two, embed those and keep only the most relevant ones. LangChain has some built-in components for this. Here we'll use a [RecursiveCharacterTextSplitter](https://api.python.langchain.com/en/latest/text_splitter/langchain_text_splitters.RecursiveCharacterTextSplitter.html#langchain_text_splitters.RecursiveCharacterTextSplitter), which creates chunks of a sepacified size by splitting on separator substrings, and an [EmbeddingsFilter](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.document_compressors.embeddings_filter.EmbeddingsFilter.html#langchain.retrievers.document_compressors.embeddings_filter.EmbeddingsFilter), which keeps only the texts with the most relevant embeddings.\n",
|
||||
"Another approach is to post-process our retrieved documents to compress the content, so that the source content is already minimal enough that we don't need the model to cite specific sources or spans. For example, we could break up each document into a sentence or two, embed those and keep only the most relevant ones. LangChain has some built-in components for this. Here we'll use a [RecursiveCharacterTextSplitter](https://python.langchain.com/v0.2/api_reference/text_splitters/text_splitter/langchain_text_splitters.RecursiveCharacterTextSplitter.html#langchain_text_splitters.RecursiveCharacterTextSplitter), which creates chunks of a sepacified size by splitting on separator substrings, and an [EmbeddingsFilter](https://python.langchain.com/v0.2/api_reference/langchain/retrievers/langchain.retrievers.document_compressors.embeddings_filter.EmbeddingsFilter.html#langchain.retrievers.document_compressors.embeddings_filter.EmbeddingsFilter), which keeps only the texts with the most relevant embeddings.\n",
|
||||
"\n",
|
||||
"This approach effectively swaps our original retriever with an updated one that compresses the documents. To start, we build the retriever:"
|
||||
]
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
"\n",
|
||||
"We will cover two approaches:\n",
|
||||
"\n",
|
||||
"1. Using the built-in [create_retrieval_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.retrieval.create_retrieval_chain.html), which returns sources by default;\n",
|
||||
"1. Using the built-in [create_retrieval_chain](https://python.langchain.com/v0.2/api_reference/langchain/chains/langchain.chains.retrieval.create_retrieval_chain.html), which returns sources by default;\n",
|
||||
"2. Using a simple [LCEL](/docs/concepts#langchain-expression-language-lcel) implementation, to show the operating principle.\n",
|
||||
"\n",
|
||||
"We will also show how to structure sources into the model response, such that a model can report what specific sources it used in generating its answer."
|
||||
@@ -40,7 +40,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain langchain-community langchainhub langchain-openai langchain-chroma bs4"
|
||||
"%pip install --upgrade --quiet langchain langchain-community langchainhub langchain-openai langchain-chroma beautifulsoup4"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -33,7 +33,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain langchain-community langchainhub langchain-openai langchain-chroma bs4"
|
||||
"%pip install --upgrade --quiet langchain langchain-community langchainhub langchain-openai langchain-chroma beautifulsoup4"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -328,7 +328,7 @@
|
||||
"id": "8b2d224d-2a82-418b-b562-01ea210b86ef",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"More simply, we can use the [.pick](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.pick) method to select only the desired key:"
|
||||
"More simply, we can use the [.pick](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.pick) method to select only the desired key:"
|
||||
]
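A self-contained sketch of `.pick` on a toy chain (the lambdas are placeholders for the real retrieval chain):

```python
from langchain_core.runnables import RunnableLambda, RunnableParallel

chain = RunnableParallel(
    answer=RunnableLambda(lambda x: x["question"].upper()),
    context=RunnableLambda(lambda x: ["some retrieved context"]),
)
# .pick wraps the chain so that only the requested key is returned
print(chain.pick("answer").invoke({"question": "hello"}))  # 'HELLO'
```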
|
||||
},
|
||||
{
|
||||
@@ -432,7 +432,7 @@
|
||||
"\n",
|
||||
"To stream intermediate output, we recommend use of the async `.astream_events` method. This method will stream output from all \"events\" in the chain, and can be quite verbose. We can filter using tags, event types, and other criteria, as we do here.\n",
|
||||
"\n",
|
||||
"Below we show a typical `.astream_events` loop, where we pass in the chain input and emit desired results. See the [API reference](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.astream_events) and [streaming guide](/docs/how_to/streaming) for more detail."
|
||||
"Below we show a typical `.astream_events` loop, where we pass in the chain input and emit desired results. See the [API reference](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable.astream_events) and [streaming guide](/docs/how_to/streaming) for more detail."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -107,7 +107,7 @@
|
||||
"id": "3f05bc21-227e-4d2c-af51-16d69ad3cd7b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To obtain LangChain [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html) objects, use the `.create_documents` method:"
|
||||
"To obtain LangChain [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html) objects, use the `.create_documents` method:"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
"\n",
|
||||
"To obtain the string content directly, use `.split_text`.\n",
|
||||
"\n",
|
||||
"To create LangChain [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html) objects (e.g., for use in downstream tasks), use `.create_documents`."
|
||||
"To create LangChain [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html) objects (e.g., for use in downstream tasks), use `.create_documents`."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -69,7 +69,7 @@
|
||||
"id": "774a5199-c2ff-43bc-bf07-87573e0b8db4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To instantiate a [SemanticChunker](https://api.python.langchain.com/en/latest/text_splitter/langchain_experimental.text_splitter.SemanticChunker.html), we must specify an embedding model. Below we will use [OpenAIEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.openai.OpenAIEmbeddings.html). "
|
||||
"To instantiate a [SemanticChunker](https://python.langchain.com/v0.2/api_reference/experimental/text_splitter/langchain_experimental.text_splitter.SemanticChunker.html), we must specify an embedding model. Below we will use [OpenAIEmbeddings](https://python.langchain.com/v0.2/api_reference/community/embeddings/langchain_community.embeddings.openai.OpenAIEmbeddings.html). "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -92,7 +92,7 @@
|
||||
"source": [
|
||||
"## Split Text\n",
|
||||
"\n",
|
||||
"We split text in the usual way, e.g., by invoking `.create_documents` to create LangChain [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html) objects:"
|
||||
"We split text in the usual way, e.g., by invoking `.create_documents` to create LangChain [Document](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html) objects:"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
"\n",
|
||||
"One point about [LangChain Expression Language](/docs/concepts/#langchain-expression-language) is that any two runnables can be \"chained\" together into sequences. The output of the previous runnable's `.invoke()` call is passed as input to the next runnable. This can be done using the pipe operator (`|`), or the more explicit `.pipe()` method, which does the same thing.\n",
|
||||
"\n",
|
||||
"The resulting [`RunnableSequence`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableSequence.html) is itself a runnable, which means it can be invoked, streamed, or further chained just like any other runnable. Advantages of chaining runnables in this way are efficient streaming (the sequence will stream output as soon as it is available), and debugging and tracing with tools like [LangSmith](/docs/how_to/debugging).\n",
|
||||
"The resulting [`RunnableSequence`](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.base.RunnableSequence.html) is itself a runnable, which means it can be invoked, streamed, or further chained just like any other runnable. Advantages of chaining runnables in this way are efficient streaming (the sequence will stream output as soon as it is available), and debugging and tracing with tools like [LangSmith](/docs/how_to/debugging).\n",
|
||||
"\n",
|
||||
"## The pipe operator: `|`\n",
|
||||
"\n",
|
||||
|
||||
@@ -12,9 +12,9 @@
|
||||
"- Secrets, such as API keys, are separated from other parameters and can be loaded back to the object on de-serialization;\n",
|
||||
"- De-serialization is kept compatible across package versions, so objects that were serialized with one version of LangChain can be properly de-serialized with another.\n",
|
||||
"\n",
|
||||
"To save and load LangChain objects using this system, use the `dumpd`, `dumps`, `load`, and `loads` functions in the [load module](https://api.python.langchain.com/en/latest/core_api_reference.html#module-langchain_core.load) of `langchain-core`. These functions support JSON and JSON-serializable objects.\n",
|
||||
"To save and load LangChain objects using this system, use the `dumpd`, `dumps`, `load`, and `loads` functions in the [load module](https://python.langchain.com/v0.2/api_reference/core/load.html) of `langchain-core`. These functions support JSON and JSON-serializable objects.\n",
|
||||
"\n",
|
||||
"All LangChain objects that inherit from [Serializable](https://api.python.langchain.com/en/latest/load/langchain_core.load.serializable.Serializable.html) are JSON-serializable. Examples include [messages](https://api.python.langchain.com/en/latest/core_api_reference.html#module-langchain_core.messages), [document objects](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html) (e.g., as returned from [retrievers](/docs/concepts/#retrievers)), and most [Runnables](/docs/concepts/#langchain-expression-language-lcel), such as chat models, retrievers, and [chains](/docs/how_to/sequence) implemented with the LangChain Expression Language.\n",
|
||||
"All LangChain objects that inherit from [Serializable](https://python.langchain.com/v0.2/api_reference/core/load/langchain_core.load.serializable.Serializable.html) are JSON-serializable. Examples include [messages](https://python.langchain.com/v0.2/api_reference//python/core_api_reference.html#module-langchain_core.messages), [document objects](https://python.langchain.com/v0.2/api_reference/core/documents/langchain_core.documents.base.Document.html) (e.g., as returned from [retrievers](/docs/concepts/#retrievers)), and most [Runnables](/docs/concepts/#langchain-expression-language-lcel), such as chat models, retrievers, and [chains](/docs/how_to/sequence) implemented with the LangChain Expression Language.\n",
|
||||
"\n",
|
||||
"Below we walk through an example with a simple [LLM chain](/docs/tutorials/llm_chain).\n",
|
||||
"\n",
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
"1. How the text is split: by character passed in.\n",
|
||||
"2. How the chunk size is measured: by `tiktoken` tokenizer.\n",
|
||||
"\n",
|
||||
"[CharacterTextSplitter](https://api.python.langchain.com/en/latest/character/langchain_text_splitters.character.CharacterTextSplitter.html), [RecursiveCharacterTextSplitter](https://api.python.langchain.com/en/latest/character/langchain_text_splitters.character.RecursiveCharacterTextSplitter.html), and [TokenTextSplitter](https://api.python.langchain.com/en/latest/base/langchain_text_splitters.base.TokenTextSplitter.html) can be used with `tiktoken` directly."
|
||||
"[CharacterTextSplitter](https://python.langchain.com/v0.2/api_reference/text_splitters/character/langchain_text_splitters.character.CharacterTextSplitter.html), [RecursiveCharacterTextSplitter](https://python.langchain.com/v0.2/api_reference/text_splitters/character/langchain_text_splitters.character.RecursiveCharacterTextSplitter.html), and [TokenTextSplitter](https://python.langchain.com/v0.2/api_reference/langchain_text_splitters/base/langchain_text_splitters.base.TokenTextSplitter.html) can be used with `tiktoken` directly."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -59,7 +59,7 @@
|
||||
"id": "a3ba1d8a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To split with a [CharacterTextSplitter](https://api.python.langchain.com/en/latest/character/langchain_text_splitters.character.CharacterTextSplitter.html) and then merge chunks with `tiktoken`, use its `.from_tiktoken_encoder()` method. Note that splits from this method can be larger than the chunk size measured by the `tiktoken` tokenizer.\n",
|
||||
"To split with a [CharacterTextSplitter](https://python.langchain.com/v0.2/api_reference/text_splitters/character/langchain_text_splitters.character.CharacterTextSplitter.html) and then merge chunks with `tiktoken`, use its `.from_tiktoken_encoder()` method. Note that splits from this method can be larger than the chunk size measured by the `tiktoken` tokenizer.\n",
|
||||
"\n",
|
||||
"The `.from_tiktoken_encoder()` method takes either `encoding_name` as an argument (e.g. `cl100k_base`), or the `model_name` (e.g. `gpt-4`). All additional arguments like `chunk_size`, `chunk_overlap`, and `separators` are used to instantiate `CharacterTextSplitter`:"
|
||||
]
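A minimal sketch of that usage (assumes `tiktoken` is installed; the file path is a placeholder):

```python
from langchain_text_splitters import CharacterTextSplitter

with open("state_of_the_union.txt") as f:  # any long text file
    state_of_the_union = f.read()

text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    encoding_name="cl100k_base", chunk_size=100, chunk_overlap=0
)
texts = text_splitter.split_text(state_of_the_union)
print(texts[0])
```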
|
||||
@@ -279,7 +279,7 @@
|
||||
"source": [
|
||||
"## SentenceTransformers\n",
|
||||
"\n",
|
||||
"The [SentenceTransformersTokenTextSplitter](https://api.python.langchain.com/en/latest/sentence_transformers/langchain_text_splitters.sentence_transformers.SentenceTransformersTokenTextSplitter.html) is a specialized text splitter for use with the sentence-transformer models. The default behaviour is to split the text into chunks that fit the token window of the sentence transformer model that you would like to use.\n",
|
||||
"The [SentenceTransformersTokenTextSplitter](https://python.langchain.com/v0.2/api_reference/text_splitters/sentence_transformers/langchain_text_splitters.sentence_transformers.SentenceTransformersTokenTextSplitter.html) is a specialized text splitter for use with the sentence-transformer models. The default behaviour is to split the text into chunks that fit the token window of the sentence transformer model that you would like to use.\n",
|
||||
"\n",
|
||||
"To split text and constrain token counts according to the sentence-transformers tokenizer, instantiate a `SentenceTransformersTokenTextSplitter`. You can optionally specify:\n",
|
||||
"\n",
|
||||
|
||||
@@ -609,7 +609,7 @@
|
||||
"source": [
|
||||
"### Agent\n",
|
||||
"\n",
|
||||
"For complex questions it can be helpful for an LLM to be able to iteratively execute code while maintaining the inputs and outputs of its previous executions. This is where Agents come into play. They allow an LLM to decide how many times a tool needs to be invoked and keep track of the executions it's made so far. The [create_pandas_dataframe_agent](https://api.python.langchain.com/en/latest/agents/langchain_experimental.agents.agent_toolkits.pandas.base.create_pandas_dataframe_agent.html) is a built-in agent that makes it easy to work with dataframes:"
|
||||
"For complex questions it can be helpful for an LLM to be able to iteratively execute code while maintaining the inputs and outputs of its previous executions. This is where Agents come into play. They allow an LLM to decide how many times a tool needs to be invoked and keep track of the executions it's made so far. The [create_pandas_dataframe_agent](https://python.langchain.com/v0.2/api_reference/experimental/agents/langchain_experimental.agents.agent_toolkits.pandas.base.create_pandas_dataframe_agent.html) is a built-in agent that makes it easy to work with dataframes:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -761,7 +761,7 @@
|
||||
"* [SQL tutorial](/docs/tutorials/sql_qa): Many of the challenges of working with SQL db's and CSV's are generic to any structured data type, so it's useful to read the SQL techniques even if you're using Pandas for CSV data analysis.\n",
|
||||
"* [Tool use](/docs/how_to/tool_calling): Guides on general best practices when working with chains and agents that invoke tools\n",
|
||||
"* [Agents](/docs/tutorials/agents): Understand the fundamentals of building LLM agents.\n",
|
||||
"* Integrations: Sandboxed envs like [E2B](/docs/integrations/tools/e2b_data_analysis) and [Bearly](/docs/integrations/tools/bearly), utilities like [SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), related agents like [Spark DataFrame agent](/docs/integrations/tools/spark_sql)."
|
||||
"* Integrations: Sandboxed envs like [E2B](/docs/integrations/tools/e2b_data_analysis) and [Bearly](/docs/integrations/tools/bearly), utilities like [SQLDatabase](https://python.langchain.com/v0.2/api_reference/community/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), related agents like [Spark DataFrame agent](/docs/integrations/tools/spark_sql)."
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -55,7 +55,7 @@
|
||||
"* Run `.read Chinook_Sqlite.sql`\n",
|
||||
"* Test `SELECT * FROM Artist LIMIT 10;`\n",
|
||||
"\n",
|
||||
"Now, `Chinhook.db` is in our directory and we can interface with it using the SQLAlchemy-driven [SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html) class:"
|
||||
"Now, `Chinhook.db` is in our directory and we can interface with it using the SQLAlchemy-driven [SQLDatabase](https://python.langchain.com/v0.2/api_reference/community/utilities/langchain_community.utilities.sql_database.SQLDatabase.html) class:"
|
||||
]
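For example (assumes the `Chinook.db` file created in the steps above):

```python
from langchain_community.utilities import SQLDatabase

db = SQLDatabase.from_uri("sqlite:///Chinook.db")
print(db.dialect)                   # 'sqlite'
print(db.get_usable_table_names())  # ['Album', 'Artist', ...]
print(db.run("SELECT * FROM Artist LIMIT 10;"))
```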
|
||||
},
|
||||
{
|
||||
@@ -271,7 +271,7 @@
|
||||
"id": "04d52d01-1ccf-4753-b34a-0dcbc4921f78",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now that we've got a chain that can output the relevant tables for any query we can combine this with our [create_sql_query_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.sql_database.query.create_sql_query_chain.html), which can accept a list of `table_names_to_use` to determine which table schemas are included in the prompt:"
|
||||
"Now that we've got a chain that can output the relevant tables for any query we can combine this with our [create_sql_query_chain](https://python.langchain.com/v0.2/api_reference/langchain/chains/langchain.chains.sql_database.query.create_sql_query_chain.html), which can accept a list of `table_names_to_use` to determine which table schemas are included in the prompt:"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -6,11 +6,11 @@
|
||||
"source": [
|
||||
"# How to better prompt when doing SQL question-answering\n",
|
||||
"\n",
|
||||
"In this guide we'll go over prompting strategies to improve SQL query generation using [create_sql_query_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.sql_database.query.create_sql_query_chain.html). We'll largely focus on methods for getting relevant database-specific information in your prompt.\n",
|
||||
"In this guide we'll go over prompting strategies to improve SQL query generation using [create_sql_query_chain](https://python.langchain.com/v0.2/api_reference/langchain/chains/langchain.chains.sql_database.query.create_sql_query_chain.html). We'll largely focus on methods for getting relevant database-specific information in your prompt.\n",
|
||||
"\n",
|
||||
"We will cover: \n",
|
||||
"\n",
|
||||
"- How the dialect of the LangChain [SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html) impacts the prompt of the chain;\n",
|
||||
"- How the dialect of the LangChain [SQLDatabase](https://python.langchain.com/v0.2/api_reference/community/utilities/langchain_community.utilities.sql_database.SQLDatabase.html) impacts the prompt of the chain;\n",
|
||||
"- How to format schema information into the prompt using `SQLDatabase.get_context`;\n",
|
||||
"- How to build and select few-shot examples to assist the model.\n",
|
||||
"\n",
|
||||
@@ -84,7 +84,7 @@
|
||||
"source": [
|
||||
"## Dialect-specific prompting\n",
|
||||
"\n",
|
||||
"One of the simplest things we can do is make our prompt specific to the SQL dialect we're using. When using the built-in [create_sql_query_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.sql_database.query.create_sql_query_chain.html) and [SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html), this is handled for you for any of the following dialects:"
|
||||
"One of the simplest things we can do is make our prompt specific to the SQL dialect we're using. When using the built-in [create_sql_query_chain](https://python.langchain.com/v0.2/api_reference/langchain/chains/langchain.chains.sql_database.query.create_sql_query_chain.html) and [SQLDatabase](https://python.langchain.com/v0.2/api_reference/community/utilities/langchain_community.utilities.sql_database.SQLDatabase.html), this is handled for you for any of the following dialects:"
|
||||
]
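A minimal sketch (assumes an OpenAI API key and the `db` object from above):

```python
from langchain.chains import create_sql_query_chain
from langchain_openai import ChatOpenAI

chain = create_sql_query_chain(ChatOpenAI(model="gpt-3.5-turbo", temperature=0), db)
chain.get_prompts()[0].pretty_print()  # the prompt is already tailored to db.dialect
print(chain.invoke({"question": "How many employees are there?"}))
```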
|
||||
},
|
||||
{
|
||||
@@ -629,7 +629,7 @@
|
||||
"\n",
|
||||
"If we have enough examples, we may want to only include the most relevant ones in the prompt, either because they don't fit in the model's context window or because the long tail of examples distracts the model. And specifically, given any input we want to include the examples most relevant to that input.\n",
|
||||
"\n",
|
||||
"We can do just this using an ExampleSelector. In this case we'll use a [SemanticSimilarityExampleSelector](https://api.python.langchain.com/en/latest/example_selectors/langchain_core.example_selectors.semantic_similarity.SemanticSimilarityExampleSelector.html), which will store the examples in the vector database of our choosing. At runtime it will perform a similarity search between the input and our examples, and return the most semantically similar ones.\n",
|
||||
"We can do just this using an ExampleSelector. In this case we'll use a [SemanticSimilarityExampleSelector](https://python.langchain.com/v0.2/api_reference/core/example_selectors/langchain_core.example_selectors.semantic_similarity.SemanticSimilarityExampleSelector.html), which will store the examples in the vector database of our choosing. At runtime it will perform a similarity search between the input and our examples, and return the most semantically similar ones.\n",
|
||||
"\n",
|
||||
"We default to OpenAI embeddings here, but you can swap them out for the model provider of your choice."
|
||||
]
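A minimal sketch (assumes `langchain-chroma`, `langchain-openai`, and an OpenAI API key; the two examples shown are placeholders for the larger example list above):

```python
from langchain_chroma import Chroma
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_openai import OpenAIEmbeddings

examples = [
    {"input": "List all artists.", "query": "SELECT * FROM Artist;"},
    {"input": "How many employees are there?", "query": "SELECT COUNT(*) FROM Employee;"},
]

example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples,
    OpenAIEmbeddings(),
    Chroma,  # vector store class used to index the examples
    k=1,
    input_keys=["input"],
)
print(example_selector.select_examples({"input": "How many artists are there?"}))
```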
|
||||
|
||||
@@ -246,7 +246,7 @@
|
||||
"\n",
|
||||
"Let's build a simple chain using `LangChain Expression Language` (`LCEL`) that combines a prompt, model and a parser and verify that streaming works.\n",
|
||||
"\n",
|
||||
"We will use [`StrOutputParser`](https://api.python.langchain.com/en/latest/output_parsers/langchain_core.output_parsers.string.StrOutputParser.html) to parse the output from the model. This is a simple parser that extracts the `content` field from an `AIMessageChunk`, giving us the `token` returned by the model.\n",
|
||||
"We will use [`StrOutputParser`](https://python.langchain.com/v0.2/api_reference/core/output_parsers/langchain_core.output_parsers.string.StrOutputParser.html) to parse the output from the model. This is a simple parser that extracts the `content` field from an `AIMessageChunk`, giving us the `token` returned by the model.\n",
|
||||
"\n",
|
||||
":::{.callout-tip}\n",
|
||||
"LCEL is a *declarative* way to specify a \"program\" by chainining together different LangChain primitives. Chains created using LCEL benefit from an automatic implementation of `stream` and `astream` allowing streaming of the final output. In fact, chains created with LCEL implement the entire standard Runnable interface.\n",
Some files were not shown because too many files have changed in this diff.