From 20f82502e59b10fb8202f146d3eab2bdea9ca9fc Mon Sep 17 00:00:00 2001 From: Ante Javor Date: Wed, 26 Mar 2025 16:58:09 +0100 Subject: [PATCH 01/30] Community: Add Memgraph integration docs (#30457) Thank you for contributing to LangChain! **Description:** Since we just implemented [langchain-memgraph](https://pypi.org/project/langchain-memgraph/) integration, we are adding basic docs to [your site based on this comment](https://github.com/langchain-ai/langchain/pull/30197#pullrequestreview-2671616410) from @ccurme . **Twitter handle:** [@memgraphdb](https://x.com/memgraphdb) - [x] **Add tests and docs**: If you're adding a new integration, please include 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. - [x] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/ Additional guidelines: - Make sure optional dependencies are imported within a function. - Please do not add dependencies to pyproject.toml files (even optional ones) unless they are required for unit tests. - Most PRs should not touch more than one package. - Changes should be backwards compatible. - If you are adding something to community, do not re-import it in langchain. If no one reviews your PR within a few days, please @-mention one of baskaryan, eyurtsev, ccurme, vbarda, hwchase17. --------- Co-authored-by: Chester Curme --- docs/docs/integrations/graphs/memgraph.ipynb | 12 +- docs/docs/integrations/providers/memgraph.mdx | 40 ++++ docs/docs/integrations/tools/memgraph.ipynb | 215 ++++++++++++++++++ libs/packages.yml | 3 + 4 files changed, 263 insertions(+), 7 deletions(-) create mode 100644 docs/docs/integrations/providers/memgraph.mdx create mode 100644 docs/docs/integrations/tools/memgraph.ipynb diff --git a/docs/docs/integrations/graphs/memgraph.ipynb b/docs/docs/integrations/graphs/memgraph.ipynb index 1040d43f9e5..4ccfb6989b8 100644 --- a/docs/docs/integrations/graphs/memgraph.ipynb +++ b/docs/docs/integrations/graphs/memgraph.ipynb @@ -38,7 +38,7 @@ "To use LangChain, install and import all the necessary packages. We'll use the package manager [pip](https://pip.pypa.io/en/stable/installation/), along with the `--user` flag, to ensure proper permissions. If you've installed Python 3.4 or a later version, `pip` is included by default. You can install all the required packages using the following command:\n", "\n", "```\n", - "pip install langchain langchain-openai neo4j --user\n", + "pip install langchain langchain-openai langchain-memgraph --user\n", "```\n", "\n", "You can either run the provided code blocks in this notebook or use a separate Python file to experiment with Memgraph and LangChain." 
@@ -57,24 +57,22 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", - "from langchain_community.chains.graph_qa.memgraph import MemgraphQAChain\n", - "from langchain_community.graphs import MemgraphGraph\n", "from langchain_core.prompts import PromptTemplate\n", + "from langchain_memgraph.chains.graph_qa import MemgraphQAChain\n", + "from langchain_memgraph.graphs.memgraph import Memgraph\n", "from langchain_openai import ChatOpenAI\n", "\n", "url = os.environ.get(\"MEMGRAPH_URI\", \"bolt://localhost:7687\")\n", "username = os.environ.get(\"MEMGRAPH_USERNAME\", \"\")\n", "password = os.environ.get(\"MEMGRAPH_PASSWORD\", \"\")\n", "\n", - "graph = MemgraphGraph(\n", - " url=url, username=username, password=password, refresh_schema=False\n", - ")" + "graph = Memgraph(url=url, username=username, password=password, refresh_schema=False)" ] }, { diff --git a/docs/docs/integrations/providers/memgraph.mdx b/docs/docs/integrations/providers/memgraph.mdx new file mode 100644 index 00000000000..a64203a85ba --- /dev/null +++ b/docs/docs/integrations/providers/memgraph.mdx @@ -0,0 +1,40 @@ +# Memgraph + +>Memgraph is a high-performance, in-memory graph database that is optimized for real-time queries and analytics. +>Get started with Memgraph by visiting [their website](https://memgraph.com/). + +## Installation and Setup + +- Install the Python SDK with `pip install langchain-memgraph` + +## MemgraphQAChain + +There exists a wrapper around Memgraph graph database that allows you to generate Cypher statements based on the user input +and use them to retrieve relevant information from the database. + +```python +from langchain_memgraph.chains.graph_qa import MemgraphQAChain +from langchain_memgraph.graphs.memgraph import Memgraph +``` + +See a [usage example](/docs/integrations/graphs/memgraph) + +## Constructing a Knowledge Graph from unstructured data + +You can use the integration to construct a knowledge graph from unstructured data. + +```python +from langchain_memgraph.graphs.memgraph import Memgraph +from langchain_experimental.graph_transformers import LLMGraphTransformer +``` + +See a [usage example](/docs/integrations/graphs/memgraph) + +## Memgraph Tools and Toolkit + +Memgraph also provides a toolkit that allows you to interact with the Memgraph database. +See a [usage example](/docs/integrations/tools/memgraph). + +```python +from langchain_memgraph import MemgraphToolkit +``` diff --git a/docs/docs/integrations/tools/memgraph.ipynb b/docs/docs/integrations/tools/memgraph.ipynb new file mode 100644 index 00000000000..0797e30309b --- /dev/null +++ b/docs/docs/integrations/tools/memgraph.ipynb @@ -0,0 +1,215 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Memgraph\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "e49f1e0d", + "metadata": {}, + "source": [ + "# MemgraphToolkit\n", + "\n", + "## Overview\n", + "\n", + "This will help you getting started with the Memgraph [toolkit](/docs/concepts/tools/#toolkits). \n", + "\n", + "Tools within `MemgraphToolkit` are designed for the interaction with the `Memgraph` database.\n", + "\n", + "## Setup\n", + "\n", + "To be able tot follow the steps below, make sure you have a running Memgraph instance on your local host. 
For more details on how to run Memgraph, take a look at [Memgraph docs](https://memgraph.com/docs/getting-started)\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "72ee0c4b-9764-423a-9dbf-95129e185210", + "metadata": {}, + "source": [ + "If you want to get automated tracing from runs of individual tools, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a15d341e-3e26-4ca3-830b-5aab30ed66de", + "metadata": {}, + "outputs": [], + "source": [ + "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n", + "# os.environ[\"LANGSMITH_TRACING\"] = \"true\"" + ] + }, + { + "cell_type": "markdown", + "id": "0730d6a1-c893-4840-9817-5e5251676d5d", + "metadata": {}, + "source": [ + "### Installation\n", + "\n", + "This toolkit lives in the `langchain-memgraph` package:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "652d6238-1f87-422a-b135-f5abbb8652fc", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -qU langchain-memgraph " + ] + }, + { + "cell_type": "markdown", + "id": "a38cde65-254d-4219-a441-068766c0d4b5", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our toolkit:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb09c344-1836-4e0c-acf8-11d13ac1dbae", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chat_models import init_chat_model\n", + "from langchain_memgraph import MemgraphToolkit\n", + "from langchain_memgraph.graphs.memgraph import Memgraph\n", + "\n", + "db = Memgraph(url=url, username=username, password=password)\n", + "\n", + "llm = init_chat_model(\"gpt-4o-mini\", model_provider=\"openai\")\n", + "\n", + "toolkit = MemgraphToolkit(\n", + " db=db, # Memgraph instance\n", + " llm=llm, # LLM chat model for LLM operations\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "5c5f2839-4020-424e-9fc9-07777eede442", + "metadata": {}, + "source": [ + "## Tools\n", + "\n", + "View available tools:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51a60dbe-9f2e-4e04-bb62-23968f17164a", + "metadata": {}, + "outputs": [], + "source": [ + "toolkit.get_tools()" + ] + }, + { + "cell_type": "markdown", + "id": "608af19d", + "metadata": {}, + "source": [ + "## Invocation\n", + "\n", + "Tools can be individually called by passing an arguments, for QueryMemgraphTool it would be: \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ffa944db", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_memgraph.tools import QueryMemgraphTool\n", + "\n", + "# Rest of the code omitted for brevity\n", + "\n", + "tool.invoke({QueryMemgraphTool({\"query\": \"MATCH (n) RETURN n LIMIT 5\"})})" + ] + }, + { + "cell_type": "markdown", + "id": "dfe8aad4-8626-4330-98a9-7ea1ca5d2e0e", + "metadata": {}, + "source": [ + "## Use within an agent" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "310bf18e-6c9a-4072-b86e-47bc1fcca29d", + "metadata": {}, + "outputs": [], + "source": [ + "from langgraph.prebuilt import create_react_agent\n", + "\n", + "agent_executor = create_react_agent(llm, tools)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23e11cc9-abd6-4855-a7eb-799f45ca01ae", + "metadata": {}, + "outputs": [], + "source": [ + "example_query = \"MATCH (n) RETURN n LIMIT 1\"\n", + "\n", + "events = agent_executor.stream(\n", 
+ " {\"messages\": [(\"user\", example_query)]},\n", + " stream_mode=\"values\",\n", + ")\n", + "for event in events:\n", + " event[\"messages\"][-1].pretty_print()" + ] + }, + { + "cell_type": "markdown", + "id": "29ca615b", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For more details on API visit [Memgraph integration docs](https://memgraph.com/docs/ai-ecosystem/integrations#langchain)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/packages.yml b/libs/packages.yml index b3d79811c2c..af7aee18995 100644 --- a/libs/packages.yml +++ b/libs/packages.yml @@ -551,3 +551,6 @@ packages: provider_page: naver path: . repo: e7217/langchain-naver-community +- name: langchain-memgraph + path: . + repo: memgraph/langchain-memgraph From 22d1a7d7b6d6be0655bb791a6a65b60150c9afc5 Mon Sep 17 00:00:00 2001 From: ccurme Date: Wed, 26 Mar 2025 12:20:53 -0400 Subject: [PATCH 02/30] standard-tests[patch]: require model_name in response_metadata if returns_usage_metadata (#30497) We are implementing a token-counting callback handler in `langchain-core` that is intended to work with all chat models supporting usage metadata. The callback will aggregate usage metadata by model. This requires responses to include the model name in its metadata. To support this, if a model `returns_usage_metadata`, we check that it includes a string model name in its `response_metadata` in the `"model_name"` key. 
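For illustration, a conforming response might look roughly like the sketch below (the token counts and model identifier are made up; the requirement being added is only that `response_metadata` carry a non-empty string under `"model_name"` whenever usage metadata is reported):

```python
from langchain_core.messages import AIMessage

# Illustrative values only: a model that populates usage_metadata should also
# surface the underlying model name so callbacks can aggregate usage per model.
msg = AIMessage(
    content="Hello!",
    usage_metadata={
        "input_tokens": 8,
        "output_tokens": 10,
        "total_tokens": 18,
    },
    response_metadata={
        "model_name": "my-model-001",  # hypothetical model identifier
    },
)

assert isinstance(msg.response_metadata.get("model_name"), str)
```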
More context: https://github.com/langchain-ai/langchain/pull/30487 --- docs/docs/how_to/custom_chat_model.ipynb | 6 +++- .../integration_template/chat_models.py | 6 +++- .../langchain_fireworks/chat_models.py | 2 ++ .../integration_tests/test_chat_models.py | 11 +++++-- .../langchain_mistralai/chat_models.py | 7 ++++- .../integration_tests/test_chat_models.py | 15 ++++++--- .../integration_tests/chat_models.py | 31 +++++++++++++++++++ .../langchain_tests/unit_tests/chat_models.py | 3 ++ .../tests/unit_tests/custom_chat_model.py | 6 +++- 9 files changed, 75 insertions(+), 12 deletions(-) diff --git a/docs/docs/how_to/custom_chat_model.ipynb b/docs/docs/how_to/custom_chat_model.ipynb index 36ff587e11e..b8c8d7f0067 100644 --- a/docs/docs/how_to/custom_chat_model.ipynb +++ b/docs/docs/how_to/custom_chat_model.ipynb @@ -247,6 +247,7 @@ " additional_kwargs={}, # Used to add additional payload to the message\n", " response_metadata={ # Use for response metadata\n", " \"time_in_seconds\": 3,\n", + " \"model_name\": self.model_name,\n", " },\n", " usage_metadata={\n", " \"input_tokens\": ct_input_tokens,\n", @@ -309,7 +310,10 @@ "\n", " # Let's add some other information (e.g., response metadata)\n", " chunk = ChatGenerationChunk(\n", - " message=AIMessageChunk(content=\"\", response_metadata={\"time_in_sec\": 3})\n", + " message=AIMessageChunk(\n", + " content=\"\",\n", + " response_metadata={\"time_in_sec\": 3, \"model_name\": self.model_name},\n", + " )\n", " )\n", " if run_manager:\n", " # This is optional in newer versions of LangChain\n", diff --git a/libs/cli/langchain_cli/integration_template/integration_template/chat_models.py b/libs/cli/langchain_cli/integration_template/integration_template/chat_models.py index 3de9b63179f..9703b50358a 100644 --- a/libs/cli/langchain_cli/integration_template/integration_template/chat_models.py +++ b/libs/cli/langchain_cli/integration_template/integration_template/chat_models.py @@ -329,6 +329,7 @@ class Chat__ModuleName__(BaseChatModel): additional_kwargs={}, # Used to add additional payload to the message response_metadata={ # Use for response metadata "time_in_seconds": 3, + "model_name": self.model_name, }, usage_metadata={ "input_tokens": ct_input_tokens, @@ -391,7 +392,10 @@ class Chat__ModuleName__(BaseChatModel): # Let's add some other information (e.g., response metadata) chunk = ChatGenerationChunk( - message=AIMessageChunk(content="", response_metadata={"time_in_sec": 3}) + message=AIMessageChunk( + content="", + response_metadata={"time_in_sec": 3, "model_name": self.model_name}, + ) ) if run_manager: # This is optional in newer versions of LangChain diff --git a/libs/partners/fireworks/langchain_fireworks/chat_models.py b/libs/partners/fireworks/langchain_fireworks/chat_models.py index 3f776456559..e2953eab7fe 100644 --- a/libs/partners/fireworks/langchain_fireworks/chat_models.py +++ b/libs/partners/fireworks/langchain_fireworks/chat_models.py @@ -471,6 +471,7 @@ class ChatFireworks(BaseChatModel): generation_info = {} if finish_reason := choice.get("finish_reason"): generation_info["finish_reason"] = finish_reason + generation_info["model_name"] = self.model_name logprobs = choice.get("logprobs") if logprobs: generation_info["logprobs"] = logprobs @@ -565,6 +566,7 @@ class ChatFireworks(BaseChatModel): generation_info = {} if finish_reason := choice.get("finish_reason"): generation_info["finish_reason"] = finish_reason + generation_info["model_name"] = self.model_name logprobs = choice.get("logprobs") if logprobs: 
generation_info["logprobs"] = logprobs diff --git a/libs/partners/fireworks/tests/integration_tests/test_chat_models.py b/libs/partners/fireworks/tests/integration_tests/test_chat_models.py index ecaa2ebca8a..6a019bd38b7 100644 --- a/libs/partners/fireworks/tests/integration_tests/test_chat_models.py +++ b/libs/partners/fireworks/tests/integration_tests/test_chat_models.py @@ -98,16 +98,19 @@ async def test_astream() -> None: full: Optional[BaseMessageChunk] = None chunks_with_token_counts = 0 + chunks_with_response_metadata = 0 async for token in llm.astream("I'm Pickle Rick"): assert isinstance(token, AIMessageChunk) assert isinstance(token.content, str) full = token if full is None else full + token if token.usage_metadata is not None: chunks_with_token_counts += 1 - if chunks_with_token_counts != 1: + if token.response_metadata: + chunks_with_response_metadata += 1 + if chunks_with_token_counts != 1 or chunks_with_response_metadata != 1: raise AssertionError( - "Expected exactly one chunk with token counts. " - "AIMessageChunk aggregation adds counts. Check that " + "Expected exactly one chunk with token counts or response_metadata. " + "AIMessageChunk aggregation adds / appends counts and metadata. Check that " "this is behaving properly." ) assert isinstance(full, AIMessageChunk) @@ -118,6 +121,8 @@ async def test_astream() -> None: full.usage_metadata["input_tokens"] + full.usage_metadata["output_tokens"] == full.usage_metadata["total_tokens"] ) + assert isinstance(full.response_metadata["model_name"], str) + assert full.response_metadata["model_name"] async def test_abatch() -> None: diff --git a/libs/partners/mistralai/langchain_mistralai/chat_models.py b/libs/partners/mistralai/langchain_mistralai/chat_models.py index a7deb8b5471..7cdac2bcab3 100644 --- a/libs/partners/mistralai/langchain_mistralai/chat_models.py +++ b/libs/partners/mistralai/langchain_mistralai/chat_models.py @@ -236,13 +236,15 @@ async def acompletion_with_retry( def _convert_chunk_to_message_chunk( chunk: Dict, default_class: Type[BaseMessageChunk] ) -> BaseMessageChunk: - _delta = chunk["choices"][0]["delta"] + _choice = chunk["choices"][0] + _delta = _choice["delta"] role = _delta.get("role") content = _delta.get("content") or "" if role == "user" or default_class == HumanMessageChunk: return HumanMessageChunk(content=content) elif role == "assistant" or default_class == AIMessageChunk: additional_kwargs: Dict = {} + response_metadata = {} if raw_tool_calls := _delta.get("tool_calls"): additional_kwargs["tool_calls"] = raw_tool_calls try: @@ -272,11 +274,14 @@ def _convert_chunk_to_message_chunk( } else: usage_metadata = None + if _choice.get("finish_reason") is not None: + response_metadata["model_name"] = chunk.get("model") return AIMessageChunk( content=content, additional_kwargs=additional_kwargs, tool_call_chunks=tool_call_chunks, # type: ignore[arg-type] usage_metadata=usage_metadata, # type: ignore[arg-type] + response_metadata=response_metadata, ) elif role == "system" or default_class == SystemMessageChunk: return SystemMessageChunk(content=content) diff --git a/libs/partners/mistralai/tests/integration_tests/test_chat_models.py b/libs/partners/mistralai/tests/integration_tests/test_chat_models.py index d3592ef32fd..8bec346d29a 100644 --- a/libs/partners/mistralai/tests/integration_tests/test_chat_models.py +++ b/libs/partners/mistralai/tests/integration_tests/test_chat_models.py @@ -20,7 +20,7 @@ def test_stream() -> None: """Test streaming tokens from ChatMistralAI.""" llm = ChatMistralAI() - for 
token in llm.stream("I'm Pickle Rick"): + for token in llm.stream("Hello"): assert isinstance(token.content, str) @@ -30,16 +30,19 @@ async def test_astream() -> None: full: Optional[BaseMessageChunk] = None chunks_with_token_counts = 0 - async for token in llm.astream("I'm Pickle Rick"): + chunks_with_response_metadata = 0 + async for token in llm.astream("Hello"): assert isinstance(token, AIMessageChunk) assert isinstance(token.content, str) full = token if full is None else full + token if token.usage_metadata is not None: chunks_with_token_counts += 1 - if chunks_with_token_counts != 1: + if token.response_metadata: + chunks_with_response_metadata += 1 + if chunks_with_token_counts != 1 or chunks_with_response_metadata != 1: raise AssertionError( - "Expected exactly one chunk with token counts. " - "AIMessageChunk aggregation adds counts. Check that " + "Expected exactly one chunk with token counts or response_metadata. " + "AIMessageChunk aggregation adds / appends counts and metadata. Check that " "this is behaving properly." ) assert isinstance(full, AIMessageChunk) @@ -50,6 +53,8 @@ async def test_astream() -> None: full.usage_metadata["input_tokens"] + full.usage_metadata["output_tokens"] == full.usage_metadata["total_tokens"] ) + assert isinstance(full.response_metadata["model_name"], str) + assert full.response_metadata["model_name"] async def test_abatch() -> None: diff --git a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py index 7041f0c9f38..b5e294ffc8d 100644 --- a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py +++ b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py @@ -337,6 +337,9 @@ class ChatModelIntegrationTests(ChatModelTests): def returns_usage_metadata(self) -> bool: return False + Models supporting ``usage_metadata`` should also return the name of the + underlying model in the ``response_metadata`` of the AIMessage. + .. dropdown:: supports_anthropic_inputs Boolean property indicating whether the chat model supports Anthropic-style @@ -669,6 +672,11 @@ class ChatModelIntegrationTests(ChatModelTests): This test is optional and should be skipped if the model does not return usage metadata (see Configuration below). + .. versionchanged:: 0.3.17 + + Additionally check for the presence of `model_name` in the response + metadata, which is needed for usage tracking in callback handlers. + .. dropdown:: Configuration By default, this test is run. @@ -739,6 +747,9 @@ class ChatModelIntegrationTests(ChatModelTests): ) )] ) + + Check also that the response includes a ``"model_name"`` key in its + ``usage_metadata``. """ if not self.returns_usage_metadata: pytest.skip("Not implemented.") @@ -750,6 +761,12 @@ class ChatModelIntegrationTests(ChatModelTests): assert isinstance(result.usage_metadata["output_tokens"], int) assert isinstance(result.usage_metadata["total_tokens"], int) + # Check model_name is in response_metadata + # Needed for langchain_core.callbacks.usage + model_name = result.response_metadata.get("model_name") + assert isinstance(model_name, str) + assert model_name + if "audio_input" in self.supported_usage_metadata_details["invoke"]: msg = self.invoke_with_audio_input() assert msg.usage_metadata is not None @@ -809,6 +826,11 @@ class ChatModelIntegrationTests(ChatModelTests): """ Test to verify that the model returns correct usage metadata in streaming mode. + .. 
versionchanged:: 0.3.17 + + Additionally check for the presence of `model_name` in the response + metadata, which is needed for usage tracking in callback handlers. + .. dropdown:: Configuration By default, this test is run. @@ -891,6 +913,9 @@ class ChatModelIntegrationTests(ChatModelTests): ) )] ) + + Check also that the aggregated response includes a ``"model_name"`` key + in its ``usage_metadata``. """ if not self.returns_usage_metadata: pytest.skip("Not implemented.") @@ -915,6 +940,12 @@ class ChatModelIntegrationTests(ChatModelTests): assert isinstance(full.usage_metadata["output_tokens"], int) assert isinstance(full.usage_metadata["total_tokens"], int) + # Check model_name is in response_metadata + # Needed for langchain_core.callbacks.usage + model_name = full.response_metadata.get("model_name") + assert isinstance(model_name, str) + assert model_name + if "audio_input" in self.supported_usage_metadata_details["stream"]: msg = self.invoke_with_audio_input(stream=True) assert isinstance(msg.usage_metadata["input_token_details"]["audio"], int) # type: ignore[index] diff --git a/libs/standard-tests/langchain_tests/unit_tests/chat_models.py b/libs/standard-tests/langchain_tests/unit_tests/chat_models.py index a470a9b59d5..beec0b98cb1 100644 --- a/libs/standard-tests/langchain_tests/unit_tests/chat_models.py +++ b/libs/standard-tests/langchain_tests/unit_tests/chat_models.py @@ -412,6 +412,9 @@ class ChatModelUnitTests(ChatModelTests): def returns_usage_metadata(self) -> bool: return False + Models supporting ``usage_metadata`` should also return the name of the + underlying model in the ``response_metadata`` of the AIMessage. + .. dropdown:: supports_anthropic_inputs Boolean property indicating whether the chat model supports Anthropic-style diff --git a/libs/standard-tests/tests/unit_tests/custom_chat_model.py b/libs/standard-tests/tests/unit_tests/custom_chat_model.py index 1791138cf35..30135883469 100644 --- a/libs/standard-tests/tests/unit_tests/custom_chat_model.py +++ b/libs/standard-tests/tests/unit_tests/custom_chat_model.py @@ -76,6 +76,7 @@ class ChatParrotLink(BaseChatModel): additional_kwargs={}, # Used to add additional payload to the message response_metadata={ # Use for response metadata "time_in_seconds": 3, + "model_name": self.model_name, }, usage_metadata={ "input_tokens": ct_input_tokens, @@ -138,7 +139,10 @@ class ChatParrotLink(BaseChatModel): # Let's add some other information (e.g., response metadata) chunk = ChatGenerationChunk( - message=AIMessageChunk(content="", response_metadata={"time_in_sec": 3}) + message=AIMessageChunk( + content="", + response_metadata={"time_in_sec": 3, "model_name": self.model_name}, + ) ) if run_manager: # This is optional in newer versions of LangChain From 299b222c5330d28da3eaa5b488eb1cd17192ab23 Mon Sep 17 00:00:00 2001 From: ccurme Date: Wed, 26 Mar 2025 12:30:09 -0400 Subject: [PATCH 03/30] mistral[patch]: check types in adding model_name to response_metadata (#30499) --- libs/partners/mistralai/langchain_mistralai/chat_models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libs/partners/mistralai/langchain_mistralai/chat_models.py b/libs/partners/mistralai/langchain_mistralai/chat_models.py index 7cdac2bcab3..6f3cac19904 100644 --- a/libs/partners/mistralai/langchain_mistralai/chat_models.py +++ b/libs/partners/mistralai/langchain_mistralai/chat_models.py @@ -274,7 +274,9 @@ def _convert_chunk_to_message_chunk( } else: usage_metadata = None - if _choice.get("finish_reason") is not None: + if 
_choice.get("finish_reason") is not None and isinstance( + chunk.get("model"), str + ): response_metadata["model_name"] = chunk.get("model") return AIMessageChunk( content=content, From 9a80be7bb749e4a5c17ae6cc0197273247de6f9b Mon Sep 17 00:00:00 2001 From: ccurme Date: Wed, 26 Mar 2025 13:26:32 -0400 Subject: [PATCH 04/30] core[patch]: release 0.3.49 (#30500) --- libs/core/langchain_core/callbacks/usage.py | 39 +++++++++++++++------ libs/core/pyproject.toml | 2 +- libs/core/uv.lock | 5 ++- 3 files changed, 32 insertions(+), 14 deletions(-) diff --git a/libs/core/langchain_core/callbacks/usage.py b/libs/core/langchain_core/callbacks/usage.py index dd873adaebd..930b6f52802 100644 --- a/libs/core/langchain_core/callbacks/usage.py +++ b/libs/core/langchain_core/callbacks/usage.py @@ -23,18 +23,28 @@ class UsageMetadataCallbackHandler(BaseCallbackHandler): from langchain.chat_models import init_chat_model from langchain_core.callbacks import UsageMetadataCallbackHandler - llm = init_chat_model(model="openai:gpt-4o-mini") + llm_1 = init_chat_model(model="openai:gpt-4o-mini") + llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-latest") callback = UsageMetadataCallbackHandler() - results = llm.batch(["Hello", "Goodbye"], config={"callbacks": [callback]}) - print(callback.usage_metadata) + result_1 = llm_1.invoke("Hello", config={"callbacks": [callback]}) + result_2 = llm_2.invoke("Hello", config={"callbacks": [callback]}) + callback.usage_metadata .. code-block:: none - {'output_token_details': {'audio': 0, 'reasoning': 0}, 'input_tokens': 17, 'output_tokens': 31, 'total_tokens': 48, 'input_token_details': {'cache_read': 0, 'audio': 0}} + {'gpt-4o-mini-2024-07-18': {'input_tokens': 8, + 'output_tokens': 10, + 'total_tokens': 18, + 'input_token_details': {'audio': 0, 'cache_read': 0}, + 'output_token_details': {'audio': 0, 'reasoning': 0}}, + 'claude-3-5-haiku-20241022': {'input_tokens': 8, + 'output_tokens': 21, + 'total_tokens': 29, + 'input_token_details': {'cache_read': 0, 'cache_creation': 0}}} .. versionadded:: 0.3.49 - """ # noqa: E501 + """ def __init__(self) -> None: super().__init__() @@ -92,19 +102,28 @@ def get_usage_metadata_callback( from langchain.chat_models import init_chat_model from langchain_core.callbacks import get_usage_metadata_callback - llm = init_chat_model(model="openai:gpt-4o-mini") + llm_1 = init_chat_model(model="openai:gpt-4o-mini") + llm_2 = init_chat_model(model="anthropic:claude-3-5-haiku-latest") with get_usage_metadata_callback() as cb: - llm.invoke("Hello") - llm.invoke("Goodbye") + llm_1.invoke("Hello") + llm_2.invoke("Hello") print(cb.usage_metadata) .. code-block:: none - {'output_token_details': {'audio': 0, 'reasoning': 0}, 'input_tokens': 17, 'output_tokens': 31, 'total_tokens': 48, 'input_token_details': {'cache_read': 0, 'audio': 0}} + {'gpt-4o-mini-2024-07-18': {'input_tokens': 8, + 'output_tokens': 10, + 'total_tokens': 18, + 'input_token_details': {'audio': 0, 'cache_read': 0}, + 'output_token_details': {'audio': 0, 'reasoning': 0}}, + 'claude-3-5-haiku-20241022': {'input_tokens': 8, + 'output_tokens': 21, + 'total_tokens': 29, + 'input_token_details': {'cache_read': 0, 'cache_creation': 0}}} .. 
versionadded:: 0.3.49 - """ # noqa: E501 + """ from langchain_core.tracers.context import register_configure_hook usage_metadata_callback_var: ContextVar[Optional[UsageMetadataCallbackHandler]] = ( diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index 8c486ce610a..5e0a3e9ef26 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -17,7 +17,7 @@ dependencies = [ "pydantic<3.0.0,>=2.7.4; python_full_version >= \"3.12.4\"", ] name = "langchain-core" -version = "0.3.48" +version = "0.3.49" description = "Building applications with LLMs through composability" readme = "README.md" diff --git a/libs/core/uv.lock b/libs/core/uv.lock index 8674c8fec59..fdbfad5601f 100644 --- a/libs/core/uv.lock +++ b/libs/core/uv.lock @@ -1,5 +1,4 @@ version = 1 -revision = 1 requires-python = ">=3.9, <4.0" resolution-markers = [ "python_full_version >= '3.12.4'", @@ -936,7 +935,7 @@ wheels = [ [[package]] name = "langchain-core" -version = "0.3.48" +version = "0.3.49" source = { editable = "." } dependencies = [ { name = "jsonpatch" }, @@ -1027,7 +1026,7 @@ typing = [ [[package]] name = "langchain-tests" -version = "0.3.15" +version = "0.3.16" source = { directory = "../standard-tests" } dependencies = [ { name = "httpx" }, From 422ba4cde57e5f2026d5fa0b185b8d68a00d4bfe Mon Sep 17 00:00:00 2001 From: ccurme Date: Wed, 26 Mar 2025 13:28:56 -0400 Subject: [PATCH 05/30] infra: handle flaky tests (#30501) --- libs/partners/anthropic/pyproject.toml | 1 + .../integration_tests/test_chat_models.py | 1 + libs/partners/anthropic/uv.lock | 18 ++++++++++++++++-- libs/partners/openai/pyproject.toml | 1 + .../chat_models/test_responses_api.py | 6 ++++++ libs/partners/openai/uv.lock | 16 +++++++++++++++- 6 files changed, 40 insertions(+), 3 deletions(-) diff --git a/libs/partners/anthropic/pyproject.toml b/libs/partners/anthropic/pyproject.toml index e3e96892ff5..8be13a8e10b 100644 --- a/libs/partners/anthropic/pyproject.toml +++ b/libs/partners/anthropic/pyproject.toml @@ -30,6 +30,7 @@ test = [ "pytest-watcher<1.0.0,>=0.3.4", "pytest-asyncio<1.0.0,>=0.21.1", "defusedxml<1.0.0,>=0.7.1", + "pytest-retry<1.8.0,>=1.7.0", "pytest-timeout<3.0.0,>=2.3.1", "pytest-socket<1.0.0,>=0.7.0", "langchain-core", diff --git a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py index e792c1583fd..fc24c7d515c 100644 --- a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py +++ b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py @@ -730,6 +730,7 @@ def test_thinking() -> None: assert block["signature"] and isinstance(block["signature"], str) +@pytest.mark.flaky(retries=3, delay=1) def test_redacted_thinking() -> None: llm = ChatAnthropic( model="claude-3-7-sonnet-latest", diff --git a/libs/partners/anthropic/uv.lock b/libs/partners/anthropic/uv.lock index 401ae87f754..d9d0c5a1e36 100644 --- a/libs/partners/anthropic/uv.lock +++ b/libs/partners/anthropic/uv.lock @@ -432,6 +432,7 @@ test = [ { name = "pytest" }, { name = "pytest-asyncio" }, { name = "pytest-mock" }, + { name = "pytest-retry" }, { name = "pytest-socket" }, { name = "pytest-timeout" }, { name = "pytest-watcher" }, @@ -466,6 +467,7 @@ test = [ { name = "pytest", specifier = ">=7.3.0,<8.0.0" }, { name = "pytest-asyncio", specifier = ">=0.21.1,<1.0.0" }, { name = "pytest-mock", specifier = ">=3.10.0,<4.0.0" }, + { name = "pytest-retry", specifier = ">=1.7.0,<1.8.0" }, { name = "pytest-socket", specifier = ">=0.7.0,<1.0.0" }, { name = 
"pytest-timeout", specifier = ">=2.3.1,<3.0.0" }, { name = "pytest-watcher", specifier = ">=0.3.4,<1.0.0" }, @@ -483,7 +485,7 @@ typing = [ [[package]] name = "langchain-core" -version = "0.3.45" +version = "0.3.48" source = { editable = "../../core" } dependencies = [ { name = "jsonpatch" }, @@ -541,7 +543,7 @@ typing = [ [[package]] name = "langchain-tests" -version = "0.3.14" +version = "0.3.16" source = { editable = "../../standard-tests" } dependencies = [ { name = "httpx" }, @@ -1009,6 +1011,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f2/3b/b26f90f74e2986a82df6e7ac7e319b8ea7ccece1caec9f8ab6104dc70603/pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f", size = 9863 }, ] +[[package]] +name = "pytest-retry" +version = "1.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c5/5b/607b017994cca28de3a1ad22a3eee8418e5d428dcd8ec25b26b18e995a73/pytest_retry-1.7.0.tar.gz", hash = "sha256:f8d52339f01e949df47c11ba9ee8d5b362f5824dff580d3870ec9ae0057df80f", size = 19977 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/ff/3266c8a73b9b93c4b14160a7e2b31d1e1088e28ed29f4c2d93ae34093bfd/pytest_retry-1.7.0-py3-none-any.whl", hash = "sha256:a2dac85b79a4e2375943f1429479c65beb6c69553e7dae6b8332be47a60954f4", size = 13775 }, +] + [[package]] name = "pytest-socket" version = "0.7.0" diff --git a/libs/partners/openai/pyproject.toml b/libs/partners/openai/pyproject.toml index ce23eca19f0..42049ff477b 100644 --- a/libs/partners/openai/pyproject.toml +++ b/libs/partners/openai/pyproject.toml @@ -30,6 +30,7 @@ test = [ "pytest-watcher<1.0.0,>=0.3.4", "pytest-asyncio<1.0.0,>=0.21.1", "pytest-cov<5.0.0,>=4.1.0", + "pytest-retry<1.8.0,>=1.7.0", "pytest-socket<1.0.0,>=0.6.0", "pytest-xdist<4.0.0,>=3.6.1", "numpy<2,>=1; python_version < \"3.12\"", diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index 8824560eae2..9eb6be78613 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -53,6 +53,7 @@ def _check_response(response: Optional[BaseMessage]) -> None: assert tool_output["type"] +@pytest.mark.flaky(retries=3, delay=1) def test_web_search() -> None: llm = ChatOpenAI(model=MODEL_NAME) first_response = llm.invoke( @@ -108,6 +109,7 @@ def test_web_search() -> None: _check_response(response) +@pytest.mark.flaky(retries=3, delay=1) async def test_web_search_async() -> None: llm = ChatOpenAI(model=MODEL_NAME) response = await llm.ainvoke( @@ -129,6 +131,7 @@ async def test_web_search_async() -> None: _check_response(full) +@pytest.mark.flaky(retries=3, delay=1) def test_function_calling() -> None: def multiply(x: int, y: int) -> int: """return x * y""" @@ -197,6 +200,7 @@ async def test_parsed_pydantic_schema_async() -> None: assert parsed.response +@pytest.mark.flaky(retries=3, delay=1) @pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict]) def test_parsed_dict_schema(schema: Any) -> None: llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) @@ -241,6 +245,7 @@ def test_parsed_strict() -> None: ) +@pytest.mark.flaky(retries=3, delay=1) @pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict]) async def 
test_parsed_dict_schema_async(schema: Any) -> None: llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) @@ -313,6 +318,7 @@ def test_route_from_model_kwargs() -> None: _ = next(llm.stream("Hello")) +@pytest.mark.flaky(retries=3, delay=1) def test_computer_calls() -> None: llm = ChatOpenAI(model="computer-use-preview", model_kwargs={"truncation": "auto"}) tool = { diff --git a/libs/partners/openai/uv.lock b/libs/partners/openai/uv.lock index df15053ab76..ccfa59aea4d 100644 --- a/libs/partners/openai/uv.lock +++ b/libs/partners/openai/uv.lock @@ -547,6 +547,7 @@ test = [ { name = "pytest-asyncio" }, { name = "pytest-cov" }, { name = "pytest-mock" }, + { name = "pytest-retry" }, { name = "pytest-socket" }, { name = "pytest-watcher" }, { name = "pytest-xdist" }, @@ -584,6 +585,7 @@ test = [ { name = "pytest-asyncio", specifier = ">=0.21.1,<1.0.0" }, { name = "pytest-cov", specifier = ">=4.1.0,<5.0.0" }, { name = "pytest-mock", specifier = ">=3.10.0,<4.0.0" }, + { name = "pytest-retry", specifier = ">=1.7.0,<1.8.0" }, { name = "pytest-socket", specifier = ">=0.6.0,<1.0.0" }, { name = "pytest-watcher", specifier = ">=0.3.4,<1.0.0" }, { name = "pytest-xdist", specifier = ">=3.6.1,<4.0.0" }, @@ -603,7 +605,7 @@ typing = [ [[package]] name = "langchain-tests" -version = "0.3.15" +version = "0.3.16" source = { editable = "../../standard-tests" } dependencies = [ { name = "httpx" }, @@ -1110,6 +1112,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f2/3b/b26f90f74e2986a82df6e7ac7e319b8ea7ccece1caec9f8ab6104dc70603/pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f", size = 9863 }, ] +[[package]] +name = "pytest-retry" +version = "1.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c5/5b/607b017994cca28de3a1ad22a3eee8418e5d428dcd8ec25b26b18e995a73/pytest_retry-1.7.0.tar.gz", hash = "sha256:f8d52339f01e949df47c11ba9ee8d5b362f5824dff580d3870ec9ae0057df80f", size = 19977 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/ff/3266c8a73b9b93c4b14160a7e2b31d1e1088e28ed29f4c2d93ae34093bfd/pytest_retry-1.7.0-py3-none-any.whl", hash = "sha256:a2dac85b79a4e2375943f1429479c65beb6c69553e7dae6b8332be47a60954f4", size = 13775 }, +] + [[package]] name = "pytest-socket" version = "0.7.0" From 8e5d2a44ce42b8ec1185eb574258db65d14a075d Mon Sep 17 00:00:00 2001 From: Philippe PRADOS Date: Wed, 26 Mar 2025 19:16:54 +0100 Subject: [PATCH 06/30] community[patch]: update PyPDFParser to take into account filters returned as arrays (#30489) The image parsing is generating a bug as the the extracted objects for the /Filter returns sometimes an array, sometimes a string. 
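A self-contained sketch of the normalization idea (simplified; the actual patch below distinguishes the two shapes via pypdf's `NameObject` type rather than `isinstance(..., list)`):

```python
from typing import Any, List, Union


def extract_filter_name(raw_filter: Union[str, List[Any]]) -> str:
    """Return the /Filter name without its leading slash.

    PDF image XObjects expose /Filter either as a single name
    (e.g. "/DCTDecode") or as an array of names, so both shapes are handled.
    """
    if isinstance(raw_filter, list):
        return str(raw_filter[0])[1:]
    return str(raw_filter)[1:]


print(extract_filter_name("/DCTDecode"))                    # DCTDecode
print(extract_filter_name(["/FlateDecode", "/DCTDecode"]))  # FlateDecode
```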
Fix [Issue 30098](https://github.com/langchain-ai/langchain/issues/30098) --- .../document_loaders/parsers/pdf.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/libs/community/langchain_community/document_loaders/parsers/pdf.py b/libs/community/langchain_community/document_loaders/parsers/pdf.py index 2b53db28736..df4314ee661 100644 --- a/libs/community/langchain_community/document_loaders/parsers/pdf.py +++ b/libs/community/langchain_community/document_loaders/parsers/pdf.py @@ -428,6 +428,7 @@ class PyPDFParser(BaseBlobParser): """ if not self.images_parser: return "" + import pypdf from PIL import Image if "/XObject" not in cast(dict, page["/Resources"]).keys(): @@ -438,13 +439,18 @@ class PyPDFParser(BaseBlobParser): for obj in xObject: np_image: Any = None if xObject[obj]["/Subtype"] == "/Image": - if xObject[obj]["/Filter"][1:] in _PDF_FILTER_WITHOUT_LOSS: + img_filter = ( + xObject[obj]["/Filter"][1:] + if type(xObject[obj]["/Filter"]) is pypdf.generic._base.NameObject + else xObject[obj]["/Filter"][0][1:] + ) + if img_filter in _PDF_FILTER_WITHOUT_LOSS: height, width = xObject[obj]["/Height"], xObject[obj]["/Width"] np_image = np.frombuffer( xObject[obj].get_data(), dtype=np.uint8 ).reshape(height, width, -1) - elif xObject[obj]["/Filter"][1:] in _PDF_FILTER_WITH_LOSS: + elif img_filter in _PDF_FILTER_WITH_LOSS: np_image = np.array(Image.open(io.BytesIO(xObject[obj].get_data()))) else: From fbd2e107034eee3de72b7b5d8779713631626a15 Mon Sep 17 00:00:00 2001 From: Really Him Date: Wed, 26 Mar 2025 14:22:33 -0400 Subject: [PATCH 07/30] docs: hide jsx in llm chain tutorial (#30187) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## **Description:** The Jupyter notebooks in the docs section are extremely useful and critical for widespread adoption of LangChain amongst new developers. However, because they are also converted to MDX and used to build the HTML for the Docusaurus site, they contain JSX code that degrades readability when opened in a "notebook" setting (local notebook server, google colab, etc.). For instance, here we see the website, with a nice React tab component for installation instructions (`pip` vs `conda`): ![Screenshot 2025-03-07 at 2 07 15 PM](https://github.com/user-attachments/assets/a528d618-f5a0-4d2e-9aed-16d4b8148b5a) Now, here is the same notebook viewed in colab: ![Screenshot 2025-03-07 at 2 08 41 PM](https://github.com/user-attachments/assets/87acf5b7-a3e0-46ac-8126-6cac6eb93586) Note that the text following "To install LangChain run:" contains snippets of JSX code that is (i) confusing, (ii) bad for readability, (iii) potentially misleading for a novice developer, who might take it literally to mean that "to install LangChain I should run `import Tabs from...`" and then an ill-formed command which mixes the `pip` and `conda` installation instructions. Ideally, we would like to have a system that presents a similar/equivalent UI when viewing the notebooks on the documentation site, or when interacting with them in a notebook setting - or, at a minimum, we should not present ill-formed JSX snippets to someone trying to execute the notebooks. As the documentation itself states, running the notebooks yourself is a great way to learn the tools. Therefore, these distracting and ill-formed snippets are contrary to that goal. ## **Fixes:** * Comment out the JSX code inside the notebook `docs/tutorials/llm_chain` with a special directive ``). 
This makes the JSX code "invisible" when viewed in a notebook setting. * Add a custom preprocessor that runs process_cell and just erases these comment strings. This makes sure they are rendered when converted to MDX. * Minor tweak: Refactor some of the Markdown instructions into an executable codeblock for better experience when running as a notebook. * Minor tweak: Optionally try to get the environment variables from a `.env` file in the repo so the user doesn't have to enter it every time. Depends on the user installing `python-dotenv` and adding their own `.env` file. * Add an environment variable for "LANGSMITH_PROJECT" (default="default"), per the LangSmith docs, so a local user can target a specific project in their LangSmith account. **NOTE:** If this PR is approved, and the maintainers agree with the general goal of aligning the notebook execution experience and the doc site UI, I would plan to implement this on the rest of the JSX snippets that are littered in the notebooks. **NOTE:** I wasn't able to/don't know how to run the linkcheck Makefile commands. - [X] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/ --------- Co-authored-by: Really Him --- docs/docs/tutorials/llm_chain.ipynb | 66 +++++++++++++++++++++++++---- docs/scripts/notebook_convert.py | 15 ++++++- 2 files changed, 71 insertions(+), 10 deletions(-) diff --git a/docs/docs/tutorials/llm_chain.ipynb b/docs/docs/tutorials/llm_chain.ipynb index 0b9547568b2..690b51aadda 100644 --- a/docs/docs/tutorials/llm_chain.ipynb +++ b/docs/docs/tutorials/llm_chain.ipynb @@ -39,6 +39,7 @@ "\n", "To install LangChain run:\n", "\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "86874822", + "metadata": {}, + "outputs": [], + "source": [ + "# | output: false\n", "\n", - "\n", - "\n", + "# %pip install langchain\n", + "# OR\n", + "# %conda install langchain -c conda-forge" + ] + }, + { + "cell_type": "markdown", + "id": "a546a5bc", + "metadata": {}, + "source": [ "For more details, see our [Installation guide](/docs/how_to/installation).\n", "\n", "### LangSmith\n", @@ -67,17 +87,45 @@ "```shell\n", "export LANGSMITH_TRACING=\"true\"\n", "export LANGSMITH_API_KEY=\"...\"\n", + "export LANGSMITH_PROJECT=\"default\" # or any other project name\n", "```\n", "\n", - "Or, if in a notebook, you can set them with:\n", - "\n", - "```python\n", + "Or, if in a notebook, you can set them with:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "599bb688", + "metadata": {}, + "outputs": [], + "source": [ "import getpass\n", "import os\n", "\n", + "try:\n", + " # load environment variables from .env file (requires `python-dotenv`)\n", + " from dotenv import load_dotenv\n", + "\n", + " load_dotenv()\n", + "except ImportError:\n", + " pass\n", + "\n", "os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n", - "os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass()\n", - "```" + "if \"LANGSMITH_API_KEY\" not in os.environ:\n", + " os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\n", + " prompt=\"Enter your LangSmith API key (optional): \"\n", + " )\n", + "if \"LANGSMITH_PROJECT\" not in os.environ:\n", + " os.environ[\"LANGSMITH_PROJECT\"] = getpass.getpass(\n", + " prompt='Enter your LangSmith Project Name (default = \"default\"): '\n", + " )\n", + " if not os.environ.get(\"LANGSMITH_PROJECT\"):\n", + " os.environ[\"LANGSMITH_PROJECT\"] = \"default\"\n", + "if 
\"OPENAI_API_KEY\" not in os.environ:\n", + " os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\n", + " prompt=\"Enter your OpenAI API key (required if using OpenAI): \"\n", + " )" ] }, { @@ -89,9 +137,11 @@ "\n", "First up, let's learn how to use a language model by itself. LangChain supports many different language models that you can use interchangeably. For details on getting started with a specific model, refer to [supported integrations](/docs/integrations/chat/).\n", "\n", + "" ] }, { diff --git a/docs/scripts/notebook_convert.py b/docs/scripts/notebook_convert.py index fb0e3c80756..33da9a5d30b 100644 --- a/docs/scripts/notebook_convert.py +++ b/docs/scripts/notebook_convert.py @@ -9,9 +9,12 @@ import nbformat from nbconvert.exporters import MarkdownExporter from nbconvert.preprocessors import Preprocessor +HIDE_IN_NB_MAGIC_OPEN = "" + class EscapePreprocessor(Preprocessor): - def preprocess_cell(self, cell, resources, cell_index): + def preprocess_cell(self, cell, resources, index): if cell.cell_type == "markdown": # rewrite .ipynb links to .md cell.source = re.sub( @@ -61,7 +64,7 @@ class ExtractAttachmentsPreprocessor(Preprocessor): outputs are returned in the 'resources' dictionary. """ - def preprocess_cell(self, cell, resources, cell_index): + def preprocess_cell(self, cell, resources, index): """ Apply a transformation on each cell, Parameters @@ -117,11 +120,19 @@ class CustomRegexRemovePreprocessor(Preprocessor): return nb, resources +class UnHidePreprocessor(Preprocessor): + def preprocess_cell(self, cell, resources, index): + cell.source = cell.source.replace(HIDE_IN_NB_MAGIC_OPEN, "") + cell.source = cell.source.replace(HIDE_IN_NB_MAGIC_CLOSE, "") + return cell, resources + + exporter = MarkdownExporter( preprocessors=[ EscapePreprocessor, ExtractAttachmentsPreprocessor, CustomRegexRemovePreprocessor, + UnHidePreprocessor, ], template_name="mdoutput", extra_template_basedirs=["./scripts/notebook_convert_templates"], From 0b532a4ed01320b236a90a628a8e54d4162b88e1 Mon Sep 17 00:00:00 2001 From: Louis Auneau Date: Wed, 26 Mar 2025 14:40:14 -0400 Subject: [PATCH 08/30] community: Azure Document Intelligence parser features not available fixed (#30370) Thank you for contributing to LangChain! - **Description:** Azure Document Intelligence OCR solution has a *feature* parameter that enables some features such as high-resolution document analysis, key-value pairs extraction, ... In langchain parser, you could be provided as a `analysis_feature` parameter to the constructor that was passed on the `DocumentIntelligenceClient`. However, according to the `DocumentIntelligenceClient` [API Reference](https://learn.microsoft.com/en-us/python/api/azure-ai-documentintelligence/azure.ai.documentintelligence.documentintelligenceclient?view=azure-python), this is not a valid constructor parameter. It was therefore remove and instead stored as a parser property that is used in the `begin_analyze_document`'s `features` parameter (see [API Reference](https://learn.microsoft.com/en-us/python/api/azure-ai-formrecognizer/azure.ai.formrecognizer.documentanalysisclient?view=azure-python#azure-ai-formrecognizer-documentanalysisclient-begin-analyze-document)). I also removed the check for "Supported features" since all features are supported out-of-the-box. Also I did not check if the provided `str` actually corresponds to the Azure package enumeration of features, since the `ValueError` when creating the enumeration object is pretty explicit. 
Last caveat, is that some features are not supported for some kind of documents. This is documented inside Microsoft documentation and exception are also explicit. - **Issue:** N/A - **Dependencies:** No - **Twitter handle:** @Louis___A --------- Co-authored-by: Louis Auneau --- .../parsers/doc_intelligence.py | 26 ++++++------------- .../parsers/test_doc_intelligence.py | 2 -- 2 files changed, 8 insertions(+), 20 deletions(-) diff --git a/libs/community/langchain_community/document_loaders/parsers/doc_intelligence.py b/libs/community/langchain_community/document_loaders/parsers/doc_intelligence.py index 3bcbec6d9a4..f2c9d6b4a14 100644 --- a/libs/community/langchain_community/document_loaders/parsers/doc_intelligence.py +++ b/libs/community/langchain_community/document_loaders/parsers/doc_intelligence.py @@ -45,32 +45,19 @@ class AzureAIDocumentIntelligenceParser(BaseBlobParser): if api_version is not None: kwargs["api_version"] = api_version - if analysis_features is not None: - _SUPPORTED_FEATURES = [ - DocumentAnalysisFeature.OCR_HIGH_RESOLUTION, - ] - - analysis_features = [ - DocumentAnalysisFeature(feature) for feature in analysis_features - ] - if any( - [feature not in _SUPPORTED_FEATURES for feature in analysis_features] - ): - logger.warning( - f"The current supported features are: " - f"{[f.value for f in _SUPPORTED_FEATURES]}. " - "Using other features may result in unexpected behavior." - ) - self.client = DocumentIntelligenceClient( endpoint=api_endpoint, credential=azure_credential or AzureKeyCredential(api_key), headers={"x-ms-useragent": "langchain-parser/1.0.0"}, - features=analysis_features, **kwargs, ) self.api_model = api_model self.mode = mode + self.features: Optional[List[DocumentAnalysisFeature]] = None + if analysis_features is not None: + self.features = [ + DocumentAnalysisFeature(feature) for feature in analysis_features + ] assert self.mode in ["single", "page", "markdown"] def _generate_docs_page(self, result: Any) -> Iterator[Document]: @@ -97,6 +84,7 @@ class AzureAIDocumentIntelligenceParser(BaseBlobParser): body=file_obj, content_type="application/octet-stream", output_content_format="markdown" if self.mode == "markdown" else "text", + features=self.features, ) result = poller.result() @@ -114,6 +102,7 @@ class AzureAIDocumentIntelligenceParser(BaseBlobParser): self.api_model, body=AnalyzeDocumentRequest(url_source=url), output_content_format="markdown" if self.mode == "markdown" else "text", + features=self.features, ) result = poller.result() @@ -131,6 +120,7 @@ class AzureAIDocumentIntelligenceParser(BaseBlobParser): self.api_model, body=AnalyzeDocumentRequest(bytes_source=bytes_source), output_content_format="markdown" if self.mode == "markdown" else "text", + features=self.features, ) result = poller.result() diff --git a/libs/community/tests/unit_tests/document_loaders/parsers/test_doc_intelligence.py b/libs/community/tests/unit_tests/document_loaders/parsers/test_doc_intelligence.py index 820d1e56ba1..b6affb2e13f 100644 --- a/libs/community/tests/unit_tests/document_loaders/parsers/test_doc_intelligence.py +++ b/libs/community/tests/unit_tests/document_loaders/parsers/test_doc_intelligence.py @@ -24,7 +24,6 @@ def test_doc_intelligence(mock_credential: MagicMock, mock_client: MagicMock) -> headers={ "x-ms-useragent": "langchain-parser/1.0.0", }, - features=None, ) assert parser.client == mock_client() assert parser.api_model == "prebuilt-layout" @@ -51,7 +50,6 @@ def test_doc_intelligence_with_analysis_features( headers={ "x-ms-useragent": 
"langchain-parser/1.0.0", }, - features=analysis_features, ) assert parser.client == mock_client() assert parser.api_model == "prebuilt-layout" From f68eaab44f6a7342b426a537c674e8f598c88a2f Mon Sep 17 00:00:00 2001 From: ccurme Date: Wed, 26 Mar 2025 14:56:54 -0400 Subject: [PATCH 09/30] tests: release 0.3.17 (#30502) --- libs/standard-tests/pyproject.toml | 4 ++-- libs/standard-tests/uv.lock | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libs/standard-tests/pyproject.toml b/libs/standard-tests/pyproject.toml index 1f9268c7715..7205082c19f 100644 --- a/libs/standard-tests/pyproject.toml +++ b/libs/standard-tests/pyproject.toml @@ -7,7 +7,7 @@ authors = [{ name = "Erick Friis", email = "erick@langchain.dev" }] license = { text = "MIT" } requires-python = "<4.0,>=3.9" dependencies = [ - "langchain-core<1.0.0,>=0.3.48", + "langchain-core<1.0.0,>=0.3.49", "pytest<9,>=7", "pytest-asyncio<1,>=0.20", "httpx<1,>=0.25.0", @@ -16,7 +16,7 @@ dependencies = [ "numpy<3,>=1.26.2", ] name = "langchain-tests" -version = "0.3.16" +version = "0.3.17" description = "Standard tests for LangChain implementations" readme = "README.md" diff --git a/libs/standard-tests/uv.lock b/libs/standard-tests/uv.lock index 5e32596473c..64d2b3a2292 100644 --- a/libs/standard-tests/uv.lock +++ b/libs/standard-tests/uv.lock @@ -288,7 +288,7 @@ wheels = [ [[package]] name = "langchain-core" -version = "0.3.48" +version = "0.3.49" source = { editable = "../core" } dependencies = [ { name = "jsonpatch" }, @@ -346,7 +346,7 @@ typing = [ [[package]] name = "langchain-tests" -version = "0.3.16" +version = "0.3.17" source = { editable = "." } dependencies = [ { name = "httpx" }, From 56629ed87b40026dc7668835c81dac0e6e1884c4 Mon Sep 17 00:00:00 2001 From: Adeel Ehsan Date: Thu, 27 Mar 2025 00:02:21 +0500 Subject: [PATCH 10/30] docs: updated the docs for vectara (#30398) Thank you for contributing to LangChain! **PR title**: Docs Update for vectara **Description:** Vectara is moved as langchain partner package and updating the docs according to that. --- .../vectara_chat.ipynb => chat/vectara.ipynb} | 193 ++++++++-- .../docs/integrations/providers/vectara.ipynb | 348 +++++++++++++++++ .../integrations/providers/vectara/index.mdx | 181 --------- .../self_query/vectara_self_query.ipynb | 358 ++++++++---------- .../integrations/vectorstores/vectara.ipynb | 187 +++++---- 5 files changed, 769 insertions(+), 498 deletions(-) rename docs/docs/integrations/{providers/vectara/vectara_chat.ipynb => chat/vectara.ipynb} (54%) create mode 100644 docs/docs/integrations/providers/vectara.ipynb delete mode 100644 docs/docs/integrations/providers/vectara/index.mdx diff --git a/docs/docs/integrations/providers/vectara/vectara_chat.ipynb b/docs/docs/integrations/chat/vectara.ipynb similarity index 54% rename from docs/docs/integrations/providers/vectara/vectara_chat.ipynb rename to docs/docs/integrations/chat/vectara.ipynb index 652f5891908..920d1318775 100644 --- a/docs/docs/integrations/providers/vectara/vectara_chat.ipynb +++ b/docs/docs/integrations/chat/vectara.ipynb @@ -5,21 +5,38 @@ "id": "134a0785", "metadata": {}, "source": [ - "# Vectara Chat\n", + "## Overview\n", "\n", "[Vectara](https://vectara.com/) is the trusted AI Assistant and Agent platform which focuses on enterprise readiness for mission-critical applications.\n", - "\n", "Vectara serverless RAG-as-a-service provides all the components of RAG behind an easy-to-use API, including:\n", "1. A way to extract text from files (PDF, PPT, DOCX, etc)\n", "2. 
ML-based chunking that provides state of the art performance.\n", "3. The [Boomerang](https://vectara.com/how-boomerang-takes-retrieval-augmented-generation-to-the-next-level-via-grounded-generation/) embeddings model.\n", "4. Its own internal vector database where text chunks and embedding vectors are stored.\n", - "5. A query service that automatically encodes the query into embedding, and retrieves the most relevant text segments (including support for [Hybrid Search](https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching) as well as multiple reranking options such as the [multi-lingual relevance reranker](https://www.vectara.com/blog/deep-dive-into-vectara-multilingual-reranker-v1-state-of-the-art-reranker-across-100-languages), [MMR](https://vectara.com/get-diverse-results-and-comprehensive-summaries-with-vectaras-mmr-reranker/), [UDF reranker](https://www.vectara.com/blog/rag-with-user-defined-functions-based-reranking). \n", + "5. A query service that automatically encodes the query into embedding, and retrieves the most relevant text segments, including support for [Hybrid Search](https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching) as well as multiple reranking options such as the [multi-lingual relevance reranker](https://www.vectara.com/blog/deep-dive-into-vectara-multilingual-reranker-v1-state-of-the-art-reranker-across-100-languages), [MMR](https://vectara.com/get-diverse-results-and-comprehensive-summaries-with-vectaras-mmr-reranker/), [UDF reranker](https://www.vectara.com/blog/rag-with-user-defined-functions-based-reranking). \n", "6. An LLM to for creating a [generative summary](https://docs.vectara.com/docs/learn/grounded-generation/grounded-generation-overview), based on the retrieved documents (context), including citations.\n", "\n", - "See the [Vectara API documentation](https://docs.vectara.com/docs/) for more information on how to use the API.\n", + "For more information:\n", + "- [Documentation](https://docs.vectara.com/docs/)\n", + "- [API Playground](https://docs.vectara.com/docs/rest-api/)\n", + "- [Quickstart](https://docs.vectara.com/docs/quickstart)\n", "\n", - "This notebook shows how to use Vectara's [Chat](https://docs.vectara.com/docs/api-reference/chat-apis/chat-apis-overview) functionality, which provides automatic storage of conversation history and ensures follow up questions consider that history." + "\n", + "This notebook shows how to use Vectara's [Chat](https://docs.vectara.com/docs/api-reference/chat-apis/chat-apis-overview) functionality, which provides automatic storage of conversation history and ensures follow up questions consider that history.\n", + "\n", + "### Setup\n", + "\n", + "To use the `VectaraVectorStore` you first need to install the partner package.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4a2f525-4805-4880-8bfa-18fe6f1cd1c7", + "metadata": {}, + "outputs": [], + "source": [ + "!uv pip install -U pip && uv pip install -qU langchain-vectara" ] }, { @@ -27,17 +44,19 @@ "id": "56372c5b", "metadata": {}, "source": [ - "# Getting Started\n", + "## Getting Started\n", "\n", "To get started, use the following steps:\n", - "1. If you don't already have one, [Sign up](https://www.vectara.com/integrations/langchain) for your free Vectara trial. Once you have completed your sign up you will have a Vectara customer ID. You can find your customer ID by clicking on your name, on the top-right of the Vectara console window.\n", + "1. 
If you don't already have one, [Sign up](https://www.vectara.com/integrations/langchain) for your free Vectara trial.\n", "2. Within your account you can create one or more corpora. Each corpus represents an area that stores text data upon ingest from input documents. To create a corpus, use the **\"Create Corpus\"** button. You then provide a name to your corpus as well as a description. Optionally you can define filtering attributes and apply some advanced options. If you click on your created corpus, you can see its name and corpus ID right on the top.\n", "3. Next you'll need to create API keys to access the corpus. Click on the **\"Access Control\"** tab in the corpus view and then the **\"Create API Key\"** button. Give your key a name, and choose whether you want query-only or query+index for your key. Click \"Create\" and you now have an active API key. Keep this key confidential. \n", "\n", - "To use LangChain with Vectara, you'll need to have these three values: `customer ID`, `corpus ID` and `api_key`.\n", - "You can provide those to LangChain in two ways:\n", + "To use LangChain with Vectara, you'll need to have these two values: `corpus_key` and `api_key`.\n", + "You can provide `VECTARA_API_KEY` to LangChain in two ways:\n", "\n", - "1. Include in your environment these three variables: `VECTARA_CUSTOMER_ID`, `VECTARA_CORPUS_ID` and `VECTARA_API_KEY`.\n", + "## Instantiation\n", + "\n", + "1. Include in your environment these two variables: `VECTARA_API_KEY`.\n", "\n", " For example, you can set these variables using os.environ and getpass as follows:\n", "\n", @@ -45,8 +64,6 @@ "import os\n", "import getpass\n", "\n", - "os.environ[\"VECTARA_CUSTOMER_ID\"] = getpass.getpass(\"Vectara Customer ID:\")\n", - "os.environ[\"VECTARA_CORPUS_ID\"] = getpass.getpass(\"Vectara Corpus ID:\")\n", "os.environ[\"VECTARA_API_KEY\"] = getpass.getpass(\"Vectara API Key:\")\n", "```\n", "\n", @@ -54,17 +71,16 @@ "\n", "```python\n", "vectara = Vectara(\n", - " vectara_customer_id=vectara_customer_id,\n", - " vectara_corpus_id=vectara_corpus_id,\n", - " vectara_api_key=vectara_api_key\n", - " )\n", + " vectara_api_key=vectara_api_key\n", + ")\n", "```\n", + "\n", "In this notebook we assume they are provided in the environment." 
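For illustration, a minimal sketch that combines the two options above: read `VECTARA_API_KEY` from the environment if it is already set, otherwise prompt for it, and then pass it explicitly to the constructor (the variable names here are illustrative, not part of the integration):

```python
import getpass
import os

from langchain_vectara import Vectara

# Prefer an already-exported environment variable; otherwise prompt for the key.
vectara_api_key = os.environ.get("VECTARA_API_KEY") or getpass.getpass("Vectara API Key:")

# Passing the key explicitly is equivalent to letting Vectara read it from the environment.
vectara = Vectara(vectara_api_key=vectara_api_key)
```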
] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "70c4e529", "metadata": { "tags": [] @@ -73,14 +89,15 @@ "source": [ "import os\n", "\n", - "os.environ[\"VECTARA_API_KEY\"] = \"\"\n", - "os.environ[\"VECTARA_CORPUS_ID\"] = \"\"\n", - "os.environ[\"VECTARA_CUSTOMER_ID\"] = \"\"\n", + "os.environ[\"VECTARA_API_KEY\"] = \"\"\n", + "os.environ[\"VECTARA_CORPUS_KEY\"] = \"\"\n", "\n", - "from langchain_community.vectorstores import Vectara\n", - "from langchain_community.vectorstores.vectara import (\n", - " RerankConfig,\n", - " SummaryConfig,\n", + "from langchain_vectara import Vectara\n", + "from langchain_vectara.vectorstores import (\n", + " CorpusConfig,\n", + " GenerationConfig,\n", + " MmrReranker,\n", + " SearchConfig,\n", " VectaraQueryConfig,\n", ")" ] @@ -101,7 +118,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "01c46e92", "metadata": { "tags": [] @@ -110,10 +127,11 @@ "source": [ "from langchain_community.document_loaders import TextLoader\n", "\n", - "loader = TextLoader(\"state_of_the_union.txt\")\n", + "loader = TextLoader(\"../document_loaders/example_data/state_of_the_union.txt\")\n", "documents = loader.load()\n", "\n", - "vectara = Vectara.from_documents(documents, embedding=None)" + "corpus_key = os.getenv(\"VECTARA_CORPUS_KEY\")\n", + "vectara = Vectara.from_documents(documents, embedding=None, corpus_key=corpus_key)" ] }, { @@ -126,18 +144,29 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "1b41a10b-bf68-4689-8f00-9aed7675e2ab", "metadata": { "tags": [] }, "outputs": [], "source": [ - "summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang=\"eng\")\n", - "rerank_config = RerankConfig(reranker=\"mmr\", rerank_k=50, mmr_diversity_bias=0.2)\n", - "config = VectaraQueryConfig(\n", - " k=10, lambda_val=0.005, rerank_config=rerank_config, summary_config=summary_config\n", + "generation_config = GenerationConfig(\n", + " max_used_search_results=7,\n", + " response_language=\"eng\",\n", + " generation_preset_name=\"vectara-summary-ext-24-05-med-omni\",\n", + " enable_factual_consistency_score=True,\n", ")\n", + "search_config = SearchConfig(\n", + " corpora=[CorpusConfig(corpus_key=corpus_key, limit=25)],\n", + " reranker=MmrReranker(diversity_bias=0.2),\n", + ")\n", + "\n", + "config = VectaraQueryConfig(\n", + " search=search_config,\n", + " generation=generation_config,\n", + ")\n", + "\n", "\n", "bot = vectara.as_chat(config)" ] @@ -147,12 +176,15 @@ "id": "83f38c18-ac82-45f4-a79e-8b37ce1ae115", "metadata": {}, "source": [ + "\n", + "## Invocation\n", + "\n", "Here's an example of asking a question with no chat history" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "bc672290-8a8b-4828-a90c-f1bbdd6b3920", "metadata": { "tags": [] @@ -161,10 +193,10 @@ { "data": { "text/plain": [ - "'The President expressed gratitude to Justice Breyer and highlighted the significance of nominating Ketanji Brown Jackson to the Supreme Court, praising her legal expertise and commitment to upholding excellence [1]. The President also reassured the public about the situation with gas prices and the conflict in Ukraine, emphasizing unity with allies and the belief that the world will emerge stronger from these challenges [2][4]. Additionally, the President shared personal experiences related to economic struggles and the importance of passing the American Rescue Plan to support those in need [3]. 
The focus was also on job creation and economic growth, acknowledging the impact of inflation on families [5]. While addressing cancer as a significant issue, the President discussed plans to enhance cancer research and support for patients and families [7].'" + "'The president stated that nominating someone to serve on the United States Supreme Court is one of the most serious constitutional responsibilities. He nominated Circuit Court of Appeals Judge Ketanji Brown Jackson, describing her as one of the nation’s top legal minds who will continue Justice Breyer’s legacy of excellence and noting her experience as a former top litigator in private practice [1].'" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -183,7 +215,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "9c95460b-7116-4155-a9d2-c0fb027ee592", "metadata": { "tags": [] @@ -192,10 +224,10 @@ { "data": { "text/plain": [ - "\"In his remarks, the President specified that Ketanji Brown Jackson is succeeding Justice Breyer on the United States Supreme Court[1]. The President praised Jackson as a top legal mind who will continue Justice Breyer's legacy of excellence. The nomination of Jackson was highlighted as a significant constitutional responsibility of the President[1]. The President emphasized the importance of this nomination and the qualities that Jackson brings to the role. The focus was on the transition from Justice Breyer to Judge Ketanji Brown Jackson on the Supreme Court[1].\"" + "'Yes, the president mentioned that Ketanji Brown Jackson succeeded Justice Breyer [1].'" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -217,7 +249,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "936dc62f", "metadata": { "tags": [] @@ -227,14 +259,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "Judge Ketanji Brown Jackson is a nominee for the United States Supreme Court, known for her legal expertise and experience as a former litigator. She is praised for her potential to continue the legacy of excellence on the Court[1]. While the search results provide information on various topics like innovation, economic growth, and healthcare initiatives, they do not directly address Judge Ketanji Brown Jackson's specific accomplishments. Therefore, I do not have enough information to answer this question." + "The president acknowledged the significant impact of COVID-19 on the nation, expressing understanding of the public's fatigue and frustration. He emphasized the need to view COVID-19 not as a partisan issue but as a serious disease, urging unity among Americans. The president highlighted the progress made, noting that severe cases have decreased significantly, and mentioned new CDC guidelines allowing most Americans to be mask-free. He also pointed out the efforts to vaccinate the nation and provide economic relief, and the ongoing commitment to vaccinate the world [2], [3], [5]." 
] } ], "source": [ "output = {}\n", "curr_key = None\n", - "for chunk in bot.stream(\"what about her accopmlishments?\"):\n", + "for chunk in bot.stream(\"what did he said about the covid?\"):\n", " for key in chunk:\n", " if key not in output:\n", " output[key] = chunk[key]\n", @@ -244,6 +276,83 @@ " print(chunk[key], end=\"\", flush=True)\n", " curr_key = key" ] + }, + { + "cell_type": "markdown", + "id": "cefdf72b1d90085a", + "metadata": { + "collapsed": false + }, + "source": [ + "## Chaining\n", + "\n", + "For additional capabilities you can use chaining." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "167bc806-395e-46bf-80cc-3c5d43164f42", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "So, the president talked about how the COVID-19 sickness has affected a lot of people in the country. He said that it's important for everyone to work together to fight the sickness, no matter what political party they are in. The president also mentioned that they are working hard to give vaccines to people to help protect them from getting sick. They are also giving money and help to people who need it, like food, housing, and cheaper health insurance. The president also said that they are sending vaccines to many other countries to help people all around the world stay healthy.\n" + ] + } + ], + "source": [ + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "from langchain_openai.chat_models import ChatOpenAI\n", + "\n", + "llm = ChatOpenAI(temperature=0)\n", + "\n", + "prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\n", + " \"system\",\n", + " \"You are a helpful assistant that explains the stuff to a five year old. Vectara is providing the answer.\",\n", + " ),\n", + " (\"human\", \"{vectara_response}\"),\n", + " ]\n", + ")\n", + "\n", + "\n", + "def get_vectara_response(question: dict) -> str:\n", + " \"\"\"\n", + " Calls Vectara as_chat and returns the answer string. This encapsulates\n", + " the Vectara call.\n", + " \"\"\"\n", + " try:\n", + " response = bot.invoke(question[\"question\"])\n", + " return response[\"answer\"]\n", + " except Exception as e:\n", + " return \"I'm sorry, I couldn't get an answer from Vectara.\"\n", + "\n", + "\n", + "# Create the chain\n", + "chain = get_vectara_response | prompt | llm | StrOutputParser()\n", + "\n", + "\n", + "# Invoke the chain\n", + "result = chain.invoke({\"question\": \"what did he say about the covid?\"})\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "id": "3b8bb761-db4a-436c-8939-41e9f8652083", + "metadata": { + "collapsed": false + }, + "source": [ + "## API reference\n", + "\n", + "You can look at the [Chat](https://docs.vectara.com/docs/api-reference/chat-apis/chat-apis-overview) documentation for the details." 
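Because the composed chain above is itself a LangChain runnable, it can also be streamed. A minimal sketch, assuming the `chain` from the chaining example has been defined:

```python
# Stream the final answer token by token; the Vectara step runs first,
# then the OpenAI model streams its simplified explanation.
for token in chain.stream({"question": "what did he say about the covid?"}):
    print(token, end="", flush=True)
```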
+ ] } ], "metadata": { @@ -262,7 +371,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.12.0" } }, "nbformat": 4, diff --git a/docs/docs/integrations/providers/vectara.ipynb b/docs/docs/integrations/providers/vectara.ipynb new file mode 100644 index 00000000000..2c76e824a77 --- /dev/null +++ b/docs/docs/integrations/providers/vectara.ipynb @@ -0,0 +1,348 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "559f8e0e", + "metadata": {}, + "source": [ + "# Vectara\n", + "\n", + "[Vectara](https://vectara.com/) is the trusted AI Assistant and Agent platform which focuses on enterprise readiness for mission-critical applications.\n", + "Vectara serverless RAG-as-a-service provides all the components of RAG behind an easy-to-use API, including:\n", + "1. A way to extract text from files (PDF, PPT, DOCX, etc)\n", + "2. ML-based chunking that provides state of the art performance.\n", + "3. The [Boomerang](https://vectara.com/how-boomerang-takes-retrieval-augmented-generation-to-the-next-level-via-grounded-generation/) embeddings model.\n", + "4. Its own internal vector database where text chunks and embedding vectors are stored.\n", + "5. A query service that automatically encodes the query into an embedding, and retrieves the most relevant text segments, including support for [Hybrid Search](https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching) as well as multiple reranking options such as the [multi-lingual relevance reranker](https://www.vectara.com/blog/deep-dive-into-vectara-multilingual-reranker-v1-state-of-the-art-reranker-across-100-languages), [MMR](https://vectara.com/get-diverse-results-and-comprehensive-summaries-with-vectaras-mmr-reranker/), [UDF reranker](https://www.vectara.com/blog/rag-with-user-defined-functions-based-reranking). \n", + "6. An LLM for creating a [generative summary](https://docs.vectara.com/docs/learn/grounded-generation/grounded-generation-overview), based on the retrieved documents (context), including citations.\n", + "\n", + "For more information:\n", + "- [Documentation](https://docs.vectara.com/docs/)\n", + "- [API Playground](https://docs.vectara.com/docs/rest-api/)\n", + "- [Quickstart](https://docs.vectara.com/docs/quickstart)\n", + "\n", + "This notebook shows how to use the basic retrieval functionality, when utilizing Vectara just as a Vector Store (without summarization), including: `similarity_search` and `similarity_search_with_score` as well as using the LangChain `as_retriever` functionality.\n", + "\n", + "\n", + "## Setup\n", + "\n", + "To use the `VectaraVectorStore` you first need to install the partner package.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dfdf03ba-d6f5-4b1e-86d3-a65c4bc99aa1", + "metadata": {}, + "outputs": [], + "source": [ + "!uv pip install -U pip && uv pip install -qU langchain-vectara" + ] + }, + { + "cell_type": "markdown", + "id": "e97dcf11", + "metadata": {}, + "source": [ + "# Getting Started\n", + "\n", + "To get started, use the following steps:\n", + "1. If you don't already have one, [Sign up](https://www.vectara.com/integrations/langchain) for your free Vectara trial.\n", + "2. Within your account you can create one or more corpora. Each corpus represents an area that stores text data upon ingest from input documents. To create a corpus, use the **\"Create Corpus\"** button. You then provide a name to your corpus as well as a description. 
Optionally you can define filtering attributes and apply some advanced options. If you click on your created corpus, you can see its name and corpus ID right on the top.\n", + "3. Next you'll need to create API keys to access the corpus. Click on the **\"Access Control\"** tab in the corpus view and then the **\"Create API Key\"** button. Give your key a name, and choose whether you want query-only or query+index for your key. Click \"Create\" and you now have an active API key. Keep this key confidential. \n", + "\n", + "To use LangChain with Vectara, you'll need to have these two values: `corpus_key` and `api_key`.\n", + "You can provide `VECTARA_API_KEY` to LangChain in two ways:\n", + "\n", + "1. Include in your environment these two variables: `VECTARA_API_KEY`.\n", + "\n", + " For example, you can set these variables using os.environ and getpass as follows:\n", + "\n", + "```python\n", + "import os\n", + "import getpass\n", + "\n", + "os.environ[\"VECTARA_API_KEY\"] = getpass.getpass(\"Vectara API Key:\")\n", + "```\n", + "\n", + "2. Add them to the `Vectara` vectorstore constructor:\n", + "\n", + "```python\n", + "vectara = Vectara(\n", + " vectara_api_key=vectara_api_key\n", + ")\n", + "```\n", + "\n", + "In this notebook we assume they are provided in the environment." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "aac7a9a6", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"VECTARA_API_KEY\"] = \"\"\n", + "os.environ[\"VECTARA_CORPUS_KEY\"] = \"VECTARA_CORPUS_KEY\"\n", + "\n", + "from langchain_vectara import Vectara\n", + "from langchain_vectara.vectorstores import (\n", + " ChainReranker,\n", + " CorpusConfig,\n", + " CustomerSpecificReranker,\n", + " File,\n", + " GenerationConfig,\n", + " MmrReranker,\n", + " SearchConfig,\n", + " VectaraQueryConfig,\n", + ")\n", + "\n", + "vectara = Vectara(vectara_api_key=os.getenv(\"VECTARA_API_KEY\"))" + ] + }, + { + "cell_type": "markdown", + "id": "875ffb7e", + "metadata": {}, + "source": [ + "First we load the state-of-the-union text into Vectara.\n", + "\n", + "Note that we use the add_files interface which does not require any local processing or chunking - Vectara receives the file content and performs all the necessary pre-processing, chunking and embedding of the file into its knowledge store.\n", + "\n", + "In this case it uses a .txt file but the same works for many other [file types](https://docs.vectara.com/docs/api-reference/indexing-apis/file-upload/file-upload-filetypes)." 
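As a hedged illustration of the multi-format support mentioned above, the same `File`/`add_files` pattern shown in the next cell could also be used for other document types; the PDF path below is a hypothetical placeholder:

```python
from langchain_vectara.vectorstores import File

# A minimal sketch, assuming `vectara` and `corpus_key` are set up as in this notebook.
# Vectara parses, chunks and embeds the uploaded file on the server side.
pdf_file = File(
    file_path="path/to/annual_report.pdf",  # hypothetical file path
    metadata={"source": "pdf_file"},
)
vectara.add_files([pdf_file], corpus_key)
```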
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "be0a4973", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['state_of_the_union.txt']" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "corpus_key = os.getenv(\"VECTARA_CORPUS_KEY\")\n", + "file_obj = File(\n", + " file_path=\"../document_loaders/example_data/state_of_the_union.txt\",\n", + " metadata={\"source\": \"text_file\"},\n", + ")\n", + "vectara.add_files([file_obj], corpus_key)" + ] + }, + { + "cell_type": "markdown", + "id": "22a6b953", + "metadata": {}, + "source": [ + "## Vectara RAG (retrieval augmented generation)\n", + "\n", + "We now create a `VectaraQueryConfig` object to control the retrieval and summarization options:\n", + "* We enable summarization, specifying we would like the LLM to pick the top 7 matching chunks and respond in English\n", + "\n", + "Using this configuration, let's create a LangChain `Runnable` object that encapsulates the full Vectara RAG pipeline, using the `as_rag` method:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "9ecda054-96a8-4a91-aeae-32006efb1ac8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"President Biden discussed several key issues in his recent statements. He emphasized the importance of keeping schools open and noted that with a high vaccination rate and reduced hospitalizations, most Americans can safely return to normal activities without masks [1]. He addressed the need to hold social media platforms accountable for their impact on children and called for stronger privacy protections and mental health services [2]. Biden also announced measures against Russia, including preventing its central bank from defending the Ruble and targeting Russian oligarchs' assets, as part of efforts to weaken Russia's economy and military [3]. Additionally, he highlighted the importance of protecting women's rights, specifically the right to choose as affirmed in Roe v. Wade [5]. 
Lastly, he advocated for funding the police with necessary resources and training to ensure community safety [6].\"" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "generation_config = GenerationConfig(\n", + " max_used_search_results=7,\n", + " response_language=\"eng\",\n", + " generation_preset_name=\"vectara-summary-ext-24-05-med-omni\",\n", + " enable_factual_consistency_score=True,\n", + ")\n", + "search_config = SearchConfig(\n", + " corpora=[CorpusConfig(corpus_key=corpus_key)],\n", + " limit=25,\n", + " reranker=ChainReranker(\n", + " rerankers=[\n", + " CustomerSpecificReranker(reranker_id=\"rnk_272725719\", limit=100),\n", + " MmrReranker(diversity_bias=0.2, limit=100),\n", + " ]\n", + " ),\n", + ")\n", + "\n", + "config = VectaraQueryConfig(\n", + " search=search_config,\n", + " generation=generation_config,\n", + ")\n", + "\n", + "query_str = \"what did Biden say?\"\n", + "\n", + "rag = vectara.as_rag(config)\n", + "rag.invoke(query_str)[\"answer\"]" + ] + }, + { + "cell_type": "markdown", + "id": "cd825d63-93a0-4e45-a455-bfabb01ee1a1", + "metadata": {}, + "source": [ + "We can also use the streaming interface like this:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "27f01330-8917-4eff-b603-59ab2571a4d2", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "President Biden emphasized several key points in his statements. He highlighted the importance of keeping schools open and noted that with a high vaccination rate and reduced hospitalizations, most Americans can safely return to normal activities without masks [1]. He addressed the need to hold social media platforms accountable for their impact on children and called for stronger privacy protections and mental health services [2]. Biden also discussed measures against Russia, including preventing their central bank from defending the Ruble and targeting Russian oligarchs' assets [3]. Additionally, he reaffirmed the commitment to protect women's rights, particularly the right to choose as affirmed in Roe v. Wade [5]. Lastly, he advocated for funding the police to ensure community safety [6]." + ] + } + ], + "source": [ + "output = {}\n", + "curr_key = None\n", + "for chunk in rag.stream(query_str):\n", + " for key in chunk:\n", + " if key not in output:\n", + " output[key] = chunk[key]\n", + " else:\n", + " output[key] += chunk[key]\n", + " if key == \"answer\":\n", + " print(chunk[key], end=\"\", flush=True)\n", + " curr_key = key" + ] + }, + { + "cell_type": "markdown", + "id": "8f16bf8d", + "metadata": {}, + "source": [ + "For more details about Vectara as VectorStore [go to this notebook](../vectorstores/vectara.ipynb)." + ] + }, + { + "cell_type": "markdown", + "id": "d49a91d2-9c53-48cb-8065-a3ba1292e8d0", + "metadata": {}, + "source": [ + "## Vectara Chat\n", + "\n", + "In most uses of LangChain to create chatbots, one must integrate a special `memory` component that maintains the history of chat sessions and then uses that history to ensure the chatbot is aware of conversation history.\n", + "\n", + "With Vectara Chat - all of that is performed in the backend by Vectara automatically." 
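To make the server-side history concrete, here is a minimal multi-turn sketch; it assumes the `config` and `bot = vectara.as_chat(config)` created in the next cell, and the follow-up question deliberately omits any context so that Vectara must resolve it from the stored chat history:

```python
# A minimal sketch, assuming `bot = vectara.as_chat(config)` as in the cell below.
first = bot.invoke("What did the president say about Ketanji Brown Jackson?")["answer"]

# No names are repeated here; Vectara answers using the history it keeps server-side.
follow_up = bot.invoke("Did he mention who she is succeeding?")["answer"]

print(first)
print(follow_up)
```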
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "f57264ec-e8b5-4d55-9c16-54898d506f73", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'The president stated that nominating someone to serve on the United States Supreme Court is one of the most serious constitutional responsibilities he has. He nominated Circuit Court of Appeals Judge Ketanji Brown Jackson, describing her as one of the nation’s top legal minds who will continue Justice Breyer’s legacy of excellence [1].'" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "generation_config = GenerationConfig(\n", + " max_used_search_results=7,\n", + " response_language=\"eng\",\n", + " generation_preset_name=\"vectara-summary-ext-24-05-med-omni\",\n", + " enable_factual_consistency_score=True,\n", + ")\n", + "search_config = SearchConfig(\n", + " corpora=[CorpusConfig(corpus_key=corpus_key, limit=25)],\n", + " reranker=MmrReranker(diversity_bias=0.2),\n", + ")\n", + "\n", + "config = VectaraQueryConfig(\n", + " search=search_config,\n", + " generation=generation_config,\n", + ")\n", + "\n", + "\n", + "bot = vectara.as_chat(config)\n", + "\n", + "bot.invoke(\"What did the president say about Ketanji Brown Jackson?\")[\"answer\"]" + ] + }, + { + "cell_type": "markdown", + "id": "13714687-672d-47af-997a-61bb9dd66923", + "metadata": {}, + "source": [ + "For more details about Vectara chat [go to this notebook](../chat/vectara.ipynb)." + ] + }, + { + "cell_type": "markdown", + "id": "baf687dc-08c4-49af-98aa-0359e2591f2e", + "metadata": {}, + "source": [ + "## Vectara as self-querying retriever\n", + "Vectara offers Intelligent Query Rewriting option which enhances search precision by automatically generating metadata filter expressions from natural language queries. This capability analyzes user queries, extracts relevant metadata filters, and rephrases the query to focus on the core information need. For more details [go to this notebook](../retrievers/self_query/vectara_self_query.ipynb)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8060a423-b291-4166-8fd7-ba0e01692b51", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/integrations/providers/vectara/index.mdx b/docs/docs/integrations/providers/vectara/index.mdx deleted file mode 100644 index 10de5f001f1..00000000000 --- a/docs/docs/integrations/providers/vectara/index.mdx +++ /dev/null @@ -1,181 +0,0 @@ -# Vectara - ->[Vectara](https://vectara.com/) provides a Trusted Generative AI platform, allowing organizations to rapidly create a ChatGPT-like experience (an AI assistant) -> which is grounded in the data, documents, and knowledge that they have (technically, it is Retrieval-Augmented-Generation-as-a-service). - -**Vectara Overview:** -[Vectara](https://vectara.com/) is the trusted AI Assistant and Agent platform which focuses on enterprise readiness for mission-critical applications. -Vectara serverless RAG-as-a-service provides all the components of RAG behind an easy-to-use API, including: -1. 
A way to extract text from files (PDF, PPT, DOCX, etc) -2. ML-based chunking that provides state of the art performance. -3. The [Boomerang](https://vectara.com/how-boomerang-takes-retrieval-augmented-generation-to-the-next-level-via-grounded-generation/) embeddings model. -4. Its own internal vector database where text chunks and embedding vectors are stored. -5. A query service that automatically encodes the query into embedding, and retrieves the most relevant text segments, including support for [Hybrid Search](https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching) as well as multiple reranking options such as the [multi-lingual relevance reranker](https://www.vectara.com/blog/deep-dive-into-vectara-multilingual-reranker-v1-state-of-the-art-reranker-across-100-languages), [MMR](https://vectara.com/get-diverse-results-and-comprehensive-summaries-with-vectaras-mmr-reranker/), [UDF reranker](https://www.vectara.com/blog/rag-with-user-defined-functions-based-reranking). -6. An LLM to for creating a [generative summary](https://docs.vectara.com/docs/learn/grounded-generation/grounded-generation-overview), based on the retrieved documents (context), including citations. - -For more information: -- [Documentation](https://docs.vectara.com/docs/) -- [API Playground](https://docs.vectara.com/docs/rest-api/) -- [Quickstart](https://docs.vectara.com/docs/quickstart) - -## Installation and Setup - -To use `Vectara` with LangChain no special installation steps are required. -To get started, [sign up](https://vectara.com/integrations/langchain) for a free Vectara trial, -and follow the [quickstart](https://docs.vectara.com/docs/quickstart) guide to create a corpus and an API key. -Once you have these, you can provide them as arguments to the Vectara `vectorstore`, or you can set them as environment variables. - -- export `VECTARA_CUSTOMER_ID`="your_customer_id" -- export `VECTARA_CORPUS_ID`="your_corpus_id" -- export `VECTARA_API_KEY`="your-vectara-api-key" - -## Vectara as a Vector Store - -There exists a wrapper around the Vectara platform, allowing you to use it as a `vectorstore` in LangChain: - -To import this vectorstore: -```python -from langchain_community.vectorstores import Vectara -``` - -To create an instance of the Vectara vectorstore: -```python -vectara = Vectara( - vectara_customer_id=customer_id, - vectara_corpus_id=corpus_id, - vectara_api_key=api_key -) -``` -The `customer_id`, `corpus_id` and `api_key` are optional, and if they are not supplied will be read from -the environment variables `VECTARA_CUSTOMER_ID`, `VECTARA_CORPUS_ID` and `VECTARA_API_KEY`, respectively. - -### Adding Texts or Files - -After you have the vectorstore, you can `add_texts` or `add_documents` as per the standard `VectorStore` interface, for example: - -```python -vectara.add_texts(["to be or not to be", "that is the question"]) -``` - -Since Vectara supports file-upload in the platform, we also added the ability to upload files (PDF, TXT, HTML, PPT, DOC, etc) directly. -When using this method, each file is uploaded directly to the Vectara backend, processed and chunked optimally there, so you don't have to use the LangChain document loader or chunking mechanism. - -As an example: - -```python -vectara.add_files(["path/to/file1.pdf", "path/to/file2.pdf",...]) -``` - -Of course you do not have to add any data, and instead just connect to an existing Vectara corpus where data may already be indexed. 
- -### Querying the VectorStore - -To query the Vectara vectorstore, you can use the `similarity_search` method (or `similarity_search_with_score`), which takes a query string and returns a list of results: -```python -results = vectara.similarity_search_with_score("what is LangChain?") -``` -The results are returned as a list of relevant documents, and a relevance score of each document. - -In this case, we used the default retrieval parameters, but you can also specify the following additional arguments in `similarity_search` or `similarity_search_with_score`: -- `k`: number of results to return (defaults to 5) -- `lambda_val`: the [lexical matching](https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching) factor for hybrid search (defaults to 0.025) -- `filter`: a [filter](https://docs.vectara.com/docs/common-use-cases/filtering-by-metadata/filter-overview) to apply to the results (default None) -- `n_sentence_context`: number of sentences to include before/after the actual matching segment when returning results. This defaults to 2. -- `rerank_config`: can be used to specify reranker for thr results - - `reranker`: mmr, rerank_multilingual_v1 or none. Note that "rerank_multilingual_v1" is a Scale only feature - - `rerank_k`: number of results to use for reranking - - `mmr_diversity_bias`: 0 = no diversity, 1 = full diversity. This is the lambda parameter in the MMR formula and is in the range 0...1 - -To get results without the relevance score, you can simply use the 'similarity_search' method: -```python -results = vectara.similarity_search("what is LangChain?") -``` - -## Vectara for Retrieval Augmented Generation (RAG) - -Vectara provides a full RAG pipeline, including generative summarization. To use it as a complete RAG solution, you can use the `as_rag` method. -There are a few additional parameters that can be specified in the `VectaraQueryConfig` object to control retrieval and summarization: -* k: number of results to return -* lambda_val: the lexical matching factor for hybrid search -* summary_config (optional): can be used to request an LLM summary in RAG - - is_enabled: True or False - - max_results: number of results to use for summary generation - - response_lang: language of the response summary, in ISO 639-2 format (e.g. 'en', 'fr', 'de', etc) -* rerank_config (optional): can be used to specify Vectara Reranker of the results - - reranker: mmr, rerank_multilingual_v1 or none - - rerank_k: number of results to use for reranking - - mmr_diversity_bias: 0 = no diversity, 1 = full diversity. - This is the lambda parameter in the MMR formula and is in the range 0...1 - -For example: - -```python -summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang='eng') -rerank_config = RerankConfig(reranker="mmr", rerank_k=50, mmr_diversity_bias=0.2) -config = VectaraQueryConfig(k=10, lambda_val=0.005, rerank_config=rerank_config, summary_config=summary_config) -``` -Then you can use the `as_rag` method to create a RAG pipeline: - -```python -query_str = "what did Biden say?" - -rag = vectara.as_rag(config) -rag.invoke(query_str)['answer'] -``` - -The `as_rag` method returns a `VectaraRAG` object, which behaves just like any LangChain Runnable, including the `invoke` or `stream` methods. - -## Vectara Chat - -The RAG functionality can be used to create a chatbot. 
For example, you can create a simple chatbot that responds to user input: - -```python -summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang='eng') -rerank_config = RerankConfig(reranker="mmr", rerank_k=50, mmr_diversity_bias=0.2) -config = VectaraQueryConfig(k=10, lambda_val=0.005, rerank_config=rerank_config, summary_config=summary_config) - -query_str = "what did Biden say?" -bot = vectara.as_chat(config) -bot.invoke(query_str)['answer'] -``` - -The main difference is the following: with `as_chat` Vectara internally tracks the chat history and conditions each response on the full chat history. -There is no need to keep that history locally to LangChain, as Vectara will manage it internally. - -## Vectara as a LangChain retriever only - -If you want to use Vectara as a retriever only, you can use the `as_retriever` method, which returns a `VectaraRetriever` object. -```python -retriever = vectara.as_retriever(config=config) -retriever.invoke(query_str) -``` - -Like with as_rag, you provide a `VectaraQueryConfig` object to control the retrieval parameters. -In most cases you would not enable the summary_config, but it is left as an option for backwards compatibility. -If no summary is requested, the response will be a list of relevant documents, each with a relevance score. -If a summary is requested, the response will be a list of relevant documents as before, plus an additional document that includes the generative summary. - -## Hallucination Detection score - -Vectara created [HHEM](https://huggingface.co/vectara/hallucination_evaluation_model) - an open source model that can be used to evaluate RAG responses for factual consistency. -As part of the Vectara RAG, the "Factual Consistency Score" (or FCS), which is an improved version of the open source HHEM is made available via the API. -This is automatically included in the output of the RAG pipeline - -```python -summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang='eng') -rerank_config = RerankConfig(reranker="mmr", rerank_k=50, mmr_diversity_bias=0.2) -config = VectaraQueryConfig(k=10, lambda_val=0.005, rerank_config=rerank_config, summary_config=summary_config) - -rag = vectara.as_rag(config) -resp = rag.invoke(query_str) -print(resp['answer']) -print(f"Vectara FCS = {resp['fcs']}") -``` - -## Example Notebooks - -For a more detailed examples of using Vectara with LangChain, see the following example notebooks: -* [this notebook](/docs/integrations/vectorstores/vectara) shows how to use Vectara: with full RAG or just as a retriever. -* [this notebook](/docs/integrations/retrievers/self_query/vectara_self_query) shows the self-query capability with Vectara. -* [this notebook](/docs/integrations/providers/vectara/vectara_chat) shows how to build a chatbot with Langchain and Vectara - diff --git a/docs/docs/integrations/retrievers/self_query/vectara_self_query.ipynb b/docs/docs/integrations/retrievers/self_query/vectara_self_query.ipynb index ab3dbe4f301..fc5f8938ae1 100644 --- a/docs/docs/integrations/retrievers/self_query/vectara_self_query.ipynb +++ b/docs/docs/integrations/retrievers/self_query/vectara_self_query.ipynb @@ -8,7 +8,6 @@ "# Vectara self-querying \n", "\n", "[Vectara](https://vectara.com/) is the trusted AI Assistant and Agent platform which focuses on enterprise readiness for mission-critical applications.\n", - "\n", "Vectara serverless RAG-as-a-service provides all the components of RAG behind an easy-to-use API, including:\n", "1. 
A way to extract text from files (PDF, PPT, DOCX, etc)\n", "2. ML-based chunking that provides state of the art performance.\n", @@ -17,9 +16,27 @@ "5. A query service that automatically encodes the query into embedding, and retrieves the most relevant text segments, including support for [Hybrid Search](https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching) as well as multiple reranking options such as the [multi-lingual relevance reranker](https://www.vectara.com/blog/deep-dive-into-vectara-multilingual-reranker-v1-state-of-the-art-reranker-across-100-languages), [MMR](https://vectara.com/get-diverse-results-and-comprehensive-summaries-with-vectaras-mmr-reranker/), [UDF reranker](https://www.vectara.com/blog/rag-with-user-defined-functions-based-reranking). \n", "6. An LLM to for creating a [generative summary](https://docs.vectara.com/docs/learn/grounded-generation/grounded-generation-overview), based on the retrieved documents (context), including citations.\n", "\n", - "See the [Vectara API documentation](https://docs.vectara.com/docs/) for more information on how to use the API.\n", + "For more information:\n", + "- [Documentation](https://docs.vectara.com/docs/)\n", + "- [API Playground](https://docs.vectara.com/docs/rest-api/)\n", + "- [Quickstart](https://docs.vectara.com/docs/quickstart)\n", "\n", - "This notebook shows how to use `SelfQueryRetriever` with Vectara." + "\n", + "This notebook shows how to use `Vectara` as `SelfQueryRetriever`.\n", + "\n", + "## Setup\n", + "\n", + "To use the `VectaraVectorStore` you first need to install the partner package.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07f3f1a4-f552-4d07-ba48-18fb5d8641c6", + "metadata": {}, + "outputs": [], + "source": [ + "!uv pip install -U pip && uv pip install -qU langchain-vectara" ] }, { @@ -30,14 +47,14 @@ "# Getting Started\n", "\n", "To get started, use the following steps:\n", - "1. If you don't already have one, [Sign up](https://www.vectara.com/integrations/langchain) for your free Vectara trial. Once you have completed your sign up you will have a Vectara customer ID. You can find your customer ID by clicking on your name, on the top-right of the Vectara console window.\n", + "1. If you don't already have one, [Sign up](https://www.vectara.com/integrations/langchain) for your free Vectara trial.\n", "2. Within your account you can create one or more corpora. Each corpus represents an area that stores text data upon ingest from input documents. To create a corpus, use the **\"Create Corpus\"** button. You then provide a name to your corpus as well as a description. Optionally you can define filtering attributes and apply some advanced options. If you click on your created corpus, you can see its name and corpus ID right on the top.\n", "3. Next you'll need to create API keys to access the corpus. Click on the **\"Access Control\"** tab in the corpus view and then the **\"Create API Key\"** button. Give your key a name, and choose whether you want query-only or query+index for your key. Click \"Create\" and you now have an active API key. Keep this key confidential. \n", "\n", - "To use LangChain with Vectara, you'll need to have these three values: `customer ID`, `corpus ID` and `api_key`.\n", - "You can provide those to LangChain in two ways:\n", + "To use LangChain with Vectara, you'll need to have these two values: `corpus_key` and `api_key`.\n", + "You can provide `VECTARA_API_KEY` to LangChain in two ways:\n", "\n", - "1. 
Include in your environment these three variables: `VECTARA_CUSTOMER_ID`, `VECTARA_CORPUS_ID` and `VECTARA_API_KEY`.\n", + "1. Include in your environment these two variables: `VECTARA_API_KEY`.\n", "\n", " For example, you can set these variables using os.environ and getpass as follows:\n", "\n", @@ -45,8 +62,6 @@ "import os\n", "import getpass\n", "\n", - "os.environ[\"VECTARA_CUSTOMER_ID\"] = getpass.getpass(\"Vectara Customer ID:\")\n", - "os.environ[\"VECTARA_CORPUS_ID\"] = getpass.getpass(\"Vectara Corpus ID:\")\n", "os.environ[\"VECTARA_API_KEY\"] = getpass.getpass(\"Vectara API Key:\")\n", "```\n", "\n", @@ -54,14 +69,11 @@ "\n", "```python\n", "vectara = Vectara(\n", - " vectara_customer_id=vectara_customer_id,\n", - " vectara_corpus_id=vectara_corpus_id,\n", - " vectara_api_key=vectara_api_key\n", - " )\n", + " vectara_api_key=vectara_api_key\n", + ")\n", "```\n", - "In this notebook we assume they are provided in the environment.\n", "\n", - "**Notes:** The self-query retriever requires you to have `lark` installed (`pip install lark`). " + "In this notebook we assume they are provided in the environment." ] }, { @@ -71,14 +83,14 @@ "source": [ "## Connecting to Vectara from LangChain\n", "\n", - "In this example, we assume that you've created an account and a corpus, and added your `VECTARA_CUSTOMER_ID`, `VECTARA_CORPUS_ID` and `VECTARA_API_KEY` (created with permissions for both indexing and query) as environment variables.\n", + "In this example, we assume that you've created an account and a corpus, and added your `VECTARA_CORPUS_KEY` and `VECTARA_API_KEY` (created with permissions for both indexing and query) as environment variables.\n", "\n", "We further assume the corpus has 4 fields defined as filterable metadata attributes: `year`, `director`, `rating`, and `genre`" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "9d3aa44f", "metadata": {}, "outputs": [], @@ -87,14 +99,10 @@ "\n", "from langchain_core.documents import Document\n", "\n", - "os.environ[\"VECTARA_API_KEY\"] = \"\"\n", - "os.environ[\"VECTARA_CORPUS_ID\"] = \"\"\n", - "os.environ[\"VECTARA_CUSTOMER_ID\"] = \"\"\n", + "os.environ[\"VECTARA_API_KEY\"] = \"VECTARA_API_KEY\"\n", + "os.environ[\"VECTARA_CORPUS_KEY\"] = \"VECTARA_CORPUS_KEY\"\n", "\n", - "from langchain.chains.query_constructor.schema import AttributeInfo\n", - "from langchain.retrievers.self_query.base import SelfQueryRetriever\n", - "from langchain_community.vectorstores import Vectara\n", - "from langchain_openai.chat_models import ChatOpenAI" + "from langchain_vectara import Vectara" ] }, { @@ -109,7 +117,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 6, "id": "bcbe04d9", "metadata": { "tags": [] @@ -148,9 +156,12 @@ " ),\n", "]\n", "\n", + "corpus_key = os.getenv(\"VECTARA_CORPUS_KEY\")\n", "vectara = Vectara()\n", "for doc in docs:\n", - " vectara.add_texts([doc.page_content], doc_metadata=doc.metadata)" + " vectara.add_texts(\n", + " [doc.page_content], corpus_key=corpus_key, doc_metadata=doc.metadata\n", + " )" ] }, { @@ -158,45 +169,32 @@ "id": "5ecaab6d", "metadata": {}, "source": [ - "## Creating the self-querying retriever\n", - "Now we can instantiate our retriever. 
To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents.\n", + "## Self-query with Vectara\n", + "With Vectara you don't need LangChain's self-query retriever: enabling `intelligent_query_rewriting` on the Vectara platform achieves the same result.\n", + "Vectara offers an Intelligent Query Rewriting option, which enhances search precision by automatically generating metadata filter expressions from natural language queries. This capability analyzes user queries, extracts relevant metadata filters, and rephrases the query to focus on the core information need. For more details, see the [Intelligent Query Rewriting documentation](https://docs.vectara.com/docs/search-and-retrieval/intelligent-query-rewriting).\n", "\n", - "We then provide an llm (in this case OpenAI) and the `vectara` vectorstore as arguments:" + "Enable intelligent query rewriting on a per-query basis by setting the `intelligent_query_rewriting` parameter to `True` in `VectaraQueryConfig`." ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 7, "id": "86e34dbf", "metadata": { "tags": [] }, "outputs": [], "source": [ - "metadata_field_info = [\n", - " AttributeInfo(\n", - " name=\"genre\",\n", - " description=\"The genre of the movie\",\n", - " type=\"string or list[string]\",\n", - " ),\n", - " AttributeInfo(\n", - " name=\"year\",\n", - " description=\"The year the movie was released\",\n", - " type=\"integer\",\n", - " ),\n", - " AttributeInfo(\n", - " name=\"director\",\n", - " description=\"The name of the movie director\",\n", - " type=\"string\",\n", - " ),\n", - " AttributeInfo(\n", - " name=\"rating\", description=\"A 1-10 rating for the movie\", type=\"float\"\n", - " ),\n", - "]\n", - "document_content_description = \"Brief summary of a movie\"\n", - "llm = ChatOpenAI(temperature=0, model=\"gpt-4o\", max_tokens=4069)\n", - "retriever = SelfQueryRetriever.from_llm(\n", - " llm, vectara, document_content_description, metadata_field_info, verbose=True\n", + "from langchain_vectara.vectorstores import (\n", + " CorpusConfig,\n", + " SearchConfig,\n", + " VectaraQueryConfig,\n", + ")\n", + "\n", + "config = VectaraQueryConfig(\n", + " search=SearchConfig(corpora=[CorpusConfig(corpus_key=corpus_key)]),\n", + " generation=None,\n", + " intelligent_query_rewriting=True,\n", ")" ] }, { "cell_type": "markdown", "id": "ea9df8d4", "metadata": {}, "source": [ - "## Self-retrieval Queries\n", - "And now we can try actually using our retriever!" + "## Queries\n", + "Now we can try the `vectara_query` method!"
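Before the plain retrieval examples below, note that `intelligent_query_rewriting` is just a flag on `VectaraQueryConfig`, so it also composes with generation. A hedged sketch that asks for a summarized answer over the rewritten, metadata-filtered query, reusing `GenerationConfig` as in the other Vectara notebooks:

```python
from langchain_vectara.vectorstores import GenerationConfig

# A minimal sketch, assuming `vectara`, `corpus_key` and the imports above are in place.
summarizing_config = VectaraQueryConfig(
    search=SearchConfig(corpora=[CorpusConfig(corpus_key=corpus_key)]),
    generation=GenerationConfig(
        max_used_search_results=5,
        response_language="eng",
    ),
    intelligent_query_rewriting=True,
)

rag = vectara.as_rag(summarizing_config)
print(rag.invoke("I want to watch a movie rated higher than 8.5")["answer"])
```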
] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 8, "id": "38a126e9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'lang': 'eng', 'offset': '0', 'len': '66', 'year': '1993', 'rating': '7.7', 'genre': 'science fiction', 'source': 'langchain'}),\n", - " Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'lang': 'eng', 'offset': '0', 'len': '116', 'year': '2006', 'director': 'Satoshi Kon', 'rating': '8.6', 'source': 'langchain'}),\n", - " Document(page_content='Toys come alive and have a blast doing so', metadata={'lang': 'eng', 'offset': '0', 'len': '41', 'year': '1995', 'genre': 'animated', 'source': 'langchain'}),\n", - " Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'lang': 'eng', 'offset': '0', 'len': '60', 'year': '1979', 'rating': '9.9', 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'source': 'langchain'}),\n", - " Document(page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them', metadata={'lang': 'eng', 'offset': '0', 'len': '82', 'year': '2019', 'director': 'Greta Gerwig', 'rating': '8.3', 'source': 'langchain'}),\n", - " Document(page_content='Leo DiCaprio gets lost in a dream within a dream within a dream within a ...', metadata={'lang': 'eng', 'offset': '0', 'len': '76', 'year': '2010', 'director': 'Christopher Nolan', 'rating': '8.2', 'source': 'langchain'})]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# This example only specifies a relevant query\n", - "retriever.invoke(\"What are movies about scientists\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "fc3f1e6e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'lang': 'eng', 'offset': '0', 'len': '116', 'year': '2006', 'director': 'Satoshi Kon', 'rating': '8.6', 'source': 'langchain'}),\n", - " Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'lang': 'eng', 'offset': '0', 'len': '60', 'year': '1979', 'rating': '9.9', 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'source': 'langchain'})]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# This example only specifies a filter\n", - "retriever.invoke(\"I want to watch a movie rated higher than 8.5\")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "b19d4da0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Document(page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them', metadata={'lang': 'eng', 'offset': '0', 'len': '82', 'year': '2019', 'director': 'Greta Gerwig', 'rating': '8.3', 'source': 'langchain'})]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# This example specifies a query and a filter\n", - "retriever.invoke(\"Has Greta Gerwig directed any movies about women\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "f900e40e", - "metadata": {}, - "outputs": [ - { - "data": { - 
"text/plain": [ - "[Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'lang': 'eng', 'offset': '0', 'len': '116', 'year': '2006', 'director': 'Satoshi Kon', 'rating': '8.6', 'source': 'langchain'}),\n", - " Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'lang': 'eng', 'offset': '0', 'len': '60', 'year': '1979', 'rating': '9.9', 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'source': 'langchain'})]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# This example specifies a composite filter\n", - "retriever.invoke(\"What's a highly rated (above 8.5) science fiction film?\")" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "12a51522", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Document(page_content='Toys come alive and have a blast doing so', metadata={'lang': 'eng', 'offset': '0', 'len': '41', 'year': '1995', 'genre': 'animated', 'source': 'langchain'}),\n", - " Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'lang': 'eng', 'offset': '0', 'len': '66', 'year': '1993', 'rating': '7.7', 'genre': 'science fiction', 'source': 'langchain'})]" + "[(Document(metadata={'year': 1995, 'genre': 'animated', 'source': 'langchain'}, page_content='Toys come alive and have a blast doing so'),\n", + " 0.4141285717487335),\n", + " (Document(metadata={'year': 1979, 'rating': 9.9, 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'source': 'langchain'}, page_content='Three men walk into the Zone, three men walk out of the Zone'),\n", + " 0.4046250879764557),\n", + " (Document(metadata={'year': 2010, 'director': 'Christopher Nolan', 'rating': 8.2, 'source': 'langchain'}, page_content='Leo DiCaprio gets lost in a dream within a dream within a dream within a ...'),\n", + " 0.227469339966774),\n", + " (Document(metadata={'year': 2019, 'director': 'Greta Gerwig', 'rating': 8.3, 'source': 'langchain'}, page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them'),\n", + " 0.19208428263664246),\n", + " (Document(metadata={'year': 1993, 'rating': 7.7, 'genre': 'science fiction', 'source': 'langchain'}, page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose'),\n", + " 0.1902722418308258),\n", + " (Document(metadata={'year': 2006, 'director': 'Satoshi Kon', 'rating': 8.6, 'source': 'langchain'}, page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea'),\n", + " 0.08151976019144058)]" ] }, "execution_count": 8, @@ -323,74 +236,107 @@ } ], "source": [ - "# This example specifies a query and composite filter\n", - "retriever.invoke(\n", - " \"What's a movie after 1990 but before 2005 that's all about toys, and preferably is animated\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "39bd1de1-b9fe-4a98-89da-58d8a7a6ae51", - "metadata": {}, - "source": [ - "## Filter k\n", - "\n", - "We can also use the self query retriever to specify `k`: the number of documents to fetch.\n", - "\n", - "We can do this by passing `enable_limit=True` to the constructor." 
+ "# This example only specifies a relevant query\n", + "vectara.vectara_query(\"What are movies about scientists\", config)" ] }, { "cell_type": "code", "execution_count": 9, - "id": "bff36b88-b506-4877-9c63-e5a1a8d78e64", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "retriever = SelfQueryRetriever.from_llm(\n", - " llm,\n", - " vectara,\n", - " document_content_description,\n", - " metadata_field_info,\n", - " enable_limit=True,\n", - " verbose=True,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "00e8baad-a9d7-4498-bd8d-ca41d0691386", + "id": "fc3f1e6e", "metadata": {}, - "source": [ - "This is cool, we can include the number of results we would like to see in the query and the self retriever would correctly understand it. For example, let's look for " - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "2758d229-4f97-499c-819f-888acaf8ee10", - "metadata": { - "tags": [] - }, "outputs": [ { "data": { "text/plain": [ - "[Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'lang': 'eng', 'offset': '0', 'len': '116', 'year': '2006', 'director': 'Satoshi Kon', 'rating': '8.6', 'source': 'langchain'}),\n", - " Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'lang': 'eng', 'offset': '0', 'len': '60', 'year': '1979', 'rating': '9.9', 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'source': 'langchain'})]" + "[(Document(metadata={'year': 2006, 'director': 'Satoshi Kon', 'rating': 8.6, 'source': 'langchain'}, page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea'),\n", + " 0.34279149770736694),\n", + " (Document(metadata={'year': 1979, 'rating': 9.9, 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'source': 'langchain'}, page_content='Three men walk into the Zone, three men walk out of the Zone'),\n", + " 0.242923304438591)]" ] }, - "execution_count": 10, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# This example only specifies a relevant query\n", - "retriever.invoke(\"what are two movies with a rating above 8.5\")" + "# This example only specifies a filter\n", + "vectara.vectara_query(\"I want to watch a movie rated higher than 8.5\", config)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "b19d4da0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(Document(metadata={'year': 2019, 'director': 'Greta Gerwig', 'rating': 8.3, 'source': 'langchain'}, page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them'),\n", + " 0.10141132771968842)]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# This example specifies a query and a filter\n", + "vectara.vectara_query(\"Has Greta Gerwig directed any movies about women\", config)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "f900e40e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(Document(metadata={'year': 1979, 'rating': 9.9, 'director': 'Andrei Tarkovsky', 'genre': 'science fiction', 'source': 'langchain'}, page_content='Three men walk into the Zone, three men walk out of the Zone'),\n", + " 0.9508692026138306)]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + 
"source": [ + "# This example specifies a composite filter\n", + "vectara.vectara_query(\"What's a highly rated (above 8.5) science fiction film?\", config)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "12a51522", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(Document(metadata={'year': 1995, 'genre': 'animated', 'source': 'langchain'}, page_content='Toys come alive and have a blast doing so'),\n", + " 0.7290377616882324),\n", + " (Document(metadata={'year': 1993, 'rating': 7.7, 'genre': 'science fiction', 'source': 'langchain'}, page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose'),\n", + " 0.4838160574436188)]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# This example specifies a query and composite filter\n", + "vectara.vectara_query(\n", + " \"What's a movie after 1990 but before 2005 that's all about toys, and preferably is animated\",\n", + " config,\n", + ")" ] }, { @@ -418,7 +364,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.12.0" } }, "nbformat": 4, diff --git a/docs/docs/integrations/vectorstores/vectara.ipynb b/docs/docs/integrations/vectorstores/vectara.ipynb index 758ea4be83e..6d07b9cf8e5 100644 --- a/docs/docs/integrations/vectorstores/vectara.ipynb +++ b/docs/docs/integrations/vectorstores/vectara.ipynb @@ -8,20 +8,35 @@ "# Vectara\n", "\n", "[Vectara](https://vectara.com/) is the trusted AI Assistant and Agent platform which focuses on enterprise readiness for mission-critical applications.\n", - "\n", "Vectara serverless RAG-as-a-service provides all the components of RAG behind an easy-to-use API, including:\n", "1. A way to extract text from files (PDF, PPT, DOCX, etc)\n", "2. ML-based chunking that provides state of the art performance.\n", "3. The [Boomerang](https://vectara.com/how-boomerang-takes-retrieval-augmented-generation-to-the-next-level-via-grounded-generation/) embeddings model.\n", "4. Its own internal vector database where text chunks and embedding vectors are stored.\n", - "5. A query service that automatically encodes the query into embedding, and retrieves the most relevant text segments (including support for [Hybrid Search](https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching) as well as multiple reranking options such as the [multi-lingual relevance reranker](https://www.vectara.com/blog/deep-dive-into-vectara-multilingual-reranker-v1-state-of-the-art-reranker-across-100-languages), [MMR](https://vectara.com/get-diverse-results-and-comprehensive-summaries-with-vectaras-mmr-reranker/), [UDF reranker](https://www.vectara.com/blog/rag-with-user-defined-functions-based-reranking). \n", + "5. A query service that automatically encodes the query into embedding, and retrieves the most relevant text segments, including support for [Hybrid Search](https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching) as well as multiple reranking options such as the [multi-lingual relevance reranker](https://www.vectara.com/blog/deep-dive-into-vectara-multilingual-reranker-v1-state-of-the-art-reranker-across-100-languages), [MMR](https://vectara.com/get-diverse-results-and-comprehensive-summaries-with-vectaras-mmr-reranker/), [UDF reranker](https://www.vectara.com/blog/rag-with-user-defined-functions-based-reranking). \n", "6. 
An LLM for creating a [generative summary](https://docs.vectara.com/docs/learn/grounded-generation/grounded-generation-overview), based on the retrieved documents (context), including citations.\n", "\n", - "See the [Vectara API documentation](https://docs.vectara.com/docs/) for more information on how to use the API.\n", + "For more information:\n", + "- [Documentation](https://docs.vectara.com/docs/)\n", + "- [API Playground](https://docs.vectara.com/docs/rest-api/)\n", + "- [Quickstart](https://docs.vectara.com/docs/quickstart)\n", "\n", "This notebook shows how to use the basic retrieval functionality when utilizing Vectara just as a Vector Store (without summarization), including: `similarity_search` and `similarity_search_with_score` as well as using the LangChain `as_retriever` functionality.\n", "\n", - "You'll need to install `langchain-community` with `pip install -qU langchain-community` to use this integration" + "\n", + "## Setup\n", + "\n", + "To use the `VectaraVectorStore`, you first need to install the partner package.\n" ] }, { "cell_type": "code", "execution_count": null, "id": "dfdf03ba-d6f5-4b1e-86d3-a65c4bc99aa1", "metadata": {}, "outputs": [], "source": [ "!uv pip install -U pip && uv pip install -qU langchain-vectara" ] }, { @@ -32,14 +47,14 @@ "# Getting Started\n", "\n", "To get started, use the following steps:\n", - "1. If you don't already have one, [Sign up](https://www.vectara.com/integrations/langchain) for your free Vectara trial. Once you have completed your sign up you will have a Vectara customer ID. You can find your customer ID by clicking on your name, on the top-right of the Vectara console window.\n", + "1. If you don't already have one, [Sign up](https://www.vectara.com/integrations/langchain) for your free Vectara trial.\n", "2. Within your account you can create one or more corpora. Each corpus represents an area that stores text data upon ingest from input documents. To create a corpus, use the **\"Create Corpus\"** button. You then provide a name to your corpus as well as a description. Optionally you can define filtering attributes and apply some advanced options. If you click on your created corpus, you can see its name and corpus ID right on the top.\n", "3. Next you'll need to create API keys to access the corpus. Click on the **\"Access Control\"** tab in the corpus view and then the **\"Create API Key\"** button. Give your key a name, and choose whether you want query-only or query+index for your key. Click \"Create\" and you now have an active API key. Keep this key confidential. \n", "\n", - "To use LangChain with Vectara, you'll need to have these two values: `customer ID`, `corpus ID` and `api_key`.\n", - "You can provide those to LangChain in two ways:\n", + "To use LangChain with Vectara, you'll need to have these two values: `corpus_key` and `api_key`.\n", + "You can provide `VECTARA_API_KEY` to LangChain in two ways:\n", "\n", - "1. Include in your environment these three variables: `VECTARA_CUSTOMER_ID`, `VECTARA_CORPUS_ID` and `VECTARA_API_KEY`.\n", + "1. 
Include in your environment the variable `VECTARA_API_KEY`.\n", "\n", " For example, you can set this variable using os.environ and getpass as follows:\n", "\n", @@ -47,8 +62,6 @@ "import os\n", "import getpass\n", "\n", - "os.environ[\"VECTARA_CUSTOMER_ID\"] = getpass.getpass(\"Vectara Customer ID:\")\n", - "os.environ[\"VECTARA_CORPUS_ID\"] = getpass.getpass(\"Vectara Corpus ID:\")\n", "os.environ[\"VECTARA_API_KEY\"] = getpass.getpass(\"Vectara API Key:\")\n", "```\n", "\n", @@ -56,10 +69,8 @@ "\n", "```python\n", "vectara = Vectara(\n", - " vectara_customer_id=vectara_customer_id,\n", - " vectara_corpus_id=vectara_corpus_id,\n", - " vectara_api_key=vectara_api_key\n", - " )\n", + " vectara_api_key=vectara_api_key\n", + ")\n", "```\n", "\n", "In this notebook we assume they are provided in the environment." @@ -67,23 +78,29 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 4, "id": "aac7a9a6", "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", - "os.environ[\"VECTARA_API_KEY\"] = \"\"\n", - "os.environ[\"VECTARA_CORPUS_ID\"] = \"\"\n", - "os.environ[\"VECTARA_CUSTOMER_ID\"] = \"\"\n", + "os.environ[\"VECTARA_API_KEY\"] = \"\"\n", + "os.environ[\"VECTARA_CORPUS_KEY\"] = \"VECTARA_CORPUS_KEY\"\n", "\n", - "from langchain_community.vectorstores import Vectara\n", - "from langchain_community.vectorstores.vectara import (\n", - " RerankConfig,\n", - " SummaryConfig,\n", + "from langchain_vectara import Vectara\n", + "from langchain_vectara.vectorstores import (\n", + " ChainReranker,\n", + " CorpusConfig,\n", + " CustomerSpecificReranker,\n", + " File,\n", + " GenerationConfig,\n", + " MmrReranker,\n", + " SearchConfig,\n", " VectaraQueryConfig,\n", - ")" + ")\n", + "\n", + "vectara = Vectara(vectara_api_key=os.getenv(\"VECTARA_API_KEY\"))" ] }, { @@ -91,21 +108,37 @@ "id": "875ffb7e", "metadata": {}, "source": [ - "First we load the state-of-the-union text into Vectara. \n", + "First we load the state-of-the-union text into Vectara.\n", "\n", - "Note that we use the `from_files` interface which does not require any local processing or chunking - Vectara receives the file content and performs all the necessary pre-processing, chunking and embedding of the file into its knowledge store.\n", + "Note that we use the `add_files` interface which does not require any local processing or chunking - Vectara receives the file content and performs all the necessary pre-processing, chunking and embedding of the file into its knowledge store.\n", "\n", - "In this case it uses a `.txt` file but the same works for many other [file types](https://docs.vectara.com/docs/api-reference/indexing-apis/file-upload/file-upload-filetypes)." + "In this case it uses a `.txt` file but the same works for many other [file types](https://docs.vectara.com/docs/api-reference/indexing-apis/file-upload/file-upload-filetypes)."
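As a complement to the file-upload flow in the next cell, the corpus can also be queried directly with the vector-store methods named in the introduction (`similarity_search` and `similarity_search_with_score`). The sketch below is not part of this patch; it assumes the corpus has already been populated and that `langchain-vectara` keeps the usual LangChain vector-store signatures — verify against the package docs whether a `corpus_key` argument must also be passed per call.

```python
# Sketch only (not from this PR): direct similarity search once documents are
# indexed. k is the standard LangChain argument; whether corpus_key must also
# be supplied per call is an assumption to check in the langchain-vectara docs.
results = vectara.similarity_search_with_score(
    "What did the president say about Ketanji Brown Jackson?",
    k=3,
)
for doc, score in results:
    print(f"{score:.3f}  {doc.page_content[:80]}")
```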
] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "id": "be0a4973", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['state_of_the_union.txt']" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "vectara = Vectara.from_files([\"state_of_the_union.txt\"])" + "corpus_key = os.getenv(\"VECTARA_CORPUS_KEY\")\n", + "file_obj = File(\n", + " file_path=\"../document_loaders/example_data/state_of_the_union.txt\",\n", + " metadata={\"source\": \"text_file\"},\n", + ")\n", + "vectara.add_files([file_obj], corpus_key)" ] }, { @@ -113,38 +146,52 @@ "id": "22a6b953", "metadata": {}, "source": [ - "## Basic Vectara RAG (retrieval augmented generation)\n", + "## Vectara RAG (retrieval augmented generation)\n", "\n", "We now create a `VectaraQueryConfig` object to control the retrieval and summarization options:\n", "* We enable summarization, specifying we would like the LLM to pick the top 7 matching chunks and respond in English\n", - "* We enable MMR (max marginal relevance) in the retrieval process, with a 0.2 diversity bias factor\n", - "* We want the top-10 results, with hybrid search configured with a value of 0.025\n", "\n", "Using this configuration, let's create a LangChain `Runnable` object that encapsulates the full Vectara RAG pipeline, using the `as_rag` method:" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "id": "9ecda054-96a8-4a91-aeae-32006efb1ac8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "\"Biden addressed various topics in his statements. He highlighted the need to confront Putin by building a coalition of nations[1]. He also expressed commitment to investigating the impact of burn pits on soldiers' health, including his son's case[2]. Additionally, Biden outlined a plan to fight inflation by cutting prescription drug costs[3]. He emphasized the importance of continuing to combat COVID-19 and not just accepting living with it[4]. Furthermore, he discussed measures to weaken Russia economically and target Russian oligarchs[6]. Biden also advocated for passing the Equality Act to support LGBTQ+ Americans and condemned state laws targeting transgender individuals[7].\"" + "\"President Biden discussed several key issues in his recent statements. He emphasized the importance of keeping schools open and noted that with a high vaccination rate and reduced hospitalizations, most Americans can safely return to normal activities without masks [1]. He addressed the need to hold social media platforms accountable for their impact on children and called for stronger privacy protections and mental health services [2]. Biden also announced measures against Russian oligarchs, including closing American airspace to Russian flights and targeting their assets, as part of efforts to weaken Russia's economy [3], [7]. Additionally, he reaffirmed the need to protect women's rights, particularly the right to choose as affirmed in Roe v. 
Wade [5].\"" ] }, - "execution_count": 3, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "summary_config = SummaryConfig(is_enabled=True, max_results=7, response_lang=\"eng\")\n", - "rerank_config = RerankConfig(reranker=\"mmr\", rerank_k=50, mmr_diversity_bias=0.2)\n", + "generation_config = GenerationConfig(\n", + " max_used_search_results=7,\n", + " response_language=\"eng\",\n", + " generation_preset_name=\"vectara-summary-ext-24-05-med-omni\",\n", + " enable_factual_consistency_score=True,\n", + ")\n", + "search_config = SearchConfig(\n", + " corpora=[CorpusConfig(corpus_key=corpus_key)],\n", + " limit=25,\n", + " reranker=ChainReranker(\n", + " rerankers=[\n", + " CustomerSpecificReranker(reranker_id=\"rnk_272725719\", limit=100),\n", + " MmrReranker(diversity_bias=0.2, limit=100),\n", + " ]\n", + " ),\n", + ")\n", + "\n", "config = VectaraQueryConfig(\n", - " k=10, lambda_val=0.005, rerank_config=rerank_config, summary_config=summary_config\n", + " search=search_config,\n", + " generation=generation_config,\n", ")\n", "\n", "query_str = \"what did Biden say?\"\n", @@ -163,7 +210,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 7, "id": "27f01330-8917-4eff-b603-59ab2571a4d2", "metadata": {}, "outputs": [ @@ -171,7 +218,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Biden addressed various topics in his statements. He highlighted the importance of building coalitions to confront global challenges [1]. He also expressed commitment to investigating the impact of burn pits on soldiers' health, including his son's case [2, 4]. Additionally, Biden outlined his plan to combat inflation by cutting prescription drug costs and reducing the deficit, with support from Nobel laureates and business leaders [3]. He emphasized the ongoing fight against COVID-19 and the need to continue combating the virus [5]. Furthermore, Biden discussed measures taken to weaken Russia's economic and military strength, targeting Russian oligarchs and corrupt leaders [6]. He also advocated for passing the Equality Act to support LGBTQ+ Americans and address discriminatory state laws [7]." + "President Biden discussed several key issues in his recent statements. He emphasized the importance of keeping schools open and noted that with a high vaccination rate and reduced hospitalizations, most Americans can safely return to normal activities without masks [1]. He addressed the need to hold social media platforms accountable for their impact on children and called for stronger privacy protections and mental health services [2]. Biden also announced measures against Russia, including preventing its central bank from defending the Ruble and targeting Russian oligarchs' assets, as part of efforts to weaken Russia's economy and military [3]. Additionally, he reaffirmed the commitment to protect women's rights, particularly the right to choose as affirmed in Roe v. Wade [5]. Lastly, he advocated for funding the police with necessary resources and training to ensure community safety [6]." ] } ], @@ -203,7 +250,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "id": "b2e0aa2c-7c8e-4d79-8abc-66f5a1f961b3", "metadata": {}, "outputs": [ @@ -211,19 +258,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "Biden addressed various topics in his statements. He highlighted the need to confront Putin by building a coalition of nations[1]. 
He also expressed his commitment to investigating the impact of burn pits on soldiers' health, referencing his son's experience[2]. Additionally, Biden discussed his plan to fight inflation by cutting prescription drug costs and garnering support from Nobel laureates and business leaders[4]. Furthermore, he emphasized the importance of continuing to combat COVID-19 and not merely accepting living with the virus[5]. Biden's remarks encompassed international relations, healthcare challenges faced by soldiers, economic strategies, and the ongoing battle against the pandemic.\n", - "Vectara FCS = 0.41796625\n" + "President Biden discussed several key topics in his recent statements. He emphasized the importance of keeping schools open and noted that with a high vaccination rate and reduced hospitalizations, most Americans can safely return to normal activities without masks [1]. He addressed the need to hold social media platforms accountable for their impact on children and called for stronger privacy protections and mental health services [2]. Biden also announced measures against Russian oligarchs, including closing American airspace to Russian flights and targeting their assets, as part of efforts to weaken Russia's economy [3], [7]. Additionally, he reaffirmed the need to protect women's rights, particularly the right to choose as affirmed in Roe v. Wade [5].\n", + "Vectara FCS = 0.61621094\n" ] } ], "source": [ - "summary_config = SummaryConfig(is_enabled=True, max_results=5, response_lang=\"eng\")\n", - "rerank_config = RerankConfig(reranker=\"mmr\", rerank_k=50, mmr_diversity_bias=0.1)\n", - "config = VectaraQueryConfig(\n", - " k=10, lambda_val=0.005, rerank_config=rerank_config, summary_config=summary_config\n", - ")\n", - "\n", - "rag = vectara.as_rag(config)\n", "resp = rag.invoke(query_str)\n", "print(resp[\"answer\"])\n", "print(f\"Vectara FCS = {resp['fcs']}\")" @@ -243,26 +283,28 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "id": "19cd2f86", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[Document(page_content='He thought the West and NATO wouldn’t respond. And he thought he could divide us at home. We were ready. Here is what we did. We prepared extensively and carefully. We spent months building a coalition of other freedom-loving nations from Europe and the Americas to Asia and Africa to confront Putin.', metadata={'lang': 'eng', 'section': '1', 'offset': '2160', 'len': '36', 'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'vectara'}),\n", - " Document(page_content='When they came home, many of the world’s fittest and best trained warriors were never the same. Dizziness. \\n\\nA cancer that would put them in a flag-draped coffin. I know. \\n\\nOne of those soldiers was my son Major Beau Biden. We don’t know for sure if a burn pit was the cause of his brain cancer, or the diseases of so many of our troops. But I’m committed to finding out everything we can.', metadata={'lang': 'eng', 'section': '1', 'offset': '34652', 'len': '60', 'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'vectara'}),\n", - " Document(page_content='But cancer from prolonged exposure to burn pits ravaged Heath’s lungs and body. Danielle says Heath was a fighter to the very end. He didn’t know how to stop fighting, and neither did she. 
Through her pain she found purpose to demand we do better. Tonight, Danielle—we are.', metadata={'lang': 'eng', 'section': '1', 'offset': '35442', 'len': '57', 'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'vectara'})]" + "[Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='The U.S. Department of Justice is assembling a dedicated task force to go after the crimes of Russian oligarchs. We are joining with our European allies to find and seize your yachts your luxury apartments your private jets. We are coming for your ill-begotten gains. And tonight I am announcing that we will join our allies in closing off American air space to all Russian flights – further isolating Russia – and adding an additional squeeze –on their economy. The Ruble has lost 30% of its value.'),\n", + " Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='When they came home, many of the world’s fittest and best trained warriors were never the same. Dizziness. \\n\\nA cancer that would put them in a flag-draped coffin. I know. \\n\\nOne of those soldiers was my son Major Beau Biden. We don’t know for sure if a burn pit was the cause of his brain cancer, or the diseases of so many of our troops. But I’m committed to finding out everything we can.'),\n", + " Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='He rejected repeated efforts at diplomacy. He thought the West and NATO wouldn’t respond. And he thought he could divide us at home. We were ready. Here is what we did. We prepared extensively and carefully.'),\n", + " Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. 
And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson.'),\n", + " Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='Putin’s latest attack on Ukraine was premeditated and unprovoked. He rejected repeated efforts at diplomacy. He thought the West and NATO wouldn’t respond. And he thought he could divide us at home. We were ready. Here is what we did.')]" ] }, - "execution_count": 6, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "config.summary_config.is_enabled = False\n", - "config.k = 3\n", + "config.generation = None\n", + "config.search.limit = 5\n", "retriever = vectara.as_retriever(config=config)\n", "retriever.invoke(query_str)" ] @@ -277,27 +319,34 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 10, "id": "59268e9a-6089-4bb2-8c61-1ea6b956f83c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[Document(page_content='He thought the West and NATO wouldn’t respond. And he thought he could divide us at home. We were ready. Here is what we did. We prepared extensively and carefully. We spent months building a coalition of other freedom-loving nations from Europe and the Americas to Asia and Africa to confront Putin.', metadata={'lang': 'eng', 'section': '1', 'offset': '2160', 'len': '36', 'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'vectara'}),\n", - " Document(page_content='When they came home, many of the world’s fittest and best trained warriors were never the same. Dizziness. \\n\\nA cancer that would put them in a flag-draped coffin. I know. \\n\\nOne of those soldiers was my son Major Beau Biden. We don’t know for sure if a burn pit was the cause of his brain cancer, or the diseases of so many of our troops. But I’m committed to finding out everything we can.', metadata={'lang': 'eng', 'section': '1', 'offset': '34652', 'len': '60', 'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'vectara'}),\n", - " Document(page_content='But cancer from prolonged exposure to burn pits ravaged Heath’s lungs and body. Danielle says Heath was a fighter to the very end. He didn’t know how to stop fighting, and neither did she. Through her pain she found purpose to demand we do better. Tonight, Danielle—we are.', metadata={'lang': 'eng', 'section': '1', 'offset': '35442', 'len': '57', 'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'vectara'}),\n", - " Document(page_content=\"Biden discussed various topics in his statements. He highlighted the importance of unity and preparation to confront challenges, such as building coalitions to address global issues [1]. Additionally, he shared personal stories about the impact of health issues on soldiers, including his son's experience with brain cancer possibly linked to burn pits [2]. 
Biden also outlined his plans to combat inflation by cutting prescription drug costs and emphasized the ongoing efforts to combat COVID-19, rejecting the idea of merely living with the virus [4, 5]. Overall, Biden's messages revolved around unity, healthcare challenges faced by soldiers, economic plans, and the ongoing fight against COVID-19.\", metadata={'summary': True, 'fcs': 0.54751414})]" + "[Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='We won’t be able to compete for the jobs of the 21st Century if we don’t fix that. That’s why it was so important to pass the Bipartisan Infrastructure Law—the most sweeping investment to rebuild America in history. This was a bipartisan effort, and I want to thank the members of both parties who worked to make it happen. We’re done talking about infrastructure weeks. We’re going to have an infrastructure decade.'),\n", + " Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='The U.S. Department of Justice is assembling a dedicated task force to go after the crimes of Russian oligarchs. We are joining with our European allies to find and seize your yachts your luxury apartments your private jets. We are coming for your ill-begotten gains. And tonight I am announcing that we will join our allies in closing off American air space to all Russian flights – further isolating Russia – and adding an additional squeeze –on their economy. The Ruble has lost 30% of its value.'),\n", + " Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='When they came home, many of the world’s fittest and best trained warriors were never the same. Dizziness. \\n\\nA cancer that would put them in a flag-draped coffin. I know. \\n\\nOne of those soldiers was my son Major Beau Biden. We don’t know for sure if a burn pit was the cause of his brain cancer, or the diseases of so many of our troops. But I’m committed to finding out everything we can.'),\n", + " Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='Preventing Russia’s central bank from defending the Russian Ruble making Putin’s $630 Billion “war fund” worthless. We are choking off Russia’s access to technology that will sap its economic strength and weaken its military for years to come. Tonight I say to the Russian oligarchs and corrupt leaders who have bilked billions of dollars off this violent regime no more. The U.S. Department of Justice is assembling a dedicated task force to go after the crimes of Russian oligarchs. 
We are joining with our European allies to find and seize your yachts your luxury apartments your private jets.'),\n", + " Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='He rejected repeated efforts at diplomacy. He thought the West and NATO wouldn’t respond. And he thought he could divide us at home. We were ready. Here is what we did. We prepared extensively and carefully.'),\n", + " Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='It delivered immediate economic relief for tens of millions of Americans. Helped put food on their table, keep a roof over their heads, and cut the cost of health insurance. And as my Dad used to say, it gave people a little breathing room. And unlike the $2 Trillion tax cut passed in the previous administration that benefitted the top 1% of Americans, the American Rescue Plan helped working people—and left no one behind. Lots of jobs. \\n\\nIn fact—our economy created over 6.5 Million new jobs just last year, more jobs created in one year \\nthan ever before in the history of America.'),\n", + " Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson.'),\n", + " Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='All told, we created 369,000 new manufacturing jobs in America just last year. Powered by people I’ve met like JoJo Burgess, from generations of union steelworkers from Pittsburgh, who’s here with us tonight. As Ohio Senator Sherrod Brown says, “It’s time to bury the label “Rust Belt.” It’s time. \\n\\nBut with all the bright spots in our economy, record job growth and higher wages, too many families are struggling to keep up with the bills. 
Inflation is robbing them of the gains they might otherwise feel.'),\n", + " Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='Putin’s latest attack on Ukraine was premeditated and unprovoked. He rejected repeated efforts at diplomacy. He thought the West and NATO wouldn’t respond. And he thought he could divide us at home. We were ready. Here is what we did.'),\n", + " Document(metadata={'X-TIKA:Parsed-By': 'org.apache.tika.parser.csv.TextAndCSVParser', 'Content-Encoding': 'UTF-8', 'X-TIKA:detectedEncoding': 'UTF-8', 'X-TIKA:encodingDetector': 'UniversalEncodingDetector', 'Content-Type': 'text/plain; charset=UTF-8', 'source': 'text_file', 'framework': 'langchain'}, page_content='Danielle says Heath was a fighter to the very end. He didn’t know how to stop fighting, and neither did she. Through her pain she found purpose to demand we do better. Tonight, Danielle—we are. The VA is pioneering new ways of linking toxic exposures to diseases, already helping more veterans get benefits.'),\n", + " Document(metadata={'summary': True, 'fcs': (0.54785156,)}, page_content='President Biden spoke about several key issues. He emphasized the importance of the Bipartisan Infrastructure Law, calling it the most significant investment to rebuild America and highlighting it as a bipartisan effort [1]. He also announced measures against Russian oligarchs, including assembling a task force to seize their assets and closing American airspace to Russian flights, further isolating Russia economically [2]. Additionally, he expressed a commitment to investigating the health impacts of burn pits on military personnel, referencing his son, Major Beau Biden, who suffered from brain cancer [3].')]" ] }, - "execution_count": 7, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "config.summary_config.is_enabled = True\n", - "config.k = 3\n", + "config.generation = GenerationConfig()\n", + "config.search.limit = 10\n", "retriever = vectara.as_retriever(config=config)\n", "retriever.invoke(query_str)" ] @@ -316,17 +365,17 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 14, "id": "e14325b9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "\"Biden's statement highlighted his efforts to unite freedom-loving nations against Putin's aggression, sharing information in advance to counter Russian lies and hold Putin accountable[1]. Additionally, he emphasized his commitment to military families, like Danielle Robinson, and outlined plans for more affordable housing, Pre-K for 3- and 4-year-olds, and ensuring no additional taxes for those earning less than $400,000 a year[2][3]. The statement also touched on the readiness of the West and NATO to respond to Putin's actions, showcasing extensive preparation and coalition-building efforts[4]. Heath Robinson's story, a combat medic who succumbed to cancer from burn pits, was used to illustrate the resilience and fight for better conditions[5].\"" + "'The remarks made by Biden include his emphasis on the importance of the Bipartisan Infrastructure Law, which he describes as the most significant investment to rebuild America in history. 
He highlights the bipartisan effort involved in passing this law and expresses gratitude to members of both parties for their collaboration. Biden also mentions the transition from \"infrastructure weeks\" to an \"infrastructure decade\" [1]. Additionally, he shares a personal story about his father having to leave their home in Scranton, Pennsylvania, to find work, which influenced his decision to fight for the American Rescue Plan to help those in need [2].'" ] }, - "execution_count": 8, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -371,7 +420,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.12.0" } }, "nbformat": 4, From 8119a7bc5c6379064eecf2890539fab90e68d8f7 Mon Sep 17 00:00:00 2001 From: ccurme Date: Wed, 26 Mar 2025 15:16:37 -0400 Subject: [PATCH 11/30] openai[patch]: support streaming token counts in AzureChatOpenAI (#30494) When OpenAI originally released `stream_options` to enable token usage during streaming, it was not supported in AzureOpenAI. It is now supported. Like the [OpenAI SDK](https://github.com/openai/openai-python/blob/f66d2e6fdc51c4528c99bb25a8fbca6f9b9b872d/src/openai/resources/completions.py#L68), ChatOpenAI does not return usage metadata during streaming by default (which adds an extra chunk to the stream). The OpenAI SDK requires users to pass `stream_options={"include_usage": True}`. ChatOpenAI implements a convenience argument `stream_usage: Optional[bool]`, and an attribute `stream_usage: bool = False`. Here we extend this to AzureChatOpenAI by moving the `stream_usage` attribute and `stream_usage` kwarg (on `_(a)stream`) from ChatOpenAI to BaseChatOpenAI. --- Additional consideration: we must be sensitive to the number of users using BaseChatOpenAI to interact with other APIs that do not support the `stream_options` parameter. Suppose OpenAI in the future updates the default behavior to stream token usage. Currently, BaseChatOpenAI only passes `stream_options` if `stream_usage` is True, so there would be no way to disable this new default behavior. To address this, we could update the `stream_usage` attribute to `Optional[bool] = None`, but this is technically a breaking change (as currently values of False are not passed to the client). IMO: if / when this change happens, we could accompany it with this update in a minor bump. --- Related previous PRs: - https://github.com/langchain-ai/langchain/pull/22628 - https://github.com/langchain-ai/langchain/pull/22854 - https://github.com/langchain-ai/langchain/pull/23552 --------- Co-authored-by: Eugene Yurtsev --- .../langchain_openai/chat_models/base.py | 82 +++++++++---------- .../chat_models/test_azure_standard.py | 11 +-- 2 files changed, 41 insertions(+), 52 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 4e7faf2c0ed..ce9c25cb6cc 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -456,6 +456,12 @@ class BaseChatOpenAI(BaseChatModel): ) """Timeout for requests to OpenAI completion API. Can be float, httpx.Timeout or None.""" + stream_usage: bool = False + """Whether to include usage metadata in streaming output. If True, an additional + message chunk will be generated during the stream including usage metadata. + + .. 
versionadded:: 0.3.9 + """ max_retries: Optional[int] = None """Maximum number of retries to make when generating.""" presence_penalty: Optional[float] = None @@ -811,14 +817,38 @@ class BaseChatOpenAI(BaseChatModel): is_first_chunk = False yield generation_chunk + def _should_stream_usage( + self, stream_usage: Optional[bool] = None, **kwargs: Any + ) -> bool: + """Determine whether to include usage metadata in streaming output. + + For backwards compatibility, we check for `stream_options` passed + explicitly to kwargs or in the model_kwargs and override self.stream_usage. + """ + stream_usage_sources = [ # order of precedence + stream_usage, + kwargs.get("stream_options", {}).get("include_usage"), + self.model_kwargs.get("stream_options", {}).get("include_usage"), + self.stream_usage, + ] + for source in stream_usage_sources: + if isinstance(source, bool): + return source + return self.stream_usage + def _stream( self, messages: List[BaseMessage], stop: Optional[List[str]] = None, run_manager: Optional[CallbackManagerForLLMRun] = None, + *, + stream_usage: Optional[bool] = None, **kwargs: Any, ) -> Iterator[ChatGenerationChunk]: kwargs["stream"] = True + stream_usage = self._should_stream_usage(stream_usage, **kwargs) + if stream_usage: + kwargs["stream_options"] = {"include_usage": stream_usage} payload = self._get_request_payload(messages, stop=stop, **kwargs) default_chunk_class: Type[BaseMessageChunk] = AIMessageChunk base_generation_info = {} @@ -1005,9 +1035,14 @@ class BaseChatOpenAI(BaseChatModel): messages: List[BaseMessage], stop: Optional[List[str]] = None, run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + *, + stream_usage: Optional[bool] = None, **kwargs: Any, ) -> AsyncIterator[ChatGenerationChunk]: kwargs["stream"] = True + stream_usage = self._should_stream_usage(stream_usage, **kwargs) + if stream_usage: + kwargs["stream_options"] = {"include_usage": stream_usage} payload = self._get_request_payload(messages, stop=stop, **kwargs) default_chunk_class: Type[BaseMessageChunk] = AIMessageChunk base_generation_info = {} @@ -2202,11 +2237,6 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override] """ # noqa: E501 - stream_usage: bool = False - """Whether to include usage metadata in streaming output. If True, additional - message chunks will be generated during the stream including usage metadata. - """ - max_tokens: Optional[int] = Field(default=None, alias="max_completion_tokens") """Maximum number of tokens to generate.""" @@ -2268,55 +2298,21 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override] message["role"] = "developer" return payload - def _should_stream_usage( - self, stream_usage: Optional[bool] = None, **kwargs: Any - ) -> bool: - """Determine whether to include usage metadata in streaming output. - - For backwards compatibility, we check for `stream_options` passed - explicitly to kwargs or in the model_kwargs and override self.stream_usage. 
- """ - stream_usage_sources = [ # order of preference - stream_usage, - kwargs.get("stream_options", {}).get("include_usage"), - self.model_kwargs.get("stream_options", {}).get("include_usage"), - self.stream_usage, - ] - for source in stream_usage_sources: - if isinstance(source, bool): - return source - return self.stream_usage - - def _stream( - self, *args: Any, stream_usage: Optional[bool] = None, **kwargs: Any - ) -> Iterator[ChatGenerationChunk]: - """Set default stream_options.""" + def _stream(self, *args: Any, **kwargs: Any) -> Iterator[ChatGenerationChunk]: + """Route to Chat Completions or Responses API.""" if self._use_responses_api({**kwargs, **self.model_kwargs}): return super()._stream_responses(*args, **kwargs) else: - stream_usage = self._should_stream_usage(stream_usage, **kwargs) - # Note: stream_options is not a valid parameter for Azure OpenAI. - # To support users proxying Azure through ChatOpenAI, here we only specify - # stream_options if include_usage is set to True. - # See https://learn.microsoft.com/en-us/azure/ai-services/openai/whats-new - # for release notes. - if stream_usage: - kwargs["stream_options"] = {"include_usage": stream_usage} - return super()._stream(*args, **kwargs) async def _astream( - self, *args: Any, stream_usage: Optional[bool] = None, **kwargs: Any + self, *args: Any, **kwargs: Any ) -> AsyncIterator[ChatGenerationChunk]: - """Set default stream_options.""" + """Route to Chat Completions or Responses API.""" if self._use_responses_api({**kwargs, **self.model_kwargs}): async for chunk in super()._astream_responses(*args, **kwargs): yield chunk else: - stream_usage = self._should_stream_usage(stream_usage, **kwargs) - if stream_usage: - kwargs["stream_options"] = {"include_usage": stream_usage} - async for chunk in super()._astream(*args, **kwargs): yield chunk diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py b/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py index f5820794bb3..b87be33b30e 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py @@ -3,7 +3,6 @@ import os from typing import Type -import pytest from langchain_core.language_models import BaseChatModel from langchain_tests.integration_tests import ChatModelIntegrationTests @@ -25,6 +24,7 @@ class TestAzureOpenAIStandard(ChatModelIntegrationTests): "model": "gpt-4o-mini", "openai_api_version": OPENAI_API_VERSION, "azure_endpoint": OPENAI_API_BASE, + "stream_usage": True, } @property @@ -35,10 +35,6 @@ class TestAzureOpenAIStandard(ChatModelIntegrationTests): def supports_json_mode(self) -> bool: return True - @pytest.mark.xfail(reason="Not yet supported.") - def test_usage_metadata_streaming(self, model: BaseChatModel) -> None: - super().test_usage_metadata_streaming(model) - class TestAzureOpenAIStandardLegacy(ChatModelIntegrationTests): """Test a legacy model.""" @@ -53,12 +49,9 @@ class TestAzureOpenAIStandardLegacy(ChatModelIntegrationTests): "deployment_name": os.environ["AZURE_OPENAI_LEGACY_CHAT_DEPLOYMENT_NAME"], "openai_api_version": OPENAI_API_VERSION, "azure_endpoint": OPENAI_API_BASE, + "stream_usage": True, } @property def structured_output_kwargs(self) -> dict: return {"method": "function_calling"} - - @pytest.mark.xfail(reason="Not yet supported.") - def test_usage_metadata_streaming(self, model: BaseChatModel) -> None: - super().test_usage_metadata_streaming(model) From 
a9b1e1b1777150978af8307f09a016b216cc869a Mon Sep 17 00:00:00 2001 From: ccurme Date: Wed, 26 Mar 2025 15:24:37 -0400 Subject: [PATCH 12/30] openai: release 0.3.11 (#30503) --- libs/partners/openai/pyproject.toml | 4 ++-- libs/partners/openai/uv.lock | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/libs/partners/openai/pyproject.toml b/libs/partners/openai/pyproject.toml index 42049ff477b..120c7137cdd 100644 --- a/libs/partners/openai/pyproject.toml +++ b/libs/partners/openai/pyproject.toml @@ -7,12 +7,12 @@ authors = [] license = { text = "MIT" } requires-python = "<4.0,>=3.9" dependencies = [ - "langchain-core<1.0.0,>=0.3.48", + "langchain-core<1.0.0,>=0.3.49", "openai<2.0.0,>=1.68.2", "tiktoken<1,>=0.7", ] name = "langchain-openai" -version = "0.3.10" +version = "0.3.11" description = "An integration package connecting OpenAI and LangChain" readme = "README.md" diff --git a/libs/partners/openai/uv.lock b/libs/partners/openai/uv.lock index ccfa59aea4d..87cbce3a373 100644 --- a/libs/partners/openai/uv.lock +++ b/libs/partners/openai/uv.lock @@ -462,7 +462,7 @@ wheels = [ [[package]] name = "langchain-core" -version = "0.3.48" +version = "0.3.49" source = { editable = "../../core" } dependencies = [ { name = "jsonpatch" }, @@ -520,7 +520,7 @@ typing = [ [[package]] name = "langchain-openai" -version = "0.3.10" +version = "0.3.11" source = { editable = "." } dependencies = [ { name = "langchain-core" }, @@ -605,7 +605,7 @@ typing = [ [[package]] name = "langchain-tests" -version = "0.3.16" +version = "0.3.17" source = { editable = "../../standard-tests" } dependencies = [ { name = "httpx" }, From 3781144710e75c852aad9abd2731857e31997970 Mon Sep 17 00:00:00 2001 From: ccurme Date: Wed, 26 Mar 2025 16:13:45 -0400 Subject: [PATCH 13/30] docs: update doc on token usage tracking (#30505) --- .../how_to/chat_token_usage_tracking.ipynb | 362 ++++++------------ 1 file changed, 118 insertions(+), 244 deletions(-) diff --git a/docs/docs/how_to/chat_token_usage_tracking.ipynb b/docs/docs/how_to/chat_token_usage_tracking.ipynb index 95742c9371b..4ee3bb7b30b 100644 --- a/docs/docs/how_to/chat_token_usage_tracking.ipynb +++ b/docs/docs/how_to/chat_token_usage_tracking.ipynb @@ -16,7 +16,7 @@ "\n", "Tracking [token](/docs/concepts/tokens/) usage to calculate cost is an important part of putting your app in production. This guide goes over how to obtain this information from your LangChain model calls.\n", "\n", - "This guide requires `langchain-anthropic` and `langchain-openai >= 0.1.9`." + "This guide requires `langchain-anthropic` and `langchain-openai >= 0.3.11`." ] }, { @@ -38,19 +38,9 @@ "\n", "OpenAI's Chat Completions API does not stream token usage statistics by default (see API reference\n", "[here](https://platform.openai.com/docs/api-reference/completions/create#completions-create-stream_options)).\n", - "To recover token counts when streaming with `ChatOpenAI`, set `stream_usage=True` as\n", + "To recover token counts when streaming with `ChatOpenAI` or `AzureChatOpenAI`, set `stream_usage=True` as\n", "demonstrated in this guide.\n", "\n", - "For `AzureChatOpenAI`, set `stream_options={\"include_usage\": True}` when calling\n", - "`.(a)stream`, or initialize with:\n", - "\n", - "```python\n", - "AzureChatOpenAI(\n", - " ...,\n", - " model_kwargs={\"stream_options\": {\"include_usage\": True}},\n", - ")\n", - "```\n", - "\n", ":::" ] }, @@ -67,7 +57,7 @@ "\n", "A number of model providers return token usage information as part of the chat generation response. 
When available, this information will be included on the `AIMessage` objects produced by the corresponding model.\n", "\n", - "LangChain `AIMessage` objects include a [usage_metadata](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage.usage_metadata) attribute. When populated, this attribute will be a [UsageMetadata](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html) dictionary with standard keys (e.g., `\"input_tokens\"` and `\"output_tokens\"`).\n", + "LangChain `AIMessage` objects include a [usage_metadata](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage.usage_metadata) attribute. When populated, this attribute will be a [UsageMetadata](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html) dictionary with standard keys (e.g., `\"input_tokens\"` and `\"output_tokens\"`). They will also include information on cached token usage and tokens from multi-modal data.\n", "\n", "Examples:\n", "\n", @@ -92,9 +82,9 @@ } ], "source": [ - "from langchain_openai import ChatOpenAI\n", + "from langchain.chat_models import init_chat_model\n", "\n", - "llm = ChatOpenAI(model=\"gpt-4o-mini\")\n", + "llm = init_chat_model(model=\"gpt-4o-mini\")\n", "openai_response = llm.invoke(\"hello\")\n", "openai_response.usage_metadata" ] @@ -132,37 +122,6 @@ "anthropic_response.usage_metadata" ] }, - { - "cell_type": "markdown", - "id": "6d4efc15-ba9f-4b3d-9278-8e01f99f263f", - "metadata": {}, - "source": [ - "### Using AIMessage.response_metadata\n", - "\n", - "Metadata from the model response is also included in the AIMessage [response_metadata](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.AIMessage.html#langchain_core.messages.ai.AIMessage.response_metadata) attribute. These data are typically not standardized. 
Note that different providers adopt different conventions for representing token counts:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "f156f9da-21f2-4c81-a714-54cbf9ad393e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "OpenAI: {'completion_tokens': 9, 'prompt_tokens': 8, 'total_tokens': 17}\n", - "\n", - "Anthropic: {'input_tokens': 8, 'output_tokens': 12}\n" - ] - } - ], - "source": [ - "print(f'OpenAI: {openai_response.response_metadata[\"token_usage\"]}\\n')\n", - "print(f'Anthropic: {anthropic_response.response_metadata[\"usage\"]}')" - ] - }, { "cell_type": "markdown", "id": "b4ef2c43-0ff6-49eb-9782-e4070c9da8d7", @@ -207,7 +166,7 @@ } ], "source": [ - "llm = ChatOpenAI(model=\"gpt-4o-mini\")\n", + "llm = init_chat_model(model=\"gpt-4o-mini\")\n", "\n", "aggregate = None\n", "for chunk in llm.stream(\"hello\", stream_usage=True):\n", @@ -318,7 +277,7 @@ " punchline: str = Field(description=\"answer to resolve the joke\")\n", "\n", "\n", - "llm = ChatOpenAI(\n", + "llm = init_chat_model(\n", " model=\"gpt-4o-mini\",\n", " stream_usage=True,\n", ")\n", @@ -326,10 +285,10 @@ "# chat model and appends a parser.\n", "structured_llm = llm.with_structured_output(Joke)\n", "\n", - "async for event in structured_llm.astream_events(\"Tell me a joke\", version=\"v2\"):\n", + "async for event in structured_llm.astream_events(\"Tell me a joke\"):\n", " if event[\"event\"] == \"on_chat_model_end\":\n", " print(f'Token usage: {event[\"data\"][\"output\"].usage_metadata}\\n')\n", - " elif event[\"event\"] == \"on_chain_end\":\n", + " elif event[\"event\"] == \"on_chain_end\" and event[\"name\"] == \"RunnableSequence\":\n", " print(event[\"data\"][\"output\"])\n", " else:\n", " pass" @@ -350,17 +309,18 @@ "source": [ "## Using callbacks\n", "\n", - "There are also some API-specific callback context managers that allow you to track token usage across multiple calls. They are currently only implemented for the OpenAI API and Bedrock Anthropic API, and are available in `langchain-community`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "64e52d21", - "metadata": {}, - "outputs": [], - "source": [ - "%pip install -qU langchain-community" + ":::info Requires ``langchain-core>=0.3.49``\n", + "\n", + ":::\n", + "\n", + "LangChain implements a callback handler and context manager that will track token usage across calls of any chat model that returns `usage_metadata`.\n", + "\n", + "There are also some API-specific callback context managers that maintain pricing for different models, allowing for cost estimation in real time. They are currently only implemented for the OpenAI API and Bedrock Anthropic API, and are available in `langchain-community`:\n", + "\n", + "- [get_openai_callback](https://python.langchain.com/api_reference/community/callbacks/langchain_community.callbacks.manager.get_openai_callback.html)\n", + "- [get_bedrock_anthropic_callback](https://python.langchain.com/api_reference/community/callbacks/langchain_community.callbacks.manager.get_bedrock_anthropic_callback.html)\n", + "\n", + "Below, we demonstrate the general-purpose usage metadata callback manager. We can track token usage through configuration or as a context manager." ] }, { @@ -368,41 +328,84 @@ "id": "6f043cb9", "metadata": {}, "source": [ - "### OpenAI\n", + "### Tracking token usage through configuration\n", "\n", - "Let's first look at an extremely simple example of tracking token usage for a single Chat model call." 
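Before moving on to the callback examples, here is a short sketch of how the streaming pattern above applies to `AzureChatOpenAI` now that `stream_usage` is supported there (see the patch earlier in this series). The deployment name and API version below are placeholders, and the endpoint and key are assumed to come from the standard `AZURE_OPENAI_ENDPOINT` / `AZURE_OPENAI_API_KEY` environment variables.

```python
# Sketch: streaming token counts from AzureChatOpenAI via stream_usage=True.
# Deployment name and API version are placeholders; the endpoint and key are
# assumed to be set in AZURE_OPENAI_ENDPOINT / AZURE_OPENAI_API_KEY.
from langchain_openai import AzureChatOpenAI

llm = AzureChatOpenAI(
    azure_deployment="gpt-4o-mini",        # placeholder deployment name
    api_version="2024-08-01-preview",      # placeholder API version
    stream_usage=True,
)

aggregate = None
for chunk in llm.stream("hello"):
    # AIMessageChunk objects support addition, accumulating usage_metadata
    aggregate = chunk if aggregate is None else aggregate + chunk

print(aggregate.usage_metadata)  # input/output/total token counts
```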
+ "To track token usage through configuration, instantiate a `UsageMetadataCallbackHandler` and pass it into the config:" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 17, "id": "b04a4486-72fd-48ce-8f9e-5d281b441195", "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'gpt-4o-mini-2024-07-18': {'input_tokens': 8,\n", + " 'output_tokens': 10,\n", + " 'total_tokens': 18,\n", + " 'input_token_details': {'audio': 0, 'cache_read': 0},\n", + " 'output_token_details': {'audio': 0, 'reasoning': 0}},\n", + " 'claude-3-5-haiku-20241022': {'input_tokens': 8,\n", + " 'output_tokens': 21,\n", + " 'total_tokens': 29,\n", + " 'input_token_details': {'cache_read': 0, 'cache_creation': 0}}}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain.chat_models import init_chat_model\n", + "from langchain_core.callbacks import UsageMetadataCallbackHandler\n", + "\n", + "llm_1 = init_chat_model(model=\"openai:gpt-4o-mini\")\n", + "llm_2 = init_chat_model(model=\"anthropic:claude-3-5-haiku-latest\")\n", + "\n", + "callback = UsageMetadataCallbackHandler()\n", + "result_1 = llm_1.invoke(\"Hello\", config={\"callbacks\": [callback]})\n", + "result_2 = llm_2.invoke(\"Hello\", config={\"callbacks\": [callback]})\n", + "callback.usage_metadata" + ] + }, + { + "cell_type": "markdown", + "id": "7a290085-e541-4233-afe4-637ec5032bfd", + "metadata": {}, + "source": [ + "### Tracking token usage using a context manager\n", + "\n", + "You can also use `get_usage_metadata_callback` to create a context manager and aggregate usage metadata there:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "4728f55a-24e1-48cd-a195-09d037821b1e", + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Tokens Used: 27\n", - "\tPrompt Tokens: 11\n", - "\tCompletion Tokens: 16\n", - "Successful Requests: 1\n", - "Total Cost (USD): $2.95e-05\n" + "{'gpt-4o-mini-2024-07-18': {'input_tokens': 8, 'output_tokens': 10, 'total_tokens': 18, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}, 'claude-3-5-haiku-20241022': {'input_tokens': 8, 'output_tokens': 21, 'total_tokens': 29, 'input_token_details': {'cache_read': 0, 'cache_creation': 0}}}\n" ] } ], "source": [ - "from langchain_community.callbacks.manager import get_openai_callback\n", + "from langchain.chat_models import init_chat_model\n", + "from langchain_core.callbacks import get_usage_metadata_callback\n", "\n", - "llm = ChatOpenAI(\n", - " model=\"gpt-4o-mini\",\n", - " temperature=0,\n", - " stream_usage=True,\n", - ")\n", + "llm_1 = init_chat_model(model=\"openai:gpt-4o-mini\")\n", + "llm_2 = init_chat_model(model=\"anthropic:claude-3-5-haiku-latest\")\n", "\n", - "with get_openai_callback() as cb:\n", - " result = llm.invoke(\"Tell me a joke\")\n", - " print(cb)" + "with get_usage_metadata_callback() as cb:\n", + " llm_1.invoke(\"Hello\")\n", + " llm_2.invoke(\"Hello\")\n", + " print(cb.usage_metadata)" ] }, { @@ -410,61 +413,7 @@ "id": "c0ab6d27", "metadata": {}, "source": [ - "Anything inside the context manager will get tracked. Here's an example of using it to track multiple calls in sequence." 
- ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "05f22a1d-b021-490f-8840-f628a07459f2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "54\n" - ] - } - ], - "source": [ - "with get_openai_callback() as cb:\n", - " result = llm.invoke(\"Tell me a joke\")\n", - " result2 = llm.invoke(\"Tell me a joke\")\n", - " print(cb.total_tokens)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "c00c9158-7bb4-4279-88e6-ea70f46e6ac2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Tokens Used: 27\n", - "\tPrompt Tokens: 11\n", - "\tCompletion Tokens: 16\n", - "Successful Requests: 1\n", - "Total Cost (USD): $2.95e-05\n" - ] - } - ], - "source": [ - "with get_openai_callback() as cb:\n", - " for chunk in llm.stream(\"Tell me a joke\"):\n", - " pass\n", - " print(cb)" - ] - }, - { - "cell_type": "markdown", - "id": "d8186e7b", - "metadata": {}, - "source": [ - "If a chain or agent with multiple steps in it is used, it will track all those steps." + "Either of these methods will aggregate token usage across multiple calls to each model. For example, you can use it in an [agent](https://python.langchain.com/docs/concepts/agents/) to track token usage across repeated calls to one model:" ] }, { @@ -474,138 +423,63 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install -qU langchain langchain-aws wikipedia" + "%pip install -qU langgraph" ] }, { "cell_type": "code", - "execution_count": 12, - "id": "5d1125c6", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.agents import AgentExecutor, create_tool_calling_agent, load_tools\n", - "from langchain_core.prompts import ChatPromptTemplate\n", - "\n", - "prompt = ChatPromptTemplate.from_messages(\n", - " [\n", - " (\"system\", \"You're a helpful assistant\"),\n", - " (\"human\", \"{input}\"),\n", - " (\"placeholder\", \"{agent_scratchpad}\"),\n", - " ]\n", - ")\n", - "tools = load_tools([\"wikipedia\"])\n", - "agent = create_tool_calling_agent(llm, tools, prompt)\n", - "agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "3950d88b-8bfb-4294-b75b-e6fd421e633c", + "execution_count": 20, + "id": "fe945078-ee2d-43ba-8cdf-afb2f2f4ecef", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ + "================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", + "What's the weather in Boston?\n", + "==================================\u001b[1m Ai Message \u001b[0m==================================\n", + "Tool Calls:\n", + " get_weather (call_izMdhUYpp9Vhx7DTNAiybzGa)\n", + " Call ID: call_izMdhUYpp9Vhx7DTNAiybzGa\n", + " Args:\n", + " location: Boston\n", + "=================================\u001b[1m Tool Message \u001b[0m=================================\n", + "Name: get_weather\n", "\n", - "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", - "\u001b[32;1m\u001b[1;3m\n", - "Invoking: `wikipedia` with `{'query': 'hummingbird scientific name'}`\n", + "It's sunny.\n", + "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "\n", + "The weather in Boston is sunny.\n", "\n", - "\u001b[0m\u001b[36;1m\u001b[1;3mPage: Hummingbird\n", - "Summary: Hummingbirds are birds native to the Americas and comprise the biological family Trochilidae. 
With approximately 366 species and 113 genera, they occur from Alaska to Tierra del Fuego, but most species are found in Central and South America. As of 2024, 21 hummingbird species are listed as endangered or critically endangered, with numerous species declining in population.\n", - "Hummingbirds have varied specialized characteristics to enable rapid, maneuverable flight: exceptional metabolic capacity, adaptations to high altitude, sensitive visual and communication abilities, and long-distance migration in some species. Among all birds, male hummingbirds have the widest diversity of plumage color, particularly in blues, greens, and purples. Hummingbirds are the smallest mature birds, measuring 7.5–13 cm (3–5 in) in length. The smallest is the 5 cm (2.0 in) bee hummingbird, which weighs less than 2.0 g (0.07 oz), and the largest is the 23 cm (9 in) giant hummingbird, weighing 18–24 grams (0.63–0.85 oz). Noted for long beaks, hummingbirds are specialized for feeding on flower nectar, but all species also consume small insects.\n", - "They are known as hummingbirds because of the humming sound created by their beating wings, which flap at high frequencies audible to other birds and humans. They hover at rapid wing-flapping rates, which vary from around 12 beats per second in the largest species to 80 per second in small hummingbirds.\n", - "Hummingbirds have the highest mass-specific metabolic rate of any homeothermic animal. To conserve energy when food is scarce and at night when not foraging, they can enter torpor, a state similar to hibernation, and slow their metabolic rate to 1⁄15 of its normal rate. While most hummingbirds do not migrate, the rufous hummingbird has one of the longest migrations among birds, traveling twice per year between Alaska and Mexico, a distance of about 3,900 miles (6,300 km).\n", - "Hummingbirds split from their sister group, the swifts and treeswifts, around 42 million years ago. The oldest known fossil hummingbird is Eurotrochilus, from the Rupelian Stage of Early Oligocene Europe.\n", - "\n", - "Page: Rufous hummingbird\n", - "Summary: The rufous hummingbird (Selasphorus rufus) is a small hummingbird, about 8 cm (3.1 in) long with a long, straight and slender bill. These birds are known for their extraordinary flight skills, flying 2,000 mi (3,200 km) during their migratory transits. It is one of nine species in the genus Selasphorus.\n", - "\n", - "\n", - "\n", - "Page: Allen's hummingbird\n", - "Summary: Allen's hummingbird (Selasphorus sasin) is a species of hummingbird that breeds in the western United States. It is one of seven species in the genus Selasphorus.\u001b[0m\u001b[32;1m\u001b[1;3m\n", - "Invoking: `wikipedia` with `{'query': 'fastest bird species'}`\n", - "\n", - "\n", - "\u001b[0m\u001b[36;1m\u001b[1;3mPage: List of birds by flight speed\n", - "Summary: This is a list of the fastest flying birds in the world. A bird's velocity is necessarily variable; a hunting bird will reach much greater speeds while diving to catch prey than when flying horizontally. The bird that can achieve the greatest airspeed is the peregrine falcon (Falco peregrinus), able to exceed 320 km/h (200 mph) in its dives. A close relative of the common swift, the white-throated needletail (Hirundapus caudacutus), is commonly reported as the fastest bird in level flight with a reported top speed of 169 km/h (105 mph). This record remains unconfirmed as the measurement methods have never been published or verified. 
The record for the fastest confirmed level flight by a bird is 111.5 km/h (69.3 mph) held by the common swift.\n", - "\n", - "Page: Fastest animals\n", - "Summary: This is a list of the fastest animals in the world, by types of animal.\n", - "\n", - "Page: Falcon\n", - "Summary: Falcons () are birds of prey in the genus Falco, which includes about 40 species. Falcons are widely distributed on all continents of the world except Antarctica, though closely related raptors did occur there in the Eocene.\n", - "Adult falcons have thin, tapered wings, which enable them to fly at high speed and change direction rapidly. Fledgling falcons, in their first year of flying, have longer flight feathers, which make their configuration more like that of a general-purpose bird such as a broad wing. This makes flying easier while learning the exceptional skills required to be effective hunters as adults.\n", - "The falcons are the largest genus in the Falconinae subfamily of Falconidae, which itself also includes another subfamily comprising caracaras and a few other species. All these birds kill with their beaks, using a tomial \"tooth\" on the side of their beaks—unlike the hawks, eagles, and other birds of prey in the Accipitridae, which use their feet.\n", - "The largest falcon is the gyrfalcon at up to 65 cm in length. The smallest falcon species is the pygmy falcon, which measures just 20 cm. As with hawks and owls, falcons exhibit sexual dimorphism, with the females typically larger than the males, thus allowing a wider range of prey species.\n", - "Some small falcons with long, narrow wings are called \"hobbies\" and some which hover while hunting are called \"kestrels\".\n", - "As is the case with many birds of prey, falcons have exceptional powers of vision; the visual acuity of one species has been measured at 2.6 times that of a normal human. Peregrine falcons have been recorded diving at speeds of 320 km/h (200 mph), making them the fastest-moving creatures on Earth; the fastest recorded dive attained a vertical speed of 390 km/h (240 mph).\u001b[0m\u001b[32;1m\u001b[1;3mThe scientific name for a hummingbird is Trochilidae. The fastest bird species in level flight is the common swift, which holds the record for the fastest confirmed level flight by a bird at 111.5 km/h (69.3 mph). 
The peregrine falcon is known to exceed speeds of 320 km/h (200 mph) in its dives, making it the fastest bird in terms of diving speed.\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n", - "Total Tokens: 1675\n", - "Prompt Tokens: 1538\n", - "Completion Tokens: 137\n", - "Total Cost (USD): $0.0009745000000000001\n" + "Total usage: {'gpt-4o-mini-2024-07-18': {'input_token_details': {'audio': 0, 'cache_read': 0}, 'input_tokens': 125, 'total_tokens': 149, 'output_tokens': 24, 'output_token_details': {'audio': 0, 'reasoning': 0}}}\n" ] } ], "source": [ - "with get_openai_callback() as cb:\n", - " response = agent_executor.invoke(\n", - " {\n", - " \"input\": \"What's a hummingbird's scientific name and what's the fastest bird species?\"\n", - " }\n", - " )\n", - " print(f\"Total Tokens: {cb.total_tokens}\")\n", - " print(f\"Prompt Tokens: {cb.prompt_tokens}\")\n", - " print(f\"Completion Tokens: {cb.completion_tokens}\")\n", - " print(f\"Total Cost (USD): ${cb.total_cost}\")" - ] - }, - { - "cell_type": "markdown", - "id": "ebc9122b-050b-4006-b763-264b0b26d9df", - "metadata": {}, - "source": [ - "### Bedrock Anthropic\n", + "from langgraph.prebuilt import create_react_agent\n", "\n", - "The `get_bedrock_anthropic_callback` works very similarly:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "1837c807-136a-49d8-9c33-060e58dc16d2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Tokens Used: 96\n", - "\tPrompt Tokens: 26\n", - "\tCompletion Tokens: 70\n", - "Successful Requests: 2\n", - "Total Cost (USD): $0.001888\n" - ] - } - ], - "source": [ - "from langchain_aws import ChatBedrock\n", - "from langchain_community.callbacks.manager import get_bedrock_anthropic_callback\n", "\n", - "llm = ChatBedrock(model_id=\"anthropic.claude-v2\")\n", + "# Create a tool\n", + "def get_weather(location: str) -> str:\n", + " \"\"\"Get the weather at a location.\"\"\"\n", + " return \"It's sunny.\"\n", "\n", - "with get_bedrock_anthropic_callback() as cb:\n", - " result = llm.invoke(\"Tell me a joke\")\n", - " result2 = llm.invoke(\"Tell me a joke\")\n", - " print(cb)" + "\n", + "callback = UsageMetadataCallbackHandler()\n", + "\n", + "tools = [get_weather]\n", + "agent = create_react_agent(\"openai:gpt-4o-mini\", tools)\n", + "for step in agent.stream(\n", + " {\"messages\": [{\"role\": \"user\", \"content\": \"What's the weather in Boston?\"}]},\n", + " stream_mode=\"values\",\n", + " config={\"callbacks\": [callback]},\n", + "):\n", + " step[\"messages\"][-1].pretty_print()\n", + "\n", + "\n", + "print(f\"\\nTotal usage: {callback.usage_metadata}\")" ] }, { From d7d0bca2bce12fecab24ed7935aeaa71f06b0121 Mon Sep 17 00:00:00 2001 From: Adeel Ehsan Date: Thu, 27 Mar 2025 01:47:53 +0500 Subject: [PATCH 14/30] docs: add vectara to libs package yml (#30504) --- libs/packages.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libs/packages.yml b/libs/packages.yml index af7aee18995..d53e7a24ccc 100644 --- a/libs/packages.yml +++ b/libs/packages.yml @@ -554,3 +554,6 @@ packages: - name: langchain-memgraph path: . 
repo: memgraph/langchain-memgraph +- name: langchain-vectara + path: libs/vectara + repo: vectara/langchain-vectara From 7664874a0dd03c50f76505055f4738c4edcdb685 Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Wed, 26 Mar 2025 22:21:59 -0400 Subject: [PATCH 15/30] docs: llms-txt (#30506) First just verifying it's included in the manifest --- docs/static/llms.txt | 436 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 436 insertions(+) create mode 100644 docs/static/llms.txt diff --git a/docs/static/llms.txt b/docs/static/llms.txt new file mode 100644 index 00000000000..37c9dd44915 --- /dev/null +++ b/docs/static/llms.txt @@ -0,0 +1,436 @@ +# LangChain + +## High level + +- **[Why LangChain?](https://python.langchain.com/docs/concepts/why_langchain)**: Overview of the value that LangChain provides. +- **[Architecture](https://python.langchain.com/docs/concepts/architecture)**: How packages are organized in the LangChain ecosystem. + +## Concepts + +- **[Chat models](https://python.langchain.com/docs/concepts/chat_models)**: LLMs exposed via a chat API that process sequences of messages as input and output a message. +- **[Messages](https://python.langchain.com/docs/concepts/messages)**: The unit of communication in chat models, used to represent model input and output. +- **[Chat history](https://python.langchain.com/docs/concepts/chat_history)**: A conversation represented as a sequence of messages, alternating between user messages and model responses. +- **[Tools](https://python.langchain.com/docs/concepts/tools)**: A function with an associated schema defining the function's name, description, and the arguments it accepts. +- **[Tool calling](https://python.langchain.com/docs/concepts/tool_calling)**: A type of chat model API that accepts tool schemas, along with messages, as input and returns invocations of those tools as part of the output message. +- **[Structured output](https://python.langchain.com/docs/concepts/structured_outputs)**: A technique to make a chat model respond in a structured format, such as JSON that matches a given schema. +- **[Memory](https://langchain-ai.github.io/langgraph/concepts/memory/)**: Information about a conversation that is persisted so that it can be used in future conversations. +- **[Multimodality](https://python.langchain.com/docs/concepts/multimodality)**: The ability to work with data that comes in different forms, such as text, audio, images, and video. +- **[Runnable interface](https://python.langchain.com/docs/concepts/runnables)**: The base abstraction that many LangChain components and the LangChain Expression Language are built on. +- **[Streaming](https://python.langchain.com/docs/concepts/streaming)**: LangChain streaming APIs for surfacing results as they are generated. +- **[LangChain Expression Language (LCEL)](https://python.langchain.com/docs/concepts/lcel)**: A syntax for orchestrating LangChain components. Most useful for simpler applications. +- **[Document loaders](https://python.langchain.com/docs/concepts/document_loaders)**: Load a source as a list of documents. +- **[Retrieval](https://python.langchain.com/docs/concepts/retrieval)**: Information retrieval systems can retrieve structured or unstructured data from a datasource in response to a query. +- **[Text splitters](https://python.langchain.com/docs/concepts/text_splitters)**: Split long text into smaller chunks that can be individually indexed to enable granular retrieval. 
+- **[Embedding models](https://python.langchain.com/docs/concepts/embedding_models)**: Models that represent data such as text or images in a vector space. +- **[Vector stores](https://python.langchain.com/docs/concepts/vectorstores)**: Storage of and efficient search over vectors and associated metadata. +- **[Retriever](https://python.langchain.com/docs/concepts/retrievers)**: A component that returns relevant documents from a knowledge base in response to a query. +- **[Retrieval Augmented Generation (RAG)](https://python.langchain.com/docs/concepts/rag)**: A technique that enhances language models by combining them with external knowledge bases. +- **[Agents](https://python.langchain.com/docs/concepts/agents)**: Use a [language model](https://python.langchain.com/docs/concepts/chat_models) to choose a sequence of actions to take. Agents can interact with external resources via [tool](https://python.langchain.com/docs/concepts/tools). +- **[Prompt templates](https://python.langchain.com/docs/concepts/prompt_templates)**: Component for factoring out the static parts of a model "prompt" (usually a sequence of messages). Useful for serializing, versioning, and reusing these static parts. +- **[Output parsers](https://python.langchain.com/docs/concepts/output_parsers)**: Responsible for taking the output of a model and transforming it into a more suitable format for downstream tasks. Output parsers were primarily useful prior to the general availability of [tool calling](https://python.langchain.com/docs/concepts/tool_calling) and [structured outputs](https://python.langchain.com/docs/concepts/structured_outputs). +- **[Few-shot prompting](https://python.langchain.com/docs/concepts/few_shot_prompting)**: A technique for improving model performance by providing a few examples of the task to perform in the prompt. +- **[Example selectors](https://python.langchain.com/docs/concepts/example_selectors)**: Used to select the most relevant examples from a dataset based on a given input. Example selectors are used in few-shot prompting to select examples for a prompt. +- **[Async programming](https://python.langchain.com/docs/concepts/async)**: The basics that one should know to use LangChain in an asynchronous context. +- **[Callbacks](https://python.langchain.com/docs/concepts/callbacks)**: Callbacks enable the execution of custom auxiliary code in built-in components. Callbacks are used to stream outputs from LLMs in LangChain, trace the intermediate steps of an application, and more. +- **[Tracing](https://python.langchain.com/docs/concepts/tracing)**: The process of recording the steps that an application takes to go from input to output. Tracing is essential for debugging and diagnosing issues in complex applications. +- **[Evaluation](https://python.langchain.com/docs/concepts/evaluation)**: The process of assessing the performance and effectiveness of AI applications. This involves testing the model's responses against a set of predefined criteria or benchmarks to ensure it meets the desired quality standards and fulfills the intended purpose. This process is vital for building reliable applications. +- **[Testing](https://python.langchain.com/docs/concepts/testing)**: The process of verifying that a component of an integration or application works as expected. Testing is essential for ensuring that the application behaves correctly and that changes to the codebase do not introduce new bugs. 
+ +## How-to guides + +### Installation + +- [How to: install LangChain packages](https://python.langchain.com/docs/how_to/installation/) +- [How to: use LangChain with different Pydantic versions](https://python.langchain.com/docs/how_to/pydantic_compatibility) +- [How to: return structured data from a model](https://python.langchain.com/docs/how_to/structured_output/) +- [How to: use a model to call tools](https://python.langchain.com/docs/how_to/tool_calling) +- [How to: stream runnables](https://python.langchain.com/docs/how_to/streaming) +- [How to: debug your LLM apps](https://python.langchain.com/docs/how_to/debugging/) + +### Components + +These are the core building blocks you can use when building applications. + +#### Chat models + +[Chat Models](https://python.langchain.com/docs/concepts/chat_models) are newer forms of language models that take messages in and output a message. +See [supported integrations](https://python.langchain.com/docs/integrations/chat/) for details on getting started with chat models from a specific provider. + +- [How to: do function/tool calling](https://python.langchain.com/docs/how_to/tool_calling) +- [How to: get models to return structured output](https://python.langchain.com/docs/how_to/structured_output) +- [How to: cache model responses](https://python.langchain.com/docs/how_to/chat_model_caching) +- [How to: get log probabilities](https://python.langchain.com/docs/how_to/logprobs) +- [How to: create a custom chat model class](https://python.langchain.com/docs/how_to/custom_chat_model) +- [How to: stream a response back](https://python.langchain.com/docs/how_to/chat_streaming) +- [How to: track token usage](https://python.langchain.com/docs/how_to/chat_token_usage_tracking) +- [How to: track response metadata across providers](https://python.langchain.com/docs/how_to/response_metadata) +- [How to: use chat model to call tools](https://python.langchain.com/docs/how_to/tool_calling) +- [How to: stream tool calls](https://python.langchain.com/docs/how_to/tool_streaming) +- [How to: handle rate limits](https://python.langchain.com/docs/how_to/chat_model_rate_limiting) +- [How to: few shot prompt tool behavior](https://python.langchain.com/docs/how_to/tools_few_shot) +- [How to: bind model-specific formatted tools](https://python.langchain.com/docs/how_to/tools_model_specific) +- [How to: force a specific tool call](https://python.langchain.com/docs/how_to/tool_choice) +- [How to: work with local models](https://python.langchain.com/docs/how_to/local_llms) +- [How to: init any model in one line](https://python.langchain.com/docs/how_to/chat_models_universal_init/) + +#### Messages + +[Messages](https://python.langchain.com/docs/concepts/messages) are the input and output of chat models. They have some `content` and a `role`, which describes the source of the message. + +- [How to: trim messages](https://python.langchain.com/docs/how_to/trim_messages/) +- [How to: filter messages](https://python.langchain.com/docs/how_to/filter_messages/) +- [How to: merge consecutive messages of the same type](https://python.langchain.com/docs/how_to/merge_message_runs/) + +#### Prompt templates + +[Prompt Templates](https://python.langchain.com/docs/concepts/prompt_templates) are responsible for formatting user input into a format that can be passed to a language model. 
+ +- [How to: use few shot examples](https://python.langchain.com/docs/how_to/few_shot_examples) +- [How to: use few shot examples in chat models](https://python.langchain.com/docs/how_to/few_shot_examples_chat/) +- [How to: partially format prompt templates](https://python.langchain.com/docs/how_to/prompts_partial) +- [How to: compose prompts together](https://python.langchain.com/docs/how_to/prompts_composition) + +#### Example selectors + +[Example Selectors](https://python.langchain.com/docs/concepts/example_selectors) are responsible for selecting the correct few shot examples to pass to the prompt. + +- [How to: use example selectors](https://python.langchain.com/docs/how_to/example_selectors) +- [How to: select examples by length](https://python.langchain.com/docs/how_to/example_selectors_length_based) +- [How to: select examples by semantic similarity](https://python.langchain.com/docs/how_to/example_selectors_similarity) +- [How to: select examples by semantic ngram overlap](https://python.langchain.com/docs/how_to/example_selectors_ngram) +- [How to: select examples by maximal marginal relevance](https://python.langchain.com/docs/how_to/example_selectors_mmr) +- [How to: select examples from LangSmith few-shot datasets](https://python.langchain.com/docs/how_to/example_selectors_langsmith/) + +#### LLMs + +What LangChain calls [LLMs](https://python.langchain.com/docs/concepts/text_llms) are older forms of language models that take a string in and output a string. + +- [How to: cache model responses](https://python.langchain.com/docs/how_to/llm_caching) +- [How to: create a custom LLM class](https://python.langchain.com/docs/how_to/custom_llm) +- [How to: stream a response back](https://python.langchain.com/docs/how_to/streaming_llm) +- [How to: track token usage](https://python.langchain.com/docs/how_to/llm_token_usage_tracking) +- [How to: work with local models](https://python.langchain.com/docs/how_to/local_llms) + +#### Output parsers + +[Output Parsers](https://python.langchain.com/docs/concepts/output_parsers) are responsible for taking the output of an LLM and parsing it into a more structured format. + +- [How to: parse text from message objects](https://python.langchain.com/docs/how_to/output_parser_string) +- [How to: use output parsers to parse an LLM response into structured format](https://python.langchain.com/docs/how_to/output_parser_structured) +- [How to: parse JSON output](https://python.langchain.com/docs/how_to/output_parser_json) +- [How to: parse XML output](https://python.langchain.com/docs/how_to/output_parser_xml) +- [How to: parse YAML output](https://python.langchain.com/docs/how_to/output_parser_yaml) +- [How to: retry when output parsing errors occur](https://python.langchain.com/docs/how_to/output_parser_retry) +- [How to: try to fix errors in output parsing](https://python.langchain.com/docs/how_to/output_parser_fixing) +- [How to: write a custom output parser class](https://python.langchain.com/docs/how_to/output_parser_custom) + +#### Document loaders + +[Document Loaders](https://python.langchain.com/docs/concepts/document_loaders) are responsible for loading documents from a variety of sources. 
+ +- [How to: load PDF files](https://python.langchain.com/docs/how_to/document_loader_pdf) +- [How to: load web pages](https://python.langchain.com/docs/how_to/document_loader_web) +- [How to: load CSV data](https://python.langchain.com/docs/how_to/document_loader_csv) +- [How to: load data from a directory](https://python.langchain.com/docs/how_to/document_loader_directory) +- [How to: load HTML data](https://python.langchain.com/docs/how_to/document_loader_html) +- [How to: load JSON data](https://python.langchain.com/docs/how_to/document_loader_json) +- [How to: load Markdown data](https://python.langchain.com/docs/how_to/document_loader_markdown) +- [How to: load Microsoft Office data](https://python.langchain.com/docs/how_to/document_loader_office_file) +- [How to: write a custom document loader](https://python.langchain.com/docs/how_to/document_loader_custom) + +#### Text splitters + +[Text Splitters](https://python.langchain.com/docs/concepts/text_splitters) take a document and split into chunks that can be used for retrieval. + +- [How to: recursively split text](https://python.langchain.com/docs/how_to/recursive_text_splitter) +- [How to: split HTML](https://python.langchain.com/docs/how_to/split_html) +- [How to: split by character](https://python.langchain.com/docs/how_to/character_text_splitter) +- [How to: split code](https://python.langchain.com/docs/how_to/code_splitter) +- [How to: split Markdown by headers](https://python.langchain.com/docs/how_to/markdown_header_metadata_splitter) +- [How to: recursively split JSON](https://python.langchain.com/docs/how_to/recursive_json_splitter) +- [How to: split text into semantic chunks](https://python.langchain.com/docs/how_to/semantic-chunker) +- [How to: split by tokens](https://python.langchain.com/docs/how_to/split_by_token) + +#### Embedding models + +[Embedding Models](https://python.langchain.com/docs/concepts/embedding_models) take a piece of text and create a numerical representation of it. +See [supported integrations](https://python.langchain.com/docs/integrations/text_embedding/) for details on getting started with embedding models from a specific provider. + +- [How to: embed text data](https://python.langchain.com/docs/how_to/embed_text) +- [How to: cache embedding results](https://python.langchain.com/docs/how_to/caching_embeddings) +- [How to: create a custom embeddings class](https://python.langchain.com/docs/how_to/custom_embeddings) + +#### Vector stores + +[Vector stores](https://python.langchain.com/docs/concepts/vectorstores) are databases that can efficiently store and retrieve embeddings. +See [supported integrations](https://python.langchain.com/docs/integrations/vectorstores/) for details on getting started with vector stores from a specific provider. + +- [How to: use a vector store to retrieve data](https://python.langchain.com/docs/how_to/vectorstores) + +#### Retrievers + +[Retrievers](https://python.langchain.com/docs/concepts/retrievers) are responsible for taking a query and returning relevant documents. 
+ +- [How to: use a vector store to retrieve data](https://python.langchain.com/docs/how_to/vectorstore_retriever) +- [How to: generate multiple queries to retrieve data for](https://python.langchain.com/docs/how_to/MultiQueryRetriever) +- [How to: use contextual compression to compress the data retrieved](https://python.langchain.com/docs/how_to/contextual_compression) +- [How to: write a custom retriever class](https://python.langchain.com/docs/how_to/custom_retriever) +- [How to: add similarity scores to retriever results](https://python.langchain.com/docs/how_to/add_scores_retriever) +- [How to: combine the results from multiple retrievers](https://python.langchain.com/docs/how_to/ensemble_retriever) +- [How to: reorder retrieved results to mitigate the "lost in the middle" effect](https://python.langchain.com/docs/how_to/long_context_reorder) +- [How to: generate multiple embeddings per document](https://python.langchain.com/docs/how_to/multi_vector) +- [How to: retrieve the whole document for a chunk](https://python.langchain.com/docs/how_to/parent_document_retriever) +- [How to: generate metadata filters](https://python.langchain.com/docs/how_to/self_query) +- [How to: create a time-weighted retriever](https://python.langchain.com/docs/how_to/time_weighted_vectorstore) +- [How to: use hybrid vector and keyword retrieval](https://python.langchain.com/docs/how_to/hybrid) + +#### Indexing + +Indexing is the process of keeping your vectorstore in-sync with the underlying data source. + +- [How to: reindex data to keep your vectorstore in-sync with the underlying data source](https://python.langchain.com/docs/how_to/indexing) + +#### Tools + +LangChain [Tools](https://python.langchain.com/docs/concepts/tools) contain a description of the tool (to pass to the language model) as well as the implementation of the function to call. Refer [here](https://python.langchain.com/docs/integrations/tools/) for a list of pre-built tools. 
+ +- [How to: create tools](https://python.langchain.com/docs/how_to/custom_tools) +- [How to: use built-in tools and toolkits](https://python.langchain.com/docs/how_to/tools_builtin) +- [How to: use chat models to call tools](https://python.langchain.com/docs/how_to/tool_calling) +- [How to: pass tool outputs to chat models](https://python.langchain.com/docs/how_to/tool_results_pass_to_model) +- [How to: pass run time values to tools](https://python.langchain.com/docs/how_to/tool_runtime) +- [How to: add a human-in-the-loop for tools](https://python.langchain.com/docs/how_to/tools_human) +- [How to: handle tool errors](https://python.langchain.com/docs/how_to/tools_error) +- [How to: force models to call a tool](https://python.langchain.com/docs/how_to/tool_choice) +- [How to: disable parallel tool calling](https://python.langchain.com/docs/how_to/tool_calling_parallel) +- [How to: access the `RunnableConfig` from a tool](https://python.langchain.com/docs/how_to/tool_configure) +- [How to: stream events from a tool](https://python.langchain.com/docs/how_to/tool_stream_events) +- [How to: return artifacts from a tool](https://python.langchain.com/docs/how_to/tool_artifacts/) +- [How to: convert Runnables to tools](https://python.langchain.com/docs/how_to/convert_runnable_to_tool) +- [How to: add ad-hoc tool calling capability to models](https://python.langchain.com/docs/how_to/tools_prompting) +- [How to: pass in runtime secrets](https://python.langchain.com/docs/how_to/runnable_runtime_secrets) + +#### Multimodal + +- [How to: pass multimodal data directly to models](https://python.langchain.com/docs/how_to/multimodal_inputs/) +- [How to: use multimodal prompts](https://python.langchain.com/docs/how_to/multimodal_prompts/) + +#### Agents + +:::note + +For in-depth how-to guides for agents, please check out the [LangGraph](https://langchain-ai.github.io/langgraph/) documentation. + +::: + +- [How to: use legacy LangChain Agents (AgentExecutor)](https://python.langchain.com/docs/how_to/agent_executor) +- [How to: migrate from legacy LangChain agents to LangGraph](https://python.langchain.com/docs/how_to/migrate_agent) + +#### Callbacks + +[Callbacks](https://python.langchain.com/docs/concepts/callbacks) allow you to hook into the various stages of your LLM application's execution. + +- [How to: pass in callbacks at runtime](https://python.langchain.com/docs/how_to/callbacks_runtime) +- [How to: attach callbacks to a module](https://python.langchain.com/docs/how_to/callbacks_attach) +- [How to: pass callbacks into a module constructor](https://python.langchain.com/docs/how_to/callbacks_constructor) +- [How to: create custom callback handlers](https://python.langchain.com/docs/how_to/custom_callbacks) +- [How to: use callbacks in async environments](https://python.langchain.com/docs/how_to/callbacks_async) +- [How to: dispatch custom callback events](https://python.langchain.com/docs/how_to/callbacks_custom_events) + +#### Custom + +All LangChain components can easily be extended to support your own versions. 
+ +- [How to: create a custom chat model class](https://python.langchain.com/docs/how_to/custom_chat_model) +- [How to: create a custom LLM class](https://python.langchain.com/docs/how_to/custom_llm) +- [How to: create a custom embeddings class](https://python.langchain.com/docs/how_to/custom_embeddings) +- [How to: write a custom retriever class](https://python.langchain.com/docs/how_to/custom_retriever) +- [How to: write a custom document loader](https://python.langchain.com/docs/how_to/document_loader_custom) +- [How to: write a custom output parser class](https://python.langchain.com/docs/how_to/output_parser_custom) +- [How to: create custom callback handlers](https://python.langchain.com/docs/how_to/custom_callbacks) +- [How to: define a custom tool](https://python.langchain.com/docs/how_to/custom_tools) +- [How to: dispatch custom callback events](https://python.langchain.com/docs/how_to/callbacks_custom_events) + +#### Serialization + +- [How to: save and load LangChain objects](https://python.langchain.com/docs/how_to/serialization) + +## Use cases + +These guides cover use-case specific details. + +### Q&A with RAG + +Retrieval Augmented Generation (RAG) is a way to connect LLMs to external sources of data. +For a high-level tutorial on RAG, check out [this guide](https://python.langchain.com/docs/tutorials/rag/). + +- [How to: add chat history](https://python.langchain.com/docs/how_to/qa_chat_history_how_to/) +- [How to: stream](https://python.langchain.com/docs/how_to/qa_streaming/) +- [How to: return sources](https://python.langchain.com/docs/how_to/qa_sources/) +- [How to: return citations](https://python.langchain.com/docs/how_to/qa_citations/) +- [How to: do per-user retrieval](https://python.langchain.com/docs/how_to/qa_per_user/) + + +### Extraction + +Extraction is when you use LLMs to extract structured information from unstructured text. +For a high level tutorial on extraction, check out [this guide](https://python.langchain.com/docs/tutorials/extraction/). + +- [How to: use reference examples](https://python.langchain.com/docs/how_to/extraction_examples/) +- [How to: handle long text](https://python.langchain.com/docs/how_to/extraction_long_text/) +- [How to: do extraction without using function calling](https://python.langchain.com/docs/how_to/extraction_parse) + +### Chatbots + +Chatbots involve using an LLM to have a conversation. +For a high-level tutorial on building chatbots, check out [this guide](https://python.langchain.com/docs/tutorials/chatbot/). + +- [How to: manage memory](https://python.langchain.com/docs/how_to/chatbots_memory) +- [How to: do retrieval](https://python.langchain.com/docs/how_to/chatbots_retrieval) +- [How to: use tools](https://python.langchain.com/docs/how_to/chatbots_tools) +- [How to: manage large chat history](https://python.langchain.com/docs/how_to/trim_messages/) + +### Query analysis + +Query Analysis is the task of using an LLM to generate a query to send to a retriever. +For a high-level tutorial on query analysis, check out [this guide](https://python.langchain.com/docs/tutorials/rag/#query-analysis). 
+ +- [How to: add examples to the prompt](https://python.langchain.com/docs/how_to/query_few_shot) +- [How to: handle cases where no queries are generated](https://python.langchain.com/docs/how_to/query_no_queries) +- [How to: handle multiple queries](https://python.langchain.com/docs/how_to/query_multiple_queries) +- [How to: handle multiple retrievers](https://python.langchain.com/docs/how_to/query_multiple_retrievers) +- [How to: construct filters](https://python.langchain.com/docs/how_to/query_constructing_filters) +- [How to: deal with high cardinality categorical variables](https://python.langchain.com/docs/how_to/query_high_cardinality) + +### Q&A over SQL + CSV + +You can use LLMs to do question answering over tabular data. +For a high-level tutorial, check out [this guide](https://python.langchain.com/docs/tutorials/sql_qa/). + +- [How to: use prompting to improve results](https://python.langchain.com/docs/how_to/sql_prompting) +- [How to: do query validation](https://python.langchain.com/docs/how_to/sql_query_checking) +- [How to: deal with large databases](https://python.langchain.com/docs/how_to/sql_large_db) +- [How to: deal with CSV files](https://python.langchain.com/docs/how_to/sql_csv) + +### Q&A over graph databases + +You can use an LLM to do question answering over graph databases. +For a high-level tutorial, check out [this guide](https://python.langchain.com/docs/tutorials/graph/). + +- [How to: add a semantic layer over the database](https://python.langchain.com/docs/how_to/graph_semantic) +- [How to: construct knowledge graphs](https://python.langchain.com/docs/how_to/graph_constructing) + +### Summarization + +LLMs can summarize and otherwise distill desired information from text, including +large volumes of text. For a high-level tutorial, check out [this guide](https://python.langchain.com/docs/tutorials/summarization). + +- [How to: summarize text in a single LLM call](https://python.langchain.com/docs/how_to/summarize_stuff) +- [How to: summarize text through parallelization](https://python.langchain.com/docs/how_to/summarize_map_reduce) +- [How to: summarize text through iterative refinement](https://python.langchain.com/docs/how_to/summarize_refine) + +## LangChain Expression Language (LCEL) + +[LangChain Expression Language](https://python.langchain.com/docs/concepts/lcel) is a way to create arbitrary custom chains. It is built on the [Runnable](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html) protocol. + +[**LCEL cheatsheet**](https://python.langchain.com/docs/how_to/lcel_cheatsheet/): For a quick overview of how to use the main LCEL primitives. + +[**Migration guide**](https://python.langchain.com/docs/versions/migrating_chains): For migrating legacy chain abstractions to LCEL. 
+ +- [How to: chain runnables](https://python.langchain.com/docs/how_to/sequence) +- [How to: stream runnables](https://python.langchain.com/docs/how_to/streaming) +- [How to: invoke runnables in parallel](https://python.langchain.com/docs/how_to/parallel/) +- [How to: add default invocation args to runnables](https://python.langchain.com/docs/how_to/binding/) +- [How to: turn any function into a runnable](https://python.langchain.com/docs/how_to/functions) +- [How to: pass through inputs from one chain step to the next](https://python.langchain.com/docs/how_to/passthrough) +- [How to: configure runnable behavior at runtime](https://python.langchain.com/docs/how_to/configure) +- [How to: add message history (memory) to a chain](https://python.langchain.com/docs/how_to/message_history) +- [How to: route between sub-chains](https://python.langchain.com/docs/how_to/routing) +- [How to: create a dynamic (self-constructing) chain](https://python.langchain.com/docs/how_to/dynamic_chain/) +- [How to: inspect runnables](https://python.langchain.com/docs/how_to/inspect) +- [How to: add fallbacks to a runnable](https://python.langchain.com/docs/how_to/fallbacks) +- [How to: pass runtime secrets to a runnable](https://python.langchain.com/docs/how_to/runnable_runtime_secrets) + +Tracing gives you observability inside your chains and agents, and is vital in diagnosing issues. + +- [How to: trace with LangChain](https://docs.smith.langchain.com/how_to_guides/tracing/trace_with_langchain) +- [How to: add metadata and tags to traces](https://docs.smith.langchain.com/how_to_guides/tracing/trace_with_langchain#add-metadata-and-tags-to-traces) + +You can see general tracing-related how-tos [in this section of the LangSmith docs](https://docs.smith.langchain.com/how_to_guides/tracing). + +## Integrations + +### Featured Chat Model Providers + +- [ChatAnthropic](https://python.langchain.com/docs/anthropic/) +- [ChatMistralAI](https://python.langchain.com/docs/mistralai/) +- [ChatFireworks](https://python.langchain.com/docs/fireworks/) +- [AzureChatOpenAI](https://python.langchain.com/docs/azure_chat_openai/) +- [ChatOpenAI](https://python.langchain.com/docs/openai/) +- [ChatTogether](https://python.langchain.com/docs/together/) +- [ChatVertexAI](https://python.langchain.com/docs/google_vertex_ai_palm/) +- [ChatGoogleGenerativeAI](https://python.langchain.com/docs/google_generative_ai/) +- [ChatGroq](https://python.langchain.com/docs/groq/) +- [ChatCohere](https://python.langchain.com/docs/cohere/) +- [ChatBedrock](https://python.langchain.com/docs/bedrock/) +- [ChatHuggingFace](https://python.langchain.com/docs/huggingface/) +- [ChatNVIDIA](https://python.langchain.com/docs/nvidia_ai_endpoints/) +- [ChatOllama](https://python.langchain.com/docs/ollama/) +- [ChatLlamaCpp](https://python.langchain.com/docs/llamacpp) +- [ChatAI21](https://python.langchain.com/docs/ai21) +- [ChatUpstage](https://python.langchain.com/docs/upstage) +- [ChatDatabricks](https://python.langchain.com/docs/databricks) +- [ChatWatsonx](https://python.langchain.com/docs/ibm_watsonx) +- [ChatXAI](https://python.langchain.com/docs/xai) + +Other chat model integrations can be found [here](https://python.langchain.com/docs/integrations/chat/). + +## Glossary + +- **[AIMessageChunk](https://python.langchain.com/docs/concepts/messages#aimessagechunk)**: A partial response from an AI message. Used when streaming responses from a chat model. 
+- **[AIMessage](https://python.langchain.com/docs/concepts/messages#aimessage)**: Represents a complete response from an AI model. +- **[astream_events](https://python.langchain.com/docs/concepts/chat_models#key-methods)**: Stream granular information from [LCEL](https://python.langchain.com/docs/concepts/lcel) chains. +- **[BaseTool](https://python.langchain.com/docs/concepts/tools/#tool-interface)**: The base class for all tools in LangChain. +- **[batch](https://python.langchain.com/docs/concepts/runnables)**: Use to execute a runnable with batch inputs. +- **[bind_tools](https://python.langchain.com/docs/concepts/tool_calling/#tool-binding)**: Allows models to interact with tools. +- **[Caching](https://python.langchain.com/docs/concepts/chat_models#caching)**: Storing results to avoid redundant calls to a chat model. +- **[Chat models](https://python.langchain.com/docs/concepts/multimodality/#multimodality-in-chat-models)**: Chat models that handle multiple data modalities. +- **[Configurable runnables](https://python.langchain.com/docs/concepts/runnables/#configurable-runnables)**: Creating configurable Runnables. +- **[Context window](https://python.langchain.com/docs/concepts/chat_models#context-window)**: The maximum size of input a chat model can process. +- **[Conversation patterns](https://python.langchain.com/docs/concepts/chat_history#conversation-patterns)**: Common patterns in chat interactions. +- **[Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html)**: LangChain's representation of a document. +- **[Embedding models](https://python.langchain.com/docs/concepts/multimodality/#multimodality-in-embedding-models)**: Models that generate vector embeddings for various data types. +- **[HumanMessage](https://python.langchain.com/docs/concepts/messages#humanmessage)**: Represents a message from a human user. +- **[InjectedState](https://python.langchain.com/docs/concepts/tools#injectedstate)**: A state injected into a tool function. +- **[InjectedStore](https://python.langchain.com/docs/concepts/tools#injectedstore)**: A store that can be injected into a tool for data persistence. +- **[InjectedToolArg](https://python.langchain.com/docs/concepts/tools#injectedtoolarg)**: Mechanism to inject arguments into tool functions. +- **[input and output types](https://python.langchain.com/docs/concepts/runnables#input-and-output-types)**: Types used for input and output in Runnables. +- **[Integration packages](https://python.langchain.com/docs/concepts/architecture/#integration-packages)**: Third-party packages that integrate with LangChain. +- **[Integration tests](https://python.langchain.com/docs/concepts/testing#integration-tests)**: Tests that verify the correctness of the interaction between components, usually run with access to the underlying API that powers an integration. +- **[invoke](https://python.langchain.com/docs/concepts/runnables)**: A standard method to invoke a Runnable. +- **[JSON mode](https://python.langchain.com/docs/concepts/structured_outputs#json-mode)**: Returning responses in JSON format. +- **[langchain-community](https://python.langchain.com/docs/concepts/architecture#langchain-community)**: Community-driven components for LangChain. +- **[langchain-core](https://python.langchain.com/docs/concepts/architecture#langchain-core)**: Core langchain package. Includes base interfaces and in-memory implementations. 
+- **[langchain](https://python.langchain.com/docs/concepts/architecture#langchain)**: A package for higher level components (e.g., some pre-built chains). +- **[langgraph](https://python.langchain.com/docs/concepts/architecture#langgraph)**: Powerful orchestration layer for LangChain. Use to build complex pipelines and workflows. +- **[Managing chat history](https://python.langchain.com/docs/concepts/chat_history#managing-chat-history)**: Techniques to maintain and manage the chat history. +- **[OpenAI format](https://python.langchain.com/docs/concepts/messages#openai-format)**: OpenAI's message format for chat models. +- **[Propagation of RunnableConfig](https://python.langchain.com/docs/concepts/runnables/#propagation-of-runnableconfig)**: Propagating configuration through Runnables. Read if working with python 3.9, 3.10 and async. +- **[rate-limiting](https://python.langchain.com/docs/concepts/chat_models#rate-limiting)**: Client side rate limiting for chat models. +- **[RemoveMessage](https://python.langchain.com/docs/concepts/messages/#removemessage)**: An abstraction used to remove a message from chat history, used primarily in LangGraph. +- **[role](https://python.langchain.com/docs/concepts/messages#role)**: Represents the role (e.g., user, assistant) of a chat message. +- **[RunnableConfig](https://python.langchain.com/docs/concepts/runnables/#runnableconfig)**: Use to pass run time information to Runnables (e.g., `run_name`, `run_id`, `tags`, `metadata`, `max_concurrency`, `recursion_limit`, `configurable`). +- **[Standard parameters for chat models](https://python.langchain.com/docs/concepts/chat_models#standard-parameters)**: Parameters such as API key, `temperature`, and `max_tokens`. +- **[Standard tests](https://python.langchain.com/docs/concepts/testing#standard-tests)**: A defined set of unit and integration tests that all integrations must pass. +- **[stream](https://python.langchain.com/docs/concepts/streaming)**: Use to stream output from a Runnable or a graph. +- **[Tokenization](https://python.langchain.com/docs/concepts/tokens)**: The process of converting data into tokens and vice versa. +- **[Tokens](https://python.langchain.com/docs/concepts/tokens)**: The basic unit that a language model reads, processes, and generates under the hood. +- **[Tool artifacts](https://python.langchain.com/docs/concepts/tools#tool-artifacts)**: Add artifacts to the output of a tool that will not be sent to the model, but will be available for downstream processing. +- **[Tool binding](https://python.langchain.com/docs/concepts/tool_calling#tool-binding)**: Binding tools to models. +- **[@tool](https://python.langchain.com/docs/concepts/tools/#create-tools-using-the-tool-decorator)**: Decorator for creating tools in LangChain. +- **[Toolkits](https://python.langchain.com/docs/concepts/tools#toolkits)**: A collection of tools that can be used together. +- **[ToolMessage](https://python.langchain.com/docs/concepts/messages#toolmessage)**: Represents a message that contains the results of a tool execution. +- **[Unit tests](https://python.langchain.com/docs/concepts/testing#unit-tests)**: Tests that verify the correctness of individual components, run in isolation without access to the Internet. +- **[Vector stores](https://python.langchain.com/docs/concepts/vectorstores)**: Datastores specialized for storing and efficiently searching vector embeddings. 
+- **[with_structured_output](https://python.langchain.com/docs/concepts/structured_outputs/#structured-output-method)**: A helper method for chat models that natively support [tool calling](https://python.langchain.com/docs/concepts/tool_calling) to get structured output matching a given schema specified via Pydantic, JSON schema or a function. +- **[with_types](https://python.langchain.com/docs/concepts/runnables#with_types)**: Method to overwrite the input and output types of a runnable. Useful when working with complex LCEL chains and deploying with LangServe. \ No newline at end of file From 75823d580b0f6d7222b979cdcf29eaf1d97a7158 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20S=C3=A1nchez=20S=C3=A1nchez?= Date: Thu, 27 Mar 2025 03:28:08 +0100 Subject: [PATCH 16/30] community: fix perplexity response parameters not being included in model response (#30440) This pull request includes enhancements to the `perplexity.py` file in the `chat_models` module, focusing on improving the handling of additional keyword arguments (`additional_kwargs`) in message processing methods. Additionally, new unit tests have been added to ensure the correct inclusion of citations, images, and related questions in the `additional_kwargs`. Issue: resolves https://github.com/langchain-ai/langchain/issues/30439 Enhancements to `perplexity.py`: * [`libs/community/langchain_community/chat_models/perplexity.py`](diffhunk://#diff-d3e4d7b277608683913b53dcfdbd006f0f4a94d110d8b9ac7acf855f1f22207fL208-L212): Modified the `_convert_delta_to_message_chunk`, `_stream`, and `_generate` methods to handle `additional_kwargs`, which include citations, images, and related questions. [[1]](diffhunk://#diff-d3e4d7b277608683913b53dcfdbd006f0f4a94d110d8b9ac7acf855f1f22207fL208-L212) [[2]](diffhunk://#diff-d3e4d7b277608683913b53dcfdbd006f0f4a94d110d8b9ac7acf855f1f22207fL277-L286) [[3]](diffhunk://#diff-d3e4d7b277608683913b53dcfdbd006f0f4a94d110d8b9ac7acf855f1f22207fR324-R331) New unit tests: * [`libs/community/tests/unit_tests/chat_models/test_perplexity.py`](diffhunk://#diff-dab956d79bd7d17a0f5dea3f38ceab0d583b43b63eb1b29138ee9b6b271ba1d9R119-R275): Added new tests `test_perplexity_stream_includes_citations_and_images` and `test_perplexity_stream_includes_citations_and_related_questions` to verify that the `stream` method correctly includes citations, images, and related questions in the `additional_kwargs`. 
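For reference, a minimal usage sketch of the intended behavior after this change (the model name and prompt are placeholders, it assumes `PPLX_API_KEY` is set in the environment, and which extra fields appear depends on what the Perplexity API returns for a given request):

```python
from langchain_community.chat_models import ChatPerplexity

# Placeholder model name; assumes PPLX_API_KEY is set in the environment.
llm = ChatPerplexity(model="sonar", timeout=30)

response = llm.invoke("What is the tallest mountain in the world?")

# With this change, provider-specific response fields are surfaced in
# additional_kwargs alongside the existing citations (when present).
print(response.additional_kwargs.get("citations"))
print(response.additional_kwargs.get("images"))
print(response.additional_kwargs.get("related_questions"))
```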
--- .../chat_models/perplexity.py | 20 ++- .../unit_tests/chat_models/test_perplexity.py | 157 ++++++++++++++++++ libs/community/uv.lock | 6 +- 3 files changed, 178 insertions(+), 5 deletions(-) diff --git a/libs/community/langchain_community/chat_models/perplexity.py b/libs/community/langchain_community/chat_models/perplexity.py index 3ddba312eab..fd9b735f4f2 100644 --- a/libs/community/langchain_community/chat_models/perplexity.py +++ b/libs/community/langchain_community/chat_models/perplexity.py @@ -345,16 +345,25 @@ class ChatPerplexity(BaseChatModel): if len(chunk["choices"]) == 0: continue choice = chunk["choices"][0] - citations = chunk.get("citations", []) + + additional_kwargs = {} + if first_chunk: + additional_kwargs["citations"] = chunk.get("citations", []) + for attr in ["images", "related_questions"]: + if attr in chunk: + additional_kwargs[attr] = chunk[attr] chunk = self._convert_delta_to_message_chunk( choice["delta"], default_chunk_class ) + if isinstance(chunk, AIMessageChunk) and usage_metadata: chunk.usage_metadata = usage_metadata + if first_chunk: - chunk.additional_kwargs |= {"citations": citations} + chunk.additional_kwargs |= additional_kwargs first_chunk = False + finish_reason = choice.get("finish_reason") generation_info = ( dict(finish_reason=finish_reason) if finish_reason is not None else None @@ -386,9 +395,14 @@ class ChatPerplexity(BaseChatModel): else: usage_metadata = None + additional_kwargs = {"citations": response.citations} + for attr in ["images", "related_questions"]: + if hasattr(response, attr): + additional_kwargs[attr] = getattr(response, attr) + message = AIMessage( content=response.choices[0].message.content, - additional_kwargs={"citations": response.citations}, + additional_kwargs=additional_kwargs, usage_metadata=usage_metadata, ) return ChatResult(generations=[ChatGeneration(message=message)]) diff --git a/libs/community/tests/unit_tests/chat_models/test_perplexity.py b/libs/community/tests/unit_tests/chat_models/test_perplexity.py index c5a745cbcea..45cc4ec7cdf 100644 --- a/libs/community/tests/unit_tests/chat_models/test_perplexity.py +++ b/libs/community/tests/unit_tests/chat_models/test_perplexity.py @@ -116,3 +116,160 @@ def test_perplexity_stream_includes_citations(mocker: MockerFixture) -> None: assert full.additional_kwargs == {"citations": ["example.com", "example2.com"]} patcher.assert_called_once() + + +@pytest.mark.requires("openai") +def test_perplexity_stream_includes_citations_and_images(mocker: MockerFixture) -> None: + """Test that the stream method includes citations in the additional_kwargs.""" + llm = ChatPerplexity( + model="test", + timeout=30, + verbose=True, + ) + mock_chunk_0 = { + "choices": [ + { + "delta": { + "content": "Hello ", + }, + "finish_reason": None, + } + ], + "citations": ["example.com", "example2.com"], + "images": [ + { + "image_url": "mock_image_url", + "origin_url": "mock_origin_url", + "height": 100, + "width": 100, + } + ], + } + mock_chunk_1 = { + "choices": [ + { + "delta": { + "content": "Perplexity", + }, + "finish_reason": None, + } + ], + "citations": ["example.com", "example2.com"], + "images": [ + { + "image_url": "mock_image_url", + "origin_url": "mock_origin_url", + "height": 100, + "width": 100, + } + ], + } + mock_chunks: List[Dict[str, Any]] = [mock_chunk_0, mock_chunk_1] + mock_stream = MagicMock() + mock_stream.__iter__.return_value = mock_chunks + patcher = mocker.patch.object( + llm.client.chat.completions, "create", return_value=mock_stream + ) + stream = llm.stream("Hello 
langchain") + full: Optional[BaseMessageChunk] = None + for i, chunk in enumerate(stream): + full = chunk if full is None else full + chunk + assert chunk.content == mock_chunks[i]["choices"][0]["delta"]["content"] + if i == 0: + assert chunk.additional_kwargs["citations"] == [ + "example.com", + "example2.com", + ] + assert chunk.additional_kwargs["images"] == [ + { + "image_url": "mock_image_url", + "origin_url": "mock_origin_url", + "height": 100, + "width": 100, + } + ] + else: + assert "citations" not in chunk.additional_kwargs + assert "images" not in chunk.additional_kwargs + assert isinstance(full, AIMessageChunk) + assert full.content == "Hello Perplexity" + assert full.additional_kwargs == { + "citations": ["example.com", "example2.com"], + "images": [ + { + "image_url": "mock_image_url", + "origin_url": "mock_origin_url", + "height": 100, + "width": 100, + } + ], + } + + patcher.assert_called_once() + + +@pytest.mark.requires("openai") +def test_perplexity_stream_includes_citations_and_related_questions( + mocker: MockerFixture, +) -> None: + """Test that the stream method includes citations in the additional_kwargs.""" + llm = ChatPerplexity( + model="test", + timeout=30, + verbose=True, + ) + mock_chunk_0 = { + "choices": [ + { + "delta": { + "content": "Hello ", + }, + "finish_reason": None, + } + ], + "citations": ["example.com", "example2.com"], + "related_questions": ["example_question_1", "example_question_2"], + } + mock_chunk_1 = { + "choices": [ + { + "delta": { + "content": "Perplexity", + }, + "finish_reason": None, + } + ], + "citations": ["example.com", "example2.com"], + "related_questions": ["example_question_1", "example_question_2"], + } + mock_chunks: List[Dict[str, Any]] = [mock_chunk_0, mock_chunk_1] + mock_stream = MagicMock() + mock_stream.__iter__.return_value = mock_chunks + patcher = mocker.patch.object( + llm.client.chat.completions, "create", return_value=mock_stream + ) + stream = llm.stream("Hello langchain") + full: Optional[BaseMessageChunk] = None + for i, chunk in enumerate(stream): + full = chunk if full is None else full + chunk + assert chunk.content == mock_chunks[i]["choices"][0]["delta"]["content"] + if i == 0: + assert chunk.additional_kwargs["citations"] == [ + "example.com", + "example2.com", + ] + assert chunk.additional_kwargs["related_questions"] == [ + "example_question_1", + "example_question_2", + ] + else: + assert "citations" not in chunk.additional_kwargs + assert "related_questions" not in chunk.additional_kwargs + assert isinstance(full, AIMessageChunk) + assert full.content == "Hello Perplexity" + assert full.additional_kwargs == { + "citations": ["example.com", "example2.com"], + "related_questions": ["example_question_1", "example_question_2"], + } + + patcher.assert_called_once() diff --git a/libs/community/uv.lock b/libs/community/uv.lock index f5ce918fd7a..91ccbf4f442 100644 --- a/libs/community/uv.lock +++ b/libs/community/uv.lock @@ -1,4 +1,5 @@ version = 1 +revision = 1 requires-python = ">=3.9, <4.0" resolution-markers = [ "python_full_version >= '3.12.4' and platform_python_implementation == 'PyPy'", @@ -1531,6 +1532,7 @@ requires-dist = [ { name = "requests", specifier = ">=2,<3" }, { name = "sqlalchemy", specifier = ">=1.4,<3" }, ] +provides-extras = ["community", "anthropic", "openai", "azure-ai", "cohere", "google-vertexai", "google-genai", "fireworks", "ollama", "together", "mistralai", "huggingface", "groq", "aws", "deepseek", "xai"] [package.metadata.requires-dev] codespell = [{ name = "codespell", specifier 
= ">=2.2.0,<3.0.0" }] @@ -1745,7 +1747,7 @@ typing = [ [[package]] name = "langchain-core" -version = "0.3.45" +version = "0.3.47" source = { editable = "../core" } dependencies = [ { name = "jsonpatch" }, @@ -1803,7 +1805,7 @@ typing = [ [[package]] name = "langchain-tests" -version = "0.3.14" +version = "0.3.15" source = { editable = "../standard-tests" } dependencies = [ { name = "httpx" }, From b28a474e7902b3486846d95e1ccdeb28240667dd Mon Sep 17 00:00:00 2001 From: Christophe Bornet Date: Thu, 27 Mar 2025 11:26:12 +0100 Subject: [PATCH 17/30] core[patch]: Add ruff rules for PLW (Pylint Warnings) (#29288) See https://docs.astral.sh/ruff/rules/#warning-w_1 --------- Co-authored-by: Eugene Yurtsev --- libs/core/langchain_core/messages/utils.py | 2 +- .../langchain_core/output_parsers/json.py | 10 ++++++-- .../langchain_core/output_parsers/list.py | 14 ++++++----- .../core/langchain_core/tracers/evaluation.py | 2 -- libs/core/langchain_core/tracers/langchain.py | 3 +-- libs/core/langchain_core/utils/_merge.py | 9 +++++--- libs/core/langchain_core/utils/json.py | 7 ++++-- libs/core/langchain_core/utils/mustache.py | 23 +++++++++---------- libs/core/pyproject.toml | 3 ++- libs/core/tests/unit_tests/test_imports.py | 2 +- 10 files changed, 43 insertions(+), 32 deletions(-) diff --git a/libs/core/langchain_core/messages/utils.py b/libs/core/langchain_core/messages/utils.py index 5331b1040f5..5fc86222b24 100644 --- a/libs/core/langchain_core/messages/utils.py +++ b/libs/core/langchain_core/messages/utils.py @@ -504,7 +504,7 @@ def filter_messages( ) ] - msg = msg.model_copy( + msg = msg.model_copy( # noqa: PLW2901 update={"tool_calls": tool_calls, "content": content} ) elif ( diff --git a/libs/core/langchain_core/output_parsers/json.py b/libs/core/langchain_core/output_parsers/json.py index 18c1257a9ad..642387e7a12 100644 --- a/libs/core/langchain_core/output_parsers/json.py +++ b/libs/core/langchain_core/output_parsers/json.py @@ -125,5 +125,11 @@ class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]): # For backwards compatibility SimpleJsonOutputParser = JsonOutputParser -parse_partial_json = parse_partial_json -parse_and_check_json_markdown = parse_and_check_json_markdown + + +__all__ = [ + "JsonOutputParser", + "SimpleJsonOutputParser", # For backwards compatibility + "parse_partial_json", # For backwards compatibility + "parse_and_check_json_markdown", # For backwards compatibility +] diff --git a/libs/core/langchain_core/output_parsers/list.py b/libs/core/langchain_core/output_parsers/list.py index 6977079f5ae..8461d08f7e4 100644 --- a/libs/core/langchain_core/output_parsers/list.py +++ b/libs/core/langchain_core/output_parsers/list.py @@ -73,9 +73,10 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]): chunk_content = chunk.content if not isinstance(chunk_content, str): continue - chunk = chunk_content - # add current chunk to buffer - buffer += chunk + buffer += chunk_content + else: + # add current chunk to buffer + buffer += chunk # parse buffer into a list of parts try: done_idx = 0 @@ -105,9 +106,10 @@ class ListOutputParser(BaseTransformOutputParser[list[str]]): chunk_content = chunk.content if not isinstance(chunk_content, str): continue - chunk = chunk_content - # add current chunk to buffer - buffer += chunk + buffer += chunk_content + else: + # add current chunk to buffer + buffer += chunk # parse buffer into a list of parts try: done_idx = 0 diff --git a/libs/core/langchain_core/tracers/evaluation.py b/libs/core/langchain_core/tracers/evaluation.py index 
0a918cd2e9d..057b8f4565f 100644 --- a/libs/core/langchain_core/tracers/evaluation.py +++ b/libs/core/langchain_core/tracers/evaluation.py @@ -29,7 +29,6 @@ _TRACERS: weakref.WeakSet[EvaluatorCallbackHandler] = weakref.WeakSet() def wait_for_all_evaluators() -> None: """Wait for all tracers to finish.""" - global _TRACERS for tracer in list(_TRACERS): if tracer is not None: tracer.wait_for_futures() @@ -100,7 +99,6 @@ class EvaluatorCallbackHandler(BaseTracer): self.project_name = project_name self.logged_eval_results: dict[tuple[str, str], list[EvaluationResult]] = {} self.lock = threading.Lock() - global _TRACERS _TRACERS.add(self) def _evaluate_in_project(self, run: Run, evaluator: langsmith.RunEvaluator) -> None: diff --git a/libs/core/langchain_core/tracers/langchain.py b/libs/core/langchain_core/tracers/langchain.py index 9fdc76d9436..e489ab283e4 100644 --- a/libs/core/langchain_core/tracers/langchain.py +++ b/libs/core/langchain_core/tracers/langchain.py @@ -41,7 +41,6 @@ def log_error_once(method: str, exception: Exception) -> None: method: The method that raised the exception. exception: The exception that was raised. """ - global _LOGGED if (method, type(exception)) in _LOGGED: return _LOGGED.add((method, type(exception))) @@ -61,7 +60,7 @@ def get_client() -> Client: def _get_executor() -> ThreadPoolExecutor: """Get the executor.""" - global _EXECUTOR + global _EXECUTOR # noqa: PLW0603 if _EXECUTOR is None: _EXECUTOR = ThreadPoolExecutor() return _EXECUTOR diff --git a/libs/core/langchain_core/utils/_merge.py b/libs/core/langchain_core/utils/_merge.py index be8f8293874..9e86777eff2 100644 --- a/libs/core/langchain_core/utils/_merge.py +++ b/libs/core/langchain_core/utils/_merge.py @@ -96,9 +96,12 @@ def merge_lists(left: Optional[list], *others: Optional[list]) -> Optional[list] if to_merge: # TODO: Remove this once merge_dict is updated with special # handling for 'type'. - if "type" in e: - e = {k: v for k, v in e.items() if k != "type"} - merged[to_merge[0]] = merge_dicts(merged[to_merge[0]], e) + new_e = ( + {k: v for k, v in e.items() if k != "type"} + if "type" in e + else e + ) + merged[to_merge[0]] = merge_dicts(merged[to_merge[0]], new_e) else: merged.append(e) else: diff --git a/libs/core/langchain_core/utils/json.py b/libs/core/langchain_core/utils/json.py index 472ef94b7a3..28154c62b9f 100644 --- a/libs/core/langchain_core/utils/json.py +++ b/libs/core/langchain_core/utils/json.py @@ -64,11 +64,14 @@ def parse_partial_json(s: str, *, strict: bool = False) -> Any: # Process each character in the string one at a time. for char in s: + new_char = char if is_inside_string: if char == '"' and not escaped: is_inside_string = False elif char == "\n" and not escaped: - char = "\\n" # Replace the newline character with the escape sequence. + new_char = ( + "\\n" # Replace the newline character with the escape sequence. + ) elif char == "\\": escaped = not escaped else: @@ -89,7 +92,7 @@ def parse_partial_json(s: str, *, strict: bool = False) -> Any: return None # Append the processed character to the new string. - new_chars.append(char) + new_chars.append(new_char) # If we're still inside a string at the end of processing, # we need to close the string. 
diff --git a/libs/core/langchain_core/utils/mustache.py b/libs/core/langchain_core/utils/mustache.py index 4c42c47fed9..3a95e06bb08 100644 --- a/libs/core/langchain_core/utils/mustache.py +++ b/libs/core/langchain_core/utils/mustache.py @@ -125,8 +125,6 @@ def parse_tag(template: str, l_del: str, r_del: str) -> tuple[tuple[str, str], s ChevronError: If the tag is unclosed. ChevronError: If the set delimiter tag is unclosed. """ - global _CURRENT_LINE, _LAST_TAG_LINE - tag_types = { "!": "comment", "#": "section", @@ -352,32 +350,33 @@ def _get_key( if scope in (0, False): return scope + resolved_scope = scope # For every dot separated key for child in key.split("."): # Return an empty string if falsy, with two exceptions # 0 should return 0, and False should return False - if scope in (0, False): - return scope + if resolved_scope in (0, False): + return resolved_scope # Move into the scope try: # Try subscripting (Normal dictionaries) - scope = cast(dict[str, Any], scope)[child] + resolved_scope = cast(dict[str, Any], resolved_scope)[child] except (TypeError, AttributeError): try: - scope = getattr(scope, child) + resolved_scope = getattr(resolved_scope, child) except (TypeError, AttributeError): # Try as a list - scope = scope[int(child)] # type: ignore + resolved_scope = resolved_scope[int(child)] # type: ignore try: # This allows for custom falsy data types # https://github.com/noahmorrison/chevron/issues/35 - if scope._CHEVRON_return_scope_when_falsy: # type: ignore - return scope + if resolved_scope._CHEVRON_return_scope_when_falsy: # type: ignore + return resolved_scope except AttributeError: - if scope in (0, False): - return scope - return scope or "" + if resolved_scope in (0, False): + return resolved_scope + return resolved_scope or "" except (AttributeError, KeyError, IndexError, ValueError): # We couldn't find the key in the current scope # We'll try again on the next pass diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index 5e0a3e9ef26..f7350e0e165 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -77,7 +77,7 @@ target-version = "py39" [tool.ruff.lint] -select = [ "ANN", "ASYNC", "B", "C4", "COM", "DJ", "E", "EM", "EXE", "F", "FLY", "FURB", "I", "ICN", "INT", "LOG", "N", "NPY", "PD", "PIE", "PTH", "Q", "RSE", "S", "SIM", "SLOT", "T10", "T201", "TC", "TID", "TRY", "UP", "W", "YTT",] +select = [ "ANN", "ASYNC", "B", "C4", "COM", "DJ", "E", "EM", "EXE", "F", "FLY", "FURB", "I", "ICN", "INT", "LOG", "N", "NPY", "PD", "PIE", "PLW", "PTH", "Q", "RSE", "S", "SIM", "SLOT", "T10", "T201", "TC", "TID", "TRY", "UP", "W", "YTT",] ignore = [ "ANN401", "COM812", "UP007", "S110", "S112", "TC001", "TC002", "TC003"] flake8-type-checking.runtime-evaluated-base-classes = ["pydantic.BaseModel","langchain_core.load.serializable.Serializable","langchain_core.runnables.base.RunnableSerializable"] flake8-annotations.allow-star-arg-any = true @@ -96,6 +96,7 @@ filterwarnings = [ "ignore::langchain_core._api.beta_decorator.LangChainBetaWarn classmethod-decorators = [ "classmethod", "langchain_core.utils.pydantic.pre_init", "pydantic.field_validator", "pydantic.v1.root_validator",] [tool.ruff.lint.per-file-ignores] +"langchain_core/utils/mustache.py" = [ "PLW0603",] "tests/unit_tests/prompts/test_chat.py" = [ "E501",] "tests/unit_tests/runnables/test_runnable.py" = [ "E501",] "tests/unit_tests/runnables/test_graph.py" = [ "E501",] diff --git a/libs/core/tests/unit_tests/test_imports.py b/libs/core/tests/unit_tests/test_imports.py index 64f93aa606c..30e320ee68e 
100644 --- a/libs/core/tests/unit_tests/test_imports.py +++ b/libs/core/tests/unit_tests/test_imports.py @@ -22,7 +22,7 @@ def try_to_import(module_name: str) -> tuple[int, str]: getattr(module, cls_) result = subprocess.run( - ["python", "-c", f"import langchain_core.{module_name}"], + ["python", "-c", f"import langchain_core.{module_name}"], check=True ) return result.returncode, module_name From 956b09f4687c1a4dcbbb483798cf35991960c371 Mon Sep 17 00:00:00 2001 From: Keiichi Hirobe Date: Fri, 28 Mar 2025 00:04:34 +0900 Subject: [PATCH 18/30] core[patch]: stop deleting records with "scoped_full" when doc is empty (#30520) Fix a bug that causes `scoped_full` in index to delete records when there are no input docs. --- libs/core/langchain_core/indexing/api.py | 8 +- .../unit_tests/indexing/test_indexing.py | 152 ++++++++++++++++++ 2 files changed, 158 insertions(+), 2 deletions(-) diff --git a/libs/core/langchain_core/indexing/api.py b/libs/core/langchain_core/indexing/api.py index 11343d17f71..4dd21de4f44 100644 --- a/libs/core/langchain_core/indexing/api.py +++ b/libs/core/langchain_core/indexing/api.py @@ -473,7 +473,9 @@ def index( record_manager.delete_keys(uids_to_delete) num_deleted += len(uids_to_delete) - if cleanup == "full" or cleanup == "scoped_full": + if cleanup == "full" or ( + cleanup == "scoped_full" and scoped_full_cleanup_source_ids + ): delete_group_ids: Optional[Sequence[str]] = None if cleanup == "scoped_full": delete_group_ids = list(scoped_full_cleanup_source_ids) @@ -786,7 +788,9 @@ async def aindex( await record_manager.adelete_keys(uids_to_delete) num_deleted += len(uids_to_delete) - if cleanup == "full" or cleanup == "scoped_full": + if cleanup == "full" or ( + cleanup == "scoped_full" and scoped_full_cleanup_source_ids + ): delete_group_ids: Optional[Sequence[str]] = None if cleanup == "scoped_full": delete_group_ids = list(scoped_full_cleanup_source_ids) diff --git a/libs/core/tests/unit_tests/indexing/test_indexing.py b/libs/core/tests/unit_tests/indexing/test_indexing.py index 52cf3265e29..8d800c83b44 100644 --- a/libs/core/tests/unit_tests/indexing/test_indexing.py +++ b/libs/core/tests/unit_tests/indexing/test_indexing.py @@ -822,6 +822,158 @@ async def test_ascoped_full_fails_with_bad_source_ids( ) +def test_index_empty_doc_scoped_full( + record_manager: InMemoryRecordManager, vector_store: InMemoryVectorStore +) -> None: + """Test Indexing with scoped_full strategy""" + loader = ToyLoader( + documents=[ + Document( + page_content="This is a test document.", + metadata={"source": "1"}, + ), + Document( + page_content="This is another document.", + metadata={"source": "1"}, + ), + Document( + page_content="This is yet another document.", + metadata={"source": "1"}, + ), + Document( + page_content="This is a test document from another source.", + metadata={"source": "2"}, + ), + ] + ) + + with patch.object( + record_manager, "get_time", return_value=datetime(2021, 1, 1).timestamp() + ): + assert index( + loader, + record_manager, + vector_store, + cleanup="scoped_full", + source_id_key="source", + ) == { + "num_added": 4, + "num_deleted": 0, + "num_skipped": 0, + "num_updated": 0, + } + + with patch.object( + record_manager, "get_time", return_value=datetime(2021, 1, 2).timestamp() + ): + assert index( + loader, + record_manager, + vector_store, + cleanup="scoped_full", + source_id_key="source", + ) == { + "num_added": 0, + "num_deleted": 0, + "num_skipped": 4, + "num_updated": 0, + } + + loader = ToyLoader(documents=[]) + + with patch.object( + 
record_manager, "get_time", return_value=datetime(2021, 1, 3).timestamp() + ): + assert index( + loader, + record_manager, + vector_store, + cleanup="scoped_full", + source_id_key="source", + ) == { + "num_added": 0, + "num_deleted": 0, + "num_skipped": 0, + "num_updated": 0, + } + + +async def test_aindex_empty_doc_scoped_full( + arecord_manager: InMemoryRecordManager, vector_store: InMemoryVectorStore +) -> None: + """Test Indexing with scoped_full strategy.""" + loader = ToyLoader( + documents=[ + Document( + page_content="This is a test document.", + metadata={"source": "1"}, + ), + Document( + page_content="This is another document.", + metadata={"source": "1"}, + ), + Document( + page_content="This is yet another document.", + metadata={"source": "1"}, + ), + Document( + page_content="This is a test document from another source.", + metadata={"source": "2"}, + ), + ] + ) + + with patch.object( + arecord_manager, "get_time", return_value=datetime(2021, 1, 1).timestamp() + ): + assert await aindex( + loader, + arecord_manager, + vector_store, + cleanup="scoped_full", + source_id_key="source", + ) == { + "num_added": 4, + "num_deleted": 0, + "num_skipped": 0, + "num_updated": 0, + } + + with patch.object( + arecord_manager, "get_time", return_value=datetime(2021, 1, 2).timestamp() + ): + assert await aindex( + loader, + arecord_manager, + vector_store, + cleanup="scoped_full", + source_id_key="source", + ) == { + "num_added": 0, + "num_deleted": 0, + "num_skipped": 4, + "num_updated": 0, + } + + loader = ToyLoader(documents=[]) + + with patch.object( + arecord_manager, "get_time", return_value=datetime(2021, 1, 3).timestamp() + ): + assert await aindex( + loader, + arecord_manager, + vector_store, + cleanup="scoped_full", + source_id_key="source", + ) == { + "num_added": 0, + "num_deleted": 0, + "num_skipped": 0, + "num_updated": 0, + } + + def test_no_delete( record_manager: InMemoryRecordManager, vector_store: InMemoryVectorStore ) -> None: From 80064893c1335bae23c8944a910018bf7ba27224 Mon Sep 17 00:00:00 2001 From: ccurme Date: Thu, 27 Mar 2025 11:07:19 -0400 Subject: [PATCH 19/30] docs: restore some content to Elasticsearch integration page (#30522) https://github.com/langchain-ai/langchain/pull/24858 standardized vector store integration pages, but deleted some content. Here we merge some of the old content back in. We use this version as a reference: https://github.com/langchain-ai/langchain/blob/2c798622cd1508848a5f45546a839147745be511/docs/docs/integrations/vectorstores/elasticsearch.ipynb --- .../vectorstores/elasticsearch.ipynb | 547 ++++++++++++++++++ 1 file changed, 547 insertions(+) diff --git a/docs/docs/integrations/vectorstores/elasticsearch.ipynb b/docs/docs/integrations/vectorstores/elasticsearch.ipynb index 1a26b1c600f..92742d4f402 100644 --- a/docs/docs/integrations/vectorstores/elasticsearch.ipynb +++ b/docs/docs/integrations/vectorstores/elasticsearch.ipynb @@ -391,6 +391,147 @@ " print(f\"* {res.page_content} [{res.metadata}]\")" ] }, + { + "cell_type": "markdown", + "id": "3f1d79c6", + "metadata": {}, + "source": [ + "#### Metadata filtering\n", + "\n", + "`ElasticsearchStore` supports metadata to stored along with the document. This metadata dict object is stored in a metadata object field in the Elasticsearch document. Based on the metadata value, Elasticsearch will automatically setup the mapping by infering the data type of the metadata value. 
For example, if the metadata value is a string, Elasticsearch will setup the mapping for the metadata object field as a string type.\n", + "\n", + "You can filter by exact keyword, as above:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8cc5db5", + "metadata": {}, + "outputs": [], + "source": [ + "results = vector_store.similarity_search(\n", + " query=\"LangChain provides abstractions to make working with LLMs easy\",\n", + " k=2,\n", + " filter=[{\"term\": {\"metadata.source.keyword\": \"tweet\"}}],\n", + ")\n", + "for res in results:\n", + " print(f\"* {res.page_content} [{res.metadata}]\")" + ] + }, + { + "cell_type": "markdown", + "id": "a2f03ab8", + "metadata": {}, + "source": [ + "By partial match:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b371da9f", + "metadata": {}, + "outputs": [], + "source": [ + "results = vector_store.similarity_search(\n", + " query=\"LangChain provides abstractions to make working with LLMs easy\",\n", + " k=2,\n", + " filter=[{\"match\": {\"metadata.source\": {\"query\": \"tweet\", \"fuzziness\": \"AUTO\"}}}],\n", + ")\n", + "for res in results:\n", + " print(f\"* {res.page_content} [{res.metadata}]\")" + ] + }, + { + "cell_type": "markdown", + "id": "d70d8cd7", + "metadata": {}, + "source": [ + "By date range (if a date field exists):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72ddc0eb", + "metadata": {}, + "outputs": [], + "source": [ + "results = vector_store.similarity_search(\n", + " query=\"LangChain provides abstractions to make working with LLMs easy\",\n", + " k=2,\n", + " filter=[{\"range\": {\"metadata.date\": {\"gte\": \"2010-01-01\"}}}],\n", + ")\n", + "for res in results:\n", + " print(f\"* {res.page_content} [{res.metadata}]\")" + ] + }, + { + "cell_type": "markdown", + "id": "82759079", + "metadata": {}, + "source": [ + "By numeric range (if a numeric field exists):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7cbf8255", + "metadata": {}, + "outputs": [], + "source": [ + "results = vector_store.similarity_search(\n", + " query=\"LangChain provides abstractions to make working with LLMs easy\",\n", + " k=2,\n", + " filter=[{\"range\": {\"metadata.a_numeric_field\": {\"gte\": 2}}}],\n", + ")\n", + "for res in results:\n", + " print(f\"* {res.page_content} [{res.metadata}]\")" + ] + }, + { + "cell_type": "markdown", + "id": "0ad5f8da", + "metadata": {}, + "source": [ + "By geo distance (Requires an index with a geo_point mapping to be declared for `metadata.geo_location`):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0efa827", + "metadata": {}, + "outputs": [], + "source": [ + "results = vector_store.similarity_search(\n", + " query=\"LangChain provides abstractions to make working with LLMs easy\",\n", + " k=2,\n", + " filter=[\n", + " {\n", + " \"geo_distance\": {\n", + " \"distance\": \"200km\",\n", + " \"metadata.geo_location\": {\"lat\": 40, \"lon\": -70},\n", + " }\n", + " }\n", + " ],\n", + ")\n", + "for res in results:\n", + " print(f\"* {res.page_content} [{res.metadata}]\")" + ] + }, + { + "cell_type": "markdown", + "id": "a883e5b0", + "metadata": {}, + "source": [ + "Filter supports many more types of queries than above. \n", + "\n", + "Read more about them in the [documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html)." 
+ ] + }, { "cell_type": "markdown", "id": "a0fda72e", @@ -462,6 +603,412 @@ "retriever.invoke(\"Stealing from the bank is a crime\")" ] }, + { + "cell_type": "markdown", + "id": "8ec8694f", + "metadata": {}, + "source": [ + "## Distance Similarity Algorithm\n", + "Elasticsearch supports the following vector distance similarity algorithms:\n", + "\n", + "- cosine\n", + "- euclidean\n", + "- dot_product\n", + "\n", + "The cosine similarity algorithm is the default.\n", + "\n", + "You can specify the similarity Algorithm needed via the similarity parameter.\n", + "\n", + "**NOTE**\n", + "Depending on the retrieval strategy, the similarity algorithm cannot be changed at query time. It is needed to be set when creating the index mapping for field. If you need to change the similarity algorithm, you need to delete the index and recreate it with the correct distance_strategy.\n", + "\n", + "```python\n", + "\n", + "db = ElasticsearchStore.from_documents(\n", + " docs, \n", + " embeddings, \n", + " es_url=\"http://localhost:9200\", \n", + " index_name=\"test\",\n", + " distance_strategy=\"COSINE\"\n", + " # distance_strategy=\"EUCLIDEAN_DISTANCE\"\n", + " # distance_strategy=\"DOT_PRODUCT\"\n", + ")\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "67115d26", + "metadata": {}, + "source": [ + "## Retrieval Strategies\n", + "Elasticsearch has big advantages over other vector only databases from its ability to support a wide range of retrieval strategies. In this notebook we will configure `ElasticsearchStore` to support some of the most common retrieval strategies. \n", + "\n", + "By default, `ElasticsearchStore` uses the `DenseVectorStrategy` (was called `ApproxRetrievalStrategy` prior to version 0.2.0).\n", + "\n", + "### DenseVectorStrategy\n", + "This will return the top `k` most similar vectors to the query vector. The `k` parameter is set when the `ElasticsearchStore` is initialized. The default value is `10`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "946c12c4", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_elasticsearch import DenseVectorStrategy\n", + "\n", + "db = ElasticsearchStore.from_documents(\n", + " docs,\n", + " embeddings,\n", + " es_url=\"http://localhost:9200\",\n", + " index_name=\"test\",\n", + " strategy=DenseVectorStrategy(),\n", + ")\n", + "\n", + "docs = db.similarity_search(query=\"...\", k=10)" + ] + }, + { + "cell_type": "markdown", + "id": "8183cb02", + "metadata": {}, + "source": [ + "### Example: Hybrid retrieval with dense vector and keyword search\n", + "This example will show how to configure `ElasticsearchStore` to perform a hybrid retrieval, using a combination of approximate semantic search and keyword based search. \n", + "\n", + "We use RRF to balance the two scores from different retrieval methods.\n", + "\n", + "To enable hybrid retrieval, we need to set `hybrid=True` in the `DenseVectorStrategy` constructor.\n", + "\n", + "```python\n", + "\n", + "db = ElasticsearchStore.from_documents(\n", + " docs, \n", + " embeddings, \n", + " es_url=\"http://localhost:9200\", \n", + " index_name=\"test\",\n", + " strategy=DenseVectorStrategy(hybrid=True)\n", + ")\n", + "```\n", + "\n", + "When `hybrid` is enabled, the query performed will be a combination of approximate semantic search and keyword based search. 
\n", + "\n", + "It will use `rrf` (Reciprocal Rank Fusion) to balance the two scores from different retrieval methods.\n", + "\n", + "**Note** RRF requires Elasticsearch 8.9.0 or above.\n", + "\n", + "```json\n", + "{\n", + " \"knn\": {\n", + " \"field\": \"vector\",\n", + " \"filter\": [],\n", + " \"k\": 1,\n", + " \"num_candidates\": 50,\n", + " \"query_vector\": [1.0, ..., 0.0],\n", + " },\n", + " \"query\": {\n", + " \"bool\": {\n", + " \"filter\": [],\n", + " \"must\": [{\"match\": {\"text\": {\"query\": \"foo\"}}}],\n", + " }\n", + " },\n", + " \"rank\": {\"rrf\": {}},\n", + "}\n", + "```\n", + "\n", + "### Example: Dense vector search with Embedding Model in Elasticsearch\n", + "This example will show how to configure `ElasticsearchStore` to use the embedding model deployed in Elasticsearch for dense vector retrieval.\n", + "\n", + "To use this, specify the model_id in `DenseVectorStrategy` constructor via the `query_model_id` argument.\n", + "\n", + "**NOTE** This requires the model to be deployed and running in Elasticsearch ml node. See [notebook example](https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/integrations/hugging-face/loading-model-from-hugging-face.ipynb) on how to deploy the model with eland.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "993ab653", + "metadata": {}, + "outputs": [], + "source": [ + "DENSE_SELF_DEPLOYED_INDEX_NAME = \"test-dense-self-deployed\"\n", + "\n", + "# Note: This does not have an embedding function specified\n", + "# Instead, we will use the embedding model deployed in Elasticsearch\n", + "db = ElasticsearchStore(\n", + " es_cloud_id=\"\",\n", + " es_user=\"elastic\",\n", + " es_password=\"\",\n", + " index_name=DENSE_SELF_DEPLOYED_INDEX_NAME,\n", + " query_field=\"text_field\",\n", + " vector_query_field=\"vector_query_field.predicted_value\",\n", + " strategy=DenseVectorStrategy(model_id=\"sentence-transformers__all-minilm-l6-v2\"),\n", + ")\n", + "\n", + "# Setup a Ingest Pipeline to perform the embedding\n", + "# of the text field\n", + "db.client.ingest.put_pipeline(\n", + " id=\"test_pipeline\",\n", + " processors=[\n", + " {\n", + " \"inference\": {\n", + " \"model_id\": \"sentence-transformers__all-minilm-l6-v2\",\n", + " \"field_map\": {\"query_field\": \"text_field\"},\n", + " \"target_field\": \"vector_query_field\",\n", + " }\n", + " }\n", + " ],\n", + ")\n", + "\n", + "# creating a new index with the pipeline,\n", + "# not relying on langchain to create the index\n", + "db.client.indices.create(\n", + " index=DENSE_SELF_DEPLOYED_INDEX_NAME,\n", + " mappings={\n", + " \"properties\": {\n", + " \"text_field\": {\"type\": \"text\"},\n", + " \"vector_query_field\": {\n", + " \"properties\": {\n", + " \"predicted_value\": {\n", + " \"type\": \"dense_vector\",\n", + " \"dims\": 384,\n", + " \"index\": True,\n", + " \"similarity\": \"l2_norm\",\n", + " }\n", + " }\n", + " },\n", + " }\n", + " },\n", + " settings={\"index\": {\"default_pipeline\": \"test_pipeline\"}},\n", + ")\n", + "\n", + "db.from_texts(\n", + " [\"hello world\"],\n", + " es_cloud_id=\"\",\n", + " es_user=\"elastic\",\n", + " es_password=\"\",\n", + " index_name=DENSE_SELF_DEPLOYED_INDEX_NAME,\n", + " query_field=\"text_field\",\n", + " vector_query_field=\"vector_query_field.predicted_value\",\n", + " strategy=DenseVectorStrategy(model_id=\"sentence-transformers__all-minilm-l6-v2\"),\n", + ")\n", + "\n", + "# Perform search\n", + "db.similarity_search(\"hello world\", k=10)" + ] + }, + { + "cell_type": "markdown", + "id": 
"24646cf3", + "metadata": {}, + "source": [ + "### SparseVectorStrategy (ELSER)\n", + "This strategy uses Elasticsearch's sparse vector retrieval to retrieve the top-k results. We only support our own \"ELSER\" embedding model for now.\n", + "\n", + "**NOTE** This requires the ELSER model to be deployed and running in Elasticsearch ml node. \n", + "\n", + "To use this, specify `SparseVectorStrategy` (was called `SparseVectorRetrievalStrategy` prior to version 0.2.0) in the `ElasticsearchStore` constructor. You will need to provide a model ID." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d295c424", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_elasticsearch import SparseVectorStrategy\n", + "\n", + "# Note that this example doesn't have an embedding function. This is because we infer the tokens at index time and at query time within Elasticsearch.\n", + "# This requires the ELSER model to be loaded and running in Elasticsearch.\n", + "db = ElasticsearchStore.from_documents(\n", + " docs,\n", + " es_cloud_id=\"\",\n", + " es_user=\"elastic\",\n", + " es_password=\"\",\n", + " index_name=\"test-elser\",\n", + " strategy=SparseVectorStrategy(model_id=\".elser_model_2\"),\n", + ")\n", + "\n", + "db.client.indices.refresh(index=\"test-elser\")\n", + "\n", + "results = db.similarity_search(\"...\", k=4)\n", + "print(results[0])" + ] + }, + { + "cell_type": "markdown", + "id": "f6c7c17b", + "metadata": {}, + "source": [ + "### DenseVectorScriptScoreStrategy\n", + "This strategy uses Elasticsearch's script score query to perform exact vector retrieval (also known as brute force) to retrieve the top-k results. (This strategy was called `ExactRetrievalStrategy` prior to version 0.2.0.)\n", + "\n", + "To use this, specify `DenseVectorScriptScoreStrategy` in `ElasticsearchStore` constructor.\n", + "\n", + "```python\n", + "from langchain_elasticsearch import SparseVectorStrategy\n", + "\n", + "db = ElasticsearchStore.from_documents(\n", + " docs, \n", + " embeddings, \n", + " es_url=\"http://localhost:9200\", \n", + " index_name=\"test\",\n", + " strategy=DenseVectorScriptScoreStrategy(),\n", + ")\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "df4d584b", + "metadata": {}, + "source": [ + "### BM25Strategy\n", + "Finally, you can use full-text keyword search.\n", + "\n", + "To use this, specify `BM25Strategy` in `ElasticsearchStore` constructor.\n", + "\n", + "```python\n", + "from langchain_elasticsearch import BM25Strategy\n", + "\n", + "db = ElasticsearchStore.from_documents(\n", + " docs, \n", + " es_url=\"http://localhost:9200\", \n", + " index_name=\"test\",\n", + " strategy=BM25Strategy(),\n", + ")\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "79d35f68", + "metadata": {}, + "source": [ + "### BM25RetrievalStrategy\n", + "This strategy allows the user to perform searches using pure BM25 without vector search.\n", + "\n", + "To use this, specify `BM25RetrievalStrategy` in `ElasticsearchStore` constructor.\n", + "\n", + "Note that in the example below, the embedding option is not specified, indicating that the search is conducted without using embeddings." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "883b5f42", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_elasticsearch import ElasticsearchStore\n", + "\n", + "db = ElasticsearchStore(\n", + " es_url=\"http://localhost:9200\",\n", + " index_name=\"test_index\",\n", + " strategy=ElasticsearchStore.BM25RetrievalStrategy(),\n", + ")\n", + "\n", + "db.add_texts(\n", + " [\"foo\", \"foo bar\", \"foo bar baz\", \"bar\", \"bar baz\", \"baz\"],\n", + ")\n", + "\n", + "results = db.similarity_search(query=\"foo\", k=10)\n", + "print(results)" + ] + }, + { + "cell_type": "markdown", + "id": "ee285657", + "metadata": {}, + "source": [ + "### Customise the Query\n", + "With `custom_query` parameter at search, you are able to adjust the query that is used to retrieve documents from Elasticsearch. This is useful if you want to use a more complex query, to support linear boosting of fields." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e275baf8", + "metadata": {}, + "outputs": [], + "source": [ + "# Example of a custom query thats just doing a BM25 search on the text field.\n", + "def custom_query(query_body: dict, query: str):\n", + " \"\"\"Custom query to be used in Elasticsearch.\n", + " Args:\n", + " query_body (dict): Elasticsearch query body.\n", + " query (str): Query string.\n", + " Returns:\n", + " dict: Elasticsearch query body.\n", + " \"\"\"\n", + " print(\"Query Retriever created by the retrieval strategy:\")\n", + " print(query_body)\n", + " print()\n", + "\n", + " new_query_body = {\"query\": {\"match\": {\"text\": query}}}\n", + "\n", + " print(\"Query thats actually used in Elasticsearch:\")\n", + " print(new_query_body)\n", + " print()\n", + "\n", + " return new_query_body\n", + "\n", + "\n", + "results = db.similarity_search(\n", + " \"...\",\n", + " k=4,\n", + " custom_query=custom_query,\n", + ")\n", + "print(\"Results:\")\n", + "print(results[0])" + ] + }, + { + "cell_type": "markdown", + "id": "32ef65d4", + "metadata": {}, + "source": [ + "### Customize the Document Builder\n", + "\n", + "With ```doc_builder``` parameter at search, you are able to adjust how a Document is being built using data retrieved from Elasticsearch. This is especially useful if you have indices which were not created using Langchain." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "09a441d4", + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Dict\n", + "\n", + "from langchain_core.documents import Document\n", + "\n", + "\n", + "def custom_document_builder(hit: Dict) -> Document:\n", + " src = hit.get(\"_source\", {})\n", + " return Document(\n", + " page_content=src.get(\"content\", \"Missing content!\"),\n", + " metadata={\n", + " \"page_number\": src.get(\"page_number\", -1),\n", + " \"original_filename\": src.get(\"original_filename\", \"Missing filename!\"),\n", + " },\n", + " )\n", + "\n", + "\n", + "results = db.similarity_search(\n", + " \"...\",\n", + " k=4,\n", + " doc_builder=custom_document_builder,\n", + ")\n", + "print(\"Results:\")\n", + "print(results[0])" + ] + }, { "cell_type": "markdown", "id": "17b509ae", From 0b2244ea887bdf8d24e4e1d858d4af0db7933df3 Mon Sep 17 00:00:00 2001 From: ccurme Date: Thu, 27 Mar 2025 11:12:36 -0400 Subject: [PATCH 20/30] Revert "docs: restore some content to Elasticsearch integration page" (#30523) Reverts langchain-ai/langchain#30522 in favor of https://github.com/langchain-ai/langchain/pull/30521. 
--- .../vectorstores/elasticsearch.ipynb | 547 ------------------ 1 file changed, 547 deletions(-) diff --git a/docs/docs/integrations/vectorstores/elasticsearch.ipynb b/docs/docs/integrations/vectorstores/elasticsearch.ipynb index 92742d4f402..1a26b1c600f 100644 --- a/docs/docs/integrations/vectorstores/elasticsearch.ipynb +++ b/docs/docs/integrations/vectorstores/elasticsearch.ipynb @@ -391,147 +391,6 @@ " print(f\"* {res.page_content} [{res.metadata}]\")" ] }, - { - "cell_type": "markdown", - "id": "3f1d79c6", - "metadata": {}, - "source": [ - "#### Metadata filtering\n", - "\n", - "`ElasticsearchStore` supports metadata to stored along with the document. This metadata dict object is stored in a metadata object field in the Elasticsearch document. Based on the metadata value, Elasticsearch will automatically setup the mapping by infering the data type of the metadata value. For example, if the metadata value is a string, Elasticsearch will setup the mapping for the metadata object field as a string type.\n", - "\n", - "You can filter by exact keyword, as above:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e8cc5db5", - "metadata": {}, - "outputs": [], - "source": [ - "results = vector_store.similarity_search(\n", - " query=\"LangChain provides abstractions to make working with LLMs easy\",\n", - " k=2,\n", - " filter=[{\"term\": {\"metadata.source.keyword\": \"tweet\"}}],\n", - ")\n", - "for res in results:\n", - " print(f\"* {res.page_content} [{res.metadata}]\")" - ] - }, - { - "cell_type": "markdown", - "id": "a2f03ab8", - "metadata": {}, - "source": [ - "By partial match:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b371da9f", - "metadata": {}, - "outputs": [], - "source": [ - "results = vector_store.similarity_search(\n", - " query=\"LangChain provides abstractions to make working with LLMs easy\",\n", - " k=2,\n", - " filter=[{\"match\": {\"metadata.source\": {\"query\": \"tweet\", \"fuzziness\": \"AUTO\"}}}],\n", - ")\n", - "for res in results:\n", - " print(f\"* {res.page_content} [{res.metadata}]\")" - ] - }, - { - "cell_type": "markdown", - "id": "d70d8cd7", - "metadata": {}, - "source": [ - "By date range (if a date field exists):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "72ddc0eb", - "metadata": {}, - "outputs": [], - "source": [ - "results = vector_store.similarity_search(\n", - " query=\"LangChain provides abstractions to make working with LLMs easy\",\n", - " k=2,\n", - " filter=[{\"range\": {\"metadata.date\": {\"gte\": \"2010-01-01\"}}}],\n", - ")\n", - "for res in results:\n", - " print(f\"* {res.page_content} [{res.metadata}]\")" - ] - }, - { - "cell_type": "markdown", - "id": "82759079", - "metadata": {}, - "source": [ - "By numeric range (if a numeric field exists):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7cbf8255", - "metadata": {}, - "outputs": [], - "source": [ - "results = vector_store.similarity_search(\n", - " query=\"LangChain provides abstractions to make working with LLMs easy\",\n", - " k=2,\n", - " filter=[{\"range\": {\"metadata.a_numeric_field\": {\"gte\": 2}}}],\n", - ")\n", - "for res in results:\n", - " print(f\"* {res.page_content} [{res.metadata}]\")" - ] - }, - { - "cell_type": "markdown", - "id": "0ad5f8da", - "metadata": {}, - "source": [ - "By geo distance (Requires an index with a geo_point mapping to be declared for `metadata.geo_location`):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e0efa827", - 
"metadata": {}, - "outputs": [], - "source": [ - "results = vector_store.similarity_search(\n", - " query=\"LangChain provides abstractions to make working with LLMs easy\",\n", - " k=2,\n", - " filter=[\n", - " {\n", - " \"geo_distance\": {\n", - " \"distance\": \"200km\",\n", - " \"metadata.geo_location\": {\"lat\": 40, \"lon\": -70},\n", - " }\n", - " }\n", - " ],\n", - ")\n", - "for res in results:\n", - " print(f\"* {res.page_content} [{res.metadata}]\")" - ] - }, - { - "cell_type": "markdown", - "id": "a883e5b0", - "metadata": {}, - "source": [ - "Filter supports many more types of queries than above. \n", - "\n", - "Read more about them in the [documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html)." - ] - }, { "cell_type": "markdown", "id": "a0fda72e", @@ -603,412 +462,6 @@ "retriever.invoke(\"Stealing from the bank is a crime\")" ] }, - { - "cell_type": "markdown", - "id": "8ec8694f", - "metadata": {}, - "source": [ - "## Distance Similarity Algorithm\n", - "Elasticsearch supports the following vector distance similarity algorithms:\n", - "\n", - "- cosine\n", - "- euclidean\n", - "- dot_product\n", - "\n", - "The cosine similarity algorithm is the default.\n", - "\n", - "You can specify the similarity Algorithm needed via the similarity parameter.\n", - "\n", - "**NOTE**\n", - "Depending on the retrieval strategy, the similarity algorithm cannot be changed at query time. It is needed to be set when creating the index mapping for field. If you need to change the similarity algorithm, you need to delete the index and recreate it with the correct distance_strategy.\n", - "\n", - "```python\n", - "\n", - "db = ElasticsearchStore.from_documents(\n", - " docs, \n", - " embeddings, \n", - " es_url=\"http://localhost:9200\", \n", - " index_name=\"test\",\n", - " distance_strategy=\"COSINE\"\n", - " # distance_strategy=\"EUCLIDEAN_DISTANCE\"\n", - " # distance_strategy=\"DOT_PRODUCT\"\n", - ")\n", - "\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "67115d26", - "metadata": {}, - "source": [ - "## Retrieval Strategies\n", - "Elasticsearch has big advantages over other vector only databases from its ability to support a wide range of retrieval strategies. In this notebook we will configure `ElasticsearchStore` to support some of the most common retrieval strategies. \n", - "\n", - "By default, `ElasticsearchStore` uses the `DenseVectorStrategy` (was called `ApproxRetrievalStrategy` prior to version 0.2.0).\n", - "\n", - "### DenseVectorStrategy\n", - "This will return the top `k` most similar vectors to the query vector. The `k` parameter is set when the `ElasticsearchStore` is initialized. The default value is `10`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "946c12c4", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_elasticsearch import DenseVectorStrategy\n", - "\n", - "db = ElasticsearchStore.from_documents(\n", - " docs,\n", - " embeddings,\n", - " es_url=\"http://localhost:9200\",\n", - " index_name=\"test\",\n", - " strategy=DenseVectorStrategy(),\n", - ")\n", - "\n", - "docs = db.similarity_search(query=\"...\", k=10)" - ] - }, - { - "cell_type": "markdown", - "id": "8183cb02", - "metadata": {}, - "source": [ - "### Example: Hybrid retrieval with dense vector and keyword search\n", - "This example will show how to configure `ElasticsearchStore` to perform a hybrid retrieval, using a combination of approximate semantic search and keyword based search. 
\n", - "\n", - "We use RRF to balance the two scores from different retrieval methods.\n", - "\n", - "To enable hybrid retrieval, we need to set `hybrid=True` in the `DenseVectorStrategy` constructor.\n", - "\n", - "```python\n", - "\n", - "db = ElasticsearchStore.from_documents(\n", - " docs, \n", - " embeddings, \n", - " es_url=\"http://localhost:9200\", \n", - " index_name=\"test\",\n", - " strategy=DenseVectorStrategy(hybrid=True)\n", - ")\n", - "```\n", - "\n", - "When `hybrid` is enabled, the query performed will be a combination of approximate semantic search and keyword based search. \n", - "\n", - "It will use `rrf` (Reciprocal Rank Fusion) to balance the two scores from different retrieval methods.\n", - "\n", - "**Note** RRF requires Elasticsearch 8.9.0 or above.\n", - "\n", - "```json\n", - "{\n", - " \"knn\": {\n", - " \"field\": \"vector\",\n", - " \"filter\": [],\n", - " \"k\": 1,\n", - " \"num_candidates\": 50,\n", - " \"query_vector\": [1.0, ..., 0.0],\n", - " },\n", - " \"query\": {\n", - " \"bool\": {\n", - " \"filter\": [],\n", - " \"must\": [{\"match\": {\"text\": {\"query\": \"foo\"}}}],\n", - " }\n", - " },\n", - " \"rank\": {\"rrf\": {}},\n", - "}\n", - "```\n", - "\n", - "### Example: Dense vector search with Embedding Model in Elasticsearch\n", - "This example will show how to configure `ElasticsearchStore` to use the embedding model deployed in Elasticsearch for dense vector retrieval.\n", - "\n", - "To use this, specify the model_id in `DenseVectorStrategy` constructor via the `query_model_id` argument.\n", - "\n", - "**NOTE** This requires the model to be deployed and running in Elasticsearch ml node. See [notebook example](https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/integrations/hugging-face/loading-model-from-hugging-face.ipynb) on how to deploy the model with eland.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "993ab653", - "metadata": {}, - "outputs": [], - "source": [ - "DENSE_SELF_DEPLOYED_INDEX_NAME = \"test-dense-self-deployed\"\n", - "\n", - "# Note: This does not have an embedding function specified\n", - "# Instead, we will use the embedding model deployed in Elasticsearch\n", - "db = ElasticsearchStore(\n", - " es_cloud_id=\"\",\n", - " es_user=\"elastic\",\n", - " es_password=\"\",\n", - " index_name=DENSE_SELF_DEPLOYED_INDEX_NAME,\n", - " query_field=\"text_field\",\n", - " vector_query_field=\"vector_query_field.predicted_value\",\n", - " strategy=DenseVectorStrategy(model_id=\"sentence-transformers__all-minilm-l6-v2\"),\n", - ")\n", - "\n", - "# Setup a Ingest Pipeline to perform the embedding\n", - "# of the text field\n", - "db.client.ingest.put_pipeline(\n", - " id=\"test_pipeline\",\n", - " processors=[\n", - " {\n", - " \"inference\": {\n", - " \"model_id\": \"sentence-transformers__all-minilm-l6-v2\",\n", - " \"field_map\": {\"query_field\": \"text_field\"},\n", - " \"target_field\": \"vector_query_field\",\n", - " }\n", - " }\n", - " ],\n", - ")\n", - "\n", - "# creating a new index with the pipeline,\n", - "# not relying on langchain to create the index\n", - "db.client.indices.create(\n", - " index=DENSE_SELF_DEPLOYED_INDEX_NAME,\n", - " mappings={\n", - " \"properties\": {\n", - " \"text_field\": {\"type\": \"text\"},\n", - " \"vector_query_field\": {\n", - " \"properties\": {\n", - " \"predicted_value\": {\n", - " \"type\": \"dense_vector\",\n", - " \"dims\": 384,\n", - " \"index\": True,\n", - " \"similarity\": \"l2_norm\",\n", - " }\n", - " }\n", - " },\n", - " }\n", - " },\n", - " 
settings={\"index\": {\"default_pipeline\": \"test_pipeline\"}},\n", - ")\n", - "\n", - "db.from_texts(\n", - " [\"hello world\"],\n", - " es_cloud_id=\"\",\n", - " es_user=\"elastic\",\n", - " es_password=\"\",\n", - " index_name=DENSE_SELF_DEPLOYED_INDEX_NAME,\n", - " query_field=\"text_field\",\n", - " vector_query_field=\"vector_query_field.predicted_value\",\n", - " strategy=DenseVectorStrategy(model_id=\"sentence-transformers__all-minilm-l6-v2\"),\n", - ")\n", - "\n", - "# Perform search\n", - "db.similarity_search(\"hello world\", k=10)" - ] - }, - { - "cell_type": "markdown", - "id": "24646cf3", - "metadata": {}, - "source": [ - "### SparseVectorStrategy (ELSER)\n", - "This strategy uses Elasticsearch's sparse vector retrieval to retrieve the top-k results. We only support our own \"ELSER\" embedding model for now.\n", - "\n", - "**NOTE** This requires the ELSER model to be deployed and running in Elasticsearch ml node. \n", - "\n", - "To use this, specify `SparseVectorStrategy` (was called `SparseVectorRetrievalStrategy` prior to version 0.2.0) in the `ElasticsearchStore` constructor. You will need to provide a model ID." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d295c424", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_elasticsearch import SparseVectorStrategy\n", - "\n", - "# Note that this example doesn't have an embedding function. This is because we infer the tokens at index time and at query time within Elasticsearch.\n", - "# This requires the ELSER model to be loaded and running in Elasticsearch.\n", - "db = ElasticsearchStore.from_documents(\n", - " docs,\n", - " es_cloud_id=\"\",\n", - " es_user=\"elastic\",\n", - " es_password=\"\",\n", - " index_name=\"test-elser\",\n", - " strategy=SparseVectorStrategy(model_id=\".elser_model_2\"),\n", - ")\n", - "\n", - "db.client.indices.refresh(index=\"test-elser\")\n", - "\n", - "results = db.similarity_search(\"...\", k=4)\n", - "print(results[0])" - ] - }, - { - "cell_type": "markdown", - "id": "f6c7c17b", - "metadata": {}, - "source": [ - "### DenseVectorScriptScoreStrategy\n", - "This strategy uses Elasticsearch's script score query to perform exact vector retrieval (also known as brute force) to retrieve the top-k results. 
(This strategy was called `ExactRetrievalStrategy` prior to version 0.2.0.)\n", - "\n", - "To use this, specify `DenseVectorScriptScoreStrategy` in `ElasticsearchStore` constructor.\n", - "\n", - "```python\n", - "from langchain_elasticsearch import SparseVectorStrategy\n", - "\n", - "db = ElasticsearchStore.from_documents(\n", - " docs, \n", - " embeddings, \n", - " es_url=\"http://localhost:9200\", \n", - " index_name=\"test\",\n", - " strategy=DenseVectorScriptScoreStrategy(),\n", - ")\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "df4d584b", - "metadata": {}, - "source": [ - "### BM25Strategy\n", - "Finally, you can use full-text keyword search.\n", - "\n", - "To use this, specify `BM25Strategy` in `ElasticsearchStore` constructor.\n", - "\n", - "```python\n", - "from langchain_elasticsearch import BM25Strategy\n", - "\n", - "db = ElasticsearchStore.from_documents(\n", - " docs, \n", - " es_url=\"http://localhost:9200\", \n", - " index_name=\"test\",\n", - " strategy=BM25Strategy(),\n", - ")\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "79d35f68", - "metadata": {}, - "source": [ - "### BM25RetrievalStrategy\n", - "This strategy allows the user to perform searches using pure BM25 without vector search.\n", - "\n", - "To use this, specify `BM25RetrievalStrategy` in `ElasticsearchStore` constructor.\n", - "\n", - "Note that in the example below, the embedding option is not specified, indicating that the search is conducted without using embeddings." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "883b5f42", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_elasticsearch import ElasticsearchStore\n", - "\n", - "db = ElasticsearchStore(\n", - " es_url=\"http://localhost:9200\",\n", - " index_name=\"test_index\",\n", - " strategy=ElasticsearchStore.BM25RetrievalStrategy(),\n", - ")\n", - "\n", - "db.add_texts(\n", - " [\"foo\", \"foo bar\", \"foo bar baz\", \"bar\", \"bar baz\", \"baz\"],\n", - ")\n", - "\n", - "results = db.similarity_search(query=\"foo\", k=10)\n", - "print(results)" - ] - }, - { - "cell_type": "markdown", - "id": "ee285657", - "metadata": {}, - "source": [ - "### Customise the Query\n", - "With `custom_query` parameter at search, you are able to adjust the query that is used to retrieve documents from Elasticsearch. This is useful if you want to use a more complex query, to support linear boosting of fields." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e275baf8", - "metadata": {}, - "outputs": [], - "source": [ - "# Example of a custom query thats just doing a BM25 search on the text field.\n", - "def custom_query(query_body: dict, query: str):\n", - " \"\"\"Custom query to be used in Elasticsearch.\n", - " Args:\n", - " query_body (dict): Elasticsearch query body.\n", - " query (str): Query string.\n", - " Returns:\n", - " dict: Elasticsearch query body.\n", - " \"\"\"\n", - " print(\"Query Retriever created by the retrieval strategy:\")\n", - " print(query_body)\n", - " print()\n", - "\n", - " new_query_body = {\"query\": {\"match\": {\"text\": query}}}\n", - "\n", - " print(\"Query thats actually used in Elasticsearch:\")\n", - " print(new_query_body)\n", - " print()\n", - "\n", - " return new_query_body\n", - "\n", - "\n", - "results = db.similarity_search(\n", - " \"...\",\n", - " k=4,\n", - " custom_query=custom_query,\n", - ")\n", - "print(\"Results:\")\n", - "print(results[0])" - ] - }, - { - "cell_type": "markdown", - "id": "32ef65d4", - "metadata": {}, - "source": [ - "### Customize the Document Builder\n", - "\n", - "With ```doc_builder``` parameter at search, you are able to adjust how a Document is being built using data retrieved from Elasticsearch. This is especially useful if you have indices which were not created using Langchain." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "09a441d4", - "metadata": {}, - "outputs": [], - "source": [ - "from typing import Dict\n", - "\n", - "from langchain_core.documents import Document\n", - "\n", - "\n", - "def custom_document_builder(hit: Dict) -> Document:\n", - " src = hit.get(\"_source\", {})\n", - " return Document(\n", - " page_content=src.get(\"content\", \"Missing content!\"),\n", - " metadata={\n", - " \"page_number\": src.get(\"page_number\", -1),\n", - " \"original_filename\": src.get(\"original_filename\", \"Missing filename!\"),\n", - " },\n", - " )\n", - "\n", - "\n", - "results = db.similarity_search(\n", - " \"...\",\n", - " k=4,\n", - " doc_builder=custom_document_builder,\n", - ")\n", - "print(\"Results:\")\n", - "print(results[0])" - ] - }, { "cell_type": "markdown", "id": "17b509ae", From 14b7d790c1652444f428a60826661426febd7d38 Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Thu, 27 Mar 2025 15:27:20 +0000 Subject: [PATCH 21/30] docs: Restore accidentally deleted docs on Elasticsearch strategies (#30521) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for contributing to LangChain! - [x] **PR title**: "package: description" - Where "package" is whichever of langchain, community, core, etc. is being modified. Use "docs: ..." for purely docs changes, "infra: ..." for CI changes. - Example: "community: add foobar LLM" - [x] **PR message**: ***Delete this entire checklist*** and replace with - **Description:** Adding back a section of the Elasticsearch vectorstore documentation that was deleted in [this commit]([https://github.com/langchain-ai/langchain/commit/a72fddbf8d0c440d1ebc4bb4d5f924ce5205fc38#diff-4988344c6ccc08191f89ac[…]13698d7567fde5352038cd950d77](https://github.com/langchain-ai/langchain/commit/a72fddbf8d0c440d1ebc4bb4d5f924ce5205fc38#diff-4988344c6ccc08191f89ac1ebf1caab5185e13698d7567fde5352038cd950d77)). The only change I've made is to update the example RRF request, which was out of date. - [ ] **Add tests and docs**: If you're adding a new integration, please include 1. 
a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. - [x] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/ Additional guidelines: - Make sure optional dependencies are imported within a function. - Please do not add dependencies to pyproject.toml files (even optional ones) unless they are required for unit tests. - Most PRs should not touch more than one package. - Changes should be backwards compatible. - If you are adding something to community, do not re-import it in langchain. If no one reviews your PR within a few days, please @-mention one of baskaryan, eyurtsev, ccurme, vbarda, hwchase17. --- .../vectorstores/elasticsearch.ipynb | 469 ++++++++++++++++++ 1 file changed, 469 insertions(+) diff --git a/docs/docs/integrations/vectorstores/elasticsearch.ipynb b/docs/docs/integrations/vectorstores/elasticsearch.ipynb index 1a26b1c600f..bdef297ca18 100644 --- a/docs/docs/integrations/vectorstores/elasticsearch.ipynb +++ b/docs/docs/integrations/vectorstores/elasticsearch.ipynb @@ -462,6 +462,475 @@ "retriever.invoke(\"Stealing from the bank is a crime\")" ] }, + { + "cell_type": "markdown", + "id": "5828dda5", + "metadata": {}, + "source": [ + "## Distance Similarity Algorithm\n", + "\n", + "Elasticsearch supports the following vector distance similarity algorithms:\n", + "\n", + "- cosine\n", + "- euclidean\n", + "- dot_product\n", + "\n", + "The cosine similarity algorithm is the default.\n", + "\n", + "You can specify the similarity Algorithm needed via the similarity parameter.\n", + "\n", + "**NOTE**: Depending on the retrieval strategy, the similarity algorithm cannot be changed at query time. It is needed to be set when creating the index mapping for field. If you need to change the similarity algorithm, you need to delete the index and recreate it with the correct distance_strategy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cec8b2ac", + "metadata": {}, + "outputs": [], + "source": [ + "db = ElasticsearchStore.from_documents(\n", + " docs,\n", + " embeddings,\n", + " es_url=\"http://localhost:9200\",\n", + " index_name=\"test\",\n", + " distance_strategy=\"COSINE\",\n", + " # distance_strategy=\"EUCLIDEAN_DISTANCE\"\n", + " # distance_strategy=\"DOT_PRODUCT\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "0c9fb8a0", + "metadata": {}, + "source": [ + "## Retrieval Strategies\n", + "\n", + "Elasticsearch has big advantages over other vector only databases from its ability to support a wide range of retrieval strategies. In this notebook we will configure `ElasticsearchStore` to support some of the most common retrieval strategies.\n", + "\n", + "By default, `ElasticsearchStore` uses the `DenseVectorStrategy` (was called `ApproxRetrievalStrategy` prior to version 0.2.0).\n", + "\n", + "### DenseVectorStrategy\n", + "\n", + "This will return the top k most similar vectors to the query vector. The `k` parameter is set when the `ElasticsearchStore` is initialized. The default value is 10." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d59a493", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_elasticsearch import DenseVectorStrategy\n", + "\n", + "db = ElasticsearchStore.from_documents(\n", + " docs,\n", + " embeddings,\n", + " es_url=\"http://localhost:9200\",\n", + " index_name=\"test\",\n", + " strategy=DenseVectorStrategy(),\n", + ")\n", + "\n", + "docs = db.similarity_search(\n", + " query=\"What did the president say about Ketanji Brown Jackson?\", k=10\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "0cf5d3d2", + "metadata": {}, + "source": [ + "#### Example: Hybrid retrieval with dense vector and keyword search\n", + "\n", + "This example will show how to configure ElasticsearchStore to perform a hybrid retrieval, using a combination of approximate semantic search and keyword based search.\n", + "\n", + "We use RRF to balance the two scores from different retrieval methods.\n", + "\n", + "To enable hybrid retrieval, we need to set `hybrid=True` in the `DenseVectorStrategy` constructor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "109f992a", + "metadata": {}, + "outputs": [], + "source": [ + "db = ElasticsearchStore.from_documents(\n", + " docs,\n", + " embeddings,\n", + " es_url=\"http://localhost:9200\",\n", + " index_name=\"test\",\n", + " strategy=DenseVectorStrategy(hybrid=True),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "b6e62ef0", + "metadata": {}, + "source": [ + "When hybrid is enabled, the query performed will be a combination of approximate semantic search and keyword based search.\n", + "\n", + "It will use rrf (Reciprocal Rank Fusion) to balance the two scores from different retrieval methods.\n", + "\n", + "**Note**: RRF requires Elasticsearch 8.9.0 or above." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c07444e", + "metadata": {}, + "outputs": [], + "source": [ + "{\n", + " \"retriever\": {\n", + " \"rrf\": {\n", + " \"retrievers\": [\n", + " {\n", + " \"standard\": {\n", + " \"query\": {\n", + " \"bool\": {\n", + " \"filter\": [],\n", + " \"must\": [{\"match\": {\"text\": {\"query\": \"foo\"}}}],\n", + " }\n", + " },\n", + " },\n", + " },\n", + " {\n", + " \"knn\": {\n", + " \"field\": \"vector\",\n", + " \"filter\": [],\n", + " \"k\": 1,\n", + " \"num_candidates\": 50,\n", + " \"query_vector\": [1.0, ..., 0.0],\n", + " },\n", + " },\n", + " ]\n", + " }\n", + " }\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "2822fbf7", + "metadata": {}, + "source": [ + "#### Example: Dense vector search with Embedding Model in Elasticsearch\n", + "\n", + "This example will show how to configure `ElasticsearchStore` to use the embedding model deployed in Elasticsearch for dense vector retrieval.\n", + "\n", + "To use this, specify the model_id in `DenseVectorStrategy` constructor via the `query_model_id` argument.\n", + "\n", + "**NOTE**: This requires the model to be deployed and running in Elasticsearch ML node. See [notebook example](https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/integrations/hugging-face/loading-model-from-hugging-face.ipynb) on how to deploy the model with `eland`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d97d9db4", + "metadata": {}, + "outputs": [], + "source": [ + "DENSE_SELF_DEPLOYED_INDEX_NAME = \"test-dense-self-deployed\"\n", + "\n", + "# Note: This does not have an embedding function specified\n", + "# Instead, we will use the embedding model deployed in Elasticsearch\n", + "db = ElasticsearchStore(\n", + " es_cloud_id=\"\",\n", + " es_user=\"elastic\",\n", + " es_password=\"\",\n", + " index_name=DENSE_SELF_DEPLOYED_INDEX_NAME,\n", + " query_field=\"text_field\",\n", + " vector_query_field=\"vector_query_field.predicted_value\",\n", + " strategy=DenseVectorStrategy(model_id=\"sentence-transformers__all-minilm-l6-v2\"),\n", + ")\n", + "\n", + "# Setup a Ingest Pipeline to perform the embedding\n", + "# of the text field\n", + "db.client.ingest.put_pipeline(\n", + " id=\"test_pipeline\",\n", + " processors=[\n", + " {\n", + " \"inference\": {\n", + " \"model_id\": \"sentence-transformers__all-minilm-l6-v2\",\n", + " \"field_map\": {\"query_field\": \"text_field\"},\n", + " \"target_field\": \"vector_query_field\",\n", + " }\n", + " }\n", + " ],\n", + ")\n", + "\n", + "# creating a new index with the pipeline,\n", + "# not relying on langchain to create the index\n", + "db.client.indices.create(\n", + " index=DENSE_SELF_DEPLOYED_INDEX_NAME,\n", + " mappings={\n", + " \"properties\": {\n", + " \"text_field\": {\"type\": \"text\"},\n", + " \"vector_query_field\": {\n", + " \"properties\": {\n", + " \"predicted_value\": {\n", + " \"type\": \"dense_vector\",\n", + " \"dims\": 384,\n", + " \"index\": True,\n", + " \"similarity\": \"l2_norm\",\n", + " }\n", + " }\n", + " },\n", + " }\n", + " },\n", + " settings={\"index\": {\"default_pipeline\": \"test_pipeline\"}},\n", + ")\n", + "\n", + "db.from_texts(\n", + " [\"hello world\"],\n", + " es_cloud_id=\"\",\n", + " es_user=\"elastic\",\n", + " es_password=\"\",\n", + " index_name=DENSE_SELF_DEPLOYED_INDEX_NAME,\n", + " query_field=\"text_field\",\n", + " vector_query_field=\"vector_query_field.predicted_value\",\n", + " strategy=DenseVectorStrategy(model_id=\"sentence-transformers__all-minilm-l6-v2\"),\n", + ")\n", + "\n", + "# Perform search\n", + "db.similarity_search(\"hello world\", k=10)" + ] + }, + { + "cell_type": "markdown", + "id": "b9651b01", + "metadata": {}, + "source": [ + "### SparseVectorStrategy (ELSER)\n", + "\n", + "This strategy uses Elasticsearch's sparse vector retrieval to retrieve the top-k results. We only support our own \"ELSER\" embedding model for now.\n", + "\n", + "**NOTE**: This requires the ELSER model to be deployed and running in Elasticsearch ml node.\n", + "\n", + "To use this, specify `SparseVectorStrategy` (was called `SparseVectorRetrievalStrategy` prior to version 0.2.0) in the `ElasticsearchStore` constructor. You will need to provide a model ID." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c750ff57", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_elasticsearch import SparseVectorStrategy\n", + "\n", + "# Note that this example doesn't have an embedding function. 
This is because we infer the tokens at index time and at query time within Elasticsearch.\n", + "# This requires the ELSER model to be loaded and running in Elasticsearch.\n", + "db = ElasticsearchStore.from_documents(\n", + " docs,\n", + " es_cloud_id=\"\",\n", + " es_user=\"elastic\",\n", + " es_password=\"\",\n", + " index_name=\"test-elser\",\n", + " strategy=SparseVectorStrategy(model_id=\".elser_model_2\"),\n", + ")\n", + "\n", + "db.client.indices.refresh(index=\"test-elser\")\n", + "\n", + "results = db.similarity_search(\n", + " \"What did the president say about Ketanji Brown Jackson\", k=4\n", + ")\n", + "print(results[0])" + ] + }, + { + "cell_type": "markdown", + "id": "416e224e", + "metadata": {}, + "source": [ + "### DenseVectorScriptScoreStrategy\n", + "\n", + "This strategy uses Elasticsearch's script score query to perform exact vector retrieval (also known as brute force) to retrieve the top-k results. (This strategy was called `ExactRetrievalStrategy` prior to version 0.2.0.)\n", + "\n", + "To use this, specify `DenseVectorScriptScoreStrategy` in the `ElasticsearchStore` constructor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ced32701", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_elasticsearch import DenseVectorScriptScoreStrategy\n", + "\n", + "db = ElasticsearchStore.from_documents(\n", + " docs,\n", + " embeddings,\n", + " es_url=\"http://localhost:9200\",\n", + " index_name=\"test\",\n", + " strategy=DenseVectorScriptScoreStrategy(),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "92c9cc33", + "metadata": {}, + "source": [ + "### BM25Strategy\n", + "\n", + "Finally, you can use full-text keyword search.\n", + "\n", + "To use this, specify `BM25Strategy` in the `ElasticsearchStore` constructor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9fd59f69", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_elasticsearch import BM25Strategy\n", + "\n", + "db = ElasticsearchStore.from_documents(\n", + " docs,\n", + " es_url=\"http://localhost:9200\",\n", + " index_name=\"test\",\n", + " strategy=BM25Strategy(),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "6556d3c6", + "metadata": {}, + "source": [ + "### BM25RetrievalStrategy\n", + "\n", + "This strategy allows the user to perform searches using pure BM25 without vector search.\n", + "\n", + "To use this, specify `BM25RetrievalStrategy` in the `ElasticsearchStore` constructor.\n", + "\n", + "Note that in the example below, the embedding option is not specified, indicating that the search is conducted without using embeddings.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "478af4bd", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_elasticsearch import ElasticsearchStore\n", + "\n", + "db = ElasticsearchStore(\n", + " es_url=\"http://localhost:9200\",\n", + " index_name=\"test_index\",\n", + " strategy=ElasticsearchStore.BM25RetrievalStrategy(),\n", + ")\n", + "\n", + "db.add_texts(\n", + " [\"foo\", \"foo bar\", \"foo bar baz\", \"bar\", \"bar baz\", \"baz\"],\n", + ")\n", + "\n", + "results = db.similarity_search(query=\"foo\", k=10)\n", + "print(results)" + ] + }, + { + "cell_type": "markdown", + "id": "ed899034", + "metadata": {}, + "source": [ + "## Customise the Query\n", + "\n", + "With `custom_query` parameter at search, you are able to adjust the query that is used to retrieve documents from Elasticsearch.
This is useful if you want to use a more complex query, to support linear boosting of fields.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0ab7c94", + "metadata": {}, + "outputs": [], + "source": [ + "# Example of a custom query thats just doing a BM25 search on the text field.\n", + "def custom_query(query_body: dict, query: str):\n", + " \"\"\"Custom query to be used in Elasticsearch.\n", + " Args:\n", + " query_body (dict): Elasticsearch query body.\n", + " query (str): Query string.\n", + " Returns:\n", + " dict: Elasticsearch query body.\n", + " \"\"\"\n", + " print(\"Query Retriever created by the retrieval strategy:\")\n", + " print(query_body)\n", + " print()\n", + "\n", + " new_query_body = {\"query\": {\"match\": {\"text\": query}}}\n", + "\n", + " print(\"Query thats actually used in Elasticsearch:\")\n", + " print(new_query_body)\n", + " print()\n", + "\n", + " return new_query_body\n", + "\n", + "\n", + "results = db.similarity_search(\n", + " \"What did the president say about Ketanji Brown Jackson\",\n", + " k=4,\n", + " custom_query=custom_query,\n", + ")\n", + "print(\"Results:\")\n", + "print(results[0])" + ] + }, + { + "cell_type": "markdown", + "id": "15ebbe22", + "metadata": {}, + "source": [ + "## Customize the Document Builder\n", + "\n", + "With `doc_builder` parameter at search, you are able to adjust how a Document is being built using data retrieved from Elasticsearch. This is especially useful if you have indices which were not created using Langchain.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4cf81750", + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Dict\n", + "\n", + "from langchain_core.documents import Document\n", + "\n", + "\n", + "def custom_document_builder(hit: Dict) -> Document:\n", + " src = hit.get(\"_source\", {})\n", + " return Document(\n", + " page_content=src.get(\"content\", \"Missing content!\"),\n", + " metadata={\n", + " \"page_number\": src.get(\"page_number\", -1),\n", + " \"original_filename\": src.get(\"original_filename\", \"Missing filename!\"),\n", + " },\n", + " )\n", + "\n", + "\n", + "results = db.similarity_search(\n", + " \"What did the president say about Ketanji Brown Jackson\",\n", + " k=4,\n", + " doc_builder=custom_document_builder,\n", + ")\n", + "print(\"Results:\")\n", + "print(results[0])" + ] + }, { "cell_type": "markdown", "id": "17b509ae", From 3aa080c2a804e03e962b1ce33346aa27d322e614 Mon Sep 17 00:00:00 2001 From: Lakindu Boteju Date: Thu, 27 Mar 2025 22:29:11 +0700 Subject: [PATCH 22/30] Fix typos in pdfminer and pymupdf documentations (#30513) This pull request includes fixes in documentation for PDF loaders to correct the names of the loaders and the required installations. The most important changes include updating the loader names and installation instructions in the Jupyter notebooks. Documentation fixes: * [`docs/docs/integrations/document_loaders/pdfminer.ipynb`](diffhunk://#diff-a4a0561cd4a6e876ea34b7182de64a452060b921bb32d37b02e6a7980a41729bL34-R34): Changed references from `PyMuPDFLoader` to `PDFMinerLoader` and updated the installation instructions to replace `pymupdf` with `pdfminer`. 
[[1]](diffhunk://#diff-a4a0561cd4a6e876ea34b7182de64a452060b921bb32d37b02e6a7980a41729bL34-R34) [[2]](diffhunk://#diff-a4a0561cd4a6e876ea34b7182de64a452060b921bb32d37b02e6a7980a41729bL63-R63) [[3]](diffhunk://#diff-a4a0561cd4a6e876ea34b7182de64a452060b921bb32d37b02e6a7980a41729bL330-R330) * [`docs/docs/integrations/document_loaders/pymupdf.ipynb`](diffhunk://#diff-8487995f457e33daa2a08fdcff3b42e144eca069eeadfad5651c7c08cce7a5cdL292-R292): Corrected the loader name from `PDFPlumberLoader` to `PyMuPDFLoader`. --- docs/docs/integrations/document_loaders/pdfminer.ipynb | 6 +++--- docs/docs/integrations/document_loaders/pymupdf.ipynb | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/docs/integrations/document_loaders/pdfminer.ipynb b/docs/docs/integrations/document_loaders/pdfminer.ipynb index 9f71c0f1f8e..dc8a456ec35 100644 --- a/docs/docs/integrations/document_loaders/pdfminer.ipynb +++ b/docs/docs/integrations/document_loaders/pdfminer.ipynb @@ -31,7 +31,7 @@ "\n", "### Credentials\n", "\n", - "No credentials are required to use PyMuPDFLoader" + "No credentials are required to use PDFMinerLoader" ] }, { @@ -60,7 +60,7 @@ "source": [ "### Installation\n", "\n", - "Install **langchain_community** and **pymupdf**." + "Install **langchain_community** and **pdfminer**." ] }, { @@ -327,7 +327,7 @@ "- By page\n", "- As a single text flow\n", "\n", - "By default PDFPlumberLoader will split the PDF by page." + "By default PDFMinerLoader will split the PDF by page." ] }, { diff --git a/docs/docs/integrations/document_loaders/pymupdf.ipynb b/docs/docs/integrations/document_loaders/pymupdf.ipynb index 973d0313a5d..4549fb42d51 100644 --- a/docs/docs/integrations/document_loaders/pymupdf.ipynb +++ b/docs/docs/integrations/document_loaders/pymupdf.ipynb @@ -289,7 +289,7 @@ "- By page\n", "- As a single text flow\n", "\n", - "By default PDFPlumberLoader will split the PDF by page." + "By default PyMuPDFLoader will split the PDF by page." ] }, { From 63673b765b7deffcd116ba8f3833a7ff8a2ac8d2 Mon Sep 17 00:00:00 2001 From: Andras L Ferenczi Date: Thu, 27 Mar 2025 11:53:44 -0400 Subject: [PATCH 23/30] Fix: Enable max_retries Parameter in ChatMistralAI Class (#30448) **partners: Enable max_retries in ChatMistralAI** **Description** - This pull request reactivates the retry logic in the completion_with_retry method of the ChatMistralAI class, restoring the intended functionality of the previously ineffective max_retries parameter. New unit test that mocks failed/successful retry calls and an integration test to confirm end-to-end functionality. 
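For illustration, a minimal sketch of how the reinstated `max_retries` behaviour is expected to look from the caller's side; the model name, timeout value and prompt below are placeholders chosen for the example, not part of this change:

```python
from langchain_mistralai import ChatMistralAI

# With the retry decorator re-enabled, transient request failures are
# retried up to `max_retries` times before the error is surfaced.
llm = ChatMistralAI(
    model="mistral-small-latest",  # placeholder model name
    timeout=5,                     # assumed timeout, in seconds
    max_retries=3,
)

response = llm.invoke("Write a 2 sentence story about a cat")
print(response.content)
```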
**Issue** - Closes #30362 **Dependencies** - No additional dependencies required Co-authored-by: andrasfe --- .../langchain_mistralai/chat_models.py | 4 +- .../integration_tests/test_chat_models.py | 39 ++++++++++++++++ .../tests/unit_tests/test_chat_models.py | 46 ++++++++++++++++++- 3 files changed, 86 insertions(+), 3 deletions(-) diff --git a/libs/partners/mistralai/langchain_mistralai/chat_models.py b/libs/partners/mistralai/langchain_mistralai/chat_models.py index 6f3cac19904..bc761d2bb6c 100644 --- a/libs/partners/mistralai/langchain_mistralai/chat_models.py +++ b/libs/partners/mistralai/langchain_mistralai/chat_models.py @@ -464,9 +464,9 @@ class ChatMistralAI(BaseChatModel): self, run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any ) -> Any: """Use tenacity to retry the completion call.""" - # retry_decorator = _create_retry_decorator(self, run_manager=run_manager) + retry_decorator = _create_retry_decorator(self, run_manager=run_manager) - # @retry_decorator + @retry_decorator def _completion_with_retry(**kwargs: Any) -> Any: if "stream" not in kwargs: kwargs["stream"] = False diff --git a/libs/partners/mistralai/tests/integration_tests/test_chat_models.py b/libs/partners/mistralai/tests/integration_tests/test_chat_models.py index 8bec346d29a..16c91d419fc 100644 --- a/libs/partners/mistralai/tests/integration_tests/test_chat_models.py +++ b/libs/partners/mistralai/tests/integration_tests/test_chat_models.py @@ -1,9 +1,12 @@ """Test ChatMistral chat model.""" import json +import logging +import time from typing import Any, Optional import pytest +from httpx import ReadTimeout from langchain_core.messages import ( AIMessage, AIMessageChunk, @@ -301,3 +304,39 @@ def test_streaming_tool_call() -> None: acc = chunk if acc is None else acc + chunk assert acc.content != "" assert "tool_calls" not in acc.additional_kwargs + + +def test_retry_parameters(caplog: pytest.LogCaptureFixture) -> None: + """Test that retry parameters are honored in ChatMistralAI.""" + # Create a model with intentionally short timeout and multiple retries + mistral = ChatMistralAI( + timeout=1, # Very short timeout to trigger timeouts + max_retries=3, # Should retry 3 times + ) + + # Simple test input that should take longer than 1 second to process + test_input = "Write a 2 sentence story about a cat" + + # Measure start time + t0 = time.time() + + try: + # Try to get a response + response = mistral.invoke(test_input) + + # If successful, validate the response + elapsed_time = time.time() - t0 + logging.info(f"Request succeeded in {elapsed_time:.2f} seconds") + # Check that we got a valid response + assert response.content + assert isinstance(response.content, str) + assert "cat" in response.content.lower() + + except ReadTimeout: + elapsed_time = time.time() - t0 + logging.info(f"Request timed out after {elapsed_time:.2f} seconds") + assert elapsed_time >= 3.0 + pytest.skip("Test timed out as expected with short timeout") + except Exception as e: + logging.error(f"Unexpected exception: {e}") + raise diff --git a/libs/partners/mistralai/tests/unit_tests/test_chat_models.py b/libs/partners/mistralai/tests/unit_tests/test_chat_models.py index 4dc251832e7..6a94f431cfd 100644 --- a/libs/partners/mistralai/tests/unit_tests/test_chat_models.py +++ b/libs/partners/mistralai/tests/unit_tests/test_chat_models.py @@ -2,8 +2,9 @@ import os from typing import Any, AsyncGenerator, Dict, Generator, List, cast -from unittest.mock import patch +from unittest.mock import MagicMock, patch +import httpx import 
pytest from langchain_core.callbacks.base import BaseCallbackHandler from langchain_core.messages import ( @@ -270,3 +271,46 @@ def test_extra_kwargs() -> None: # Test that if provided twice it errors with pytest.raises(ValueError): ChatMistralAI(model="my-model", foo=3, model_kwargs={"foo": 2}) # type: ignore[call-arg] + + +def test_retry_with_failure_then_success() -> None: + """Test that retry mechanism works correctly when + first request fails and second succeeds.""" + # Create a real ChatMistralAI instance + chat = ChatMistralAI(max_retries=3) + + # Set up the actual retry mechanism (not just mocking it) + # We'll track how many times the function is called + call_count = 0 + + def mock_post(*args: Any, **kwargs: Any) -> MagicMock: + nonlocal call_count + call_count += 1 + + if call_count == 1: + raise httpx.RequestError("Connection error", request=MagicMock()) + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "choices": [ + { + "message": { + "role": "assistant", + "content": "Hello!", + }, + "finish_reason": "stop", + } + ], + "usage": { + "prompt_tokens": 1, + "completion_tokens": 1, + "total_tokens": 2, + }, + } + return mock_response + + with patch.object(chat.client, "post", side_effect=mock_post): + result = chat.invoke("Hello") + assert result.content == "Hello!" + assert call_count == 2, f"Expected 2 calls, but got {call_count}" From 05482877beb45e44d7c2faeeafbd02bf261fc79a Mon Sep 17 00:00:00 2001 From: ccurme Date: Thu, 27 Mar 2025 12:01:40 -0400 Subject: [PATCH 24/30] mistralai: release 0.2.10 (#30526) --- libs/partners/mistralai/pyproject.toml | 4 ++-- libs/partners/mistralai/uv.lock | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/libs/partners/mistralai/pyproject.toml b/libs/partners/mistralai/pyproject.toml index 36e3c26cf9b..e9aa41d2bc4 100644 --- a/libs/partners/mistralai/pyproject.toml +++ b/libs/partners/mistralai/pyproject.toml @@ -7,14 +7,14 @@ authors = [] license = { text = "MIT" } requires-python = "<4.0,>=3.9" dependencies = [ - "langchain-core<1.0.0,>=0.3.47", + "langchain-core<1.0.0,>=0.3.49", "tokenizers<1,>=0.15.1", "httpx<1,>=0.25.2", "httpx-sse<1,>=0.3.1", "pydantic<3,>=2", ] name = "langchain-mistralai" -version = "0.2.9" +version = "0.2.10" description = "An integration package connecting Mistral and LangChain" readme = "README.md" diff --git a/libs/partners/mistralai/uv.lock b/libs/partners/mistralai/uv.lock index e320388120b..bb915562f95 100644 --- a/libs/partners/mistralai/uv.lock +++ b/libs/partners/mistralai/uv.lock @@ -332,7 +332,7 @@ wheels = [ [[package]] name = "langchain-core" -version = "0.3.47" +version = "0.3.49" source = { editable = "../../core" } dependencies = [ { name = "jsonpatch" }, @@ -390,7 +390,7 @@ typing = [ [[package]] name = "langchain-mistralai" -version = "0.2.9" +version = "0.2.10" source = { editable = "." 
} dependencies = [ { name = "httpx" }, @@ -450,7 +450,7 @@ typing = [ [[package]] name = "langchain-tests" -version = "0.3.15" +version = "0.3.17" source = { editable = "../../standard-tests" } dependencies = [ { name = "httpx" }, From 59908f04d43ac6a342fa83894aaf5ee82adf018e Mon Sep 17 00:00:00 2001 From: ccurme Date: Thu, 27 Mar 2025 12:04:20 -0400 Subject: [PATCH 25/30] fireworks: release 0.2.9 (#30527) --- libs/partners/fireworks/pyproject.toml | 4 ++-- libs/partners/fireworks/uv.lock | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/libs/partners/fireworks/pyproject.toml b/libs/partners/fireworks/pyproject.toml index ddcd89e7d61..e3468ef5f4a 100644 --- a/libs/partners/fireworks/pyproject.toml +++ b/libs/partners/fireworks/pyproject.toml @@ -7,14 +7,14 @@ authors = [] license = { text = "MIT" } requires-python = "<4.0,>=3.9" dependencies = [ - "langchain-core<1.0.0,>=0.3.47", + "langchain-core<1.0.0,>=0.3.49", "fireworks-ai>=0.13.0", "openai<2.0.0,>=1.10.0", "requests<3,>=2", "aiohttp<4.0.0,>=3.9.1", ] name = "langchain-fireworks" -version = "0.2.8" +version = "0.2.9" description = "An integration package connecting Fireworks and LangChain" readme = "README.md" diff --git a/libs/partners/fireworks/uv.lock b/libs/partners/fireworks/uv.lock index 6cdc91a1a0a..5ac8012e643 100644 --- a/libs/partners/fireworks/uv.lock +++ b/libs/partners/fireworks/uv.lock @@ -635,7 +635,7 @@ wheels = [ [[package]] name = "langchain-core" -version = "0.3.47" +version = "0.3.49" source = { editable = "../../core" } dependencies = [ { name = "jsonpatch" }, @@ -693,7 +693,7 @@ typing = [ [[package]] name = "langchain-fireworks" -version = "0.2.8" +version = "0.2.9" source = { editable = "." } dependencies = [ { name = "aiohttp" }, @@ -763,7 +763,7 @@ typing = [ [[package]] name = "langchain-tests" -version = "0.3.15" +version = "0.3.17" source = { editable = "../../standard-tests" } dependencies = [ { name = "httpx" }, From e181d4321411c6ddc0c59b66532b3ac5773abcbf Mon Sep 17 00:00:00 2001 From: Christophe Bornet Date: Thu, 27 Mar 2025 18:01:49 +0100 Subject: [PATCH 26/30] core: Bump ruff version to 0.11 (#30519) Changes are from the new TC006 rule: https://docs.astral.sh/ruff/rules/runtime-cast-value/ TC006 is auto-fixed. 
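To make the mechanical change easier to review, here is a small illustrative before/after of what the TC006 auto-fix does; the function is a made-up example, not code from this repository:

```python
from typing import cast


def first_as_str(items: list[object]) -> str:
    # Before (flagged by TC006): cast(str, items[0]) evaluates the
    # annotation expression at runtime.
    # After the auto-fix: the annotation is passed as a string literal,
    # so nothing has to be imported or resolved when the line executes.
    return cast("str", items[0])


print(first_as_str(["hello", 42]))  # -> hello
```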
--- .../langchain_core/_api/beta_decorator.py | 6 +- libs/core/langchain_core/_api/deprecation.py | 16 +-- libs/core/langchain_core/callbacks/file.py | 2 +- libs/core/langchain_core/callbacks/manager.py | 12 +- libs/core/langchain_core/documents/base.py | 2 +- libs/core/langchain_core/indexing/api.py | 8 +- .../core/langchain_core/indexing/in_memory.py | 2 +- .../language_models/chat_models.py | 19 ++-- .../language_models/fake_chat_models.py | 4 +- .../langchain_core/language_models/llms.py | 40 ++++--- libs/core/langchain_core/load/serializable.py | 2 +- libs/core/langchain_core/messages/ai.py | 16 +-- libs/core/langchain_core/messages/base.py | 4 +- libs/core/langchain_core/messages/utils.py | 4 +- libs/core/langchain_core/prompt_values.py | 2 +- libs/core/langchain_core/prompts/chat.py | 12 +- libs/core/langchain_core/runnables/base.py | 104 +++++++++--------- libs/core/langchain_core/runnables/branch.py | 4 +- libs/core/langchain_core/runnables/config.py | 10 +- .../langchain_core/runnables/configurable.py | 16 +-- .../langchain_core/runnables/fallbacks.py | 10 +- .../langchain_core/runnables/passthrough.py | 2 +- libs/core/langchain_core/runnables/retry.py | 4 +- libs/core/langchain_core/runnables/router.py | 2 +- libs/core/langchain_core/tools/base.py | 6 +- libs/core/langchain_core/tracers/context.py | 7 +- libs/core/langchain_core/tracers/core.py | 2 +- .../core/langchain_core/tracers/evaluation.py | 4 +- .../langchain_core/tracers/event_stream.py | 22 ++-- libs/core/langchain_core/utils/aiter.py | 2 +- .../langchain_core/utils/function_calling.py | 15 +-- libs/core/langchain_core/utils/mustache.py | 4 +- libs/core/langchain_core/utils/pydantic.py | 4 +- libs/core/pyproject.toml | 2 +- .../example_selectors/test_similarity.py | 8 +- .../output_parsers/test_list_parser.py | 8 +- .../tests/unit_tests/prompts/test_chat.py | 2 +- .../unit_tests/prompts/test_structured.py | 2 +- .../tests/unit_tests/runnables/test_config.py | 4 +- .../unit_tests/runnables/test_runnable.py | 18 +-- .../runnables/test_runnable_events_v1.py | 2 +- .../runnables/test_runnable_events_v2.py | 2 +- libs/core/tests/unit_tests/test_tools.py | 16 +-- libs/core/uv.lock | 43 ++++---- 44 files changed, 243 insertions(+), 233 deletions(-) diff --git a/libs/core/langchain_core/_api/beta_decorator.py b/libs/core/langchain_core/_api/beta_decorator.py index b1415263323..f527cc116af 100644 --- a/libs/core/langchain_core/_api/beta_decorator.py +++ b/libs/core/langchain_core/_api/beta_decorator.py @@ -143,7 +143,7 @@ def beta( obj.__init__ = functools.wraps(obj.__init__)( # type: ignore[misc] warn_if_direct_instance ) - return cast(T, obj) + return cast("T", obj) elif isinstance(obj, property): # note(erick): this block doesn't seem to be used? 
@@ -217,7 +217,7 @@ def beta( """ wrapper = functools.wraps(wrapped)(wrapper) wrapper.__doc__ = new_doc - return cast(T, wrapper) + return cast("T", wrapper) old_doc = inspect.cleandoc(old_doc or "").strip("\n") or "" components = [message, addendum] @@ -228,7 +228,7 @@ def beta( finalized = finalize(awarning_emitting_wrapper, new_doc) else: finalized = finalize(warning_emitting_wrapper, new_doc) - return cast(T, finalized) + return cast("T", finalized) return beta diff --git a/libs/core/langchain_core/_api/deprecation.py b/libs/core/langchain_core/_api/deprecation.py index ad3d806b0da..ac1f1e06165 100644 --- a/libs/core/langchain_core/_api/deprecation.py +++ b/libs/core/langchain_core/_api/deprecation.py @@ -216,7 +216,7 @@ def deprecated( obj.__init__ = functools.wraps(obj.__init__)( # type: ignore[misc] warn_if_direct_instance ) - return cast(T, obj) + return cast("T", obj) elif isinstance(obj, FieldInfoV1): wrapped = None @@ -229,7 +229,7 @@ def deprecated( def finalize(wrapper: Callable[..., Any], new_doc: str) -> T: return cast( - T, + "T", FieldInfoV1( default=obj.default, default_factory=obj.default_factory, @@ -250,7 +250,7 @@ def deprecated( def finalize(wrapper: Callable[..., Any], new_doc: str) -> T: return cast( - T, + "T", FieldInfoV2( default=obj.default, default_factory=obj.default_factory, @@ -264,7 +264,7 @@ def deprecated( if not _obj_type: _obj_type = "attribute" wrapped = None - _name = _name or cast(Union[type, Callable], obj.fget).__qualname__ + _name = _name or cast("Union[type, Callable]", obj.fget).__qualname__ old_doc = obj.__doc__ class _DeprecatedProperty(property): @@ -311,14 +311,14 @@ def deprecated( def finalize(wrapper: Callable[..., Any], new_doc: str) -> T: """Finalize the property.""" return cast( - T, + "T", _DeprecatedProperty( fget=obj.fget, fset=obj.fset, fdel=obj.fdel, doc=new_doc ), ) else: - _name = _name or cast(Union[type, Callable], obj).__qualname__ + _name = _name or cast("Union[type, Callable]", obj).__qualname__ if not _obj_type: # edge case: when a function is within another function # within a test, this will call it a "method" not a "function" @@ -338,7 +338,7 @@ def deprecated( """ wrapper = functools.wraps(wrapped)(wrapper) wrapper.__doc__ = new_doc - return cast(T, wrapper) + return cast("T", wrapper) old_doc = inspect.cleandoc(old_doc or "").strip("\n") @@ -391,7 +391,7 @@ def deprecated( finalized = finalize(awarning_emitting_wrapper, new_doc) else: finalized = finalize(warning_emitting_wrapper, new_doc) - return cast(T, finalized) + return cast("T", finalized) return deprecate diff --git a/libs/core/langchain_core/callbacks/file.py b/libs/core/langchain_core/callbacks/file.py index b68b3ba22e6..3ee4e18e5df 100644 --- a/libs/core/langchain_core/callbacks/file.py +++ b/libs/core/langchain_core/callbacks/file.py @@ -31,7 +31,7 @@ class FileCallbackHandler(BaseCallbackHandler): mode: The mode to open the file in. Defaults to "a". color: The color to use for the text. Defaults to None. 
""" - self.file = cast(TextIO, Path(filename).open(mode, encoding="utf-8")) # noqa: SIM115 + self.file = cast("TextIO", Path(filename).open(mode, encoding="utf-8")) # noqa: SIM115 self.color = color def __del__(self) -> None: diff --git a/libs/core/langchain_core/callbacks/manager.py b/libs/core/langchain_core/callbacks/manager.py index 689003de3e6..62aa8f73d37 100644 --- a/libs/core/langchain_core/callbacks/manager.py +++ b/libs/core/langchain_core/callbacks/manager.py @@ -232,7 +232,7 @@ def shielded(func: Func) -> Func: async def wrapped(*args: Any, **kwargs: Any) -> Any: return await asyncio.shield(func(*args, **kwargs)) - return cast(Func, wrapped) + return cast("Func", wrapped) def handle_event( @@ -308,7 +308,7 @@ def handle_event( # The solution is to create a new loop in a new thread. with ThreadPoolExecutor(1) as executor: executor.submit( - cast(Callable, copy_context().run), _run_coros, coros + cast("Callable", copy_context().run), _run_coros, coros ).result() else: _run_coros(coros) @@ -362,7 +362,7 @@ async def _ahandle_event_for_handler( await asyncio.get_event_loop().run_in_executor( None, cast( - Callable, + "Callable", functools.partial( copy_context().run, event, *args, **kwargs ), @@ -2395,7 +2395,7 @@ def _configure( run_tree.trace_id, run_tree.dotted_order, ) - handler.run_map[str(run_tree.id)] = cast(Run, run_tree) + handler.run_map[str(run_tree.id)] = cast("Run", run_tree) for var, inheritable, handler_class, env_var in _configure_hooks: create_one = ( env_var is not None @@ -2403,7 +2403,9 @@ def _configure( and handler_class is not None ) if var.get() is not None or create_one: - var_handler = var.get() or cast(type[BaseCallbackHandler], handler_class)() + var_handler = ( + var.get() or cast("type[BaseCallbackHandler]", handler_class)() + ) if handler_class is None: if not any( handler is var_handler # direct pointer comparison diff --git a/libs/core/langchain_core/documents/base.py b/libs/core/langchain_core/documents/base.py index 8d39daaa0c6..d1710499edd 100644 --- a/libs/core/langchain_core/documents/base.py +++ b/libs/core/langchain_core/documents/base.py @@ -136,7 +136,7 @@ class Blob(BaseMedia): case that value will be used instead. """ if self.metadata and "source" in self.metadata: - return cast(Optional[str], self.metadata["source"]) + return cast("Optional[str]", self.metadata["source"]) return str(self.path) if self.path else None @model_validator(mode="before") diff --git a/libs/core/langchain_core/indexing/api.py b/libs/core/langchain_core/indexing/api.py index 4dd21de4f44..80a9fb5dd77 100644 --- a/libs/core/langchain_core/indexing/api.py +++ b/libs/core/langchain_core/indexing/api.py @@ -395,7 +395,7 @@ def index( if cleanup == "scoped_full": scoped_full_cleanup_source_ids.add(source_id) # source ids cannot be None after for loop above. - source_ids = cast(Sequence[str], source_ids) # type: ignore[assignment] + source_ids = cast("Sequence[str]", source_ids) # type: ignore[assignment] exists_batch = record_manager.exists([doc.uid for doc in hashed_docs]) @@ -461,7 +461,7 @@ def index( ) raise AssertionError(msg) - _source_ids = cast(Sequence[str], source_ids) + _source_ids = cast("Sequence[str]", source_ids) uids_to_delete = record_manager.list_keys( group_ids=_source_ids, before=index_start_dt @@ -710,7 +710,7 @@ async def aindex( if cleanup == "scoped_full": scoped_full_cleanup_source_ids.add(source_id) # source ids cannot be None after for loop above. 
- source_ids = cast(Sequence[str], source_ids) + source_ids = cast("Sequence[str]", source_ids) exists_batch = await record_manager.aexists([doc.uid for doc in hashed_docs]) @@ -776,7 +776,7 @@ async def aindex( ) raise AssertionError(msg) - _source_ids = cast(Sequence[str], source_ids) + _source_ids = cast("Sequence[str]", source_ids) uids_to_delete = await record_manager.alist_keys( group_ids=_source_ids, before=index_start_dt diff --git a/libs/core/langchain_core/indexing/in_memory.py b/libs/core/langchain_core/indexing/in_memory.py index cf12d0012f1..e166f5b158f 100644 --- a/libs/core/langchain_core/indexing/in_memory.py +++ b/libs/core/langchain_core/indexing/in_memory.py @@ -41,7 +41,7 @@ class InMemoryDocumentIndex(DocumentIndex): id_ = item.id self.store[id_] = item_ - ok_ids.append(cast(str, item_.id)) + ok_ids.append(cast("str", item_.id)) return UpsertResponse(succeeded=ok_ids, failed=[]) diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index db9a8098199..bde6052992a 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -303,7 +303,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC): ) -> BaseMessage: config = ensure_config(config) return cast( - ChatGeneration, + "ChatGeneration", self.generate_prompt( [self._convert_input(input)], stop=stop, @@ -335,7 +335,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC): run_id=config.pop("run_id", None), **kwargs, ) - return cast(ChatGeneration, llm_result.generations[0][0]).message + return cast("ChatGeneration", llm_result.generations[0][0]).message def _should_stream( self, @@ -383,7 +383,8 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC): if not self._should_stream(async_api=False, **{**kwargs, "stream": True}): # model doesn't implement streaming, so use default implementation yield cast( - BaseMessageChunk, self.invoke(input, config=config, stop=stop, **kwargs) + "BaseMessageChunk", + self.invoke(input, config=config, stop=stop, **kwargs), ) else: config = ensure_config(config) @@ -430,7 +431,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC): chunk.message.id = f"run-{run_manager.run_id}" chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) run_manager.on_llm_new_token( - cast(str, chunk.message.content), chunk=chunk + cast("str", chunk.message.content), chunk=chunk ) yield chunk.message if generation is None: @@ -464,7 +465,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC): if not self._should_stream(async_api=True, **{**kwargs, "stream": True}): # No async or sync stream is implemented, so fall back to ainvoke yield cast( - BaseMessageChunk, + "BaseMessageChunk", await self.ainvoke(input, config=config, stop=stop, **kwargs), ) return @@ -518,7 +519,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC): chunk.message.id = f"run-{run_manager.run_id}" chunk.message.response_metadata = _gen_info_and_msg_metadata(chunk) await run_manager.on_llm_new_token( - cast(str, chunk.message.content), chunk=chunk + cast("str", chunk.message.content), chunk=chunk ) yield chunk.message if generation is None: @@ -899,7 +900,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC): if chunk.message.id is None: chunk.message.id = f"run-{run_manager.run_id}" run_manager.on_llm_new_token( - cast(str, chunk.message.content), chunk=chunk + cast("str", chunk.message.content), chunk=chunk ) chunks.append(chunk) result = 
generate_from_stream(iter(chunks)) @@ -972,7 +973,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC): if chunk.message.id is None: chunk.message.id = f"run-{run_manager.run_id}" await run_manager.on_llm_new_token( - cast(str, chunk.message.content), chunk=chunk + cast("str", chunk.message.content), chunk=chunk ) chunks.append(chunk) result = generate_from_stream(iter(chunks)) @@ -1307,7 +1308,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC): ) if isinstance(schema, type) and is_basemodel_subclass(schema): output_parser: OutputParserLike = PydanticToolsParser( - tools=[cast(TypeBaseModel, schema)], first_tool_only=True + tools=[cast("TypeBaseModel", schema)], first_tool_only=True ) else: key_name = convert_to_openai_tool(schema)["function"]["name"] diff --git a/libs/core/langchain_core/language_models/fake_chat_models.py b/libs/core/langchain_core/language_models/fake_chat_models.py index 9bd62f1267e..5a6a6590468 100644 --- a/libs/core/langchain_core/language_models/fake_chat_models.py +++ b/libs/core/langchain_core/language_models/fake_chat_models.py @@ -262,7 +262,7 @@ class GenericFakeChatModel(BaseChatModel): msg = "Expected content to be a string." raise ValueError(msg) - content_chunks = cast(list[str], re.split(r"(\s)", content)) + content_chunks = cast("list[str]", re.split(r"(\s)", content)) for token in content_chunks: chunk = ChatGenerationChunk( @@ -280,7 +280,7 @@ class GenericFakeChatModel(BaseChatModel): for fkey, fvalue in value.items(): if isinstance(fvalue, str): # Break function call by `,` - fvalue_chunks = cast(list[str], re.split(r"(,)", fvalue)) + fvalue_chunks = cast("list[str]", re.split(r"(,)", fvalue)) for fvalue_chunk in fvalue_chunks: chunk = ChatGenerationChunk( message=AIMessageChunk( diff --git a/libs/core/langchain_core/language_models/llms.py b/libs/core/langchain_core/language_models/llms.py index 3c5ed7337f2..5049842fd48 100644 --- a/libs/core/langchain_core/language_models/llms.py +++ b/libs/core/langchain_core/language_models/llms.py @@ -449,7 +449,7 @@ class BaseLLM(BaseLanguageModel[str], ABC): return [g[0].text for g in llm_result.generations] except Exception as e: if return_exceptions: - return cast(list[str], [e for _ in inputs]) + return cast("list[str]", [e for _ in inputs]) else: raise else: @@ -495,7 +495,7 @@ class BaseLLM(BaseLanguageModel[str], ABC): return [g[0].text for g in llm_result.generations] except Exception as e: if return_exceptions: - return cast(list[str], [e for _ in inputs]) + return cast("list[str]", [e for _ in inputs]) else: raise else: @@ -901,13 +901,15 @@ class BaseLLM(BaseLanguageModel[str], ABC): ): msg = "run_name must be a list of the same length as prompts" raise ValueError(msg) - callbacks = cast(list[Callbacks], callbacks) - tags_list = cast(list[Optional[list[str]]], tags or ([None] * len(prompts))) + callbacks = cast("list[Callbacks]", callbacks) + tags_list = cast( + "list[Optional[list[str]]]", tags or ([None] * len(prompts)) + ) metadata_list = cast( - list[Optional[dict[str, Any]]], metadata or ([{}] * len(prompts)) + "list[Optional[dict[str, Any]]]", metadata or ([{}] * len(prompts)) ) run_name_list = run_name or cast( - list[Optional[str]], ([None] * len(prompts)) + "list[Optional[str]]", ([None] * len(prompts)) ) callback_managers = [ CallbackManager.configure( @@ -925,16 +927,16 @@ class BaseLLM(BaseLanguageModel[str], ABC): # We've received a single callbacks arg to apply to all inputs callback_managers = [ CallbackManager.configure( - cast(Callbacks, callbacks), + 
cast("Callbacks", callbacks), self.callbacks, self.verbose, - cast(list[str], tags), + cast("list[str]", tags), self.tags, - cast(dict[str, Any], metadata), + cast("dict[str, Any]", metadata), self.metadata, ) ] * len(prompts) - run_name_list = [cast(Optional[str], run_name)] * len(prompts) + run_name_list = [cast("Optional[str]", run_name)] * len(prompts) run_ids_list = self._get_run_ids_list(run_id, prompts) params = self.dict() params["stop"] = stop @@ -1143,13 +1145,15 @@ class BaseLLM(BaseLanguageModel[str], ABC): ): msg = "run_name must be a list of the same length as prompts" raise ValueError(msg) - callbacks = cast(list[Callbacks], callbacks) - tags_list = cast(list[Optional[list[str]]], tags or ([None] * len(prompts))) + callbacks = cast("list[Callbacks]", callbacks) + tags_list = cast( + "list[Optional[list[str]]]", tags or ([None] * len(prompts)) + ) metadata_list = cast( - list[Optional[dict[str, Any]]], metadata or ([{}] * len(prompts)) + "list[Optional[dict[str, Any]]]", metadata or ([{}] * len(prompts)) ) run_name_list = run_name or cast( - list[Optional[str]], ([None] * len(prompts)) + "list[Optional[str]]", ([None] * len(prompts)) ) callback_managers = [ AsyncCallbackManager.configure( @@ -1167,16 +1171,16 @@ class BaseLLM(BaseLanguageModel[str], ABC): # We've received a single callbacks arg to apply to all inputs callback_managers = [ AsyncCallbackManager.configure( - cast(Callbacks, callbacks), + cast("Callbacks", callbacks), self.callbacks, self.verbose, - cast(list[str], tags), + cast("list[str]", tags), self.tags, - cast(dict[str, Any], metadata), + cast("dict[str, Any]", metadata), self.metadata, ) ] * len(prompts) - run_name_list = [cast(Optional[str], run_name)] * len(prompts) + run_name_list = [cast("Optional[str]", run_name)] * len(prompts) run_ids_list = self._get_run_ids_list(run_id, prompts) params = self.dict() params["stop"] = stop diff --git a/libs/core/langchain_core/load/serializable.py b/libs/core/langchain_core/load/serializable.py index 7655438be97..d5751c96779 100644 --- a/libs/core/langchain_core/load/serializable.py +++ b/libs/core/langchain_core/load/serializable.py @@ -237,7 +237,7 @@ class Serializable(BaseModel, ABC): raise ValueError(msg) # Get a reference to self bound to each class in the MRO - this = cast(Serializable, self if cls is None else super(cls, self)) + this = cast("Serializable", self if cls is None else super(cls, self)) secrets.update(this.lc_secrets) # Now also add the aliases for the secrets diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 19267060472..38ff432afd1 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -500,14 +500,14 @@ def add_usage( if not (left or right): return UsageMetadata(input_tokens=0, output_tokens=0, total_tokens=0) if not (left and right): - return cast(UsageMetadata, left or right) + return cast("UsageMetadata", left or right) return UsageMetadata( **cast( - UsageMetadata, + "UsageMetadata", _dict_int_op( - cast(dict, left), - cast(dict, right), + cast("dict", left), + cast("dict", right), operator.add, ), ) @@ -557,14 +557,14 @@ def subtract_usage( if not (left or right): return UsageMetadata(input_tokens=0, output_tokens=0, total_tokens=0) if not (left and right): - return cast(UsageMetadata, left or right) + return cast("UsageMetadata", left or right) return UsageMetadata( **cast( - UsageMetadata, + "UsageMetadata", _dict_int_op( - cast(dict, left), - cast(dict, right), + cast("dict", left), + 
cast("dict", right), (lambda le, ri: max(le - ri, 0)), ), ) diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py index 25e37d51744..423687426d3 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -160,13 +160,13 @@ def merge_content( if isinstance(merged, str): # If the next chunk is also a string, then merge them naively if isinstance(content, str): - merged = cast(str, merged) + content + merged = cast("str", merged) + content # If the next chunk is a list, add the current to the start of the list else: merged = [merged] + content # type: ignore elif isinstance(content, list): # If both are lists - merged = merge_lists(cast(list, merged), content) # type: ignore + merged = merge_lists(cast("list", merged), content) # type: ignore # If the first content is a list, and the second content is a string else: # If the last element of the first content is a string diff --git a/libs/core/langchain_core/messages/utils.py b/libs/core/langchain_core/messages/utils.py index 5fc86222b24..ce645f0ee60 100644 --- a/libs/core/langchain_core/messages/utils.py +++ b/libs/core/langchain_core/messages/utils.py @@ -908,7 +908,7 @@ def trim_messages( try: from langchain_text_splitters import TextSplitter except ImportError: - text_splitter_fn: Optional[Callable] = cast(Optional[Callable], text_splitter) + text_splitter_fn: Optional[Callable] = cast("Optional[Callable]", text_splitter) else: if isinstance(text_splitter, TextSplitter): text_splitter_fn = text_splitter.split_text @@ -1148,7 +1148,7 @@ def convert_to_openai_messages( raise ValueError(err) if not any( tool_call["id"] == block["id"] - for tool_call in cast(AIMessage, message).tool_calls + for tool_call in cast("AIMessage", message).tool_calls ): oai_msg["tool_calls"] = oai_msg.get("tool_calls", []) oai_msg["tool_calls"].append( diff --git a/libs/core/langchain_core/prompt_values.py b/libs/core/langchain_core/prompt_values.py index c3092d92453..220550de8e2 100644 --- a/libs/core/langchain_core/prompt_values.py +++ b/libs/core/langchain_core/prompt_values.py @@ -124,7 +124,7 @@ class ImagePromptValue(PromptValue): def to_messages(self) -> list[BaseMessage]: """Return prompt (image URL) as messages.""" - return [HumanMessage(content=[cast(dict, self.image_url)])] + return [HumanMessage(content=[cast("dict", self.image_url)])] class ChatPromptValueConcrete(ChatPromptValue): diff --git a/libs/core/langchain_core/prompts/chat.py b/libs/core/langchain_core/prompts/chat.py index 288e71849f3..23daa02716b 100644 --- a/libs/core/langchain_core/prompts/chat.py +++ b/libs/core/langchain_core/prompts/chat.py @@ -530,14 +530,14 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate): if isinstance(tmpl, str): text: str = tmpl else: - text = cast(_TextTemplateParam, tmpl)["text"] # type: ignore[assignment] + text = cast("_TextTemplateParam", tmpl)["text"] # type: ignore[assignment] prompt.append( PromptTemplate.from_template( text, template_format=template_format ) ) elif isinstance(tmpl, dict) and "image_url" in tmpl: - img_template = cast(_ImageTemplateParam, tmpl)["image_url"] + img_template = cast("_ImageTemplateParam", tmpl)["image_url"] input_variables = [] if isinstance(img_template, str): vars = get_template_variables(img_template, template_format) @@ -1024,7 +1024,7 @@ class ChatPromptTemplate(BaseChatPromptTemplate): "partial_variables": partial_vars, **kwargs, } - cast(type[ChatPromptTemplate], super()).__init__(messages=_messages, **kwargs) 
+ cast("type[ChatPromptTemplate]", super()).__init__(messages=_messages, **kwargs) @classmethod def get_lc_namespace(cls) -> list[str]: @@ -1382,11 +1382,11 @@ def _create_template_from_message_type( ) elif message_type in ("ai", "assistant"): message = AIMessagePromptTemplate.from_template( - cast(str, template), template_format=template_format + cast("str", template), template_format=template_format ) elif message_type == "system": message = SystemMessagePromptTemplate.from_template( - cast(str, template), template_format=template_format + cast("str", template), template_format=template_format ) elif message_type == "placeholder": if isinstance(template, str): @@ -1484,7 +1484,7 @@ def _convert_to_message( else: _message = message_type_str( prompt=PromptTemplate.from_template( - cast(str, template), template_format=template_format + cast("str", template), template_format=template_format ) ) else: diff --git a/libs/core/langchain_core/runnables/base.py b/libs/core/langchain_core/runnables/base.py index 4fbb1f96201..12c545f4b7b 100644 --- a/libs/core/langchain_core/runnables/base.py +++ b/libs/core/langchain_core/runnables/base.py @@ -782,10 +782,10 @@ class Runnable(Generic[Input, Output], ABC): # If there's only one input, don't bother with the executor if len(inputs) == 1: - return cast(list[Output], [invoke(inputs[0], configs[0])]) + return cast("list[Output]", [invoke(inputs[0], configs[0])]) with get_executor_for_config(configs[0]) as executor: - return cast(list[Output], list(executor.map(invoke, inputs, configs))) + return cast("list[Output]", list(executor.map(invoke, inputs, configs))) @overload def batch_as_completed( @@ -1532,7 +1532,7 @@ class Runnable(Generic[Input, Output], ABC): return RunnableBinding( bound=self, config=cast( - RunnableConfig, + "RunnableConfig", {**(config or {}), **kwargs}, ), # type: ignore[misc] kwargs={}, @@ -1921,7 +1921,7 @@ class Runnable(Generic[Input, Output], ABC): child_config = patch_config(config, callbacks=run_manager.get_child()) with set_config_context(child_config) as context: output = cast( - Output, + "Output", context.run( call_func_with_variable_args, # type: ignore[arg-type] func, # type: ignore[arg-type] @@ -2036,7 +2036,7 @@ class Runnable(Generic[Input, Output], ABC): for run_manager in run_managers: run_manager.on_chain_error(e) if return_exceptions: - return cast(list[Output], [e for _ in input]) + return cast("list[Output]", [e for _ in input]) else: raise else: @@ -2048,7 +2048,7 @@ class Runnable(Generic[Input, Output], ABC): else: run_manager.on_chain_end(out) if return_exceptions or first_exception is None: - return cast(list[Output], output) + return cast("list[Output]", output) else: raise first_exception @@ -2112,7 +2112,7 @@ class Runnable(Generic[Input, Output], ABC): *(run_manager.on_chain_error(e) for run_manager in run_managers) ) if return_exceptions: - return cast(list[Output], [e for _ in input]) + return cast("list[Output]", [e for _ in input]) else: raise else: @@ -2126,7 +2126,7 @@ class Runnable(Generic[Input, Output], ABC): coros.append(run_manager.on_chain_end(out)) await asyncio.gather(*coros) if return_exceptions or first_exception is None: - return cast(list[Output], output) + return cast("list[Output]", output) else: raise first_exception @@ -2183,7 +2183,7 @@ class Runnable(Generic[Input, Output], ABC): iterator = context.run(transformer, input_for_transform, **kwargs) # type: ignore[arg-type] if stream_handler := next( ( - cast(_StreamingCallbackHandler, h) + cast("_StreamingCallbackHandler", h) for h 
in run_manager.handlers # instance check OK here, it's a mixin if isinstance(h, _StreamingCallbackHandler) # type: ignore[misc] @@ -2286,7 +2286,7 @@ class Runnable(Generic[Input, Output], ABC): if stream_handler := next( ( - cast(_StreamingCallbackHandler, h) + cast("_StreamingCallbackHandler", h) for h in run_manager.handlers # instance check OK here, it's a mixin if isinstance(h, _StreamingCallbackHandler) # type: ignore[misc] @@ -2307,7 +2307,7 @@ class Runnable(Generic[Input, Output], ABC): context=context, ) else: - chunk = cast(Output, await py_anext(iterator)) + chunk = cast("Output", await py_anext(iterator)) yield chunk if final_output_supported: if final_output is None: @@ -3029,7 +3029,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]): raise else: run_manager.on_chain_end(input) - return cast(Output, input) + return cast("Output", input) async def ainvoke( self, @@ -3072,7 +3072,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]): raise else: await run_manager.on_chain_end(input) - return cast(Output, input) + return cast("Output", input) def batch( self, @@ -3162,7 +3162,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]): inputs = [] for i in range(len(configs)): if i in failed_inputs_map: - inputs.append(cast(Input, failed_inputs_map[i])) + inputs.append(cast("Input", failed_inputs_map[i])) else: inputs.append(inputs_copy.pop(0)) else: @@ -3185,7 +3185,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]): for rm in run_managers: rm.on_chain_error(e) if return_exceptions: - return cast(list[Output], [e for _ in inputs]) + return cast("list[Output]", [e for _ in inputs]) else: raise else: @@ -3197,7 +3197,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]): else: run_manager.on_chain_end(out) if return_exceptions or first_exception is None: - return cast(list[Output], inputs) + return cast("list[Output]", inputs) else: raise first_exception @@ -3292,7 +3292,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]): inputs = [] for i in range(len(configs)): if i in failed_inputs_map: - inputs.append(cast(Input, failed_inputs_map[i])) + inputs.append(cast("Input", failed_inputs_map[i])) else: inputs.append(inputs_copy.pop(0)) else: @@ -3313,7 +3313,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]): except BaseException as e: await asyncio.gather(*(rm.on_chain_error(e) for rm in run_managers)) if return_exceptions: - return cast(list[Output], [e for _ in inputs]) + return cast("list[Output]", [e for _ in inputs]) else: raise else: @@ -3327,7 +3327,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]): coros.append(run_manager.on_chain_end(out)) await asyncio.gather(*coros) if return_exceptions or first_exception is None: - return cast(list[Output], inputs) + return cast("list[Output]", inputs) else: raise first_exception @@ -3346,7 +3346,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]): # transform the input stream of each step with the next # steps that don't natively support transforming an input stream will # buffer input in memory until all available, and then start emitting output - final_pipeline = cast(Iterator[Output], input) + final_pipeline = cast("Iterator[Output]", input) for idx, step in enumerate(steps): config = patch_config( config, callbacks=run_manager.get_child(f"seq:step:{idx + 1}") @@ -3374,7 +3374,7 @@ class RunnableSequence(RunnableSerializable[Input, Output]): # transform the input stream of each step with the next # steps that don't natively 
support transforming an input stream will # buffer input in memory until all available, and then start emitting output - final_pipeline = cast(AsyncIterator[Output], input) + final_pipeline = cast("AsyncIterator[Output]", input) for idx, step in enumerate(steps): config = patch_config( config, @@ -4189,7 +4189,7 @@ class RunnableGenerator(Runnable[Input, Output]): final: Optional[Output] = None for output in self.stream(input, config, **kwargs): final = output if final is None else final + output # type: ignore[operator] - return cast(Output, final) + return cast("Output", final) def atransform( self, @@ -4222,7 +4222,7 @@ class RunnableGenerator(Runnable[Input, Output]): final: Optional[Output] = None async for output in self.astream(input, config, **kwargs): final = output if final is None else final + output # type: ignore[operator] - return cast(Output, final) + return cast("Output", final) class RunnableLambda(Runnable[Input, Output]): @@ -4336,7 +4336,7 @@ class RunnableLambda(Runnable[Input, Output]): self.afunc = func func_for_name = func elif callable(func): - self.func = cast(Callable[[Input], Output], func) + self.func = cast("Callable[[Input], Output]", func) func_for_name = func else: msg = ( @@ -4556,7 +4556,7 @@ class RunnableLambda(Runnable[Input, Output]): if inspect.isgeneratorfunction(self.func): output: Optional[Output] = None for chunk in call_func_with_variable_args( - cast(Callable[[Input], Iterator[Output]], self.func), + cast("Callable[[Input], Iterator[Output]]", self.func), input, config, run_manager, @@ -4589,7 +4589,7 @@ class RunnableLambda(Runnable[Input, Output]): recursion_limit=recursion_limit - 1, ), ) - return cast(Output, output) + return cast("Output", output) async def _ainvoke( self, @@ -4611,7 +4611,7 @@ class RunnableLambda(Runnable[Input, Output]): ) -> Output: output: Optional[Output] = None for chunk in call_func_with_variable_args( - cast(Callable[[Input], Iterator[Output]], self.func), + cast("Callable[[Input], Iterator[Output]]", self.func), input, config, run_manager.get_sync(), @@ -4624,7 +4624,7 @@ class RunnableLambda(Runnable[Input, Output]): output = output + chunk # type: ignore[operator] except TypeError: output = chunk - return cast(Output, output) + return cast("Output", output) else: @@ -4648,9 +4648,9 @@ class RunnableLambda(Runnable[Input, Output]): output: Optional[Output] = None async with aclosing( cast( - AsyncGenerator[Any, Any], + "AsyncGenerator[Any, Any]", acall_func_with_variable_args( - cast(Callable, afunc), + cast("Callable", afunc), input, config, run_manager, @@ -4659,7 +4659,7 @@ class RunnableLambda(Runnable[Input, Output]): ) ) as stream: async for chunk in cast( - AsyncIterator[Output], + "AsyncIterator[Output]", stream, ): if output is None: @@ -4671,7 +4671,7 @@ class RunnableLambda(Runnable[Input, Output]): output = chunk else: output = await acall_func_with_variable_args( - cast(Callable, afunc), input, config, run_manager, **kwargs + cast("Callable", afunc), input, config, run_manager, **kwargs ) # If the output is a Runnable, invoke it if isinstance(output, Runnable): @@ -4689,7 +4689,7 @@ class RunnableLambda(Runnable[Input, Output]): recursion_limit=recursion_limit - 1, ), ) - return cast(Output, output) + return cast("Output", output) def _config( self, config: Optional[RunnableConfig], callable: Callable[..., Any] @@ -4779,7 +4779,7 @@ class RunnableLambda(Runnable[Input, Output]): if inspect.isgeneratorfunction(self.func): output: Optional[Output] = None for chunk in call_func_with_variable_args( - 
self.func, cast(Input, final), config, run_manager, **kwargs + self.func, cast("Input", final), config, run_manager, **kwargs ): yield chunk if output is None: @@ -4791,7 +4791,7 @@ class RunnableLambda(Runnable[Input, Output]): output = chunk else: output = call_func_with_variable_args( - self.func, cast(Input, final), config, run_manager, **kwargs + self.func, cast("Input", final), config, run_manager, **kwargs ) # If the output is a Runnable, use its stream output @@ -4813,7 +4813,7 @@ class RunnableLambda(Runnable[Input, Output]): yield chunk elif not inspect.isgeneratorfunction(self.func): # Otherwise, just yield it - yield cast(Output, output) + yield cast("Output", output) def transform( self, @@ -4895,10 +4895,10 @@ class RunnableLambda(Runnable[Input, Output]): if is_async_generator(afunc): output: Optional[Output] = None async for chunk in cast( - AsyncIterator[Output], + "AsyncIterator[Output]", acall_func_with_variable_args( - cast(Callable, afunc), - cast(Input, final), + cast("Callable", afunc), + cast("Input", final), config, run_manager, **kwargs, @@ -4914,7 +4914,11 @@ class RunnableLambda(Runnable[Input, Output]): output = chunk else: output = await acall_func_with_variable_args( - cast(Callable, afunc), cast(Input, final), config, run_manager, **kwargs + cast("Callable", afunc), + cast("Input", final), + config, + run_manager, + **kwargs, ) # If the output is a Runnable, use its astream output @@ -4936,7 +4940,7 @@ class RunnableLambda(Runnable[Input, Output]): yield chunk elif not is_async_generator(afunc): # Otherwise, just yield it - yield cast(Output, output) + yield cast("Output", output) async def atransform( self, @@ -5301,7 +5305,7 @@ class RunnableBindingBase(RunnableSerializable[Input, Output]): @override def InputType(self) -> type[Input]: return ( - cast(type[Input], self.custom_input_type) + cast("type[Input]", self.custom_input_type) if self.custom_input_type is not None else self.bound.InputType ) @@ -5310,7 +5314,7 @@ class RunnableBindingBase(RunnableSerializable[Input, Output]): @override def OutputType(self) -> type[Output]: return ( - cast(type[Output], self.custom_output_type) + cast("type[Output]", self.custom_output_type) if self.custom_output_type is not None else self.bound.OutputType ) @@ -5383,7 +5387,7 @@ class RunnableBindingBase(RunnableSerializable[Input, Output]): ) -> list[Output]: if isinstance(config, list): configs = cast( - list[RunnableConfig], + "list[RunnableConfig]", [self._merge_configs(conf) for conf in config], ) else: @@ -5405,7 +5409,7 @@ class RunnableBindingBase(RunnableSerializable[Input, Output]): ) -> list[Output]: if isinstance(config, list): configs = cast( - list[RunnableConfig], + "list[RunnableConfig]", [self._merge_configs(conf) for conf in config], ) else: @@ -5447,7 +5451,7 @@ class RunnableBindingBase(RunnableSerializable[Input, Output]): ) -> Iterator[tuple[int, Union[Output, Exception]]]: if isinstance(config, Sequence): configs = cast( - list[RunnableConfig], + "list[RunnableConfig]", [self._merge_configs(conf) for conf in config], ) else: @@ -5498,7 +5502,7 @@ class RunnableBindingBase(RunnableSerializable[Input, Output]): ) -> AsyncIterator[tuple[int, Union[Output, Exception]]]: if isinstance(config, Sequence): configs = cast( - list[RunnableConfig], + "list[RunnableConfig]", [self._merge_configs(conf) for conf in config], ) else: @@ -5665,7 +5669,7 @@ class RunnableBinding(RunnableBindingBase[Input, Output]): return self.__class__( bound=self.bound, kwargs=self.kwargs, - config=cast(RunnableConfig, 
{**self.config, **(config or {}), **kwargs}), + config=cast("RunnableConfig", {**self.config, **(config or {}), **kwargs}), custom_input_type=self.custom_input_type, custom_output_type=self.custom_output_type, ) @@ -5835,9 +5839,9 @@ def coerce_to_runnable(thing: RunnableLike) -> Runnable[Input, Output]: elif is_async_generator(thing) or inspect.isgeneratorfunction(thing): return RunnableGenerator(thing) elif callable(thing): - return RunnableLambda(cast(Callable[[Input], Output], thing)) + return RunnableLambda(cast("Callable[[Input], Output]", thing)) elif isinstance(thing, dict): - return cast(Runnable[Input, Output], RunnableParallel(thing)) + return cast("Runnable[Input, Output]", RunnableParallel(thing)) else: msg = ( f"Expected a Runnable, callable or dict." diff --git a/libs/core/langchain_core/runnables/branch.py b/libs/core/langchain_core/runnables/branch.py index 56c43886189..788e90f7038 100644 --- a/libs/core/langchain_core/runnables/branch.py +++ b/libs/core/langchain_core/runnables/branch.py @@ -105,7 +105,7 @@ class RunnableBranch(RunnableSerializable[Input, Output]): raise TypeError(msg) default_ = cast( - Runnable[Input, Output], coerce_to_runnable(cast(RunnableLike, default)) + "Runnable[Input, Output]", coerce_to_runnable(cast("RunnableLike", default)) ) _branches = [] @@ -125,7 +125,7 @@ class RunnableBranch(RunnableSerializable[Input, Output]): ) raise ValueError(msg) condition, runnable = branch - condition = cast(Runnable[Input, bool], coerce_to_runnable(condition)) + condition = cast("Runnable[Input, bool]", coerce_to_runnable(condition)) runnable = coerce_to_runnable(runnable) _branches.append((condition, runnable)) diff --git a/libs/core/langchain_core/runnables/config.py b/libs/core/langchain_core/runnables/config.py index 8f06209cb99..3c0a8d3308e 100644 --- a/libs/core/langchain_core/runnables/config.py +++ b/libs/core/langchain_core/runnables/config.py @@ -200,7 +200,7 @@ def ensure_config(config: Optional[RunnableConfig] = None) -> RunnableConfig: if var_config := var_child_runnable_config.get(): empty.update( cast( - RunnableConfig, + "RunnableConfig", { k: v.copy() if k in COPIABLE_KEYS else v # type: ignore[attr-defined] for k, v in var_config.items() @@ -211,7 +211,7 @@ def ensure_config(config: Optional[RunnableConfig] = None) -> RunnableConfig: if config is not None: empty.update( cast( - RunnableConfig, + "RunnableConfig", { k: v.copy() if k in COPIABLE_KEYS else v # type: ignore[attr-defined] for k, v in config.items() @@ -271,7 +271,7 @@ def get_config_list( stacklevel=3, ) subsequent = cast( - RunnableConfig, {k: v for k, v in config.items() if k != "run_id"} + "RunnableConfig", {k: v for k, v in config.items() if k != "run_id"} ) return [ ensure_config(subsequent) if i else ensure_config(config) @@ -533,7 +533,7 @@ class ContextThreadPoolExecutor(ThreadPoolExecutor): Future[T]: The future for the function. 
""" return super().submit( - cast(Callable[..., T], partial(copy_context().run, func, *args, **kwargs)) + cast("Callable[..., T]", partial(copy_context().run, func, *args, **kwargs)) ) def map( @@ -621,7 +621,7 @@ async def run_in_executor( # Use default executor with context copied from current context return await asyncio.get_running_loop().run_in_executor( None, - cast(Callable[..., T], partial(copy_context().run, wrapper)), + cast("Callable[..., T]", partial(copy_context().run, wrapper)), ) return await asyncio.get_running_loop().run_in_executor(executor_or_config, wrapper) diff --git a/libs/core/langchain_core/runnables/configurable.py b/libs/core/langchain_core/runnables/configurable.py index 79473e5d268..244b6bfcf2c 100644 --- a/libs/core/langchain_core/runnables/configurable.py +++ b/libs/core/langchain_core/runnables/configurable.py @@ -122,7 +122,7 @@ class DynamicRunnable(RunnableSerializable[Input, Output]): runnable: Runnable[Input, Output] = self while isinstance(runnable, DynamicRunnable): runnable, config = runnable._prepare(merge_configs(runnable.config, config)) - return runnable, cast(RunnableConfig, config) + return runnable, cast("RunnableConfig", config) @abstractmethod def _prepare( @@ -178,10 +178,10 @@ class DynamicRunnable(RunnableSerializable[Input, Output]): # If there's only one input, don't bother with the executor if len(inputs) == 1: - return cast(list[Output], [invoke(prepared[0], inputs[0])]) + return cast("list[Output]", [invoke(prepared[0], inputs[0])]) with get_executor_for_config(configs[0]) as executor: - return cast(list[Output], list(executor.map(invoke, prepared, inputs))) + return cast("list[Output]", list(executor.map(invoke, prepared, inputs))) async def abatch( self, @@ -271,7 +271,7 @@ class DynamicRunnable(RunnableSerializable[Input, Output]): and "configurable" in arg and isinstance(arg["configurable"], dict) ): - runnable, config = self.prepare(cast(RunnableConfig, arg)) + runnable, config = self.prepare(cast("RunnableConfig", arg)) kwargs = {**kwargs, "config": config} return getattr(runnable, name)(*args, **kwargs) @@ -281,7 +281,7 @@ class DynamicRunnable(RunnableSerializable[Input, Output]): and "configurable" in arg and isinstance(arg["configurable"], dict) ): - runnable, config = self.prepare(cast(RunnableConfig, arg)) + runnable, config = self.prepare(cast("RunnableConfig", arg)) argsl = list(args) argsl[idx] = config return getattr(runnable, name)(*argsl, **kwargs) @@ -563,7 +563,7 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]): for v in list(self.alternatives.keys()) + [self.default_key] ), ) - _enums_for_spec[self.which] = cast(type[StrEnum], which_enum) + _enums_for_spec[self.which] = cast("type[StrEnum]", which_enum) return get_unique_config_specs( # which alternative [ @@ -617,7 +617,7 @@ class RunnableConfigurableAlternatives(DynamicRunnable[Input, Output]): # remap configurable keys for the chosen alternative if self.prefix_keys: config = cast( - RunnableConfig, + "RunnableConfig", { **config, "configurable": { @@ -696,7 +696,7 @@ def make_options_spec( spec.name or spec.id, ((v, v) for v in list(spec.options.keys())), ) - _enums_for_spec[spec] = cast(type[StrEnum], enum) + _enums_for_spec[spec] = cast("type[StrEnum]", enum) if isinstance(spec, ConfigurableFieldSingleOption): return ConfigurableFieldSpec( id=spec.id, diff --git a/libs/core/langchain_core/runnables/fallbacks.py b/libs/core/langchain_core/runnables/fallbacks.py index 20bfa7e7528..10c8a4adbac 100644 --- 
a/libs/core/langchain_core/runnables/fallbacks.py +++ b/libs/core/langchain_core/runnables/fallbacks.py @@ -317,12 +317,12 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]): if not return_exceptions: first_to_raise = first_to_raise or output else: - handled_exceptions[i] = cast(BaseException, output) + handled_exceptions[i] = cast("BaseException", output) run_again.pop(i) elif isinstance(output, self.exceptions_to_handle): if self.exception_key: input[self.exception_key] = output # type: ignore - handled_exceptions[i] = cast(BaseException, output) + handled_exceptions[i] = cast("BaseException", output) else: run_managers[i].on_chain_end(output) to_return[i] = output @@ -413,12 +413,12 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]): if not return_exceptions: first_to_raise = first_to_raise or output else: - handled_exceptions[i] = cast(BaseException, output) + handled_exceptions[i] = cast("BaseException", output) run_again.pop(i) elif isinstance(output, self.exceptions_to_handle): if self.exception_key: input[self.exception_key] = output # type: ignore - handled_exceptions[i] = cast(BaseException, output) + handled_exceptions[i] = cast("BaseException", output) else: to_return[i] = output await run_managers[i].on_chain_end(output) @@ -547,7 +547,7 @@ class RunnableWithFallbacks(RunnableSerializable[Input, Output]): context=context, ) else: - chunk = cast(Output, await py_anext(stream)) + chunk = cast("Output", await py_anext(stream)) except self.exceptions_to_handle as e: first_error = e if first_error is None else first_error last_error = e diff --git a/libs/core/langchain_core/runnables/passthrough.py b/libs/core/langchain_core/runnables/passthrough.py index 95c27e7bcc6..718e8552297 100644 --- a/libs/core/langchain_core/runnables/passthrough.py +++ b/libs/core/langchain_core/runnables/passthrough.py @@ -565,7 +565,7 @@ class RunnableAssign(RunnableSerializable[dict[str, Any], dict[str, Any]]): if filtered: yield filtered # yield map output - yield cast(dict[str, Any], first_map_chunk_future.result()) + yield cast("dict[str, Any]", first_map_chunk_future.result()) for chunk in map_output: yield chunk diff --git a/libs/core/langchain_core/runnables/retry.py b/libs/core/langchain_core/runnables/retry.py index 4320a794462..5d7a74685c0 100644 --- a/libs/core/langchain_core/runnables/retry.py +++ b/libs/core/langchain_core/runnables/retry.py @@ -245,7 +245,7 @@ class RunnableRetry(RunnableBindingBase[Input, Output]): attempt.retry_state.set_result(result) except RetryError as e: if result is not_set: - result = cast(list[Output], [e] * len(inputs)) + result = cast("list[Output]", [e] * len(inputs)) outputs: list[Union[Output, Exception]] = [] for idx in range(len(inputs)): @@ -311,7 +311,7 @@ class RunnableRetry(RunnableBindingBase[Input, Output]): attempt.retry_state.set_result(result) except RetryError as e: if result is not_set: - result = cast(list[Output], [e] * len(inputs)) + result = cast("list[Output]", [e] * len(inputs)) outputs: list[Union[Output, Exception]] = [] for idx in range(len(inputs)): diff --git a/libs/core/langchain_core/runnables/router.py b/libs/core/langchain_core/runnables/router.py index 8d679ad35c9..856bec5df76 100644 --- a/libs/core/langchain_core/runnables/router.py +++ b/libs/core/langchain_core/runnables/router.py @@ -158,7 +158,7 @@ class RouterRunnable(RunnableSerializable[RouterInput, Output]): configs = get_config_list(config, len(inputs)) with get_executor_for_config(configs[0]) as executor: return cast( - list[Output], + 
"list[Output]", list(executor.map(invoke, runnables, actual_inputs, configs)), ) diff --git a/libs/core/langchain_core/tools/base.py b/libs/core/langchain_core/tools/base.py index 87a8c80b865..aa555d38624 100644 --- a/libs/core/langchain_core/tools/base.py +++ b/libs/core/langchain_core/tools/base.py @@ -941,11 +941,11 @@ def _prep_run_args( ) -> tuple[Union[str, dict], dict]: config = ensure_config(config) if _is_tool_call(input): - tool_call_id: Optional[str] = cast(ToolCall, input)["id"] - tool_input: Union[str, dict] = cast(ToolCall, input)["args"].copy() + tool_call_id: Optional[str] = cast("ToolCall", input)["id"] + tool_input: Union[str, dict] = cast("ToolCall", input)["args"].copy() else: tool_call_id = None - tool_input = cast(Union[str, dict], input) + tool_input = cast("Union[str, dict]", input) return ( tool_input, dict( diff --git a/libs/core/langchain_core/tracers/context.py b/libs/core/langchain_core/tracers/context.py index 5e516a7817d..5d31bd334ba 100644 --- a/libs/core/langchain_core/tracers/context.py +++ b/libs/core/langchain_core/tracers/context.py @@ -128,9 +128,7 @@ def _get_trace_callbacks( example_id=example_id, ) if callback_manager is None: - from langchain_core.callbacks.base import Callbacks - - cb = cast(Callbacks, [tracer]) + cb = cast("Callbacks", [tracer]) else: if not any( isinstance(handler, LangChainTracer) @@ -206,13 +204,12 @@ def register_configure_hook( if env_var is not None and handle_class is None: msg = "If env_var is set, handle_class must also be set to a non-None value." raise ValueError(msg) - from langchain_core.callbacks.base import BaseCallbackHandler _configure_hooks.append( ( # the typings of ContextVar do not have the generic arg set as covariant # so we have to cast it - cast(ContextVar[Optional[BaseCallbackHandler]], context_var), + cast("ContextVar[Optional[BaseCallbackHandler]]", context_var), inheritable, handle_class, env_var, diff --git a/libs/core/langchain_core/tracers/core.py b/libs/core/langchain_core/tracers/core.py index 599c4000cd6..1f6e05dbc61 100644 --- a/libs/core/langchain_core/tracers/core.py +++ b/libs/core/langchain_core/tracers/core.py @@ -285,7 +285,7 @@ class _TracerCore(ABC): output_generation = llm_run.outputs["generations"][i][j] if "message" in output_generation: output_generation["message"] = dumpd( - cast(ChatGeneration, generation).message + cast("ChatGeneration", generation).message ) llm_run.end_time = datetime.now(timezone.utc) llm_run.events.append({"name": "end", "time": llm_run.end_time}) diff --git a/libs/core/langchain_core/tracers/evaluation.py b/libs/core/langchain_core/tracers/evaluation.py index 057b8f4565f..a1e9dac63c6 100644 --- a/libs/core/langchain_core/tracers/evaluation.py +++ b/libs/core/langchain_core/tracers/evaluation.py @@ -90,7 +90,7 @@ class EvaluatorCallbackHandler(BaseTracer): self.executor = ThreadPoolExecutor(max_workers=max_concurrency) weakref.finalize( self, - lambda: cast(ThreadPoolExecutor, self.executor).shutdown(wait=True), + lambda: cast("ThreadPoolExecutor", self.executor).shutdown(wait=True), ) else: self.executor = None @@ -156,7 +156,7 @@ class EvaluatorCallbackHandler(BaseTracer): if isinstance(results, EvaluationResult): results_ = [results] elif isinstance(results, dict) and "results" in results: - results_ = cast(list[EvaluationResult], results["results"]) + results_ = cast("list[EvaluationResult]", results["results"]) else: msg = ( f"Invalid evaluation result type {type(results)}." 
diff --git a/libs/core/langchain_core/tracers/event_stream.py b/libs/core/langchain_core/tracers/event_stream.py index 2a5f6b5f840..52f897ba293 100644 --- a/libs/core/langchain_core/tracers/event_stream.py +++ b/libs/core/langchain_core/tracers/event_stream.py @@ -185,7 +185,7 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand run_info = self.run_map.get(run_id) if run_info is None: # run has finished, don't issue any stream events - yield cast(T, first) + yield cast("T", first) return if tap is sentinel: # if we are the first to tap, issue stream events @@ -199,7 +199,7 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand "parent_ids": self._get_parent_ids(run_id), } self._send({**event, "data": {"chunk": first}}, run_info["run_type"]) - yield cast(T, first) + yield cast("T", first) # consume the rest of the output async for chunk in output: self._send( @@ -209,7 +209,7 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand yield chunk else: # otherwise just pass through - yield cast(T, first) + yield cast("T", first) # consume the rest of the output async for chunk in output: yield chunk @@ -235,7 +235,7 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand run_info = self.run_map.get(run_id) if run_info is None: # run has finished, don't issue any stream events - yield cast(T, first) + yield cast("T", first) return if tap is sentinel: # if we are the first to tap, issue stream events @@ -249,7 +249,7 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand "parent_ids": self._get_parent_ids(run_id), } self._send({**event, "data": {"chunk": first}}, run_info["run_type"]) - yield cast(T, first) + yield cast("T", first) # consume the rest of the output for chunk in output: self._send( @@ -259,7 +259,7 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand yield chunk else: # otherwise just pass through - yield cast(T, first) + yield cast("T", first) # consume the rest of the output for chunk in output: yield chunk @@ -423,14 +423,14 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand if chunk is None: chunk_ = AIMessageChunk(content=token) else: - chunk_ = cast(ChatGenerationChunk, chunk).message + chunk_ = cast("ChatGenerationChunk", chunk).message elif run_info["run_type"] == "llm": event = "on_llm_stream" if chunk is None: chunk_ = GenerationChunk(text=token) else: - chunk_ = cast(GenerationChunk, chunk) + chunk_ = cast("GenerationChunk", chunk) else: msg = f"Unexpected run type: {run_info['run_type']}" raise ValueError(msg) @@ -461,7 +461,7 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand output: Union[dict, BaseMessage] = {} if run_info["run_type"] == "chat_model": - generations = cast(list[list[ChatGenerationChunk]], response.generations) + generations = cast("list[list[ChatGenerationChunk]]", response.generations) for gen in generations: if output != {}: break @@ -471,7 +471,7 @@ class _AstreamEventsCallbackHandler(AsyncCallbackHandler, _StreamingCallbackHand event = "on_chat_model_end" elif run_info["run_type"] == "llm": - generations = cast(list[list[GenerationChunk]], response.generations) + generations = cast("list[list[GenerationChunk]]", response.generations) output = { "generations": [ [ @@ -942,7 +942,7 @@ async def _astream_events_implementation_v2( # Assign the stream handler to the config config = ensure_config(config) - run_id = 
cast(UUID, config.setdefault("run_id", uuid4())) + run_id = cast("UUID", config.setdefault("run_id", uuid4())) callbacks = config.get("callbacks") if callbacks is None: config["callbacks"] = [event_streamer] diff --git a/libs/core/langchain_core/utils/aiter.py b/libs/core/langchain_core/utils/aiter.py index b4e26915a39..4e82d9fb3ca 100644 --- a/libs/core/langchain_core/utils/aiter.py +++ b/libs/core/langchain_core/utils/aiter.py @@ -55,7 +55,7 @@ def py_anext( """ try: __anext__ = cast( - Callable[[AsyncIterator[T]], Awaitable[T]], type(iterator).__anext__ + "Callable[[AsyncIterator[T]], Awaitable[T]]", type(iterator).__anext__ ) except AttributeError as e: msg = f"{iterator!r} is not an async iterator" diff --git a/libs/core/langchain_core/utils/function_calling.py b/libs/core/langchain_core/utils/function_calling.py index 5cacfc32565..25248ab8707 100644 --- a/libs/core/langchain_core/utils/function_calling.py +++ b/libs/core/langchain_core/utils/function_calling.py @@ -244,10 +244,9 @@ convert_python_function_to_openai_function = deprecated( def _convert_typed_dict_to_openai_function(typed_dict: type) -> FunctionDescription: visited: dict = {} - from pydantic.v1 import BaseModel model = cast( - type[BaseModel], + "type[BaseModel]", _convert_any_typed_dicts_to_pydantic(typed_dict, visited=visited), ) return _convert_pydantic_to_openai_function(model) # type: ignore @@ -471,15 +470,17 @@ def convert_to_openai_function( if function_copy and "properties" in function_copy: oai_function["parameters"] = function_copy elif isinstance(function, type) and is_basemodel_subclass(function): - oai_function = cast(dict, _convert_pydantic_to_openai_function(function)) + oai_function = cast("dict", _convert_pydantic_to_openai_function(function)) elif is_typeddict(function): oai_function = cast( - dict, _convert_typed_dict_to_openai_function(cast(type, function)) + "dict", _convert_typed_dict_to_openai_function(cast("type", function)) ) elif isinstance(function, BaseTool): - oai_function = cast(dict, _format_tool_to_openai_function(function)) + oai_function = cast("dict", _format_tool_to_openai_function(function)) elif callable(function): - oai_function = cast(dict, _convert_python_function_to_openai_function(function)) + oai_function = cast( + "dict", _convert_python_function_to_openai_function(function) + ) else: msg = ( f"Unsupported function\n\n{function}\n\nFunctions must be passed in" @@ -775,7 +776,7 @@ def _py_38_safe_origin(origin: type) -> type: collections.abc.MutableMapping: typing.MutableMapping, **origin_union_type_map, } - return cast(type, origin_map.get(origin, origin)) + return cast("type", origin_map.get(origin, origin)) def _recursive_set_additional_properties_false( diff --git a/libs/core/langchain_core/utils/mustache.py b/libs/core/langchain_core/utils/mustache.py index 3a95e06bb08..6e12964f7b5 100644 --- a/libs/core/langchain_core/utils/mustache.py +++ b/libs/core/langchain_core/utils/mustache.py @@ -360,7 +360,7 @@ def _get_key( # Move into the scope try: # Try subscripting (Normal dictionaries) - resolved_scope = cast(dict[str, Any], resolved_scope)[child] + resolved_scope = cast("dict[str, Any]", resolved_scope)[child] except (TypeError, AttributeError): try: resolved_scope = getattr(resolved_scope, child) @@ -618,7 +618,7 @@ def render( scope = _get_key( key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel ) - scopes.insert(0, cast(Literal[False], not scope)) + scopes.insert(0, cast("Literal[False]", not scope)) # If we're a partial elif tag == "partial": diff 
--git a/libs/core/langchain_core/utils/pydantic.py b/libs/core/langchain_core/utils/pydantic.py index 0aff29c1bb1..d7afe6b626b 100644 --- a/libs/core/langchain_core/utils/pydantic.py +++ b/libs/core/langchain_core/utils/pydantic.py @@ -473,7 +473,7 @@ def _create_root_model( except TypeError: pass custom_root_type = type(name, (RootModel,), base_class_attributes) - return cast(type[BaseModel], custom_root_type) + return cast("type[BaseModel]", custom_root_type) @lru_cache(maxsize=256) @@ -598,7 +598,7 @@ def create_model_v2( Returns: Type[BaseModel]: The created model. """ - field_definitions = cast(dict[str, Any], field_definitions or {}) # type: ignore[no-redef] + field_definitions = cast("dict[str, Any]", field_definitions or {}) # type: ignore[no-redef] if root: if field_definitions: diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index f7350e0e165..7f50acb0ff9 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -28,7 +28,7 @@ repository = "https://github.com/langchain-ai/langchain" [dependency-groups] lint = [ - "ruff<1.0.0,>=0.9.2", + "ruff<0.12.0,>=0.11.2", ] typing = [ "mypy<1.11,>=1.10", diff --git a/libs/core/tests/unit_tests/example_selectors/test_similarity.py b/libs/core/tests/unit_tests/example_selectors/test_similarity.py index 5a5f40d197a..0936bb2bd72 100644 --- a/libs/core/tests/unit_tests/example_selectors/test_similarity.py +++ b/libs/core/tests/unit_tests/example_selectors/test_similarity.py @@ -127,7 +127,7 @@ def test_from_examples() -> None: assert selector.vectorstore_kwargs == {"vs_foo": "vs_bar"} assert isinstance(selector.vectorstore, DummyVectorStore) - vector_store = cast(DummyVectorStore, selector.vectorstore) + vector_store = cast("DummyVectorStore", selector.vectorstore) assert vector_store.embeddings is embeddings assert vector_store.init_arg == "some_init_arg" assert vector_store.texts == ["bar"] @@ -153,7 +153,7 @@ async def test_afrom_examples() -> None: assert selector.vectorstore_kwargs == {"vs_foo": "vs_bar"} assert isinstance(selector.vectorstore, DummyVectorStore) - vector_store = cast(DummyVectorStore, selector.vectorstore) + vector_store = cast("DummyVectorStore", selector.vectorstore) assert vector_store.embeddings is embeddings assert vector_store.init_arg == "some_init_arg" assert vector_store.texts == ["bar"] @@ -207,7 +207,7 @@ def test_mmr_from_examples() -> None: assert selector.vectorstore_kwargs == {"vs_foo": "vs_bar"} assert isinstance(selector.vectorstore, DummyVectorStore) - vector_store = cast(DummyVectorStore, selector.vectorstore) + vector_store = cast("DummyVectorStore", selector.vectorstore) assert vector_store.embeddings is embeddings assert vector_store.init_arg == "some_init_arg" assert vector_store.texts == ["bar"] @@ -235,7 +235,7 @@ async def test_mmr_afrom_examples() -> None: assert selector.vectorstore_kwargs == {"vs_foo": "vs_bar"} assert isinstance(selector.vectorstore, DummyVectorStore) - vector_store = cast(DummyVectorStore, selector.vectorstore) + vector_store = cast("DummyVectorStore", selector.vectorstore) assert vector_store.embeddings is embeddings assert vector_store.init_arg == "some_init_arg" assert vector_store.texts == ["bar"] diff --git a/libs/core/tests/unit_tests/output_parsers/test_list_parser.py b/libs/core/tests/unit_tests/output_parsers/test_list_parser.py index 6336f8abb1e..1b176706e2d 100644 --- a/libs/core/tests/unit_tests/output_parsers/test_list_parser.py +++ b/libs/core/tests/unit_tests/output_parsers/test_list_parser.py @@ -101,7 +101,7 @@ def 
test_numbered_list() -> None: (text2, ["apple", "banana", "cherry"]), (text3, []), ]: - expectedlist = [[a] for a in cast(list[str], expected)] + expectedlist = [[a] for a in cast("list[str]", expected)] assert parser.parse(text) == expected assert add(parser.transform(t for t in text)) == (expected or None) assert list(parser.transform(t for t in text)) == expectedlist @@ -137,7 +137,7 @@ def test_markdown_list() -> None: (text2, ["apple", "banana", "cherry"]), (text3, []), ]: - expectedlist = [[a] for a in cast(list[str], expected)] + expectedlist = [[a] for a in cast("list[str]", expected)] assert parser.parse(text) == expected assert add(parser.transform(t for t in text)) == (expected or None) assert list(parser.transform(t for t in text)) == expectedlist @@ -240,7 +240,7 @@ async def test_numbered_list_async() -> None: (text2, ["apple", "banana", "cherry"]), (text3, []), ]: - expectedlist = [[a] for a in cast(list[str], expected)] + expectedlist = [[a] for a in cast("list[str]", expected)] assert await parser.aparse(text) == expected assert await aadd(parser.atransform(aiter_from_iter(t for t in text))) == ( expected or None @@ -283,7 +283,7 @@ async def test_markdown_list_async() -> None: (text2, ["apple", "banana", "cherry"]), (text3, []), ]: - expectedlist = [[a] for a in cast(list[str], expected)] + expectedlist = [[a] for a in cast("list[str]", expected)] assert await parser.aparse(text) == expected assert await aadd(parser.atransform(aiter_from_iter(t for t in text))) == ( expected or None diff --git a/libs/core/tests/unit_tests/prompts/test_chat.py b/libs/core/tests/unit_tests/prompts/test_chat.py index 32ad0500779..020c959778c 100644 --- a/libs/core/tests/unit_tests/prompts/test_chat.py +++ b/libs/core/tests/unit_tests/prompts/test_chat.py @@ -929,7 +929,7 @@ async def test_chat_tmpl_serdes(snapshot: SnapshotAssertion) -> None: ("system", [{"text": "You are an AI assistant named {name}."}]), SystemMessagePromptTemplate.from_template("you are {foo}"), cast( - tuple, + "tuple", ( "human", [ diff --git a/libs/core/tests/unit_tests/prompts/test_structured.py b/libs/core/tests/unit_tests/prompts/test_structured.py index 921ff68a0fe..9a955fa67b2 100644 --- a/libs/core/tests/unit_tests/prompts/test_structured.py +++ b/libs/core/tests/unit_tests/prompts/test_structured.py @@ -19,7 +19,7 @@ def _fake_runnable( if isclass(schema) and is_basemodel_subclass(schema): return schema(name="yo", value=value) else: - params = cast(dict, schema)["parameters"] + params = cast("dict", schema)["parameters"] return {k: 1 if k != "value" else value for k, v in params.items()} diff --git a/libs/core/tests/unit_tests/runnables/test_config.py b/libs/core/tests/unit_tests/runnables/test_config.py index 678a8086220..dc7f1c5d0ca 100644 --- a/libs/core/tests/unit_tests/runnables/test_config.py +++ b/libs/core/tests/unit_tests/runnables/test_config.py @@ -48,7 +48,7 @@ def test_ensure_config() -> None: "tags": ["tag3", "tag4"], }, ) - config = ctx.run(ensure_config, cast(RunnableConfig, arg)) + config = ctx.run(ensure_config, cast("RunnableConfig", arg)) assert len(arg["callbacks"]) == 1, ( "ensure_config should not modify the original config" ) @@ -147,7 +147,7 @@ async def test_merge_config_callbacks() -> None: def test_config_arbitrary_keys() -> None: base: RunnablePassthrough[Any] = RunnablePassthrough() bound = base.with_config(my_custom_key="my custom value") - config = cast(RunnableBinding, bound).config + config = cast("RunnableBinding", bound).config assert config.get("my_custom_key") == "my custom 
value" diff --git a/libs/core/tests/unit_tests/runnables/test_runnable.py b/libs/core/tests/unit_tests/runnables/test_runnable.py index dd28080ea2c..12c0fad86d7 100644 --- a/libs/core/tests/unit_tests/runnables/test_runnable.py +++ b/libs/core/tests/unit_tests/runnables/test_runnable.py @@ -2436,7 +2436,7 @@ async def test_stream_log_retriever() -> None: ): del op["value"]["id"] - assert sorted(cast(RunLog, add(stream_log)).state["logs"]) == [ + assert sorted(cast("RunLog", add(stream_log)).state["logs"]) == [ "ChatPromptTemplate", "FakeListLLM", "FakeListLLM:2", @@ -2632,7 +2632,7 @@ def test_combining_sequences( lambda x: {"question": x[0] + x[1]} ) - chain2 = cast(RunnableSequence, input_formatter | prompt2 | chat2 | parser2) + chain2 = cast("RunnableSequence", input_formatter | prompt2 | chat2 | parser2) assert isinstance(chain, RunnableSequence) assert chain2.first == input_formatter @@ -2640,7 +2640,7 @@ def test_combining_sequences( assert chain2.last == parser2 assert dumps(chain2, pretty=True) == snapshot - combined_chain = cast(RunnableSequence, chain | chain2) + combined_chain = cast("RunnableSequence", chain | chain2) assert combined_chain.first == prompt assert combined_chain.middle == [ @@ -3278,7 +3278,7 @@ async def test_map_astream() -> None: final_state = chunk else: final_state += chunk - final_state = cast(RunLog, final_state) + final_state = cast("RunLog", final_state) assert final_state.state["final_output"] == final_value assert len(final_state.state["streamed_output"]) == len(streamed_chunks) @@ -3312,7 +3312,7 @@ async def test_map_astream() -> None: final_state = chunk else: final_state += chunk - final_state = cast(RunLog, final_state) + final_state = cast("RunLog", final_state) assert final_state.state["final_output"] == final_value assert len(final_state.state["streamed_output"]) == len(streamed_chunks) @@ -3328,7 +3328,7 @@ async def test_map_astream() -> None: final_state = chunk else: final_state += chunk - final_state = cast(RunLog, final_state) + final_state = cast("RunLog", final_state) assert final_state.state["final_output"] == final_value assert len(final_state.state["streamed_output"]) == len(streamed_chunks) @@ -4032,7 +4032,7 @@ async def test_runnable_lambda_astream() -> None: output = [ chunk async for chunk in cast( - AsyncIterator[str], RunnableLambda(lambda x: llm).astream("") + "AsyncIterator[str]", RunnableLambda(lambda x: llm).astream("") ) ] assert output == list(llm_res) @@ -5350,7 +5350,7 @@ def test_default_transform_with_dicts() -> None: def invoke( self, input: Input, config: Optional[RunnableConfig] = None, **kwargs: Any ) -> Output: - return cast(Output, input) # type: ignore + return cast("Output", input) # type: ignore runnable = CustomRunnable[dict[str, str], dict[str, str]]() chunks = iter( @@ -5371,7 +5371,7 @@ async def test_default_atransform_with_dicts() -> None: def invoke( self, input: Input, config: Optional[RunnableConfig] = None, **kwargs: Any ) -> Output: - return cast(Output, input) + return cast("Output", input) runnable = CustomRunnable[dict[str, str], dict[str, str]]() diff --git a/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py b/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py index 9aab4eef34f..634dd866a48 100644 --- a/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py +++ b/libs/core/tests/unit_tests/runnables/test_runnable_events_v1.py @@ -42,7 +42,7 @@ def _with_nulled_run_id(events: Sequence[StreamEvent]) -> list[StreamEvent]: assert "parent_ids" in event, "Parent 
ids should be present in the event." assert event["parent_ids"] == [], "Parent ids should be empty." - return cast(list[StreamEvent], [{**event, "run_id": ""} for event in events]) + return cast("list[StreamEvent]", [{**event, "run_id": ""} for event in events]) async def _as_async_iterator(iterable: list) -> AsyncIterator: diff --git a/libs/core/tests/unit_tests/runnables/test_runnable_events_v2.py b/libs/core/tests/unit_tests/runnables/test_runnable_events_v2.py index e1e1f37b904..82b2309168c 100644 --- a/libs/core/tests/unit_tests/runnables/test_runnable_events_v2.py +++ b/libs/core/tests/unit_tests/runnables/test_runnable_events_v2.py @@ -68,7 +68,7 @@ def _with_nulled_run_id(events: Sequence[StreamEvent]) -> list[StreamEvent]: ) return cast( - list[StreamEvent], + "list[StreamEvent]", [{**event, "run_id": "", "parent_ids": []} for event in events], ) diff --git a/libs/core/tests/unit_tests/test_tools.py b/libs/core/tests/unit_tests/test_tools.py index 18f11bdc26e..156556f3902 100644 --- a/libs/core/tests/unit_tests/test_tools.py +++ b/libs/core/tests/unit_tests/test_tools.py @@ -203,7 +203,7 @@ def test_decorator_with_specified_schema() -> None: assert isinstance(tool_func, BaseTool) assert tool_func.args_schema == _MockSchema - @tool(args_schema=cast(ArgsSchema, _MockSchemaV1)) + @tool(args_schema=cast("ArgsSchema", _MockSchemaV1)) def tool_func_v1(arg1: int, arg2: bool, arg3: Optional[dict] = None) -> str: return f"{arg1} {arg2} {arg3}" @@ -1935,7 +1935,7 @@ def test_structured_tool_with_different_pydantic_versions(pydantic_model: Any) - assert foo_tool.invoke({"a": 5, "b": "hello"}) == "foo" - args_schema = cast(BaseModel, foo_tool.args_schema) + args_schema = cast("BaseModel", foo_tool.args_schema) args_json_schema = ( args_schema.model_json_schema() if hasattr(args_schema, "model_json_schema") @@ -2484,7 +2484,7 @@ def test_tool_decorator_description() -> None: assert foo.description == "Foo." assert ( - cast(BaseModel, foo.tool_call_schema).model_json_schema()["description"] + cast("BaseModel", foo.tool_call_schema).model_json_schema()["description"] == "Foo." ) @@ -2496,7 +2496,7 @@ def test_tool_decorator_description() -> None: assert foo_description.description == "description" assert ( - cast(BaseModel, foo_description.tool_call_schema).model_json_schema()[ + cast("BaseModel", foo_description.tool_call_schema).model_json_schema()[ "description" ] == "description" @@ -2514,7 +2514,7 @@ def test_tool_decorator_description() -> None: assert foo_args_schema.description == "Bar." assert ( - cast(BaseModel, foo_args_schema.tool_call_schema).model_json_schema()[ + cast("BaseModel", foo_args_schema.tool_call_schema).model_json_schema()[ "description" ] == "Bar." @@ -2527,7 +2527,7 @@ def test_tool_decorator_description() -> None: assert foo_args_schema_description.description == "description" assert ( cast( - BaseModel, foo_args_schema_description.tool_call_schema + "BaseModel", foo_args_schema_description.tool_call_schema ).model_json_schema()["description"] == "description" ) @@ -2552,13 +2552,13 @@ def test_tool_decorator_description() -> None: assert foo_args_jsons_schema.description == "JSON Schema." assert ( - cast(dict, foo_args_jsons_schema.tool_call_schema)["description"] + cast("dict", foo_args_jsons_schema.tool_call_schema)["description"] == "JSON Schema." 
) assert foo_args_jsons_schema_with_description.description == "description" assert ( - cast(dict, foo_args_jsons_schema_with_description.tool_call_schema)[ + cast("dict", foo_args_jsons_schema_with_description.tool_call_schema)[ "description" ] == "description" diff --git a/libs/core/uv.lock b/libs/core/uv.lock index fdbfad5601f..309da3d6b01 100644 --- a/libs/core/uv.lock +++ b/libs/core/uv.lock @@ -1,4 +1,5 @@ version = 1 +revision = 1 requires-python = ">=3.9, <4.0" resolution-markers = [ "python_full_version >= '3.12.4'", @@ -998,7 +999,7 @@ dev = [ { name = "jupyter", specifier = ">=1.0.0,<2.0.0" }, { name = "setuptools", specifier = ">=67.6.1,<68.0.0" }, ] -lint = [{ name = "ruff", specifier = ">=0.9.2,<1.0.0" }] +lint = [{ name = "ruff", specifier = ">=0.11.2,<0.12.0" }] test = [ { name = "blockbuster", specifier = "~=1.5.18" }, { name = "freezegun", specifier = ">=1.2.2,<2.0.0" }, @@ -1026,7 +1027,7 @@ typing = [ [[package]] name = "langchain-tests" -version = "0.3.16" +version = "0.3.17" source = { directory = "../standard-tests" } dependencies = [ { name = "httpx" }, @@ -2261,27 +2262,27 @@ wheels = [ [[package]] name = "ruff" -version = "0.9.4" +version = "0.11.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c0/17/529e78f49fc6f8076f50d985edd9a2cf011d1dbadb1cdeacc1d12afc1d26/ruff-0.9.4.tar.gz", hash = "sha256:6907ee3529244bb0ed066683e075f09285b38dd5b4039370df6ff06041ca19e7", size = 3599458 } +sdist = { url = "https://files.pythonhosted.org/packages/90/61/fb87430f040e4e577e784e325351186976516faef17d6fcd921fe28edfd7/ruff-0.11.2.tar.gz", hash = "sha256:ec47591497d5a1050175bdf4e1a4e6272cddff7da88a2ad595e1e326041d8d94", size = 3857511 } wheels = [ - { url = "https://files.pythonhosted.org/packages/b6/f8/3fafb7804d82e0699a122101b5bee5f0d6e17c3a806dcbc527bb7d3f5b7a/ruff-0.9.4-py3-none-linux_armv6l.whl", hash = "sha256:64e73d25b954f71ff100bb70f39f1ee09e880728efb4250c632ceed4e4cdf706", size = 11668400 }, - { url = "https://files.pythonhosted.org/packages/2e/a6/2efa772d335da48a70ab2c6bb41a096c8517ca43c086ea672d51079e3d1f/ruff-0.9.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6ce6743ed64d9afab4fafeaea70d3631b4d4b28b592db21a5c2d1f0ef52934bf", size = 11628395 }, - { url = "https://files.pythonhosted.org/packages/dc/d7/cd822437561082f1c9d7225cc0d0fbb4bad117ad7ac3c41cd5d7f0fa948c/ruff-0.9.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:54499fb08408e32b57360f6f9de7157a5fec24ad79cb3f42ef2c3f3f728dfe2b", size = 11090052 }, - { url = "https://files.pythonhosted.org/packages/9e/67/3660d58e893d470abb9a13f679223368ff1684a4ef40f254a0157f51b448/ruff-0.9.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37c892540108314a6f01f105040b5106aeb829fa5fb0561d2dcaf71485021137", size = 11882221 }, - { url = "https://files.pythonhosted.org/packages/79/d1/757559995c8ba5f14dfec4459ef2dd3fcea82ac43bc4e7c7bf47484180c0/ruff-0.9.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:de9edf2ce4b9ddf43fd93e20ef635a900e25f622f87ed6e3047a664d0e8f810e", size = 11424862 }, - { url = "https://files.pythonhosted.org/packages/c0/96/7915a7c6877bb734caa6a2af424045baf6419f685632469643dbd8eb2958/ruff-0.9.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:87c90c32357c74f11deb7fbb065126d91771b207bf9bfaaee01277ca59b574ec", size = 12626735 }, - { url = 
"https://files.pythonhosted.org/packages/0e/cc/dadb9b35473d7cb17c7ffe4737b4377aeec519a446ee8514123ff4a26091/ruff-0.9.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:56acd6c694da3695a7461cc55775f3a409c3815ac467279dfa126061d84b314b", size = 13255976 }, - { url = "https://files.pythonhosted.org/packages/5f/c3/ad2dd59d3cabbc12df308cced780f9c14367f0321e7800ca0fe52849da4c/ruff-0.9.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0c93e7d47ed951b9394cf352d6695b31498e68fd5782d6cbc282425655f687a", size = 12752262 }, - { url = "https://files.pythonhosted.org/packages/c7/17/5f1971e54bd71604da6788efd84d66d789362b1105e17e5ccc53bba0289b/ruff-0.9.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1d4c8772670aecf037d1bf7a07c39106574d143b26cfe5ed1787d2f31e800214", size = 14401648 }, - { url = "https://files.pythonhosted.org/packages/30/24/6200b13ea611b83260501b6955b764bb320e23b2b75884c60ee7d3f0b68e/ruff-0.9.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfc5f1d7afeda8d5d37660eeca6d389b142d7f2b5a1ab659d9214ebd0e025231", size = 12414702 }, - { url = "https://files.pythonhosted.org/packages/34/cb/f5d50d0c4ecdcc7670e348bd0b11878154bc4617f3fdd1e8ad5297c0d0ba/ruff-0.9.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:faa935fc00ae854d8b638c16a5f1ce881bc3f67446957dd6f2af440a5fc8526b", size = 11859608 }, - { url = "https://files.pythonhosted.org/packages/d6/f4/9c8499ae8426da48363bbb78d081b817b0f64a9305f9b7f87eab2a8fb2c1/ruff-0.9.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:a6c634fc6f5a0ceae1ab3e13c58183978185d131a29c425e4eaa9f40afe1e6d6", size = 11485702 }, - { url = "https://files.pythonhosted.org/packages/18/59/30490e483e804ccaa8147dd78c52e44ff96e1c30b5a95d69a63163cdb15b/ruff-0.9.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:433dedf6ddfdec7f1ac7575ec1eb9844fa60c4c8c2f8887a070672b8d353d34c", size = 12067782 }, - { url = "https://files.pythonhosted.org/packages/3d/8c/893fa9551760b2f8eb2a351b603e96f15af167ceaf27e27ad873570bc04c/ruff-0.9.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:d612dbd0f3a919a8cc1d12037168bfa536862066808960e0cc901404b77968f0", size = 12483087 }, - { url = "https://files.pythonhosted.org/packages/23/15/f6751c07c21ca10e3f4a51ea495ca975ad936d780c347d9808bcedbd7182/ruff-0.9.4-py3-none-win32.whl", hash = "sha256:db1192ddda2200671f9ef61d9597fcef89d934f5d1705e571a93a67fb13a4402", size = 9852302 }, - { url = "https://files.pythonhosted.org/packages/12/41/2d2d2c6a72e62566f730e49254f602dfed23019c33b5b21ea8f8917315a1/ruff-0.9.4-py3-none-win_amd64.whl", hash = "sha256:05bebf4cdbe3ef75430d26c375773978950bbf4ee3c95ccb5448940dc092408e", size = 10850051 }, - { url = "https://files.pythonhosted.org/packages/c6/e6/3d6ec3bc3d254e7f005c543a661a41c3e788976d0e52a1ada195bd664344/ruff-0.9.4-py3-none-win_arm64.whl", hash = "sha256:585792f1e81509e38ac5123492f8875fbc36f3ede8185af0a26df348e5154f41", size = 10078251 }, + { url = "https://files.pythonhosted.org/packages/62/99/102578506f0f5fa29fd7e0df0a273864f79af044757aef73d1cae0afe6ad/ruff-0.11.2-py3-none-linux_armv6l.whl", hash = "sha256:c69e20ea49e973f3afec2c06376eb56045709f0212615c1adb0eda35e8a4e477", size = 10113146 }, + { url = "https://files.pythonhosted.org/packages/74/ad/5cd4ba58ab602a579997a8494b96f10f316e874d7c435bcc1a92e6da1b12/ruff-0.11.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:2c5424cc1c4eb1d8ecabe6d4f1b70470b4f24a0c0171356290b1953ad8f0e272", size = 10867092 }, + { url = 
"https://files.pythonhosted.org/packages/fc/3e/d3f13619e1d152c7b600a38c1a035e833e794c6625c9a6cea6f63dbf3af4/ruff-0.11.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:ecf20854cc73f42171eedb66f006a43d0a21bfb98a2523a809931cda569552d9", size = 10224082 }, + { url = "https://files.pythonhosted.org/packages/90/06/f77b3d790d24a93f38e3806216f263974909888fd1e826717c3ec956bbcd/ruff-0.11.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c543bf65d5d27240321604cee0633a70c6c25c9a2f2492efa9f6d4b8e4199bb", size = 10394818 }, + { url = "https://files.pythonhosted.org/packages/99/7f/78aa431d3ddebfc2418cd95b786642557ba8b3cb578c075239da9ce97ff9/ruff-0.11.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:20967168cc21195db5830b9224be0e964cc9c8ecf3b5a9e3ce19876e8d3a96e3", size = 9952251 }, + { url = "https://files.pythonhosted.org/packages/30/3e/f11186d1ddfaca438c3bbff73c6a2fdb5b60e6450cc466129c694b0ab7a2/ruff-0.11.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:955a9ce63483999d9f0b8f0b4a3ad669e53484232853054cc8b9d51ab4c5de74", size = 11563566 }, + { url = "https://files.pythonhosted.org/packages/22/6c/6ca91befbc0a6539ee133d9a9ce60b1a354db12c3c5d11cfdbf77140f851/ruff-0.11.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:86b3a27c38b8fce73bcd262b0de32e9a6801b76d52cdb3ae4c914515f0cef608", size = 12208721 }, + { url = "https://files.pythonhosted.org/packages/19/b0/24516a3b850d55b17c03fc399b681c6a549d06ce665915721dc5d6458a5c/ruff-0.11.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a3b66a03b248c9fcd9d64d445bafdf1589326bee6fc5c8e92d7562e58883e30f", size = 11662274 }, + { url = "https://files.pythonhosted.org/packages/d7/65/76be06d28ecb7c6070280cef2bcb20c98fbf99ff60b1c57d2fb9b8771348/ruff-0.11.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0397c2672db015be5aa3d4dac54c69aa012429097ff219392c018e21f5085147", size = 13792284 }, + { url = "https://files.pythonhosted.org/packages/ce/d2/4ceed7147e05852876f3b5f3fdc23f878ce2b7e0b90dd6e698bda3d20787/ruff-0.11.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:869bcf3f9abf6457fbe39b5a37333aa4eecc52a3b99c98827ccc371a8e5b6f1b", size = 11327861 }, + { url = "https://files.pythonhosted.org/packages/c4/78/4935ecba13706fd60ebe0e3dc50371f2bdc3d9bc80e68adc32ff93914534/ruff-0.11.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:2a2b50ca35457ba785cd8c93ebbe529467594087b527a08d487cf0ee7b3087e9", size = 10276560 }, + { url = "https://files.pythonhosted.org/packages/81/7f/1b2435c3f5245d410bb5dc80f13ec796454c21fbda12b77d7588d5cf4e29/ruff-0.11.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:7c69c74bf53ddcfbc22e6eb2f31211df7f65054bfc1f72288fc71e5f82db3eab", size = 9945091 }, + { url = "https://files.pythonhosted.org/packages/39/c4/692284c07e6bf2b31d82bb8c32f8840f9d0627d92983edaac991a2b66c0a/ruff-0.11.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:6e8fb75e14560f7cf53b15bbc55baf5ecbe373dd5f3aab96ff7aa7777edd7630", size = 10977133 }, + { url = "https://files.pythonhosted.org/packages/94/cf/8ab81cb7dd7a3b0a3960c2769825038f3adcd75faf46dd6376086df8b128/ruff-0.11.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:842a472d7b4d6f5924e9297aa38149e5dcb1e628773b70e6387ae2c97a63c58f", size = 11378514 }, + { url = "https://files.pythonhosted.org/packages/d9/3a/a647fa4f316482dacf2fd68e8a386327a33d6eabd8eb2f9a0c3d291ec549/ruff-0.11.2-py3-none-win32.whl", hash = 
"sha256:aca01ccd0eb5eb7156b324cfaa088586f06a86d9e5314b0eb330cb48415097cc", size = 10319835 }, + { url = "https://files.pythonhosted.org/packages/86/54/3c12d3af58012a5e2cd7ebdbe9983f4834af3f8cbea0e8a8c74fa1e23b2b/ruff-0.11.2-py3-none-win_amd64.whl", hash = "sha256:3170150172a8f994136c0c66f494edf199a0bbea7a409f649e4bc8f4d7084080", size = 11373713 }, + { url = "https://files.pythonhosted.org/packages/d6/d4/dd813703af8a1e2ac33bf3feb27e8a5ad514c9f219df80c64d69807e7f71/ruff-0.11.2-py3-none-win_arm64.whl", hash = "sha256:52933095158ff328f4c77af3d74f0379e34fd52f175144cefc1b192e7ccd32b4", size = 10441990 }, ] [[package]] From 1cf91a23867f4ca2678f6adffb8dcab8ee1df59e Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Thu, 27 Mar 2025 15:02:44 -0400 Subject: [PATCH 27/30] docs: fix llms-txt (#30528) * Fix trailing slashes * Fix chat model integration links --- docs/static/llms.txt | 570 +++++++++++++++++++++---------------------- 1 file changed, 285 insertions(+), 285 deletions(-) diff --git a/docs/static/llms.txt b/docs/static/llms.txt index 37c9dd44915..986a0e50685 100644 --- a/docs/static/llms.txt +++ b/docs/static/llms.txt @@ -2,50 +2,50 @@ ## High level -- **[Why LangChain?](https://python.langchain.com/docs/concepts/why_langchain)**: Overview of the value that LangChain provides. -- **[Architecture](https://python.langchain.com/docs/concepts/architecture)**: How packages are organized in the LangChain ecosystem. +[Why LangChain?](https://python.langchain.com/docs/concepts/why_langchain/): considering using LangChain, when building complex AI applications, and when needing to evaluate AI applications This page discusses the main reasons to use LangChain: standardized component interfaces, orchestration capabilities, and observability/evaluation through LangSmith +[Architecture](https://python.langchain.com/docs/concepts/architecture/): needing an overview of the LangChain architecture, exploring the various packages and components, or deciding which parts to use for a specific application. Provides a high-level overview of the different packages that make up the LangChain framework, including langchain-core, langchain, integration packages, langchain-community, langgraph, langserve, and LangSmith. ## Concepts -- **[Chat models](https://python.langchain.com/docs/concepts/chat_models)**: LLMs exposed via a chat API that process sequences of messages as input and output a message. -- **[Messages](https://python.langchain.com/docs/concepts/messages)**: The unit of communication in chat models, used to represent model input and output. -- **[Chat history](https://python.langchain.com/docs/concepts/chat_history)**: A conversation represented as a sequence of messages, alternating between user messages and model responses. -- **[Tools](https://python.langchain.com/docs/concepts/tools)**: A function with an associated schema defining the function's name, description, and the arguments it accepts. -- **[Tool calling](https://python.langchain.com/docs/concepts/tool_calling)**: A type of chat model API that accepts tool schemas, along with messages, as input and returns invocations of those tools as part of the output message. -- **[Structured output](https://python.langchain.com/docs/concepts/structured_outputs)**: A technique to make a chat model respond in a structured format, such as JSON that matches a given schema. -- **[Memory](https://langchain-ai.github.io/langgraph/concepts/memory/)**: Information about a conversation that is persisted so that it can be used in future conversations. 
-- **[Multimodality](https://python.langchain.com/docs/concepts/multimodality)**: The ability to work with data that comes in different forms, such as text, audio, images, and video. -- **[Runnable interface](https://python.langchain.com/docs/concepts/runnables)**: The base abstraction that many LangChain components and the LangChain Expression Language are built on. -- **[Streaming](https://python.langchain.com/docs/concepts/streaming)**: LangChain streaming APIs for surfacing results as they are generated. -- **[LangChain Expression Language (LCEL)](https://python.langchain.com/docs/concepts/lcel)**: A syntax for orchestrating LangChain components. Most useful for simpler applications. -- **[Document loaders](https://python.langchain.com/docs/concepts/document_loaders)**: Load a source as a list of documents. -- **[Retrieval](https://python.langchain.com/docs/concepts/retrieval)**: Information retrieval systems can retrieve structured or unstructured data from a datasource in response to a query. -- **[Text splitters](https://python.langchain.com/docs/concepts/text_splitters)**: Split long text into smaller chunks that can be individually indexed to enable granular retrieval. -- **[Embedding models](https://python.langchain.com/docs/concepts/embedding_models)**: Models that represent data such as text or images in a vector space. -- **[Vector stores](https://python.langchain.com/docs/concepts/vectorstores)**: Storage of and efficient search over vectors and associated metadata. -- **[Retriever](https://python.langchain.com/docs/concepts/retrievers)**: A component that returns relevant documents from a knowledge base in response to a query. -- **[Retrieval Augmented Generation (RAG)](https://python.langchain.com/docs/concepts/rag)**: A technique that enhances language models by combining them with external knowledge bases. -- **[Agents](https://python.langchain.com/docs/concepts/agents)**: Use a [language model](https://python.langchain.com/docs/concepts/chat_models) to choose a sequence of actions to take. Agents can interact with external resources via [tool](https://python.langchain.com/docs/concepts/tools). -- **[Prompt templates](https://python.langchain.com/docs/concepts/prompt_templates)**: Component for factoring out the static parts of a model "prompt" (usually a sequence of messages). Useful for serializing, versioning, and reusing these static parts. -- **[Output parsers](https://python.langchain.com/docs/concepts/output_parsers)**: Responsible for taking the output of a model and transforming it into a more suitable format for downstream tasks. Output parsers were primarily useful prior to the general availability of [tool calling](https://python.langchain.com/docs/concepts/tool_calling) and [structured outputs](https://python.langchain.com/docs/concepts/structured_outputs). -- **[Few-shot prompting](https://python.langchain.com/docs/concepts/few_shot_prompting)**: A technique for improving model performance by providing a few examples of the task to perform in the prompt. -- **[Example selectors](https://python.langchain.com/docs/concepts/example_selectors)**: Used to select the most relevant examples from a dataset based on a given input. Example selectors are used in few-shot prompting to select examples for a prompt. -- **[Async programming](https://python.langchain.com/docs/concepts/async)**: The basics that one should know to use LangChain in an asynchronous context. 
-- **[Callbacks](https://python.langchain.com/docs/concepts/callbacks)**: Callbacks enable the execution of custom auxiliary code in built-in components. Callbacks are used to stream outputs from LLMs in LangChain, trace the intermediate steps of an application, and more. -- **[Tracing](https://python.langchain.com/docs/concepts/tracing)**: The process of recording the steps that an application takes to go from input to output. Tracing is essential for debugging and diagnosing issues in complex applications. -- **[Evaluation](https://python.langchain.com/docs/concepts/evaluation)**: The process of assessing the performance and effectiveness of AI applications. This involves testing the model's responses against a set of predefined criteria or benchmarks to ensure it meets the desired quality standards and fulfills the intended purpose. This process is vital for building reliable applications. -- **[Testing](https://python.langchain.com/docs/concepts/testing)**: The process of verifying that a component of an integration or application works as expected. Testing is essential for ensuring that the application behaves correctly and that changes to the codebase do not introduce new bugs. +[Chat Models](https://python.langchain.com/docs/concepts/chat_models/): building applications using chat models, learning about chat model interfaces and features, or interested in integrating chat models with external tools and services. Provides an overview of chat models in LangChain, including their features, integration options, interfaces, tool calling, structured outputs, multimodality, context windows, and advanced topics like rate-limiting and caching. +[Messages](https://python.langchain.com/docs/concepts/messages/): querying LangChain's chat message format, understanding different message types, building chat applications. Messages are the unit of communication in chat models, representing input/output with roles, content, metadata. Covers SystemMessage, HumanMessage, AIMessage, AIMessageChunk, ToolMessage, RemoveMessage, and legacy FunctionMessage. +[Chat history](https://python.langchain.com/docs/concepts/chat_history/): dealing with chat history, managing chat context, or understanding conversation patterns. Covers chat history structure, conversation patterns between user/assistant/tools, and guidelines for managing chat history to stay within context window. +[Tools](https://python.langchain.com/docs/concepts/tools/): needing an overview of tools in LangChain, wanting to create custom tools, or learning how to pass runtime values to tools. Tools are a way to encapsulate functions with schemas that can be passed to chat models supporting tool calling. The page covers the tool interface, creating tools using the @tool decorator, configuring tool schemas, tool artifacts, special type annotations like InjectedToolArg, and toolkits. +[tool calling](https://python.langchain.com/docs/concepts/tool_calling/): needing to understand how to enable tool calling functionality, how to create tools from functions, how to bind tools to a model that supports tool calling. The page covers the key concepts of tool calling, including tool creation using decorators, tool binding to models, tool calling by models, and tool execution. It provides an overview, recommended usage, and best practices. 
+[structured outputs](https://python.langchain.com/docs/concepts/structured_outputs/): needing to return output in a structured format, working with databases or APIs that require structured data, or building applications with structured responses. Covers structured output concepts like schema definition and methods like tool calling and JSON mode, as well as helper functions, to instruct models to produce structured outputs conforming to a given schema. +[Memory](https://langchain-ai.github.io/langgraph/concepts/memory/): developing agents with memory capabilities, implementing memory management strategies, or learning about different types of memory for AI agents. Covers topics related to short-term and long-term memory for agents, techniques for managing conversation history and summarizing past conversations, different types of memory (semantic, episodic, procedural), and approaches for writing memories in the hot path or in the background. +[Multimodality](https://python.langchain.com/docs/concepts/multimodality/): needing to understand multimodal capabilities, using chat models with multimodal inputs, or using multimodal retrieval/embeddings. Discusses the ability of LangChain components like chat models, embedding models, and vector stores to handle multimodal data like text, images, audio, video. Covers current status and limitations around multimodal inputs and outputs for chat models. +[invoke](https://python.langchain.com/docs/concepts/runnables/): learning how to use the Runnable interface, working with custom Runnables, or needing to configure Runnables at runtime. The page covers the Runnable interface, its methods for invocation, batching, streaming, inspecting schemas, and configuration. It explains RunnableConfig, custom Runnables, and configurable Runnables. +[stream](https://python.langchain.com/docs/concepts/streaming/): building applications that use streaming, displaying partial results in real time, or providing updates on pipeline or workflow progress. This page covers streaming in LangChain, including what can be streamed in LLM applications, the streaming APIs available, how to write custom data to the stream, and how LangChain automatically enables streaming for chat models in certain cases. +[LCEL](https://python.langchain.com/docs/concepts/lcel/): needing an overview of the LangChain Expression Language (LCEL), deciding whether to use LCEL or not, and understanding how to compose chains using LCEL primitives. Provides an overview of the LCEL, a declarative approach to building chains from existing Runnables, covering its benefits, composition primitives like RunnableSequence and RunnableParallel, the composition syntax, automatic type coercion, and guidance on when to use LCEL versus alternatives like LangGraph. +[Document Loaders](https://python.langchain.com/docs/concepts/document_loaders/): needing to load data from various sources like files, webpages, or databases, or handling large datasets with lazy loading. Document loaders help load data from different sources into a standardized Document object format, with options for lazy loading of large datasets. 
+[Retrieval](https://python.langchain.com/docs/concepts/retrieval/): building retrieval systems, understanding query analysis, or integrating with databases. This page covers key concepts and techniques in retrieval systems, including query analysis (re-writing and construction), vector and lexical indexes, databases, and LangChain's unified retriever interface. +[Text Splitters](https://python.langchain.com/docs/concepts/text_splitters/): working with long documents, handling limited model input sizes, or optimizing retrieval systems. This page discusses different strategies for splitting large texts into smaller chunks, including length-based, text structure-based, document structure-based, and semantic meaning-based approaches. +[Embedding Models](https://python.langchain.com/docs/concepts/embedding_models/): working with text embeddings for search/retrieval, comparing text similarity using embedding vectors, and selecting or integrating text embedding models. It covers key concepts of embedding models: converting text to numerical vectors, measuring similarity between vectors, embedding models (historical context, interface, integrations), and common similarity metrics (cosine, Euclidean, dot product). +[Vector stores](https://python.langchain.com/docs/concepts/vectorstores/): building applications that need to index and retrieve information based on semantic similarity, integrating vector databases into an application, or exploring advanced vector search and retrieval techniques. Vector stores are specialized data stores that enable indexing and retrieving information based on vector representations (embeddings) of data, allowing semantic similarity search over unstructured data like text, images, and audio. The page covers vector store integrations, the core interface, adding/deleting documents, basic and advanced similarity search techniques, and concepts like metadata filtering. +[Retrievers](https://python.langchain.com/docs/concepts/retrievers/): building a retrieval system, integrating different retrieval sources, or linking retrieved information to source documents. This page outlines the retriever interface in LangChain, common types of retrievers such as vector stores and search APIs, and advanced retrieval patterns like ensembling and retaining source document information. +[Retrieval Augmented Generation (RAG)](https://python.langchain.com/docs/concepts/rag/): developing applications that incorporate retrieval and generation, building question-answering systems with external data sources, or optimizing knowledge retrieval and integration into language models. Covers the concept of Retrieval Augmented Generation (RAG), which combines retrieval systems with language models to utilize external knowledge, access up-to-date information, leverage domain-specific expertise, reduce hallucination, and integrate knowledge cost-effectively. +[Agents](https://python.langchain.com/docs/concepts/agents/): building AI agents or systems that take high-level tasks and perform a series of actions to accomplish them, or transitioning from the legacy AgentExecutor to the newer and more flexible LangGraph system. Provides an overview of agents in LangChain, the legacy AgentExecutor concept, resources for using AgentExecutor, and guidance on migrating to the preferred LangGraph architecture for building customizable agents. 
+[Prompt Templates](https://python.langchain.com/docs/concepts/prompt_templates/): creating prompts for language models, formatting chat messages, slotting messages into specific locations in a prompt. This page covers different types of prompt templates (string, chat, messages placeholder) for formatting prompts for language models and chat models. +[Output Parsers](https://python.langchain.com/docs/concepts/output_parsers/): looking for ways to extract structured data from model outputs, parsing model outputs into different formats, or handling errors in parsing. Covers various LangChain output parsers like JSON, XML, CSV, Pandas DataFrame, along with capabilities like output fixing, retrying, and using user-defined formats. +[Few-shot prompting](https://python.langchain.com/docs/concepts/few_shot_prompting/): needing to improve model performance, when deciding how to format few-shot examples, when selecting examples for few-shot prompting The page covers generating examples, number of examples, selecting examples, and formatting examples for few-shot prompting with language models. +[Example Selectors](https://python.langchain.com/docs/concepts/example_selectors/): selecting examples for few-shot prompting, dynamically choosing examples for prompts, or understanding different example selection techniques. The page covers example selectors, which are classes responsible for selecting and formatting examples to include as part of prompts for improved performance with few-shot learning. +[Async programming](https://python.langchain.com/docs/concepts/async/): building asynchronous applications with LangChain, working with async runnables, or handling async API calls. Explains LangChain's asynchronous APIs, delegation to sync methods, performance considerations, compatibility with asyncio, and usage in Jupyter notebooks. +[Callbacks](https://python.langchain.com/docs/concepts/callbacks/): [needing to log, monitor, or stream events in an LLM application] [This page covers LangChain's callback system, which allows hooking into various stages of an LLM application for logging, monitoring, streaming, and other purposes. It explains the different callback events, callback handlers, and how to pass callbacks.] +[Tracing](https://python.langchain.com/docs/concepts/tracing/): tracing the steps of a chain/agent for debugging, understanding the chain's flow, or inspecting intermediary outputs. Discusses the concept of tracing in LangChain, including that traces contain runs which are individual steps, and that tracing provides observability into chains/agents. +[Evaluation](https://python.langchain.com/docs/concepts/evaluation/): evaluating the performance of LLM-powered applications, creating or curating datasets, defining metrics for evaluation This page covers the concept of evaluation in LangChain, including using LangSmith to create datasets, define metrics, track results over time, and run evaluations automatically. 
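
A short sketch of the prompt-template and message concepts covered above: a chat prompt with a system message, a placeholder for prior history, and a templated human turn (the variable names are illustrative only):

```python
# Build a chat prompt and fill in history plus the current question.
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a concise assistant."),
    MessagesPlaceholder("history"),   # prior conversation is slotted in here
    ("human", "{question}"),
])

prompt_value = prompt.invoke({
    "history": [HumanMessage("Hi"), AIMessage("Hello! How can I help?")],
    "question": "What is a prompt template?",
})
print(prompt_value.to_messages())  # fully formatted message list, ready for a chat model
```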
+[Testing](https://python.langchain.com/docs/concepts/testing/): testing LangChain components, implementing unit tests, or setting up integration tests This page explains unit tests, integration tests, and standard tests in LangChain, including code examples ## How-to guides ### Installation -- [How to: install LangChain packages](https://python.langchain.com/docs/how_to/installation/) -- [How to: use LangChain with different Pydantic versions](https://python.langchain.com/docs/how_to/pydantic_compatibility) -- [How to: return structured data from a model](https://python.langchain.com/docs/how_to/structured_output/) -- [How to: use a model to call tools](https://python.langchain.com/docs/how_to/tool_calling) -- [How to: stream runnables](https://python.langchain.com/docs/how_to/streaming) -- [How to: debug your LLM apps](https://python.langchain.com/docs/how_to/debugging/) +[How to: install LangChain packages](https://python.langchain.com/docs/how_to/installation/): installing LangChain packages, learning about the LangChain ecosystem packages, installing specific ecosystem packages This page explains how to install the main LangChain package, as well as different ecosystem packages like langchain-core, langchain-community, langchain-openai, langchain-experimental, langgraph, langserve, langchain-cli, and langsmith SDK. +[How to: use LangChain with different Pydantic versions](https://python.langchain.com/docs/how_to/pydantic_compatibility/): needing to use LangChain with different Pydantic versions, needing to install Pydantic 2 with LangChain, or avoiding using the pydantic.v1 namespace with LangChain APIs. The page explains that LangChain 0.3 uses Pydantic 2 internally and advises users to install Pydantic 2 and avoid using the pydantic.v1 namespace with LangChain APIs. +[How to: return structured data from a model](https://python.langchain.com/docs/how_to/structured_output/): LLM should read this page when: 1) wanting to return structured data from a model, 2) building applications that require structured outputs, 3) exploring techniques for parsing model outputs into objects or schemas. This page covers methods for obtaining structured outputs from language models, including using .with_structured_output(), prompting techniques with output parsers, and handling complex schemas with few-shot examples. +[How to: use chat models to call tools](https://python.langchain.com/docs/how_to/tool_calling/): needing to call tools from chat models, wanting to use chat models to generate structured output, or doing extraction from text using chat models. Explains how to define tool schemas as Python functions, Pydantic/TypedDict classes, or LangChain Tools; bind them to chat models; retrieve tool calls from LLM responses; and optionally parse tool calls into structured objects. +[How to: stream runnables](https://python.langchain.com/docs/how_to/streaming/): Line 1: 'wanting to learn how to stream LLM responses, stream intermediate steps, and configure streaming events.' Line 2: 'This page covers how to use the `stream` and `astream` methods to stream final outputs, how to use `astream_events` to stream both final outputs and intermediate steps, filtering events, propagating callbacks for streaming, and working with input streams.' +[How to: debug your LLM apps](https://python.langchain.com/docs/how_to/debugging/): debugging LLM applications, adding print statements, or logging events for tracing. 
Covers setting verbose mode to print important events, debug mode to print all events, and using LangSmith for visualizing event traces. ### Components @@ -53,207 +53,207 @@ These are the core building blocks you can use when building applications. #### Chat models -[Chat Models](https://python.langchain.com/docs/concepts/chat_models) are newer forms of language models that take messages in and output a message. -See [supported integrations](https://python.langchain.com/docs/integrations/chat/) for details on getting started with chat models from a specific provider. +[Chat Models](https://python.langchain.com/docs/concepts/chat_models/): building applications using chat models, learning about chat model interfaces and features, or interested in integrating chat models with external tools and services. Provides an overview of chat models in LangChain, including their features, integration options, interfaces, tool calling, structured outputs, multimodality, context windows, and advanced topics like rate-limiting and caching. +[here](https://python.langchain.com/docs/integrations/chat/): integrating chat models into an application, using chat models for conversational AI tasks, or choosing between different chat model providers. Provides an overview of chat models integrated with LangChain, including OpenAI, Anthropic, Google, and others. Covers key features like tool calling, structured output, JSON mode, local usage, and multimodal support. -- [How to: do function/tool calling](https://python.langchain.com/docs/how_to/tool_calling) -- [How to: get models to return structured output](https://python.langchain.com/docs/how_to/structured_output) -- [How to: cache model responses](https://python.langchain.com/docs/how_to/chat_model_caching) -- [How to: get log probabilities](https://python.langchain.com/docs/how_to/logprobs) -- [How to: create a custom chat model class](https://python.langchain.com/docs/how_to/custom_chat_model) -- [How to: stream a response back](https://python.langchain.com/docs/how_to/chat_streaming) -- [How to: track token usage](https://python.langchain.com/docs/how_to/chat_token_usage_tracking) -- [How to: track response metadata across providers](https://python.langchain.com/docs/how_to/response_metadata) -- [How to: use chat model to call tools](https://python.langchain.com/docs/how_to/tool_calling) -- [How to: stream tool calls](https://python.langchain.com/docs/how_to/tool_streaming) -- [How to: handle rate limits](https://python.langchain.com/docs/how_to/chat_model_rate_limiting) -- [How to: few shot prompt tool behavior](https://python.langchain.com/docs/how_to/tools_few_shot) -- [How to: bind model-specific formatted tools](https://python.langchain.com/docs/how_to/tools_model_specific) -- [How to: force a specific tool call](https://python.langchain.com/docs/how_to/tool_choice) -- [How to: work with local models](https://python.langchain.com/docs/how_to/local_llms) -- [How to: init any model in one line](https://python.langchain.com/docs/how_to/chat_models_universal_init/) +[How to: use chat models to call tools](https://python.langchain.com/docs/how_to/tool_calling/): needing to call tools from chat models, wanting to use chat models to generate structured output, or doing extraction from text using chat models. Explains how to define tool schemas as Python functions, Pydantic/TypedDict classes, or LangChain Tools; bind them to chat models; retrieve tool calls from LLM responses; and optionally parse tool calls into structured objects. 
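
The tool-calling entry above describes defining a tool schema and binding it to a chat model; a minimal sketch of that flow (assuming `langchain-openai` is installed, with an example model name):

```python
# Define a tool with @tool, bind it to a chat model, and read the generated tool calls.
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI  # assumes langchain-openai is installed

@tool
def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b

llm_with_tools = ChatOpenAI(model="gpt-4o-mini").bind_tools([add])
ai_msg = llm_with_tools.invoke("What is 2 + 3?")
print(ai_msg.tool_calls)  # e.g. [{"name": "add", "args": {"a": 2, "b": 3}, ...}]
```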
+[How to: get models to return structured output](https://python.langchain.com/docs/how_to/structured_output/): wanting to obtain structured output from an LLM, needing to parse JSON/XML/YAML output from an LLM, or looking to use few-shot examples with structured outputs. This page covers using the `.with_structured_output()` method to obtain structured data from LLMs, prompting techniques to elicit structured outputs, and parsing structured outputs. +[How to: cache model responses](https://python.langchain.com/docs/how_to/chat_model_caching/): needing to cache ChatModel responses for efficiency, needing to reduce API calls for cost savings, or during development. This page covers how to use an in-memory cache or a SQLite database for caching ChatModel responses, which can improve performance and reduce costs. +[How to: get log probabilities](https://python.langchain.com/docs/how_to/logprobs/): Line 1: 'seeking to get token-level log probabilities from OpenAI chat models, when needing to understand how log probabilities are represented in LangChain' Line 2: 'Explains how to configure OpenAI chat models to return token log probabilities, and how these are included in the response metadata and streamed responses.' +[How to: create a custom chat model class](https://python.langchain.com/docs/how_to/custom_chat_model/): creating a custom chat model class, integrating a new language model as a chat model, or implementing streaming for a chat model. This page explains how to create a custom chat model class by inheriting from BaseChatModel, and implementing methods like _generate and _stream. It covers handling inputs, messages, streaming, identifying parameters, and contributing custom chat models. +[How to: stream a response back](https://python.langchain.com/docs/how_to/chat_streaming/): LLM should read this page when: 1) It needs to stream chat model responses token-by-token 2) It needs to understand how to use the astream() and astream_events() methods for chat models 3) It wants to see examples of streaming chat model responses synchronously and asynchronously This page explains how to stream chat model responses token-by-token using the astream() and astream_events() methods, and provides examples for synchronous and asynchronous streaming with chat models that support this feature. +[How to: track token usage](https://python.langchain.com/docs/how_to/chat_token_usage_tracking/): tracking token usage for chat models, determining costs of using chat models, implementing token usage tracking in applications. Provides methods to track token usage from OpenAI and Anthropic chat models through AIMessage.usage_metadata, callbacks, and using LangSmith. Covers streaming token usage and aggregating usage across multiple calls. +[How to: track response metadata across providers](https://python.langchain.com/docs/how_to/response_metadata/): needing to access metadata from model responses, wanting to get information like token usage or log probabilities, or checking safety ratings Explains how to access response metadata from various chat model providers like OpenAI, Anthropic, Vertex AI, etc. Shows code examples of retrieving metadata like token usage, log probabilities, and safety ratings. +[How to: use chat models to call tools](https://python.langchain.com/docs/how_to/tool_calling/): needing to call tools from chat models, wanting to use chat models to generate structured output, or doing extraction from text using chat models. 
Explains how to define tool schemas as Python functions, Pydantic/TypedDict classes, or LangChain Tools; bind them to chat models; retrieve tool calls from LLM responses; and optionally parse tool calls into structured objects. +[How to: stream tool calls](https://python.langchain.com/docs/how_to/tool_streaming/): Line 1: 'wanting to stream tool calls, when needing to handle partial tool call data, or when needing to accumulate tool call chunks' Line 2: 'This page explains how to stream tool calls, merge message chunks to accumulate tool call chunks, and parse tool calls from accumulated chunks, with code examples.' +[How to: handle rate limits](https://python.langchain.com/docs/how_to/chat_model_rate_limiting/): handling rate limits from model providers, running many parallel queries to a model, benchmarking a chat model. The page explains how to initialize and use an in-memory rate limiter with chat models to limit the number of requests made per unit time. +[How to: few shot prompt tool behavior](https://python.langchain.com/docs/how_to/tools_few_shot/): using few-shot examples to improve tool calling, demonstrating how to incorporate example queries and responses into the prompt. The page explains how to create few-shot prompts including examples of tool usage, allowing the model to learn from these demonstrations to improve its ability to correctly call tools for math operations or other tasks. +[How to: bind model-specific formatted tools](https://python.langchain.com/docs/how_to/tools_model_specific/): binding model-specific tools, binding OpenAI tool schemas, invoking model-specific tools This page explains how to bind model-specific tool schemas directly to an LLM, with an example using the OpenAI tool schema format. +[How to: force models to call a tool](https://python.langchain.com/docs/how_to/tool_choice/): needing to force an LLM to call a specific tool, needing to force an LLM to call at least one tool This page shows how to use the tool_choice parameter to force an LLM to call a specific tool or to call at least one tool from a set of available tools. +[How to: work with local models](https://python.langchain.com/docs/how_to/local_llms/): [running LLMs locally on a user's device, using open-source LLMs, utilizing custom prompts with LLMs] [Overview of open-source LLMs and frameworks for running inference locally, instructions for setting up and using local LLMs (Ollama, llama.cpp, GPT4All, llamafile), guidance on formatting prompts for specific LLMs, potential use cases for local LLMs.] +[How to: init any model in one line](https://python.langchain.com/docs/how_to/chat_models_universal_init/): initializing chat models for different model providers, creating a configurable chat model, inferring the model provider from the model name. The page explains how to initialize any LLM chat model integration in one line using the init_chat_model() helper, create a configurable chat model with default or custom parameters, and infer the model provider based on the model name. #### Messages -[Messages](https://python.langchain.com/docs/concepts/messages) are the input and output of chat models. They have some `content` and a `role`, which describes the source of the message. +[Messages](https://python.langchain.com/docs/concepts/messages/): querying LangChain's chat message format, understanding different message types, building chat applications. Messages are the unit of communication in chat models, representing input/output with roles, content, metadata. 
Covers SystemMessage, HumanMessage, AIMessage, AIMessageChunk, ToolMessage, RemoveMessage, and legacy FunctionMessage. -- [How to: trim messages](https://python.langchain.com/docs/how_to/trim_messages/) -- [How to: filter messages](https://python.langchain.com/docs/how_to/filter_messages/) -- [How to: merge consecutive messages of the same type](https://python.langchain.com/docs/how_to/merge_message_runs/) +[How to: manage large chat history](https://python.langchain.com/docs/how_to/trim_messages/): working with long chat histories, when concerned about token limits for chat models, when implementing token management strategies. This page explains how to use the trim_messages utility to reduce the size of a chat message history to fit within token limits, covering trimming by token count or message count, and allowing customization of trimming strategies. +[How to: filter messages](https://python.langchain.com/docs/how_to/filter_messages/): needing to filter messages by type, id, or name when working with message histories, when using chains/agents that pass message histories between components. Provides instructions and examples for filtering message lists (e.g. to only include human messages) using the filter_messages utility, including basic usage, chaining with models, and API reference. +[How to: merge consecutive messages of the same type](https://python.langchain.com/docs/how_to/merge_message_runs/): it needs to merge consecutive messages of the same type for a particular model, when it wants to compose the merge_message_runs utility with other components in a chain, or when it needs to invoke the merge_message_runs utility imperatively. The page explains how to use the merge_message_runs utility to merge consecutive messages of the same type, provides examples of using it in chains or invoking it directly, and links to the API reference for more details. #### Prompt templates -[Prompt Templates](https://python.langchain.com/docs/concepts/prompt_templates) are responsible for formatting user input into a format that can be passed to a language model. +[Prompt Templates](https://python.langchain.com/docs/concepts/prompt_templates/): creating prompts for language models, formatting chat messages, slotting messages into specific locations in a prompt. This page covers different types of prompt templates (string, chat, messages placeholder) for formatting prompts for language models and chat models. -- [How to: use few shot examples](https://python.langchain.com/docs/how_to/few_shot_examples) -- [How to: use few shot examples in chat models](https://python.langchain.com/docs/how_to/few_shot_examples_chat/) -- [How to: partially format prompt templates](https://python.langchain.com/docs/how_to/prompts_partial) -- [How to: compose prompts together](https://python.langchain.com/docs/how_to/prompts_composition) +[How to: use few shot examples](https://python.langchain.com/docs/how_to/few_shot_examples/): creating few-shot prompts, using example selectors, providing examples to large language models This page explains how to use few-shot examples to provide context to language models, including creating formatters, constructing example sets, using example selectors like SemanticSimilarityExampleSelector, and creating FewShotPromptTemplates. 
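
As a quick sketch of the chat-history management described above, `trim_messages` can cap a conversation at a budget; here the counter is simply `len`, so the budget is a message count rather than true tokens:

```python
# Keep only the most recent messages that fit the budget, always retaining the system message.
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, trim_messages

history = [
    SystemMessage("You are a helpful assistant."),
    HumanMessage("Hi, I'm Ana."),
    AIMessage("Hi Ana!"),
    HumanMessage("What's my name?"),
]

trimmed = trim_messages(
    history,
    max_tokens=2,          # budget, in units of the counter below
    token_counter=len,     # count each message as one unit for this sketch
    strategy="last",       # keep the most recent messages
    include_system=True,   # always keep the system message
)
print([m.content for m in trimmed])
```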
+[How to: use few shot examples in chat models](https://python.langchain.com/docs/how_to/few_shot_examples_chat/): LLM should read this page when: 1) wanting to provide a few-shot example to fine-tune a chat model's output, 2) needing to dynamically select examples from a larger set based on semantic similarity to the input This page covers how to provide few-shot examples to chat models using either fixed examples or dynamically selecting examples from a vectorstore based on semantic similarity to the input. +[How to: partially format prompt templates](https://python.langchain.com/docs/how_to/prompts_partial/): needing to partially format prompt templates, wanting to pass partial strings to templates, or needing to pass functions returning strings to templates. Explains how to partially format prompt templates by passing in a subset of required values as strings or functions that return strings, to create a new template expecting only remaining values. +[How to: compose prompts together](https://python.langchain.com/docs/how_to/prompts_composition/): needing to compose prompts from various prompt components, working with chat prompts, or using the PipelinePromptTemplate class. This page explains how to concatenate different prompt templates together to build larger prompts, covering both string prompts and chat prompts, as well as using the PipelinePromptTemplate to reuse prompt components. #### Example selectors -[Example Selectors](https://python.langchain.com/docs/concepts/example_selectors) are responsible for selecting the correct few shot examples to pass to the prompt. +[Example Selectors](https://python.langchain.com/docs/concepts/example_selectors/): selecting examples for few-shot prompting, dynamically choosing examples for prompts, or understanding different example selection techniques. The page covers example selectors, which are classes responsible for selecting and formatting examples to include as part of prompts for improved performance with few-shot learning. -- [How to: use example selectors](https://python.langchain.com/docs/how_to/example_selectors) -- [How to: select examples by length](https://python.langchain.com/docs/how_to/example_selectors_length_based) -- [How to: select examples by semantic similarity](https://python.langchain.com/docs/how_to/example_selectors_similarity) -- [How to: select examples by semantic ngram overlap](https://python.langchain.com/docs/how_to/example_selectors_ngram) -- [How to: select examples by maximal marginal relevance](https://python.langchain.com/docs/how_to/example_selectors_mmr) -- [How to: select examples from LangSmith few-shot datasets](https://python.langchain.com/docs/how_to/example_selectors_langsmith/) +[How to: use example selectors](https://python.langchain.com/docs/how_to/example_selectors/): needing to select example prompts for few-shot learning, when having many examples to choose from, or when creating a custom example selector. Explains how to use example selectors in LangChain to select which examples to include in a prompt, covering built-in selectors like similarity and providing a custom example selector. +[How to: select examples by length](https://python.langchain.com/docs/how_to/example_selectors_length_based/): selecting examples for few-shot prompting, handling long examples that may exceed context window, and dynamically including the appropriate number of examples. 
This page explains how to use the LengthBasedExampleSelector to select examples based on their length, including fewer examples for longer inputs to avoid exceeding the context window. +[How to: select examples by semantic similarity](https://python.langchain.com/docs/how_to/example_selectors_similarity/): selecting relevant examples for few-shot prompting, building example-based systems, finding relevant reference cases This page covers how to select examples by similarity to the input using embedding-based semantic search over a vector store. +[How to: select examples by semantic ngram overlap](https://python.langchain.com/docs/how_to/example_selectors_ngram/): selecting relevant examples to include in few-shot prompts, determining relevancy through n-gram overlap scores, and customizing example selection thresholds. Explains how to use the NGramOverlapExampleSelector to select and order examples based on n-gram overlap with the input text, including setting thresholds and dynamically adding examples. +[How to: select examples by maximal marginal relevance](https://python.langchain.com/docs/how_to/example_selectors_mmr/): needing to select few-shot examples optimizing for both similarity to inputs and diversity from each other, working with example-based prompting for fewshot learning. Demonstrates how to use the MaxMarginalRelevanceExampleSelector, which selects examples by maximizing relevance to inputs while also optimizing for diversity between selected examples, contrasting it with just selecting by similarity. +[How to: select examples from LangSmith few-shot datasets](https://python.langchain.com/docs/how_to/example_selectors_langsmith/): [learning how to use LangSmith datasets for few-shot example selection, dynamically creating few-shot prompts from LangSmith data, integrating LangSmith with LangChain chains] [The page covers setting up LangSmith, querying LangSmith datasets for similar examples, and using those examples in a LangChain chain to create dynamic few-shot prompts for chat models.] #### LLMs -What LangChain calls [LLMs](https://python.langchain.com/docs/concepts/text_llms) are older forms of language models that take a string in and output a string. +[LLMs](https://python.langchain.com/docs/concepts/text_llms/): needing an overview of string-based language models, learning about legacy models in LangChain, or comparing string-based models to chat models. Covers LangChain's support for older language models that take strings as input and output, distinguishing them from newer chat models; advises using chat models where possible. -- [How to: cache model responses](https://python.langchain.com/docs/how_to/llm_caching) -- [How to: create a custom LLM class](https://python.langchain.com/docs/how_to/custom_llm) -- [How to: stream a response back](https://python.langchain.com/docs/how_to/streaming_llm) -- [How to: track token usage](https://python.langchain.com/docs/how_to/llm_token_usage_tracking) -- [How to: work with local models](https://python.langchain.com/docs/how_to/local_llms) +[How to: cache model responses](https://python.langchain.com/docs/how_to/llm_caching/): it needs to cache responses to save money and time, learn about caching in LangChain. LangChain provides an optional caching layer for LLMs to save money and time by reducing API calls for repeated requests. Examples show caching with InMemoryCache and SQLiteCache. 
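
The caching entry above can be illustrated with a small, hedged sketch: set a global in-memory cache so a repeated identical call is served from cache instead of the API (assumes `langchain-openai` is installed; the model name is an example):

```python
# Enable an in-memory LLM cache, then issue the same prompt twice.
from langchain_core.caches import InMemoryCache
from langchain_core.globals import set_llm_cache
from langchain_openai import ChatOpenAI  # assumes langchain-openai is installed

set_llm_cache(InMemoryCache())
llm = ChatOpenAI(model="gpt-4o-mini")

llm.invoke("Tell me a joke")  # first call: hits the API and populates the cache
llm.invoke("Tell me a joke")  # second identical call: answered from the cache
```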
+[How to: create a custom LLM class](https://python.langchain.com/docs/how_to/custom_llm/): creating a custom LLM class, wrapping their own LLM provider, integrating with a new language model not yet supported by LangChain. This page explains how to create a custom LLM class by implementing the required _call and _llm_type methods, as well as optional methods like _identifying_params, _acall, _stream, and _astream. It provides an example implementation, demonstrates testing and integration with LangChain APIs, and offers guidance for contributing custom LLM integrations. +[How to: stream a response back](https://python.langchain.com/docs/how_to/streaming_llm/): it needs to stream responses from an LLM, when it needs to work with async streaming from LLMs, when it needs to stream events from an LLM. This page shows how to stream responses token-by-token from LLMs using both sync and async methods, as well as how to stream events from LLMs asynchronously. +[How to: track token usage](https://python.langchain.com/docs/how_to/llm_token_usage_tracking/): tracking token usage for LLM calls, managing costs for an LLM application, or calculating costs based on token counts. The page covers how to track token usage using LangSmith, OpenAI callback handlers, and handling streaming contexts; it also summarizes limitations with legacy models for streaming. +[How to: work with local models](https://python.langchain.com/docs/how_to/local_llms/): [running LLMs locally on a user's device, using open-source LLMs, utilizing custom prompts with LLMs] [Overview of open-source LLMs and frameworks for running inference locally, instructions for setting up and using local LLMs (Ollama, llama.cpp, GPT4All, llamafile), guidance on formatting prompts for specific LLMs, potential use cases for local LLMs.] #### Output parsers -[Output Parsers](https://python.langchain.com/docs/concepts/output_parsers) are responsible for taking the output of an LLM and parsing into more structured format. +[Output Parsers](https://python.langchain.com/docs/concepts/output_parsers/): looking for ways to extract structured data from model outputs, parsing model outputs into different formats, or handling errors in parsing. Covers various LangChain output parsers like JSON, XML, CSV, Pandas DataFrame, along with capabilities like output fixing, retrying, and using user-defined formats. -- [How to: parse text from message objects](https://python.langchain.com/docs/how_to/output_parser_string) -- [How to: use output parsers to parse an LLM response into structured format](https://python.langchain.com/docs/how_to/output_parser_structured) -- [How to: parse JSON output](https://python.langchain.com/docs/how_to/output_parser_json) -- [How to: parse XML output](https://python.langchain.com/docs/how_to/output_parser_xml) -- [How to: parse YAML output](https://python.langchain.com/docs/how_to/output_parser_yaml) -- [How to: retry when output parsing errors occur](https://python.langchain.com/docs/how_to/output_parser_retry) -- [How to: try to fix errors in output parsing](https://python.langchain.com/docs/how_to/output_parser_fixing) -- [How to: write a custom output parser class](https://python.langchain.com/docs/how_to/output_parser_custom) +[How to: parse text from message objects](https://python.langchain.com/docs/how_to/output_parser_string/): needing to parse text from message objects, needing to extract text from chat model responses, or working with structured output formats. 
This page explains how to use the StrOutputParser to extract text from message objects, regardless of the underlying content format, such as text, multimodal data, or structured output. +[How to: use output parsers to parse an LLM response into structured format](https://python.langchain.com/docs/how_to/output_parser_structured/): [needing to parse LLM output into structured data, needing to stream partially parsed structured outputs, using LCEL with output parsers] 'Explains how to use output parsers like PydanticOutputParser to parse LLM text responses into structured formats like Python objects, and how to integrate them with prompts, models, and LCEL streaming.' +[How to: parse JSON output](https://python.langchain.com/docs/how_to/output_parser_json/): LLM should read this page when: 1) Prompting a language model to return JSON output 2) Parsing JSON output from a language model 3) Streaming partial JSON objects from a language model 'This page explains how to use the JsonOutputParser to specify a desired JSON schema, prompt a language model to generate output conforming to that schema, and parse the model's response as JSON. It covers using JsonOutputParser with and without Pydantic, streaming partial JSON objects, and provides code examples.' +[How to: parse XML output](https://python.langchain.com/docs/how_to/output_parser_xml/): needing to parse XML output from a model, when outputting prompts with XML formatting instructions for models, when streaming partial XML results This page shows how to use the XMLOutputParser to parse model output in XML format, including adding XML formatting instructions to prompts and streaming partial XML output +[How to: parse YAML output](https://python.langchain.com/docs/how_to/output_parser_yaml/): LLM should read this page when: 1) Needing to generate YAML output conforming to a specific schema 2) Incorporating YAML output into a larger prompt/chain 3) Parsing YAML output returned by an LLM 'This page explains how to use the YamlOutputParser to parse YAML output from language models, allowing the output to conform to a predefined schema. It covers setting up the parser, constructing prompts with formatting instructions, and chaining the parser with a model.' +[How to: retry when output parsing errors occur](https://python.langchain.com/docs/how_to/output_parser_retry/): [attempting to parse and handle partial or error LLM outputs, troubleshooting output parsing failures, implementing retry logic for parsing] [Explains how to use the RetryOutputParser to handle parsing errors by reprompting the LLM, provides examples for using it with OpenAI models and chaining it with other runnables.] +[How to: try to fix errors in output parsing](https://python.langchain.com/docs/how_to/output_parser_fixing/): needing to handle improperly formatted outputs, attempting to fix formatting issues using an LLM, or parsing outputs that do not conform to a predefined schema. Explains how to use the OutputFixingParser, which wraps another parser and attempts to fix formatting errors by consulting an LLM when the original parser fails. +[How to: write a custom output parser class](https://python.langchain.com/docs/how_to/output_parser_custom/): Line 1: 'creating a custom output parser, implementing a custom parser by inheriting from base classes, or parsing raw model outputs' Line 2: 'Covers how to create custom output parsers using runnable lambdas/generators (recommended) or by inheriting from base parser classes like BaseOutputParser and BaseGenerationOutputParser. 
Includes examples for simple and more complex parsing scenarios.' #### Document loaders -[Document Loaders](https://python.langchain.com/docs/concepts/document_loaders) are responsible for loading documents from a variety of sources. +[Document Loaders](https://python.langchain.com/docs/concepts/document_loaders/): needing to load data from various sources like files, webpages, or databases, or when handling large datasets with lazy loading. Document loaders help load data from different sources into a standardized Document object format, with options for lazy loading of large datasets. -- [How to: load PDF files](https://python.langchain.com/docs/how_to/document_loader_pdf) -- [How to: load web pages](https://python.langchain.com/docs/how_to/document_loader_web) -- [How to: load CSV data](https://python.langchain.com/docs/how_to/document_loader_csv) -- [How to: load data from a directory](https://python.langchain.com/docs/how_to/document_loader_directory) -- [How to: load HTML data](https://python.langchain.com/docs/how_to/document_loader_html) -- [How to: load JSON data](https://python.langchain.com/docs/how_to/document_loader_json) -- [How to: load Markdown data](https://python.langchain.com/docs/how_to/document_loader_markdown) -- [How to: load Microsoft Office data](https://python.langchain.com/docs/how_to/document_loader_office_file) -- [How to: write a custom document loader](https://python.langchain.com/docs/how_to/document_loader_custom) +- [How to: load PDF files](https://python.langchain.com/docs/how_to/document_loader_pdf/) +[How to: load web pages](https://python.langchain.com/docs/how_to/document_loader_web/): LLM should read this page when: - It needs to load and process web pages for question answering or other applications - It needs guidance on using web page content with LangChain 'The page covers how to load web pages into LangChain's Document format, including simple text extraction and advanced parsing of page structure. It demonstrates tools like WebBaseLoader and UnstructuredLoader, and shows how to perform operations like vector search over loaded web content.' +[How to: load CSV data](https://python.langchain.com/docs/how_to/document_loader_csv/): loading CSV files into a sequence of documents, customizing CSV parsing and loading, specifying a column to identify the document source This page explains how to load CSV files into a sequence of Document objects using LangChain's CSVLoader, including customizing the parsing, specifying a source column, and loading from a string. +[How to: load data from a directory](https://python.langchain.com/docs/how_to/document_loader_directory/): loading documents from a file system, handling various file encodings, or using custom document loaders. Shows how to load files from directories using the DirectoryLoader, handle encoding errors, use multithreading, and customize the loader class. +[How to: load HTML data](https://python.langchain.com/docs/how_to/document_loader_html/): loading HTML documents, parsing HTML files with specialized tools, or extracting text from HTML. This page covers how to load HTML documents into LangChain Document objects using Unstructured and BeautifulSoup4, with code examples and API references provided. +[How to: load JSON data](https://python.langchain.com/docs/how_to/document_loader_json/): loading JSON or JSON Lines data into LangChain Documents, or extracting metadata from JSON data. 
This page explains how to use the JSONLoader to convert JSON and JSONL data into LangChain Documents, including how to extract specific fields into the content and metadata, and provides examples for common JSON structures. +[How to: load Markdown data](https://python.langchain.com/docs/how_to/document_loader_markdown/): needing to load Markdown files, needing to retain Markdown elements, needing to parse Markdown into components This page covers how to load Markdown files into LangChain documents, including retaining elements like titles and lists, and parsing Markdown into components. +[How to: load Microsoft Office data](https://python.langchain.com/docs/how_to/document_loader_office_file/): loading Microsoft Office files (DOCX, XLSX, PPTX) into LangChain, when working with Azure AI Document Intelligence. It covers how to use the AzureAIDocumentIntelligenceLoader to load Office documents into LangChain Documents for further processing. +[How to: write a custom document loader](https://python.langchain.com/docs/how_to/document_loader_custom/): Line 1: 'creating a custom document loader, working with files, or using the GenericLoader abstraction' Line 2: 'This page explains how to create a custom document loader, work with files using BaseBlobParser and Blob, and use the GenericLoader to combine a BlobLoader with a BaseBlobParser.' #### Text splitters -[Text Splitters](https://python.langchain.com/docs/concepts/text_splitters) take a document and split into chunks that can be used for retrieval. +[Text Splitters](https://python.langchain.com/docs/concepts/text_splitters/): working with long documents, handling limited model input sizes, or optimizing retrieval systems This page discusses different strategies for splitting large texts into smaller chunks, including length-based, text structure-based, document structure-based, and semantic meaning-based approaches. -- [How to: recursively split text](https://python.langchain.com/docs/how_to/recursive_text_splitter) -- [How to: split HTML](https://python.langchain.com/docs/how_to/split_html) -- [How to: split by character](https://python.langchain.com/docs/how_to/character_text_splitter) -- [How to: split code](https://python.langchain.com/docs/how_to/code_splitter) -- [How to: split Markdown by headers](https://python.langchain.com/docs/how_to/markdown_header_metadata_splitter) -- [How to: recursively split JSON](https://python.langchain.com/docs/how_to/recursive_json_splitter) -- [How to: split text into semantic chunks](https://python.langchain.com/docs/how_to/semantic-chunker) -- [How to: split by tokens](https://python.langchain.com/docs/how_to/split_by_token) +[How to: recursively split text](https://python.langchain.com/docs/how_to/recursive_text_splitter/): splitting long text into smaller chunks, processing text from languages without word boundaries like Chinese or Japanese, parsing documents for downstream tasks. Covers how to recursively split text by list of characters like newlines and spaces, and options to customize characters for different languages. Discusses chunk size, overlap, and creating LangChain Document objects. 
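
A minimal sketch of the recursive splitting described above: break a long string into overlapping chunks on paragraph, sentence, and word boundaries (chunk sizes here are arbitrary examples):

```python
# Split text into overlapping chunks, then into Document objects with metadata.
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20)
text = "LangChain provides text splitters. " * 20

chunks = splitter.split_text(text)          # list of plain strings
print(len(chunks), chunks[0])

docs = splitter.create_documents(           # list of Document objects instead of strings
    [text], metadatas=[{"source": "example"}]
)
print(docs[0].metadata)
```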
+[How to: split HTML](https://python.langchain.com/docs/how_to/split_html/): needing to split HTML content into chunks, preserving semantic structure for better context during processing Explains different techniques to split HTML pages like HTMLHeaderTextSplitter, HTMLSectionSplitter, HTMLSemanticPreservingSplitter; covers preserving tables, lists, custom handlers +[How to: split by character](https://python.langchain.com/docs/how_to/character_text_splitter/): needing to split text by individual characters, needing to control chunk size by character count, needing to handle text with differing chunk sizes. Explains how to split text into chunks by character count, using the CharacterTextSplitter. Covers setting chunk size, overlap, and passing metadata. +[How to: split code](https://python.langchain.com/docs/how_to/code_splitter/): needing to split code into logical chunks, working with code from specific programming languages, or creating language-specific text splitters. Provides examples of using the RecursiveCharacterTextSplitter to split code from various programming languages like Python, JavaScript, Markdown, and others into document chunks based on language-specific separators. +[How to: split Markdown by headers](https://python.langchain.com/docs/how_to/markdown_header_metadata_splitter/): splitting markdown files into chunks, handling headers and metadata in markdown files, constraining chunk sizes in markdown files. This page covers how to split markdown files by headers into chunks, handle metadata associated with headers, and constrain chunk sizes using other text splitters like RecursiveCharacterTextSplitter. +[How to: recursively split JSON](https://python.langchain.com/docs/how_to/recursive_json_splitter/): splitting JSON data into smaller chunks, managing chunk sizes from list content within JSON data. Explains how to split JSON data into smaller chunks while keeping nested objects intact, control chunk sizes, and handle JSON lists by converting them to dictionaries before splitting. +[How to: split text into semantic chunks](https://python.langchain.com/docs/how_to/semantic-chunker/): building an application that needs to split long text into smaller chunks based on semantic meaning, when working with large documents that need to be broken down into semantically coherent sections, or when needing to control the granularity of text splitting. This page explains how to use the SemanticChunker from LangChain to split text into semantically coherent chunks by leveraging embedding models, with options to control the splitting behavior based on percentile, standard deviation, interquartile range, or gradient of embedding distance. +[How to: split by tokens](https://python.langchain.com/docs/how_to/split_by_token/): LLM should read this page when: 1) Splitting long text into chunks while counting tokens 2) Handling non-English languages for text splitting 3) Comparing different tokenizers for text splitting 'The page covers how to split text into chunks based on token count using different tokenizers like tiktoken, spaCy, SentenceTransformers, NLTK, KoNLPY (for Korean), and Hugging Face tokenizers. It explains the approaches, usage, and API references for each tokenizer.' #### Embedding models -[Embedding Models](https://python.langchain.com/docs/concepts/embedding_models) take a piece of text and create a numerical representation of it. 
-See [supported integrations](https://python.langchain.com/docs/integrations/text_embedding/) for details on getting started with embedding models from a specific provider. +[Embedding Models](https://python.langchain.com/docs/concepts/embedding_models/): LLM should read this page when: 1) Working with text embeddings for search/retrieval 2) Comparing text similarity using embedding vectors 3) Selecting or integrating text embedding models It covers key concepts of embedding models: converting text to numerical vectors, measuring similarity between vectors, embedding models (historical context, interface, integrations), and common similarity metrics (cosine, Euclidean, dot product). +[supported integrations](https://python.langchain.com/docs/integrations/text_embedding/): looking for integrations with embedding models, wanting to compare embedding providers, needing guidance on selecting an embedding model This page documents integrations with various model providers that allow using embeddings in LangChain, covering OpenAI, Azure, Google, AWS, HuggingFace, and other embedding services. -- [How to: embed text data](https://python.langchain.com/docs/how_to/embed_text) -- [How to: cache embedding results](https://python.langchain.com/docs/how_to/caching_embeddings) -- [How to: create a custom embeddings class](https://python.langchain.com/docs/how_to/custom_embeddings) +[How to: embed text data](https://python.langchain.com/docs/how_to/embed_text/): it needs to embed text into vectors, when it needs to use text embeddings for tasks like semantic search, and when it needs to understand the interface for text embedding models. This page explains how to use LangChain's Embeddings class to interface with various text embedding model providers, embed documents and queries, and work with the resulting vector representations of text. +[How to: cache embedding results](https://python.langchain.com/docs/how_to/caching_embeddings/): caching document embeddings to improve performance, caching query embeddings to improve performance, or choosing a data store for caching embeddings. This page covers how to use the CacheBackedEmbeddings class to cache document and query embeddings in a ByteStore, demonstrating its usage with a local file store and an in-memory store. It also explains how to specify the cache namespace to avoid collisions. +[How to: create a custom embeddings class](https://python.langchain.com/docs/how_to/custom_embeddings/): needing to use a custom text embedding model, integrating a new text embedding provider, or contributing a new text embedding integration. The page covers implementing custom text embedding models for LangChain by following the Embeddings interface, providing examples, testing, and contributing guidelines. #### Vector stores -[Vector stores](https://python.langchain.com/docs/concepts/vectorstores) are databases that can efficiently store and retrieve embeddings. -See [supported integrations](https://python.langchain.com/docs/integrations/vectorstores/) for details on getting started with vector stores from a specific provider. 
+[Vector stores](https://python.langchain.com/docs/concepts/vectorstores/): LLM should read this page when: 1) Building applications that need to index and retrieve information based on semantic similarity 2) Integrating vector databases into their application 3) Exploring advanced vector search and retrieval techniques Vector stores are specialized data stores that enable indexing and retrieving information based on vector representations (embeddings) of data, allowing semantic similarity search over unstructured data like text, images, and audio. The page covers vector store integrations, the core interface, adding/deleting documents, basic and advanced similarity search techniques, and concepts like metadata filtering. +[supported integrations](https://python.langchain.com/docs/integrations/vectorstores/): Line 1: 'integrating vector stores into applications, deciding which vector store to use, or understanding the capabilities of different vector stores' Line 2: 'This page provides an overview of vector stores, which are used to store embedded data and perform similarity search. It lists the different vector stores integrated with LangChain, along with their key features and capabilities.' -- [How to: use a vector store to retrieve data](https://python.langchain.com/docs/how_to/vectorstores) +[How to: use a vector store to retrieve data](https://python.langchain.com/docs/how_to/vectorstores/): building applications that require searching over large collections of text, when indexing and retrieving relevant information based on similarity between embeddings, and when working with vector databases and embeddings. The page covers how to create and query vector stores, which are used to store embedded vectors of text and search for similar embeddings. It explains how to initialize different vector store options like Chroma, FAISS, and LanceDB, and how to perform similarity searches on them. It also touches on asynchronous operations with vector stores. #### Retrievers -[Retrievers](https://python.langchain.com/docs/concepts/retrievers) are responsible for taking a query and returning relevant documents. +[Retrievers](https://python.langchain.com/docs/concepts/retrievers/): building a retrieval system, integrating different retrieval sources, or linking retrieved information to source documents. This page outlines the retriever interface in LangChain, common types of retrievers such as vector stores and search APIs, and advanced retrieval patterns like ensembling and retaining source document information. 
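
To ground the embedding-model entries above, a small sketch of the embeddings interface: embed documents and a query, then compare them with cosine similarity (assumes `langchain-openai` and `numpy` are installed; the model name is an example):

```python
# Embed texts and score their similarity to a query vector.
import numpy as np
from langchain_openai import OpenAIEmbeddings  # assumes langchain-openai is installed

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
doc_vectors = embeddings.embed_documents(
    ["Graphs store nodes and edges.", "Vectors encode meaning."]
)
query_vector = embeddings.embed_query("What is a graph?")

def cosine(a, b):
    a, b = np.array(a), np.array(b)
    return float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b)))

print([cosine(query_vector, d) for d in doc_vectors])  # higher score = more similar
```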
-- [How to: use a vector store to retrieve data](https://python.langchain.com/docs/how_to/vectorstore_retriever) -- [How to: generate multiple queries to retrieve data for](https://python.langchain.com/docs/how_to/MultiQueryRetriever) -- [How to: use contextual compression to compress the data retrieved](https://python.langchain.com/docs/how_to/contextual_compression) -- [How to: write a custom retriever class](https://python.langchain.com/docs/how_to/custom_retriever) -- [How to: add similarity scores to retriever results](https://python.langchain.com/docs/how_to/add_scores_retriever) -- [How to: combine the results from multiple retrievers](https://python.langchain.com/docs/how_to/ensemble_retriever) -- [How to: reorder retrieved results to mitigate the "lost in the middle" effect](https://python.langchain.com/docs/how_to/long_context_reorder) -- [How to: generate multiple embeddings per document](https://python.langchain.com/docs/how_to/multi_vector) -- [How to: retrieve the whole document for a chunk](https://python.langchain.com/docs/how_to/parent_document_retriever) -- [How to: generate metadata filters](https://python.langchain.com/docs/how_to/self_query) -- [How to: create a time-weighted retriever](https://python.langchain.com/docs/how_to/time_weighted_vectorstore) -- [How to: use hybrid vector and keyword retrieval](https://python.langchain.com/docs/how_to/hybrid) +[How to: use a vector store to retrieve data](https://python.langchain.com/docs/how_to/vectorstore_retriever/): using vector stores for retrieval, implementing maximum marginal relevance retrieval, or specifying additional search parameters. This page explains how to create a retriever from a vector store, how to use maximum marginal relevance retrieval, and how to pass parameters like similarity score thresholds and top-k results. +[How to: generate multiple queries to retrieve data for](https://python.langchain.com/docs/how_to/MultiQueryRetriever/): Line 1: 'improving retrieval results for search queries, retrieving documents from a vector database, or using an LLM to generate multiple queries for a given input' Line 2: 'Explains how to use MultiQueryRetriever to automatically generate multiple queries from an input question using an LLM, retrieve documents for each query, and take the unique union of results to improve retrieval performance.' +[How to: use contextual compression to compress the data retrieved](https://python.langchain.com/docs/how_to/contextual_compression/): [it needs to retrieve relevant information from a large corpus of documents, it needs to filter out irrelevant content from retrieved documents, it needs to compress or shorten documents to focus on query-relevant content] This page discusses contextual compression, a technique that allows retrieving only relevant portions of documents given a query, using various methods like LLM-based extractors/filters, embedding similarity filters, or combinations thereof via pipelines. +[How to: write a custom retriever class](https://python.langchain.com/docs/how_to/custom_retriever/): learning how to create a custom retriever, when implementing custom retrieval logic, when adding retrieval capabilities to an application. Explains how to implement a custom Retriever class by extending BaseRetriever, including providing examples and guidelines for contributing custom retrievers. 
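
The custom-retriever entry above can be sketched as follows: subclass `BaseRetriever` and implement `_get_relevant_documents`, here with a toy keyword match standing in for real retrieval logic (`KeywordRetriever` is a hypothetical name for illustration):

```python
# A minimal custom retriever that returns documents containing the query string.
from typing import List

from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever

class KeywordRetriever(BaseRetriever):
    documents: List[Document]
    k: int = 3

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        hits = [d for d in self.documents if query.lower() in d.page_content.lower()]
        return hits[: self.k]

retriever = KeywordRetriever(documents=[Document(page_content="Memgraph speaks Cypher.")])
print(retriever.invoke("cypher"))  # retrievers expose the standard Runnable interface
```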
+[How to: add similarity scores to retriever results](https://python.langchain.com/docs/how_to/add_scores_retriever/): needing to incorporate similarity/relevance scores from retrievers, using vector or multi-vector retrievers, or propagating scores through custom retriever subclasses Shows how to add similarity scores from retrievers like Vector Store Retrievers, SelfQueryRetriever, and MultiVectorRetriever to the metadata of retrieved documents +[How to: combine the results from multiple retrievers](https://python.langchain.com/docs/how_to/ensemble_retriever/): combining results from multiple retriever algorithms, leveraging different retrieval strengths, or using a hybrid search approach. The page explains how to use the EnsembleRetriever to combine results from sparse and dense retrievers, outlines basic usage, and demonstrates runtime configuration of individual retrievers. +[How to: reorder retrieved results to mitigate the "lost in the middle" effect](https://python.langchain.com/docs/how_to/long_context_reorder/): looking to improve performance of RAG applications, mitigating the "lost in the middle" effect, reordering retrieved results for longer contexts. Explains how to reorder retrieved documents to position the most relevant at the beginning and end, with less relevant in the middle, helping surface important information for language models. +[How to: generate multiple embeddings per document](https://python.langchain.com/docs/how_to/multi_vector/): needing to retrieve documents using multiple vector embeddings per document, when working with long documents that need to be split into chunks, when using document summaries for retrieval. This page covers how to index documents using 1) document chunks, 2) summaries generated with an LLM, and 3) hypothetical questions generated with an LLM. It demonstrates the usage of the MultiVectorRetriever to retrieve parent documents based on vector embeddings of chunks/summaries/questions. +[How to: retrieve the whole document for a chunk](https://python.langchain.com/docs/how_to/parent_document_retriever/): [1) wanting to retrieve larger documents instead of just smaller chunks for context, 2) trying to balance keeping context while splitting long documents] [The page explains how to use the ParentDocumentRetriever, which first splits documents into small chunks for indexing but then retrieves the larger parent documents those chunks came from during retrieval. It shows code examples for retrieving full documents as well as larger chunks rather than full documents.] +[How to: generate metadata filters](https://python.langchain.com/docs/how_to/self_query/): needing to perform retrieval on documents based on semantic similarity to the query text and metadata filters, integrating the retrieval into a question-answering pipeline. Covers creating a Self Query Retriever which can perform semantic text retrieval and structured metadata filtering in one step, using an underlying vector store and a query constructor LLM chain to parse natural language queries into structured representations. +[How to: create a time-weighted retriever](https://python.langchain.com/docs/how_to/time_weighted_vectorstore/): it needs to retrieve documents from a vector store considering both semantic similarity and time decay, it needs to simulate time for testing purposes, or it needs to adjust the balance between semantic similarity and recency in retrieving documents. 
This page explains how to use the TimeWeightedVectorStoreRetriever, which combines semantic similarity scores from a vector store with a time decay factor that reduces the relevance of older documents over time, and provides examples of using different decay rates and mocking time for testing. +[How to: use hybrid vector and keyword retrieval](https://python.langchain.com/docs/how_to/hybrid/): LLM should read this page when: 1) It needs to perform hybrid search combining vector and other search techniques 2) It uses a vectorstore that supports hybrid search capabilities Explains how to configure and invoke LangChain chains to leverage hybrid search features of vectorstores like Astra DB, ElasticSearch, etc. #### Indexing Indexing is the process of keeping your vectorstore in-sync with the underlying data source. -- [How to: reindex data to keep your vectorstore in-sync with the underlying data source](https://python.langchain.com/docs/how_to/indexing) +[How to: reindex data to keep your vectorstore in-sync with the underlying data source](https://python.langchain.com/docs/how_to/indexing/): needing to index documents into a vector store, handling content deduplication and document mutations over time, or cleaning up old/deleted documents from the store. Covers the LangChain indexing API workflow, including deletion modes, using document loaders, and setting source metadata for documents to handle mutations and deletions properly. #### Tools -LangChain [Tools](https://python.langchain.com/docs/concepts/tools) contain a description of the tool (to pass to the language model) as well as the implementation of the function to call. Refer [here](https://python.langchain.com/docs/integrations/tools/) for a list of pre-buit tools. +[Tools](https://python.langchain.com/docs/concepts/tools/): needing an overview of tools in LangChain, wanting to create custom tools, or learning how to pass runtime values to tools. Tools are a way to encapsulate functions with schemas that can be passed to chat models supporting tool calling. The page covers the tool interface, creating tools using the @tool decorator, configuring tool schemas, tool artifacts, special type annotations like InjectedToolArg, and toolkits. 
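
As a sketch of the tool interface described in the entry above: a function wrapped with `@tool` gets a name, description, and argument schema that can be inspected or handed to a model (the tool itself is a toy example):

```python
# Wrap a plain function as a tool and inspect the schema that models will see.
from langchain_core.tools import tool

@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b

print(multiply.name)         # "multiply"
print(multiply.description)  # "Multiply two integers."
print(multiply.args)         # JSON-schema-like dict for the arguments
print(multiply.invoke({"a": 6, "b": 7}))  # 42
```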
-- [How to: create tools](https://python.langchain.com/docs/how_to/custom_tools) -- [How to: use built-in tools and toolkits](https://python.langchain.com/docs/how_to/tools_builtin) -- [How to: use chat models to call tools](https://python.langchain.com/docs/how_to/tool_calling) -- [How to: pass tool outputs to chat models](https://python.langchain.com/docs/how_to/tool_results_pass_to_model) -- [How to: pass run time values to tools](https://python.langchain.com/docs/how_to/tool_runtime) -- [How to: add a human-in-the-loop for tools](https://python.langchain.com/docs/how_to/tools_human) -- [How to: handle tool errors](https://python.langchain.com/docs/how_to/tools_error) -- [How to: force models to call a tool](https://python.langchain.com/docs/how_to/tool_choice) -- [How to: disable parallel tool calling](https://python.langchain.com/docs/how_to/tool_calling_parallel) -- [How to: access the `RunnableConfig` from a tool](https://python.langchain.com/docs/how_to/tool_configure) -- [How to: stream events from a tool](https://python.langchain.com/docs/how_to/tool_stream_events) -- [How to: return artifacts from a tool](https://python.langchain.com/docs/how_to/tool_artifacts/) -- [How to: convert Runnables to tools](https://python.langchain.com/docs/how_to/convert_runnable_to_tool) -- [How to: add ad-hoc tool calling capability to models](https://python.langchain.com/docs/how_to/tools_prompting) -- [How to: pass in runtime secrets](https://python.langchain.com/docs/how_to/runnable_runtime_secrets) +[How to: define a custom tool](https://python.langchain.com/docs/how_to/custom_tools/): creating custom tools for agents, converting functions or runnables to tools, or subclassing BaseTool. This page covers creating tools from functions using the @tool decorator or StructuredTool class, creating tools from Runnables, subclassing BaseTool for custom tools, creating async tools, handling tool errors, and returning artifacts from tool execution. +[How to: use built-in tools and toolkits](https://python.langchain.com/docs/how_to/tools_builtin/): needing to use built-in LangChain tools or toolkits, needing to customize built-in LangChain tools. This page covers how to use LangChain's built-in tools and toolkits, including customizing tool names, descriptions, and argument schemas. It also explains how to use LangChain toolkits, which are collections of tools for specific tasks. +[How to: use chat models to call tools](https://python.langchain.com/docs/how_to/tool_calling/): needing to call tools from chat models, wanting to use chat models to generate structured output, or doing extraction from text using chat models. Explains how to define tool schemas as Python functions, Pydantic/TypedDict classes, or LangChain Tools; bind them to chat models; retrieve tool calls from LLM responses; and optionally parse tool calls into structured objects. +[How to: pass tool outputs to chat models](https://python.langchain.com/docs/how_to/tool_results_pass_to_model/): 1) integrating tools with chat models, 2) implementing tool calling functionality, 3) passing tool outputs back to chat models. Demonstrates how to pass tool function outputs back to chat models as tool messages, allowing the model to incorporate tool results in generating a final response. 
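As a hedged sketch of the round trip covered by the tool-calling and tool-output guides above, the snippet below binds a tool to a chat model, runs the tool calls the model requests, and feeds the results back as tool messages (assumes `langchain-openai` is installed, `OPENAI_API_KEY` is set, and the model name is only illustrative):

```python
from langchain_core.messages import HumanMessage
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI


@tool
def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b


llm = ChatOpenAI(model="gpt-4o-mini")
llm_with_tools = llm.bind_tools([add])

messages = [HumanMessage("What is 11 + 31?")]
ai_msg = llm_with_tools.invoke(messages)
messages.append(ai_msg)

# Execute each requested tool call; invoking a tool with a ToolCall returns a ToolMessage.
for tool_call in ai_msg.tool_calls:
    tool_msg = add.invoke(tool_call)
    messages.append(tool_msg)

# With the tool results in the history, the model can produce a final answer.
final = llm_with_tools.invoke(messages)
print(final.content)
```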
+[How to: pass run time values to tools](https://python.langchain.com/docs/how_to/tool_runtime/): it needs to pass runtime values to tools, when it needs to prevent an LLM from generating certain tool arguments, and when it needs to inject arguments directly at runtime. This page explains how to use the InjectedToolArg annotation to mark certain parameters of a Tool as being injected at runtime, preventing the LLM from generating those arguments. It also shows how to inject the arguments at runtime and create a tool-executing chain. +[How to: add a human-in-the-loop for tools](https://python.langchain.com/docs/how_to/tools_human/): adding human approval to tool calling, allowing human intervention in a workflow, or setting up fail-safes for sensitive operations. This page demonstrates how to add a human-in-the-loop step to approve or reject tool calls made by an LLM in a tool-calling chain using LangChain. +[How to: handle tool errors](https://python.langchain.com/docs/how_to/tools_error/): needing to handle errors that occur when tools are called by an LLM, when building fault tolerance into tool-calling chains, or when enabling self-correction for tool calling errors. The page covers strategies like try/except for tool calls, fallbacks to different models, retrying with exceptions passed to the LLM, and creating custom tool exceptions. +[How to: force models to call a tool](https://python.langchain.com/docs/how_to/tool_choice/): needing to force an LLM to call a specific tool, or needing to force an LLM to call at least one tool. This page shows how to use the tool_choice parameter to force an LLM to call a specific tool or to call at least one tool from a set of available tools. +[How to: disable parallel tool calling](https://python.langchain.com/docs/how_to/tool_calling_parallel/): considering disabling parallel tool calling, when looking for examples on parallel vs. single tool calls, when trying to control the number of tool calls made. Explains how to disable parallel tool calling in LangChain so that only one tool is called at a time, providing code examples. +[How to: access the `RunnableConfig` from a tool](https://python.langchain.com/docs/how_to/tool_configure/): accessing or configuring runtime behavior of sub-runnables from a custom tool, or streaming events from child runnables within a tool. This page explains how to access the RunnableConfig from within a custom tool to configure sub-invocations and stream events from those sub-invocations. +[How to: stream events from a tool](https://python.langchain.com/docs/how_to/tool_stream_events/): needing to stream events from a tool, configure tools to access internal runnables, or propagate configurations to child runnables in async environments. A guide on how to stream events from tools that call chat models, retrievers, or other runnables, by accessing internal events and propagating configurations, with examples and explanations for compatibility across Python versions. +[How to: return artifacts from a tool](https://python.langchain.com/docs/how_to/tool_artifacts/): returning structured data from a tool, passing artifacts to downstream components, or handling custom data types from tools. This page explains how tools can return artifacts separate from model input, allowing custom objects, dataframes, or images to be passed to downstream components while limiting model exposure.
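Building on the artifacts entry just above, this is a rough sketch of a tool that returns a short string for the model plus a larger artifact for downstream code; the tool and its data are invented, but the `response_format="content_and_artifact"` pattern is the one the guide describes:

```python
import random
from typing import List, Tuple

from langchain_core.tools import tool


@tool(response_format="content_and_artifact")
def generate_random_ints(n: int, minimum: int, maximum: int) -> Tuple[str, List[int]]:
    """Generate n random integers between minimum and maximum."""
    samples = [random.randint(minimum, maximum) for _ in range(n)]
    # First element: content shown to the model; second element: artifact kept for downstream use.
    return f"Generated {n} integers in [{minimum}, {maximum}].", samples


# Invoking with a ToolCall-shaped dict yields a ToolMessage carrying both parts.
msg = generate_random_ints.invoke(
    {
        "name": "generate_random_ints",
        "args": {"n": 3, "minimum": 0, "maximum": 9},
        "id": "call_1",
        "type": "tool_call",
    }
)
print(msg.content)   # short string the model sees
print(msg.artifact)  # full list of integers for downstream components
```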
+[How to: convert Runnables to tools](https://python.langchain.com/docs/how_to/convert_runnable_to_tool/): Line 1: 'needing to convert a Python function or Runnable into a LangChain tool, when building an agent that calls external tools, or when integrating a custom tool into a chat model' Line 2: 'Demonstrates how to use the Runnable.as_tool() method to convert a Runnable to a tool with a name, description, and arguments schema. Includes examples of agents calling tools created from Runnables.' +[How to: add ad-hoc tool calling capability to models](https://python.langchain.com/docs/how_to/tools_prompting/): LLM should read this page when: 1) Adding ad-hoc tool calling capability to chat models/LLMs, 2) Using models not fine-tuned for tool calling, 3) Invoking custom tools from LLMs 'This guide demonstrates how to create prompts that instruct LLMs to request tool invocations, parse the LLM output to extract tool and arguments, invoke the requested tool, and return the tool output.' +[How to: pass runtime secrets to a runnable](https://python.langchain.com/docs/how_to/runnable_runtime_secrets/): needing to pass sensitive data to a runnable, ensuring secrets remain hidden from tracing, or integrating secret values with runnables. Explains how to pass runtime secrets to runnables using RunnableConfig, allowing certain keys to be hidden from tracing while still being accessible during invocation. #### Multimodal -- [How to: pass multimodal data directly to models](https://python.langchain.com/docs/how_to/multimodal_inputs/) -- [How to: use multimodal prompts](https://python.langchain.com/docs/how_to/multimodal_prompts/) +[How to: pass multimodal data directly to models](https://python.langchain.com/docs/how_to/multimodal_inputs/): needing to pass multimodal data (images, videos, etc.) to models, when working with models that support multimodal input and tool calling capabilities, and when looking to understand how to encode and pass different types of multimodal data. This page demonstrates how to pass multimodal input like images directly to LLMs and chat models, covering encoding techniques, passing single/multiple images, and invoking models with image/multimodal content. It also shows how to use multimodal models for tool calling. +[How to: use multimodal prompts](https://python.langchain.com/docs/how_to/multimodal_prompts/): wanting to pass multimodal data like images to an LLM, when wanting to send multiple pieces of multimodal data to an LLM, when wanting instructions on how to format multimodal prompts. This shows how to use prompt templates to format multimodal inputs like images to models that support it, including sending multiple images, and comparing images. #### Agents :::note -For in depth how-to guides for agents, please check out [LangGraph](https://langchain-ai.github.io/langgraph/) documentation. +[LangGraph](https://langchain-ai.github.io/langgraph/): learning about LangGraph, considering using LangGraph for an AI application, or deciding between LangGraph and alternatives. Overview of LangGraph as an open-source framework for building AI agents, its key features like reliability and customizability, its ecosystem integration with other LangChain products, and additional learning resources. 
::: -- [How to: use legacy LangChain Agents (AgentExecutor)](https://python.langchain.com/docs/how_to/agent_executor) -- [How to: migrate from legacy LangChain agents to LangGraph](https://python.langchain.com/docs/how_to/migrate_agent) +[How to: use legacy LangChain Agents (AgentExecutor)](https://python.langchain.com/docs/how_to/agent_executor/): building agents with specific tools, when working with chat history, when using language models for tool calling. This page explains how to build agents with AgentExecutor that can call tools like search engines and retrievers, how to add chat history to agents, and how to use language models to determine which tools to call. +[How to: migrate from legacy LangChain agents to LangGraph](https://python.langchain.com/docs/how_to/migrate_agent/): LLM should read this page when: 1) Migrating from legacy LangChain agents to LangGraph 2) Comparing the functionality of LangChain and LangGraph agents This page provides a detailed guide on migrating from legacy LangChain agents to LangGraph agents, covering topics such as basic usage, prompt templates, memory handling, iterating through steps, dealing with intermediate steps, setting iteration and execution time limits, early stopping methods, and trimming intermediate steps. #### Callbacks -[Callbacks](https://python.langchain.com/docs/concepts/callbacks) allow you to hook into the various stages of your LLM application's execution. +[Callbacks](https://python.langchain.com/docs/concepts/callbacks/): [needing to log, monitor, or stream events in an LLM application] [This page covers LangChain's callback system, which allows hooking into various stages of an LLM application for logging, monitoring, streaming, and other purposes. It explains the different callback events, callback handlers, and how to pass callbacks.] -- [How to: pass in callbacks at runtime](https://python.langchain.com/docs/how_to/callbacks_runtime) -- [How to: attach callbacks to a module](https://python.langchain.com/docs/how_to/callbacks_attach) -- [How to: pass callbacks into a module constructor](https://python.langchain.com/docs/how_to/callbacks_constructor) -- [How to: create custom callback handlers](https://python.langchain.com/docs/how_to/custom_callbacks) -- [How to: use callbacks in async environments](https://python.langchain.com/docs/how_to/callbacks_async) -- [How to: dispatch custom callback events](https://python.langchain.com/docs/how_to/callbacks_custom_events) +[How to: pass in callbacks at runtime](https://python.langchain.com/docs/how_to/callbacks_runtime/): needing to pass callback handlers at runtime to capture events, needing to attach handlers to nested objects This page explains how to pass callback handlers at runtime when invoking a runnable, which allows capturing events from all nested objects without manually attaching handlers. +[How to: attach callbacks to a module](https://python.langchain.com/docs/how_to/callbacks_attach/): attaching callbacks to a runnable, reusing callbacks across multiple executions, composing a chain of runnables This page explains how to attach callbacks to a runnable using the .with_config() method, allowing callbacks to be reused across multiple executions and propagated to child components in a chain of runnables. 
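A minimal sketch of the two callback patterns listed above, passing a handler at runtime versus attaching it with `.with_config()` (assumes `langchain-openai` is installed; the handler only prints, and the model name is illustrative):

```python
from typing import Any

from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI


class LoggingHandler(BaseCallbackHandler):
    """Print a line when a chat model starts and when a chain ends."""

    def on_chat_model_start(self, serialized: dict, messages: Any, **kwargs: Any) -> None:
        print("Chat model started")

    def on_chain_end(self, outputs: Any, **kwargs: Any) -> None:
        print("Chain ended")


prompt = ChatPromptTemplate.from_messages([("human", "What is 1 + {number}?")])
chain = prompt | ChatOpenAI(model="gpt-4o-mini")

# Passing callbacks in the config at invocation time propagates them to all nested runs.
chain.invoke({"number": "2"}, config={"callbacks": [LoggingHandler()]})

# Alternatively, attach the handler to the runnable itself so it is reused on every call.
chain_with_callbacks = chain.with_config(callbacks=[LoggingHandler()])
chain_with_callbacks.invoke({"number": "3"})
```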
+[How to: pass callbacks into a module constructor](https://python.langchain.com/docs/how_to/callbacks_constructor/): LLM should read this page when: 1) Implementing callbacks in LangChain, 2) Understanding the scope of constructor callbacks, 3) Deciding whether to use constructor or runtime callbacks 'This page explains how to pass callbacks into the constructor of LangChain objects, and that constructor callbacks are scoped only to the object they are defined on, not inherited by child objects.' +[How to: create custom callback handlers](https://python.langchain.com/docs/how_to/custom_callbacks/): creating custom behavior for LangChain components, customizing callback events, implementing event handlers This page explains how to create custom callback handlers by implementing callback methods and attaching the handler to LangChain components +[How to: use callbacks in async environments](https://python.langchain.com/docs/how_to/callbacks_async/): needing to use callbacks in async environments, handling sync callbacks in async methods, using AsyncCallbackHandler Covers using callbacks with async APIs, avoiding blocking with AsyncCallbackHandler, propagating callbacks in async runnables, example of sync and async callback handlers +[How to: dispatch custom callback events](https://python.langchain.com/docs/how_to/callbacks_custom_events/): dispatching custom callback events, handling async or sync custom callback events, or consuming custom events via the astream events API. This page covers how to dispatch custom callback events from within a Runnable, consume these events via async/sync callback handlers, and access custom events through the astream events API. #### Custom All of LangChain components can easily be extended to support your own versions. -- [How to: create a custom chat model class](https://python.langchain.com/docs/how_to/custom_chat_model) -- [How to: create a custom LLM class](https://python.langchain.com/docs/how_to/custom_llm) -- [How to: create a custom embeddings class](https://python.langchain.com/docs/how_to/custom_embeddings) -- [How to: write a custom retriever class](https://python.langchain.com/docs/how_to/custom_retriever) -- [How to: write a custom document loader](https://python.langchain.com/docs/how_to/document_loader_custom) -- [How to: write a custom output parser class](https://python.langchain.com/docs/how_to/output_parser_custom) -- [How to: create custom callback handlers](https://python.langchain.com/docs/how_to/custom_callbacks) -- [How to: define a custom tool](https://python.langchain.com/docs/how_to/custom_tools) -- [How to: dispatch custom callback events](https://python.langchain.com/docs/how_to/callbacks_custom_events) +[How to: create a custom chat model class](https://python.langchain.com/docs/how_to/custom_chat_model/): creating a custom chat model class, integrating a new language model as a chat model, or implementing streaming for a chat model. This page explains how to create a custom chat model class by inheriting from BaseChatModel, and implementing methods like _generate and _stream. It covers handling inputs, messages, streaming, identifying parameters, and contributing custom chat models. +[How to: create a custom LLM class](https://python.langchain.com/docs/how_to/custom_llm/): creating a custom LLM class, wrapping their own LLM provider, integrating with a new language model not yet supported by LangChain. 
This page explains how to create a custom LLM class by implementing the required _call and _llm_type methods, as well as optional methods like _identifying_params, _acall, _stream, and _astream. It provides an example implementation, demonstrates testing and integration with LangChain APIs, and offers guidance for contributing custom LLM integrations. +[How to: create a custom embeddings class](https://python.langchain.com/docs/how_to/custom_embeddings/): needing to use a custom text embedding model, integrating a new text embedding provider, or contributing a new text embedding integration. The page covers implementing custom text embedding models for LangChain by following the Embeddings interface, providing examples, testing, and contributing guidelines. +[How to: write a custom retriever class](https://python.langchain.com/docs/how_to/custom_retriever/): learning how to create a custom retriever, when implementing custom retrieval logic, when adding retrieval capabilities to an application. Explains how to implement a custom Retriever class by extending BaseRetriever, including providing examples and guidelines for contributing custom retrievers. +[How to: write a custom document loader](https://python.langchain.com/docs/how_to/document_loader_custom/): Line 1: 'creating a custom document loader, working with files, or using the GenericLoader abstraction' Line 2: 'This page explains how to create a custom document loader, work with files using BaseBlobParser and Blob, and use the GenericLoader to combine a BlobLoader with a BaseBlobParser.' +[How to: write a custom output parser class](https://python.langchain.com/docs/how_to/output_parser_custom/): Line 1: 'creating a custom output parser, implementing a custom parser by inheriting from base classes, or parsing raw model outputs' Line 2: 'Covers how to create custom output parsers using runnable lambdas/generators (recommended) or by inheriting from base parser classes like BaseOutputParser and BaseGenerationOutputParser. Includes examples for simple and more complex parsing scenarios.' +[How to: create custom callback handlers](https://python.langchain.com/docs/how_to/custom_callbacks/): creating custom behavior for LangChain components, customizing callback events, implementing event handlers This page explains how to create custom callback handlers by implementing callback methods and attaching the handler to LangChain components +[How to: define a custom tool](https://python.langchain.com/docs/how_to/custom_tools/): creating custom tools for agents, converting functions or runnables to tools, or subclassing BaseTool. This page covers creating tools from functions using the @tool decorator or StructuredTool class, creating tools from Runnables, subclassing BaseTool for custom tools, creating async tools, handling tool errors, and returning artifacts from tool execution. +[How to: dispatch custom callback events](https://python.langchain.com/docs/how_to/callbacks_custom_events/): dispatching custom callback events, handling async or sync custom callback events, or consuming custom events via the astream events API. This page covers how to dispatch custom callback events from within a Runnable, consume these events via async/sync callback handlers, and access custom events through the astream events API. 
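For the custom retriever entry above, a bare-bones sketch of subclassing `BaseRetriever` with toy substring matching (the matching logic is purely illustrative, not a recommended retrieval strategy):

```python
from typing import List

from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever


class ToyRetriever(BaseRetriever):
    """Return up to k documents whose text contains the query string."""

    documents: List[Document]
    k: int = 3

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        matches = [d for d in self.documents if query.lower() in d.page_content.lower()]
        return matches[: self.k]


docs = [
    Document(page_content="Dogs are great companions."),
    Document(page_content="Cats are independent pets."),
]
retriever = ToyRetriever(documents=docs, k=1)
print(retriever.invoke("dogs"))  # retrievers are runnables, so invoke() works
```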
#### Serialization -- [How to: save and load LangChain objects](https://python.langchain.com/docs/how_to/serialization) +[How to: save and load LangChain objects](https://python.langchain.com/docs/how_to/serialization/): needing to save and reload LangChain objects, handle API keys securely when serializing/deserializing objects, and maintain compatibility when deserializing objects across different versions of LangChain. This page discusses how to save and load serializable LangChain objects like chains, messages, and documents using the dump/load functions, which separate API keys and ensure cross-version compatibility. Examples are provided for serializing/deserializing to JSON strings, Python dicts, and disk files. ## Use cases @@ -262,175 +262,175 @@ These guides cover use-case specific details. ### Q&A with RAG Retrieval Augmented Generation (RAG) is a way to connect LLMs to external sources of data. -For a high-level tutorial on RAG, check out [this guide](https://python.langchain.com/docs/tutorials/rag/). +[this guide](https://python.langchain.com/docs/tutorials/rag/): building a retrieval-augmented question-answering system, when needing to index and search through unstructured data sources, when learning about key concepts like document loaders, text splitters, vector stores, and retrievers. This tutorial covers how to build a Q&A application over textual data by loading documents, splitting them into chunks, embedding and storing the chunks in a vector store, retrieving relevant chunks for a user query, and generating an answer using a language model with the retrieved context. -- [How to: add chat history](https://python.langchain.com/docs/how_to/qa_chat_history_how_to/) -- [How to: stream](https://python.langchain.com/docs/how_to/qa_streaming/) -- [How to: return sources](https://python.langchain.com/docs/how_to/qa_sources/) -- [How to: return citations](https://python.langchain.com/docs/how_to/qa_citations/) -- [How to: do per-user retrieval](https://python.langchain.com/docs/how_to/qa_per_user/) +[How to: add chat history](https://python.langchain.com/docs/how_to/qa_chat_history_how_to/): building a conversational question-answering application, incorporating chat history and retrieval from external knowledge sources, and deciding between using chains or agents for the application logic. Discusses building chat applications with LangChain by using chains for predictable retrieval steps or agents for more dynamic reasoning. Covers setting up components like embeddings and vector stores, constructing chains with tool calls for retrieval, and assembling LangGraph agents with a ReAct executor. Provides examples for testing the applications. +[How to: stream](https://python.langchain.com/docs/how_to/qa_streaming/): LLM should read this page when: 1) Building a RAG (Retrieval Augmented Generation) application that requires streaming final outputs or intermediate steps 2) Integrating streaming capabilities into an existing LLM-based application 'The page provides guidance on how to stream final outputs and intermediate steps from a RAG (Retrieval Augmented Generation) application built with LangChain and LangGraph. It covers setting up the necessary components, constructing the RAG application, and utilizing different streaming modes to stream tokens from the final output or individual state updates from each step.' 
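As a drastically simplified stand-in for the RAG streaming guide above, the snippet below streams final output tokens from a small prompt–model–parser chain; the retrieval step is replaced by a hard-coded context string, and the model name is illustrative:

```python
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

prompt = ChatPromptTemplate.from_template(
    "Answer the question using only this context:\n{context}\n\nQuestion: {question}"
)
chain = prompt | ChatOpenAI(model="gpt-4o-mini") | StrOutputParser()

# .stream() yields output chunks (strings, after the parser) as they are produced,
# so tokens can be shown to the user before the full answer is finished.
for chunk in chain.stream(
    {"context": "Harrison worked at Kensho.", "question": "Where did Harrison work?"}
):
    print(chunk, end="", flush=True)
```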
+[How to: return sources](https://python.langchain.com/docs/how_to/qa_sources/): LLM should read this page when: 1) Building a question-answering (QA) application that needs to return the sources used to generate the answer. 2) Implementing a conversational QA system with retrieval-augmented generation (RAG). 3) Structuring model outputs to include sources or citations. 'This guide explains how to configure LangChain's QA and RAG workflows to retrieve and return the source documents or citations used to generate the final answer. It covers both basic RAG and conversational RAG architectures, and demonstrates techniques for structuring the model output to include source information.' +[How to: return citations](https://python.langchain.com/docs/how_to/qa_citations/): seeking to add citations to results from a Retrieval Augmented Generation (RAG) application, when wanting to justify an answer using source material, and when needing to provide evidence for generated outputs. The page covers various methods for getting a RAG application to cite sources used in generating answers, including tool-calling to return source IDs or text snippets, direct prompting to generate structured outputs with citations, retrieving and compressing context to minimize need for citations, and post-processing generated answers to annotate with citations. +[How to: do per-user retrieval](https://python.langchain.com/docs/how_to/qa_per_user/): needing to configure retrieval chains for per-user data access, wanting to limit document access for different users, or building retrieval applications with multi-tenant architectures. Explains how to configure retriever search kwargs to limit retrieved documents based on user, demonstrates code example using Pinecone namespace for multi-tenancy. ### Extraction Extraction is when you use LLMs to extract structured information from unstructured text. -For a high level tutorial on extraction, check out [this guide](https://python.langchain.com/docs/tutorials/extraction/). +[this guide](https://python.langchain.com/docs/tutorials/extraction/): building information extraction applications, understanding how to use reference examples for improving extraction performance, or when needing to extract structured data from unstructured text. This tutorial covers building an information extraction chain using LangChain, defining schemas for extracting structured data, using reference examples to improve extraction quality, and extracting multiple entities from text. -- [How to: use reference examples](https://python.langchain.com/docs/how_to/extraction_examples/) -- [How to: handle long text](https://python.langchain.com/docs/how_to/extraction_long_text/) -- [How to: do extraction without using function calling](https://python.langchain.com/docs/how_to/extraction_parse) +[How to: use reference examples](https://python.langchain.com/docs/how_to/extraction_examples/): wanting to use reference examples to improve extraction quality, wanting to structure example inputs and outputs for extraction, wanting to test an extraction model with and without examples. This page explains how to define reference examples in the format expected for the LangChain tool calling API, how to incorporate these examples into prompts, and how using examples can improve extraction performance compared to not using examples. 
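A bare-bones extraction sketch in the spirit of the tutorial and reference-examples guides above, using a Pydantic schema with `.with_structured_output()`; it omits reference examples, and the schema and model name are invented for illustration:

```python
from typing import Optional

from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field


class Person(BaseModel):
    """Information about a person mentioned in the text."""

    name: str = Field(description="The person's full name")
    hair_color: Optional[str] = Field(default=None, description="Hair color, if mentioned")


llm = ChatOpenAI(model="gpt-4o-mini")
structured_llm = llm.with_structured_output(Person)

# The model's structured-output / tool-calling support is used to fill the schema.
result = structured_llm.invoke("Alan Smith is 6 feet tall and has blond hair.")
print(result)  # e.g. Person(name='Alan Smith', hair_color='blond')
```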
+[How to: handle long text](https://python.langchain.com/docs/how_to/extraction_long_text/): working with large documents or PDFs that exceed the context window of the LLM, when needing to extract structured information from text. This page covers strategies for handling long text when doing information extraction, including a brute force approach of chunking the text and extracting from each chunk, and a retrieval-augmented generation (RAG) approach of indexing the chunks and only extracting from relevant ones. It also discusses common issues with these approaches. +[How to: do extraction without using function calling](https://python.langchain.com/docs/how_to/extraction_parse/): looking to extract structured data from text, when needing to parse model outputs into objects, or when wanting to avoid using tool calling methods for extraction tasks. This page explains how to use prompting instructions to get LLMs to generate outputs in a structured format like JSON, and then use output parsers to convert the model responses into Python objects. ### Chatbots Chatbots involve using an LLM to have a conversation. -For a high-level tutorial on building chatbots, check out [this guide](https://python.langchain.com/docs/tutorials/chatbot/). +[this guide](https://python.langchain.com/docs/tutorials/chatbot/): building a chatbot application, incorporating conversational history, or using prompt templates. This page demonstrates how to build a chatbot with LangChain, including adding message persistence, prompt templates, conversation history management, and response streaming. -- [How to: manage memory](https://python.langchain.com/docs/how_to/chatbots_memory) -- [How to: do retrieval](https://python.langchain.com/docs/how_to/chatbots_retrieval) -- [How to: use tools](https://python.langchain.com/docs/how_to/chatbots_tools) -- [How to: manage large chat history](https://python.langchain.com/docs/how_to/trim_messages/) +[How to: manage memory](https://python.langchain.com/docs/how_to/chatbots_memory/): LLM should read this page when: 1) Building a chatbot and wants to incorporate memory (chat history) 2) Looking to add context from previous messages to improve responses 3) Needs techniques to handle long conversations by summarizing or trimming history 'The page covers different techniques to add memory capabilities to chatbots, including passing previous messages directly, automatic history management using LangGraph persistence, trimming messages to reduce context, and generating summaries of conversations. Examples in Python are provided for each approach.' +[How to: do retrieval](https://python.langchain.com/docs/how_to/chatbots_retrieval/): building a retrieval-augmented chatbot, adding conversational context to retrieval queries, or streaming responses from a chatbot. This page covers setting up a retriever over a document corpus, creating document chains and retrieval chains, transforming queries for better retrieval, and streaming responses from the retrieval chain. +[How to: use tools](https://python.langchain.com/docs/how_to/chatbots_tools/): looking to integrate tools into chatbots, when using agents with tools, when incorporating web search into conversational agents. The page covers how to create a conversational agent using LangChain that can interact with APIs and web search tools, while maintaining chat history. It demonstrates setting up a ReAct agent with a Tavily search tool, invoking the agent, handling conversational responses with chat history, and adding memory. 
+[How to: manage large chat history](https://python.langchain.com/docs/how_to/trim_messages/): working with long chat histories, when concerned about token limits for chat models, when implementing token management strategies. This page explains how to use the trim_messages utility to reduce the size of a chat message history to fit within token limits, covering trimming by token count or message count, and allowing customization of trimming strategies. ### Query analysis Query Analysis is the task of using an LLM to generate a query to send to a retriever. -For a high-level tutorial on query analysis, check out [this guide](https://python.langchain.com/docs/tutorials/rag/#query-analysis). +[this guide](https://python.langchain.com/docs/tutorials/rag/#query-analysis): LLM should read this page when: 1) Building a question-answering application over unstructured data 2) Learning about Retrieval Augmented Generation (RAG) architectures 3) Indexing data for use with LLMs 'This tutorial covers building a Retrieval Augmented Generation (RAG) application that can answer questions based on ingested data. It walks through loading data, chunking it, embedding and storing it in a vector store, retrieving relevant chunks for a given query, and generating an answer using an LLM. It also shows how to incorporate query analysis for improved retrieval.' -- [How to: add examples to the prompt](https://python.langchain.com/docs/how_to/query_few_shot) -- [How to: handle cases where no queries are generated](https://python.langchain.com/docs/how_to/query_no_queries) -- [How to: handle multiple queries](https://python.langchain.com/docs/how_to/query_multiple_queries) -- [How to: handle multiple retrievers](https://python.langchain.com/docs/how_to/query_multiple_retrievers) -- [How to: construct filters](https://python.langchain.com/docs/how_to/query_constructing_filters) -- [How to: deal with high cardinality categorical variables](https://python.langchain.com/docs/how_to/query_high_cardinality) +[How to: add examples to the prompt](https://python.langchain.com/docs/how_to/query_few_shot/): needing to guide an LLM to generate queries, when fine-tuning an LLM for query generation, when incorporating examples into few-shot prompts. This page covers how to add examples to prompts for query analysis in LangChain, including setting up the system, defining the query schema, generating queries, and tuning prompts by adding examples. +[How to: handle cases where no queries are generated](https://python.langchain.com/docs/how_to/query_no_queries/): querying for information, handling cases where no queries are generated, integrating query analysis with retrieval. Provides guidance on handling scenarios where query analysis techniques allow for no queries to be generated, including code examples for structuring the output, performing query analysis with an LLM, and integrating query analysis with a retriever in a chain. +[How to: handle multiple queries](https://python.langchain.com/docs/how_to/query_multiple_queries/): handling queries that generate multiple potential queries, combining retrieval results from multiple queries, and integrating query analysis with retrieval pipelines. Explains how to handle scenarios where a query analysis step produces multiple potential queries by running retrievals for each query and combining the results. Demonstrates this approach with code examples using LangChain components. 
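Sketching the multiple-queries pattern from the last entry above: a structured-output query analyzer expands one question into several queries, and the results of retrieving for each are merged (the documents, schema, and model name are invented; assumes `langchain-openai` is installed):

```python
from typing import List

from langchain_core.documents import Document
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from pydantic import BaseModel, Field


class Search(BaseModel):
    """Search over a small database of employment records."""

    queries: List[str] = Field(description="Distinct queries to search for")


vectorstore = InMemoryVectorStore.from_documents(
    [
        Document(page_content="Ankush worked at Facebook."),
        Document(page_content="Harrison worked at Kensho."),
    ],
    OpenAIEmbeddings(),
)
retriever = vectorstore.as_retriever(search_kwargs={"k": 1})

llm = ChatOpenAI(model="gpt-4o-mini")
query_analyzer = llm.with_structured_output(Search)

# One user question may expand into several queries; retrieve for each and merge the results.
search = query_analyzer.invoke("Where did Harrison and Ankush work?")
docs: List[Document] = []
for q in search.queries:
    docs.extend(retriever.invoke(q))
print([d.page_content for d in docs])
```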
+[How to: handle multiple retrievers](https://python.langchain.com/docs/how_to/query_multiple_retrievers/): needing to handle multiple retrievers for query analysis, when implementing a query analyzer that can select between different retrievers, when building a retrieval-augmented system that needs to choose between different data sources. This page explains how to handle scenarios where a query analysis step allows for selecting between multiple retrievers, showing an example implementation using LangChain's tools for structured output parsing, prompting, and chaining components together. +[How to: construct filters](https://python.langchain.com/docs/how_to/query_constructing_filters/): constructing filters for query analysis, translating filters to specific retriever formats, using LangChain's structured query objects. This page covers how to construct filters as Pydantic models and translate them into retriever-specific filters using LangChain's translators for Chroma and Elasticsearch. +[How to: deal with high cardinality categorical variables](https://python.langchain.com/docs/how_to/query_high_cardinality/): dealing with categorical data with high cardinality, handling potential misspellings of categorical values, and filtering based on categorical values. The page discusses techniques for handling high-cardinality categorical data in query analysis, such as adding all possible values to the prompt, using a vector store to find relevant values, and correcting user input to the closest valid categorical value. ### Q&A over SQL + CSV You can use LLMs to do question answering over tabular data. -For a high-level tutorial, check out [this guide](https://python.langchain.com/docs/tutorials/sql_qa/). +[this guide](https://python.langchain.com/docs/tutorials/sql_qa/): LLM should read this page when: 1. Building a question-answering system over a SQL database 2. Implementing agents or chains to interact with a SQL database 'This tutorial covers building question-answering systems over SQL databases using LangChain. It demonstrates creating chains and agents that can generate SQL queries from natural language, execute them against a database, and provide natural language responses. It covers techniques like schema exploration, query validation, and handling high-cardinality columns.' -- [How to: use prompting to improve results](https://python.langchain.com/docs/how_to/sql_prompting) -- [How to: do query validation](https://python.langchain.com/docs/how_to/sql_query_checking) -- [How to: deal with large databases](https://python.langchain.com/docs/how_to/sql_large_db) -- [How to: deal with CSV files](https://python.langchain.com/docs/how_to/sql_csv) +[How to: use prompting to improve results](https://python.langchain.com/docs/how_to/sql_prompting/): 'querying SQL databases with a language model, when doing few-shot prompting for SQL queries, and when selecting relevant few-shot examples dynamically.' 'This page covers how to improve SQL query generation prompts by incorporating database schema information, providing few-shot examples, and dynamically selecting the most relevant few-shot examples using semantic similarity.' 
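A hedged sketch of the text-to-SQL flow referenced above, using `SQLDatabase` and `create_sql_query_chain`; the SQLite path and model name are placeholders, and the query-validation steps the guides describe are omitted:

```python
from langchain.chains import create_sql_query_chain
from langchain_community.utilities import SQLDatabase
from langchain_openai import ChatOpenAI

# Connect to a local SQLite file (path is illustrative).
db = SQLDatabase.from_uri("sqlite:///Chinook.db")
llm = ChatOpenAI(model="gpt-4o-mini")

# The chain turns a natural-language question into a SQL query using the database schema.
write_query = create_sql_query_chain(llm, db)
query = write_query.invoke({"question": "How many employees are there?"})
print(query)          # e.g. SELECT COUNT(*) FROM Employee
print(db.run(query))  # execute the generated SQL against the database
```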
+[How to: do query validation](https://python.langchain.com/docs/how_to/sql_query_checking/): Line 1: 'working on SQL query generation, handling invalid SQL queries, or incorporating human approval for SQL queries' Line 2: 'This page covers strategies for validating SQL queries, such as appending a query validator step, prompt engineering, human-in-the-loop approval, and error handling.' +[How to: deal with large databases](https://python.langchain.com/docs/how_to/sql_large_db/): dealing with large databases in SQL question-answering, identifying relevant table schemas to include in prompts, and handling high-cardinality columns with proper nouns or other unique values. The page discusses methods to identify relevant tables and table schemas to include in prompts when dealing with large databases. It also covers techniques to handle high-cardinality columns containing proper nouns or other unique values, such as creating a vector store of distinct values and querying it to include relevant spellings in prompts. +[How to: deal with CSV files](https://python.langchain.com/docs/how_to/sql_csv/): needing to build question-answering systems over CSV data, wanting to understand the tradeoffs between using SQL or Python libraries like Pandas, and requiring guidance on securely executing code from language models. This page covers two main approaches to question answering over CSV data: using SQL by loading CSVs into a database, or giving an LLM access to Python environments to interact with CSV data using libraries like Pandas. It discusses the security implications of each approach and provides code examples for implementing question-answering chains and agents with both methods. ### Q&A over graph databases You can use an LLM to do question answering over graph databases. -For a high-level tutorial, check out [this guide](https://python.langchain.com/docs/tutorials/graph/). +[this guide](https://python.langchain.com/docs/tutorials/graph/): LLM should read this page when: 1) Building a question-answering system over a graph database 2) Implementing text-to-query generation for graph databases 3) Learning techniques for query validation and error handling 'This page covers building a question-answering application over a graph database using LangChain. It provides a basic implementation using the GraphQACypherChain, followed by an advanced implementation with LangGraph. The latter includes techniques like few-shot prompting, query validation, and error handling for generating accurate Cypher queries from natural language.' -- [How to: add a semantic layer over the database](https://python.langchain.com/docs/how_to/graph_semantic) -- [How to: construct knowledge graphs](https://python.langchain.com/docs/how_to/graph_constructing) +[How to: add a semantic layer over the database](https://python.langchain.com/docs/how_to/graph_semantic/): needing to add a semantic layer over a graph database, needing to use tools representing Cypher templates with an LLM, or needing to build a LangGraph Agent to interact with a Neo4j database. This page covers how to create custom tools with Cypher templates for a Neo4j graph database, bind those tools to an LLM, and build a LangGraph Agent that can invoke the tools to retrieve information from the graph database. +[How to: construct knowledge graphs](https://python.langchain.com/docs/how_to/graph_constructing/): constructing knowledge graphs from unstructured text, storing information in a graph database, using LLM Graph Transformer to extract knowledge from text. 
This page explains how to set up a Neo4j graph database, use LLMGraphTransformer to extract structured knowledge graph data from text, filter extracted nodes/relationships, and store the knowledge graph in Neo4j. ### Summarization LLMs can summarize and otherwise distill desired information from text, including -large volumes of text. For a high-level tutorial, check out [this guide](https://python.langchain.com/docs/tutorials/summarization). +[this guide](https://python.langchain.com/docs/tutorials/summarization/): needing to summarize long texts or documents, when building question-answering systems, when creating text analysis applications. This page covers summarizing texts using LangChain, including the "stuff" method (concatenating into single prompt), the "map-reduce" method (splitting into chunks for parallel summarization), and orchestrating these methods using LangGraph. -- [How to: summarize text in a single LLM call](https://python.langchain.com/docs/how_to/summarize_stuff) -- [How to: summarize text through parallelization](https://python.langchain.com/docs/how_to/summarize_map_reduce) -- [How to: summarize text through iterative refinement](https://python.langchain.com/docs/how_to/summarize_refine) +[How to: summarize text in a single LLM call](https://python.langchain.com/docs/how_to/summarize_stuff/): looking to summarize text, seeking a simple single-LLM summarization method, or exploring basic summarization chains in LangChain. This page outlines how to use LangChain's pre-built 'stuff' summarization chain, which stuffs text into a prompt for an LLM to summarize in a single call. +[How to: summarize text through parallelization](https://python.langchain.com/docs/how_to/summarize_map_reduce/): needing to summarize long text documents using parallelization, needing to optimize summarization for large volumes of text, and needing efficient summarization strategies. This page discusses using a map-reduce strategy to summarize text through parallelization, including breaking the text into subdocuments, generating summaries for each in parallel (map step), and then consolidating the summaries into a final summary (reduce step). It provides code examples using LangChain and LangGraph. +[How to: summarize text through iterative refinement](https://python.langchain.com/docs/how_to/summarize_refine/): LLM should read this page when: 1. Attempting to summarize long texts through iterative refinement 2. Learning about building applications with LangGraph 3. Seeking examples of streaming LLM outputs 'This guide demonstrates how to summarize text through iterative refinement using LangGraph. It involves splitting the text into documents, summarizing the first document, and then refining the summary based on subsequent documents until finished. The approach leverages LangGraph's streaming capabilities and modularity.' ## LangChain Expression Language (LCEL) -[LangChain Expression Language](https://python.langchain.com/docs/concepts/lcel) is a way to create arbitrary custom chains. It is built on the [Runnable](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html) protocol. +[LCEL](https://python.langchain.com/docs/concepts/lcel/): needing an overview of the LangChain Expression Language (LCEL), deciding whether to use LCEL or not, and understanding how to compose chains using LCEL primitives. 
Provides an overview of the LCEL, a declarative approach to building chains from existing Runnables, covering its benefits, composition primitives like RunnableSequence and RunnableParallel, the composition syntax, automatic type coercion, and guidance on when to use LCEL versus alternatives like LangGraph. -[**LCEL cheatsheet**](https://python.langchain.com/docs/how_to/lcel_cheatsheet/): For a quick overview of how to use the main LCEL primitives. +[**LCEL cheatsheet**](https://python.langchain.com/docs/how_to/lcel_cheatsheet/): 'needing a reference for interacting with Runnables in LangChain or building custom runnables and chains' 'This page provides a comprehensive cheatsheet with examples for key operations with Runnables such as invoking, batching, streaming, composing, configuring, and dynamically building runnables and chains' -[**Migration guide**](https://python.langchain.com/docs/versions/migrating_chains): For migrating legacy chain abstractions to LCEL. +[**Migration guide**](https://python.langchain.com/docs/versions/migrating_chains/): migrating older chains from LangChain v0.0, reimplementing legacy chains, or upgrading to use LCEL and LangGraph This page provides guidance on migrating from deprecated v0.0 chain implementations to using LCEL and LangGraph, including specific guides for various legacy chains like LLMChain, ConversationChain, RetrievalQA, and others. -- [How to: chain runnables](https://python.langchain.com/docs/how_to/sequence) -- [How to: stream runnables](https://python.langchain.com/docs/how_to/streaming) -- [How to: invoke runnables in parallel](https://python.langchain.com/docs/how_to/parallel/) -- [How to: add default invocation args to runnables](https://python.langchain.com/docs/how_to/binding/) -- [How to: turn any function into a runnable](https://python.langchain.com/docs/how_to/functions) -- [How to: pass through inputs from one chain step to the next](https://python.langchain.com/docs/how_to/passthrough) -- [How to: configure runnable behavior at runtime](https://python.langchain.com/docs/how_to/configure) -- [How to: add message history (memory) to a chain](https://python.langchain.com/docs/how_to/message_history) -- [How to: route between sub-chains](https://python.langchain.com/docs/how_to/routing) -- [How to: create a dynamic (self-constructing) chain](https://python.langchain.com/docs/how_to/dynamic_chain/) -- [How to: inspect runnables](https://python.langchain.com/docs/how_to/inspect) -- [How to: add fallbacks to a runnable](https://python.langchain.com/docs/how_to/fallbacks) -- [How to: pass runtime secrets to a runnable](https://python.langchain.com/docs/how_to/runnable_runtime_secrets) +[How to: chain runnables](https://python.langchain.com/docs/how_to/sequence/): chaining multiple LangChain components together, composing prompt templates with models, or combining runnables in a sequence. This page explains how to chain runnables (LangChain components) together using the pipe operator '|' or the .pipe() method, including chaining prompt templates with models and parsers, and how input/output formats are coerced during chaining. +[How to: stream runnables](https://python.langchain.com/docs/how_to/streaming/): Line 1: 'wanting to learn how to stream LLM responses, stream intermediate steps, and configure streaming events.' 
Line 2: 'This page covers how to use the `stream` and `astream` methods to stream final outputs, how to use `astream_events` to stream both final outputs and intermediate steps, filtering events, propagating callbacks for streaming, and working with input streams.' +[How to: invoke runnables in parallel](https://python.langchain.com/docs/how_to/parallel/): parallelizing steps in a chain, formatting data for chaining, or splitting inputs to run multiple runnables in parallel. Explains how to use RunnableParallel to execute runnables concurrently, format data between steps, and provides examples of parallelizing chains. +[How to: add default invocation args to runnables](https://python.langchain.com/docs/how_to/binding/): LLM should read this page when: 1) Wanting to invoke a Runnable with constant arguments not part of the preceding output or user input 2) Needing to bind provider-specific arguments like stop sequences or tools 'This page explains how to use the Runnable.bind() method to set default invocation arguments for a Runnable within a RunnableSequence. It covers binding stop sequences to language models and attaching OpenAI tools.' +[How to: turn any function into a runnable](https://python.langchain.com/docs/how_to/functions/): needing to use custom functions, needing to implement streaming, needing to pass metadata to runnables Covers how to use custom functions as Runnables, including constructors, decorators, coercion, passing metadata, and implementing streaming. +[How to: pass through inputs from one chain step to the next](https://python.langchain.com/docs/how_to/passthrough/): needing to pass data from one step to the next in a chain, when formatting inputs for prompts, when retrieving and preparing context for prompts. This page explains how to use RunnablePassthrough and RunnableParallel to pass data unchanged through chains, covering examples like formatting retrieval results and user inputs into prompts. +[How to: configure runnable behavior at runtime](https://python.langchain.com/docs/how_to/configure/): configuring chain internals at runtime, swapping models or prompts within a chain, or exploring different configurations of runnables. The page covers how to use .configurable_fields to configure parameters of a runnable at runtime, and .configurable_alternatives to swap out runnables with alternatives, including examples for chat models, prompts, and combinations thereof. +[How to: add message history (memory) to a chain](https://python.langchain.com/docs/how_to/message_history/): building a chatbot or multi-turn application, wanting to persist conversational state, wanting to manage message history This page explains how to add message history and persist conversational state using LangGraph, covering examples with chat models and prompt templates, and managing the message history. +[How to: route between sub-chains](https://python.langchain.com/docs/how_to/routing/): LLM should read this page when: - It needs to conditionally route between sub-chains based on previous outputs - It needs to use semantic similarity to choose the most relevant prompt for a given query 'The page covers how to route between sub-chains in LangChain, including using custom functions, RunnableBranch, and semantic similarity for prompt routing. It provides code examples for each method.' 
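Illustrating the parallel and passthrough entries above, a tiny sketch that fans one input out to several runnables with `RunnableParallel` while passing the original through unchanged (the lambdas are toy transformations):

```python
from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough

# Two independent transformations of the same input, run as one parallel step.
shout = RunnableLambda(lambda text: text.upper())
count = RunnableLambda(lambda text: len(text.split()))

parallel = RunnableParallel(
    original=RunnablePassthrough(),  # pass the input through unchanged
    shouted=shout,
    word_count=count,
)

print(parallel.invoke("langchain runnables compose nicely"))
# {'original': 'langchain runnables compose nicely',
#  'shouted': 'LANGCHAIN RUNNABLES COMPOSE NICELY',
#  'word_count': 4}
```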
+[How to: create a dynamic (self-constructing) chain](https://python.langchain.com/docs/how_to/dynamic_chain/): developing dynamic chains, implementing conditional routing, returning runnables dynamically The page explains how to create a dynamic chain that constructs parts of itself at runtime by having Runnable Lambdas return other Runnables. +[How to: inspect runnables](https://python.langchain.com/docs/how_to/inspect/): inspecting internals of an LCEL chain, debugging chain logic, or retrieving chain prompts. Provides methods to visualize chain graphs, print prompts used in chains, and inspect chain steps programmatically. +[How to: add fallbacks to a runnable](https://python.langchain.com/docs/how_to/fallbacks/): needing to add fallback options in case of errors, processing long inputs, or wanting to use a better model. This page explains how to configure fallback chains for LLM APIs in case of rate limiting or errors, for handling long input texts exceeding context windows, and for defaulting to better models when parsing fails. +[How to: pass runtime secrets to a runnable](https://python.langchain.com/docs/how_to/runnable_runtime_secrets/): needing to pass sensitive data to a runnable, ensuring secrets remain hidden from tracing, or integrating secret values with runnables. Explains how to pass runtime secrets to runnables using RunnableConfig, allowing certain keys to be hidden from tracing while still being accessible during invocation. Tracing gives you observability inside your chains and agents, and is vital in diagnosing issues. -- [How to: trace with LangChain](https://docs.smith.langchain.com/how_to_guides/tracing/trace_with_langchain) -- [How to: add metadata and tags to traces](https://docs.smith.langchain.com/how_to_guides/tracing/trace_with_langchain#add-metadata-and-tags-to-traces) +[How to: trace with LangChain](https://docs.smith.langchain.com/how_to_guides/tracing/trace_with_langchain/): tracing LangChain applications with LangSmith, customizing trace metadata and run names, or integrating LangChain with the LangSmith SDK. Provides guides on integrating LangSmith tracing into LangChain applications, configuring trace metadata and run names, distributed tracing, interoperability between LangChain and LangSmith SDK, and tracing LangChain invocations without environment variables. +[How to: add metadata and tags to traces](https://docs.smith.langchain.com/how_to_guides/tracing/trace_with_langchain/#add-metadata-and-tags-to-traces): tracing LangChain applications with LangSmith, when logging metadata and tags to traces, and when customizing trace names and IDs. This page provides step-by-step guides on integrating LangSmith tracing with LangChain in Python and JS/TS, covering quick start instructions, selective tracing, logging to specific projects, adding metadata/tags, customizing run names/IDs, accessing run IDs, distributed tracing in Python, and interoperability with the LangSmith SDK. -You can see general tracing-related how-tos [in this section of the LangSmith docs](https://docs.smith.langchain.com/how_to_guides/tracing). +[in this section of the LangSmith docs](https://docs.smith.langchain.com/how_to_guides/tracing/): configuring observability for LLM applications, accessing and managing traces, and setting up automation and monitoring. Guides on configuring tracing, using the UI/API for traces, creating dashboards, automating rules/alerts, and gathering human feedback for LLM applications. 
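For the fallbacks entry listed above, a minimal sketch of `.with_fallbacks()` that falls back to a second provider when the primary model errors (both model names are illustrative, and the corresponding integration packages and API keys are assumed to be set up):

```python
from langchain_anthropic import ChatAnthropic
from langchain_openai import ChatOpenAI

primary = ChatOpenAI(model="gpt-4o-mini")
backup = ChatAnthropic(model="claude-3-5-haiku-latest")

# If the primary model raises (rate limit, outage, ...), the fallbacks are tried in order.
llm_with_fallback = primary.with_fallbacks([backup])

print(llm_with_fallback.invoke("Say hello in one word.").content)
```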
## Integrations ### Featured Chat Model Providers -- [ChatAnthropic](https://python.langchain.com/docs/anthropic/) -- [ChatMistralAI](https://python.langchain.com/docs/mistralai/) -- [ChatFireworks](https://python.langchain.com/docs/fireworks/) -- [AzureChatOpenAI](https://python.langchain.com/docs/azure_chat_openai/) -- [ChatOpenAI](https://python.langchain.com/docs/openai/) -- [ChatTogether](https://python.langchain.com/docs/together/) -- [ChatVertexAI](https://python.langchain.com/docs/google_vertex_ai_palm/) -- [ChatGoogleGenerativeAI](https://python.langchain.com/docs/google_generative_ai/) -- [ChatGroq](https://python.langchain.com/docs/groq/) -- [ChatCohere](https://python.langchain.com/docs/cohere/) -- [ChatBedrock](https://python.langchain.com/docs/bedrock/) -- [ChatHuggingFace](https://python.langchain.com/docs/huggingface/) -- [ChatNVIDIA](https://python.langchain.com/docs/nvidia_ai_endpoints/) -- [ChatOllama](https://python.langchain.com/docs/ollama/) -- [ChatLlamaCpp](https://python.langchain.com/docs/llamacpp) -- [ChatAI21](https://python.langchain.com/docs/ai21) -- [ChatUpstage](https://python.langchain.com/docs/upstage) -- [ChatDatabricks](https://python.langchain.com/docs/databricks) -- [ChatWatsonx](https://python.langchain.com/docs/ibm_watsonx) -- [ChatXAI](https://python.langchain.com/docs/xai) +- [ChatAnthropic](https://python.langchain.com/docs/integrations/chat/anthropic/) +- [ChatMistralAI](https://python.langchain.com/docs/integrations/chat/mistralai/) +- [ChatFireworks](https://python.langchain.com/docs/integrations/chat/fireworks/) +- [AzureChatOpenAI](https://python.langchain.com/docs/integrations/chat/azure_chat_openai/) +- [ChatOpenAI](https://python.langchain.com/docs/integrations/chat/openai/) +- [ChatTogether](https://python.langchain.com/docs/integrations/chat/together/) +- [ChatVertexAI](https://python.langchain.com/docs/integrations/chat/google_vertex_ai_palm/) +- [ChatGoogleGenerativeAI](https://python.langchain.com/docs/integrations/chat/google_generative_ai/) +- [ChatGroq](https://python.langchain.com/docs/integrations/chat/groq/) +- [ChatCohere](https://python.langchain.com/docs/integrations/chat/cohere/) +- [ChatBedrock](https://python.langchain.com/docs/integrations/chat/bedrock/) +- [ChatHuggingFace](https://python.langchain.com/docs/integrations/chat/huggingface/) +- [ChatNVIDIA](https://python.langchain.com/docs/integrations/chat/nvidia_ai_endpoints/) +- [ChatOllama](https://python.langchain.com/docs/integrations/chat/ollama/) +- [ChatLlamaCpp](https://python.langchain.com/docs/integrations/chat/llamacpp/) +- [ChatAI21](https://python.langchain.com/docs/integrations/chat/ai21/) +- [ChatUpstage](https://python.langchain.com/docs/integrations/chat/upstage/) +- [ChatDatabricks](https://python.langchain.com/docs/integrations/chat/databricks/) +- [ChatWatsonx](https://python.langchain.com/docs/integrations/chat/ibm_watsonx/) +- [ChatXAI](https://python.langchain.com/docs/integrations/chat/xai/) -Other chat model integrations can be found [here](https://python.langchain.com/docs/integrations/chat/). +[All](https://python.langchain.com/docs/integrations/chat/): integrating chat models into an application, using chat models for conversational AI tasks, or choosing between different chat model providers. Provides an overview of chat models integrated with LangChain, including OpenAI, Anthropic, Google, and others. Covers key features like tool calling, structured output, JSON mode, local usage, and multimodal support. 
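Since every provider above exposes the same chat model interface, swapping providers is mostly a one-line change; a small sketch (model names illustrative, API keys assumed to be set):

```python
# Each provider ships its own integration package, e.g.
#   pip install langchain-openai langchain-anthropic
from langchain_anthropic import ChatAnthropic
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
# Swapping providers only changes the constructor; invoke/stream/batch stay the same.
# llm = ChatAnthropic(model="claude-3-5-sonnet-latest", temperature=0)

print(llm.invoke("Name one graph database.").content)
```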
## Glossary -- **[AIMessageChunk](https://python.langchain.com/docs/concepts/messages#aimessagechunk)**: A partial response from an AI message. Used when streaming responses from a chat model. -- **[AIMessage](https://python.langchain.com/docs/concepts/messages#aimessage)**: Represents a complete response from an AI model. -- **[astream_events](https://python.langchain.com/docs/concepts/chat_models#key-methods)**: Stream granular information from [LCEL](https://python.langchain.com/docs/concepts/lcel) chains. -- **[BaseTool](https://python.langchain.com/docs/concepts/tools/#tool-interface)**: The base class for all tools in LangChain. -- **[batch](https://python.langchain.com/docs/concepts/runnables)**: Use to execute a runnable with batch inputs. -- **[bind_tools](https://python.langchain.com/docs/concepts/tool_calling/#tool-binding)**: Allows models to interact with tools. -- **[Caching](https://python.langchain.com/docs/concepts/chat_models#caching)**: Storing results to avoid redundant calls to a chat model. -- **[Chat models](https://python.langchain.com/docs/concepts/multimodality/#multimodality-in-chat-models)**: Chat models that handle multiple data modalities. -- **[Configurable runnables](https://python.langchain.com/docs/concepts/runnables/#configurable-runnables)**: Creating configurable Runnables. -- **[Context window](https://python.langchain.com/docs/concepts/chat_models#context-window)**: The maximum size of input a chat model can process. -- **[Conversation patterns](https://python.langchain.com/docs/concepts/chat_history#conversation-patterns)**: Common patterns in chat interactions. -- **[Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html)**: LangChain's representation of a document. -- **[Embedding models](https://python.langchain.com/docs/concepts/multimodality/#multimodality-in-embedding-models)**: Models that generate vector embeddings for various data types. -- **[HumanMessage](https://python.langchain.com/docs/concepts/messages#humanmessage)**: Represents a message from a human user. -- **[InjectedState](https://python.langchain.com/docs/concepts/tools#injectedstate)**: A state injected into a tool function. -- **[InjectedStore](https://python.langchain.com/docs/concepts/tools#injectedstore)**: A store that can be injected into a tool for data persistence. -- **[InjectedToolArg](https://python.langchain.com/docs/concepts/tools#injectedtoolarg)**: Mechanism to inject arguments into tool functions. -- **[input and output types](https://python.langchain.com/docs/concepts/runnables#input-and-output-types)**: Types used for input and output in Runnables. -- **[Integration packages](https://python.langchain.com/docs/concepts/architecture/#integration-packages)**: Third-party packages that integrate with LangChain. -- **[Integration tests](https://python.langchain.com/docs/concepts/testing#integration-tests)**: Tests that verify the correctness of the interaction between components, usually run with access to the underlying API that powers an integration. -- **[invoke](https://python.langchain.com/docs/concepts/runnables)**: A standard method to invoke a Runnable. -- **[JSON mode](https://python.langchain.com/docs/concepts/structured_outputs#json-mode)**: Returning responses in JSON format. -- **[langchain-community](https://python.langchain.com/docs/concepts/architecture#langchain-community)**: Community-driven components for LangChain. 
-- **[langchain-core](https://python.langchain.com/docs/concepts/architecture#langchain-core)**: Core langchain package. Includes base interfaces and in-memory implementations. -- **[langchain](https://python.langchain.com/docs/concepts/architecture#langchain)**: A package for higher level components (e.g., some pre-built chains). -- **[langgraph](https://python.langchain.com/docs/concepts/architecture#langgraph)**: Powerful orchestration layer for LangChain. Use to build complex pipelines and workflows. -- **[Managing chat history](https://python.langchain.com/docs/concepts/chat_history#managing-chat-history)**: Techniques to maintain and manage the chat history. -- **[OpenAI format](https://python.langchain.com/docs/concepts/messages#openai-format)**: OpenAI's message format for chat models. -- **[Propagation of RunnableConfig](https://python.langchain.com/docs/concepts/runnables/#propagation-of-runnableconfig)**: Propagating configuration through Runnables. Read if working with python 3.9, 3.10 and async. -- **[rate-limiting](https://python.langchain.com/docs/concepts/chat_models#rate-limiting)**: Client side rate limiting for chat models. -- **[RemoveMessage](https://python.langchain.com/docs/concepts/messages/#removemessage)**: An abstraction used to remove a message from chat history, used primarily in LangGraph. -- **[role](https://python.langchain.com/docs/concepts/messages#role)**: Represents the role (e.g., user, assistant) of a chat message. -- **[RunnableConfig](https://python.langchain.com/docs/concepts/runnables/#runnableconfig)**: Use to pass run time information to Runnables (e.g., `run_name`, `run_id`, `tags`, `metadata`, `max_concurrency`, `recursion_limit`, `configurable`). -- **[Standard parameters for chat models](https://python.langchain.com/docs/concepts/chat_models#standard-parameters)**: Parameters such as API key, `temperature`, and `max_tokens`. -- **[Standard tests](https://python.langchain.com/docs/concepts/testing#standard-tests)**: A defined set of unit and integration tests that all integrations must pass. -- **[stream](https://python.langchain.com/docs/concepts/streaming)**: Use to stream output from a Runnable or a graph. -- **[Tokenization](https://python.langchain.com/docs/concepts/tokens)**: The process of converting data into tokens and vice versa. -- **[Tokens](https://python.langchain.com/docs/concepts/tokens)**: The basic unit that a language model reads, processes, and generates under the hood. -- **[Tool artifacts](https://python.langchain.com/docs/concepts/tools#tool-artifacts)**: Add artifacts to the output of a tool that will not be sent to the model, but will be available for downstream processing. -- **[Tool binding](https://python.langchain.com/docs/concepts/tool_calling#tool-binding)**: Binding tools to models. -- **[@tool](https://python.langchain.com/docs/concepts/tools/#create-tools-using-the-tool-decorator)**: Decorator for creating tools in LangChain. -- **[Toolkits](https://python.langchain.com/docs/concepts/tools#toolkits)**: A collection of tools that can be used together. -- **[ToolMessage](https://python.langchain.com/docs/concepts/messages#toolmessage)**: Represents a message that contains the results of a tool execution. -- **[Unit tests](https://python.langchain.com/docs/concepts/testing#unit-tests)**: Tests that verify the correctness of individual components, run in isolation without access to the Internet. 
-- **[Vector stores](https://python.langchain.com/docs/concepts/vectorstores)**: Datastores specialized for storing and efficiently searching vector embeddings. -- **[with_structured_output](https://python.langchain.com/docs/concepts/structured_outputs/#structured-output-method)**: A helper method for chat models that natively support [tool calling](https://python.langchain.com/docs/concepts/tool_calling) to get structured output matching a given schema specified via Pydantic, JSON schema or a function. -- **[with_types](https://python.langchain.com/docs/concepts/runnables#with_types)**: Method to overwrite the input and output types of a runnable. Useful when working with complex LCEL chains and deploying with LangServe. \ No newline at end of file +[AIMessageChunk](https://python.langchain.com/docs/concepts/messages/#aimessagechunk): 'needing to understand messages and message structure for chat models, when working with chat history, and when integrating with chat model providers' Line 2: 'Detailed overview of the different message types used in LangChain for chat models, how messages are structured, and how to convert between LangChain and OpenAI message formats.' +[AIMessage](https://python.langchain.com/docs/concepts/messages/#aimessage): building chat applications, when implementing tool calling, or when working with chat model outputs. Messages are the units of communication in chat models, representing input, output and metadata; topics include message types, roles, content, metadata, conversation structure, and LangChain's unified message format. +[astream_events](https://python.langchain.com/docs/concepts/chat_models/#key-methods): LLM should read this page when: 1) Implementing an application that uses a chat model 2) Integrating chat models with other LangChain components 3) Planning for advanced chat model features like tool calling or structured outputs This page provides an overview of chat models in LangChain, including their key features, interfaces, integration options, tool calling, structured outputs, multimodality, context windows, and advanced topics like rate limiting and caching. +[BaseTool](https://python.langchain.com/docs/concepts/tools/#tool-interface): needing to understand LangChain tools, wanting to create custom tools, or looking for best practices for designing tools. The page covers the tool abstraction in LangChain, which associates a Python function with a schema for name, description, and arguments. It explains how to create tools using the @tool decorator, configure the schema, handle tool artifacts, use special type annotations (InjectedToolArg, RunnableConfig), and provides an overview of toolkits. +[invoke](https://python.langchain.com/docs/concepts/runnables/): learning how to use the Runnable interface, when working with custom Runnables, and when needing to configure Runnables at runtime. The page covers the Runnable interface, its methods for invocation, batching, streaming, inspecting schemas, and configuration. It explains RunnableConfig, custom Runnables, and configurable Runnables. +[bind_tools](https://python.langchain.com/docs/concepts/tool_calling/#tool-binding): building applications that require an LLM to directly interact with external systems or APIs, when integrating tools or functions into an LLM workflow, or when fine-tuning an LLM to better handle tool calling. This page provides an overview of tool calling, which allows LLMs to invoke external tools or APIs with specific input schemas. 
It covers key concepts like tool creation, binding tools to LLMs, initiating tool calls from LLMs, and executing the called tools. It also offers guidance on recommended usage and best practices. +[Caching](https://python.langchain.com/docs/concepts/chat_models/#caching): building chat applications, using LLMs for information extraction, or working with multimodal data This page discusses chat models, which are language models that operate on messages. It covers chat model interfaces, integrations, features like tool calling and structured outputs, multimodality, context windows, rate limiting, and caching. +[Chat models](https://python.langchain.com/docs/concepts/multimodality/#multimodality-in-chat-models): needing to understand multimodal capabilities in LangChain, when working with multimodal data like images/audio/video, and when determining if a specific LangChain component supports multimodality. Provides an overview of multimodality in chat models, embedding models, and vector stores. Discusses multimodal inputs/outputs for chat models and how they are formatted. +[Configurable runnables](https://python.langchain.com/docs/concepts/runnables/#configurable-runnables): trying to understand how to use Runnables, how to configure and compose Runnables, and how to inspect Runnable schemas. The Runnable interface is the foundation for working with LangChain components like language models, output parsers, and retrievers. It defines methods for invoking, batching, streaming, inspecting schemas, configuring, and composing Runnables. +[Context window](https://python.langchain.com/docs/concepts/chat_models/#context-window): getting an overview of chat models, understanding the key functionality of chat models, and determining if this concept is relevant for their application. Provides an overview of chat models (LLMs with a chat interface), their features, integrations, key methods like invoking/streaming, handling inputs/outputs, using tools/structured outputs, and advanced topics like rate limiting and caching. +[Conversation patterns](https://python.langchain.com/docs/concepts/chat_history/#conversation-patterns): managing conversation history in chatbots, implementing memory for chat models, understanding correct conversation structure. This page explains the concept of chat history, a record of messages exchanged between a user and a chat model. It covers conversation patterns, guidelines for managing chat history to avoid exceeding context window, and the importance of preserving conversation structure. +[Document](https://python.langchain.com/api_reference/core/documents/langchain_core.documents.base.Document.html/): working with document data, retrieving and processing text documents, integrating with text embedding and vector storage systems This page provides details on the Document class and its associated methods and properties, as well as examples of how to use it in various scenarios such as document loading, retrieval, and transformation +[Embedding models](https://python.langchain.com/docs/concepts/multimodality/#multimodality-in-embedding-models): needing to understand multimodal capabilities of LangChain components, wanting to work with non-text data like images/audio/video, or planning to incorporate multimodal data in chat interactions. Provides an overview of multimodality support in chat models (inputs and tools), embedding models, and vector stores; notes current limitations and expected future expansions to handle different data types. 
+[HumanMessage](https://python.langchain.com/docs/concepts/messages/#humanmessage): LLM should read this page when: 1) Understanding how to structure conversations with chat models, 2) Needing to work with different types of messages (user, assistant, system, tool), 3) Converting between LangChain and OpenAI message formats. Messages are the units of communication used by chat models, representing user input, assistant output, system instructions, and tool results. Key topics include message structure, types (HumanMessage, AIMessage, SystemMessage, ToolMessage), multimodal content support, and integration with OpenAI message format. +[InjectedState](https://python.langchain.com/docs/concepts/tools/#injectedstate): learning about LangChain's tools, creating custom tools, or integrating tools with chat models. Provides conceptual overview of tools - encapsulating functions with schemas for models to call. Covers creating tools with @tool decorator, tool interfaces, special type annotations, artifacts, best practices, and toolkits. +[InjectedStore](https://python.langchain.com/docs/concepts/tools/#injectedstore): needing to understand how to create and use tools in LangChain, when needing to pass runtime values to tools, and when needing to configure a tool's schema. Tools are a way to encapsulate functions and their schemas to be used with chat models that support tool calling. The page covers the tool interface, creating tools with the @tool decorator, using tools directly, configuring tool schemas, returning artifacts from tools, and special type annotations like InjectedToolArg and RunnableConfig. +[InjectedToolArg](https://python.langchain.com/docs/concepts/tools/#injectedtoolarg): trying to understand how to create and use tools in LangChain, when needing to configure tool schemas, and when wanting to return artifacts from tools. Tools provide a way to encapsulate Python functions and schemas to be passed to chat models for execution. The page covers creating tools with the @tool decorator, configuring tool schemas, special type annotations, and tool artifacts. +[input and output types](https://python.langchain.com/docs/concepts/runnables/#input-and-output-types): needing to interact with LangChain components, wanting to understand the core Runnable interface, or composing complex chains using LCEL. Covers the Runnable interface that defines a standard way to invoke, batch, stream and inspect components; the RunnableConfig for setting runtime options; creating custom Runnables; configurable Runnables; and how input/output types, schemas, and streaming work. +[Integration packages](https://python.langchain.com/docs/concepts/architecture/#integration-packages): determining the overall architecture of LangChain, understanding the different components and packages in the LangChain ecosystem, or deciding which packages to import for a specific use case. This page provides an overview of the different packages that make up the LangChain framework, including langchain-core, langchain, integration packages, langchain-community, langgraph, langserve, and LangSmith, and explains the purpose and contents of each package. +[Integration tests](https://python.langchain.com/docs/concepts/testing/#integration-tests): needing guidance on testing LangChain components, understanding different types of tests (unit, integration, standard), or wanting to contribute by adding tests to an integration. 
Provides an overview of unit tests, integration tests, and standard tests in the LangChain ecosystem, including definitions, examples, and how to implement them for new tools/integrations. +[invoke](https://python.langchain.com/docs/concepts/runnables/): learning how to use the Runnable interface, when working with custom Runnables, and when needing to configure Runnables at runtime. The page covers the Runnable interface, its methods for invocation, batching, streaming, inspecting schemas, and configuration. It explains RunnableConfig, custom Runnables, and configurable Runnables. +[JSON mode](https://python.langchain.com/docs/concepts/structured_outputs/#json-mode): LLM should read this page when: 1) It needs to return structured output that conforms to a specific schema, 2) It needs to store model output in a database, 3) It needs to ensure model output matches a predefined format. This page covers how to define an output schema, and techniques like tool calling and JSON mode that allow models to return structured output conforming to that schema, as well as a helper method to streamline the process. +[langchain-community](https://python.langchain.com/docs/concepts/architecture/#langchain-community): learning about the structure of LangChain, deploying LangChain applications, or needing an overview of the LangChain ecosystem. This page gives an overview of the different packages, components, and services that make up the LangChain framework, including langchain-core, langchain, integration packages, langchain-community, LangGraph, LangServe, and LangSmith. +[langchain-core](https://python.langchain.com/docs/concepts/architecture/#langchain-core): needing an overview of LangChain's architecture, when considering integrating external packages, or when exploring the LangChain ecosystem. Outlines the main components of LangChain (langchain-core, langchain, integration packages, langchain-community, langgraph, langserve, LangSmith) and their roles, providing a high-level architectural overview. +[langchain](https://python.langchain.com/docs/concepts/architecture/#langchain): looking to understand the overall architecture of LangChain, when trying to determine what LangChain packages to install, or when wanting an overview of the various LangChain projects. This page outlines the hierarchical structure of the LangChain framework, describing the purpose and contents of key packages like langchain-core, langchain, integration packages, langchain-community, langgraph, langserve, and LangSmith. +[langgraph](https://python.langchain.com/docs/concepts/architecture/#langgraph): developing applications with LangChain, seeking to understand the overall architecture of LangChain, planning to contribute to or integrate with LangChain The page outlines the layered architecture of LangChain, describing the core abstraction layer, the main LangChain package, integration packages, community integrations, LangGraph for stateful agents, LangServe for deployment, and LangSmith developer tools +[Managing chat history](https://python.langchain.com/docs/concepts/chat_history/#managing-chat-history): understanding and managing chat history, learning about conversation patterns, following correct chat history structure. Explains chat history concept, provides guidelines for managing chat history, discusses conversation patterns involving users, assistants, and tools. 
+[OpenAI format](https://python.langchain.com/docs/concepts/messages/#openai-format): building chat applications, working with chat models, or consuming message streams. This page covers the structure and components of messages used in chat models, including roles, content, usage metadata, and different message types like HumanMessage, AIMessage, and ToolMessage. +[Propagation of RunnableConfig](https://python.langchain.com/docs/concepts/runnables/#propagation-of-runnableconfig): LLM should read this page when: learning about the LangChain Runnable interface, working with Runnables in LangChain, understanding how to configure and execute Runnables. The page covers the Runnable interface in LangChain, including invoking/batching/streaming Runnables, input/output schemas, configuring Runnables, creating custom Runnables, and working with configurable Runnables. +[rate-limiting](https://python.langchain.com/docs/concepts/chat_models/#rate-limiting): 1) working with chat models, 2) integrating tool calling or structured outputs, 3) understanding chat model capabilities. Overview of chat model interface, inputs/outputs, standard parameters; tool calling and structured output support; multimodality; context window; advanced topics like rate limiting, caching. +[RemoveMessage](https://python.langchain.com/docs/concepts/messages/#removemessage): needing information on the structure of messages used in conversational AI models, wanting to understand how messages are represented in LangChain, or looking for details on specific message types like SystemMessage, HumanMessage, and AIMessage. Messages are the basic units of communication in conversational AI models, containing a role (e.g. user, assistant), content (text or multimodal data), and metadata; LangChain provides a standardized message format and different message types to represent various components of a conversation. +[role](https://python.langchain.com/docs/concepts/messages/#role): understanding how to structure messages for chat models, accessing details about different LangChain message types, or converting between LangChain and OpenAI message formats. Messages are the core unit of communication in chat models, representing input/output content and metadata; LangChain defines SystemMessage, HumanMessage, AIMessage, ToolMessage and others to standardize message format across providers. +[RunnableConfig](https://python.langchain.com/docs/concepts/runnables/#runnableconfig): needing to understand the Runnable interface, invoking and configuring Runnables, and creating custom Runnables. The page covers the Runnable interface's core concepts, methods like invoke, batch, and stream, input/output types, configuring Runnables with RunnableConfig, creating custom Runnables from functions, and using configurable Runnables. +[Standard parameters for chat models](https://python.langchain.com/docs/concepts/chat_models/#standard-parameters): building applications using chat models, working with chat models for tool calling, structured outputs or multimodal inputs/outputs. Covers overview of chat models, integrations, interfaces, tool calling, structured outputs, multimodality, context window, rate-limiting, and caching of chat models. +[Standard tests](https://python.langchain.com/docs/concepts/testing/#standard-tests): needing guidance on testing LangChain components, or wanting to understand the different types of tests used in LangChain. 
This page discusses unit tests for individual functions, integration tests for validating multiple components working together, and LangChain's standard tests for ensuring consistency across tools and integrations. +[stream](https://python.langchain.com/docs/concepts/streaming/): [building applications that use streaming, building applications that need to display partial results in real-time, building applications that need to provide updates on pipeline or workflow progress] 'This page covers streaming in LangChain, including what can be streamed in LLM applications, the streaming APIs available, how to write custom data to the stream, and how LangChain automatically enables streaming for chat models in certain cases.' +[Tokens](https://python.langchain.com/docs/concepts/tokens/): needing to understand tokens used by LLMs, when dealing with character/token counts, when working with multimodal inputs Tokens are the fundamental units processed by language models. A token can represent words, word parts, punctuation, and other units. Models tokenize inputs, process tokens sequentially, and generate new tokens as output. Tokens enable efficient and contextual language processing compared to characters. +[Tokens](https://python.langchain.com/docs/concepts/tokens/): needing to understand tokens used by LLMs, when dealing with character/token counts, when working with multimodal inputs Tokens are the fundamental units processed by language models. A token can represent words, word parts, punctuation, and other units. Models tokenize inputs, process tokens sequentially, and generate new tokens as output. Tokens enable efficient and contextual language processing compared to characters. +[Tool artifacts](https://python.langchain.com/docs/concepts/tools/#tool-artifacts): needing to understand what tools are, how to create and use them, and how they integrate with models. Explains what tools are in LangChain, how to create them using the @tool decorator, special type annotations for configuring runtime behavior, how to use tools directly or pass them to chat models, and best practices for designing tools. +[Tool binding](https://python.langchain.com/docs/concepts/tool_calling/#tool-binding): determining if tool calling functionality is appropriate for their application, understanding the key concepts and workflow of tool calling, and considering best practices for designing tools. This page covers an overview of tool calling, key concepts like tool creation/binding/calling/execution, recommended usage workflow, details on implementing each step, and best practices for designing effective tools. +[@tool](https://python.langchain.com/docs/concepts/tools/#create-tools-using-the-tool-decorator): needing to understand tools in LangChain, when creating custom tools, or when integrating tools into LangChain applications. Provides an overview of tools, how to create and configure tools using the @tool decorator, different tool types (e.g. with artifacts, injected arguments), and best practices for designing tools. +[Toolkits](https://python.langchain.com/docs/concepts/tools/#toolkits): creating custom Python functions to use with LangChain, configuring existing tools, or adding tools to chat models. Explains the tool abstraction for encapsulating Python functions, creating tools with the `@tool` decorator, configuring schemas, handling tool artifacts, special type annotations, and using toolkits that group related tools. 
+[ToolMessage](https://python.langchain.com/docs/concepts/messages/#toolmessage): understanding the communication protocol with chat models, working with chat history management, or understanding LangChain's Message object structure. Messages are the unit of communication in chat models and represent input/output along with metadata; LangChain provides a unified Message format with types like SystemMessage, HumanMessage, AIMessage to handle different roles, content types, tool calls. +[Unit tests](https://python.langchain.com/docs/concepts/testing/#unit-tests): developing unit or integration tests, or when contributing to LangChain integrations Provides an overview of unit tests, integration tests, and standard tests used in the LangChain ecosystem +[Vector stores](https://python.langchain.com/docs/concepts/vectorstores/): LLM should read this page when: 1) Building applications that need to index and retrieve information based on semantic similarity 2) Integrating vector databases into their application 3) Exploring advanced vector search and retrieval techniques Vector stores are specialized data stores that enable indexing and retrieving information based on vector representations (embeddings) of data, allowing semantic similarity search over unstructured data like text, images, and audio. The page covers vector store integrations, the core interface, adding/deleting documents, basic and advanced similarity search techniques, and concepts like metadata filtering. +[with_structured_output](https://python.langchain.com/docs/concepts/structured_outputs/#structured-output-method): [needing to return structured data like JSON or database rows, working with models that support structured output like tools or JSON modes, or integrating with helper functions to streamline structured output] [Overview of structured output concept, schema definition formats like JSON/dicts and Pydantic, model integration methods like tool calling and JSON modes, LangChain structured output helper method] +[with_types](https://python.langchain.com/docs/concepts/runnables/#with_types): learning about the Runnable interface in LangChain, understanding how to work with Runnables, and customizing or configuring Runnables. The page covers the Runnable interface, optimized parallel execution, streaming APIs, input/output types, inspecting schemas, RunnableConfig options, creating custom Runnables from functions, and configurable Runnables. From e6b6c0739547fd54ebc27fcc7f3112b030195d9f Mon Sep 17 00:00:00 2001 From: Kyungho Byoun Date: Fri, 28 Mar 2025 04:19:50 +0900 Subject: [PATCH 28/30] community: add HANA dialect to SQLDatabase (#30475) This PR adds support for the HANA dialect in SQLDatabase, which is a wrapper class for SQLAlchemy. Currently, it is not possible to set a schema name when using HANA DB with LangChain, and no message is shown to the user, which makes it hard to figure out why the SQL does not work as expected. Here is the reference document on setting the schema for a HANA DB session.
- [SET SCHEMA Statement (Session Management)](https://help.sap.com/docs/SAP_HANA_PLATFORM/4fe29514fd584807ac9f2a04f6754767/20fd550375191014b886a338afb4cd5f.html) --- .../utilities/sql_database.py | 16 +++++++++++ .../tests/unit_tests/test_sql_database.py | 28 ++++++++++++++++++- 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/libs/community/langchain_community/utilities/sql_database.py b/libs/community/langchain_community/utilities/sql_database.py index 2b6b84ca4a1..1a94434afde 100644 --- a/libs/community/langchain_community/utilities/sql_database.py +++ b/libs/community/langchain_community/utilities/sql_database.py @@ -2,6 +2,7 @@ from __future__ import annotations +import re from typing import Any, Dict, Iterable, List, Literal, Optional, Sequence, Union import sqlalchemy @@ -44,6 +45,16 @@ def truncate_word(content: Any, *, length: int, suffix: str = "...") -> str: return content[: length - len(suffix)].rsplit(" ", 1)[0] + suffix +def sanitize_schema(schema: str) -> str: + """Sanitize a schema name to only contain letters, digits, and underscores.""" + if not re.match(r"^[a-zA-Z0-9_]+$", schema): + raise ValueError( + f"Schema name '{schema}' contains invalid characters. " + "Schema names must contain only letters, digits, and underscores." + ) + return schema + + class SQLDatabase: """SQLAlchemy wrapper around a database.""" @@ -465,6 +476,11 @@ class SQLDatabase: (self._schema,), execution_options=execution_options, ) + elif self.dialect == "hana": + connection.exec_driver_sql( + f"SET SCHEMA {sanitize_schema(self._schema)}", + execution_options=execution_options, + ) if isinstance(command, str): command = text(command) diff --git a/libs/community/tests/unit_tests/test_sql_database.py b/libs/community/tests/unit_tests/test_sql_database.py index 6acb734a543..349abd19313 100644 --- a/libs/community/tests/unit_tests/test_sql_database.py +++ b/libs/community/tests/unit_tests/test_sql_database.py @@ -16,7 +16,11 @@ from sqlalchemy import ( ) from sqlalchemy.engine import Engine, Result -from langchain_community.utilities.sql_database import SQLDatabase, truncate_word +from langchain_community.utilities.sql_database import ( + SQLDatabase, + sanitize_schema, + truncate_word, +) is_sqlalchemy_v1 = version.parse(sa.__version__).major == 1 @@ -262,3 +266,25 @@ def test_truncate_word() -> None: assert truncate_word("Hello World", length=-10) == "Hello World" assert truncate_word("Hello World", length=5, suffix="!!!") == "He!!!" 
assert truncate_word("Hello World", length=12, suffix="!!!") == "Hello World" + + +def test_sanitize_schema() -> None: + valid_schema_names = [ + "test_schema", + "schema123", + "TEST_SCHEMA_123", + "_schema_", + ] + for schema in valid_schema_names: + assert sanitize_schema(schema) == schema + + invalid_schema_names = [ + "test-schema", + "schema.name", + "schema$", + "schema name", + ] + for schema in invalid_schema_names: + with pytest.raises(ValueError) as ex: + sanitize_schema(schema) + assert f"Schema name '{schema}' contains invalid characters" in str(ex.value) From 1f0686db806af4fe5f357b494e12cb4786686840 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E8=B1=86=E8=B1=86=E5=AD=A6=E9=95=BF?= <1342181530@qq.com> Date: Fri, 28 Mar 2025 03:27:04 +0800 Subject: [PATCH 29/30] community: add netmind integration (#30149) Co-authored-by: yanrujing Co-authored-by: ccurme --- docs/docs/integrations/chat/netmind.ipynb | 326 ++++++++++++++++++ .../docs/integrations/providers/netmind.ipynb | 73 ++++ .../integrations/text_embedding/netmind.ipynb | 323 +++++++++++++++++ libs/packages.yml | 3 + 4 files changed, 725 insertions(+) create mode 100644 docs/docs/integrations/chat/netmind.ipynb create mode 100644 docs/docs/integrations/providers/netmind.ipynb create mode 100644 docs/docs/integrations/text_embedding/netmind.ipynb diff --git a/docs/docs/integrations/chat/netmind.ipynb b/docs/docs/integrations/chat/netmind.ipynb new file mode 100644 index 00000000000..50402e976c6 --- /dev/null +++ b/docs/docs/integrations/chat/netmind.ipynb @@ -0,0 +1,326 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Netmind\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "e49f1e0d", + "metadata": {}, + "source": [ + "# ChatNetmind\n", + "\n", + "This will help you getting started with Netmind [chat models](https://www.netmind.ai/). 
For detailed documentation of all ChatNetmind features and configurations, head to the [API reference](https://github.com/protagolabs/langchain-netmind).\n", + "\n", + "- See https://www.netmind.ai/ for an example.\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/docs/integrations/chat/) | Package downloads | Package latest |\n", + "|:---------------------------------------------------------------------------------------------| :--- |:-----:|:------------:|:--------------------------------------------------------------:| :---: | :---: |\n", + "| [ChatNetmind](https://python.langchain.com/api_reference/) | [langchain-netmind](https://python.langchain.com/api_reference/) | ✅ | ❌ | ❌ | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain-netmind?style=flat-square&label=%20) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-netmind?style=flat-square&label=%20) |\n", + "\n", + "### Model features\n", + "| [Tool calling](../../how_to/tool_calling.ipynb) | [Structured output](../../how_to/structured_output.ipynb) | JSON mode | [Image input](../../how_to/multimodal_inputs.ipynb) | Audio input | Video input | [Token-level streaming](../../how_to/chat_streaming.ipynb) | Native async | [Token usage](../../how_to/chat_token_usage_tracking.ipynb) | [Logprobs](../../how_to/logprobs.ipynb) |\n", + "|:-----------------------------------------------:|:---------------------------------------------------------:|:---------:|:---------------------------------------------------:|:-----------:|:-----------:|:----------------------------------------------------------:|:------------:|:-----------------------------------------------------------:|:---------------------------------------:|\n", + "| ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | \n", + "\n", + "## Setup\n", + "\n", + "To access Netmind models you'll need to create a Netmind account, get an API key, and install the `langchain-netmind` integration package.\n", + "\n", + "### Credentials\n", + "\n", + "Head to https://www.netmind.ai/ to sign up to Netmind and generate an API key.
Once you've done this set the NETMIND_API_KEY environment variable:" + ] + }, + { + "cell_type": "code", + "id": "433e8d2b-9519-4b49-b2c4-7ab65b046c94", + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-20T02:00:30.732333Z", + "start_time": "2025-03-20T02:00:28.384208Z" + } + }, + "source": [ + "import getpass\n", + "import os\n", + "\n", + "if not os.getenv(\"NETMIND_API_KEY\"):\n", + " os.environ[\"NETMIND_API_KEY\"] = getpass.getpass(\"Enter your Netmind API key: \")" + ], + "outputs": [], + "execution_count": 1 + }, + { + "cell_type": "markdown", + "id": "72ee0c4b-9764-423a-9dbf-95129e185210", + "metadata": {}, + "source": [ + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:" + ] + }, + { + "cell_type": "code", + "id": "a15d341e-3e26-4ca3-830b-5aab30ed66de", + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-20T02:00:33.421446Z", + "start_time": "2025-03-20T02:00:33.419081Z" + } + }, + "source": [ + "# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", + "# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")" + ], + "outputs": [], + "execution_count": 2 + }, + { + "cell_type": "markdown", + "id": "0730d6a1-c893-4840-9817-5e5251676d5d", + "metadata": {}, + "source": [ + "### Installation\n", + "\n", + "The LangChain Netmind integration lives in the `langchain-netmind` package:" + ] + }, + { + "cell_type": "code", + "id": "652d6238-1f87-422a-b135-f5abbb8652fc", + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-20T02:00:35.923300Z", + "start_time": "2025-03-20T02:00:34.505928Z" + } + }, + "source": [ + "%pip install -qU langchain-netmind" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r\n", + "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip is available: \u001B[0m\u001B[31;49m24.0\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m25.0.1\u001B[0m\r\n", + "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\r\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "execution_count": 3 + }, + { + "cell_type": "markdown", + "id": "a38cde65-254d-4219-a441-068766c0d4b5", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object and generate chat completions:\n" + ] + }, + { + "cell_type": "code", + "id": "cb09c344-1836-4e0c-acf8-11d13ac1dbae", + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-20T02:01:08.007764Z", + "start_time": "2025-03-20T02:01:07.391951Z" + } + }, + "source": [ + "from langchain_netmind import ChatNetmind\n", + "\n", + "llm = ChatNetmind(\n", + " model=\"deepseek-ai/DeepSeek-V3\",\n", + " temperature=0,\n", + " max_tokens=None,\n", + " timeout=None,\n", + " max_retries=2,\n", + " # other params...\n", + ")" + ], + "outputs": [], + "execution_count": 4 + }, + { + "cell_type": "markdown", + "id": "2b4f3e15", + "metadata": {}, + "source": "## Invocation\n" + }, + { + "cell_type": "code", + "id": "62e0dbc3", + "metadata": { + "tags": [], + "ExecuteTime": { + "end_time": "2025-03-20T02:01:19.011273Z", + "start_time": "2025-03-20T02:01:10.295510Z" + } + }, + "source": [ + "messages = [\n", + " (\n", + " \"system\",\n", + " \"You are a helpful assistant that translates English to French. 
Translate the user sentence.\",\n", + " ),\n", + " (\"human\", \"I love programming.\"),\n", + "]\n", + "ai_msg = llm.invoke(messages)\n", + "ai_msg" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content=\"J'adore programmer.\", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 13, 'prompt_tokens': 31, 'total_tokens': 44, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'deepseek-ai/DeepSeek-V3', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-ca6c2010-844d-4bf6-baac-6e248491b000-0', usage_metadata={'input_tokens': 31, 'output_tokens': 13, 'total_tokens': 44, 'input_token_details': {}, 'output_token_details': {}})" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 5 + }, + { + "cell_type": "code", + "id": "d86145b3-bfef-46e8-b227-4dda5c9c2705", + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-20T02:01:20.240190Z", + "start_time": "2025-03-20T02:01:20.238242Z" + } + }, + "source": [ + "print(ai_msg.content)" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "J'adore programmer.\n" + ] + } + ], + "execution_count": 6 + }, + { + "cell_type": "markdown", + "id": "18e2bfc0-7e78-4528-a73f-499ac150dca8", + "metadata": {}, + "source": [ + "## Chaining\n", + "\n", + "We can [chain](/docs/how_to/sequence/) our model with a prompt template like so:\n" + ] + }, + { + "cell_type": "code", + "id": "e197d1d7-a070-4c96-9f8a-a0e86d046e0b", + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-20T02:01:27.456393Z", + "start_time": "2025-03-20T02:01:23.993410Z" + } + }, + "source": [ + "from langchain_core.prompts import ChatPromptTemplate\n", + "\n", + "prompt = ChatPromptTemplate(\n", + " [\n", + " (\n", + " \"system\",\n", + " \"You are a helpful assistant that translates {input_language} to {output_language}.\",\n", + " ),\n", + " (\"human\", \"{input}\"),\n", + " ]\n", + ")\n", + "\n", + "chain = prompt | llm\n", + "chain.invoke(\n", + " {\n", + " \"input_language\": \"English\",\n", + " \"output_language\": \"German\",\n", + " \"input\": \"I love programming.\",\n", + " }\n", + ")" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content='Ich liebe es zu programmieren.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 26, 'total_tokens': 40, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'deepseek-ai/DeepSeek-V3', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-d63adcc6-53ba-4caa-9a79-78d640b39274-0', usage_metadata={'input_tokens': 26, 'output_tokens': 14, 'total_tokens': 40, 'input_token_details': {}, 'output_token_details': {}})" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 7 + }, + { + "cell_type": "markdown", + "id": "d1ee55bc-ffc8-4cfa-801c-993953a08cfd", + "metadata": {}, + "source": "" + }, + { + "cell_type": "markdown", + "id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all ChatNetmind features and configurations head to the API reference: \n", + "* [API reference](https://python.langchain.com/api_reference/) \n", + "* [langchain-netmind](https://github.com/protagolabs/langchain-netmind) \n", + "* [pypi](https://pypi.org/project/langchain-netmind/)" 
+ ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "", + "id": "30f8be8c940bfbf3" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/integrations/providers/netmind.ipynb b/docs/docs/integrations/providers/netmind.ipynb new file mode 100644 index 00000000000..fa73d88a596 --- /dev/null +++ b/docs/docs/integrations/providers/netmind.ipynb @@ -0,0 +1,73 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Netmind\n", + "\n", + "[Netmind AI](https://www.netmind.ai/): Build AI Faster, Smarter, and More Affordably.\n", + "Train, Fine-tune, Run Inference, and Scale with our Global GPU Network—Your all-in-one AI Engine.\n", + "\n", + "This example goes over how to use LangChain to interact with Netmind AI models.\n" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Installation and Setup\n", + "\n", + "```bash\n", + "pip install langchain-netmind\n", + "```\n", + "\n", + "Get a Netmind API key and set it as an environment variable (`NETMIND_API_KEY`). \n", + "Head to https://www.netmind.ai/ to sign up to Netmind and generate an API key. \n", + "\n" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Chat Models\n", + "\n", + "For more on Netmind chat models, visit the guide [here](/docs/integrations/chat/netmind)." + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Embedding Model\n", + "\n", + "For more on Netmind embedding models, visit the [guide](/docs/integrations/text_embedding/netmind).\n" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/docs/docs/integrations/text_embedding/netmind.ipynb b/docs/docs/integrations/text_embedding/netmind.ipynb new file mode 100644 index 00000000000..ad59fc28590 --- /dev/null +++ b/docs/docs/integrations/text_embedding/netmind.ipynb @@ -0,0 +1,323 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Netmind\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "9a3d6f34", + "metadata": {}, + "source": [ + "# NetmindEmbeddings\n", + "\n", + "This will help you get started with Netmind embedding models using LangChain. 
For detailed documentation on `NetmindEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/).\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "| Provider | Package |\n", + "|:--------:|:-------:|\n", + "| [Netmind](/docs/integrations/providers/netmind/) | [langchain-netmind](https://python.langchain.com/api_reference/) |\n", + "\n", + "## Setup\n", + "\n", + "To access Netmind embedding models you'll need to create a Netmind account, get an API key, and install the `langchain-netmind` integration package.\n", + "\n", + "### Credentials\n", + "\n", + "Head to https://www.netmind.ai/ to sign up to Netmind and generate an API key. Once you've done this set the NETMIND_API_KEY environment variable:" + ] + }, + { + "cell_type": "code", + "id": "36521c2a", + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-20T01:53:29.982962Z", + "start_time": "2025-03-20T01:53:27.764291Z" + } + }, + "source": [ + "import getpass\n", + "import os\n", + "\n", + "if not os.getenv(\"NETMIND_API_KEY\"):\n", + " os.environ[\"NETMIND_API_KEY\"] = getpass.getpass(\"Enter your Netmind API key: \")" + ], + "outputs": [], + "execution_count": 1 + }, + { + "cell_type": "markdown", + "id": "c84fb993", + "metadata": {}, + "source": [ + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:" + ] + }, + { + "cell_type": "code", + "id": "39a4953b", + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-20T01:53:32.143687Z", + "start_time": "2025-03-20T01:53:32.141858Z" + } + }, + "source": [ + "# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", + "# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")" + ], + "outputs": [], + "execution_count": 2 + }, + { + "cell_type": "markdown", + "id": "d9664366", + "metadata": {}, + "source": [ + "### Installation\n", + "\n", + "The LangChain Netmind integration lives in the `langchain-netmind` package:" + ] + }, + { + "cell_type": "code", + "id": "64853226", + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-20T01:53:38.639440Z", + "start_time": "2025-03-20T01:53:36.171640Z" + } + }, + "source": [ + "%pip install -qU langchain-netmind" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r\n", + "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip is available: \u001B[0m\u001B[31;49m24.0\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m25.0.1\u001B[0m\r\n", + "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\r\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "execution_count": 3 + }, + { + "cell_type": "markdown", + "id": "45dd1724", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object:\n" + ] + }, + { + "cell_type": "code", + "id": "9ea7a09b", + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-20T01:54:31.005334Z", + "start_time": "2025-03-20T01:54:30.146876Z" + } + }, + "source": [ + "from langchain_netmind import NetmindEmbeddings\n", + "\n", + "embeddings = NetmindEmbeddings(\n", + " model=\"nvidia/NV-Embed-v2\",\n", + ")" + ], + "outputs": [], + "execution_count": 4 + }, + { + "cell_type": "markdown", + "id": "77d271b6", + "metadata": 
{}, + "source": [ + "## Indexing and Retrieval\n", + "\n", + "Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n", + "\n", + "Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`." + ] + }, + { + "cell_type": "code", + "id": "d817716b", + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-20T01:54:40.963137Z", + "start_time": "2025-03-20T01:54:34.500805Z" + } + }, + "source": [ + "# Create a vector store with a sample text\n", + "from langchain_core.vectorstores import InMemoryVectorStore\n", + "\n", + "text = \"LangChain is the framework for building context-aware reasoning applications\"\n", + "\n", + "vectorstore = InMemoryVectorStore.from_texts(\n", + " [text],\n", + " embedding=embeddings,\n", + ")\n", + "\n", + "# Use the vectorstore as a retriever\n", + "retriever = vectorstore.as_retriever()\n", + "\n", + "# Retrieve the most similar text\n", + "retrieved_documents = retriever.invoke(\"What is LangChain?\")\n", + "\n", + "# show the retrieved document's content\n", + "retrieved_documents[0].page_content" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "'LangChain is the framework for building context-aware reasoning applications'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 5 + }, + { + "cell_type": "markdown", + "id": "e02b9855", + "metadata": {}, + "source": [ + "## Direct Usage\n", + "\n", + "Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n", + "\n", + "You can directly call these methods to get embeddings for your own use cases.\n", + "\n", + "### Embed single texts\n", + "\n", + "You can embed single texts or documents with `embed_query`:" + ] + }, + { + "cell_type": "code", + "id": "0d2befcd", + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-20T01:54:49.540750Z", + "start_time": "2025-03-20T01:54:45.196528Z" + } + }, + "source": [ + "single_vector = embeddings.embed_query(text)\n", + "print(str(single_vector)[:100]) # Show the first 100 characters of the vector" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[-0.0051240199245512486, -0.01726294495165348, 0.011966848745942116, -0.0018107350915670395, 0.01146\n" + ] + } + ], + "execution_count": 6 + }, + { + "cell_type": "markdown", + "id": "1b5a7d03", + "metadata": {}, + "source": [ + "### Embed multiple texts\n", + "\n", + "You can embed multiple texts with `embed_documents`:" + ] + }, + { + "cell_type": "code", + "id": "2f4d6e97", + "metadata": { + "ExecuteTime": { + "end_time": "2025-03-20T01:54:57.089847Z", + "start_time": "2025-03-20T01:54:52.468719Z" + } + }, + "source": [ + "text2 = (\n", + " \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n", + ")\n", + "two_vectors = embeddings.embed_documents([text, text2])\n", + "for vector in two_vectors:\n", + " print(str(vector)[:100]) # Show the first 100 characters of the vector" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[-0.0051240199245512486, -0.01726294495165348, 
0.011966848745942116, -0.0018107350915670395, 0.01146\n", + "[0.022523142397403717, -0.002223758026957512, -0.008578270673751831, -0.006029821466654539, 0.008752\n" + ] + } + ], + "execution_count": 7 + }, + { + "cell_type": "markdown", + "id": "98785c12", + "metadata": {}, + "source": [ + "## API Reference\n", + "\n", + "For detailed documentation on `NetmindEmbeddings` features and configuration options, please refer to the: \n", + "* [API reference](https://python.langchain.com/api_reference/) \n", + "* [langchain-netmind](https://github.com/protagolabs/langchain-netmind) \n", + "* [pypi](https://pypi.org/project/langchain-netmind/)\n" + ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "", + "id": "adb9e45c34733299" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/packages.yml b/libs/packages.yml index d53e7a24ccc..ea4881568b2 100644 --- a/libs/packages.yml +++ b/libs/packages.yml @@ -517,6 +517,9 @@ packages: repo: OpenGradient/og-langchain downloads: 274 downloads_updated_at: '2025-03-22T21:59:15.663971+00:00' +- name: langchain-netmind + path: . + repo: protagolabs/langchain-netmind - name: langchain-agentql path: langchain repo: tinyfish-io/agentql-integrations From 92189c8b31503c5bbe263f9030d0d70b36a7ee53 Mon Sep 17 00:00:00 2001 From: Philippe PRADOS Date: Fri, 28 Mar 2025 15:15:40 +0100 Subject: [PATCH 30/30] community[patch]: Handle gray scale images in ImageBlobParser (Fixes 30261 and 29586) (#30493) Fix [29586](https://github.com/langchain-ai/langchain/issues/29586) and [30261](https://github.com/langchain-ai/langchain/pull/30261) --- .../document_loaders/parsers/images.py | 6 ++- .../document_loaders/parsers/test_images.py | 43 ++++++++++++++++++- 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/libs/community/langchain_community/document_loaders/parsers/images.py b/libs/community/langchain_community/document_loaders/parsers/images.py index eb3da4d2a11..3d977aae973 100644 --- a/libs/community/langchain_community/document_loaders/parsers/images.py +++ b/libs/community/langchain_community/document_loaders/parsers/images.py @@ -53,7 +53,11 @@ class BaseImageBlobParser(BaseBlobParser): with blob.as_bytes_io() as buf: if blob.mimetype == "application/x-npy": - img = Img.fromarray(numpy.load(buf)) + array = numpy.load(buf) + if array.ndim == 3 and array.shape[2] == 1: # Grayscale image + img = Img.fromarray(numpy.squeeze(array, axis=2), mode="L") + else: + img = Img.fromarray(array) else: img = Img.open(buf) content = self._analyze_image(img) diff --git a/libs/community/tests/integration_tests/document_loaders/parsers/test_images.py b/libs/community/tests/integration_tests/document_loaders/parsers/test_images.py index e6d71fae692..e5c6e372b8c 100644 --- a/libs/community/tests/integration_tests/document_loaders/parsers/test_images.py +++ b/libs/community/tests/integration_tests/document_loaders/parsers/test_images.py @@ -1,7 +1,9 @@ import re +from io import BytesIO from pathlib import Path from typing import Any, Type +import numpy as np import pytest from langchain_core.documents.base import Blob from 
langchain_core.language_models import FakeMessagesListChatModel @@ -18,12 +20,13 @@ building_image = Blob.from_path(path_base / "examples/building.jpg") text_image = Blob.from_path(path_base / "examples/text.png") page_image = Blob.from_path(path_base / "examples/page.png") +_re_in_image = r"(?ms).*MAKE.*TEXT.*STAND.*OUT.*FROM.*" + @pytest.mark.parametrize( "blob,body", [ - (building_image, ""), - (text_image, r"(?ms).*MAKE.*TEXT.*STAND.*OUT.*FROM.*BACKGROUNDS.*"), + (Blob.from_path(path_base / "examples/text-gray.png"), _re_in_image), ], ) @pytest.mark.parametrize( @@ -58,3 +61,39 @@ def test_image_parser_with_differents_files( documents = list(blob_loader(**kw).lazy_parse(blob)) assert len(documents) == 1 assert re.compile(body).match(documents[0].page_content) + + +@pytest.mark.parametrize( + "blob_loader,kw", + [ + (RapidOCRBlobParser, {}), + (TesseractBlobParser, {}), + ( + LLMImageBlobParser, + { + "model": FakeMessagesListChatModel( + responses=[ + ChatMessage( + id="ai1", + role="system", + content="A building. MAKE TEXT STAND OUT FROM BACKGROUNDS", + ), + ] + ) + }, + ), + ], +) +def test_image_parser_with_numpy( + blob_loader: Type, + kw: dict[str, Any], +) -> None: + gray_image = np.empty(shape=(412, 1652, 1)) + with BytesIO() as buffer: + np.save(buffer, gray_image) + buffer.seek(0) + npy_bytes = buffer.getvalue() + + blob = Blob.from_data(npy_bytes, mime_type="application/x-npy") + documents = list(blob_loader(**kw).lazy_parse(blob)) + assert len(documents) == 1
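For context, here is a minimal usage sketch of the grayscale path fixed in the patch above; it is not part of the diff. It assumes `langchain-community` is installed with an OCR backend for `RapidOCRBlobParser` (for example `rapidocr-onnxruntime`) and that the parser is importable from `langchain_community.document_loaders.parsers.images`; the array shape and mime type mirror the new `test_image_parser_with_numpy` unit test.

```python
from io import BytesIO

import numpy as np
from langchain_community.document_loaders.parsers.images import RapidOCRBlobParser
from langchain_core.documents.base import Blob

# Build a single-channel (H, W, 1) grayscale image and serialize it as .npy bytes,
# the same shape family exercised by the new unit test.
gray_image = np.zeros((412, 1652, 1), dtype=np.uint8)
with BytesIO() as buffer:
    np.save(buffer, gray_image)
    npy_bytes = buffer.getvalue()

# The "application/x-npy" mime type routes the blob through the numpy branch of
# BaseImageBlobParser, which now squeezes the channel axis and opens the array as
# a PIL "L" (grayscale) image instead of raising on the (H, W, 1) shape.
blob = Blob.from_data(npy_bytes, mime_type="application/x-npy")
documents = list(RapidOCRBlobParser().lazy_parse(blob))
assert len(documents) == 1  # one Document per image blob; page_content holds the OCR text
```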