From 72a0f425ecdaf3e3f1d7f73213900f6a508613c3 Mon Sep 17 00:00:00 2001
From: Kanav Bansal <13186335+bansalkanav@users.noreply.github.com>
Date: Fri, 18 Jul 2025 19:15:28 +0530
Subject: [PATCH 1/8] docs(docs): correct package name from langchain-google_vertexai to langchain-google-vertexai for VertexAILLM (#32095)

- **Description:** This PR updates the `package` field for the VertexAI
  integration in the documentation metadata. The original value was
  `langchain-google_vertexai`, which has been corrected to
  `langchain-google-vertexai` to reflect the actual package name used on
  PyPI and in LangChain integrations.
- **Issue:** N/A
- **Dependencies:** None
- **Twitter handle:** N/A
---
 docs/src/theme/FeatureTables.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/src/theme/FeatureTables.js b/docs/src/theme/FeatureTables.js
index d1b688d2f84..24110bb9cd7 100644
--- a/docs/src/theme/FeatureTables.js
+++ b/docs/src/theme/FeatureTables.js
@@ -322,7 +322,7 @@ const FEATURE_TABLES = {
         {
             name: "VertexAILLM",
             link: "google_vertexai",
-            package: "langchain-google_vertexai",
+            package: "langchain-google-vertexai",
             apiLink: "https://python.langchain.com/api_reference/google_vertexai/llms/langchain_google_vertexai.llms.VertexAI.html"
         },
         {

From 50a12a7ee54711e1f7f4ec82231812652fc71e27 Mon Sep 17 00:00:00 2001
From: Kanav Bansal <13186335+bansalkanav@users.noreply.github.com>
Date: Fri, 18 Jul 2025 19:30:49 +0530
Subject: [PATCH 2/8] fix(docs): fix broken link in VertexAILLM and NVIDIA LLM integrations (#32096)

## **Description:**

This PR updates the `link` values for the following integration metadata
entries:

1. **VertexAILLM**
   - Changed from: `google_vertexai`
   - To: `google_vertex_ai_palm`
2. **NVIDIA**
   - Changed from: `NVIDIA`
   - To: `nvidia_ai_endpoints`

These changes ensure that the documentation links correspond to the correct
integration paths, improving documentation navigation and consistency with
the integration structure.

## **Issue:** N/A

## **Dependencies:** None

## **Twitter handle:** N/A

Co-authored-by: Mason Daugherty
---
 docs/src/theme/FeatureTables.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/src/theme/FeatureTables.js b/docs/src/theme/FeatureTables.js
index 24110bb9cd7..215c72481eb 100644
--- a/docs/src/theme/FeatureTables.js
+++ b/docs/src/theme/FeatureTables.js
@@ -327,7 +327,7 @@ const FEATURE_TABLES = {
         },
         {
             name: "NVIDIA",
-            link: "NVIDIA",
+            link: "nvidia_ai_endpoints",
             package: "langchain-nvidia",
             apiLink: "https://python.langchain.com/api_reference/nvidia_ai_endpoints/llm/langchain_nvidia_ai_endpoints.llm.NVIDIA.html"
         },

From 427d2d6397c7263a9ed5e40c610f468f6c563f1a Mon Sep 17 00:00:00 2001
From: Gurram Siddarth Reddy <73605274+siddarthreddygsr@users.noreply.github.com>
Date: Sat, 19 Jul 2025 01:24:28 +0530
Subject: [PATCH 3/8] fix(core): implement sleep delay in FakeMessagesListChatModel `_generate` (#32014)

Implement sleep delay in FakeMessagesListChatModel._generate so the sleep
parameter is respected, matching the documented behavior. This adds
artificial latency between responses for testing purposes.
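A minimal usage sketch of the resulting behavior (values are illustrative;
`sleep` is the existing field described in the API reference linked below):

```python
from langchain_core.language_models import FakeMessagesListChatModel
from langchain_core.messages import AIMessage, HumanMessage

# Each invocation now blocks for `sleep` seconds before returning the next
# canned response, giving latency-sensitive test code something to measure.
model = FakeMessagesListChatModel(
    responses=[AIMessage(content="first"), AIMessage(content="second")],
    sleep=0.5,
)
result = model.invoke([HumanMessage(content="hi")])  # "first", after ~0.5s
```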
Issue: closes [#31974](https://github.com/langchain-ai/langchain/issues/31974),
following the [docs](https://python.langchain.com/api_reference/core/language_models/langchain_core.language_models.fake_chat_models.FakeMessagesListChatModel.html#langchain_core.language_models.fake_chat_models.FakeMessagesListChatModel.sleep)

Dependencies: none

Twitter handle: [@siddarthreddyg2](https://x.com/siddarthreddyg2)

---------

Signed-off-by: Siddarthreddygsr
---
 .../language_models/fake_chat_models.py     |  2 ++
 .../unit_tests/fake/test_fake_chat_model.py | 19 ++++++++++++++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/libs/core/langchain_core/language_models/fake_chat_models.py b/libs/core/langchain_core/language_models/fake_chat_models.py
index a1cdd49a7b3..b8dd7325242 100644
--- a/libs/core/langchain_core/language_models/fake_chat_models.py
+++ b/libs/core/langchain_core/language_models/fake_chat_models.py
@@ -36,6 +36,8 @@ class FakeMessagesListChatModel(BaseChatModel):
         run_manager: Optional[CallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> ChatResult:
+        if self.sleep is not None:
+            time.sleep(self.sleep)
         response = self.responses[self.i]
         if self.i < len(self.responses) - 1:
             self.i += 1
diff --git a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py
index 7500e1640ac..ce262797535 100644
--- a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py
+++ b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py
@@ -1,5 +1,6 @@
 """Tests for verifying that testing utility code works as expected."""
 
+import time
 from itertools import cycle
 from typing import Any, Optional, Union
 from uuid import UUID
@@ -9,10 +10,11 @@ from typing_extensions import override
 from langchain_core.callbacks.base import AsyncCallbackHandler
 from langchain_core.language_models import (
     FakeListChatModel,
+    FakeMessagesListChatModel,
     GenericFakeChatModel,
     ParrotFakeChatModel,
 )
-from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage
+from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage, HumanMessage
 from langchain_core.outputs import ChatGenerationChunk, GenerationChunk
 from tests.unit_tests.stubs import (
     _any_id_ai_message,
@@ -230,3 +232,18 @@ def test_fake_list_chat_model_batch() -> None:
     fake = FakeListChatModel(responses=["a", "b", "c"])
     resp = fake.batch(["1", "2", "3"])
     assert resp == expected
+
+
+def test_fake_messages_list_chat_model_sleep_delay() -> None:
+    sleep_time = 0.1
+    model = FakeMessagesListChatModel(
+        responses=[AIMessage(content="A"), AIMessage(content="B")],
+        sleep=sleep_time,
+    )
+    messages = [HumanMessage(content="C")]
+
+    start = time.time()
+    model.invoke(messages)
+    elapsed = time.time() - start
+
+    assert elapsed >= sleep_time

From 98bfd57a76233efddec46394015d2791e2f4dbed Mon Sep 17 00:00:00 2001
From: Isaac Francisco <78627776+isahers1@users.noreply.github.com>
Date: Fri, 18 Jul 2025 14:00:02 -0700
Subject: [PATCH 4/8] fix(core): better error message for empty var names (#32073)

Previously, we hit an index-out-of-range error with empty variable names
(accessing `tag[0]`); now we raise a slightly nicer error.
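A quick sketch of the new behavior (the error text follows the
`empty tag at line ...` format added in this patch; previously the same
call died with an opaque `IndexError`):

```python
from langchain_core.utils.mustache import ChevronError, render

try:
    render("hi {{}}", {})  # template with an empty variable name
except ChevronError as e:
    print(e)  # e.g. "empty tag at line 1"
```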
---------

Co-authored-by: Mason Daugherty
---
 libs/core/langchain_core/utils/mustache.py            | 5 +++++
 libs/core/tests/unit_tests/prompts/test_structured.py | 7 +++++++
 2 files changed, 12 insertions(+)

diff --git a/libs/core/langchain_core/utils/mustache.py b/libs/core/langchain_core/utils/mustache.py
index be0739568c2..12cab8da0a3 100644
--- a/libs/core/langchain_core/utils/mustache.py
+++ b/libs/core/langchain_core/utils/mustache.py
@@ -150,6 +150,11 @@ def parse_tag(template: str, l_del: str, r_del: str) -> tuple[tuple[str, str], s
         msg = f"unclosed tag at line {_CURRENT_LINE}"
         raise ChevronError(msg) from e
 
+    # Check for empty tags
+    if not tag.strip():
+        msg = f"empty tag at line {_CURRENT_LINE}"
+        raise ChevronError(msg)
+
     # Find the type meaning of the first character
     tag_type = tag_types.get(tag[0], "variable")
diff --git a/libs/core/tests/unit_tests/prompts/test_structured.py b/libs/core/tests/unit_tests/prompts/test_structured.py
index 0b74b37cc6f..fda2d00b367 100644
--- a/libs/core/tests/unit_tests/prompts/test_structured.py
+++ b/libs/core/tests/unit_tests/prompts/test_structured.py
@@ -2,6 +2,7 @@ from functools import partial
 from inspect import isclass
 from typing import Any, Union, cast
 
+import pytest
 from pydantic import BaseModel
 
 from langchain_core.language_models import FakeListChatModel
@@ -10,6 +11,7 @@ from langchain_core.load.load import loads
 from langchain_core.messages import HumanMessage
 from langchain_core.prompts.structured import StructuredPrompt
 from langchain_core.runnables.base import Runnable, RunnableLambda
+from langchain_core.utils.mustache import ChevronError
 from langchain_core.utils.pydantic import is_basemodel_subclass
 
 
@@ -128,3 +130,8 @@ def test_structured_prompt_template_format() -> None:
     assert prompt.invoke({"person": {"name": "foo"}}).to_messages() == [
         HumanMessage("hi foo")
     ]
+
+
+def test_structured_prompt_template_empty_vars() -> None:
+    with pytest.raises(ChevronError, match="empty tag"):
+        StructuredPrompt([("human", "hi {{}}")], schema={}, template_format="mustache")

From f7d1b1fbb191a6d043a03d3ba6114d0b9a92486f Mon Sep 17 00:00:00 2001
From: Kanav Bansal <13186335+bansalkanav@users.noreply.github.com>
Date: Sun, 20 Jul 2025 02:57:31 +0530
Subject: [PATCH 5/8] docs(docs): update RAG tutorials link to point to correct path (#32113)

---
 .../docs/integrations/text_embedding/google_generative_ai.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/docs/integrations/text_embedding/google_generative_ai.ipynb b/docs/docs/integrations/text_embedding/google_generative_ai.ipynb
index 5e743279a23..26a022c8f77 100644
--- a/docs/docs/integrations/text_embedding/google_generative_ai.ipynb
+++ b/docs/docs/integrations/text_embedding/google_generative_ai.ipynb
@@ -173,7 +173,7 @@
    "source": [
     "## Indexing and Retrieval\n",
     "\n",
-    "Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
+    "Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
     "\n",
     "Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
   ]

From 6d71bb83de71e74f5bb77383c1fed121bd72b1b5 Mon Sep 17 00:00:00 2001
From: Yoshi <70424721+yoshihyoda@users.noreply.github.com>
Date: Sat, 19 Jul 2025 14:30:15 -0700
Subject: [PATCH 6/8] fix(core): fix docstrings and add sleep to FakeListChatModel._call (#32108)
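For reference, a small sketch of the corrected semantics (values are
illustrative; `sleep` now also applies to plain invocation via `_call`, and
the docstrings now sit under the fields they actually describe):

```python
from langchain_core.language_models import FakeListChatModel

# `sleep` now delays _call too, so a plain invoke takes ~0.2s here.
model = FakeListChatModel(responses=["hello", "goodbye"], sleep=0.2)
assert model.invoke("hi").content == "hello"

# Per the corrected docstrings: `i` is incremented internally after each
# invocation, and `error_on_chunk_number` (if set) raises once that chunk
# index is reached during streaming.
assert model.i == 1
```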
---
 .../core/langchain_core/language_models/fake_chat_models.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libs/core/langchain_core/language_models/fake_chat_models.py b/libs/core/langchain_core/language_models/fake_chat_models.py
index b8dd7325242..184a4fcb154 100644
--- a/libs/core/langchain_core/language_models/fake_chat_models.py
+++ b/libs/core/langchain_core/language_models/fake_chat_models.py
@@ -63,9 +63,9 @@ class FakeListChatModel(SimpleChatModel):
     """List of responses to **cycle** through in order."""
     sleep: Optional[float] = None
     i: int = 0
-    """List of responses to **cycle** through in order."""
-    error_on_chunk_number: Optional[int] = None
     """Internally incremented after every model invocation."""
+    error_on_chunk_number: Optional[int] = None
+    """If set, raise an error on the specified chunk number during streaming."""
 
     @property
     @override
@@ -81,6 +81,8 @@ class FakeListChatModel(SimpleChatModel):
         **kwargs: Any,
     ) -> str:
         """First try to lookup in queries, else return 'foo' or 'bar'."""
+        if self.sleep is not None:
+            time.sleep(self.sleep)
         response = self.responses[self.i]
         if self.i < len(self.responses) - 1:
             self.i += 1

From cc076ed89176a00063c42ccfde5b92a9c56ab7a3 Mon Sep 17 00:00:00 2001
From: ccurme
Date: Sat, 19 Jul 2025 22:50:31 -0300
Subject: [PATCH 7/8] fix(huggingface): update model used in standard tests (#32116)

---
 .../huggingface/tests/integration_tests/test_standard.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/libs/partners/huggingface/tests/integration_tests/test_standard.py b/libs/partners/huggingface/tests/integration_tests/test_standard.py
index 27f5c2368a7..e3f0cacf706 100644
--- a/libs/partners/huggingface/tests/integration_tests/test_standard.py
+++ b/libs/partners/huggingface/tests/integration_tests/test_standard.py
@@ -16,7 +16,7 @@ class TestHuggingFaceEndpoint(ChatModelIntegrationTests):
     @property
     def chat_model_params(self) -> dict:
         llm = HuggingFaceEndpoint(  # type: ignore[call-arg]
-            repo_id="Qwen/Qwen2.5-72B-Instruct",
+            repo_id="meta-llama/Llama-4-Maverick-17B-128E-Instruct",
             task="conversational",
             provider="fireworks-ai",
             temperature=0,
@@ -59,12 +59,6 @@ class TestHuggingFaceEndpoint(ChatModelIntegrationTests):
             model, my_adder_tool=my_adder_tool
         )
 
-    @pytest.mark.xfail(reason=("Not implemented"))
-    def test_structured_few_shot_examples(
-        self, model: BaseChatModel, my_adder_tool: BaseTool
-    ) -> None:
-        super().test_structured_few_shot_examples(model, my_adder_tool=my_adder_tool)
-
     @property
     def has_tool_choice(self) -> bool:
         return False

From 668c084520b8215d7137c0cd0c25d9725fc927f2 Mon Sep 17 00:00:00 2001
From: astraszab <40484210+astraszab@users.noreply.github.com>
Date: Sun, 20 Jul 2025 18:28:35 +0000
Subject: [PATCH 8/8] docs(core): move incorrect arg limitation in rate limiter's docstring (#32118)
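For context, a minimal sketch of the two arguments whose constraints this
patch swaps (rates are illustrative; `acquire` blocks until a token is
available):

```python
from langchain_core.rate_limiters import InMemoryRateLimiter

# As the corrected docstring states: requests_per_second may be fractional
# (e.g. one request every 10 seconds), while max_bucket_size is the value
# that must be at least 1, since it caps the burst "credit" the bucket holds.
limiter = InMemoryRateLimiter(
    requests_per_second=0.1,    # one token added every 10 seconds
    check_every_n_seconds=0.1,  # poll for a token at 100 ms intervals
    max_bucket_size=1,          # never allow a burst larger than 1 request
)

limiter.acquire()  # blocks until a token is available
limiter.acquire()  # at this rate, returns roughly 10 s after the first call
```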
---
 libs/core/langchain_core/rate_limiters.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/libs/core/langchain_core/rate_limiters.py b/libs/core/langchain_core/rate_limiters.py
index 952bdaaf8ad..cf5c61d3ea7 100644
--- a/libs/core/langchain_core/rate_limiters.py
+++ b/libs/core/langchain_core/rate_limiters.py
@@ -146,13 +146,12 @@ class InMemoryRateLimiter(BaseRateLimiter):
 
         Args:
             requests_per_second: The number of tokens to add per second to the bucket.
-                Must be at least 1. The tokens represent "credit" that can be used
-                to make requests.
+                The tokens represent "credit" that can be used to make requests.
             check_every_n_seconds: check whether the tokens are available
                 every this many seconds. Can be a float to represent
                 fractions of a second.
             max_bucket_size: The maximum number of tokens that can be in the bucket.
-                This is used to prevent bursts of requests.
+                Must be at least 1. Used to prevent bursts of requests.
         """
         # Number of requests that we can make per second.
         self.requests_per_second = requests_per_second