diff --git a/docs/docs/integrations/text_embedding/google_generative_ai.ipynb b/docs/docs/integrations/text_embedding/google_generative_ai.ipynb
index 5e743279a23..26a022c8f77 100644
--- a/docs/docs/integrations/text_embedding/google_generative_ai.ipynb
+++ b/docs/docs/integrations/text_embedding/google_generative_ai.ipynb
@@ -173,7 +173,7 @@
    "source": [
     "## Indexing and Retrieval\n",
     "\n",
-    "Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
+    "Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
     "\n",
     "Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
    ]
diff --git a/docs/src/theme/FeatureTables.js b/docs/src/theme/FeatureTables.js
index d1b688d2f84..215c72481eb 100644
--- a/docs/src/theme/FeatureTables.js
+++ b/docs/src/theme/FeatureTables.js
@@ -322,12 +322,12 @@ const FEATURE_TABLES = {
         {
             name: "VertexAILLM",
             link: "google_vertexai",
-            package: "langchain-google_vertexai",
+            package: "langchain-google-vertexai",
             apiLink: "https://python.langchain.com/api_reference/google_vertexai/llms/langchain_google_vertexai.llms.VertexAI.html"
         },
         {
             name: "NVIDIA",
-            link: "NVIDIA",
+            link: "nvidia_ai_endpoints",
             package: "langchain-nvidia",
             apiLink: "https://python.langchain.com/api_reference/nvidia_ai_endpoints/llm/langchain_nvidia_ai_endpoints.llm.NVIDIA.html"
         },
diff --git a/libs/core/langchain_core/language_models/fake_chat_models.py b/libs/core/langchain_core/language_models/fake_chat_models.py
index a1cdd49a7b3..184a4fcb154 100644
--- a/libs/core/langchain_core/language_models/fake_chat_models.py
+++ b/libs/core/langchain_core/language_models/fake_chat_models.py
@@ -36,6 +36,8 @@ class FakeMessagesListChatModel(BaseChatModel):
         run_manager: Optional[CallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> ChatResult:
+        if self.sleep is not None:
+            time.sleep(self.sleep)
         response = self.responses[self.i]
         if self.i < len(self.responses) - 1:
             self.i += 1
@@ -61,9 +63,9 @@ class FakeListChatModel(SimpleChatModel):
     """List of responses to **cycle** through in order."""
     sleep: Optional[float] = None
     i: int = 0
-    """List of responses to **cycle** through in order."""
-    error_on_chunk_number: Optional[int] = None
     """Internally incremented after every model invocation."""
+    error_on_chunk_number: Optional[int] = None
+    """If set, raise an error on the specified chunk number during streaming."""

     @property
     @override
@@ -79,6 +81,8 @@ class FakeListChatModel(SimpleChatModel):
         **kwargs: Any,
     ) -> str:
         """First try to lookup in queries, else return 'foo' or 'bar'."""
+        if self.sleep is not None:
+            time.sleep(self.sleep)
         response = self.responses[self.i]
         if self.i < len(self.responses) - 1:
             self.i += 1
diff --git a/libs/core/langchain_core/rate_limiters.py b/libs/core/langchain_core/rate_limiters.py
index 952bdaaf8ad..cf5c61d3ea7 100644
--- a/libs/core/langchain_core/rate_limiters.py
+++ b/libs/core/langchain_core/rate_limiters.py
@@ -146,13 +146,12 @@ class InMemoryRateLimiter(BaseRateLimiter):

         Args:
             requests_per_second: The number of tokens to add per second to the bucket.
-                Must be at least 1. The tokens represent "credit" that can be used
-                to make requests.
+                The tokens represent "credit" that can be used to make requests.
             check_every_n_seconds: check whether the tokens are available
                 every this many seconds. Can be a float to represent
                 fractions of a second.
             max_bucket_size: The maximum number of tokens that can be in the bucket.
-                This is used to prevent bursts of requests.
+                Must be at least 1. Used to prevent bursts of requests.
         """
         # Number of requests that we can make per second.
         self.requests_per_second = requests_per_second
diff --git a/libs/core/langchain_core/utils/mustache.py b/libs/core/langchain_core/utils/mustache.py
index be0739568c2..12cab8da0a3 100644
--- a/libs/core/langchain_core/utils/mustache.py
+++ b/libs/core/langchain_core/utils/mustache.py
@@ -150,6 +150,11 @@ def parse_tag(template: str, l_del: str, r_del: str) -> tuple[tuple[str, str], s
         msg = f"unclosed tag at line {_CURRENT_LINE}"
         raise ChevronError(msg) from e

+    # Check for empty tags
+    if not tag.strip():
+        msg = f"empty tag at line {_CURRENT_LINE}"
+        raise ChevronError(msg)
+
    # Find the type meaning of the first character
    tag_type = tag_types.get(tag[0], "variable")

diff --git a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py
index 7500e1640ac..ce262797535 100644
--- a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py
+++ b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py
@@ -1,5 +1,6 @@
 """Tests for verifying that testing utility code works as expected."""

+import time
 from itertools import cycle
 from typing import Any, Optional, Union
 from uuid import UUID
@@ -9,10 +10,11 @@ from typing_extensions import override
 from langchain_core.callbacks.base import AsyncCallbackHandler
 from langchain_core.language_models import (
     FakeListChatModel,
+    FakeMessagesListChatModel,
     GenericFakeChatModel,
     ParrotFakeChatModel,
 )
-from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage
+from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage, HumanMessage
 from langchain_core.outputs import ChatGenerationChunk, GenerationChunk
 from tests.unit_tests.stubs import (
     _any_id_ai_message,
@@ -230,3 +232,18 @@ def test_fake_list_chat_model_batch() -> None:
     fake = FakeListChatModel(responses=["a", "b", "c"])
     resp = fake.batch(["1", "2", "3"])
     assert resp == expected
+
+
+def test_fake_messages_list_chat_model_sleep_delay() -> None:
+    sleep_time = 0.1
+    model = FakeMessagesListChatModel(
+        responses=[AIMessage(content="A"), AIMessage(content="B")],
+        sleep=sleep_time,
+    )
+    messages = [HumanMessage(content="C")]
+
+    start = time.time()
+    model.invoke(messages)
+    elapsed = time.time() - start
+
+    assert elapsed >= sleep_time
diff --git a/libs/core/tests/unit_tests/prompts/test_structured.py b/libs/core/tests/unit_tests/prompts/test_structured.py
index 0b74b37cc6f..fda2d00b367 100644
--- a/libs/core/tests/unit_tests/prompts/test_structured.py
+++ b/libs/core/tests/unit_tests/prompts/test_structured.py
@@ -2,6 +2,7 @@ from functools import partial
 from inspect import isclass
 from typing import Any, Union, cast

+import pytest
 from pydantic import BaseModel

 from langchain_core.language_models import FakeListChatModel
@@ -10,6 +11,7 @@ from langchain_core.load.load import loads
 from langchain_core.messages import HumanMessage
 from langchain_core.prompts.structured import StructuredPrompt
 from langchain_core.runnables.base import Runnable, RunnableLambda
+from langchain_core.utils.mustache import ChevronError
 from langchain_core.utils.pydantic import is_basemodel_subclass
@@ -128,3 +130,8 @@ def test_structured_prompt_template_format() -> None:
     assert prompt.invoke({"person": {"name": "foo"}}).to_messages() == [
         HumanMessage("hi foo")
     ]
+
+
+def test_structured_prompt_template_empty_vars() -> None:
+    with pytest.raises(ChevronError, match="empty tag"):
+        StructuredPrompt([("human", "hi {{}}")], schema={}, template_format="mustache")
diff --git a/libs/partners/huggingface/tests/integration_tests/test_standard.py b/libs/partners/huggingface/tests/integration_tests/test_standard.py
index 27f5c2368a7..e3f0cacf706 100644
--- a/libs/partners/huggingface/tests/integration_tests/test_standard.py
+++ b/libs/partners/huggingface/tests/integration_tests/test_standard.py
@@ -16,7 +16,7 @@ class TestHuggingFaceEndpoint(ChatModelIntegrationTests):
     @property
     def chat_model_params(self) -> dict:
         llm = HuggingFaceEndpoint(  # type: ignore[call-arg]
-            repo_id="Qwen/Qwen2.5-72B-Instruct",
+            repo_id="meta-llama/Llama-4-Maverick-17B-128E-Instruct",
             task="conversational",
             provider="fireworks-ai",
             temperature=0,
@@ -59,12 +59,6 @@ class TestHuggingFaceEndpoint(ChatModelIntegrationTests):
             model, my_adder_tool=my_adder_tool
         )

-    @pytest.mark.xfail(reason=("Not implemented"))
-    def test_structured_few_shot_examples(
-        self, model: BaseChatModel, my_adder_tool: BaseTool
-    ) -> None:
-        super().test_structured_few_shot_examples(model, my_adder_tool=my_adder_tool)
-
     @property
     def has_tool_choice(self) -> bool:
         return False
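
A few usage sketches of the behavior this patch touches. First, the `sleep` support added to `FakeMessagesListChatModel._generate`: a minimal sketch assuming langchain-core with this patch applied (the message contents are arbitrary), mirroring the new unit test:

```python
import time

from langchain_core.language_models import FakeMessagesListChatModel
from langchain_core.messages import AIMessage, HumanMessage

# With this patch, _generate pauses for `sleep` seconds before returning
# each canned response, making simulated latency observable in tests.
model = FakeMessagesListChatModel(
    responses=[AIMessage(content="A"), AIMessage(content="B")],
    sleep=0.1,
)

start = time.time()
reply = model.invoke([HumanMessage(content="C")])
elapsed = time.time() - start
print(reply.content, f"after {elapsed:.2f}s")  # "A" after at least 0.1s
```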
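
Second, the `InMemoryRateLimiter` docstring correction: the "must be at least 1" constraint belongs to `max_bucket_size`, not `requests_per_second`. A sketch of standard usage under that reading (parameter values are illustrative; passing the limiter via the `rate_limiter` field is existing langchain-core behavior, not part of this diff):

```python
from langchain_core.language_models import FakeListChatModel
from langchain_core.rate_limiters import InMemoryRateLimiter

limiter = InMemoryRateLimiter(
    requests_per_second=0.5,    # refill rate: one token every 2 seconds
    check_every_n_seconds=0.1,  # how often waiters poll for a token
    max_bucket_size=1,          # burst cap; per the fixed docstring, >= 1
)

# Chat models acquire one token from the limiter per invocation.
model = FakeListChatModel(responses=["a", "b"], rate_limiter=limiter)
print(model.invoke("1").content)  # blocks until a token is available
print(model.invoke("2").content)  # roughly 2 seconds later at 0.5 req/s
```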
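
Third, the new empty-tag guard in `parse_tag`: a mustache template containing `{{}}` now fails fast with a `ChevronError`, as the added `StructuredPrompt` test exercises. The same failure should surface through `ChatPromptTemplate` (a hypothetical example, not part of this diff), since mustache templates are parsed at construction time to extract input variables:

```python
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.utils.mustache import ChevronError

try:
    # The empty tag is rejected up front with a clear message rather
    # than failing deeper inside the mustache parser.
    ChatPromptTemplate.from_messages(
        [("human", "hi {{}}")], template_format="mustache"
    )
except ChevronError as err:
    print(err)  # e.g. "empty tag at line 1"
```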