Mirror of https://github.com/hwchase17/langchain.git (synced 2025-08-11 05:45:01 +00:00)

Commit ad86d8f066: Merge branch 'langchain-ai:master' into master
@@ -173,7 +173,7 @@
    "source": [
     "## Indexing and Retrieval\n",
     "\n",
-    "Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
+    "Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
     "\n",
     "Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
    ]

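A minimal sketch of the indexing-and-retrieval flow described in the cell above, assuming the `embeddings` object initialized earlier in the notebook; the sample text and query are illustrative only:

from langchain_core.vectorstores import InMemoryVectorStore

# Index a single sample document with the embeddings object from above.
text = "LangChain is the framework for building context-aware reasoning applications"
vectorstore = InMemoryVectorStore.from_texts([text], embedding=embeddings)

# Expose the vector store as a retriever and run a similarity search.
retriever = vectorstore.as_retriever()
retrieved_documents = retriever.invoke("What is LangChain?")
print(retrieved_documents[0].page_content)
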
@@ -322,12 +322,12 @@ const FEATURE_TABLES = {
     {
         name: "VertexAILLM",
         link: "google_vertexai",
-        package: "langchain-google_vertexai",
+        package: "langchain-google-vertexai",
         apiLink: "https://python.langchain.com/api_reference/google_vertexai/llms/langchain_google_vertexai.llms.VertexAI.html"
     },
     {
         name: "NVIDIA",
-        link: "NVIDIA",
+        link: "nvidia_ai_endpoints",
         package: "langchain-nvidia",
         apiLink: "https://python.langchain.com/api_reference/nvidia_ai_endpoints/llm/langchain_nvidia_ai_endpoints.llm.NVIDIA.html"
     },

@@ -36,6 +36,8 @@ class FakeMessagesListChatModel(BaseChatModel):
         run_manager: Optional[CallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> ChatResult:
+        if self.sleep is not None:
+            time.sleep(self.sleep)
         response = self.responses[self.i]
         if self.i < len(self.responses) - 1:
             self.i += 1

@@ -61,9 +63,9 @@ class FakeListChatModel(SimpleChatModel):
     """List of responses to **cycle** through in order."""
     sleep: Optional[float] = None
     i: int = 0
-    """List of responses to **cycle** through in order."""
-    error_on_chunk_number: Optional[int] = None
+    """Internally incremented after every model invocation."""
+    error_on_chunk_number: Optional[int] = None
     """If set, raise an error on the specified chunk number during streaming."""

     @property
     @override

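For illustration, a minimal sketch of how the `error_on_chunk_number` field documented above is meant to be used; the exact exception type raised mid-stream is not shown in this hunk, so a broad except is used here:

from langchain_core.language_models import FakeListChatModel

# Deliberately fail on the second streamed chunk (chunk numbers start at 0).
model = FakeListChatModel(responses=["hello world"], error_on_chunk_number=1)

received = []
try:
    for chunk in model.stream("hi"):
        received.append(chunk.content)
except Exception as exc:
    print(f"stream failed after {len(received)} chunk(s): {exc!r}")
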
@@ -79,6 +81,8 @@ class FakeListChatModel(SimpleChatModel):
         **kwargs: Any,
     ) -> str:
         """First try to lookup in queries, else return 'foo' or 'bar'."""
+        if self.sleep is not None:
+            time.sleep(self.sleep)
         response = self.responses[self.i]
         if self.i < len(self.responses) - 1:
             self.i += 1

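The two hunks above wire the existing `sleep` field into the synchronous call paths of both fake models; a minimal usage sketch, with the delay value chosen arbitrarily:

import time

from langchain_core.language_models import FakeListChatModel

# Each invocation now pauses for `sleep` seconds before returning a canned response.
model = FakeListChatModel(responses=["a", "b"], sleep=0.05)

start = time.time()
reply = model.invoke("hello")
print(reply.content, f"({time.time() - start:.3f}s)")  # "a", roughly 0.05s or more
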
@@ -146,13 +146,12 @@ class InMemoryRateLimiter(BaseRateLimiter):

         Args:
             requests_per_second: The number of tokens to add per second to the bucket.
-                Must be at least 1. The tokens represent "credit" that can be used
-                to make requests.
+                The tokens represent "credit" that can be used to make requests.
             check_every_n_seconds: check whether the tokens are available
                 every this many seconds. Can be a float to represent
                 fractions of a second.
             max_bucket_size: The maximum number of tokens that can be in the bucket.
-                This is used to prevent bursts of requests.
+                Must be at least 1. Used to prevent bursts of requests.
         """
         # Number of requests that we can make per second.
         self.requests_per_second = requests_per_second

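A minimal sketch of how these constructor arguments are typically passed, using a fake model for brevity; note that the revised docstring no longer requires `requests_per_second` to be at least 1, so fractional rates such as one request every two seconds are valid:

from langchain_core.language_models import FakeListChatModel
from langchain_core.rate_limiters import InMemoryRateLimiter

# Roughly one request every two seconds, polled every 100 ms, with at most
# 10 tokens of burst capacity in the bucket.
limiter = InMemoryRateLimiter(
    requests_per_second=0.5,
    check_every_n_seconds=0.1,
    max_bucket_size=10,
)

model = FakeListChatModel(responses=["ok"], rate_limiter=limiter)
model.invoke("hello")  # blocks until a token is available
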
@@ -150,6 +150,11 @@ def parse_tag(template: str, l_del: str, r_del: str) -> tuple[tuple[str, str], str]:
         msg = f"unclosed tag at line {_CURRENT_LINE}"
         raise ChevronError(msg) from e

+    # Check for empty tags
+    if not tag.strip():
+        msg = f"empty tag at line {_CURRENT_LINE}"
+        raise ChevronError(msg)
+
     # Find the type meaning of the first character
     tag_type = tag_types.get(tag[0], "variable")

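With this check in place, an empty mustache tag now fails fast instead of slipping through the tokenizer; a minimal sketch of the user-facing effect, assuming the error surfaces when the template is parsed:

from langchain_core.prompts import PromptTemplate
from langchain_core.utils.mustache import ChevronError

try:
    PromptTemplate.from_template("hi {{}}", template_format="mustache")
except ChevronError as exc:
    print(exc)  # e.g. "empty tag at line 1"
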
@@ -1,5 +1,6 @@
 """Tests for verifying that testing utility code works as expected."""

+import time
 from itertools import cycle
 from typing import Any, Optional, Union
 from uuid import UUID

@@ -9,10 +10,11 @@ from typing_extensions import override
 from langchain_core.callbacks.base import AsyncCallbackHandler
 from langchain_core.language_models import (
     FakeListChatModel,
+    FakeMessagesListChatModel,
     GenericFakeChatModel,
     ParrotFakeChatModel,
 )
-from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage
+from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage, HumanMessage
 from langchain_core.outputs import ChatGenerationChunk, GenerationChunk
 from tests.unit_tests.stubs import (
     _any_id_ai_message,

@@ -230,3 +232,18 @@ def test_fake_list_chat_model_batch() -> None:
     fake = FakeListChatModel(responses=["a", "b", "c"])
     resp = fake.batch(["1", "2", "3"])
     assert resp == expected
+
+
+def test_fake_messages_list_chat_model_sleep_delay() -> None:
+    sleep_time = 0.1
+    model = FakeMessagesListChatModel(
+        responses=[AIMessage(content="A"), AIMessage(content="B")],
+        sleep=sleep_time,
+    )
+    messages = [HumanMessage(content="C")]
+
+    start = time.time()
+    model.invoke(messages)
+    elapsed = time.time() - start
+
+    assert elapsed >= sleep_time

@@ -2,6 +2,7 @@ from functools import partial
 from inspect import isclass
 from typing import Any, Union, cast

+import pytest
 from pydantic import BaseModel

 from langchain_core.language_models import FakeListChatModel

@@ -10,6 +11,7 @@ from langchain_core.load.load import loads
 from langchain_core.messages import HumanMessage
 from langchain_core.prompts.structured import StructuredPrompt
 from langchain_core.runnables.base import Runnable, RunnableLambda
+from langchain_core.utils.mustache import ChevronError
 from langchain_core.utils.pydantic import is_basemodel_subclass

@@ -128,3 +130,8 @@ def test_structured_prompt_template_format() -> None:
     assert prompt.invoke({"person": {"name": "foo"}}).to_messages() == [
         HumanMessage("hi foo")
     ]
+
+
+def test_structured_prompt_template_empty_vars() -> None:
+    with pytest.raises(ChevronError, match="empty tag"):
+        StructuredPrompt([("human", "hi {{}}")], schema={}, template_format="mustache")

@@ -16,7 +16,7 @@ class TestHuggingFaceEndpoint(ChatModelIntegrationTests):
     @property
     def chat_model_params(self) -> dict:
         llm = HuggingFaceEndpoint(  # type: ignore[call-arg]
-            repo_id="Qwen/Qwen2.5-72B-Instruct",
+            repo_id="meta-llama/Llama-4-Maverick-17B-128E-Instruct",
             task="conversational",
             provider="fireworks-ai",
             temperature=0,

@@ -59,12 +59,6 @@ class TestHuggingFaceEndpoint(ChatModelIntegrationTests):
             model, my_adder_tool=my_adder_tool
         )

-    @pytest.mark.xfail(reason=("Not implemented"))
-    def test_structured_few_shot_examples(
-        self, model: BaseChatModel, my_adder_tool: BaseTool
-    ) -> None:
-        super().test_structured_few_shot_examples(model, my_adder_tool=my_adder_tool)
-
     @property
     def has_tool_choice(self) -> bool:
         return False

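For context, a minimal sketch of how the endpoint parameters configured in this integration test are typically wrapped for chat use; only the constructor arguments come from the diff above, the `ChatHuggingFace` wrapper and the surrounding code are assumptions:

from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

llm = HuggingFaceEndpoint(  # type: ignore[call-arg]
    repo_id="meta-llama/Llama-4-Maverick-17B-128E-Instruct",
    task="conversational",
    provider="fireworks-ai",
    temperature=0,
)
chat = ChatHuggingFace(llm=llm)
print(chat.invoke("Say hello in one word.").content)
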