diff --git a/docs/docs/integrations/text_embedding/google_generative_ai.ipynb b/docs/docs/integrations/text_embedding/google_generative_ai.ipynb
index 5e743279a23..26a022c8f77 100644
--- a/docs/docs/integrations/text_embedding/google_generative_ai.ipynb
+++ b/docs/docs/integrations/text_embedding/google_generative_ai.ipynb
@@ -173,7 +173,7 @@
    "source": [
     "## Indexing and Retrieval\n",
     "\n",
-    "Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
+    "Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/rag).\n",
     "\n",
     "Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
    ]
diff --git a/docs/src/theme/FeatureTables.js b/docs/src/theme/FeatureTables.js
index d1b688d2f84..215c72481eb 100644
--- a/docs/src/theme/FeatureTables.js
+++ b/docs/src/theme/FeatureTables.js
@@ -322,12 +322,12 @@ const FEATURE_TABLES = {
         {
             name: "VertexAILLM",
             link: "google_vertexai",
-            package: "langchain-google_vertexai",
+            package: "langchain-google-vertexai",
             apiLink: "https://python.langchain.com/api_reference/google_vertexai/llms/langchain_google_vertexai.llms.VertexAI.html"
         },
         {
             name: "NVIDIA",
-            link: "NVIDIA",
+            link: "nvidia_ai_endpoints",
             package: "langchain-nvidia",
             apiLink: "https://python.langchain.com/api_reference/nvidia_ai_endpoints/llm/langchain_nvidia_ai_endpoints.llm.NVIDIA.html"
         },
diff --git a/libs/core/langchain_core/language_models/fake_chat_models.py b/libs/core/langchain_core/language_models/fake_chat_models.py
index a1cdd49a7b3..184a4fcb154 100644
--- a/libs/core/langchain_core/language_models/fake_chat_models.py
+++ b/libs/core/langchain_core/language_models/fake_chat_models.py
@@ -36,6 +36,8 @@ class FakeMessagesListChatModel(BaseChatModel):
         run_manager: Optional[CallbackManagerForLLMRun] = None,
         **kwargs: Any,
     ) -> ChatResult:
+        if self.sleep is not None:
+            time.sleep(self.sleep)
         response = self.responses[self.i]
         if self.i < len(self.responses) - 1:
             self.i += 1
@@ -61,9 +63,9 @@ class FakeListChatModel(SimpleChatModel):
     """List of responses to **cycle** through in order."""
     sleep: Optional[float] = None
     i: int = 0
-    """List of responses to **cycle** through in order."""
-    error_on_chunk_number: Optional[int] = None
     """Internally incremented after every model invocation."""
+    error_on_chunk_number: Optional[int] = None
+    """If set, raise an error on the specified chunk number during streaming."""

     @property
     @override
@@ -79,6 +81,8 @@ class FakeListChatModel(SimpleChatModel):
         **kwargs: Any,
     ) -> str:
         """First try to lookup in queries, else return 'foo' or 'bar'."""
+        if self.sleep is not None:
+            time.sleep(self.sleep)
         response = self.responses[self.i]
         if self.i < len(self.responses) - 1:
             self.i += 1
diff --git a/libs/core/langchain_core/rate_limiters.py b/libs/core/langchain_core/rate_limiters.py
index 952bdaaf8ad..cf5c61d3ea7 100644
--- a/libs/core/langchain_core/rate_limiters.py
+++ b/libs/core/langchain_core/rate_limiters.py
@@ -146,13 +146,12 @@ class InMemoryRateLimiter(BaseRateLimiter):

         Args:
             requests_per_second: The number of tokens to add per second to the bucket.
-                Must be at least 1. The tokens represent "credit" that can be used
-                to make requests.
+                The tokens represent "credit" that can be used to make requests.
             check_every_n_seconds: check whether the tokens are available
                 every this many seconds. Can be a float to represent
                 fractions of a second.
             max_bucket_size: The maximum number of tokens that can be in the bucket.
-                This is used to prevent bursts of requests.
+                Must be at least 1. Used to prevent bursts of requests.
         """
         # Number of requests that we can make per second.
         self.requests_per_second = requests_per_second
diff --git a/libs/core/langchain_core/utils/mustache.py b/libs/core/langchain_core/utils/mustache.py
index be0739568c2..12cab8da0a3 100644
--- a/libs/core/langchain_core/utils/mustache.py
+++ b/libs/core/langchain_core/utils/mustache.py
@@ -150,6 +150,11 @@ def parse_tag(template: str, l_del: str, r_del: str) -> tuple[tuple[str, str], s
         msg = f"unclosed tag at line {_CURRENT_LINE}"
         raise ChevronError(msg) from e

+    # Check for empty tags
+    if not tag.strip():
+        msg = f"empty tag at line {_CURRENT_LINE}"
+        raise ChevronError(msg)
+
    # Find the type meaning of the first character
    tag_type = tag_types.get(tag[0], "variable")

diff --git a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py
index 7500e1640ac..ce262797535 100644
--- a/libs/core/tests/unit_tests/fake/test_fake_chat_model.py
+++ b/libs/core/tests/unit_tests/fake/test_fake_chat_model.py
@@ -1,5 +1,6 @@
 """Tests for verifying that testing utility code works as expected."""

+import time
 from itertools import cycle
 from typing import Any, Optional, Union
 from uuid import UUID
@@ -9,10 +10,11 @@ from typing_extensions import override
 from langchain_core.callbacks.base import AsyncCallbackHandler
 from langchain_core.language_models import (
     FakeListChatModel,
+    FakeMessagesListChatModel,
     GenericFakeChatModel,
     ParrotFakeChatModel,
 )
-from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage
+from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage, HumanMessage
 from langchain_core.outputs import ChatGenerationChunk, GenerationChunk
 from tests.unit_tests.stubs import (
     _any_id_ai_message,
@@ -230,3 +232,18 @@ def test_fake_list_chat_model_batch() -> None:
     fake = FakeListChatModel(responses=["a", "b", "c"])
     resp = fake.batch(["1", "2", "3"])
     assert resp == expected
+
+
+def test_fake_messages_list_chat_model_sleep_delay() -> None:
+    sleep_time = 0.1
+    model = FakeMessagesListChatModel(
+        responses=[AIMessage(content="A"), AIMessage(content="B")],
+        sleep=sleep_time,
+    )
+    messages = [HumanMessage(content="C")]
+
+    start = time.time()
+    model.invoke(messages)
+    elapsed = time.time() - start
+
+    assert elapsed >= sleep_time
diff --git a/libs/core/tests/unit_tests/prompts/test_structured.py b/libs/core/tests/unit_tests/prompts/test_structured.py
index 0b74b37cc6f..fda2d00b367 100644
--- a/libs/core/tests/unit_tests/prompts/test_structured.py
+++ b/libs/core/tests/unit_tests/prompts/test_structured.py
@@ -2,6 +2,7 @@ from functools import partial
 from inspect import isclass
 from typing import Any, Union, cast

+import pytest
 from pydantic import BaseModel

 from langchain_core.language_models import FakeListChatModel
@@ -10,6 +11,7 @@ from langchain_core.load.load import loads
 from langchain_core.messages import HumanMessage
 from langchain_core.prompts.structured import StructuredPrompt
 from langchain_core.runnables.base import Runnable, RunnableLambda
+from langchain_core.utils.mustache import ChevronError
 from langchain_core.utils.pydantic import is_basemodel_subclass
@@ -128,3 +130,8 @@ def test_structured_prompt_template_format() -> None:
     assert prompt.invoke({"person": {"name": "foo"}}).to_messages() == [
         HumanMessage("hi foo")
     ]
+
+
+def test_structured_prompt_template_empty_vars() -> None:
+    with pytest.raises(ChevronError, match="empty tag"):
+        StructuredPrompt([("human", "hi {{}}")], schema={}, template_format="mustache")
diff --git a/libs/partners/huggingface/tests/integration_tests/test_standard.py b/libs/partners/huggingface/tests/integration_tests/test_standard.py
index 27f5c2368a7..e3f0cacf706 100644
--- a/libs/partners/huggingface/tests/integration_tests/test_standard.py
+++ b/libs/partners/huggingface/tests/integration_tests/test_standard.py
@@ -16,7 +16,7 @@ class TestHuggingFaceEndpoint(ChatModelIntegrationTests):
     @property
     def chat_model_params(self) -> dict:
         llm = HuggingFaceEndpoint(  # type: ignore[call-arg]
-            repo_id="Qwen/Qwen2.5-72B-Instruct",
+            repo_id="meta-llama/Llama-4-Maverick-17B-128E-Instruct",
             task="conversational",
             provider="fireworks-ai",
             temperature=0,
@@ -59,12 +59,6 @@ class TestHuggingFaceEndpoint(ChatModelIntegrationTests):
             model, my_adder_tool=my_adder_tool
         )

-    @pytest.mark.xfail(reason=("Not implemented"))
-    def test_structured_few_shot_examples(
-        self, model: BaseChatModel, my_adder_tool: BaseTool
-    ) -> None:
-        super().test_structured_few_shot_examples(model, my_adder_tool=my_adder_tool)
-
     @property
     def has_tool_choice(self) -> bool:
         return False
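
A few usage sketches of the behavior this patch touches. First, the `sleep` support added to `FakeMessagesListChatModel._generate`: a minimal sketch assuming langchain-core with this patch applied (the message contents are arbitrary), mirroring the new unit test:

```python
import time

from langchain_core.language_models import FakeMessagesListChatModel
from langchain_core.messages import AIMessage, HumanMessage

# With this patch, _generate pauses for `sleep` seconds before returning
# each canned response, making simulated latency observable in tests.
model = FakeMessagesListChatModel(
    responses=[AIMessage(content="A"), AIMessage(content="B")],
    sleep=0.1,
)

start = time.time()
reply = model.invoke([HumanMessage(content="C")])
elapsed = time.time() - start
print(reply.content, f"after {elapsed:.2f}s")  # "A" after at least 0.1s
```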
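
Second, the `InMemoryRateLimiter` docstring correction: the "must be at least 1" constraint belongs to `max_bucket_size`, not `requests_per_second`. A sketch of standard usage under that reading (parameter values are illustrative; passing the limiter via the `rate_limiter` field is existing langchain-core behavior, not part of this diff):

```python
from langchain_core.language_models import FakeListChatModel
from langchain_core.rate_limiters import InMemoryRateLimiter

limiter = InMemoryRateLimiter(
    requests_per_second=0.5,    # refill rate: one token every 2 seconds
    check_every_n_seconds=0.1,  # how often waiters poll for a token
    max_bucket_size=1,          # burst cap; per the fixed docstring, >= 1
)

# Chat models acquire one token from the limiter per invocation.
model = FakeListChatModel(responses=["a", "b"], rate_limiter=limiter)
print(model.invoke("1").content)  # blocks until a token is available
print(model.invoke("2").content)  # roughly 2 seconds later at 0.5 req/s
```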
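
Third, the new empty-tag guard in `parse_tag`: a mustache template containing `{{}}` now fails fast with a `ChevronError`, as the added `StructuredPrompt` test exercises. The same failure should surface through `ChatPromptTemplate` (a hypothetical example, not part of this diff), since mustache templates are parsed at construction time to extract input variables:

```python
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.utils.mustache import ChevronError

try:
    # The empty tag is rejected up front with a clear message rather
    # than failing deeper inside the mustache parser.
    ChatPromptTemplate.from_messages(
        [("human", "hi {{}}")], template_format="mustache"
    )
except ChevronError as err:
    print(err)  # e.g. "empty tag at line 1"
```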