diff --git a/libs/standard-tests/langchain_tests/integration_tests/base_store.py b/libs/standard-tests/langchain_tests/integration_tests/base_store.py index cc5fab8bcf7..ff9e178aea9 100644 --- a/libs/standard-tests/langchain_tests/integration_tests/base_store.py +++ b/libs/standard-tests/langchain_tests/integration_tests/base_store.py @@ -1,3 +1,11 @@ +""" +Standard tests for the BaseStore abstraction + +We don't recommend implementing externally managed BaseStore abstractions at this time. + +:private: +""" + from abc import abstractmethod from typing import AsyncGenerator, Generator, Generic, Tuple, TypeVar diff --git a/libs/standard-tests/langchain_tests/integration_tests/cache.py b/libs/standard-tests/langchain_tests/integration_tests/cache.py index 7087da8ea07..3d04731d16a 100644 --- a/libs/standard-tests/langchain_tests/integration_tests/cache.py +++ b/libs/standard-tests/langchain_tests/integration_tests/cache.py @@ -1,3 +1,11 @@ +""" +Standard tests for the BaseCache abstraction + +We don't recommend implementing externally managed BaseCache abstractions at this time. 
+ +:private: +""" + from abc import abstractmethod import pytest diff --git a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py index 4e6a811c451..0dd5334cc48 100644 --- a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py +++ b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py @@ -16,7 +16,7 @@ from langchain_core.messages import ( ) from langchain_core.output_parsers import StrOutputParser from langchain_core.prompts import ChatPromptTemplate -from langchain_core.tools import tool +from langchain_core.tools import BaseTool, tool from langchain_core.utils.function_calling import tool_example_to_messages from pydantic import BaseModel, Field from pydantic.v1 import BaseModel as BaseModelV1 @@ -24,16 +24,29 @@ from pydantic.v1 import Field as FieldV1 from langchain_tests.unit_tests.chat_models import ( ChatModelTests, - my_adder_tool, ) from langchain_tests.utils.pydantic import PYDANTIC_MAJOR_VERSION -class MagicFunctionSchema(BaseModel): +def _get_joke_class() -> type[BaseModel]: + """ + :private: + """ + + class Joke(BaseModel): + """Joke to tell user.""" + + setup: str = Field(description="question to set up a joke") + punchline: str = Field(description="answer to resolve the joke") + + return Joke + + +class _MagicFunctionSchema(BaseModel): input: int = Field(..., gt=-1000, lt=1000) -@tool(args_schema=MagicFunctionSchema) +@tool(args_schema=_MagicFunctionSchema) def magic_function(input: int) -> int: """Applies a magic function to an input.""" return input + 2 @@ -45,13 +58,6 @@ def magic_function_no_args() -> int: return 5 -class Joke(BaseModel): - """Joke to tell user.""" - - setup: str = Field(description="question to set up a joke") - punchline: str = Field(description="answer to resolve the joke") - - def _validate_tool_call_message(message: BaseMessage) -> None: assert isinstance(message, AIMessage) assert len(message.tool_calls) 
== 1 @@ -103,12 +109,201 @@ class ChatModelIntegrationTests(ChatModelTests): .. note:: API references for individual test methods include troubleshooting tips. - .. note:: - Test subclasses can control what features are tested (such as tool - calling or multi-modality) by selectively overriding the properties on the - class. Relevant properties are mentioned in the references for each method. - See this page for detail on all properties: - https://python.langchain.com/api_reference/standard_tests/unit_tests/langchain_tests.unit_tests.chat_models.ChatModelTests.html + + Test subclasses must implement the following two properties: + + chat_model_class + The chat model class to test, e.g., ``ChatParrotLink``. + + Example: + + .. code-block:: python + + @property + def chat_model_class(self) -> Type[ChatParrotLink]: + return ChatParrotLink + + chat_model_params + Initialization parameters for the chat model. + + Example: + + .. code-block:: python + + @property + def chat_model_params(self) -> dict: + return {"model": "bird-brain-001", "temperature": 0} + + In addition, test subclasses can control what features are tested (such as tool + calling or multi-modality) by selectively overriding the following properties. + Expand to see details: + + .. dropdown:: has_tool_calling + + Boolean property indicating whether the chat model supports tool calling. + + By default, this is determined by whether the chat model's `bind_tools` method + is overridden. It typically does not need to be overridden on the test class. + + .. dropdown:: tool_choice_value + + Value to use for tool choice when used in tests. + + Some tests for tool calling features attempt to force tool calling via a + `tool_choice` parameter. A common value for this parameter is "any". Defaults + to `None`. + + Note: if the value is set to "tool_name", the name of the tool used in each + test will be set as the value for `tool_choice`. + + Example: + + .. 
code-block:: python + + @property + def tool_choice_value(self) -> Optional[str]: + return "any" + + .. dropdown:: has_structured_output + + Boolean property indicating whether the chat model supports structured + output. + + By default, this is determined by whether the chat model's + `with_structured_output` method is overridden. If the base implementation is + intended to be used, this property should be overridden. + + See: https://python.langchain.com/docs/concepts/structured_outputs/ + + Example: + + .. code-block:: python + + @property + def has_structured_output(self) -> bool: + return True + + .. dropdown:: supports_image_inputs + + Boolean property indicating whether the chat model supports image inputs. + Defaults to ``False``. + + If set to ``True``, the chat model will be tested using content blocks of the + form + + .. code-block:: python + + [ + {"type": "text", "text": "describe the weather in this image"}, + { + "type": "image_url", + "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}, + }, + ] + + See https://python.langchain.com/docs/concepts/multimodality/ + + Example: + + .. code-block:: python + + @property + def supports_image_inputs(self) -> bool: + return True + + .. dropdown:: supports_video_inputs + + Boolean property indicating whether the chat model supports video inputs. + Defaults to ``False``. No current tests are written for this feature. + + .. dropdown:: returns_usage_metadata + + Boolean property indicating whether the chat model returns usage metadata + on invoke and streaming responses. + + ``usage_metadata`` is an optional dict attribute on AIMessages that tracks input + and output tokens: https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html + + Example: + + .. code-block:: python + + @property + def returns_usage_metadata(self) -> bool: + return False + + .. 
dropdown:: supports_anthropic_inputs + + Boolean property indicating whether the chat model supports Anthropic-style + inputs. + + These inputs might feature "tool use" and "tool result" content blocks, e.g., + + .. code-block:: python + + [ + {"type": "text", "text": "Hmm let me think about that"}, + { + "type": "tool_use", + "input": {"fav_color": "green"}, + "id": "foo", + "name": "color_picker", + }, + ] + + If set to ``True``, the chat model will be tested using content blocks of this + form. + + Example: + + .. code-block:: python + + @property + def supports_anthropic_inputs(self) -> bool: + return False + + .. dropdown:: supports_image_tool_message + + Boolean property indicating whether the chat model supports ToolMessages + that include image content, e.g., + + .. code-block:: python + + ToolMessage( + content=[ + { + "type": "image_url", + "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}, + }, + ], + tool_call_id="1", + name="random_image", + ) + + If set to ``True``, the chat model will be tested with message sequences that + include ToolMessages of this form. + + Example: + + .. code-block:: python + + @property + def supports_image_tool_message(self) -> bool: + return False + + .. dropdown:: supported_usage_metadata_details + + Property controlling what usage metadata details are emitted in both invoke + and stream. + + ``usage_metadata`` is an optional dict attribute on AIMessages that tracks input + and output tokens: https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html + + It includes optional keys ``input_token_details`` and ``output_token_details`` + that can track usage details associated with special types of tokens, such as + cached, audio, or reasoning. + + Only needs to be overridden if these details are supplied. 
""" @property @@ -908,6 +1103,7 @@ class ChatModelIntegrationTests(ChatModelTests): if not self.has_tool_calling: pytest.skip("Test requires tool calling.") + Joke = _get_joke_class() # Pydantic class # Type ignoring since the interface only officially supports pydantic 1 # or pydantic.v1.BaseModel but not pydantic.BaseModel from pydantic 2. @@ -960,6 +1156,8 @@ class ChatModelIntegrationTests(ChatModelTests): if not self.has_tool_calling: pytest.skip("Test requires tool calling.") + Joke = _get_joke_class() + # Pydantic class # Type ignoring since the interface only officially supports pydantic 1 # or pydantic.v1.BaseModel but not pydantic.BaseModel from pydantic 2. @@ -1089,7 +1287,9 @@ class ChatModelIntegrationTests(ChatModelTests): joke_result = chat.invoke("Give me a joke about cats, include the punchline.") assert isinstance(joke_result, Joke) - def test_tool_message_histories_string_content(self, model: BaseChatModel) -> None: + def test_tool_message_histories_string_content( + self, model: BaseChatModel, my_adder_tool: BaseTool + ) -> None: """Test that message histories are compatible with string tool contents (e.g. OpenAI format). If a model passes this test, it should be compatible with messages generated from providers following OpenAI format. @@ -1158,6 +1358,7 @@ class ChatModelIntegrationTests(ChatModelTests): def test_tool_message_histories_list_content( self, model: BaseChatModel, + my_adder_tool: BaseTool, ) -> None: """Test that message histories are compatible with list tool contents (e.g. Anthropic format). @@ -1246,7 +1447,9 @@ class ChatModelIntegrationTests(ChatModelTests): result_list_content = model_with_tools.invoke(messages_list_content) assert isinstance(result_list_content, AIMessage) - def test_structured_few_shot_examples(self, model: BaseChatModel) -> None: + def test_structured_few_shot_examples( + self, model: BaseChatModel, my_adder_tool: BaseTool + ) -> None: """Test that the model can process few-shot examples with tool calls. 
These are represented as a sequence of messages of the following form: @@ -1557,7 +1760,9 @@ class ChatModelIntegrationTests(ChatModelTests): ] model.bind_tools([color_picker]).invoke(messages) - def test_tool_message_error_status(self, model: BaseChatModel) -> None: + def test_tool_message_error_status( + self, model: BaseChatModel, my_adder_tool: BaseTool + ) -> None: """Test that ToolMessage with ``status="error"`` can be handled. These messages may take the form: diff --git a/libs/standard-tests/langchain_tests/integration_tests/indexer.py b/libs/standard-tests/langchain_tests/integration_tests/indexer.py index f1e5d9eee0a..bdc0fc2e6b4 100644 --- a/libs/standard-tests/langchain_tests/integration_tests/indexer.py +++ b/libs/standard-tests/langchain_tests/integration_tests/indexer.py @@ -1,4 +1,12 @@ -"""Test suite to check index implementations.""" +"""Test suite to check index implementations. + +Standard tests for the DocumentIndex abstraction + +We don't recommend implementing externally managed DocumentIndex abstractions at this +time. + +:private: +""" import inspect import uuid diff --git a/libs/standard-tests/langchain_tests/unit_tests/chat_models.py b/libs/standard-tests/langchain_tests/unit_tests/chat_models.py index 71f3dd2169c..19225c1f2a6 100644 --- a/libs/standard-tests/langchain_tests/unit_tests/chat_models.py +++ b/libs/standard-tests/langchain_tests/unit_tests/chat_models.py @@ -11,7 +11,7 @@ import pytest from langchain_core.language_models import BaseChatModel from langchain_core.load import dumpd, load from langchain_core.runnables import RunnableBinding -from langchain_core.tools import tool +from langchain_core.tools import BaseTool, tool from pydantic import BaseModel, Field, SecretStr from pydantic.v1 import ( BaseModel as BaseModelV1, @@ -28,15 +28,12 @@ from langchain_tests.base import BaseStandardTests from langchain_tests.utils.pydantic import PYDANTIC_MAJOR_VERSION -class Person(BaseModel): # Used by some dependent tests. 
Should be deprecated. - """Record attributes of a person.""" - - name: str = Field(..., description="The name of the person.") - age: int = Field(..., description="The age of the person.") - - def generate_schema_pydantic_v1_from_2() -> Any: - """Use to generate a schema from v1 namespace in pydantic 2.""" + """ + Use to generate a schema from v1 namespace in pydantic 2. + + :private: + """ if PYDANTIC_MAJOR_VERSION != 2: raise AssertionError("This function is only compatible with Pydantic v2.") @@ -50,7 +47,11 @@ def generate_schema_pydantic_v1_from_2() -> Any: def generate_schema_pydantic() -> Any: - """Works with either pydantic 1 or 2""" + """ + Works with either pydantic 1 or 2 + + :private: + """ class PersonA(BaseModel): """Record attributes of a person.""" @@ -67,20 +68,153 @@ if PYDANTIC_MAJOR_VERSION == 2: TEST_PYDANTIC_MODELS.append(generate_schema_pydantic_v1_from_2()) -@tool -def my_adder_tool(a: int, b: int) -> int: - """Takes two integers, a and b, and returns their sum.""" - return a + b - - -def my_adder(a: int, b: int) -> int: - """Takes two integers, a and b, and returns their sum.""" - return a + b - - class ChatModelTests(BaseStandardTests): """Base class for chat model tests. + :private: + """ # noqa: E501 + + @property + @abstractmethod + def chat_model_class(self) -> Type[BaseChatModel]: + """The chat model class to test, e.g., `ChatParrotLink`.""" + ... + + @property + def chat_model_params(self) -> dict: + """Initialization parameters for the chat model.""" + return {} + + @property + def standard_chat_model_params(self) -> dict: + """:meta private:""" + return { + "temperature": 0, + "max_tokens": 100, + "timeout": 60, + "stop": [], + "max_retries": 2, + } + + @pytest.fixture + def model(self) -> BaseChatModel: + """Fixture that returns an instance of the chat model. 
Should not be + overridden.""" + return self.chat_model_class( + **{**self.standard_chat_model_params, **self.chat_model_params} + ) + + @pytest.fixture + def my_adder_tool(self) -> BaseTool: + @tool + def my_adder_tool(a: int, b: int) -> int: + """Takes two integers, a and b, and returns their sum.""" + return a + b + + return my_adder_tool + + @property + def has_tool_calling(self) -> bool: + """Boolean property indicating whether the model supports tool calling.""" + return self.chat_model_class.bind_tools is not BaseChatModel.bind_tools + + @property + def tool_choice_value(self) -> Optional[str]: + """Value to use for tool choice when used in tests.""" + return None + + @property + def has_structured_output(self) -> bool: + """Boolean property indicating whether the chat model supports structured + output.""" + return ( + self.chat_model_class.with_structured_output + is not BaseChatModel.with_structured_output + ) + + @property + def supports_image_inputs(self) -> bool: + """Boolean property indicating whether the chat model supports image inputs. + Defaults to ``False``.""" + return False + + @property + def supports_video_inputs(self) -> bool: + """Boolean property indicating whether the chat model supports video inputs. + Defaults to ``False``. 
No current tests are written for this feature.""" + return False + + @property + def returns_usage_metadata(self) -> bool: + """Boolean property indicating whether the chat model returns usage metadata + on invoke and streaming responses.""" + return True + + @property + def supports_anthropic_inputs(self) -> bool: + """Boolean property indicating whether the chat model supports Anthropic-style + inputs.""" + return False + + @property + def supports_image_tool_message(self) -> bool: + """Boolean property indicating whether the chat model supports ToolMessages + that include image content.""" + return False + + @property + def supported_usage_metadata_details( + self, + ) -> Dict[ + Literal["invoke", "stream"], + List[ + Literal[ + "audio_input", + "audio_output", + "reasoning_output", + "cache_read_input", + "cache_creation_input", + ] + ], + ]: + """Property controlling what usage metadata details are emitted in both invoke + and stream. Only needs to be overridden if these details are returned by the + model.""" + return {"invoke": [], "stream": []} + + +class ChatModelUnitTests(ChatModelTests): + """Base class for chat model unit tests. + + Test subclasses must implement the ``chat_model_class`` and + ``chat_model_params`` properties to specify what model to test and its + initialization parameters. + + Example: + + .. code-block:: python + + from typing import Type + + from langchain_tests.unit_tests import ChatModelUnitTests + from my_package.chat_models import MyChatModel + + + class TestMyChatModelUnit(ChatModelUnitTests): + @property + def chat_model_class(self) -> Type[MyChatModel]: + # Return the chat model class to test here + return MyChatModel + + @property + def chat_model_params(self) -> dict: + # Return initialization parameters for the model. + return {"model": "model-001", "temperature": 0} + + .. note:: + API references for individual test methods include troubleshooting tips. 
+ + Test subclasses must implement the following two properties: chat_model_class @@ -275,146 +409,6 @@ class ChatModelTests(BaseStandardTests): cached, audio, or reasoning. Only needs to be overridden if these details are supplied. - """ # noqa: E501 - - @property - @abstractmethod - def chat_model_class(self) -> Type[BaseChatModel]: - """The chat model class to test, e.g., `ChatParrotLink`.""" - ... - - @property - def chat_model_params(self) -> dict: - """Initialization parameters for the chat mobdel.""" - return {} - - @property - def standard_chat_model_params(self) -> dict: - """:meta private:""" - return { - "temperature": 0, - "max_tokens": 100, - "timeout": 60, - "stop": [], - "max_retries": 2, - } - - @pytest.fixture - def model(self) -> BaseChatModel: - """Fixture that returns an instance of the chat model. Should not be - overridden.""" - return self.chat_model_class( - **{**self.standard_chat_model_params, **self.chat_model_params} - ) - - @property - def has_tool_calling(self) -> bool: - """Boolean property indicating whether the model supports tool calling.""" - return self.chat_model_class.bind_tools is not BaseChatModel.bind_tools - - @property - def tool_choice_value(self) -> Optional[str]: - """Value to use for tool choice when used in tests.""" - return None - - @property - def has_structured_output(self) -> bool: - """Boolean property indicating whether the chat model supports structured - output.""" - return ( - self.chat_model_class.with_structured_output - is not BaseChatModel.with_structured_output - ) - - @property - def supports_image_inputs(self) -> bool: - """Boolean property indicating whether the chat model supports image inputs. - Defaults to ``False``.""" - return False - - @property - def supports_video_inputs(self) -> bool: - """Boolean property indicating whether the chat model supports image inputs. - Defaults to ``False``. 
No current tests are written for this feature.""" - return False - - @property - def returns_usage_metadata(self) -> bool: - """Boolean property indicating whether the chat model returns usage metadata - on invoke and streaming responses.""" - return True - - @property - def supports_anthropic_inputs(self) -> bool: - """Boolean property indicating whether the chat model supports Anthropic-style - inputs.""" - return False - - @property - def supports_image_tool_message(self) -> bool: - """Boolean property indicating whether the chat model supports ToolMessages - that include image content.""" - return False - - @property - def supported_usage_metadata_details( - self, - ) -> Dict[ - Literal["invoke", "stream"], - List[ - Literal[ - "audio_input", - "audio_output", - "reasoning_output", - "cache_read_input", - "cache_creation_input", - ] - ], - ]: - """Property controlling what usage metadata details are emitted in both invoke - and stream. Only needs to be overridden if these details are returned by the - model.""" - return {"invoke": [], "stream": []} - - -class ChatModelUnitTests(ChatModelTests): - """Base class for chat model unit tests. - - Test subclasses must implement the ``chat_model_class`` and - ``chat_model_params`` properties to specify what model to test and its - initialization parameters. - - Example: - - .. code-block:: python - - from typing import Type - - from langchain_tests.unit_tests import ChatModelUnitTests - from my_package.chat_models import MyChatModel - - - class TestMyChatModelUnit(ChatModelUnitTests): - @property - def chat_model_class(self) -> Type[MyChatModel]: - # Return the chat model class to test here - return MyChatModel - - @property - def chat_model_params(self) -> dict: - # Return initialization parameters for the model. - return {"model": "model-001", "temperature": 0} - - .. note:: - API references for individual test methods include troubleshooting tips. - - .. 
note:: - Test subclasses can control what features are tested (such as tool - calling or multi-modality) by selectively overriding the properties on the - class. Relevant properties are mentioned in the references for each method. - See this page for detail on all properties: - https://python.langchain.com/api_reference/standard_tests/unit_tests/langchain_tests.unit_tests.chat_models.ChatModelTests.html - Testing initialization from environment variables Some unit tests may require testing initialization from environment variables. @@ -526,6 +520,7 @@ class ChatModelUnitTests(ChatModelTests): def test_bind_tool_pydantic( self, model: BaseChatModel, + my_adder_tool: BaseTool, ) -> None: """Test that chat model correctly handles Pydantic models that are passed into ``bind_tools``. Test is skipped if the ``has_tool_calling`` property @@ -542,6 +537,10 @@ class ChatModelUnitTests(ChatModelTests): if not self.has_tool_calling: return + def my_adder(a: int, b: int) -> int: + """Takes two integers, a and b, and returns their sum.""" + return a + b + tools = [my_adder_tool, my_adder] for pydantic_model in TEST_PYDANTIC_MODELS: diff --git a/libs/standard-tests/langchain_tests/unit_tests/embeddings.py b/libs/standard-tests/langchain_tests/unit_tests/embeddings.py index c580243b16d..527be61e8be 100644 --- a/libs/standard-tests/langchain_tests/unit_tests/embeddings.py +++ b/libs/standard-tests/langchain_tests/unit_tests/embeddings.py @@ -11,6 +11,10 @@ from langchain_tests.base import BaseStandardTests class EmbeddingsTests(BaseStandardTests): + """ + :private: + """ + @property @abstractmethod def embeddings_class(self) -> Type[Embeddings]: ...