mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-18 08:03:36 +00:00
tests[patch]: populate API reference for chat models (#28487)
Populate API reference for test class properties and test methods for chat models. Also: - Make `standard_chat_model_params` private. - `pytest.skip` some tests that were previously passed if features are not supported.
This commit is contained in:
@@ -1,4 +1,6 @@
|
||||
"""Unit tests for chat models."""
|
||||
"""
|
||||
:autodoc-options: autoproperty
|
||||
"""
|
||||
|
||||
import os
|
||||
from abc import abstractmethod
|
||||
@@ -77,16 +79,218 @@ def my_adder(a: int, b: int) -> int:
|
||||
|
||||
|
||||
class ChatModelTests(BaseStandardTests):
|
||||
"""Base class for chat model tests.
|
||||
|
||||
Test subclasses must implement the following two properties:
|
||||
|
||||
chat_model_class
|
||||
The chat model class to test, e.g., ``ChatParrotLink``.
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@property
|
||||
def chat_model_class(self) -> Type[ChatParrotLink]:
|
||||
return ChatParrotLink
|
||||
|
||||
chat_model_params
|
||||
Initialization parameters for the chat model.
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@property
|
||||
def chat_model_params(self) -> dict:
|
||||
return {"model": "bird-brain-001", "temperature": 0}
|
||||
|
||||
In addition, test subclasses can control what features are tested (such as tool
|
||||
calling or multi-modality) by selectively overriding the following properties.
|
||||
Expand to see details:
|
||||
|
||||
.. dropdown:: has_tool_calling
|
||||
|
||||
Boolean property indicating whether the chat model supports tool calling.
|
||||
|
||||
By default, this is determined by whether the chat model's `bind_tools` method
|
||||
is overridden. It typically does not need to be overridden on the test class.
|
||||
|
||||
.. dropdown:: tool_choice_value
|
||||
|
||||
Value to use for tool choice when used in tests.
|
||||
|
||||
Some tests for tool calling features attempt to force tool calling via a
|
||||
`tool_choice` parameter. A common value for this parameter is "any". Defaults
|
||||
to `None`.
|
||||
|
||||
Note: if the value is set to "tool_name", the name of the tool used in each
|
||||
test will be set as the value for `tool_choice`.
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@property
|
||||
def tool_choice_value(self) -> Optional[str]:
|
||||
return "any"
|
||||
|
||||
.. dropdown:: has_structured_output
|
||||
|
||||
Boolean property indicating whether the chat model supports structured
|
||||
output.
|
||||
|
||||
By default, this is determined by whether the chat model's
|
||||
`with_structured_output` method is overridden. If the base implementation is
|
||||
intended to be used, this property should be overridden.
|
||||
|
||||
See: https://python.langchain.com/docs/concepts/structured_outputs/
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@property
|
||||
def has_structured_output(self) -> bool:
|
||||
return True
|
||||
|
||||
.. dropdown:: supports_image_inputs
|
||||
|
||||
Boolean property indicating whether the chat model supports image inputs.
|
||||
Defaults to ``False``.
|
||||
|
||||
If set to ``True``, the chat model will be tested using content blocks of the
|
||||
form
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
[
|
||||
{"type": "text", "text": "describe the weather in this image"},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
|
||||
},
|
||||
]
|
||||
|
||||
See https://python.langchain.com/docs/concepts/multimodality/
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@property
|
||||
def supports_image_inputs(self) -> bool:
|
||||
return True
|
||||
|
||||
.. dropdown:: supports_video_inputs
|
||||
|
||||
Boolean property indicating whether the chat model supports video inputs.
|
||||
Defaults to ``False``. No current tests are written for this feature.
|
||||
|
||||
.. dropdown:: returns_usage_metadata
|
||||
|
||||
Boolean property indicating whether the chat model returns usage metadata
|
||||
on invoke and streaming responses.
|
||||
|
||||
``usage_metadata`` is an optional dict attribute on AIMessages that tracks input
|
||||
and output tokens: https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@property
|
||||
def returns_usage_metadata(self) -> bool:
|
||||
return False
|
||||
|
||||
.. dropdown:: supports_anthropic_inputs
|
||||
|
||||
Boolean property indicating whether the chat model supports Anthropic-style
|
||||
inputs.
|
||||
|
||||
These inputs might feature "tool use" and "tool result" content blocks, e.g.,
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
[
|
||||
{"type": "text", "text": "Hmm let me think about that"},
|
||||
{
|
||||
"type": "tool_use",
|
||||
"input": {"fav_color": "green"},
|
||||
"id": "foo",
|
||||
"name": "color_picker",
|
||||
},
|
||||
]
|
||||
|
||||
If set to ``True``, the chat model will be tested using content blocks of this
|
||||
form.
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@property
|
||||
def supports_anthropic_inputs(self) -> bool:
|
||||
return False
|
||||
|
||||
.. dropdown:: supports_image_tool_message
|
||||
|
||||
Boolean property indicating whether the chat model supports ToolMessages
|
||||
that include image content, e.g.,
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
ToolMessage(
|
||||
content=[
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
|
||||
},
|
||||
],
|
||||
tool_call_id="1",
|
||||
name="random_image",
|
||||
)
|
||||
|
||||
If set to ``True``, the chat model will be tested with message sequences that
|
||||
include ToolMessages of this form.
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@property
|
||||
def supports_image_tool_message(self) -> bool:
|
||||
return False
|
||||
|
||||
.. dropdown:: supported_usage_metadata_details
|
||||
|
||||
Property controlling what usage metadata details are emitted in both invoke
|
||||
and stream.
|
||||
|
||||
``usage_metadata`` is an optional dict attribute on AIMessages that tracks input
|
||||
and output tokens: https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html
|
||||
|
||||
It includes optional keys ``input_token_details`` and ``output_token_details``
|
||||
that can track usage details associated with special types of tokens, such as
|
||||
cached, audio, or reasoning.
|
||||
|
||||
Only needs to be overridden if these details are supplied.
|
||||
""" # noqa: E501
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def chat_model_class(self) -> Type[BaseChatModel]: ...
|
||||
def chat_model_class(self) -> Type[BaseChatModel]:
|
||||
"""The chat model class to test, e.g., `ChatParrotLink`."""
|
||||
...
|
||||
|
||||
@property
|
||||
def chat_model_params(self) -> dict:
|
||||
"""Initialization parameters for the chat model."""
|
||||
return {}
|
||||
|
||||
@property
|
||||
def standard_chat_model_params(self) -> dict:
|
||||
""":meta private:"""
|
||||
return {
|
||||
"temperature": 0,
|
||||
"max_tokens": 100,
|
||||
@@ -97,12 +301,15 @@ class ChatModelTests(BaseStandardTests):
|
||||
|
||||
@pytest.fixture
|
||||
def model(self) -> BaseChatModel:
|
||||
"""Fixture that returns an instance of the chat model. Should not be
|
||||
overridden."""
|
||||
return self.chat_model_class(
|
||||
**{**self.standard_chat_model_params, **self.chat_model_params}
|
||||
)
|
||||
|
||||
@property
|
||||
def has_tool_calling(self) -> bool:
|
||||
"""Boolean property indicating whether the model supports tool calling."""
|
||||
return self.chat_model_class.bind_tools is not BaseChatModel.bind_tools
|
||||
|
||||
@property
|
||||
@@ -112,6 +319,8 @@ class ChatModelTests(BaseStandardTests):
|
||||
|
||||
@property
|
||||
def has_structured_output(self) -> bool:
|
||||
"""Boolean property indicating whether the chat model supports structured
|
||||
output."""
|
||||
return (
|
||||
self.chat_model_class.with_structured_output
|
||||
is not BaseChatModel.with_structured_output
|
||||
@@ -119,22 +328,32 @@ class ChatModelTests(BaseStandardTests):
|
||||
|
||||
@property
|
||||
def supports_image_inputs(self) -> bool:
|
||||
"""Boolean property indicating whether the chat model supports image inputs.
|
||||
Defaults to ``False``."""
|
||||
return False
|
||||
|
||||
@property
|
||||
def supports_video_inputs(self) -> bool:
|
||||
"""Boolean property indicating whether the chat model supports video inputs.
|
||||
Defaults to ``False``. No current tests are written for this feature."""
|
||||
return False
|
||||
|
||||
@property
|
||||
def returns_usage_metadata(self) -> bool:
|
||||
"""Boolean property indicating whether the chat model returns usage metadata
|
||||
on invoke and streaming responses."""
|
||||
return True
|
||||
|
||||
@property
|
||||
def supports_anthropic_inputs(self) -> bool:
|
||||
"""Boolean property indicating whether the chat model supports Anthropic-style
|
||||
inputs."""
|
||||
return False
|
||||
|
||||
@property
|
||||
def supports_image_tool_message(self) -> bool:
|
||||
"""Boolean property indicating whether the chat model supports ToolMessages
|
||||
that include image content."""
|
||||
return False
|
||||
|
||||
@property
|
||||
@@ -152,31 +371,127 @@ class ChatModelTests(BaseStandardTests):
|
||||
]
|
||||
],
|
||||
]:
|
||||
"""Property controlling what usage metadata details are emitted in both invoke
|
||||
and stream. Only needs to be overridden if these details are returned by the
|
||||
model."""
|
||||
return {"invoke": [], "stream": []}
|
||||
|
||||
|
||||
class ChatModelUnitTests(ChatModelTests):
|
||||
"""Base class for chat model unit tests.
|
||||
|
||||
Test subclasses must implement the following two properties:
|
||||
|
||||
chat_model_class
|
||||
The chat model class to test, e.g., ``ChatParrotLink``.
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@property
|
||||
def chat_model_class(self) -> Type[ChatParrotLink]:
|
||||
return ChatParrotLink
|
||||
|
||||
chat_model_params
|
||||
Initialization parameters for the chat model.
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@property
|
||||
def chat_model_params(self) -> dict:
|
||||
return {"model": "bird-brain-001", "temperature": 0}
|
||||
|
||||
.. note::
|
||||
API references for individual test methods include troubleshooting tips.
|
||||
|
||||
.. note::
|
||||
Test subclasses can control what features are tested (such as tool
|
||||
calling or multi-modality) by selectively overriding the properties on the
|
||||
class. Relevant properties are mentioned in the references for each method.
|
||||
See this page for detail on all properties:
|
||||
https://python.langchain.com/api_reference/standard_tests/unit_tests/langchain_tests.unit_tests.chat_models.ChatModelTests.html
|
||||
|
||||
|
||||
Testing initialization from environment variables
|
||||
Some unit tests may require testing initialization from environment variables.
|
||||
These tests can be enabled by overriding the ``init_from_env_params``
|
||||
property (see below):
|
||||
|
||||
.. dropdown:: init_from_env_params
|
||||
|
||||
This property is used in unit tests to test initialization from
|
||||
environment variables. It should return a tuple of three dictionaries
|
||||
that specify the environment variables, additional initialization args,
|
||||
and expected instance attributes to check.
|
||||
|
||||
Defaults to empty dicts. If not overridden, the test is skipped.
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@property
|
||||
def init_from_env_params(self) -> Tuple[dict, dict, dict]:
|
||||
return (
|
||||
{
|
||||
"MY_API_KEY": "api_key",
|
||||
},
|
||||
{
|
||||
"model": "bird-brain-001",
|
||||
},
|
||||
{
|
||||
"my_api_key": "api_key",
|
||||
},
|
||||
)
|
||||
""" # noqa: E501
|
||||
|
||||
@property
|
||||
def standard_chat_model_params(self) -> dict:
|
||||
""":meta private:"""
|
||||
params = super().standard_chat_model_params
|
||||
params["api_key"] = "test"
|
||||
return params
|
||||
|
||||
@property
|
||||
def init_from_env_params(self) -> Tuple[dict, dict, dict]:
|
||||
"""Return env vars, init args, and expected instance attrs for initializing
|
||||
from env vars."""
|
||||
"""This property is used in unit tests to test initialization from environment
|
||||
variables. It should return a tuple of three dictionaries that specify the
|
||||
environment variables, additional initialization args, and expected instance
|
||||
attributes to check."""
|
||||
return {}, {}, {}
|
||||
|
||||
def test_init(self) -> None:
|
||||
"""Test model initialization. This should pass for all integrations.
|
||||
|
||||
.. dropdown:: Troubleshooting
|
||||
|
||||
If this test fails, ensure that:
|
||||
|
||||
1. ``chat_model_params`` is specified and the model can be initialized from those params;
|
||||
2. The model accommodates standard parameters: https://python.langchain.com/docs/concepts/chat_models/#standard-parameters
|
||||
""" # noqa: E501
|
||||
model = self.chat_model_class(
|
||||
**{**self.standard_chat_model_params, **self.chat_model_params}
|
||||
)
|
||||
assert model is not None
|
||||
|
||||
def test_init_from_env(self) -> None:
|
||||
"""Test initialization from environment variables. Relies on the
|
||||
``init_from_env_params`` property. Test is skipped if that property is not
|
||||
set.
|
||||
|
||||
.. dropdown:: Troubleshooting
|
||||
|
||||
If this test fails, ensure that ``init_from_env_params`` is specified
|
||||
correctly.
|
||||
"""
|
||||
env_params, model_params, expected_attrs = self.init_from_env_params
|
||||
if env_params:
|
||||
if not env_params:
|
||||
pytest.skip("init_from_env_params not specified.")
|
||||
else:
|
||||
with mock.patch.dict(os.environ, env_params):
|
||||
model = self.chat_model_class(**model_params)
|
||||
assert model is not None
|
||||
@@ -189,6 +504,14 @@ class ChatModelUnitTests(ChatModelTests):
|
||||
def test_init_streaming(
|
||||
self,
|
||||
) -> None:
|
||||
"""Test that model can be initialized with ``streaming=True``. This is for
|
||||
backward-compatibility purposes.
|
||||
|
||||
.. dropdown:: Troubleshooting
|
||||
|
||||
If this test fails, ensure that the model can be initialized with a
|
||||
boolean ``streaming`` parameter.
|
||||
"""
|
||||
model = self.chat_model_class(
|
||||
**{
|
||||
**self.standard_chat_model_params,
|
||||
@@ -202,6 +525,18 @@ class ChatModelUnitTests(ChatModelTests):
|
||||
self,
|
||||
model: BaseChatModel,
|
||||
) -> None:
|
||||
"""Test that chat model correctly handles Pydantic models that are passed
|
||||
into ``bind_tools``. Test is skipped if the ``has_tool_calling`` property
|
||||
on the test class is False.
|
||||
|
||||
.. dropdown:: Troubleshooting
|
||||
|
||||
If this test fails, ensure that the model's ``bind_tools`` method
|
||||
properly handles Pydantic V2 models. ``langchain_core`` implements
|
||||
a utility function that will accommodate most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
|
||||
|
||||
See example implementation of ``bind_tools`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.bind_tools
|
||||
""" # noqa: E501
|
||||
if not self.has_tool_calling:
|
||||
return
|
||||
|
||||
@@ -227,12 +562,35 @@ class ChatModelUnitTests(ChatModelTests):
|
||||
model: BaseChatModel,
|
||||
schema: Any,
|
||||
) -> None:
|
||||
"""Test ``with_structured_output`` method. Test is skipped if the
|
||||
``has_structured_output`` property on the test class is False.
|
||||
|
||||
.. dropdown:: Troubleshooting
|
||||
|
||||
If this test fails, ensure that the model's ``bind_tools`` method
|
||||
properly handles Pydantic V2 models. ``langchain_core`` implements
|
||||
a utility function that will accommodate most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
|
||||
|
||||
See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
|
||||
""" # noqa: E501
|
||||
if not self.has_structured_output:
|
||||
return
|
||||
|
||||
assert model.with_structured_output(schema) is not None
|
||||
|
||||
def test_standard_params(self, model: BaseChatModel) -> None:
|
||||
"""Test that model properly generates standard parameters. These are used
|
||||
for tracing purposes.
|
||||
|
||||
.. dropdown:: Troubleshooting
|
||||
|
||||
If this test fails, check that the model accommodates standard parameters:
|
||||
https://python.langchain.com/docs/concepts/chat_models/#standard-parameters
|
||||
|
||||
Check also that the model class is named according to convention
|
||||
(e.g., ``ChatProviderName``).
|
||||
"""
|
||||
|
||||
class ExpectedParams(BaseModelV1):
|
||||
ls_provider: str
|
||||
ls_model_name: str
|
||||
@@ -260,10 +618,20 @@ class ChatModelUnitTests(ChatModelTests):
|
||||
pytest.fail(f"Validation error: {e}")
|
||||
|
||||
def test_serdes(self, model: BaseChatModel, snapshot: SnapshotAssertion) -> None:
|
||||
"""Test serialization and deserialization of the model. Test is skipped if the
|
||||
``is_lc_serializable`` method on the chat model class is not overridden
|
||||
to return ``True``.
|
||||
|
||||
.. dropdown:: Troubleshooting
|
||||
|
||||
If this test fails, check that the ``init_from_env_params`` property is
|
||||
correctly set on the test class.
|
||||
"""
|
||||
if not self.chat_model_class.is_lc_serializable():
|
||||
return
|
||||
env_params, model_params, expected_attrs = self.init_from_env_params
|
||||
with mock.patch.dict(os.environ, env_params):
|
||||
ser = dumpd(model)
|
||||
assert ser == snapshot(name="serialized")
|
||||
assert model.dict() == load(dumpd(model)).dict()
|
||||
pytest.skip("Model is not serializable.")
|
||||
else:
|
||||
env_params, model_params, expected_attrs = self.init_from_env_params
|
||||
with mock.patch.dict(os.environ, env_params):
|
||||
ser = dumpd(model)
|
||||
assert ser == snapshot(name="serialized")
|
||||
assert model.dict() == load(dumpd(model)).dict()
|
||||
|
Reference in New Issue
Block a user