langchain/libs/standard-tests/langchain_tests/unit_tests/chat_models.py
"""
:autodoc-options: autoproperty
"""
import os
from abc import abstractmethod
from typing import Any, Dict, List, Literal, Optional, Tuple, Type
from unittest import mock
import pytest
from langchain_core.language_models import BaseChatModel
from langchain_core.load import dumpd, load
from langchain_core.runnables import RunnableBinding
from langchain_core.tools import tool
from pydantic import BaseModel, Field, SecretStr
from pydantic.v1 import (
BaseModel as BaseModelV1,
)
from pydantic.v1 import (
Field as FieldV1,
)
from pydantic.v1 import (
ValidationError as ValidationErrorV1,
)
from syrupy import SnapshotAssertion
from langchain_tests.base import BaseStandardTests
from langchain_tests.utils.pydantic import PYDANTIC_MAJOR_VERSION
class Person(BaseModel): # Used by some dependent tests. Should be deprecated.
"""Record attributes of a person."""
name: str = Field(..., description="The name of the person.")
age: int = Field(..., description="The age of the person.")
def generate_schema_pydantic_v1_from_2() -> Any:
"""Use to generate a schema from v1 namespace in pydantic 2."""
if PYDANTIC_MAJOR_VERSION != 2:
raise AssertionError("This function is only compatible with Pydantic v2.")
class PersonB(BaseModelV1):
"""Record attributes of a person."""
name: str = FieldV1(..., description="The name of the person.")
age: int = FieldV1(..., description="The age of the person.")
return PersonB
def generate_schema_pydantic() -> Any:
"""Works with either pydantic 1 or 2"""
class PersonA(BaseModel):
"""Record attributes of a person."""
name: str = Field(..., description="The name of the person.")
age: int = Field(..., description="The age of the person.")
return PersonA
TEST_PYDANTIC_MODELS = [generate_schema_pydantic()]
if PYDANTIC_MAJOR_VERSION == 2:
TEST_PYDANTIC_MODELS.append(generate_schema_pydantic_v1_from_2())
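# TEST_PYDANTIC_MODELS always contains a native Pydantic model; when running
# under Pydantic 2 it also includes a model defined via the ``pydantic.v1``
# compatibility namespace, so both schema styles are exercised by the tests below.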
@tool
def my_adder_tool(a: int, b: int) -> int:
"""Takes two integers, a and b, and returns their sum."""
return a + b
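# ``my_adder_tool`` is a BaseTool (invocable as, e.g.,
# ``my_adder_tool.invoke({"a": 1, "b": 2})``), while ``my_adder`` below is the
# plain, undecorated function; tests pass both to ``bind_tools`` to cover the
# different callable formats a model should accept.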
def my_adder(a: int, b: int) -> int:
"""Takes two integers, a and b, and returns their sum."""
return a + b
class ChatModelTests(BaseStandardTests):
"""Base class for chat model tests.
Test subclasses must implement the following two properties:
chat_model_class
The chat model class to test, e.g., ``ChatParrotLink``.
Example:
.. code-block:: python
@property
def chat_model_class(self) -> Type[ChatParrotLink]:
return ChatParrotLink
chat_model_params
Initialization parameters for the chat model.
Example:
.. code-block:: python
@property
def chat_model_params(self) -> dict:
return {"model": "bird-brain-001", "temperature": 0}
In addition, test subclasses can control what features are tested (such as tool
calling or multi-modality) by selectively overriding the following properties.
Expand to see details:
.. dropdown:: has_tool_calling
Boolean property indicating whether the chat model supports tool calling.
By default, this is determined by whether the chat model's `bind_tools` method
is overridden. It typically does not need to be overridden on the test class.
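Example (needed only if the default detection is not appropriate):
.. code-block:: python

    @property
    def has_tool_calling(self) -> bool:
        return True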
.. dropdown:: tool_choice_value
Value to use for tool choice when used in tests.
Some tests for tool calling features attempt to force tool calling via a
`tool_choice` parameter. A common value for this parameter is "any". Defaults
to `None`.
Note: if the value is set to "tool_name", the name of the tool used in each
test will be set as the value for `tool_choice`.
Example:
.. code-block:: python
@property
def tool_choice_value(self) -> Optional[str]:
return "any"
.. dropdown:: has_structured_output
Boolean property indicating whether the chat model supports structured
output.
By default, this is determined by whether the chat model's
`with_structured_output` method is overridden. If the base implementation of
`with_structured_output` is intended to be used (and tested), this property
should be overridden to return ``True``.
See: https://python.langchain.com/docs/concepts/structured_outputs/
Example:
.. code-block:: python
@property
def has_structured_output(self) -> bool:
return True
.. dropdown:: supports_image_inputs
Boolean property indicating whether the chat model supports image inputs.
Defaults to ``False``.
If set to ``True``, the chat model will be tested using content blocks of the
form
.. code-block:: python
[
{"type": "text", "text": "describe the weather in this image"},
{
"type": "image_url",
"image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
},
]
See https://python.langchain.com/docs/concepts/multimodality/
Example:
.. code-block:: python
@property
def supports_image_inputs(self) -> bool:
return True
.. dropdown:: supports_video_inputs
Boolean property indicating whether the chat model supports video inputs.
Defaults to ``False``. No current tests are written for this feature.
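Example:
.. code-block:: python

    @property
    def supports_video_inputs(self) -> bool:
        return False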
.. dropdown:: returns_usage_metadata
Boolean property indicating whether the chat model returns usage metadata
on invoke and streaming responses.
``usage_metadata`` is an optional dict attribute on AIMessages that tracks input
and output tokens: https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html
Example:
.. code-block:: python
@property
def returns_usage_metadata(self) -> bool:
return False
.. dropdown:: supports_anthropic_inputs
Boolean property indicating whether the chat model supports Anthropic-style
inputs.
These inputs might feature "tool use" and "tool result" content blocks, e.g.,
.. code-block:: python
[
{"type": "text", "text": "Hmm let me think about that"},
{
"type": "tool_use",
"input": {"fav_color": "green"},
"id": "foo",
"name": "color_picker",
},
]
If set to ``True``, the chat model will be tested using content blocks of this
form.
Example:
.. code-block:: python
@property
def supports_anthropic_inputs(self) -> bool:
return False
.. dropdown:: supports_image_tool_message
Boolean property indicating whether the chat model supports ToolMessages
that include image content, e.g.,
.. code-block:: python
ToolMessage(
content=[
{
"type": "image_url",
"image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
},
],
tool_call_id="1",
name="random_image",
)
If set to ``True``, the chat model will be tested with message sequences that
include ToolMessages of this form.
Example:
.. code-block:: python
@property
def supports_image_tool_message(self) -> bool:
return False
.. dropdown:: supported_usage_metadata_details
Property controlling what usage metadata details are emitted in both invoke
and stream.
``usage_metadata`` is an optional dict attribute on AIMessages that tracks input
and output tokens: https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html
It includes optional keys ``input_token_details`` and ``output_token_details``
that can track usage details associated with special types of tokens, such as
cached, audio, or reasoning.
Only needs to be overridden if these details are supplied.
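Example (illustrative values; list only the detail types your model actually emits):
.. code-block:: python

    @property
    def supported_usage_metadata_details(self) -> dict:
        return {
            "invoke": ["cache_read_input", "cache_creation_input"],
            "stream": [],
        }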
""" # noqa: E501
@property
@abstractmethod
def chat_model_class(self) -> Type[BaseChatModel]:
"""The chat model class to test, e.g., `ChatParrotLink`."""
...
@property
def chat_model_params(self) -> dict:
"""Initialization parameters for the chat mobdel."""
return {}
@property
def standard_chat_model_params(self) -> dict:
""":meta private:"""
return {
"temperature": 0,
"max_tokens": 100,
"timeout": 60,
"stop": [],
"max_retries": 2,
}
@pytest.fixture
def model(self) -> BaseChatModel:
"""Fixture that returns an instance of the chat model. Should not be
overridden."""
return self.chat_model_class(
**{**self.standard_chat_model_params, **self.chat_model_params}
)
@property
def has_tool_calling(self) -> bool:
"""Boolean property indicating whether the model supports tool calling."""
return self.chat_model_class.bind_tools is not BaseChatModel.bind_tools
@property
def tool_choice_value(self) -> Optional[str]:
"""Value to use for tool choice when used in tests."""
return None
@property
def has_structured_output(self) -> bool:
"""Boolean property indicating whether the chat model supports structured
output."""
return (
self.chat_model_class.with_structured_output
is not BaseChatModel.with_structured_output
)
@property
def supports_image_inputs(self) -> bool:
"""Boolean property indicating whether the chat model supports image inputs.
Defaults to ``False``."""
return False
@property
def supports_video_inputs(self) -> bool:
"""Boolean property indicating whether the chat model supports image inputs.
Defaults to ``False``. No current tests are written for this feature."""
return False
@property
def returns_usage_metadata(self) -> bool:
"""Boolean property indicating whether the chat model returns usage metadata
on invoke and streaming responses."""
return True
@property
def supports_anthropic_inputs(self) -> bool:
"""Boolean property indicating whether the chat model supports Anthropic-style
inputs."""
return False
@property
def supports_image_tool_message(self) -> bool:
"""Boolean property indicating whether the chat model supports ToolMessages
that include image content."""
return False
@property
def supported_usage_metadata_details(
self,
) -> Dict[
Literal["invoke", "stream"],
List[
Literal[
"audio_input",
"audio_output",
"reasoning_output",
"cache_read_input",
"cache_creation_input",
]
],
]:
"""Property controlling what usage metadata details are emitted in both invoke
and stream. Only needs to be overridden if these details are returned by the
model."""
return {"invoke": [], "stream": []}
class ChatModelUnitTests(ChatModelTests):
"""Base class for chat model unit tests.
Test subclasses must implement the following two properties:
chat_model_class
The chat model class to test, e.g., ``ChatParrotLink``.
Example:
.. code-block:: python
@property
def chat_model_class(self) -> Type[ChatParrotLink]:
return ChatParrotLink
chat_model_params
Initialization parameters for the chat model.
Example:
.. code-block:: python
@property
def chat_model_params(self) -> dict:
return {"model": "bird-brain-001", "temperature": 0}
.. note::
API references for individual test methods include troubleshooting tips.
.. note::
Test subclasses can control what features are tested (such as tool
calling or multi-modality) by selectively overriding the properties on the
class. Relevant properties are mentioned in the references for each method.
See this page for detail on all properties:
https://python.langchain.com/api_reference/standard_tests/unit_tests/langchain_tests.unit_tests.chat_models.ChatModelTests.html
Testing initialization from environment variables
Some unit tests may require testing initialization from environment variables.
These tests can be enabled by overriding the ``init_from_env_params``
property (see below):
.. dropdown:: init_from_env_params
This property is used in unit tests to test initialization from
environment variables. It should return a tuple of three dictionaries
that specify the environment variables, additional initialization args,
and expected instance attributes to check.
Defaults to empty dicts. If not overridden, the test is skipped.
Example:
.. code-block:: python
@property
def init_from_env_params(self) -> Tuple[dict, dict, dict]:
return (
{
"MY_API_KEY": "api_key",
},
{
"model": "bird-brain-001",
},
{
"my_api_key": "api_key",
},
)
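A minimal test module wiring these pieces together might look like the
following sketch (``ChatParrotLink`` and the ``langchain_parrot_link`` package
are the illustrative examples used throughout these docs):
.. code-block:: python

    from typing import Type

    from langchain_parrot_link.chat_models import ChatParrotLink
    from langchain_tests.unit_tests import ChatModelUnitTests


    class TestChatParrotLinkUnit(ChatModelUnitTests):
        @property
        def chat_model_class(self) -> Type[ChatParrotLink]:
            return ChatParrotLink

        @property
        def chat_model_params(self) -> dict:
            return {"model": "bird-brain-001", "temperature": 0}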
""" # noqa: E501
@property
def standard_chat_model_params(self) -> dict:
""":meta private:"""
params = super().standard_chat_model_params
params["api_key"] = "test"
return params
@property
def init_from_env_params(self) -> Tuple[dict, dict, dict]:
"""This property is used in unit tests to test initialization from environment
variables. It should return a tuple of three dictionaries that specify the
environment variables, additional initialization args, and expected instance
attributes to check."""
return {}, {}, {}
def test_init(self) -> None:
"""Test model initialization. This should pass for all integrations.
.. dropdown:: Troubleshooting
If this test fails, ensure that:
1. ``chat_model_params`` is specified and the model can be initialized from those params;
2. The model accommodates standard parameters: https://python.langchain.com/docs/concepts/chat_models/#standard-parameters
""" # noqa: E501
model = self.chat_model_class(
**{**self.standard_chat_model_params, **self.chat_model_params}
)
assert model is not None
def test_init_from_env(self) -> None:
"""Test initialization from environment variables. Relies on the
``init_from_env_params`` property. Test is skipped if that property is not
set.
.. dropdown:: Troubleshooting
If this test fails, ensure that ``init_from_env_params`` is specified
correctly.
"""
env_params, model_params, expected_attrs = self.init_from_env_params
if not env_params:
pytest.skip("init_from_env_params not specified.")
else:
with mock.patch.dict(os.environ, env_params):
model = self.chat_model_class(**model_params)
assert model is not None
for k, expected in expected_attrs.items():
actual = getattr(model, k)
if isinstance(actual, SecretStr):
actual = actual.get_secret_value()
assert actual == expected
def test_init_streaming(
self,
) -> None:
"""Test that model can be initialized with ``streaming=True``. This is for
backward-compatibility purposes.
.. dropdown:: Troubleshooting
If this test fails, ensure that the model can be initialized with a
boolean ``streaming`` parameter.
"""
model = self.chat_model_class(
**{
**self.standard_chat_model_params,
**self.chat_model_params,
"streaming": True,
}
)
assert model is not None
def test_bind_tool_pydantic(
self,
model: BaseChatModel,
) -> None:
"""Test that chat model correctly handles Pydantic models that are passed
into ``bind_tools``. Test is skipped if the ``has_tool_calling`` property
on the test class is False.
.. dropdown:: Troubleshooting
If this test fails, ensure that the model's ``bind_tools`` method
properly handles Pydantic V2 models. ``langchain_core`` implements
a utility function that will accommodate most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
See example implementation of ``bind_tools`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.bind_tools
""" # noqa: E501
if not self.has_tool_calling:
return
tools = [my_adder_tool, my_adder]
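# In addition to the decorated tool and the bare function, bind each test
# Pydantic model both as the class itself and as its JSON schema dict, to
# exercise the different formats ``bind_tools`` is expected to accept.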
for pydantic_model in TEST_PYDANTIC_MODELS:
model_schema = (
pydantic_model.model_json_schema()
if hasattr(pydantic_model, "model_json_schema")
else pydantic_model.schema()
)
tools.extend([pydantic_model, model_schema])
# Doing a mypy ignore here since some of the tools are from pydantic
# BaseModel 2 which isn't typed properly yet. This will need to be fixed
# so type checking does not become annoying to users.
tool_model = model.bind_tools(tools, tool_choice="any") # type: ignore
assert isinstance(tool_model, RunnableBinding)
@pytest.mark.parametrize("schema", TEST_PYDANTIC_MODELS)
def test_with_structured_output(
self,
model: BaseChatModel,
schema: Any,
) -> None:
"""Test ``with_structured_output`` method. Test is skipped if the
``has_structured_output`` property on the test class is False.
.. dropdown:: Troubleshooting
If this test fails, ensure that the model's ``bind_tools`` method
properly handles Pydantic V2 models. ``langchain_core`` implements
a utility function that will accommodate most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
""" # noqa: E501
if not self.has_structured_output:
return
assert model.with_structured_output(schema) is not None
def test_standard_params(self, model: BaseChatModel) -> None:
"""Test that model properly generates standard parameters. These are used
for tracing purposes.
.. dropdown:: Troubleshooting
If this test fails, check that the model accommodates standard parameters:
https://python.langchain.com/docs/concepts/chat_models/#standard-parameters
Check also that the model class is named according to convention
(e.g., ``ChatProviderName``).
"""
class ExpectedParams(BaseModelV1):
ls_provider: str
ls_model_name: str
ls_model_type: Literal["chat"]
ls_temperature: Optional[float]
ls_max_tokens: Optional[int]
ls_stop: Optional[List[str]]
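# Validate the standard tracing params emitted by the model; extra keys in
# ``ls_params`` are ignored by the (pydantic v1) ExpectedParams schema.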
ls_params = model._get_ls_params()
try:
ExpectedParams(**ls_params) # type: ignore
except ValidationErrorV1 as e:
pytest.fail(f"Validation error: {e}")
# Test optional params
model = self.chat_model_class(
max_tokens=10,
stop=["test"],
**self.chat_model_params, # type: ignore
)
ls_params = model._get_ls_params()
try:
ExpectedParams(**ls_params) # type: ignore
except ValidationErrorV1 as e:
pytest.fail(f"Validation error: {e}")
def test_serdes(self, model: BaseChatModel, snapshot: SnapshotAssertion) -> None:
"""Test serialization and deserialization of the model. Test is skipped if the
``is_lc_serializable`` property on the chat model class is not overridden
to return ``True``.
.. dropdown:: Troubleshooting
If this test fails, check that the ``init_from_env_params`` property is
correctly set on the test class.
"""
if not self.chat_model_class.is_lc_serializable():
pytest.skip("Model is not serializable.")
else:
env_params, model_params, expected_attrs = self.init_from_env_params
with mock.patch.dict(os.environ, env_params):
ser = dumpd(model)
assert ser == snapshot(name="serialized")
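# Round trip: dumping and re-loading the model should preserve its params.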
assert model.dict() == load(dumpd(model)).dict()