diff --git a/libs/standard-tests/langchain_tests/integration_tests/chat_models_v1.py b/libs/standard-tests/langchain_tests/integration_tests/chat_models_v1.py
index ef64ec15a16..e09bd204f3a 100644
--- a/libs/standard-tests/langchain_tests/integration_tests/chat_models_v1.py
+++ b/libs/standard-tests/langchain_tests/integration_tests/chat_models_v1.py
@@ -1,14 +1,20 @@
"""Integration tests for v1 chat models.
This module provides comprehensive integration tests for the new messages and standard
-content block system introduced in ``langchain_core.messages.content_blocks``.
+content block system introduced in ``langchain_core.v1.messages`` and
+``langchain_core.messages.content_blocks``.
"""
-from typing import Any, Union, cast
+import base64
+import json
+from typing import Annotated, Any, Literal, Optional, TypedDict, Union, cast
+from unittest.mock import MagicMock
+import httpx
import langchain_core.messages.content_blocks as types
import pytest
from langchain_core.callbacks import BaseCallbackHandler
+from langchain_core.language_models.fake_chat_models import GenericFakeChatModel
from langchain_core.messages.base import BaseMessage
from langchain_core.messages.content_blocks import (
AudioContentBlock,
@@ -29,19 +35,41 @@ from langchain_core.messages.content_blocks import (
WebSearchCall,
WebSearchResult,
create_audio_block,
+ create_file_block,
create_image_block,
+ create_non_standard_block,
create_plaintext_block,
create_text_block,
+ create_tool_call,
create_video_block,
is_reasoning_block,
is_text_block,
is_tool_call_block,
)
+from langchain_core.output_parsers.string import StrOutputParser
+from langchain_core.prompts.chat import ChatPromptTemplate
from langchain_core.tools import tool
+from langchain_core.tools.base import BaseTool
+from langchain_core.utils.function_calling import (
+ convert_to_json_schema,
+ tool_example_to_messages,
+)
from langchain_core.v1.chat_models import BaseChatModel
-from langchain_core.v1.messages import AIMessage, AIMessageChunk, HumanMessage
+from langchain_core.v1.messages import (
+ AIMessage,
+ AIMessageChunk,
+ HumanMessage,
+ SystemMessage,
+ ToolMessage,
+)
+from pydantic import BaseModel, Field
+from pydantic.v1 import BaseModel as BaseModelV1
+from pydantic.v1 import Field as FieldV1
+from pytest_benchmark.fixture import BenchmarkFixture
+from vcr.cassette import Cassette
from langchain_tests.unit_tests.chat_models_v1 import ChatModelV1Tests
+from langchain_tests.utils.pydantic import PYDANTIC_MAJOR_VERSION
# Content block type definitions for testing
ContentBlock = Union[
@@ -85,11 +113,79 @@ def _get_test_video_base64() -> str:
return "PLACEHOLDER_VIDEO_DATA"
+def _get_joke_class(
+ schema_type: Literal["pydantic", "typeddict", "json_schema"],
+) -> Any:
+ """:private:"""
+
+ class Joke(BaseModel):
+ """Joke to tell user."""
+
+ setup: str = Field(description="question to set up a joke")
+ punchline: str = Field(description="answer to resolve the joke")
+
+ def validate_joke(result: Any) -> bool:
+ return isinstance(result, Joke)
+
+ class JokeDict(TypedDict):
+ """Joke to tell user."""
+
+ setup: Annotated[str, ..., "question to set up a joke"]
+ punchline: Annotated[str, ..., "answer to resolve the joke"]
+
+ def validate_joke_dict(result: Any) -> bool:
+ return all(key in ["setup", "punchline"] for key in result)
+
+ if schema_type == "pydantic":
+ return Joke, validate_joke
+
+ if schema_type == "typeddict":
+ return JokeDict, validate_joke_dict
+
+ if schema_type == "json_schema":
+ return Joke.model_json_schema(), validate_joke_dict
+ msg = "Invalid schema type"
+ raise ValueError(msg)
+
+
+class _TestCallbackHandler(BaseCallbackHandler):
+ options: list[Optional[dict]]
+
+ def __init__(self) -> None:
+ super().__init__()
+ self.options = []
+
+ def on_chat_model_start(
+ self,
+ serialized: Any,
+ messages: Any,
+ *,
+ options: Optional[dict[str, Any]] = None,
+ **kwargs: Any,
+ ) -> None:
+ self.options.append(options)
+
+
+class _MagicFunctionSchema(BaseModel):
+ input: int = Field(..., gt=-1000, lt=1000)
+
+
+@tool(args_schema=_MagicFunctionSchema)
+def magic_function(_input: int) -> int:
+ """Applies a magic function to an input."""
+ return _input + 2
+
+
+@tool
+def magic_function_no_args() -> int:
+ """Calculates a magic function."""
+ return 5
+
+
def _validate_tool_call_message(message: AIMessage) -> None:
"""Validate that a message contains tool calls in content blocks format."""
if isinstance(message.content, list):
- # Check for tool calls in content blocks
tool_call_blocks = [
block
for block in message.content
@@ -97,67 +193,2203 @@ def _validate_tool_call_message(message: AIMessage) -> None:
]
assert len(tool_call_blocks) >= 1
- tool_call = tool_call_blocks[0]
- assert "name" in tool_call
- assert "args" in tool_call
- assert "id" in tool_call
- # TODO: review if this is necessary
- # else:
- # # Fallback to legacy tool_calls attribute
- # assert hasattr(message, "tool_calls")
- # assert len(message.tool_calls) >= 1
+ for tool_call in tool_call_blocks:
+ # Ensure each tool call has the required fields
+ assert "name" in tool_call
+ assert "args" in tool_call
+ assert "id" in tool_call
+    # (No fallback needed, since the ``tool_calls`` attribute performs the same
+    # search as the list comprehension above)
-def _validate_multimodal_content_blocks(
- message: BaseMessage, expected_types: list[str]
-) -> None:
- """Validate that a message contains expected content block types."""
- assert isinstance(message, (HumanMessage, AIMessage))
- assert isinstance(message.content, list)
+def _validate_tool_call_message_no_args(message: AIMessage) -> None:
+ """Validate that a message contains a single tool call with no arguments.
- found_types = []
- for block in message.content:
- if isinstance(block, dict) and "type" in block:
- found_types.append(block["type"])
+ Used for testing tool calls without arguments, such as
+ ``magic_function_no_args``.
+ """
+ assert isinstance(message, AIMessage)
+ assert len(message.tool_calls) == 1
+ tool_call = message.tool_calls[0]
+ assert tool_call["name"] == "magic_function_no_args"
+ assert tool_call["args"] == {}
+ assert tool_call["id"] is not None
- for type_ in expected_types:
- assert type_ in found_types, f"Expected content block type '{type_}' not found"
+
+@tool
+def unicode_customer(customer_name: str, description: str) -> str:
+ """Tool for creating a customer with a name containing Unicode characters.
+
+ Args:
+ customer_name: The customer's name in their native language.
+ description: Description of the customer.
+
+ Returns:
+ A confirmation message about the customer creation.
+ """
+ return f"Created customer: {customer_name} - {description}"
class ChatModelV1IntegrationTests(ChatModelV1Tests):
- """Integration tests for v1 chat models with standard content blocks support.
+ """Base class for v1 chat model integration tests.
- Inherits from ``ChatModelV1Tests`` to provide comprehensive testing of content
- block functionality with real external services.
- """
+ TODO: verify this entire docstring!
+
+ Test subclasses must implement the ``chat_model_class`` and
+ ``chat_model_params`` properties to specify what model to test and its
+ initialization parameters.
+
+ Example:
+
+ .. code-block:: python
+
+ from typing import Type
+
+ from langchain_tests.integration_tests import ChatModelV1IntegrationTests
+ from my_package.chat_models import MyChatModel
+
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+                def chat_model_class(self) -> Type[MyChatModel]:
+ # Return the chat model class to test here
+ return MyChatModel
+
+ @property
+ def chat_model_params(self) -> dict:
+ # Return initialization parameters for the v1 model.
+ return {"model": "model-001", "temperature": 0}
+
+ .. note::
+ API references for individual test methods include troubleshooting tips.
+
+
+ Test subclasses **must** implement the following two properties:
+
+ chat_model_class
+ The chat model class to test, e.g., ``ChatParrotLinkV1``.
+
+ Example:
+
+ .. code-block:: python
+
+ @property
+ def chat_model_class(self) -> Type[ChatParrotLinkV1]:
+ return ChatParrotLinkV1
+
+ chat_model_params
+ Initialization parameters for the chat model.
+
+ Example:
+
+ .. code-block:: python
+
+ @property
+ def chat_model_params(self) -> dict:
+ return {"model": "bird-brain-001", "temperature": 0}
+
+ In addition, test subclasses can control what features are tested (such as tool
+ calling or multi-modality) by selectively overriding the following properties.
+ Expand to see details:
+
+ .. dropdown:: has_tool_calling
+
+ TODO
+
+ .. dropdown:: tool_choice_value
+
+ TODO
+
+ .. dropdown:: has_tool_choice
+
+ TODO
+
+ .. dropdown:: has_structured_output
+
+ TODO
+
+ .. dropdown:: structured_output_kwargs
+
+ TODO
+
+ .. dropdown:: supports_json_mode
+
+ TODO
+
+ .. dropdown:: returns_usage_metadata
+
+ TODO
+
+ .. dropdown:: supports_anthropic_inputs
+
+ TODO
+
+ .. dropdown:: supports_image_tool_message
+
+ TODO
+
+ .. dropdown:: supported_usage_metadata_details
+
+ TODO
+
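+    For example, a test subclass might declare its capabilities by overriding several
+    of the properties above (the values below are purely illustrative, not defaults):
+
+    .. code-block:: python
+
+        class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+            @property
+            def has_tool_calling(self) -> bool:
+                return True
+
+            @property
+            def has_tool_choice(self) -> bool:
+                return False
+
+            @property
+            def returns_usage_metadata(self) -> bool:
+                return True
+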
+ .. dropdown:: enable_vcr_tests
+
+ Property controlling whether to enable select tests that rely on
+        `VCR <https://vcrpy.readthedocs.io/en/latest/>`_ caching of HTTP calls, such
+ as benchmarking tests.
+
+ To enable these tests, follow these steps:
+
+ 1. Override the ``enable_vcr_tests`` property to return ``True``:
+
+ .. code-block:: python
+
+ @property
+ def enable_vcr_tests(self) -> bool:
+ return True
+
+ 2. Configure VCR to exclude sensitive headers and other information from cassettes.
+
+ .. important::
+ VCR will by default record authentication headers and other sensitive
+ information in cassettes. Read below for how to configure what
+ information is recorded in cassettes.
+
+ To add configuration to VCR, add a ``conftest.py`` file to the ``tests/``
+ directory and implement the ``vcr_config`` fixture there.
+
+ ``langchain-tests`` excludes the headers ``'authorization'``,
+ ``'x-api-key'``, and ``'api-key'`` from VCR cassettes. To pick up this
+ configuration, you will need to add ``conftest.py`` as shown below. You can
+ also exclude additional headers, override the default exclusions, or apply
+ other customizations to the VCR configuration. See example below:
+
+ .. code-block:: python
+ :caption: tests/conftest.py
+
+ import pytest
+ from langchain_tests.conftest import _base_vcr_config as _base_vcr_config
+
+ _EXTRA_HEADERS = [
+ # Specify additional headers to redact
+ ("user-agent", "PLACEHOLDER"),
+ ]
+
+
+ def remove_response_headers(response: dict) -> dict:
+ # If desired, remove or modify headers in the response.
+ response["headers"] = {}
+ return response
+
+
+ @pytest.fixture(scope="session")
+ def vcr_config(_base_vcr_config: dict) -> dict: # noqa: F811
+ \"\"\"Extend the default configuration from langchain_tests.\"\"\"
+ config = _base_vcr_config.copy()
+ config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS)
+ config["before_record_response"] = remove_response_headers
+
+ return config
+
+ .. dropdown:: Compressing cassettes
+
+ ``langchain-tests`` includes a custom VCR serializer that compresses
+ cassettes using gzip. To use it, register the ``yaml.gz`` serializer
+ to your VCR fixture and enable this serializer in the config. See
+ example below:
+
+ .. code-block:: python
+ :caption: tests/conftest.py
+
+ import pytest
+ from langchain_tests.conftest import CustomPersister, CustomSerializer
+ from langchain_tests.conftest import _base_vcr_config as _base_vcr_config
+ from vcr import VCR
+
+ _EXTRA_HEADERS = [
+ # Specify additional headers to redact
+ ("user-agent", "PLACEHOLDER"),
+ ]
+
+
+ def remove_response_headers(response: dict) -> dict:
+ # If desired, remove or modify headers in the response.
+ response["headers"] = {}
+ return response
+
+
+ @pytest.fixture(scope="session")
+ def vcr_config(_base_vcr_config: dict) -> dict: # noqa: F811
+ \"\"\"Extend the default configuration from langchain_tests.\"\"\"
+ config = _base_vcr_config.copy()
+ config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS)
+ config["before_record_response"] = remove_response_headers
+ # New: enable serializer and set file extension
+ config["serializer"] = "yaml.gz"
+ config["path_transformer"] = VCR.ensure_suffix(".yaml.gz")
+
+ return config
+
+
+ def pytest_recording_configure(config: dict, vcr: VCR) -> None:
+ vcr.register_persister(CustomPersister())
+ vcr.register_serializer("yaml.gz", CustomSerializer())
+
+
+ You can inspect the contents of the compressed cassettes (e.g., to
+ ensure no sensitive information is recorded) using
+
+ .. code-block:: bash
+
+ gunzip -k /path/to/tests/cassettes/TestClass_test.yaml.gz
+
+ or by using the serializer:
+
+ .. code-block:: python
+
+ from langchain_tests.conftest import CustomPersister, CustomSerializer
+
+ cassette_path = "/path/to/tests/cassettes/TestClass_test.yaml.gz"
+                requests, responses = CustomPersister().load_cassette(cassette_path, CustomSerializer())
+
+ 3. Run tests to generate VCR cassettes.
+
+ Example:
+
+ .. code-block:: bash
+
+ uv run python -m pytest tests/integration_tests/test_chat_models.py::TestMyModel::test_stream_time
+
+ This will generate a VCR cassette for the test in
+ ``tests/integration_tests/cassettes/``.
+
+ .. important::
+ You should inspect the generated cassette to ensure that it does not
+ contain sensitive information. If it does, you can modify the
+ ``vcr_config`` fixture to exclude headers or modify the response
+ before it is recorded.
+
+ You can then commit the cassette to your repository. Subsequent test runs
+ will use the cassette instead of making HTTP calls.
+
+ """ # noqa: E501
- # Additional multimodal support properties for integration testing
@property
- def supports_multimodal_reasoning(self) -> bool:
- """Whether the model can reason about multimodal content."""
- return (
- self.supports_image_content_blocks
- and self.supports_reasoning_content_blocks
+ def standard_chat_model_params(self) -> dict:
+ """:private:"""
+ return {}
+
+ def test_invoke(self, model: BaseChatModel) -> None:
+ """Test to verify that ``model.invoke(simple_message)`` works.
+
+ A model should be able to produce a non-empty ``AIMessage`` in response to
+ ``"Hello"``. The message should at least contain a ``TextContentBlock`` with
+ text populated.
+
+ .. important::
+ This should pass for all integrations!
+
+ .. dropdown:: Troubleshooting
+
+ TODO
+
+ """
+ result = model.invoke("Hello")
+ assert result is not None
+ assert isinstance(result, AIMessage)
+ assert isinstance(result.text, str)
+ assert len(result.content) > 0
+
+ text_contentblock = result.content[0]
+ assert is_text_block(text_contentblock)
+
+ async def test_ainvoke(self, model: BaseChatModel) -> None:
+ """Test to verify that ``await model.ainvoke(simple_message)`` works.
+
+ A model should be able to produce a non-empty ``AIMessage`` in response to
+ ``"Hello"``. The message should at least contain a ``TextContentBlock`` with
+ text populated.
+
+ .. important::
+ This should pass for all integrations!
+
+ Passing this test does not indicate a "natively async" implementation, but
+ rather that the model can be used in an async context.
+
+ .. dropdown:: Troubleshooting
+
+ First, debug
+            :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_invoke`
+ because ``ainvoke`` has a default implementation that calls ``invoke`` in an
+ async context.
+
+ """
+ result = await model.ainvoke("Hello")
+ assert result is not None
+ assert isinstance(result, AIMessage)
+ assert isinstance(result.text, str)
+ assert len(result.content) > 0
+
+ text_contentblock = result.content[0]
+ assert is_text_block(text_contentblock)
+
+ def test_stream(self, model: BaseChatModel) -> None:
+ """Test to verify that ``model.stream(simple_message)`` works.
+
+ .. important::
+ This should pass for all integrations!
+
+ Passing this test does not indicate a "streaming" implementation, but rather
+ that the model can be used in a streaming context. For instance, a model
+ that yields at least one chunk in response to ``"Hello"``.
+
+ .. dropdown:: Troubleshooting
+
+ First, debug
+            :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_invoke`
+ because ``stream`` has a default implementation that calls ``invoke`` and
+ yields the result as a single chunk.
+
+ """
+ num_chunks = 0
+ for chunk in model.stream("Hello"):
+ assert chunk is not None
+ assert isinstance(chunk, AIMessageChunk)
+ assert isinstance(chunk.content, list)
+ num_chunks += 1
+ assert num_chunks > 0
+
+ async def test_astream(self, model: BaseChatModel) -> None:
+ """Test to verify that ``await model.astream(simple_message)`` works.
+
+ .. important::
+ This should pass for all integrations!
+
+ Passing this test does not indicate a "natively async" or "streaming"
+ implementation, but rather that the model can be used in an async streaming
+ context.
+
+ .. dropdown:: Troubleshooting
+
+ First, debug
+            :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_stream`
+            and
+            :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_ainvoke`
+ because ``astream`` has a default implementation that calls ``_stream`` in
+ an async context if it is implemented, or ``ainvoke`` and yields the result
+ as a single ``AIMessageChunk`` chunk if not.
+
+ """
+ num_chunks = 0
+ async for chunk in model.astream("Hello"):
+ assert chunk is not None
+ assert isinstance(chunk, AIMessageChunk)
+ assert isinstance(chunk.content, list)
+ num_chunks += 1
+ assert num_chunks > 0
+
+ def test_batch(self, model: BaseChatModel) -> None:
+ """Test to verify that ``model.batch([messages])`` works.
+
+ .. important::
+ This should pass for all integrations!
+
+ Tests the model's ability to process multiple prompts in a single batch. We
+ expect that the ``TextContentBlock`` of each response is populated with text.
+
+ Passing this test does not indicate a "natively batching" or "batching"
+ implementation, but rather that the model can be used in a batching context. For
+ instance, your model may internally call ``invoke`` for each message in the
+ batch, even if the model provider does not support batching natively.
+
+ .. dropdown:: Troubleshooting
+
+ First, debug
+ :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_invoke`
+ because ``batch`` has a default implementation that calls ``invoke`` for
+ each message in the batch.
+
+ If that test passes but not this one, you should make sure your ``batch``
+ method does not raise any exceptions, and that it returns a list of valid
+ :class:`~langchain_core.v1.messages.AIMessage` objects.
+
+ """
+ batch_results = model.batch(["Hello", "Hey"])
+ assert batch_results is not None
+ assert isinstance(batch_results, list)
+ assert len(batch_results) == 2
+ for result in batch_results:
+ assert result is not None
+ assert isinstance(result, AIMessage)
+ assert len(result.content) > 0
+ assert isinstance(result.text, str)
+ assert len(result.text) > 0
+
+ async def test_abatch(self, model: BaseChatModel) -> None:
+ """Test to verify that ``await model.abatch([messages])`` works.
+
+ .. important::
+ This should pass for all integrations!
+
+ Tests the model's ability to process multiple prompts in a single batch
+ asynchronously. We expect that the ``TextContentBlock`` of each response is
+ populated with text.
+
+ Passing this test does not indicate a "natively batching" or "batching"
+ implementation, but rather that the model can be used in a batching context. For
+ instance, your model may internally call ``ainvoke`` for each message in the
+ batch, even if the model provider does not support batching natively.
+
+ .. dropdown:: Troubleshooting
+
+ First, debug
+ :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_batch`
+ and
+ :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_ainvoke`
+ because ``abatch`` has a default implementation that calls ``ainvoke`` for
+ each message in the batch.
+
+ If those tests pass but not this one, you should make sure your ``abatch``
+ method does not raise any exceptions, and that it returns a list of valid
+ :class:`~langchain_core.v1.messages.AIMessage` objects.
+
+ """
+ batch_results = await model.abatch(["Hello", "Hey"])
+ assert batch_results is not None
+ assert isinstance(batch_results, list)
+ assert len(batch_results) == 2
+ for result in batch_results:
+ assert result is not None
+ assert isinstance(result, AIMessage)
+ assert len(result.content) > 0
+ assert isinstance(result.text, str)
+ assert len(result.text) > 0
+
+ def test_conversation(self, model: BaseChatModel) -> None:
+ """Test to verify that the model can handle multi-turn conversations.
+
+ .. important::
+ This should pass for all integrations!
+
+ Tests the model's ability to process a sequence of alternating human and AI
+ messages as context for generating the next response. We expect that the
+ ``TextContentBlock`` of each response is populated with text.
+
+ .. dropdown:: Troubleshooting
+
+ First, debug
+ :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_invoke`
+ because this test also uses ``model.invoke()``.
+
+ If that test passes but not this one, you should verify that:
+ 1. Your model correctly processes the message history
+ 2. The model maintains appropriate context from previous messages
+ 3. The response is a valid :class:`~langchain_core.v1.messages.AIMessage`
+
+ """
+ messages = [
+ HumanMessage("hello"),
+ AIMessage("hello"),
+ HumanMessage("how are you"),
+ ]
+ result = model.invoke(messages)
+ assert result is not None
+ assert isinstance(result, AIMessage)
+ assert len(result.content) > 0
+ assert isinstance(result.text, str)
+ assert len(result.text) > 0
+
+ def test_double_messages_conversation(self, model: BaseChatModel) -> None:
+ """Test to verify that the model can handle double-message conversations.
+
+ .. important::
+ This should pass for all integrations!
+
+ Tests the model's ability to process a sequence of double-system, double-human,
+ and double-ai messages as context for generating the next response. We expect
+ that the ``TextContentBlock`` of each response is populated with text.
+
+ .. dropdown:: Troubleshooting
+
+ First, debug
+ :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_invoke`
+ because this test also uses ``model.invoke()``.
+
+ Second, debug
+ :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_conversation`
+ because this test is the "basic case" without double messages.
+
+            If those tests pass but not this one, you should verify that:
+ 1. Your model API can handle double messages, or the integration should merge messages before sending them to the API.
+ 2. The response is a valid :class:`~langchain_core.v1.messages.AIMessage`
+
+ """ # noqa: E501
+ messages = [
+ SystemMessage("hello"),
+ SystemMessage("hello"),
+ HumanMessage("hello"),
+ HumanMessage("hello"),
+ AIMessage("hello"),
+ AIMessage("hello"),
+ HumanMessage("how are you"),
+ ]
+ result = model.invoke(messages)
+ assert result is not None
+ assert isinstance(result, AIMessage)
+ assert len(result.content) > 0
+ assert isinstance(result.text, str)
+ assert len(result.text) > 0
+
+ def test_usage_metadata(self, model: BaseChatModel) -> None:
+ """Test to verify that the model returns correct usage metadata.
+
+ This test is optional and should be skipped if the model does not return
+ usage metadata (see Configuration below).
+
+ .. versionchanged:: 0.3.17
+
+ Additionally check for the presence of ``model_name`` in the response
+ metadata, which is needed for usage tracking in callback handlers.
+
+ .. dropdown:: Configuration
+
+ By default, this test is run.
+
+ To disable this feature, set the ``returns_usage_metadata`` property to
+ ``False`` in your test class:
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+ def returns_usage_metadata(self) -> bool:
+ return False
+
+ This test can also check the format of specific kinds of usage metadata
+ based on the ``supported_usage_metadata_details`` property. This property
+ should be configured as follows with the types of tokens that the model
+ supports tracking:
+
+ TODO: check this!
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+ def supported_usage_metadata_details(self) -> dict:
+ return {
+ "invoke": [
+ "audio_input",
+ "audio_output",
+ "reasoning_output",
+ "cache_read_input",
+ "cache_creation_input",
+ ],
+ "stream": [
+ "audio_input",
+ "audio_output",
+ "reasoning_output",
+ "cache_read_input",
+ "cache_creation_input",
+ ],
+ }
+
+
+ .. dropdown:: Troubleshooting
+
+ TODO
+
+ """
+ if not self.returns_usage_metadata:
+ pytest.skip("Not implemented.")
+
+ result = model.invoke("Hello")
+ assert result is not None
+ assert isinstance(result, AIMessage)
+
+ assert result.usage_metadata is not None
+ assert isinstance(result.usage_metadata["input_tokens"], int)
+ assert isinstance(result.usage_metadata["output_tokens"], int)
+ assert isinstance(result.usage_metadata["total_tokens"], int)
+
+ # Check model_name is in response_metadata
+ # (Needed for langchain_core.callbacks.usage)
+ model_name = result.response_metadata.get("model_name")
+ assert isinstance(model_name, str)
+ assert model_name != "", "model_name is empty"
+
+ # TODO: check these
+ # `input_tokens` is the total, possibly including other unclassified or
+ # system-level tokens.
+ if "audio_input" in self.supported_usage_metadata_details["invoke"]:
+ # Checks if the specific chat model integration being tested has declared
+ # that it supports reporting token counts specifically for `audio_input`
+ msg = self.invoke_with_audio_input() # To be implemented in test subclass
+ assert (usage_metadata := msg.usage_metadata) is not None
+ assert (
+ input_token_details := usage_metadata.get("input_token_details")
+ ) is not None
+ assert isinstance(input_token_details.get("audio"), int)
+ # Asserts that total input tokens are at least the sum of the token counts
+ total_detailed_tokens = sum(
+ v for v in input_token_details.values() if isinstance(v, int)
+ )
+ assert usage_metadata.get("input_tokens", 0) >= total_detailed_tokens
+ if "audio_output" in self.supported_usage_metadata_details["invoke"]:
+ msg = self.invoke_with_audio_output()
+ assert (usage_metadata := msg.usage_metadata) is not None
+ assert (
+ output_token_details := usage_metadata.get("output_token_details")
+ ) is not None
+ assert isinstance(output_token_details.get("audio"), int)
+ # Asserts that total output tokens are at least the sum of the token counts
+ total_detailed_tokens = sum(
+ v for v in output_token_details.values() if isinstance(v, int)
+ )
+ assert usage_metadata.get("output_tokens", 0) >= total_detailed_tokens
+ if "reasoning_output" in self.supported_usage_metadata_details["invoke"]:
+ msg = self.invoke_with_reasoning_output()
+ assert (usage_metadata := msg.usage_metadata) is not None
+ assert (
+ output_token_details := usage_metadata.get("output_token_details")
+ ) is not None
+ assert isinstance(output_token_details.get("reasoning"), int)
+ # Asserts that total output tokens are at least the sum of the token counts
+ total_detailed_tokens = sum(
+ v for v in output_token_details.values() if isinstance(v, int)
+ )
+ assert usage_metadata.get("output_tokens", 0) >= total_detailed_tokens
+ if "cache_read_input" in self.supported_usage_metadata_details["invoke"]:
+ msg = self.invoke_with_cache_read_input()
+ assert (usage_metadata := msg.usage_metadata) is not None
+ assert (
+ input_token_details := usage_metadata.get("input_token_details")
+ ) is not None
+ assert isinstance(input_token_details.get("cache_read"), int)
+ # Asserts that total input tokens are at least the sum of the token counts
+ total_detailed_tokens = sum(
+ v for v in input_token_details.values() if isinstance(v, int)
+ )
+ assert usage_metadata.get("input_tokens", 0) >= total_detailed_tokens
+ if "cache_creation_input" in self.supported_usage_metadata_details["invoke"]:
+ msg = self.invoke_with_cache_creation_input()
+ assert (usage_metadata := msg.usage_metadata) is not None
+ assert (
+ input_token_details := usage_metadata.get("input_token_details")
+ ) is not None
+ assert isinstance(input_token_details.get("cache_creation"), int)
+ # Asserts that total input tokens are at least the sum of the token counts
+ total_detailed_tokens = sum(
+ v for v in input_token_details.values() if isinstance(v, int)
+ )
+ assert usage_metadata.get("input_tokens", 0) >= total_detailed_tokens
+
+ def test_usage_metadata_streaming(self, model: BaseChatModel) -> None:
+ """Test usage metadata in streaming mode.
+
+ Test to verify that the model returns correct usage metadata in streaming mode.
+
+ .. versionchanged:: 0.3.17
+
+ Additionally check for the presence of ``model_name`` in the response
+ metadata, which is needed for usage tracking in callback handlers.
+
+ .. dropdown:: Configuration
+
+ By default, this test is run.
+ To disable this feature, set ``returns_usage_metadata`` to ``False`` in your
+ test class:
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+ def returns_usage_metadata(self) -> bool:
+ return False
+
+ This test can also check the format of specific kinds of usage metadata
+ based on the ``supported_usage_metadata_details`` property. This property
+ should be configured as follows with the types of tokens that the model
+ supports tracking:
+
+ TODO: check this!
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+ def supported_usage_metadata_details(self) -> dict:
+ return {
+ "invoke": [
+ "audio_input",
+ "audio_output",
+ "reasoning_output",
+ "cache_read_input",
+ "cache_creation_input",
+ ],
+ "stream": [
+ "audio_input",
+ "audio_output",
+ "reasoning_output",
+ "cache_read_input",
+ "cache_creation_input",
+ ],
+ }
+
+ .. dropdown:: Troubleshooting
+
+ TODO
+
+ """
+ if not self.returns_usage_metadata:
+ pytest.skip("Not implemented.")
+
+ full: Optional[AIMessageChunk] = None
+ for chunk in model.stream("Write me 2 haikus. Only include the haikus."):
+ assert isinstance(chunk, AIMessageChunk)
+ # Only one chunk is allowed to set usage_metadata.input_tokens
+ # if multiple do, it's likely a bug that will result in overcounting
+ # input tokens (since the total number of input tokens applies to the full
+ # generation, not individual chunks)
+ if full and full.usage_metadata and full.usage_metadata["input_tokens"]:
+ assert (
+ not chunk.usage_metadata or not chunk.usage_metadata["input_tokens"]
+ ), (
+ "Only one chunk should set input_tokens,"
+ " the rest should be 0 or None"
+ )
+ full = chunk if full is None else cast("AIMessageChunk", full + chunk)
+
+ assert isinstance(full, AIMessageChunk)
+ assert full.usage_metadata is not None
+ assert isinstance(full.usage_metadata["input_tokens"], int)
+ assert isinstance(full.usage_metadata["output_tokens"], int)
+ assert isinstance(full.usage_metadata["total_tokens"], int)
+
+ # Check model_name is in response_metadata
+ # (Needed for langchain_core.callbacks.usage)
+ model_name = full.response_metadata.get("model_name")
+ assert isinstance(model_name, str)
+ assert model_name != "", "model_name is empty"
+
+ # TODO: check these
+ if "audio_input" in self.supported_usage_metadata_details["stream"]:
+ msg = self.invoke_with_audio_input(stream=True)
+ assert msg.usage_metadata is not None
+ assert isinstance(
+ msg.usage_metadata.get("input_token_details", {}).get("audio"), int
+ )
+ if "audio_output" in self.supported_usage_metadata_details["stream"]:
+ msg = self.invoke_with_audio_output(stream=True)
+ assert msg.usage_metadata is not None
+ assert isinstance(
+ msg.usage_metadata.get("output_token_details", {}).get("audio"), int
+ )
+ if "reasoning_output" in self.supported_usage_metadata_details["stream"]:
+ msg = self.invoke_with_reasoning_output(stream=True)
+ assert msg.usage_metadata is not None
+ assert isinstance(
+ msg.usage_metadata.get("output_token_details", {}).get("reasoning"), int
+ )
+ if "cache_read_input" in self.supported_usage_metadata_details["stream"]:
+ msg = self.invoke_with_cache_read_input(stream=True)
+ assert msg.usage_metadata is not None
+ assert isinstance(
+ msg.usage_metadata.get("input_token_details", {}).get("cache_read"), int
+ )
+ if "cache_creation_input" in self.supported_usage_metadata_details["stream"]:
+ msg = self.invoke_with_cache_creation_input(stream=True)
+ assert msg.usage_metadata is not None
+ assert isinstance(
+ msg.usage_metadata.get("input_token_details", {}).get("cache_creation"),
+ int,
+ )
+
+ def test_stop_sequence(self, model: BaseChatModel) -> None:
+ """Test that model does not fail when invoked with the ``stop`` parameter,
+ which is a standard parameter for stopping generation at a certain token.
+
+        `More on standard parameters <https://python.langchain.com/docs/concepts/chat_models/#standard-parameters>`__
+
+ .. important::
+ This should pass for all integrations!
+
+ .. dropdown:: Troubleshooting
+
+ TODO
+
+ """
+ result = model.invoke("hi", stop=["you"])
+ assert isinstance(result, AIMessage)
+
+ custom_model = self.chat_model_class(
+ **{
+ **self.chat_model_params,
+ "stop": ["you"],
+ }
+ )
+ result = custom_model.invoke("hi")
+ assert isinstance(result, AIMessage)
+
+ def test_tool_calling(self, model: BaseChatModel) -> None:
+ """Test that the model generates tool calls. This test is skipped if the
+ ``has_tool_calling`` property on the test class is set to False.
+
+ This test is optional and should be skipped if the model does not support
+ tool calling (see Configuration below).
+
+ .. dropdown:: Configuration
+
+ To disable tool calling tests, set ``has_tool_calling`` to False in your
+ test class:
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+ def has_tool_calling(self) -> bool:
+ return False
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, check that ``bind_tools`` is implemented to correctly
+ translate LangChain tool objects into the appropriate schema for your
+ chat model.
+
+ This test may fail if the chat model does not support a ``tool_choice``
+ parameter. This parameter can be used to force a tool call. If
+ ``tool_choice`` is not supported and the model consistently fails this
+ test, you can ``xfail`` the test:
+
+ .. code-block:: python
+
+ @pytest.mark.xfail(reason=("Does not support tool_choice."))
+                def test_tool_calling(self, model: BaseChatModel) -> None:
+ super().test_tool_calling(model)
+
+ Otherwise, in the case that only one tool is bound, ensure that
+ ``tool_choice`` supports the string ``'any'`` to force calling that tool.
+
+ """
+ if not self.has_tool_calling:
+ pytest.skip("Test requires tool calling.")
+
+ tool_choice_value = None if not self.has_tool_choice else "any"
+ # Emit warning if tool_choice_value property is overridden
+
+ # TODO remove since deprecated?
+ # if inspect.getattr_static(
+ # self, "tool_choice_value"
+ # ) is not inspect.getattr_static(
+ # ChatModelV1IntegrationTests, "tool_choice_value"
+ # ):
+ # warn_deprecated(
+ # "0.3.15",
+ # message=(
+ # "`tool_choice_value` will be removed in version 0.3.20. If a "
+ # "model supports `tool_choice`, it should accept `tool_choice='any' " # noqa: E501
+ # "and `tool_choice=`. If the model does not "
+ # "support `tool_choice`, override the `supports_tool_choice` "
+ # "property to return `False`."
+ # ),
+ # removal="0.3.20",
+ # )
+
+ model_with_tools = model.bind_tools(
+ [magic_function], tool_choice=tool_choice_value
+ )
+ query = "What is the value of magic_function(3)? Use the tool."
+ result = model_with_tools.invoke(query)
+ _validate_tool_call_message(result)
+
+ # Test stream()
+ full: Optional[AIMessageChunk] = None
+ for chunk in model_with_tools.stream(query):
+ full = chunk if full is None else full + chunk
+ assert isinstance(full, AIMessage)
+ _validate_tool_call_message(full)
+
+ async def test_tool_calling_async(self, model: BaseChatModel) -> None:
+ """Test that the model generates tool calls. This test is skipped if the
+ ``has_tool_calling`` property on the test class is set to False.
+
+ This test is optional and should be skipped if the model does not support
+ tool calling (see Configuration below).
+
+ .. dropdown:: Configuration
+
+ To disable tool calling tests, set ``has_tool_calling`` to False in your
+ test class:
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+ def has_tool_calling(self) -> bool:
+ return False
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, check that ``bind_tools`` is implemented to correctly
+ translate LangChain tool objects into the appropriate schema for your
+ chat model.
+
+ This test may fail if the chat model does not support a ``tool_choice``
+ parameter. This parameter can be used to force a tool call. If
+ ``tool_choice`` is not supported and the model consistently fails this
+ test, you can ``xfail`` the test:
+
+ .. code-block:: python
+
+ @pytest.mark.xfail(reason=("Does not support tool_choice."))
+                async def test_tool_calling_async(self, model: BaseChatModel) -> None:
+ await super().test_tool_calling_async(model)
+
+ Otherwise, in the case that only one tool is bound, ensure that
+ ``tool_choice`` supports the string ``'any'`` to force calling that tool.
+
+ """
+ if not self.has_tool_calling:
+ pytest.skip("Test requires tool calling.")
+
+ tool_choice_value = None if not self.has_tool_choice else "any"
+ model_with_tools = model.bind_tools(
+ [magic_function], tool_choice=tool_choice_value
+ )
+ query = "What is the value of magic_function(3)? Use the tool."
+ result = await model_with_tools.ainvoke(query)
+ _validate_tool_call_message(result)
+
+ # Test astream()
+ full: Optional[AIMessageChunk] = None
+ async for chunk in model_with_tools.astream(query):
+ full = chunk if full is None else full + chunk
+ assert isinstance(full, AIMessage)
+ _validate_tool_call_message(full)
+
+ def test_bind_runnables_as_tools(self, model: BaseChatModel) -> None:
+ """Test that the model generates tool calls for tools that are derived from
+ LangChain runnables. This test is skipped if the ``has_tool_calling`` property
+ on the test class is set to False.
+
+ This test is optional and should be skipped if the model does not support
+ tool calling (see Configuration below).
+
+ .. dropdown:: Configuration
+
+ To disable tool calling tests, set ``has_tool_calling`` to False in your
+ test class:
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+ def has_tool_calling(self) -> bool:
+ return False
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, check that ``bind_tools`` is implemented to correctly
+ translate LangChain tool objects into the appropriate schema for your
+ chat model.
+
+ This test may fail if the chat model does not support a ``tool_choice``
+ parameter. This parameter can be used to force a tool call. If
+ ``tool_choice`` is not supported and the model consistently fails this
+ test, you can ``xfail`` the test:
+
+ .. code-block:: python
+
+ @pytest.mark.xfail(reason=("Does not support tool_choice."))
+                def test_bind_runnables_as_tools(self, model: BaseChatModel) -> None:
+ super().test_bind_runnables_as_tools(model)
+
+ Otherwise, ensure that the ``tool_choice_value`` property is correctly
+ specified on the test class.
+
+ """
+ if not self.has_tool_calling:
+ pytest.skip("Test requires tool calling.")
+
+ prompt = ChatPromptTemplate.from_messages(
+ [("human", "Hello. Please respond in the style of {answer_style}.")]
+ )
+ llm = GenericFakeChatModel(messages=iter(["hello matey"]))
+ chain = prompt | llm | StrOutputParser()
+ tool_ = chain.as_tool(
+ name="greeting_generator",
+ description="Generate a greeting in a particular style of speaking.",
)
- @property
- def supports_code_interpreter(self) -> bool:
- """Whether the model supports code interpreter blocks."""
- return False
+ if self.has_tool_choice:
+ tool_choice: Optional[str] = "any"
+ else:
+ tool_choice = None
- @property
- def supports_structured_citations(self) -> bool:
- """Whether the model supports structured citation generation."""
- return self.supports_citations
+ model_with_tools = model.bind_tools([tool_], tool_choice=tool_choice)
+ query = "Using the tool, generate a Pirate greeting."
+ result = model_with_tools.invoke(query)
+ assert isinstance(result, AIMessage)
+ assert result.tool_calls
+ tool_call = result.tool_calls[0]
+ assert tool_call["args"].get(
+ "answer_style"
+ ) # TODO: do we need to handle if args is str? # noqa: E501
+ assert is_tool_call_block(tool_call)
- @property
- def requires_api_key(self) -> bool:
- """Whether integration tests require an API key."""
- return True
+ def test_tool_message_histories_string_content(
+ self, model: BaseChatModel, my_adder_tool: BaseTool
+ ) -> None:
+ """Test that message histories are compatible with string tool contents
+ (e.g. OpenAI format). If a model passes this test, it should be compatible
+ with messages generated from providers following OpenAI format.
+
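+        A sketch of the kind of history this test exercises, where the tool result is
+        passed as a plain JSON string (the exact values below are illustrative):
+
+        .. code-block:: python
+
+            [
+                HumanMessage("What is 1 + 2"),
+                AIMessage(
+                    [
+                        create_tool_call(
+                            "my_adder_tool", {"a": "1", "b": "2"}, id="abc123"
+                        )
+                    ]
+                ),
+                ToolMessage('{"result": 3}', tool_call_id="abc123"),
+            ]
+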
+ This test should be skipped if the model does not support tool calling
+ (see Configuration below).
+
+ .. dropdown:: Configuration
+
+ To disable tool calling tests, set ``has_tool_calling`` to False in your
+ test class:
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+ def has_tool_calling(self) -> bool:
+ return False
+
+ .. dropdown:: Troubleshooting
+
+ TODO: verify this!
+
+ If this test fails, check that:
+
+            1. The model can correctly handle message histories that include ``AIMessage`` objects whose ``TextContentBlock`` text is the empty string (``""``).
+ 2. The ``tool_calls`` attribute on ``AIMessage`` objects is correctly handled and passed to the model in an appropriate format.
+ 3. The model can correctly handle ``ToolMessage`` objects with string content and arbitrary string values for ``tool_call_id``.
+
+ You can ``xfail`` the test if tool calling is implemented but this format
+ is not supported.
+
+ .. code-block:: python
+
+ @pytest.mark.xfail(reason=("Not implemented."))
+ def test_tool_message_histories_string_content(self, *args: Any) -> None:
+ super().test_tool_message_histories_string_content(*args)
+
+ """ # noqa: E501
+ if not self.has_tool_calling:
+ pytest.skip("Test requires tool calling.")
+
+ model_with_tools = model.bind_tools([my_adder_tool])
+ function_name = "my_adder_tool"
+ function_args = {"a": "1", "b": "2"}
+
+        messages_string_content = [
+            HumanMessage("What is 1 + 2"),
+            # String content (e.g. OpenAI); the tool call is carried on an AIMessage
+            AIMessage(
+                [create_tool_call(function_name, function_args, id="abc123")],
+            ),
+            ToolMessage(
+                json.dumps({"result": 3}), tool_call_id="abc123", status="success"
+            ),
+        ]
+ result_string_content = model_with_tools.invoke(messages_string_content) # TODO
+ assert isinstance(result_string_content, AIMessage)
+
+ def test_tool_message_histories_list_content(
+ self,
+ model: BaseChatModel,
+ my_adder_tool: BaseTool,
+ ) -> None:
+ """Test that message histories are compatible with list tool contents
+ (e.g. Anthropic format).
+
+ These message histories will include AIMessage objects with "tool use" and
+ content blocks, e.g.,
+
+ .. code-block:: python
+
+ [
+ {"type": "text", "text": "Hmm let me think about that"},
+ {
+ "type": "tool_use",
+ "input": {"fav_color": "green"},
+ "id": "foo",
+ "name": "color_picker",
+ },
+ ]
+
+ This test should be skipped if the model does not support tool calling
+ (see Configuration below).
+
+ .. dropdown:: Configuration
+
+ To disable tool calling tests, set ``has_tool_calling`` to False in your
+ test class:
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+ def has_tool_calling(self) -> bool:
+ return False
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, check that:
+
+ 1. The model can correctly handle message histories that include ``AIMessage`` objects with list content.
+ 2. The ``tool_calls`` attribute on ``AIMessage`` objects is correctly handled and passed to the model in an appropriate format.
+ 3. The model can correctly handle ``ToolMessage`` objects with string content and arbitrary string values for ``tool_call_id``.
+
+ You can ``xfail`` the test if tool calling is implemented but this format
+ is not supported.
+
+ .. code-block:: python
+
+ @pytest.mark.xfail(reason=("Not implemented."))
+ def test_tool_message_histories_list_content(self, *args: Any) -> None:
+ super().test_tool_message_histories_list_content(*args)
+
+ """ # noqa: E501
+ pytest.fail("Test not implemented yet.")
+
+ # TODO
+ # if not self.has_tool_calling:
+ # pytest.skip("Test requires tool calling.")
+
+ # model_with_tools = model.bind_tools([my_adder_tool])
+ # function_name = "my_adder_tool"
+ # function_args = {"a": 1, "b": 2}
+
+ # messages_list_content = [
+ # HumanMessage("What is 1 + 2"),
+ # # List content (e.g., Anthropic)
+ # AIMessage(
+ # [
+ # {"type": "text", "text": "some text"},
+ # {
+ # "type": "tool_use",
+ # "id": "abc123",
+ # "name": function_name,
+ # "input": function_args,
+ # },
+ # ],
+ # tool_calls=[
+ # {
+ # "name": function_name,
+ # "args": function_args,
+ # "id": "abc123",
+ # "type": "tool_call",
+ # },
+ # ],
+ # ),
+ # ToolMessage(
+ # json.dumps({"result": 3}),
+ # name=function_name,
+ # tool_call_id="abc123",
+ # ),
+ # ]
+ # result_list_content = model_with_tools.invoke(messages_list_content)
+ # assert isinstance(result_list_content, AIMessage)
+
+ def test_tool_choice(self, model: BaseChatModel) -> None:
+ """Test that the model can force tool calling via the ``tool_choice``
+ parameter. This test is skipped if the ``has_tool_choice`` property on the
+ test class is set to False.
+
+ This test is optional and should be skipped if the model does not support
+ tool calling (see Configuration below).
+
+ .. dropdown:: Configuration
+
+ To disable tool calling tests, set ``has_tool_choice`` to False in your
+ test class:
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+ def has_tool_choice(self) -> bool:
+ return False
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, check whether the ``test_tool_calling`` test is passing.
+ If it is not, refer to the troubleshooting steps in that test first.
+
+ If ``test_tool_calling`` is passing, check that the underlying model
+ supports forced tool calling. If it does, ``bind_tools`` should accept a
+ ``tool_choice`` parameter that can be used to force a tool call.
+
+ It should accept:
+
+ 1. The string ``'any'`` to force calling the bound tool, and,
+ 2. The string name of the tool to force calling that tool.
+
+ """
+ if not self.has_tool_choice or not self.has_tool_calling:
+ pytest.skip("Test requires tool choice.")
+
+ @tool
+ def get_weather(location: str) -> str:
+ """Get weather at a location."""
+ return "It's sunny."
+
+ for tool_choice in ["any", "magic_function"]:
+ model_with_tools = model.bind_tools(
+ [magic_function, get_weather], tool_choice=tool_choice
+ )
+ result = model_with_tools.invoke("Hello!")
+ assert isinstance(result, AIMessage)
+ assert result.tool_calls
+ if tool_choice == "magic_function":
+ assert result.tool_calls[0]["name"] == "magic_function"
+
+ def test_tool_calling_with_no_arguments(self, model: BaseChatModel) -> None:
+ """Test that the model generates tool calls for tools with no arguments.
+ This test is skipped if the ``has_tool_calling`` property on the test class
+ is set to False.
+
+ This test is optional and should be skipped if the model does not support
+ tool calling (see Configuration below).
+
+ .. dropdown:: Configuration
+
+ To disable tool calling tests, set ``has_tool_calling`` to False in your
+ test class:
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+ def has_tool_calling(self) -> bool:
+ return False
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, check that ``bind_tools`` is implemented to correctly
+ translate LangChain tool objects into the appropriate schema for your
+ chat model. It should correctly handle the case where a tool has no
+ arguments.
+
+ This test may fail if the chat model does not support a ``tool_choice``
+ parameter. This parameter can be used to force a tool call. It may also
+ fail if a provider does not support this form of tool. In these cases,
+ you can ``xfail`` the test:
+
+ .. code-block:: python
+
+ @pytest.mark.xfail(reason=("Does not support tool_choice."))
+                def test_tool_calling_with_no_arguments(self, model: BaseChatModel) -> None:
+ super().test_tool_calling_with_no_arguments(model)
+
+ Otherwise, in the case that only one tool is bound, ensure that
+ ``tool_choice`` supports the string ``'any'`` to force calling that tool.
+
+ """ # noqa: E501
+ if not self.has_tool_calling:
+ pytest.skip("Test requires tool calling.")
+
+ tool_choice_value = None if not self.has_tool_choice else "any"
+ model_with_tools = model.bind_tools(
+ [magic_function_no_args], tool_choice=tool_choice_value
+ )
+ query = "What is the value of magic_function_no_args()? Use the tool."
+ result = model_with_tools.invoke(query)
+ _validate_tool_call_message_no_args(result)
+
+ full: Optional[AIMessageChunk] = None
+ for chunk in model_with_tools.stream(query):
+ full = chunk if full is None else full + chunk
+ assert isinstance(full, AIMessage)
+ _validate_tool_call_message_no_args(full)
+
+ def test_tool_message_error_status(
+ self, model: BaseChatModel, my_adder_tool: BaseTool
+ ) -> None:
+ """Test that ``ToolMessage`` with ``status="error"`` can be handled.
+
+ These messages may take the form:
+
+ .. code-block:: python
+
+            ToolMessage(
+                content="Error: Missing required argument 'b'.",
+                tool_call_id="abc123",
+                status="error",
+            )
+
+ If possible, the ``status`` field should be parsed and passed appropriately
+ to the model.
+
+ This test is optional and should be skipped if the model does not support
+ tool calling (see Configuration below).
+
+ .. dropdown:: Configuration
+
+ To disable tool calling tests, set ``has_tool_calling`` to False in your
+ test class:
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+ def has_tool_calling(self) -> bool:
+ return False
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, check that the ``status`` field on ``ToolMessage``
+ objects is either ignored or passed to the model appropriately.
+
+ """
+ if not self.has_tool_calling:
+ pytest.skip("Test requires tool calling.")
+
+ model_with_tools = model.bind_tools([my_adder_tool])
+ messages = [
+ HumanMessage("What is 1 + 2?"),
+            # Tool call is missing required argument 'b'
+            AIMessage(
+                [create_tool_call("my_adder_tool", {"a": 1}, id="abc123")],
+            ),
+ ToolMessage(
+ "Error: Missing required argument 'b'.",
+ tool_call_id="abc123",
+ status="error",
+ ),
+ ]
+ result = model_with_tools.invoke(messages)
+ assert isinstance(result, AIMessage)
+
+ def test_structured_few_shot_examples(
+ self, model: BaseChatModel, my_adder_tool: BaseTool
+ ) -> None:
+ """Test that the model can process few-shot examples with tool calls.
+
+ These are represented as a sequence of messages of the following form:
+
+ - ``HumanMessage`` with ``TextContentBlock`` content;
+ - ``AIMessage`` with the ``tool_calls`` attribute populated;
+ - ``ToolMessage`` with string content;
+ - ``ToolMessage`` with content block content;
+ - ``AIMessage`` with ``TextContentBlock`` content (an answer);
+ - ``HumanMessage`` with ``TextContentBlock`` content (a follow-up question).
+
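+        For example, such a sequence might look like the following (an illustrative
+        sketch using a string-content ``ToolMessage``; the test itself builds the
+        messages with ``tool_example_to_messages``):
+
+        .. code-block:: python
+
+            [
+                HumanMessage("What is 1 + 2"),
+                AIMessage(
+                    [create_tool_call("my_adder_tool", {"a": 1, "b": 2}, id="abc123")]
+                ),
+                ToolMessage('{"result": 3}', tool_call_id="abc123"),
+                AIMessage("1 + 2 is 3."),
+                HumanMessage("What is 3 + 4"),
+            ]
+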
+ This test should be skipped if the model does not support tool calling
+ (see Configuration below).
+
+ .. dropdown:: Configuration
+
+ To disable tool calling tests, set ``has_tool_calling`` to False in your
+ test class:
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+ def has_tool_calling(self) -> bool:
+ return False
+
+ .. dropdown:: Troubleshooting
+
+            This test uses `a utility function <https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.tool_example_to_messages.html>`__
+ in ``langchain_core`` to generate a sequence of messages representing
+ "few-shot" examples.
+
+ If this test fails, check that the model can correctly handle this
+ sequence of messages.
+
+ You can ``xfail`` the test if tool calling is implemented but this format
+ is not supported.
+
+ .. code-block:: python
+
+ @pytest.mark.xfail(reason=("Not implemented."))
+ def test_structured_few_shot_examples(self, *args: Any) -> None:
+ super().test_structured_few_shot_examples(*args)
+
+ """
+ if not self.has_tool_calling:
+ pytest.skip("Test requires tool calling.")
+
+ model_with_tools = model.bind_tools([my_adder_tool], tool_choice="any")
+ function_result = json.dumps({"result": 3})
+
+ tool_schema = my_adder_tool.args_schema
+ assert isinstance(tool_schema, type)
+ assert issubclass(tool_schema, BaseModel)
+ # TODO verify this is correct
+ few_shot_messages = tool_example_to_messages(
+ "What is 1 + 2",
+ [tool_schema(a=1, b=2)],
+ tool_outputs=[function_result],
+ ai_response=function_result,
+ )
+
+ messages = [*few_shot_messages, HumanMessage("What is 3 + 4")]
+ result = model_with_tools.invoke(messages)
+ assert isinstance(result, AIMessage)
+
+ @pytest.mark.parametrize("schema_type", ["pydantic", "typeddict", "json_schema"])
+ def test_structured_output(self, model: BaseChatModel, schema_type: str) -> None:
+ """Test to verify structured output is generated both on ``invoke()`` and ``stream()``.
+
+ This test is optional and should be skipped if the model does not support
+ structured output (see Configuration below).
+
+ .. dropdown:: Configuration
+
+ To disable structured output tests, set ``has_structured_output`` to False
+ in your test class:
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+ def has_structured_output(self) -> bool:
+ return False
+
+ By default, ``has_structured_output`` is True if a model overrides the
+ ``with_structured_output`` or ``bind_tools`` methods.
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, ensure that the model's ``bind_tools`` method
+ properly handles both JSON Schema and Pydantic V2 models.
+
+ ``langchain_core`` implements `a utility function `__
+ that will accommodate most formats.
+
+ See `example implementation `__
+ of ``with_structured_output``.
+
+ """ # noqa: E501
+ if not self.has_structured_output:
+ pytest.skip("Test requires structured output.")
+
+ schema, validation_function = _get_joke_class(schema_type)
+ chat = model.with_structured_output(schema, **self.structured_output_kwargs)
+ mock_callback = MagicMock()
+ mock_callback.on_chat_model_start = MagicMock()
+
+ invoke_callback = _TestCallbackHandler()
+
+ result = chat.invoke(
+ "Tell me a joke about cats.", config={"callbacks": [invoke_callback]}
+ )
+ validation_function(result)
+
+ assert len(invoke_callback.options) == 1, (
+ "Expected on_chat_model_start to be called once"
+ )
+ assert isinstance(invoke_callback.options[0], dict)
+ assert isinstance(
+ invoke_callback.options[0]["ls_structured_output_format"]["schema"], dict
+ )
+ assert invoke_callback.options[0]["ls_structured_output_format"][
+ "schema"
+ ] == convert_to_json_schema(schema)
+
+ stream_callback = _TestCallbackHandler()
+
+ for chunk in chat.stream(
+ "Tell me a joke about cats.", config={"callbacks": [stream_callback]}
+ ):
+ validation_function(chunk)
+ assert chunk
+
+ assert len(stream_callback.options) == 1, (
+ "Expected on_chat_model_start to be called once"
+ )
+ assert isinstance(stream_callback.options[0], dict)
+ assert isinstance(
+ stream_callback.options[0]["ls_structured_output_format"]["schema"], dict
+ )
+ assert stream_callback.options[0]["ls_structured_output_format"][
+ "schema"
+ ] == convert_to_json_schema(schema)
+
+ @pytest.mark.parametrize("schema_type", ["pydantic", "typeddict", "json_schema"])
+ async def test_structured_output_async(
+ self, model: BaseChatModel, schema_type: str
+ ) -> None:
+ """Test to verify structured output is generated both on ``invoke()`` and ``stream()``.
+
+ This test is optional and should be skipped if the model does not support
+ structured output (see Configuration below).
+
+ .. dropdown:: Configuration
+
+ To disable structured output tests, set ``has_structured_output`` to False
+ in your test class:
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+ def has_structured_output(self) -> bool:
+ return False
+
+ By default, ``has_structured_output`` is True if a model overrides the
+ ``with_structured_output`` or ``bind_tools`` methods.
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, ensure that the model's ``bind_tools`` method
+ properly handles both JSON Schema and Pydantic V2 models.
+
+ ``langchain_core`` implements `a utility function `__
+ that will accommodate most formats.
+
+ See `example implementation `__
+ of ``with_structured_output``.
+
+ """ # noqa: E501
+ if not self.has_structured_output:
+ pytest.skip("Test requires structured output.")
+
+ schema, validation_function = _get_joke_class(schema_type)
+
+ chat = model.with_structured_output(schema, **self.structured_output_kwargs)
+ ainvoke_callback = _TestCallbackHandler()
+
+ result = await chat.ainvoke(
+ "Tell me a joke about cats.", config={"callbacks": [ainvoke_callback]}
+ )
+        assert validation_function(result)
+
+ assert len(ainvoke_callback.options) == 1, (
+ "Expected on_chat_model_start to be called once"
+ )
+ assert isinstance(ainvoke_callback.options[0], dict)
+ assert isinstance(
+ ainvoke_callback.options[0]["ls_structured_output_format"]["schema"], dict
+ )
+ assert ainvoke_callback.options[0]["ls_structured_output_format"][
+ "schema"
+ ] == convert_to_json_schema(schema)
+
+ astream_callback = _TestCallbackHandler()
+
+ async for chunk in chat.astream(
+ "Tell me a joke about cats.", config={"callbacks": [astream_callback]}
+ ):
+            assert validation_function(chunk)
+ assert chunk
+
+ assert len(astream_callback.options) == 1, (
+ "Expected on_chat_model_start to be called once"
+ )
+
+ assert isinstance(astream_callback.options[0], dict)
+ assert isinstance(
+ astream_callback.options[0]["ls_structured_output_format"]["schema"], dict
+ )
+ assert astream_callback.options[0]["ls_structured_output_format"][
+ "schema"
+ ] == convert_to_json_schema(schema)
+
+ @pytest.mark.skipif(PYDANTIC_MAJOR_VERSION != 2, reason="Test requires pydantic 2.")
+ def test_structured_output_pydantic_2_v1(self, model: BaseChatModel) -> None:
+ """Test to verify we can generate structured output using ``pydantic.v1.BaseModel``.
+
+ ``pydantic.v1.BaseModel`` is available in the Pydantic 2 package.
+
+ This test is optional and should be skipped if the model does not support
+ structured output (see Configuration below).
+
+ .. dropdown:: Configuration
+
+ To disable structured output tests, set ``has_structured_output`` to False
+ in your test class:
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+ def has_structured_output(self) -> bool:
+ return False
+
+ By default, ``has_structured_output`` is True if a model overrides the
+ ``with_structured_output`` or ``bind_tools`` methods.
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, ensure that the model's ``bind_tools`` method
+ properly handles both JSON Schema and Pydantic V1 models.
+
+ ``langchain_core`` implements `a utility function `__
+ that will accommodate most formats.
+
+ See `example implementation `__
+ of ``with_structured_output``.
+
+ """ # noqa: E501
+ if not self.has_structured_output:
+ pytest.skip("Test requires structured output.")
+
+        class Joke(BaseModelV1):  # Uses pydantic.v1.BaseModel
+ """Joke to tell user."""
+
+ setup: str = FieldV1(description="question to set up a joke")
+ punchline: str = FieldV1(description="answer to resolve the joke")
+
+ # Pydantic class
+ chat = model.with_structured_output(Joke, **self.structured_output_kwargs)
+ result = chat.invoke("Tell me a joke about cats.")
+ assert isinstance(result, Joke)
+
+ for chunk in chat.stream("Tell me a joke about cats."):
+ assert isinstance(chunk, Joke)
+
+ # Schema
+ chat = model.with_structured_output(
+ Joke.schema(), **self.structured_output_kwargs
+ )
+ result = chat.invoke("Tell me a joke about cats.")
+ assert isinstance(result, dict)
+ assert set(result.keys()) == {"setup", "punchline"}
+
+ for chunk in chat.stream("Tell me a joke about cats."):
+            assert isinstance(chunk, dict)  # for mypy
+ assert set(chunk.keys()) == {"setup", "punchline"}
+
+ def test_structured_output_optional_param(self, model: BaseChatModel) -> None:
+ """Test to verify we can generate structured output that includes optional
+ parameters.
+
+ This test is optional and should be skipped if the model does not support
+ structured output (see Configuration below).
+
+ .. dropdown:: Configuration
+
+ To disable structured output tests, set ``has_structured_output`` to False
+ in your test class:
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+ def has_structured_output(self) -> bool:
+ return False
+
+ By default, ``has_structured_output`` is True if a model overrides the
+ ``with_structured_output`` or ``bind_tools`` methods.
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, ensure that the model's ``bind_tools`` method
+ properly handles Pydantic V2 models with optional parameters.
+
+ ``langchain_core`` implements `a utility function `__
+ that will accommodate most formats.
+
+ See `example implementation `__
+ of ``with_structured_output``.
+
+ """
+ if not self.has_structured_output:
+ pytest.skip("Test requires structured output.")
+
+ # Pydantic
+ class Joke(BaseModel):
+ """Joke to tell user."""
+
+ setup: str = Field(description="question to set up a joke")
+ punchline: Optional[str] = Field(
+ default=None, description="answer to resolve the joke"
+ )
+
+ chat = model.with_structured_output(Joke, **self.structured_output_kwargs)
+ setup_result = chat.invoke(
+ "Give me the setup to a joke about cats, no punchline."
+ )
+ assert isinstance(setup_result, Joke)
+
+ joke_result = chat.invoke("Give me a joke about cats, include the punchline.")
+ assert isinstance(joke_result, Joke)
+
+ # Schema
+ chat = model.with_structured_output(
+ Joke.model_json_schema(), **self.structured_output_kwargs
+ )
+ result = chat.invoke("Tell me a joke about cats.")
+ assert isinstance(result, dict)
+
+ # TypedDict
+ class JokeDict(TypedDict):
+ """Joke to tell user."""
+
+ setup: Annotated[str, ..., "question to set up a joke"]
+ punchline: Annotated[Optional[str], None, "answer to resolve the joke"]
+
+ chat = model.with_structured_output(JokeDict, **self.structured_output_kwargs)
+ result = chat.invoke("Tell me a joke about cats.")
+ assert isinstance(result, dict)
+
+ def test_json_mode(self, model: BaseChatModel) -> None:
+ """Test structured output via `JSON mode. `_.
+
+ This test is optional and should be skipped if the model does not support
+ the JSON mode feature (see Configuration below).
+
+ .. dropdown:: Configuration
+
+ To disable this test, set ``supports_json_mode`` to False in your
+ test class:
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+ def supports_json_mode(self) -> bool:
+ return False
+
+ .. dropdown:: Troubleshooting
+
+ See `example implementation `__
+ of ``with_structured_output``.
+
+ """
+ if not self.supports_json_mode:
+ pytest.skip("Test requires json mode support.")
+
+ from pydantic import BaseModel as BaseModelProper
+ from pydantic import Field as FieldProper
+
+ class Joke(BaseModelProper):
+ """Joke to tell user."""
+
+ setup: str = FieldProper(description="question to set up a joke")
+ punchline: str = FieldProper(description="answer to resolve the joke")
+
+ # Pydantic class
+ # Type ignoring since the interface only officially supports pydantic 1
+ # or pydantic.v1.BaseModel but not pydantic.BaseModel from pydantic 2.
+ # We'll need to do a pass updating the type signatures.
+ chat = model.with_structured_output(Joke, method="json_mode")
+ msg = (
+ "Tell me a joke about cats. Return the result as a JSON with 'setup' and "
+ "'punchline' keys. Return nothing other than JSON."
+ )
+ result = chat.invoke(msg)
+ assert isinstance(result, Joke)
+
+ for chunk in chat.stream(msg):
+ assert isinstance(chunk, Joke)
+
+ # Schema
+ chat = model.with_structured_output(
+ Joke.model_json_schema(), method="json_mode"
+ )
+ result = chat.invoke(msg)
+ assert isinstance(result, dict)
+ assert set(result.keys()) == {"setup", "punchline"}
+
+ for chunk in chat.stream(msg):
+            assert isinstance(chunk, dict)  # for mypy
+ assert set(chunk.keys()) == {"setup", "punchline"}
+
+ def test_pdf_inputs(self, model: BaseChatModel) -> None:
+ """Test that the model can process PDF inputs.
+
+ This test should be skipped (see Configuration below) if the model does not
+ support PDF inputs. These will take the form:
+
+ .. code-block:: python
+
+ {
+ "type": "image",
+ "source_type": "base64",
+ "data": "",
+ "mime_type": "application/pdf",
+ }
+
+ See https://python.langchain.com/docs/concepts/multimodality/
+
+ .. dropdown:: Configuration
+
+ To disable this test, set ``supports_pdf_inputs`` to False in your
+ test class:
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+
+ @property
+ def supports_pdf_inputs(self) -> bool:
+ return False
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, check that the model can correctly handle messages
+ with pdf content blocks, including base64-encoded files. Otherwise, set
+ the ``supports_pdf_inputs`` property to False.
+
+ """
+ pytest.fail("Test not implemented yet.")
+
+ # TODO
+ # if not self.supports_pdf_inputs:
+ # pytest.skip("Model does not support PDF inputs.")
+ # url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
+ # pdf_data = base64.b64encode(httpx.get(url).content).decode("utf-8")
+
+ # message = HumanMessage(
+ # [
+ # {
+ # "type": "text",
+ # "text": "Summarize this document:",
+ # },
+ # {
+ # "type": "file",
+ # "source_type": "base64",
+ # "mime_type": "application/pdf",
+ # "data": pdf_data,
+ # },
+ # ]
+ # )
+ # _ = model.invoke([message])
+
+ # # Test OpenAI Chat Completions format
+ # message = HumanMessage(
+ # [
+ # {
+ # "type": "text",
+ # "text": "Summarize this document:",
+ # },
+ # {
+ # "type": "file",
+ # "file": {
+ # "filename": "test file.pdf",
+ # "file_data": f"data:application/pdf;base64,{pdf_data}",
+ # },
+ # },
+ # ]
+ # )
+ # _ = model.invoke([message])
+
+ def test_audio_inputs(self, model: BaseChatModel) -> None:
+ """Test that the model can process audio inputs.
+
+ This test should be skipped (see Configuration below) if the model does not
+ support audio inputs. These will take the form:
+
+ .. code-block:: python
+
+ # AudioContentBlock
+ {
+ "type": "audio",
+ "base64": "",
+ "mime_type": "audio/wav", # or appropriate mime-type
+ }
+
+ See https://python.langchain.com/docs/concepts/multimodality/
+
+ .. dropdown:: Configuration
+
+ To disable this test, set ``supports_audio_content_blocks`` to False in your
+ test class:
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+
+ @property
+ def supports_audio_content_blocks(self) -> bool:
+ return False
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, check that the model can correctly handle messages
+ with audio content blocks. Otherwise, set the ``supports_audio_content_blocks``
+ property to False.
+
+ """ # noqa: E501
+ if not self.supports_audio_content_blocks:
+ pytest.skip("Model does not support AudioContentBlock inputs.")
+
+ url = "https://upload.wikimedia.org/wikipedia/commons/3/3d/Alcal%C3%A1_de_Henares_%28RPS_13-04-2024%29_canto_de_ruise%C3%B1or_%28Luscinia_megarhynchos%29_en_el_Soto_del_Henares.wav"
+ audio_data = base64.b64encode(httpx.get(url).content).decode("utf-8")
+
+ message = HumanMessage(
+ [
+ create_text_block("Describe this audio:"),
+ create_audio_block(
+ base64=audio_data,
+ mime_type="audio/wav",
+ ),
+ ]
+ )
+ _ = model.invoke([message])
+
+ # TODO?
+ # Test OpenAI Chat Completions format
+ # message = HumanMessage(
+ # [
+ # {
+ # "type": "text",
+ # "text": "Describe this audio:",
+ # },
+ # {
+ # "type": "input_audio",
+ # "input_audio": {"data": audio_data, "format": "wav"},
+ # },
+ # ]
+ # )
+ # _ = model.invoke([message])
+
+ def test_audio_content_blocks_processing(self, model: BaseChatModel) -> None:
+ """Test audio content block processing with transcription.
+
+ TODO: expand docstring
+
+ """
+ if not self.supports_audio_content_blocks:
+ pytest.skip("Model does not support audio inputs.")
+
+ audio_block = create_audio_block(
+ base64=_get_test_audio_base64(),
+ mime_type="audio/wav",
+ )
+ text_block = create_text_block("Transcribe this audio file.")
+
+ result = model.invoke([HumanMessage([text_block, audio_block])])
+
+ assert isinstance(result, AIMessage)
+ if result.text:
+ assert len(result.text) > 10 # Substantial response
+
+ def test_image_inputs(self, model: BaseChatModel) -> None:
+ """Test that the model can process image inputs.
+
+ This test should be skipped (see Configuration below) if the model does not
+ support image inputs. These will take the form:
+
+ .. code-block:: python
+
+ # ImageContentBlock
+ {
+ "type": "image",
+ "base64": "",
+ "mime_type": "image/png", # or appropriate mime-type
+ }
+
+ TODO: verify this
+ For backward-compatibility, we must also support OpenAI-style
+ image content blocks:
+
+ .. code-block:: python
+
+ [
+ {"type": "text", "text": "describe the weather in this image"},
+ {
+ "type": "image_url",
+ "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
+ },
+ ]
+
+ See https://python.langchain.com/docs/concepts/multimodality/
+
+ .. dropdown:: Configuration
+
+ To disable this test, set ``supports_image_content_blocks`` to False in your
+ test class:
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+ def supports_image_content_blocks(self) -> bool:
+ return False
+
+ # Can also explicitly disable testing image URLs:
+ @property
+ def supports_image_urls(self) -> bool:
+ return False
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, check that the model can correctly handle messages
+ with image content blocks, including base64-encoded images. Otherwise, set
+ the ``supports_image_content_blocks`` property to False.
+
+ """
+ if not self.supports_image_content_blocks:
+ pytest.skip("Model does not support image message.")
+
+ image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+ image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
+
+ # TODO?
+ # OpenAI format, base64 data
+ # message = HumanMessage(
+ # content=[
+ # {"type": "text", "text": "describe the weather in this image"},
+ # {
+ # "type": "image_url",
+ # "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
+ # },
+ # ],
+ # )
+ # _ = model.invoke([message])
+
+ # Standard format, base64 data
+ message = HumanMessage(
+ [
+ create_text_block("describe the weather in this image"),
+ create_image_block(
+ base64=image_data,
+ mime_type="image/jpeg",
+ ),
+ ],
+ )
+ _ = model.invoke([message])
+
+ # TODO?
+ # Standard format, URL
+ # if self.supports_image_urls:
+ # message = HumanMessage(
+ # content=[
+ # {"type": "text", "text": "describe the weather in this image"},
+ # {
+ # "type": "image",
+ # "source_type": "url",
+ # "url": image_url,
+ # },
+ # ],
+ # )
+ # _ = model.invoke([message])
+
+ def test_image_tool_message(self, model: BaseChatModel) -> None:
+ """Test that the model can process ToolMessages with image inputs.
+
+ TODO: is this needed?
+
+ This test should be skipped if the model does not support messages of the
+ form:
+
+ .. code-block:: python
+
+ ToolMessage(
+ content=[
+ {
+ "type": "image_url",
+ "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
+ },
+ ],
+ tool_call_id="1",
+ )
+
+ containing image content blocks in OpenAI Chat Completions format, in addition
+ to messages of the form:
+
+ .. code-block:: python
+
+ ToolMessage(
+ content=[
+ {
+ "type": "image",
+ "source_type": "base64",
+ "data": image_data,
+ "mime_type": "image/jpeg",
+ },
+ ],
+ tool_call_id="1",
+ )
+
+ containing image content blocks in standard format.
+
+ This test can be skipped by setting the ``supports_image_tool_message`` property
+ to False (see Configuration below).
+
+ .. dropdown:: Configuration
+
+ To disable this test, set ``supports_image_tool_message`` to False in your
+ test class:
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+ def supports_image_tool_message(self) -> bool:
+ return False
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, check that the model can correctly handle messages
+ with image content blocks in ToolMessages, including base64-encoded
+ images. Otherwise, set the ``supports_image_tool_message`` property to
+ False.
+
+ """
+ pytest.fail("Test not implemented yet.")
+
+ # TODO
+ # if not self.supports_image_tool_message:
+ # pytest.skip("Model does not support image tool message.")
+ # image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+ # image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
+
+ # # Support both OpenAI and standard formats
+ # oai_format_message = ToolMessage(
+ # content=[
+ # {
+ # "type": "image_url",
+ # "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
+ # },
+ # ],
+ # tool_call_id="1",
+ # name="random_image",
+ # )
+
+ # standard_format_message = ToolMessage(
+ # content=[
+ # {
+ # "type": "image",
+ # "source_type": "base64",
+ # "data": image_data,
+ # "mime_type": "image/jpeg",
+ # },
+ # ],
+ # tool_call_id="1",
+ # name="random_image",
+ # )
+
+ # for tool_message in [oai_format_message, standard_format_message]:
+ # messages = [
+ # HumanMessage(
+ # "get a random image using the tool and describe the weather"
+ # ),
+ # AIMessage(
+ # [],
+ # tool_calls=[
+ # {
+ # "type": "tool_call",
+ # "id": "1",
+ # "name": "random_image",
+ # "args": {},
+ # }
+ # ],
+ # ),
+ # tool_message,
+ # ]
+
+ # def random_image() -> str:
+ # """Return a random image."""
+ # return ""
+
+ # _ = model.bind_tools([random_image]).invoke(messages)
- # Multimodal testing
def test_image_content_blocks_with_analysis(self, model: BaseChatModel) -> None:
- """Test image analysis using ``ImageContentBlock``s."""
+ """Test image analysis using ``ImageContentBlock``s.
+
+ TODO: expand docstring
+
+ """
if not self.supports_image_content_blocks:
pytest.skip("Model does not support image inputs.")
@@ -180,7 +2412,11 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests):
assert len(result.text) > 10 # Substantial response
def test_video_content_blocks(self, model: BaseChatModel) -> None:
- """Test video content block processing."""
+ """Test video content block processing.
+
+ TODO: expand docstring
+
+ """
if not self.supports_video_content_blocks:
pytest.skip("Model does not support video inputs.")
@@ -196,30 +2432,438 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests):
if result.text:
assert len(result.text) > 10 # Substantial response
- def test_audio_content_blocks_processing(self, model: BaseChatModel) -> None:
- """Test audio content block processing with transcription."""
- if not self.supports_audio_content_blocks:
- pytest.skip("Model does not support audio inputs.")
+ def test_anthropic_inputs(self, model: BaseChatModel) -> None:
+ """Test that model can process Anthropic-style message histories.
- audio_block = create_audio_block(
- base64=_get_test_audio_base64(),
- mime_type="audio/wav",
- )
- text_block = create_text_block("Transcribe this audio file.")
+
+        TODO?
+
- result = model.invoke([HumanMessage([text_block, audio_block])])
+ These message histories will include ``AIMessage`` objects with ``tool_use``
+        content blocks, e.g.,
+
+ .. code-block:: python
+
+ AIMessage(
+ [
+ {"type": "text", "text": "Hmm let me think about that"},
+ {
+ "type": "tool_use",
+ "input": {"fav_color": "green"},
+ "id": "foo",
+ "name": "color_picker",
+ },
+ ]
+ )
+
+ as well as ``HumanMessage`` objects containing ``tool_result`` content blocks:
+
+ .. code-block:: python
+
+ HumanMessage(
+ [
+ {
+ "type": "tool_result",
+ "tool_use_id": "foo",
+ "content": [
+ {
+ "type": "text",
+ "text": "green is a great pick! that's my sister's favorite color", # noqa: E501
+ }
+ ],
+ "is_error": False,
+ },
+ {"type": "text", "text": "what's my sister's favorite color"},
+ ]
+ )
+
+ This test should be skipped if the model does not support messages of this
+ form (or doesn't support tool calling generally). See Configuration below.
+
+ .. dropdown:: Configuration
+
+ To disable this test, set ``supports_anthropic_inputs`` to False in your
+ test class:
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+ def supports_anthropic_inputs(self) -> bool:
+ return False
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, check that:
+
+ 1. The model can correctly handle message histories that include message objects with list content.
+ 2. The ``tool_calls`` attribute on AIMessage objects is correctly handled and passed to the model in an appropriate format.
+ 3. HumanMessages with "tool_result" content blocks are correctly handled.
+
+ Otherwise, if Anthropic tool call and result formats are not supported,
+ set the ``supports_anthropic_inputs`` property to False.
+
+ """ # noqa: E501
+ pytest.fail("Test not implemented yet.")
+
+ # TODO
+ # if not self.supports_anthropic_inputs:
+ # pytest.skip("Model does not explicitly support Anthropic inputs.")
+
+ # # Anthropic-format tool
+ # color_picker = {
+ # "name": "color_picker",
+ # "input_schema": {
+ # "type": "object",
+ # "properties": {
+ # "fav_color": {"type": "string"},
+ # },
+ # "required": ["fav_color"],
+ # },
+ # "description": "Input your fav color and get a random fact about it.",
+ # "cache_control": {"type": "ephemeral"},
+ # }
+
+ # human_content: list[dict] = [
+ # {
+ # "type": "text",
+ # "text": "what's your favorite color in this image",
+ # "cache_control": {"type": "ephemeral"},
+ # },
+ # ]
+ # if self.supports_image_inputs:
+ # image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+ # image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8") # noqa: E501
+ # human_content.append(
+ # {
+ # "type": "image",
+ # "source": {
+ # "type": "base64",
+ # "media_type": "image/jpeg",
+ # "data": image_data,
+ # },
+ # }
+ # )
+ # messages = [
+ # SystemMessage("you're a good assistant"),
+ # HumanMessage(human_content), # type: ignore[arg-type]
+ # AIMessage(
+ # [
+ # {"type": "text", "text": "Hmm let me think about that"},
+ # {
+ # "type": "tool_use",
+ # "input": {"fav_color": "green"},
+ # "id": "foo",
+ # "name": "color_picker",
+ # },
+ # ],
+ # tool_calls=[
+ # {
+ # "name": "color_picker",
+ # "args": {"fav_color": "green"},
+ # "id": "foo",
+ # "type": "tool_call",
+ # }
+ # ],
+ # ),
+ # ToolMessage("That's a great pick!", tool_call_id="foo"),
+ # ]
+ # response = model.bind_tools([color_picker]).invoke(messages)
+ # assert isinstance(response, AIMessage)
+
+ # # Test thinking blocks
+ # messages = [
+ # HumanMessage(
+ # [
+ # {
+ # "type": "text",
+ # "text": "Hello",
+ # },
+ # ]
+ # ),
+ # AIMessage(
+ # [
+ # {
+ # "type": "thinking",
+ # "thinking": "I'm thinking...",
+ # "signature": "abc123",
+ # },
+ # {
+ # "type": "text",
+ # "text": "Hello, how are you?",
+ # },
+ # ]
+ # ),
+ # HumanMessage(
+ # [
+ # {
+ # "type": "text",
+ # "text": "Well, thanks.",
+ # },
+ # ]
+ # ),
+ # ]
+ # response = model.invoke(messages)
+ # assert isinstance(response, AIMessage)
+
+ def test_message_with_name(self, model: BaseChatModel) -> None:
+ """Test that ``HumanMessage`` with values for the ``name`` field can be handled.
+
+        This test expects that the model responds with a non-empty
+        ``TextContentBlock``.
+
+ These messages may take the form:
+
+ .. code-block:: python
+
+ HumanMessage("hello", name="example_user")
+
+ If possible, the ``name`` field should be parsed and passed appropriately
+ to the model. Otherwise, it should be ignored.
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, check that the ``name`` field on ``HumanMessage``
+ objects is either ignored or passed to the model appropriately.
+
+ """
+ result = model.invoke([HumanMessage("hello", name="example_user")])
+ assert result is not None
assert isinstance(result, AIMessage)
- if result.text:
- assert len(result.text) > 10 # Substantial response
+ assert len(result.content) > 0
+ assert isinstance(result.text, str)
+ assert len(result.text) > 0
+
+ def test_agent_loop(self, model: BaseChatModel) -> None:
+ """Test that the model supports a simple ReAct agent loop. This test is skipped
+ if the ``has_tool_calling`` property on the test class is set to False.
+
+ This test is optional and should be skipped if the model does not support
+ tool calling (see Configuration below).
+
+ .. dropdown:: Configuration
+
+ To disable tool calling tests, set ``has_tool_calling`` to False in your
+ test class:
+
+ .. code-block:: python
+
+ class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+ @property
+ def has_tool_calling(self) -> bool:
+ return False
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, check that ``bind_tools`` is implemented to correctly
+ translate LangChain tool objects into the appropriate schema for your
+ chat model.
+
+ Check also that all required information (e.g., tool calling identifiers)
+ from ``AIMessage`` objects is propagated correctly to model payloads.
+
+ This test may fail if the chat model does not consistently generate tool
+ calls in response to an appropriate query. In these cases you can ``xfail``
+ the test:
+
+ .. code-block:: python
+
+ @pytest.mark.xfail(reason=("Does not support tool_choice."))
+ def test_agent_loop(self, model: BaseChatModel) -> None:
+ super().test_agent_loop(model)
+
+ """
+ if not self.has_tool_calling:
+ pytest.skip("Test requires tool calling.")
+
+ @tool
+ def get_weather(location: str) -> str:
+ """Call to surf the web."""
+ return "It's sunny."
+
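+        # One ReAct iteration: the model emits a tool call, we execute the tool,
+        # and the model consumes the resulting ToolMessage to produce an answer.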
+ llm_with_tools = model.bind_tools([get_weather])
+ input_message = HumanMessage("What is the weather in San Francisco, CA?")
+ tool_call_message = llm_with_tools.invoke([input_message])
+ assert isinstance(tool_call_message, AIMessage)
+ tool_calls = tool_call_message.tool_calls
+ assert len(tool_calls) == 1
+ tool_call = tool_calls[0]
+ tool_message = get_weather.invoke(tool_call)
+ assert isinstance(tool_message, ToolMessage)
+ response = llm_with_tools.invoke(
+ [
+ input_message,
+ tool_call_message,
+ tool_message,
+ ]
+ )
+ assert isinstance(response, AIMessage)
+
+ @pytest.mark.benchmark
+ @pytest.mark.vcr
+ def test_stream_time(
+ self, model: BaseChatModel, benchmark: BenchmarkFixture, vcr: Cassette
+ ) -> None:
+ """Test that streaming does not introduce undue overhead.
+
+ See ``enable_vcr_tests`` dropdown :class:`above `
+ for more information.
+
+ .. dropdown:: Configuration
+
+ This test can be enabled or disabled using the ``enable_vcr_tests``
+ property. For example, to disable the test, set this property to ``False``:
+
+ .. code-block:: python
+
+ @property
+ def enable_vcr_tests(self) -> bool:
+ return False
+
+ .. important::
+
+ VCR will by default record authentication headers and other sensitive
+ information in cassettes. See ``enable_vcr_tests`` dropdown
+ :class:`above ` for how to configure what
+ information is recorded in cassettes.
+
+ """
+ if not self.enable_vcr_tests:
+ pytest.skip("VCR not set up.")
+
+ def _run() -> None:
+ for _ in model.stream("Write a story about a cat."):
+ pass
+
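+        # On the first (recording) run the cassette has no stored responses, so just
+        # run once to record; on replay runs, benchmark against the cassette.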
+ if not vcr.responses:
+ _run()
+ else:
+ benchmark(_run)
+
+ def invoke_with_audio_input(self, *, stream: bool = False) -> AIMessage:
+ """:private:"""
+ # To be implemented in test subclass
+ raise NotImplementedError
+
+ def invoke_with_audio_output(self, *, stream: bool = False) -> AIMessage:
+ """:private:"""
+ # To be implemented in test subclass
+ raise NotImplementedError
+
+ def invoke_with_reasoning_output(self, *, stream: bool = False) -> AIMessage:
+ """:private:"""
+ # To be implemented in test subclass
+ raise NotImplementedError
+
+ def invoke_with_cache_read_input(self, *, stream: bool = False) -> AIMessage:
+ """:private:"""
+ # To be implemented in test subclass
+ raise NotImplementedError
+
+ def invoke_with_cache_creation_input(self, *, stream: bool = False) -> AIMessage:
+ """:private:"""
+ # To be implemented in test subclass
+ raise NotImplementedError
+
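+    # A subclass override for one of these hooks might look roughly like the
+    # following sketch (model name and parameters are illustrative assumptions):
+    #
+    #     def invoke_with_reasoning_output(self, *, stream: bool = False) -> AIMessage:
+    #         llm = MyReasoningChatModel(model="my-reasoning-model")
+    #         if not stream:
+    #             return llm.invoke("What is 3^3?")
+    #         full = None
+    #         for chunk in llm.stream("What is 3^3?"):
+    #             full = chunk if full is None else full + chunk
+    #         return cast(AIMessage, full)
+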
+ def test_unicode_tool_call_integration(
+ self,
+ model: BaseChatModel,
+ *,
+ tool_choice: Optional[str] = None,
+ force_tool_call: bool = True,
+ ) -> None:
+ """Generic integration test for Unicode characters in tool calls.
+
+ Args:
+ model: The chat model to test
+ tool_choice: Tool choice parameter to pass to ``bind_tools()`` (provider-specific)
+            force_tool_call: Whether to force a tool call (``tool_choice`` defaults
+                to ``"any"`` when None)
+
+ Tests that Unicode characters in tool call arguments are preserved correctly,
+ not escaped as ``\\uXXXX`` sequences.
+ """ # noqa: E501
+ if not self.has_tool_calling:
+ pytest.skip("Test requires tool calling support.")
+
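+        # ``unicode_customer`` is a tool defined elsewhere in this module; a minimal
+        # sketch of its assumed shape (illustrative only — the real tool may differ):
+        #
+        #     @tool
+        #     def unicode_customer(customer_name: str) -> str:
+        #         """Create a customer record with the given name."""
+        #         return f"Created customer: {customer_name}"
+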
+ # Configure tool choice based on provider capabilities
+ if tool_choice is None and force_tool_call:
+ tool_choice = "any"
+
+ if tool_choice is not None:
+ llm_with_tool = model.bind_tools(
+ [unicode_customer], tool_choice=tool_choice
+ )
+ else:
+ llm_with_tool = model.bind_tools([unicode_customer])
+
+ # Test with Chinese characters
+ msgs = [
+ HumanMessage(
+ "Create a customer named '你好啊集团' (Hello Group) - a Chinese "
+ "technology company"
+ )
+ ]
+ ai_msg = llm_with_tool.invoke(msgs)
+
+ assert isinstance(ai_msg, AIMessage)
+ assert isinstance(ai_msg.tool_calls, list)
+
+ if force_tool_call:
+ assert len(ai_msg.tool_calls) >= 1, (
+ f"Expected at least 1 tool call, got {len(ai_msg.tool_calls)}"
+ )
+
+ if ai_msg.tool_calls:
+ tool_call = ai_msg.tool_calls[0]
+ assert tool_call["name"] == "unicode_customer"
+ assert "args" in tool_call
+
+ # Verify Unicode characters are properly handled
+ args = tool_call["args"]
+ assert "customer_name" in args
+ customer_name = args["customer_name"]
+
+ # The model should include the Unicode characters, not escaped sequences
+ assert (
+ "你好" in customer_name
+ or "你" in customer_name
+ or "好" in customer_name
+ ), f"Unicode characters not found in: {customer_name}"
+
+ # Test with additional Unicode examples - Japanese
+ msgs_jp = [
+ HumanMessage(
+ "Create a customer named 'こんにちは株式会社' (Hello Corporation) - a "
+ "Japanese company"
+ )
+ ]
+ ai_msg_jp = llm_with_tool.invoke(msgs_jp)
+
+ assert isinstance(ai_msg_jp, AIMessage)
+
+ if force_tool_call:
+ assert len(ai_msg_jp.tool_calls) >= 1
+
+ if ai_msg_jp.tool_calls:
+ tool_call_jp = ai_msg_jp.tool_calls[0]
+ args_jp = tool_call_jp["args"]
+ customer_name_jp = args_jp["customer_name"]
+
+ # Verify Japanese Unicode characters are preserved
+ assert (
+ "こんにちは" in customer_name_jp
+ or "株式会社" in customer_name_jp
+ or "こ" in customer_name_jp
+ or "ん" in customer_name_jp
+ ), f"Japanese Unicode characters not found in: {customer_name_jp}"
def test_complex_multimodal_reasoning(self, model: BaseChatModel) -> None:
- """Test complex reasoning with multiple content types."""
- # TODO: come back to this, seems like a unique scenario
+ """Test complex reasoning with multiple content types.
+
+ TODO: expand docstring
+
+ """
if not self.supports_multimodal_reasoning:
pytest.skip("Model does not support multimodal reasoning.")
- content_blocks: list[ContentBlock] = [
+ content_blocks: list[types.ContentBlock] = [
create_text_block(
"Compare these media files and provide reasoning analysis:"
),
@@ -242,7 +2886,6 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests):
assert isinstance(result, AIMessage)
- # Check for reasoning blocks in response
if self.supports_reasoning_content_blocks:
reasoning_blocks = [
block
@@ -252,7 +2895,11 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests):
assert len(reasoning_blocks) > 0
def test_citation_generation_with_sources(self, model: BaseChatModel) -> None:
- """Test that the model can generate ``Citations`` with source links."""
+ """Test that the model can generate ``Citations`` with source links.
+
+ TODO: expand docstring
+
+ """
if not self.supports_structured_citations:
pytest.skip("Model does not support structured citations.")
@@ -294,7 +2941,11 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests):
assert "end_index" in annotation
def test_web_search_integration(self, model: BaseChatModel) -> None:
- """Test web search content blocks integration."""
+ """Test web search content blocks integration.
+
+ TODO: expand docstring
+
+ """
if not self.supports_web_search_blocks:
pytest.skip("Model does not support web search blocks.")
@@ -320,7 +2971,11 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests):
assert len(search_call_blocks) > 0 or len(search_result_blocks) > 0
def test_code_interpreter_blocks(self, model: BaseChatModel) -> None:
- """Test code interpreter content blocks."""
+ """Test code interpreter content blocks.
+
+ TODO: expand docstring
+
+ """
if not self.supports_code_interpreter:
pytest.skip("Model does not support code interpreter blocks.")
@@ -345,7 +3000,11 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests):
assert len(code_blocks) > 0
def test_tool_calling_with_content_blocks(self, model: BaseChatModel) -> None:
- """Test tool calling with content blocks."""
+ """Test tool calling with content blocks.
+
+ TODO: expand docstring
+
+ """
if not self.has_tool_calling:
pytest.skip("Model does not support tool calls.")
@@ -366,7 +3025,11 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests):
def test_plaintext_content_blocks_from_documents(
self, model: BaseChatModel
) -> None:
- """Test PlainTextContentBlock for document plaintext content."""
+ """Test PlainTextContentBlock for document plaintext content.
+
+ TODO: expand docstring
+
+ """
if not self.supports_plaintext_content_blocks:
pytest.skip("Model does not support PlainTextContentBlock.")
@@ -385,7 +3048,11 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests):
# TODO expand
def test_content_block_streaming_integration(self, model: BaseChatModel) -> None:
- """Test streaming with content blocks."""
+ """Test streaming with content blocks.
+
+ TODO: expand docstring
+
+ """
if not self.supports_content_blocks_v1:
pytest.skip("Model does not support content blocks v1.")
@@ -415,7 +3082,11 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests):
def test_error_handling_with_invalid_content_blocks(
self, model: BaseChatModel
) -> None:
- """Test error handling with various invalid content block configurations."""
+ """Test error handling with various invalid content block configurations.
+
+ TODO: expand docstring
+
+ """
if not self.supports_content_blocks_v1:
pytest.skip("Model does not support content blocks v1.")
@@ -437,7 +3108,11 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests):
assert len(str(e)) > 0
async def test_async_content_blocks_processing(self, model: BaseChatModel) -> None:
- """Test asynchronous processing of content blocks."""
+ """Test asynchronous processing of content blocks.
+
+ TODO: expand docstring
+
+ """
if not self.supports_content_blocks_v1:
pytest.skip("Model does not support content blocks v1.")
@@ -447,7 +3122,11 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests):
assert isinstance(result, AIMessage)
def test_content_blocks_with_callbacks(self, model: BaseChatModel) -> None:
- """Test that content blocks work correctly with callback handlers."""
+ """Test that content blocks work correctly with callback handlers.
+
+ TODO: expand docstring
+
+ """
if not self.supports_content_blocks_v1:
pytest.skip("Model does not support content blocks v1.")
@@ -475,3 +3154,140 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests):
hasattr(msg, "content") and isinstance(msg.content, list)
for msg in callback_handler.messages_seen
)
+
+ def test_input_conversion_string(self, model: BaseChatModel) -> None:
+ """Test that string input is properly converted to messages.
+
+ TODO: expand docstring
+
+ """
+ result = model.invoke("Test string input")
+ assert isinstance(result, AIMessage)
+ assert result.content is not None
+
+ def test_input_conversion_empty_string(self, model: BaseChatModel) -> None:
+ """Test that empty string input is handled gracefully.
+
+ TODO: expand docstring
+
+ """
+ result = model.invoke("")
+ assert isinstance(result, AIMessage)
+
+ def test_input_conversion_message_v1_list(self, model: BaseChatModel) -> None:
+ """Test that v1 message list input is handled correctly.
+
+ TODO: expand docstring
+
+ """
+ messages = [HumanMessage("Test message")]
+ result = model.invoke(messages)
+ assert isinstance(result, AIMessage)
+ assert result.content is not None
+
+ def test_text_content_blocks_basic(self, model: BaseChatModel) -> None:
+ """Test that the model can handle the ``TextContentBlock`` format."""
+ if not self.supports_text_content_blocks:
+ pytest.skip("Model does not support TextContentBlock (rare!)")
+
+ text_block = create_text_block("Hello, world!")
+ message = HumanMessage(content=[text_block])
+
+ result = model.invoke([message])
+ assert isinstance(result, AIMessage)
+ assert result.content is not None
+
+ def test_mixed_content_blocks_basic(self, model: BaseChatModel) -> None:
+ """Test that the model can handle messages with mixed content blocks."""
+ if not (
+ self.supports_text_content_blocks and self.supports_image_content_blocks
+ ):
+ pytest.skip(
+ "Model doesn't support mixed content blocks (concurrent text and image)"
+ )
+
+ content_blocks: list[types.ContentBlock] = [
+ create_text_block("Describe this image:"),
+ create_image_block(
+ base64="iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==",
+ mime_type="image/png",
+ ),
+ ]
+
+ message = HumanMessage(content=content_blocks)
+ result = model.invoke([message])
+
+ assert isinstance(result, AIMessage)
+ assert result.content is not None
+
+ def test_reasoning_content_blocks_basic(self, model: BaseChatModel) -> None:
+ """Test that the model can generate ``ReasoningContentBlock``."""
+ if not self.supports_reasoning_content_blocks:
+ pytest.skip("Model does not support ReasoningContentBlock.")
+
+ message = HumanMessage("Think step by step: What is 2 + 2?")
+ result = model.invoke([message])
+
+ assert isinstance(result, AIMessage)
+ if isinstance(result.content, list):
+ reasoning_blocks = [
+ block
+ for block in result.content
+ if isinstance(block, dict) and is_reasoning_block(block)
+ ]
+ assert len(reasoning_blocks) > 0
+
+ def test_non_standard_content_blocks_basic(self, model: BaseChatModel) -> None:
+ """Test that the model can handle ``NonStandardContentBlock``."""
+ if not self.supports_non_standard_blocks:
+ pytest.skip("Model does not support NonStandardContentBlock.")
+
+ non_standard_block = create_non_standard_block(
+ {
+ "custom_field": "custom_value",
+ "data": [1, 2, 3],
+ }
+ )
+
+ message = HumanMessage(content=[non_standard_block])
+
+ # Should not raise an error
+ result = model.invoke([message])
+ assert isinstance(result, AIMessage)
+
+ def test_invalid_tool_call_handling_basic(self, model: BaseChatModel) -> None:
+ """Test that the model can handle ``InvalidToolCall`` blocks gracefully."""
+ if not self.supports_invalid_tool_calls:
+ pytest.skip("Model does not support InvalidToolCall handling.")
+
+ invalid_tool_call: InvalidToolCall = {
+ "type": "invalid_tool_call",
+ "name": "nonexistent_tool",
+ "args": None,
+ "id": "invalid_123",
+ "error": "Tool not found",
+ }
+
+ # Create a message with invalid tool call in history
+ ai_message = AIMessage(content=[invalid_tool_call])
+ follow_up = HumanMessage("Please try again with a valid approach.")
+
+ result = model.invoke([ai_message, follow_up])
+ assert isinstance(result, AIMessage)
+ assert result.content is not None
+
+ def test_file_content_blocks_basic(self, model: BaseChatModel) -> None:
+ """Test that the model can handle ``FileContentBlock``."""
+ if not self.supports_file_content_blocks:
+ pytest.skip("Model does not support FileContentBlock.")
+
+ file_block = create_file_block(
+ base64="SGVsbG8sIHdvcmxkIQ==", # "Hello, world!"
+ mime_type="text/plain",
+ )
+
+ message = HumanMessage(content=[file_block])
+ result = model.invoke([message])
+
+ assert isinstance(result, AIMessage)
+ assert result.content is not None
diff --git a/libs/standard-tests/langchain_tests/unit_tests/chat_models_v1.py b/libs/standard-tests/langchain_tests/unit_tests/chat_models_v1.py
index 92f2b409884..bcf84c49632 100644
--- a/libs/standard-tests/langchain_tests/unit_tests/chat_models_v1.py
+++ b/libs/standard-tests/langchain_tests/unit_tests/chat_models_v1.py
@@ -7,27 +7,69 @@ This module provides updated test patterns for the new messages introduced in
content blocks system.
"""
-from typing import Literal, cast
+import inspect
+import os
+from abc import abstractmethod
+from typing import Any, Literal, Optional
+from unittest import mock
import pytest
from langchain_core.load import dumpd, load
from langchain_core.messages.content_blocks import (
- ContentBlock,
- InvalidToolCall,
- TextContentBlock,
- create_file_block,
- create_image_block,
- create_non_standard_block,
create_text_block,
- is_reasoning_block,
- is_text_block,
- is_tool_call_block,
)
-from langchain_core.tools import tool
+from langchain_core.runnables import RunnableBinding
+from langchain_core.tools import BaseTool, tool
from langchain_core.v1.chat_models import BaseChatModel
-from langchain_core.v1.messages import AIMessage, HumanMessage
+from langchain_core.v1.messages import HumanMessage
+from pydantic import BaseModel, Field, SecretStr
+from pydantic.v1 import BaseModel as BaseModelV1
+from pydantic.v1 import Field as FieldV1
+from pydantic.v1 import ValidationError as ValidationErrorV1
+from pytest_benchmark.fixture import BenchmarkFixture # type: ignore[import-untyped]
+from syrupy.assertion import SnapshotAssertion
from langchain_tests.base import BaseStandardTests
+from langchain_tests.utils.pydantic import PYDANTIC_MAJOR_VERSION
+
+
+def generate_schema_pydantic_v1_from_2() -> Any:
+ """Use to generate a schema from v1 namespace in pydantic 2.
+
+ :private:
+ """
+ if PYDANTIC_MAJOR_VERSION != 2:
+ msg = "This function is only compatible with Pydantic v2."
+ raise AssertionError(msg)
+
+ class PersonB(BaseModelV1):
+ """Record attributes of a person."""
+
+ name: str = FieldV1(..., description="The name of the person.")
+ age: int = FieldV1(..., description="The age of the person.")
+
+ return PersonB
+
+
+def generate_schema_pydantic() -> Any:
+ """Works with either pydantic 1 or 2.
+
+ :private:
+ """
+
+ class PersonA(BaseModel):
+ """Record attributes of a person."""
+
+ name: str = Field(..., description="The name of the person.")
+ age: int = Field(..., description="The age of the person.")
+
+ return PersonA
+
+
+TEST_PYDANTIC_MODELS = [generate_schema_pydantic()]
+
+if PYDANTIC_MAJOR_VERSION == 2:
+ TEST_PYDANTIC_MODELS.append(generate_schema_pydantic_v1_from_2())
class ChatModelV1Tests(BaseStandardTests):
@@ -39,16 +81,79 @@ class ChatModelV1Tests(BaseStandardTests):
:private:
"""
- # Core Model Properties - these should be implemented by subclasses
+ @property
+ @abstractmethod
+ def chat_model_class(self) -> type[BaseChatModel]:
+ """The chat model class to test, e.g., ``ChatParrotLink``."""
+ ...
+
+ @property
+ def chat_model_params(self) -> dict:
+ """Initialization parameters for the chat model."""
+ return {}
+
+ @property
+ def standard_chat_model_params(self) -> dict:
+ """:private:"""
+ return {
+ "temperature": 0,
+ "max_tokens": 100,
+ "timeout": 60,
+ "stop": [],
+ "max_retries": 2,
+ }
+
+ @pytest.fixture
+ def model(self) -> BaseChatModel:
+ """:private:"""
+ return self.chat_model_class(
+ **{
+ **self.standard_chat_model_params,
+ **self.chat_model_params,
+ }
+ )
+
+ @pytest.fixture
+ def my_adder_tool(self) -> BaseTool:
+ """:private:"""
+
+ @tool
+ def my_adder_tool(a: int, b: int) -> int:
+ """Takes two integers, a and b, and returns their sum."""
+ return a + b
+
+ return my_adder_tool
+
@property
def has_tool_calling(self) -> bool:
"""Whether the model supports tool calling."""
- return False
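+        # By default, infer tool calling support from whether the model overrides
+        # ``bind_tools``.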
+ return self.chat_model_class.bind_tools is not BaseChatModel.bind_tools
+
+ @property
+ def tool_choice_value(self) -> Optional[str]:
+ """(None or str) To use for tool choice when used in tests."""
+ return None
+
+ @property
+ def has_tool_choice(self) -> bool:
+ """Whether the model supports forcing tool calling via ``tool_choice``."""
+ bind_tools_params = inspect.signature(
+ self.chat_model_class.bind_tools
+ ).parameters
+ return "tool_choice" in bind_tools_params
@property
def has_structured_output(self) -> bool:
"""Whether the model supports structured output."""
- return False
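+        # Assume structured output support when ``with_structured_output`` is
+        # overridden, or when tool calling is available (the default implementation
+        # can build structured output on top of ``bind_tools``).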
+ return (
+ self.chat_model_class.with_structured_output
+ is not BaseChatModel.with_structured_output
+ ) or self.has_tool_calling
+
+ @property
+ def structured_output_kwargs(self) -> dict:
+ """Additional kwargs for ``with_structured_output``."""
+ return {}
@property
def supports_json_mode(self) -> bool:
@@ -139,30 +244,43 @@ class ChatModelV1Tests(BaseStandardTests):
"""
return False
+ @property
+ def supports_multimodal_reasoning(self) -> bool:
+ """Whether the model can reason about multimodal content."""
+ return (
+ self.supports_image_content_blocks
+ and self.supports_reasoning_content_blocks
+ )
+
@property
def supports_citations(self) -> bool:
"""Whether the model supports ``Citation`` annotations."""
return False
+ @property
+ def supports_structured_citations(self) -> bool:
+ """Whether the model supports structured citation generation."""
+ return self.supports_citations
+
@property
def supports_web_search_blocks(self) -> bool:
"""Whether the model supports ``WebSearchCall``/``WebSearchResult`` blocks."""
return False
+ @property
+ def supports_code_interpreter(self) -> bool:
+ """Whether the model supports code interpreter blocks."""
+ return False
+
@property
def supports_invalid_tool_calls(self) -> bool:
"""Whether the model can handle ``InvalidToolCall`` blocks."""
return False
@property
- def has_tool_choice(self) -> bool:
- """Whether the model supports forcing tool calling via ``tool_choice``."""
- return False
-
- @property
- def structured_output_kwargs(self) -> dict:
- """Additional kwargs for ``with_structured_output``."""
- return {}
+ def returns_usage_metadata(self) -> bool:
+ """Whether the model returns usage metadata on invoke and streaming."""
+ return True
@property
def supports_anthropic_inputs(self) -> bool:
@@ -170,10 +288,17 @@ class ChatModelV1Tests(BaseStandardTests):
return False
@property
- def returns_usage_metadata(self) -> bool:
- """Whether the model returns usage metadata on invoke and streaming."""
- return True
+ def enable_vcr_tests(self) -> bool:
+ """Whether to enable VCR tests for the chat model.
+
+        .. important::
+            See ``enable_vcr_tests`` dropdown :class:`above ` for more
+            information.
+
+ """
+ return False
+
+    # TODO: check this, since there is `reasoning_output` in usage metadata details?
@property
def supported_usage_metadata_details(
self,
@@ -192,67 +317,503 @@ class ChatModelV1Tests(BaseStandardTests):
"""What usage metadata details are emitted in ``invoke()`` and ``stream()``."""
return {"invoke": [], "stream": []}
- @property
- def enable_vcr_tests(self) -> bool:
- """Whether to enable VCR tests for the chat model."""
- return False
-
class ChatModelV1UnitTests(ChatModelV1Tests):
- """Unit tests for chat models with content blocks v1 support.
+ """Base class for chat model v1 unit tests.
These tests run in isolation without external dependencies.
- """
- # Core Method Tests
- def test_invoke_basic(self, model: BaseChatModel) -> None:
- """Test basic invoke functionality with simple string input."""
- result = model.invoke("Hello, world!")
- assert isinstance(result, AIMessage)
- assert result.content is not None
+ Test subclasses must implement the ``chat_model_class`` and
+ ``chat_model_params`` properties to specify what model to test and its
+ initialization parameters.
- def test_invoke_with_message_list(self, model: BaseChatModel) -> None:
- """Test invoke with list of messages."""
- messages = [HumanMessage("Hello, world!")]
- result = model.invoke(messages)
- assert isinstance(result, AIMessage)
- assert result.content is not None
+ Example:
- async def test_ainvoke_basic(self, model: BaseChatModel) -> None:
- """Test basic async invoke functionality."""
- result = await model.ainvoke("Hello, world!")
- assert isinstance(result, AIMessage)
- assert result.content is not None
+ .. code-block:: python
- def test_stream_basic(self, model: BaseChatModel) -> None:
- """Test basic streaming functionality."""
- chunks = []
- for chunk in model.stream("Hello, world!"):
- chunks.append(chunk)
- assert hasattr(chunk, "content")
+ from typing import Type
- assert len(chunks) > 0
- # Verify chunks can be aggregated
- if chunks:
- final_message = chunks[0]
- for chunk in chunks[1:]:
- final_message = final_message + chunk
- assert isinstance(final_message.content, (str, list))
+ from langchain_tests.unit_tests import ChatModelV1UnitTests
+ from my_package.chat_models import MyChatModel
- async def test_astream_basic(self, model: BaseChatModel) -> None:
- """Test basic async streaming functionality."""
- chunks = []
- async for chunk in model.astream("Hello, world!"):
- chunks.append(chunk)
- assert hasattr(chunk, "content")
- assert len(chunks) > 0
- # Verify chunks can be aggregated
- if chunks:
- final_message = chunks[0]
- for chunk in chunks[1:]:
- final_message = final_message + chunk
- assert isinstance(final_message.content, (str, list))
+ class TestMyChatModelUnit(ChatModelV1UnitTests):
+ @property
+ def chat_model_class(self) -> Type[MyChatModel]:
+ # Return the chat model class to test here
+ return MyChatModel
+
+ @property
+ def chat_model_params(self) -> dict:
+ # Return initialization parameters for the v1 model.
+ return {"model": "model-001", "temperature": 0}
+
+ .. note::
+ API references for individual test methods include troubleshooting tips.
+
+
+ Test subclasses **must** implement the following two properties:
+
+ chat_model_class
+ The chat model class to test, e.g., ``ChatParrotLinkV1``.
+
+ Example:
+
+ .. code-block:: python
+
+ @property
+ def chat_model_class(self) -> Type[ChatParrotLinkV1]:
+ return ChatParrotLinkV1
+
+ chat_model_params
+ Initialization parameters for the chat model.
+
+ Example:
+
+ .. code-block:: python
+
+ @property
+ def chat_model_params(self) -> dict:
+ return {"model": "bird-brain-001", "temperature": 0}
+
+ In addition, test subclasses can control what features are tested (such as tool
+ calling or multi-modality) by selectively overriding the following properties.
+ Expand to see details:
+
+ .. dropdown:: has_tool_calling
+
+ TODO
+
+ .. dropdown:: tool_choice_value
+
+ TODO
+
+ .. dropdown:: has_tool_choice
+
+ TODO
+
+ .. dropdown:: has_structured_output
+
+ TODO
+
+ .. dropdown:: structured_output_kwargs
+
+ TODO
+
+ .. dropdown:: supports_json_mode
+
+ TODO
+
+ .. dropdown:: returns_usage_metadata
+
+ TODO
+
+ .. dropdown:: supports_anthropic_inputs
+
+ TODO
+
+ .. dropdown:: supported_usage_metadata_details
+
+ TODO
+
+ .. dropdown:: enable_vcr_tests
+
+ Property controlling whether to enable select tests that rely on
+ `VCR `_ caching of HTTP calls, such
+ as benchmarking tests.
+
+ To enable these tests, follow these steps:
+
+ 1. Override the ``enable_vcr_tests`` property to return ``True``:
+
+ .. code-block:: python
+
+ @property
+ def enable_vcr_tests(self) -> bool:
+ return True
+
+ 2. Configure VCR to exclude sensitive headers and other information from cassettes.
+
+ .. important::
+ VCR will by default record authentication headers and other sensitive
+ information in cassettes. Read below for how to configure what
+ information is recorded in cassettes.
+
+ To add configuration to VCR, add a ``conftest.py`` file to the ``tests/``
+ directory and implement the ``vcr_config`` fixture there.
+
+ ``langchain-tests`` excludes the headers ``'authorization'``,
+ ``'x-api-key'``, and ``'api-key'`` from VCR cassettes. To pick up this
+ configuration, you will need to add ``conftest.py`` as shown below. You can
+ also exclude additional headers, override the default exclusions, or apply
+ other customizations to the VCR configuration. See example below:
+
+ .. code-block:: python
+ :caption: tests/conftest.py
+
+ import pytest
+ from langchain_tests.conftest import _base_vcr_config as _base_vcr_config
+
+ _EXTRA_HEADERS = [
+ # Specify additional headers to redact
+ ("user-agent", "PLACEHOLDER"),
+ ]
+
+
+ def remove_response_headers(response: dict) -> dict:
+ # If desired, remove or modify headers in the response.
+ response["headers"] = {}
+ return response
+
+
+ @pytest.fixture(scope="session")
+ def vcr_config(_base_vcr_config: dict) -> dict: # noqa: F811
+ \"\"\"Extend the default configuration from langchain_tests.\"\"\"
+ config = _base_vcr_config.copy()
+ config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS)
+ config["before_record_response"] = remove_response_headers
+
+ return config
+
+ .. dropdown:: Compressing cassettes
+
+ ``langchain-tests`` includes a custom VCR serializer that compresses
+ cassettes using gzip. To use it, register the ``yaml.gz`` serializer
+ to your VCR fixture and enable this serializer in the config. See
+ example below:
+
+ .. code-block:: python
+ :caption: tests/conftest.py
+
+ import pytest
+ from langchain_tests.conftest import CustomPersister, CustomSerializer
+ from langchain_tests.conftest import _base_vcr_config as _base_vcr_config
+ from vcr import VCR
+
+ _EXTRA_HEADERS = [
+ # Specify additional headers to redact
+ ("user-agent", "PLACEHOLDER"),
+ ]
+
+
+ def remove_response_headers(response: dict) -> dict:
+ # If desired, remove or modify headers in the response.
+ response["headers"] = {}
+ return response
+
+
+ @pytest.fixture(scope="session")
+ def vcr_config(_base_vcr_config: dict) -> dict: # noqa: F811
+ \"\"\"Extend the default configuration from langchain_tests.\"\"\"
+ config = _base_vcr_config.copy()
+ config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS)
+ config["before_record_response"] = remove_response_headers
+ # New: enable serializer and set file extension
+ config["serializer"] = "yaml.gz"
+ config["path_transformer"] = VCR.ensure_suffix(".yaml.gz")
+
+ return config
+
+
+ def pytest_recording_configure(config: dict, vcr: VCR) -> None:
+ vcr.register_persister(CustomPersister())
+ vcr.register_serializer("yaml.gz", CustomSerializer())
+
+
+ You can inspect the contents of the compressed cassettes (e.g., to
+ ensure no sensitive information is recorded) using
+
+ .. code-block:: bash
+
+ gunzip -k /path/to/tests/cassettes/TestClass_test.yaml.gz
+
+ or by using the serializer:
+
+ .. code-block:: python
+
+ from langchain_tests.conftest import CustomPersister, CustomSerializer
+
+ cassette_path = "/path/to/tests/cassettes/TestClass_test.yaml.gz"
+            requests, responses = CustomPersister().load_cassette(
+                cassette_path, CustomSerializer()
+            )
+
+ 3. Run tests to generate VCR cassettes.
+
+ Example:
+
+ .. code-block:: bash
+
+ uv run python -m pytest tests/integration_tests/test_chat_models.py::TestMyModel::test_stream_time
+
+ This will generate a VCR cassette for the test in
+ ``tests/integration_tests/cassettes/``.
+
+ .. important::
+ You should inspect the generated cassette to ensure that it does not
+ contain sensitive information. If it does, you can modify the
+ ``vcr_config`` fixture to exclude headers or modify the response
+ before it is recorded.
+
+ You can then commit the cassette to your repository. Subsequent test runs
+ will use the cassette instead of making HTTP calls.
+
+ Testing initialization from environment variables
+ Some unit tests may require testing initialization from environment variables.
+ These tests can be enabled by overriding the ``init_from_env_params``
+ property (see below):
+
+ .. dropdown:: init_from_env_params
+
+ This property is used in unit tests to test initialization from
+ environment variables. It should return a tuple of three dictionaries
+ that specify the environment variables, additional initialization args,
+ and expected instance attributes to check.
+
+ Defaults to empty dicts. If not overridden, the test is skipped.
+
+ Example:
+
+ .. code-block:: python
+
+ @property
+ def init_from_env_params(self) -> Tuple[dict, dict, dict]:
+ return (
+ {
+ "MY_API_KEY": "api_key",
+ },
+ {
+ "model": "bird-brain-001",
+ },
+ {
+ "my_api_key": "api_key",
+ },
+ )
+
+ """ # noqa: E501
+
+ @property
+ def standard_chat_model_params(self) -> dict:
+ """:private:"""
+ params = super().standard_chat_model_params
+ params["api_key"] = "test"
+ return params
+
+ @property
+ def init_from_env_params(self) -> tuple[dict, dict, dict]:
+ """Environment variables, additional initialization args, and expected
+ instance attributes for testing initialization from environment variables.
+
+ """
+ return {}, {}, {}
+
+ # Initialization Tests
+ def test_init(self) -> None:
+ """Test model initialization. This should pass for all integrations.
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, ensure that:
+
+ 1. ``chat_model_params`` is specified and the model can be initialized from those params (see the sketch below);
+ 2. The model accommodates `standard parameters <https://python.langchain.com/docs/concepts/chat_models/#standard-parameters>`__.
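+
+ A minimal sketch of supplying these params on a test class (the model
+ class and parameter values below are illustrative only):
+
+ .. code-block:: python
+
+ @property
+ def chat_model_class(self) -> type:
+ # Hypothetical chat model integration class
+ return ChatMyModel
+
+ @property
+ def chat_model_params(self) -> dict:
+ # Constructor kwargs used by the standard tests
+ return {"model": "bird-brain-001", "temperature": 0}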
+
+ """ # noqa: E501
+ model = self.chat_model_class(
+ **{
+ **self.standard_chat_model_params,
+ **self.chat_model_params,
+ }
+ )
+ assert model is not None
+
+ def test_init_from_env(self) -> None:
+ """Test initialization from environment variables. Relies on the
+ ``init_from_env_params`` property. Test is skipped if that property is not
+ set.
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, ensure that ``init_from_env_params`` is specified
+ correctly and that model parameters are properly set from environment
+ variables during initialization.
+
+ """
+ env_params, model_params, expected_attrs = self.init_from_env_params
+ if not env_params:
+ pytest.skip("init_from_env_params not specified.")
+ else:
+ with mock.patch.dict(os.environ, env_params):
+ model = self.chat_model_class(**model_params)
+ assert model is not None
+ for k, expected in expected_attrs.items():
+ actual = getattr(model, k)
+ if isinstance(actual, SecretStr):
+ actual = actual.get_secret_value()
+ assert actual == expected
+
+ def test_init_streaming(
+ self,
+ ) -> None:
+ """Test that model can be initialized with ``streaming=True``. This is for
+ backward-compatibility purposes.
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, ensure that the model can be initialized with a
+ boolean ``streaming`` parameter.
+
+ """
+ model = self.chat_model_class(
+ **{
+ **self.standard_chat_model_params,
+ **self.chat_model_params,
+ "streaming": True,
+ }
+ )
+ assert model is not None
+
+ def test_bind_tool_pydantic(
+ self,
+ model: BaseChatModel,
+ my_adder_tool: BaseTool,
+ ) -> None:
+ """Test that chat model correctly handles Pydantic models that are passed
+ into ``bind_tools``. Test is skipped if the ``has_tool_calling`` property
+ on the test class is False.
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, ensure that the model's ``bind_tools`` method
+ properly handles Pydantic V2 models. ``langchain_core`` implements
+ a utility function that will accommodate most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
+
+ See example implementation of ``bind_tools`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.bind_tools
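+
+ A minimal sketch of the normalization step a ``bind_tools`` implementation
+ typically performs (``tools`` stands for the sequence passed in):
+
+ .. code-block:: python
+
+ from langchain_core.utils.function_calling import convert_to_openai_tool
+
+ # Accepts BaseTool instances, Pydantic models, plain functions, and
+ # dict schemas, normalizing each to the OpenAI tool schema.
+ formatted_tools = [convert_to_openai_tool(t) for t in tools]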
+
+ """
+ if not self.has_tool_calling:
+ return
+
+ def my_adder(a: int, b: int) -> int:
+ """Takes two integers, a and b, and returns their sum."""
+ return a + b
+
+ tools = [my_adder_tool, my_adder]
+
+ for pydantic_model in TEST_PYDANTIC_MODELS:
+ model_schema = (
+ pydantic_model.model_json_schema()
+ if hasattr(pydantic_model, "model_json_schema")
+ else pydantic_model.schema()
+ )
+ tools.extend([pydantic_model, model_schema])
+
+ # Doing a mypy ignore here since some of the tools are from pydantic
+ # BaseModel 2 which isn't typed properly yet. This will need to be fixed
+ # so type checking does not become annoying to users.
+ tool_model = model.bind_tools(tools, tool_choice="any") # type: ignore[arg-type]
+ assert isinstance(tool_model, RunnableBinding)
+
+ @pytest.mark.parametrize("schema", TEST_PYDANTIC_MODELS)
+ def test_with_structured_output(
+ self,
+ model: BaseChatModel,
+ schema: Any,
+ ) -> None:
+ """Test ``with_structured_output`` method. Test is skipped if the
+ ``has_structured_output`` property on the test class is False.
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, ensure that the model's ``bind_tools`` method
+ properly handles Pydantic V2 models. ``langchain_core`` implements
+ a utility function that will accommodate most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
+
+ See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
+
+ """
+ if not self.has_structured_output:
+ return
+
+ assert model.with_structured_output(schema) is not None
+ for method in ["json_schema", "function_calling", "json_mode"]:
+ strict_values = [None, False, True] if method != "json_mode" else [None]
+ for strict in strict_values:
+ assert model.with_structured_output(
+ schema, method=method, strict=strict
+ )
+
+ def test_standard_params(self, model: BaseChatModel) -> None:
+ """Test that model properly generates standard parameters. These are used
+ for tracing purposes.
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, check that the model accommodates `standard
+ parameters <https://python.langchain.com/docs/concepts/chat_models/#standard-parameters>`__.
+
+ Check also that the model class is named according to convention
+ (e.g., ``ChatProviderName``).
+ """
+
+ class ExpectedParams(BaseModelV1):
+ ls_provider: str
+ ls_model_name: str
+ ls_model_type: Literal["chat"]
+ ls_temperature: Optional[float]
+ ls_max_tokens: Optional[int]
+ ls_stop: Optional[list[str]]
+
+ ls_params = model._get_ls_params()
+ try:
+ ExpectedParams(**ls_params) # type: ignore[arg-type]
+ except ValidationErrorV1 as e:
+ pytest.fail(f"Validation error: {e}")
+
+ # Test optional params
+ model = self.chat_model_class(
+ max_tokens=10, # type: ignore[call-arg]
+ stop=["test"], # type: ignore[call-arg]
+ **self.chat_model_params,
+ )
+ ls_params = model._get_ls_params()
+ try:
+ ExpectedParams(**ls_params) # type: ignore[arg-type]
+ except ValidationErrorV1 as e:
+ pytest.fail(f"Validation error: {e}")
+
+ def test_serdes(self, model: BaseChatModel, snapshot: SnapshotAssertion) -> None:
+ """Test serialization and deserialization of the model. Test is skipped if the
+ ``is_lc_serializable`` property on the chat model class is not overwritten
+ to return ``True``.
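+
+ For example, a chat model class opts in by overriding the classmethod
+ (a sketch; the flag lives on the model class, not the test class):
+
+ .. code-block:: python
+
+ @classmethod
+ def is_lc_serializable(cls) -> bool:
+ return True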
+
+ .. dropdown:: Troubleshooting
+
+ If this test fails, check that the ``init_from_env_params`` property is
+ correctly set on the test class.
+ """
+ if not self.chat_model_class.is_lc_serializable():
+ pytest.skip("Model is not serializable.")
+ else:
+ env_params, _model_params, _expected_attrs = self.init_from_env_params
+ with mock.patch.dict(os.environ, env_params):
+ ser = dumpd(model)
+ assert ser == snapshot(name="serialized")
+ assert (
+ model.model_dump()
+ == load(
+ dumpd(model), valid_namespaces=model.get_lc_namespace()[:1]
+ ).model_dump()
+ )
+
+ @pytest.mark.benchmark
+ def test_init_time(self, benchmark: BenchmarkFixture) -> None:
+ """Test initialization time of the chat model. If this test fails, check that
+ we are not introducing undue overhead in the model's initialization.
+ """
+
+ def _init_in_loop() -> None:
+ for _ in range(10):
+ self.chat_model_class(**self.chat_model_params)
+
+ benchmark(_init_in_loop)
# Property Tests
def test_llm_type_property(self, model: BaseChatModel) -> None:
@@ -279,275 +840,10 @@ class ChatModelV1UnitTests(ChatModelV1Tests):
assert key in dumped
assert dumped[key] == value
- # Input Conversion Tests
- def test_input_conversion_string(self, model: BaseChatModel) -> None:
- """Test that string input is properly converted to messages."""
- # This test verifies the _convert_input method works correctly
- result = model.invoke("Test string input")
- assert isinstance(result, AIMessage)
- assert result.content is not None
-
- def test_input_conversion_empty_string(self, model: BaseChatModel) -> None:
- """Test that empty string input is handled gracefully."""
- result = model.invoke("")
- assert isinstance(result, AIMessage)
- # Content might be empty or some default response
-
- def test_input_conversion_message_v1_list(self, model: BaseChatModel) -> None:
- """Test that v1 message list input is handled correctly."""
- messages = [HumanMessage("Test message")]
- result = model.invoke(messages)
- assert isinstance(result, AIMessage)
- assert result.content is not None
-
- # Batch Processing Tests
- def test_batch_basic(self, model: BaseChatModel) -> None:
- """Test basic batch processing functionality."""
- inputs = ["Hello", "How are you?", "Goodbye"]
- results = model.batch(inputs) # type: ignore[arg-type]
-
- assert isinstance(results, list)
- assert len(results) == len(inputs)
- for result in results:
- assert isinstance(result, AIMessage)
- assert result.content is not None
-
- async def test_abatch_basic(self, model: BaseChatModel) -> None:
- """Test basic async batch processing functionality."""
- inputs = ["Hello", "How are you?", "Goodbye"]
- results = await model.abatch(inputs) # type: ignore[arg-type]
-
- assert isinstance(results, list)
- assert len(results) == len(inputs)
- for result in results:
- assert isinstance(result, AIMessage)
- assert result.content is not None
-
- # Content Block Tests
- def test_text_content_blocks(self, model: BaseChatModel) -> None:
- """Test that the model can handle the ``TextContentBlock`` format.
-
- This test verifies that the model correctly processes messages containing
- ``TextContentBlock`` objects instead of plain strings.
- """
- if not self.supports_text_content_blocks:
- pytest.skip("Model does not support TextContentBlock (rare!)")
-
- text_block = create_text_block("Hello, world!")
- message = HumanMessage(content=[text_block])
-
- result = model.invoke([message])
- assert isinstance(result, AIMessage)
- assert result.content is not None
-
- def test_mixed_content_blocks(self, model: BaseChatModel) -> None:
- """Test that the model can handle messages with mixed content blocks."""
- if not (
- self.supports_text_content_blocks and self.supports_image_content_blocks
- ):
- pytest.skip(
- "Model doesn't support mixed content blocks (concurrent text and image)"
- )
-
- content_blocks: list[ContentBlock] = [
- create_text_block("Describe this image:"),
- create_image_block(
- base64="iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==",
- mime_type="image/png",
- ),
- ]
-
- message = HumanMessage(content=content_blocks)
- result = model.invoke([message])
-
- assert isinstance(result, AIMessage)
- assert result.content is not None
-
- def test_reasoning_content_blocks(self, model: BaseChatModel) -> None:
- """Test that the model can generate ``ReasoningContentBlock``."""
- if not self.supports_reasoning_content_blocks:
- pytest.skip("Model does not support ReasoningContentBlock.")
-
- message = HumanMessage("Think step by step: What is 2 + 2?")
- result = model.invoke([message])
-
- assert isinstance(result, AIMessage)
- if isinstance(result.content, list):
- reasoning_blocks = [
- block
- for block in result.content
- if isinstance(block, dict) and is_reasoning_block(block)
- ]
- assert len(reasoning_blocks) > 0
-
- def test_citations_in_response(self, model: BaseChatModel) -> None:
- """Test that the model can generate ``Citations`` in text blocks."""
- if not self.supports_citations:
- pytest.skip("Model does not support citations.")
-
- message = HumanMessage("Provide information about Python with citations.")
- result = model.invoke([message])
-
- assert isinstance(result, AIMessage)
- if isinstance(result.content, list):
- content_list = result.content
- text_blocks_with_citations: list[TextContentBlock] = []
- for block in content_list:
- if (
- isinstance(block, dict)
- and is_text_block(block)
- and "annotations" in block
- and isinstance(block.get("annotations"), list)
- and len(cast(list, block.get("annotations", []))) > 0
- ):
- text_block = cast(TextContentBlock, block)
- text_blocks_with_citations.append(text_block)
- assert len(text_blocks_with_citations) > 0
-
- # Verify that at least one known citation type is present
- has_citation = any(
- "citation" in annotation.get("type", "")
- for block in text_blocks_with_citations
- for annotation in block.get("annotations", [])
- ) or any(
- "non_standard_annotation" in annotation.get("type", "")
- for block in text_blocks_with_citations
- for annotation in block.get("annotations", [])
- )
- assert has_citation, "No citations found in text blocks."
-
- def test_non_standard_content_blocks(self, model: BaseChatModel) -> None:
- """Test that the model can handle ``NonStandardContentBlock``."""
- if not self.supports_non_standard_blocks:
- pytest.skip("Model does not support NonStandardContentBlock.")
-
- non_standard_block = create_non_standard_block(
- {
- "custom_field": "custom_value",
- "data": [1, 2, 3],
- }
- )
-
- message = HumanMessage(content=[non_standard_block])
-
- # Should not raise an error
- result = model.invoke([message])
- assert isinstance(result, AIMessage)
-
- def test_enhanced_tool_calls_with_content_blocks(
- self, model: BaseChatModel
- ) -> None:
- """Test enhanced tool calling with content blocks format."""
- if not self.has_tool_calling:
- pytest.skip("Model does not support enhanced tool calls.")
-
- @tool
- def sample_tool(query: str) -> str:
- """A sample tool for testing."""
- return f"Result for: {query}"
-
- model_with_tools = model.bind_tools([sample_tool])
- message = HumanMessage("Use the sample tool with query 'test'")
-
- result = model_with_tools.invoke([message])
- assert isinstance(result, AIMessage)
-
- # Check if tool calls are in content blocks format
- if isinstance(result.content, list):
- tool_call_blocks = [
- block
- for block in result.content
- if isinstance(block, dict) and is_tool_call_block(block)
- ]
- assert len(tool_call_blocks) > 0
- # Backwards compat?
- # else:
- # # Fallback to legacy tool_calls attribute
- # assert hasattr(result, "tool_calls") and result.tool_calls
-
- def test_invalid_tool_call_handling(self, model: BaseChatModel) -> None:
- """Test that the model can handle ``InvalidToolCall`` blocks gracefully."""
- if not self.supports_invalid_tool_calls:
- pytest.skip("Model does not support InvalidToolCall handling.")
-
- invalid_tool_call: InvalidToolCall = {
- "type": "invalid_tool_call",
- "name": "nonexistent_tool",
- "args": None,
- "id": "invalid_123",
- "error": "Tool not found",
- }
-
- # Create a message with invalid tool call in history
- ai_message = AIMessage(content=[invalid_tool_call])
- follow_up = HumanMessage("Please try again with a valid approach.")
-
- result = model.invoke([ai_message, follow_up])
- assert isinstance(result, AIMessage)
- assert result.content is not None
- # TODO: enhance/double check this
-
- def test_web_search_content_blocks(self, model: BaseChatModel) -> None:
- """Test generating ``WebSearchCall``/``WebSearchResult`` blocks."""
- if not self.supports_web_search_blocks:
- pytest.skip("Model does not support web search blocks.")
-
- message = HumanMessage("Search for recent news about AI developments.")
- result = model.invoke([message])
-
- assert isinstance(result, AIMessage)
- if isinstance(result.content, list):
- search_blocks = [
- block
- for block in result.content
- if isinstance(block, dict)
- and block.get("type") in ["web_search_call", "web_search_result"]
- ]
- assert len(search_blocks) > 0
-
- def test_file_content_blocks(self, model: BaseChatModel) -> None:
- """Test that the model can handle ``FileContentBlock``."""
- if not self.supports_file_content_blocks:
- pytest.skip("Model does not support FileContentBlock.")
-
- file_block = create_file_block(
- base64="SGVsbG8sIHdvcmxkIQ==", # "Hello, world!"
- mime_type="text/plain",
- )
-
- message = HumanMessage(content=[file_block])
- result = model.invoke([message])
-
- assert isinstance(result, AIMessage)
- assert result.content is not None
- # TODO: make more robust?
-
- def test_content_block_streaming(self, model: BaseChatModel) -> None:
- """Test that content blocks work correctly with streaming."""
- if not self.supports_content_blocks_v1:
- pytest.skip("Model does not support content blocks v1.")
-
- text_block = create_text_block("Generate a short story.")
- message = HumanMessage(content=[text_block])
-
- chunks = []
- for chunk in model.stream([message]):
- chunks.append(chunk)
- assert hasattr(chunk, "content")
-
- assert len(chunks) > 0
-
- # Verify final aggregated message
- final_message = chunks[0]
- for chunk in chunks[1:]:
- final_message = final_message + chunk
-
- assert isinstance(final_message.content, (str, list))
-
def test_content_block_serialization(self, model: BaseChatModel) -> None:
"""Test that messages with content blocks can be serialized/deserialized."""
if not self.supports_content_blocks_v1:
- pytest.skip("Model does not support content blocks v1.")
+ pytest.skip("Model does not support v1 content blocks.")
text_block = create_text_block("Test serialization")
message = HumanMessage(content=[text_block])
@@ -560,36 +856,4 @@ class ChatModelV1UnitTests(ChatModelV1Tests):
deserialized = load(serialized)
assert isinstance(deserialized, HumanMessage)
assert deserialized.content == message.content
- # TODO: make more robust
-
- def test_backwards_compatibility(self, model: BaseChatModel) -> None:
- """Test that models still work with legacy string content."""
- # This should work regardless of content blocks support
- legacy_message = HumanMessage("Hello, world!")
- result = model.invoke([legacy_message])
-
- assert isinstance(result, AIMessage)
- assert result.content is not None
-
- legacy_message_named_param = HumanMessage(content="Hello, world!")
- result_named_param = model.invoke([legacy_message_named_param])
-
- assert isinstance(result_named_param, AIMessage)
- assert result_named_param.content is not None
-
- def test_content_block_validation(self, model: BaseChatModel) -> None:
- """Test that invalid content blocks are handled gracefully."""
- if not self.supports_content_blocks_v1:
- pytest.skip("Model does not support content blocks v1.")
-
- # Test with invalid content block structure
- invalid_block = {"type": "invalid_type", "invalid_field": "value"}
- message = HumanMessage(content=[invalid_block]) # type: ignore[list-item]
-
- # Should handle gracefully (either convert to NonStandardContentBlock or reject)
- try:
- result = model.invoke([message])
- assert isinstance(result, AIMessage)
- except (ValueError, TypeError) as e:
- # Acceptable to raise validation errors for truly invalid blocks
- assert "invalid" in str(e).lower() or "unknown" in str(e).lower()
+ # TODO: make more robust? include more fields