feat: port various nit changes from wip-v0.4 (#32506)

Lots of work that wasn't directly related to core improvements, messages, or testing functionality.
Mason Daugherty
2025-08-11 15:09:08 -04:00
committed by GitHub
parent 7db9e60601
commit ee4c2510eb
63 changed files with 2213 additions and 2862 deletions


@@ -14,62 +14,68 @@ also break your CI if we introduce tests that your integration doesn't pass.
Pip:

```bash
pip install -U langchain-tests
```

Poetry:

```bash
poetry add langchain-tests
```

uv:

```bash
uv add langchain-tests
```
## Usage
-To add standard tests to an integration package's e.g. ChatModel, you need to create
+To add standard tests to an integration package (e.g., for a ChatModel), you need to create
-1. A unit test class that inherits from ChatModelUnitTests
-2. An integration test class that inherits from ChatModelIntegrationTests
+1. A unit test class that inherits from `ChatModelUnitTests`
+2. An integration test class that inherits from `ChatModelIntegrationTests`
`tests/unit_tests/test_standard.py`:
```python
"""Standard LangChain interface tests"""

from typing import Type

import pytest
from langchain_core.language_models import BaseChatModel
from langchain_tests.unit_tests import ChatModelUnitTests

from langchain_parrot_chain import ChatParrotChain


class TestParrotChainStandard(ChatModelUnitTests):
    @pytest.fixture
    def chat_model_class(self) -> Type[BaseChatModel]:
        return ChatParrotChain
```
`tests/integration_tests/test_standard.py`:
```python
"""Standard LangChain interface tests"""

from typing import Type

import pytest
from langchain_core.language_models import BaseChatModel
from langchain_tests.integration_tests import ChatModelIntegrationTests

from langchain_parrot_chain import ChatParrotChain


class TestParrotChainStandard(ChatModelIntegrationTests):
    @pytest.fixture
    def chat_model_class(self) -> Type[BaseChatModel]:
        return ChatParrotChain
```
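The test base classes also expose optional configuration properties (e.g., `chat_model_params` and `returns_usage_metadata`, both described in the test-class docstrings). A minimal sketch of overriding them; the parameter values are placeholders:

```python
"""Illustrative sketch: configuring optional standard-test properties."""

from typing import Type

import pytest
from langchain_core.language_models import BaseChatModel
from langchain_tests.integration_tests import ChatModelIntegrationTests

from langchain_parrot_chain import ChatParrotChain


class TestParrotChainStandard(ChatModelIntegrationTests):
    @pytest.fixture
    def chat_model_class(self) -> Type[BaseChatModel]:
        return ChatParrotChain

    @property
    def chat_model_params(self) -> dict:
        # Constructor kwargs for the model under test (values are placeholders).
        return {"model": "parrot-001", "temperature": 0}

    @property
    def returns_usage_metadata(self) -> bool:
        # Disable usage-metadata assertions if the API reports no token counts.
        return False
```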
## Reference


@@ -9,7 +9,7 @@ class BaseStandardTests(ABC):
:private:
"""
-# find path to standard test implementations
+# Find path to standard test implementations
comparison_class = None
def explore_bases(cls: type) -> None:


@@ -32,9 +32,7 @@ from pytest_benchmark.fixture import BenchmarkFixture # type: ignore[import-unt
from typing_extensions import TypedDict
from vcr.cassette import Cassette
-from langchain_tests.unit_tests.chat_models import (
-    ChatModelTests,
-)
+from langchain_tests.unit_tests.chat_models import ChatModelTests
from langchain_tests.utils.pydantic import PYDANTIC_MAJOR_VERSION
@@ -110,6 +108,7 @@ def magic_function_no_args() -> int:
def _validate_tool_call_message(message: BaseMessage) -> None:
    assert isinstance(message, AIMessage)
    assert len(message.tool_calls) == 1
    tool_call = message.tool_calls[0]
    assert tool_call["name"] == "magic_function"
    assert tool_call["args"] == {"input": 3}
@@ -120,6 +119,7 @@ def _validate_tool_call_message(message: BaseMessage) -> None:
def _validate_tool_call_message_no_args(message: BaseMessage) -> None:
    assert isinstance(message, AIMessage)
    assert len(message.tool_calls) == 1
    tool_call = message.tool_calls[0]
    assert tool_call["name"] == "magic_function_no_args"
    assert tool_call["args"] == {}
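For orientation, these validators assert on the ``ToolCall`` dicts exposed via ``AIMessage.tool_calls``; a hedged sketch of that shape (the ``id`` value is illustrative):

```python
# Illustrative only: the dict shape the validators above check.
tool_call = {
    "name": "magic_function",
    "args": {"input": 3},
    "id": "call_abc123",  # provider-assigned; value illustrative
    "type": "tool_call",
}
assert tool_call["name"] == "magic_function"
assert tool_call["args"] == {"input": 3}
assert tool_call.get("type") == "tool_call"
```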
@@ -137,6 +137,7 @@ def unicode_customer(customer_name: str, description: str) -> str:
    Returns:
        A confirmation message about the customer creation.
    """
    return f"Created customer: {customer_name} - {description}"
@@ -173,7 +174,7 @@ class ChatModelIntegrationTests(ChatModelTests):
API references for individual test methods include troubleshooting tips.
-Test subclasses must implement the following two properties:
+Test subclasses **must** implement the following two properties:
chat_model_class
The chat model class to test, e.g., ``ChatParrotLink``.
@@ -426,10 +427,10 @@ class ChatModelIntegrationTests(ChatModelTests):
.. dropdown:: returns_usage_metadata
Boolean property indicating whether the chat model returns usage metadata
-on invoke and streaming responses.
+on invoke and streaming responses. Defaults to ``True``.
-``usage_metadata`` is an optional dict attribute on AIMessages that track input
-and output tokens: https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html
+``usage_metadata`` is an optional dict attribute on ``AIMessage``s that track input
+and output tokens. `See more. <https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html>`__
Example:
@@ -440,7 +441,7 @@ class ChatModelIntegrationTests(ChatModelTests):
return False
Models supporting ``usage_metadata`` should also return the name of the
-underlying model in the ``response_metadata`` of the AIMessage.
+underlying model in the ``response_metadata`` of the ``AIMessage``.
.. dropdown:: supports_anthropic_inputs
@@ -525,8 +526,8 @@ class ChatModelIntegrationTests(ChatModelTests):
Property controlling what usage metadata details are emitted in both invoke
and stream.
-``usage_metadata`` is an optional dict attribute on AIMessages that track input
-and output tokens: https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html
+``usage_metadata`` is an optional dict attribute on ``AIMessage``s that track input
+and output tokens. `See more. <https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html>`__
It includes optional keys ``input_token_details`` and ``output_token_details``
that can track usage details associated with special types of tokens, such as
@@ -682,13 +683,13 @@ class ChatModelIntegrationTests(ChatModelTests):
return {}
def test_invoke(self, model: BaseChatModel) -> None:
"""Test to verify that `model.invoke(simple_message)` works.
"""Test to verify that ``model.invoke(simple_message)`` works.
This should pass for all integrations.
.. dropdown:: Troubleshooting
-If this test fails, you should make sure your _generate method
+If this test fails, you should make sure your ``_generate`` method
does not raise any exceptions, and that it returns a valid
:class:`~langchain_core.outputs.chat_result.ChatResult` like so:
@@ -708,7 +709,7 @@ class ChatModelIntegrationTests(ChatModelTests):
assert len(result.content) > 0
async def test_ainvoke(self, model: BaseChatModel) -> None:
"""Test to verify that `await model.ainvoke(simple_message)` works.
"""Test to verify that ``await model.ainvoke(simple_message)`` works.
This should pass for all integrations. Passing this test does not indicate
a "natively async" implementation, but rather that the model can be used
@@ -718,7 +719,7 @@ class ChatModelIntegrationTests(ChatModelTests):
First, debug
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`.
-because `ainvoke` has a default implementation that calls `invoke` in an
+because ``ainvoke`` has a default implementation that calls ``invoke`` in an
async context.
If that test passes but not this one, you should make sure your _agenerate
@@ -741,7 +742,7 @@ class ChatModelIntegrationTests(ChatModelTests):
assert len(result.content) > 0
def test_stream(self, model: BaseChatModel) -> None:
"""Test to verify that `model.stream(simple_message)` works.
"""Test to verify that ``model.stream(simple_message)`` works.
This should pass for all integrations. Passing this test does not indicate
a "streaming" implementation, but rather that the model can be used in a
@@ -751,10 +752,10 @@ class ChatModelIntegrationTests(ChatModelTests):
First, debug
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`.
-because `stream` has a default implementation that calls `invoke` and yields
-the result as a single chunk.
+because ``stream`` has a default implementation that calls ``invoke`` and
+yields the result as a single chunk.

-If that test passes but not this one, you should make sure your _stream
+If that test passes but not this one, you should make sure your ``_stream``
method does not raise any exceptions, and that it yields valid
:class:`~langchain_core.outputs.chat_generation.ChatGenerationChunk`
objects like so:
@@ -770,11 +771,12 @@ class ChatModelIntegrationTests(ChatModelTests):
for chunk in model.stream("Hello"):
    assert chunk is not None
    assert isinstance(chunk, AIMessageChunk)
    assert isinstance(chunk.content, (str, list))
    num_chunks += 1
assert num_chunks > 0
async def test_astream(self, model: BaseChatModel) -> None:
"""Test to verify that `await model.astream(simple_message)` works.
"""Test to verify that ``await model.astream(simple_message)`` works.
This should pass for all integrations. Passing this test does not indicate
a "natively async" or "streaming" implementation, but rather that the model can
@@ -786,11 +788,11 @@ class ChatModelIntegrationTests(ChatModelTests):
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_stream`.
and
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_ainvoke`.
-because `astream` has a default implementation that calls `_stream` in an
-async context if it is implemented, or `ainvoke` and yields the result as a
-single chunk if not.
+because ``astream`` has a default implementation that calls ``_stream`` in
+an async context if it is implemented, or ``ainvoke`` and yields the result
+as a single chunk if not.

-If those tests pass but not this one, you should make sure your _astream
+If those tests pass but not this one, you should make sure your ``_astream``
method does not raise any exceptions, and that it yields valid
:class:`~langchain_core.outputs.chat_generation.ChatGenerationChunk`
objects like so:
@@ -811,7 +813,7 @@ class ChatModelIntegrationTests(ChatModelTests):
assert num_chunks > 0
def test_batch(self, model: BaseChatModel) -> None:
"""Test to verify that `model.batch([messages])` works.
"""Test to verify that ``model.batch([messages])`` works.
This should pass for all integrations. Tests the model's ability to process
multiple prompts in a single batch.
@@ -820,12 +822,13 @@ class ChatModelIntegrationTests(ChatModelTests):
First, debug
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`
-because `batch` has a default implementation that calls `invoke` for each
-message in the batch.
+because ``batch`` has a default implementation that calls ``invoke`` for
+each message in the batch.

-If that test passes but not this one, you should make sure your `batch`
+If that test passes but not this one, you should make sure your ``batch``
method does not raise any exceptions, and that it returns a list of valid
:class:`~langchain_core.messages.AIMessage` objects.
"""
batch_results = model.batch(["Hello", "Hey"])
assert batch_results is not None
@@ -838,7 +841,7 @@ class ChatModelIntegrationTests(ChatModelTests):
assert len(result.content) > 0
async def test_abatch(self, model: BaseChatModel) -> None:
"""Test to verify that `await model.abatch([messages])` works.
"""Test to verify that ``await model.abatch([messages])`` works.
This should pass for all integrations. Tests the model's ability to process
multiple prompts in a single batch asynchronously.
@@ -849,12 +852,13 @@ class ChatModelIntegrationTests(ChatModelTests):
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_batch`
and
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_ainvoke`
-because `abatch` has a default implementation that calls `ainvoke` for each
-message in the batch.
+because ``abatch`` has a default implementation that calls ``ainvoke`` for
+each message in the batch.

-If those tests pass but not this one, you should make sure your `abatch`
+If those tests pass but not this one, you should make sure your ``abatch``
method does not raise any exceptions, and that it returns a list of valid
:class:`~langchain_core.messages.AIMessage` objects.
"""
batch_results = await model.abatch(["Hello", "Hey"])
assert batch_results is not None
@@ -877,18 +881,20 @@ class ChatModelIntegrationTests(ChatModelTests):
First, debug
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`
-because this test also uses `model.invoke()`.
+because this test also uses ``model.invoke()``.
If that test passes but not this one, you should verify that:
1. Your model correctly processes the message history
2. The model maintains appropriate context from previous messages
3. The response is a valid :class:`~langchain_core.messages.AIMessage`
"""
messages = [
    HumanMessage("hello"),
    AIMessage("hello"),
    HumanMessage("how are you"),
]
result = model.invoke(messages)
assert result is not None
assert isinstance(result, AIMessage)
@@ -906,17 +912,17 @@ class ChatModelIntegrationTests(ChatModelTests):
First, debug
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`
-because this test also uses `model.invoke()`.
+because this test also uses ``model.invoke()``.
Second, debug
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_conversation`
because this test is the "basic case" without double messages.
If that test passes those but not this one, you should verify that:
-1. Your model API can handle double messages, or the integration should
-   merge messages before sending them to the API.
+1. Your model API can handle double messages, or the integration should merge messages before sending them to the API.
2. The response is a valid :class:`~langchain_core.messages.AIMessage`
"""
""" # noqa: E501
messages = [
    SystemMessage("hello"),
    SystemMessage("hello"),
@@ -926,6 +932,7 @@ class ChatModelIntegrationTests(ChatModelTests):
AIMessage("hello"),
HumanMessage("how are you"),
]
result = model.invoke(messages)
assert result is not None
assert isinstance(result, AIMessage)
@@ -940,13 +947,14 @@ class ChatModelIntegrationTests(ChatModelTests):
.. versionchanged:: 0.3.17
-Additionally check for the presence of `model_name` in the response
+Additionally check for the presence of ``model_name`` in the response
metadata, which is needed for usage tracking in callback handlers.
.. dropdown:: Configuration
By default, this test is run.
-To disable this feature, set `returns_usage_metadata` to False in your
+To disable this feature, set ``returns_usage_metadata`` to ``False`` in your
test class:
.. code-block:: python
@@ -957,7 +965,7 @@ class ChatModelIntegrationTests(ChatModelTests):
return False
This test can also check the format of specific kinds of usage metadata
-based on the `supported_usage_metadata_details` property. This property
+based on the ``supported_usage_metadata_details`` property. This property
should be configured as follows with the types of tokens that the model
supports tracking:
@@ -988,7 +996,7 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, first verify that your model returns
:class:`~langchain_core.messages.ai.UsageMetadata` dicts
-attached to the returned AIMessage object in `_generate`:
+attached to the returned AIMessage object in ``_generate``:
.. code-block:: python
@@ -1020,9 +1028,11 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.returns_usage_metadata:
    pytest.skip("Not implemented.")
result = model.invoke("Hello")
assert result is not None
assert isinstance(result, AIMessage)
assert result.usage_metadata is not None
assert isinstance(result.usage_metadata["input_tokens"], int)
assert isinstance(result.usage_metadata["output_tokens"], int)
@@ -1106,13 +1116,13 @@ class ChatModelIntegrationTests(ChatModelTests):
.. versionchanged:: 0.3.17
-Additionally check for the presence of `model_name` in the response
+Additionally check for the presence of ``model_name`` in the response
metadata, which is needed for usage tracking in callback handlers.
.. dropdown:: Configuration
By default, this test is run.
-To disable this feature, set `returns_usage_metadata` to False in your
+To disable this feature, set ``returns_usage_metadata`` to ``False`` in your
test class:
.. code-block:: python
@@ -1123,7 +1133,7 @@ class ChatModelIntegrationTests(ChatModelTests):
return False
This test can also check the format of specific kinds of usage metadata
-based on the `supported_usage_metadata_details` property. This property
+based on the ``supported_usage_metadata_details`` property. This property
should be configured as follows with the types of tokens that the model
supports tracking:
@@ -1153,16 +1163,16 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, first verify that your model yields
:class:`~langchain_core.messages.ai.UsageMetadata` dicts
-attached to the returned AIMessage object in `_stream`
+attached to the returned AIMessage object in ``_stream``
that sum up to the total usage metadata.
-Note that `input_tokens` should only be included on one of the chunks
-(typically the first or the last chunk), and the rest should have 0 or None
-to avoid counting input tokens multiple times.
+Note that ``input_tokens`` should only be included on one of the chunks
+(typically the first or the last chunk), and the rest should have ``0`` or
+``None`` to avoid counting input tokens multiple times.

-`output_tokens` typically count the number of tokens in each chunk, not the
-sum. This test will pass as long as the sum of `output_tokens` across all
-chunks is not 0.
+``output_tokens`` typically count the number of tokens in each chunk, not
+the sum. This test will pass as long as the sum of ``output_tokens`` across
+all chunks is not ``0``.
.. code-block:: python
@@ -1198,6 +1208,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.returns_usage_metadata:
    pytest.skip("Not implemented.")
full: Optional[AIMessageChunk] = None
for chunk in model.stream("Write me 2 haikus. Only include the haikus."):
    assert isinstance(chunk, AIMessageChunk)
@@ -1262,7 +1273,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""Test that model does not fail when invoked with the ``stop`` parameter,
which is a standard parameter for stopping generation at a certain token.
-More on standard parameters here: https://python.langchain.com/docs/concepts/chat_models/#standard-parameters
+`More on standard parameters <https://python.langchain.com/docs/concepts/chat_models/#standard-parameters>`__
This should pass for all integrations.
@@ -1336,6 +1347,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.has_tool_calling:
    pytest.skip("Test requires tool calling.")
tool_choice_value = None if not self.has_tool_choice else "any"
# Emit warning if tool_choice_value property is overridden
if inspect.getattr_static(
@@ -1410,6 +1422,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.has_tool_calling:
    pytest.skip("Test requires tool calling.")
tool_choice_value = None if not self.has_tool_choice else "any"
model_with_tools = model.bind_tools(
    [magic_function], tool_choice=tool_choice_value
@@ -1519,10 +1532,10 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, check that:
-1. The model can correctly handle message histories that include AIMessage objects with ``""`` content.
-2. The ``tool_calls`` attribute on AIMessage objects is correctly handled and passed to the model in an appropriate format.
-3. The model can correctly handle ToolMessage objects with string content and arbitrary string values for ``tool_call_id``.
+1. The model can correctly handle message histories that include ``AIMessage`` objects with ``""`` content.
+2. The ``tool_calls`` attribute on ``AIMessage`` objects is correctly handled and passed to the model in an appropriate format.
+3. The model can correctly handle ``ToolMessage`` objects with string content and arbitrary string values for ``tool_call_id``.
You can ``xfail`` the test if tool calling is implemented but this format
is not supported.
@@ -1535,6 +1548,7 @@ class ChatModelIntegrationTests(ChatModelTests):
""" # noqa: E501
if not self.has_tool_calling:
    pytest.skip("Test requires tool calling.")
model_with_tools = model.bind_tools([my_adder_tool])
function_name = "my_adder_tool"
function_args = {"a": "1", "b": "2"}
@@ -1570,7 +1584,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""Test that message histories are compatible with list tool contents
(e.g. Anthropic format).
-These message histories will include AIMessage objects with "tool use" and
+These message histories will include ``AIMessage`` objects with "tool use" and
content blocks, e.g.,
.. code-block:: python
@@ -1604,8 +1618,8 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, check that:
-1. The model can correctly handle message histories that include AIMessage objects with list content.
-2. The ``tool_calls`` attribute on AIMessage objects is correctly handled and passed to the model in an appropriate format.
+1. The model can correctly handle message histories that include ``AIMessage`` objects with list content.
+2. The ``tool_calls`` attribute on ``AIMessage`` objects is correctly handled and passed to the model in an appropriate format.
3. The model can correctly handle ToolMessage objects with string content and arbitrary string values for ``tool_call_id``.
You can ``xfail`` the test if tool calling is implemented but this format
@@ -1620,6 +1634,7 @@ class ChatModelIntegrationTests(ChatModelTests):
""" # noqa: E501
if not self.has_tool_calling:
    pytest.skip("Test requires tool calling.")
model_with_tools = model.bind_tools([my_adder_tool])
function_name = "my_adder_tool"
function_args = {"a": 1, "b": 2}
@@ -1692,7 +1707,7 @@ class ChatModelIntegrationTests(ChatModelTests):
pytest.skip("Test requires tool choice.")
@tool
-def get_weather(location: str) -> str:  # pylint: disable=unused-argument
+def get_weather(location: str) -> str:
    """Get weather at a location."""
    return "It's sunny."
@@ -1750,6 +1765,7 @@ class ChatModelIntegrationTests(ChatModelTests):
""" # noqa: E501
if not self.has_tool_calling:
    pytest.skip("Test requires tool calling.")
tool_choice_value = None if not self.has_tool_choice else "any"
model_with_tools = model.bind_tools(
    [magic_function_no_args], tool_choice=tool_choice_value
@@ -1767,7 +1783,7 @@ class ChatModelIntegrationTests(ChatModelTests):
def test_tool_message_error_status(
    self, model: BaseChatModel, my_adder_tool: BaseTool
) -> None:
"""Test that ToolMessage with ``status="error"`` can be handled.
"""Test that ``ToolMessage`` with ``status="error"`` can be handled.
These messages may take the form:
@@ -1806,6 +1822,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.has_tool_calling:
    pytest.skip("Test requires tool calling.")
model_with_tools = model.bind_tools([my_adder_tool])
messages = [
    HumanMessage("What is 1 + 2"),
@@ -1860,8 +1877,9 @@ class ChatModelIntegrationTests(ChatModelTests):
.. dropdown:: Troubleshooting
-This test uses a utility function in ``langchain_core`` to generate a
-sequence of messages representing "few-shot" examples: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.tool_example_to_messages.html
+This test uses `a utility function <https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.tool_example_to_messages.html>`__
+in ``langchain_core`` to generate a sequence of messages representing
+"few-shot" examples.
If this test fails, check that the model can correctly handle this
sequence of messages.
@@ -1878,6 +1896,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.has_tool_calling:
    pytest.skip("Test requires tool calling.")
model_with_tools = model.bind_tools([my_adder_tool], tool_choice="any")
function_result = json.dumps({"result": 3})
@@ -1921,10 +1940,12 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, ensure that the model's ``bind_tools`` method
properly handles both JSON Schema and Pydantic V2 models.
-``langchain_core`` implements a utility function that will accommodate
-most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
-See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
+``langchain_core`` implements `a utility function <https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html>`__
+that will accommodate most formats.
+See `example implementation <https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output>`__
+of ``with_structured_output``.
"""
if not self.has_structured_output:
@@ -2000,10 +2021,12 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, ensure that the model's ``bind_tools`` method
properly handles both JSON Schema and Pydantic V2 models.
-``langchain_core`` implements a utility function that will accommodate
-most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
-See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
+``langchain_core`` implements `a utility function <https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html>`__
+that will accommodate most formats.
+See `example implementation <https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output>`__
+of ``with_structured_output``.
"""
if not self.has_structured_output:
@@ -2052,10 +2075,9 @@ class ChatModelIntegrationTests(ChatModelTests):
@pytest.mark.skipif(PYDANTIC_MAJOR_VERSION != 2, reason="Test requires pydantic 2.")
def test_structured_output_pydantic_2_v1(self, model: BaseChatModel) -> None:
"""Test to verify we can generate structured output using
pydantic.v1.BaseModel.
"""Test to verify we can generate structured output using ``pydantic.v1.BaseModel``.
pydantic.v1.BaseModel is available in the pydantic 2 package.
``pydantic.v1.BaseModel`` is available in the Pydantic 2 package.
This test is optional and should be skipped if the model does not support
structured output (see Configuration below).
@@ -2079,12 +2101,14 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, ensure that the model's ``bind_tools`` method
properly handles both JSON Schema and Pydantic V1 models.
-``langchain_core`` implements a utility function that will accommodate
-most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
-See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
-"""
+``langchain_core`` implements `a utility function <https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html>`__
+that will accommodate most formats.
+See `example implementation <https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output>`__
+of ``with_structured_output``.
+""" # noqa: E501
if not self.has_structured_output:
    pytest.skip("Test requires structured output.")
@@ -2141,10 +2165,12 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, ensure that the model's ``bind_tools`` method
properly handles Pydantic V2 models with optional parameters.
-``langchain_core`` implements a utility function that will accommodate
-most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
-See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
+``langchain_core`` implements `a utility function <https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html>`__
+that will accommodate most formats.
+See `example implementation <https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output>`__
+of ``with_structured_output``.
"""
if not self.has_structured_output:
@@ -2225,7 +2251,7 @@ class ChatModelIntegrationTests(ChatModelTests):
# Type ignoring since the interface only officially supports pydantic 1
# or pydantic.v1.BaseModel but not pydantic.BaseModel from pydantic 2.
# We'll need to do a pass updating the type signatures.
-chat = model.with_structured_output(Joke, method="json_mode")  # type: ignore[arg-type]
+chat = model.with_structured_output(Joke, method="json_mode")
msg = (
    "Tell me a joke about cats. Return the result as a JSON with 'setup' and "
    "'punchline' keys. Return nothing other than JSON."
@@ -2288,6 +2314,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.supports_pdf_inputs:
    pytest.skip("Model does not support PDF inputs.")
url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
pdf_data = base64.b64encode(httpx.get(url).content).decode("utf-8")
@@ -2364,6 +2391,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.supports_audio_inputs:
    pytest.skip("Model does not support audio inputs.")
url = "https://upload.wikimedia.org/wikipedia/commons/3/3d/Alcal%C3%A1_de_Henares_%28RPS_13-04-2024%29_canto_de_ruise%C3%B1or_%28Luscinia_megarhynchos%29_en_el_Soto_del_Henares.wav"
audio_data = base64.b64encode(httpx.get(url).content).decode("utf-8")
@@ -2465,6 +2493,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.supports_image_inputs:
    pytest.skip("Model does not support image message.")
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
@@ -2572,6 +2601,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.supports_image_tool_message:
    pytest.skip("Model does not support image tool message.")
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
@@ -2687,7 +2717,7 @@ class ChatModelIntegrationTests(ChatModelTests):
1. The model can correctly handle message histories that include message objects with list content.
2. The ``tool_calls`` attribute on AIMessage objects is correctly handled and passed to the model in an appropriate format.
-3. HumanMessages with "tool_result" content blocks are correctly handled.
+3. ``HumanMessage``s with "tool_result" content blocks are correctly handled.
Otherwise, if Anthropic tool call and result formats are not supported,
set the ``supports_anthropic_inputs`` property to False.
@@ -2793,7 +2823,7 @@ class ChatModelIntegrationTests(ChatModelTests):
assert isinstance(response, AIMessage)
def test_message_with_name(self, model: BaseChatModel) -> None:
"""Test that HumanMessage with values for the ``name`` field can be handled.
"""Test that ``HumanMessage`` with values for the ``name`` field can be handled.
These messages may take the form:
@@ -2842,7 +2872,7 @@ class ChatModelIntegrationTests(ChatModelTests):
chat model.
Check also that all required information (e.g., tool calling identifiers)
-from AIMessage objects is propagated correctly to model payloads.
+from ``AIMessage`` objects is propagated correctly to model payloads.
This test may fail if the chat model does not consistently generate tool
calls in response to an appropriate query. In these cases you can ``xfail``
@@ -2859,7 +2889,7 @@ class ChatModelIntegrationTests(ChatModelTests):
pytest.skip("Test requires tool calling.")
@tool
-def get_weather(location: str) -> str:  # pylint: disable=unused-argument
+def get_weather(location: str) -> str:
    """Call to surf the web."""
    return "It's sunny."
@@ -2953,12 +2983,13 @@ class ChatModelIntegrationTests(ChatModelTests):
Args:
    model: The chat model to test
-    tool_choice: Tool choice parameter to pass to bind_tools (provider-specific)
-    force_tool_call: Whether to force a tool call (use tool_choice=True if None)
+    tool_choice: Tool choice parameter to pass to ``bind_tools()`` (provider-specific)
+    force_tool_call: Whether to force a tool call (use ``tool_choice=True`` if None)

Tests that Unicode characters in tool call arguments are preserved correctly,
-not escaped as \\uXXXX sequences.
-"""
+not escaped as ``\\uXXXX`` sequences.
+""" # noqa: E501
if not self.has_tool_calling:
    pytest.skip("Test requires tool calling support.")


@@ -12,17 +12,11 @@ from langchain_core.load import dumpd, load
from langchain_core.runnables import RunnableBinding
from langchain_core.tools import BaseTool, tool
from pydantic import BaseModel, Field, SecretStr
-from pydantic.v1 import (
-    BaseModel as BaseModelV1,
-)
-from pydantic.v1 import (
-    Field as FieldV1,
-)
-from pydantic.v1 import (
-    ValidationError as ValidationErrorV1,
-)
+from pydantic.v1 import BaseModel as BaseModelV1
+from pydantic.v1 import Field as FieldV1
+from pydantic.v1 import ValidationError as ValidationErrorV1
from pytest_benchmark.fixture import BenchmarkFixture # type: ignore[import-untyped]
-from syrupy import SnapshotAssertion
+from syrupy.assertion import SnapshotAssertion
from langchain_tests.base import BaseStandardTests
from langchain_tests.utils.pydantic import PYDANTIC_MAJOR_VERSION
@@ -32,6 +26,7 @@ def generate_schema_pydantic_v1_from_2() -> Any:
"""Use to generate a schema from v1 namespace in pydantic 2.
:private:
"""
if PYDANTIC_MAJOR_VERSION != 2:
    msg = "This function is only compatible with Pydantic v2."
@@ -50,6 +45,7 @@ def generate_schema_pydantic() -> Any:
"""Works with either pydantic 1 or 2.
:private:
"""
class PersonA(BaseModel):
@@ -71,6 +67,7 @@ class ChatModelTests(BaseStandardTests):
"""Base class for chat model tests.
:private:
"""
@property
@@ -154,16 +151,12 @@ class ChatModelTests(BaseStandardTests):
@property
def supports_image_inputs(self) -> bool:
-    """(bool) whether the chat model supports image inputs, defaults to
-    ``False``.
-    """
+    """(bool) whether the chat model supports image inputs, defaults to ``False``.""" # noqa: E501
    return False

@property
def supports_image_urls(self) -> bool:
-    """(bool) whether the chat model supports image inputs from URLs, defaults to
-    ``False``.
-    """
+    """(bool) whether the chat model supports image inputs from URLs, defaults to ``False``.""" # noqa: E501
    return False
@property
@@ -173,23 +166,21 @@ class ChatModelTests(BaseStandardTests):
@property
def supports_audio_inputs(self) -> bool:
-    """(bool) whether the chat model supports audio inputs, defaults to
-    ``False``.
-    """
+    """(bool) whether the chat model supports audio inputs, defaults to ``False``.""" # noqa: E501
    return False

@property
def supports_video_inputs(self) -> bool:
    """(bool) whether the chat model supports video inputs, defaults to ``False``.

    No current tests are written for this feature.
    """
    return False

@property
def returns_usage_metadata(self) -> bool:
-    """(bool) whether the chat model returns usage metadata on invoke and streaming
-    responses.
-    """
+    """(bool) whether the chat model returns usage metadata on invoke and streaming responses.""" # noqa: E501
    return True
@property
@@ -199,9 +190,7 @@ class ChatModelTests(BaseStandardTests):
@property
def supports_image_tool_message(self) -> bool:
-    """(bool) whether the chat model supports ToolMessages that include image
-    content.
-    """
+    """(bool) whether the chat model supports ``ToolMessage``s that include image content.""" # noqa: E501
    return False
@property
@@ -211,6 +200,7 @@ class ChatModelTests(BaseStandardTests):
.. important::
See ``enable_vcr_tests`` dropdown :class:`above <ChatModelTests>` for more
information.
"""
return False
@@ -267,7 +257,7 @@ class ChatModelUnitTests(ChatModelTests):
API references for individual test methods include troubleshooting tips.
-Test subclasses must implement the following two properties:
+Test subclasses **must** implement the following two properties:
chat_model_class
The chat model class to test, e.g., ``ChatParrotLink``.
@@ -299,7 +289,7 @@ class ChatModelUnitTests(ChatModelTests):
Boolean property indicating whether the chat model supports tool calling.
-By default, this is determined by whether the chat model's `bind_tools` method
+By default, this is determined by whether the chat model's ``bind_tools`` method
is overridden. It typically does not need to be overridden on the test class.
Example override:
@@ -401,7 +391,7 @@ class ChatModelUnitTests(ChatModelTests):
Defaults to ``False``.
If set to ``True``, the chat model will be tested using content blocks of the
-form
+form.
.. code-block:: python
@@ -437,7 +427,7 @@ class ChatModelUnitTests(ChatModelTests):
URLs. Defaults to ``False``.
If set to ``True``, the chat model will be tested using content blocks of the
-form
+form.
.. code-block:: python
@@ -463,7 +453,7 @@ class ChatModelUnitTests(ChatModelTests):
Defaults to ``False``.
If set to ``True``, the chat model will be tested using content blocks of the
-form
+form.
.. code-block:: python
@@ -490,7 +480,7 @@ class ChatModelUnitTests(ChatModelTests):
Defaults to ``False``.
If set to ``True``, the chat model will be tested using content blocks of the
-form
+form.
.. code-block:: python
@@ -519,10 +509,10 @@ class ChatModelUnitTests(ChatModelTests):
.. dropdown:: returns_usage_metadata
Boolean property indicating whether the chat model returns usage metadata
-on invoke and streaming responses.
+on invoke and streaming responses. Defaults to ``True``.
-``usage_metadata`` is an optional dict attribute on AIMessages that track input
-and output tokens: https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html
+``usage_metadata`` is an optional dict attribute on ``AIMessage``s that track input
+and output tokens. `See more. <https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html>`__
Example:
@@ -533,7 +523,7 @@ class ChatModelUnitTests(ChatModelTests):
return False
Models supporting ``usage_metadata`` should also return the name of the
-underlying model in the ``response_metadata`` of the AIMessage.
+underlying model in the ``response_metadata`` of the ``AIMessage``.
.. dropdown:: supports_anthropic_inputs
@@ -567,7 +557,7 @@ class ChatModelUnitTests(ChatModelTests):
.. dropdown:: supports_image_tool_message
-Boolean property indicating whether the chat model supports ToolMessages
+Boolean property indicating whether the chat model supports ``ToolMessage``s
that include image content, e.g.,
.. code-block:: python
@@ -615,11 +605,11 @@ class ChatModelUnitTests(ChatModelTests):
.. dropdown:: supported_usage_metadata_details
-Property controlling what usage metadata details are emitted in both invoke
-and stream.
+Property controlling what usage metadata details are emitted in both ``invoke``
+and ``stream``.

-``usage_metadata`` is an optional dict attribute on AIMessages that track input
-and output tokens: https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html
+``usage_metadata`` is an optional dict attribute on ``AIMessage``s that track input
+and output tokens. `See more. <https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html>`__
It includes optional keys ``input_token_details`` and ``output_token_details``
that can track usage details associated with special types of tokens, such as
@@ -812,6 +802,7 @@ class ChatModelUnitTests(ChatModelTests):
def init_from_env_params(self) -> tuple[dict, dict, dict]:
"""(tuple) environment variables, additional initialization args, and expected
instance attributes for testing initialization from environment variables.
"""
return {}, {}, {}
@@ -823,7 +814,8 @@ class ChatModelUnitTests(ChatModelTests):
If this test fails, ensure that:
1. ``chat_model_params`` is specified and the model can be initialized from those params;
-2. The model accommodates standard parameters: https://python.langchain.com/docs/concepts/chat_models/#standard-parameters
+2. The model accommodates `standard parameters <https://python.langchain.com/docs/concepts/chat_models/#standard-parameters>`__
""" # noqa: E501
model = self.chat_model_class(
**{
@@ -843,6 +835,7 @@ class ChatModelUnitTests(ChatModelTests):
If this test fails, ensure that ``init_from_env_params`` is specified
correctly and that model parameters are properly set from environment
variables during initialization.
"""
env_params, model_params, expected_attrs = self.init_from_env_params
if not env_params:
@@ -867,6 +860,7 @@ class ChatModelUnitTests(ChatModelTests):
If this test fails, ensure that the model can be initialized with a
boolean ``streaming`` parameter.
"""
model = self.chat_model_class(
**{
@@ -893,6 +887,7 @@ class ChatModelUnitTests(ChatModelTests):
a utility function that will accommodate most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
See example implementation of ``bind_tools`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.bind_tools
"""
if not self.has_tool_calling:
    return
@@ -933,6 +928,7 @@ class ChatModelUnitTests(ChatModelTests):
a utility function that will accommodate most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
"""
if not self.has_structured_output:
    return
@@ -955,6 +951,7 @@ class ChatModelUnitTests(ChatModelTests):
Check also that the model class is named according to convention
(e.g., ``ChatProviderName``).
"""
class ExpectedParams(BaseModelV1):
@@ -992,6 +989,7 @@ class ChatModelUnitTests(ChatModelTests):
If this test fails, check that the ``init_from_env_params`` property is
correctly set on the test class.
"""
if not self.chat_model_class.is_lc_serializable():
    pytest.skip("Model is not serializable.")
@@ -1011,6 +1009,7 @@ class ChatModelUnitTests(ChatModelTests):
def test_init_time(self, benchmark: BenchmarkFixture) -> None:
"""Test initialization time of the chat model. If this test fails, check that
we are not introducing undue overhead in the model's initialization.
"""
def _init_in_loop() -> None:


@@ -1,15 +1,9 @@
from collections.abc import Iterator
from typing import Any, Optional
-from langchain_core.callbacks import (
-    CallbackManagerForLLMRun,
-)
+from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import BaseChatModel
-from langchain_core.messages import (
-    AIMessage,
-    AIMessageChunk,
-    BaseMessage,
-)
+from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage
from langchain_core.messages.ai import UsageMetadata
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from pydantic import Field
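These imports back a minimal test-only chat model. A hedged sketch of the smallest implementation the standard tests exercise — the class name and buffer parameter are illustrative; the troubleshooting docstrings above only require that ``_generate`` returns a valid ``ChatResult`` without raising:

```python
class ChatParrot(BaseChatModel):
    """Illustrative fake model that parrots the last message back."""

    parrot_buffer_length: int = Field(default=50)  # placeholder parameter

    @property
    def _llm_type(self) -> str:
        return "chat-parrot"

    def _generate(
        self,
        messages: list[BaseMessage],
        stop: Optional[list[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        # Echo a prefix of the last message; wrap it in ChatGeneration/ChatResult.
        text = str(messages[-1].content)[: self.parrot_buffer_length]
        return ChatResult(
            generations=[ChatGeneration(message=AIMessage(content=text))]
        )
```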


@@ -356,7 +356,7 @@ test = [
test-integration = []
typing = [
{ name = "langchain-text-splitters", directory = "../text-splitters" },
{ name = "mypy", specifier = ">=1.15,<1.16" },
{ name = "mypy", specifier = ">=1.17.1,<1.18" },
{ name = "types-pyyaml", specifier = ">=6.0.12.2,<7.0.0.0" },
{ name = "types-requests", specifier = ">=2.28.11.5,<3.0.0.0" },
]