feat: port various nit changes from wip-v0.4 (#32506)

Lots of work that wasn't directly related to core
improvements/messages/testing functionality.
Mason Daugherty
2025-08-11 15:09:08 -04:00
committed by GitHub
parent 7db9e60601
commit ee4c2510eb
63 changed files with 2213 additions and 2862 deletions


@@ -32,9 +32,7 @@ from pytest_benchmark.fixture import BenchmarkFixture # type: ignore[import-unt
from typing_extensions import TypedDict
from vcr.cassette import Cassette
-from langchain_tests.unit_tests.chat_models import (
-    ChatModelTests,
-)
+from langchain_tests.unit_tests.chat_models import ChatModelTests
from langchain_tests.utils.pydantic import PYDANTIC_MAJOR_VERSION
@@ -110,6 +108,7 @@ def magic_function_no_args() -> int:
def _validate_tool_call_message(message: BaseMessage) -> None:
assert isinstance(message, AIMessage)
assert len(message.tool_calls) == 1
tool_call = message.tool_calls[0]
assert tool_call["name"] == "magic_function"
assert tool_call["args"] == {"input": 3}
@@ -120,6 +119,7 @@ def _validate_tool_call_message(message: BaseMessage) -> None:
def _validate_tool_call_message_no_args(message: BaseMessage) -> None:
assert isinstance(message, AIMessage)
assert len(message.tool_calls) == 1
tool_call = message.tool_calls[0]
assert tool_call["name"] == "magic_function_no_args"
assert tool_call["args"] == {}
@@ -137,6 +137,7 @@ def unicode_customer(customer_name: str, description: str) -> str:
Returns:
A confirmation message about the customer creation.
"""
return f"Created customer: {customer_name} - {description}"
@@ -173,7 +174,7 @@ class ChatModelIntegrationTests(ChatModelTests):
API references for individual test methods include troubleshooting tips.
-Test subclasses must implement the following two properties:
+Test subclasses **must** implement the following two properties:
chat_model_class
The chat model class to test, e.g., ``ChatParrotLink``.
@@ -426,10 +427,10 @@ class ChatModelIntegrationTests(ChatModelTests):
.. dropdown:: returns_usage_metadata
Boolean property indicating whether the chat model returns usage metadata
-on invoke and streaming responses.
+on invoke and streaming responses. Defaults to ``True``.
-``usage_metadata`` is an optional dict attribute on AIMessages that track input
-and output tokens: https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html
+``usage_metadata`` is an optional dict attribute on ``AIMessage``s that tracks input
+and output tokens. `See more. <https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html>`__
Example:
@@ -440,7 +441,7 @@ class ChatModelIntegrationTests(ChatModelTests):
return False
Models supporting ``usage_metadata`` should also return the name of the
-underlying model in the ``response_metadata`` of the AIMessage.
+underlying model in the ``response_metadata`` of the ``AIMessage``.
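For illustration, a hedged sketch of the expected shape (keys follow the
``UsageMetadata`` TypedDict; the values and model name here are made up):

.. code-block:: python

    AIMessage(
        content="Hello!",
        usage_metadata={"input_tokens": 5, "output_tokens": 2, "total_tokens": 7},
        response_metadata={"model_name": "parrot-1"},
    )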
.. dropdown:: supports_anthropic_inputs
@@ -525,8 +526,8 @@ class ChatModelIntegrationTests(ChatModelTests):
Property controlling what usage metadata details are emitted in both invoke
and stream.
-``usage_metadata`` is an optional dict attribute on AIMessages that track input
-and output tokens: https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html
+``usage_metadata`` is an optional dict attribute on ``AIMessage``s that tracks input
+and output tokens. `See more. <https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html>`__
It includes optional keys ``input_token_details`` and ``output_token_details``
that can track usage details associated with special types of tokens, such as
@@ -682,13 +683,13 @@ class ChatModelIntegrationTests(ChatModelTests):
return {}
def test_invoke(self, model: BaseChatModel) -> None:
"""Test to verify that `model.invoke(simple_message)` works.
"""Test to verify that ``model.invoke(simple_message)`` works.
This should pass for all integrations.
.. dropdown:: Troubleshooting
-If this test fails, you should make sure your _generate method
+If this test fails, you should make sure your ``_generate`` method
does not raise any exceptions, and that it returns a valid
:class:`~langchain_core.outputs.chat_result.ChatResult` like so:
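.. code-block:: python

    # A minimal sketch of a valid return value:
    return ChatResult(
        generations=[ChatGeneration(message=AIMessage(content="Output text"))]
    )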
@@ -708,7 +709,7 @@ class ChatModelIntegrationTests(ChatModelTests):
assert len(result.content) > 0
async def test_ainvoke(self, model: BaseChatModel) -> None:
"""Test to verify that `await model.ainvoke(simple_message)` works.
"""Test to verify that ``await model.ainvoke(simple_message)`` works.
This should pass for all integrations. Passing this test does not indicate
a "natively async" implementation, but rather that the model can be used
@@ -718,7 +719,7 @@ class ChatModelIntegrationTests(ChatModelTests):
First, debug
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`.
-because `ainvoke` has a default implementation that calls `invoke` in an
+because ``ainvoke`` has a default implementation that calls ``invoke`` in an
async context.
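For reference, a hedged sketch of that default delegation (simplified from
``langchain_core``; not the verbatim source):

.. code-block:: python

    from langchain_core.runnables.config import run_in_executor

    async def ainvoke(self, input, config=None, **kwargs):
        # Run the synchronous ``invoke`` on an executor thread.
        return await run_in_executor(config, self.invoke, input, config, **kwargs)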
If that test passes but not this one, you should make sure your _agenerate
@@ -741,7 +742,7 @@ class ChatModelIntegrationTests(ChatModelTests):
assert len(result.content) > 0
def test_stream(self, model: BaseChatModel) -> None:
"""Test to verify that `model.stream(simple_message)` works.
"""Test to verify that ``model.stream(simple_message)`` works.
This should pass for all integrations. Passing this test does not indicate
a "streaming" implementation, but rather that the model can be used in a
@@ -751,10 +752,10 @@ class ChatModelIntegrationTests(ChatModelTests):
First, debug
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`.
-because `stream` has a default implementation that calls `invoke` and yields
-the result as a single chunk.
+because ``stream`` has a default implementation that calls ``invoke`` and
+yields the result as a single chunk.
-If that test passes but not this one, you should make sure your _stream
+If that test passes but not this one, you should make sure your ``_stream``
method does not raise any exceptions, and that it yields valid
:class:`~langchain_core.outputs.chat_generation.ChatGenerationChunk`
objects like so:
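.. code-block:: python

    # A minimal sketch of a valid yielded chunk:
    yield ChatGenerationChunk(
        message=AIMessageChunk(content="chunk text")
    )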
@@ -770,11 +771,12 @@ class ChatModelIntegrationTests(ChatModelTests):
for chunk in model.stream("Hello"):
assert chunk is not None
assert isinstance(chunk, AIMessageChunk)
assert isinstance(chunk.content, (str, list))
num_chunks += 1
assert num_chunks > 0
async def test_astream(self, model: BaseChatModel) -> None:
"""Test to verify that `await model.astream(simple_message)` works.
"""Test to verify that ``await model.astream(simple_message)`` works.
This should pass for all integrations. Passing this test does not indicate
a "natively async" or "streaming" implementation, but rather that the model can
@@ -786,11 +788,11 @@ class ChatModelIntegrationTests(ChatModelTests):
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_stream`.
and
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_ainvoke`.
-because `astream` has a default implementation that calls `_stream` in an
-async context if it is implemented, or `ainvoke` and yields the result as a
-single chunk if not.
+because ``astream`` has a default implementation that calls ``_stream`` in
+an async context if it is implemented, or ``ainvoke`` and yields the result
+as a single chunk if not.
-If those tests pass but not this one, you should make sure your _astream
+If those tests pass but not this one, you should make sure your ``_astream``
method does not raise any exceptions, and that it yields valid
:class:`~langchain_core.outputs.chat_generation.ChatGenerationChunk`
objects like so:
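.. code-block:: python

    # A minimal sketch of a valid chunk yielded from ``_astream``:
    yield ChatGenerationChunk(
        message=AIMessageChunk(content="chunk text")
    )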
@@ -811,7 +813,7 @@ class ChatModelIntegrationTests(ChatModelTests):
assert num_chunks > 0
def test_batch(self, model: BaseChatModel) -> None:
"""Test to verify that `model.batch([messages])` works.
"""Test to verify that ``model.batch([messages])`` works.
This should pass for all integrations. Tests the model's ability to process
multiple prompts in a single batch.
@@ -820,12 +822,13 @@ class ChatModelIntegrationTests(ChatModelTests):
First, debug
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`
-because `batch` has a default implementation that calls `invoke` for each
-message in the batch.
+because ``batch`` has a default implementation that calls ``invoke`` for
+each message in the batch.
-If that test passes but not this one, you should make sure your `batch`
+If that test passes but not this one, you should make sure your ``batch``
method does not raise any exceptions, and that it returns a list of valid
:class:`~langchain_core.messages.AIMessage` objects.
"""
batch_results = model.batch(["Hello", "Hey"])
assert batch_results is not None
@@ -838,7 +841,7 @@ class ChatModelIntegrationTests(ChatModelTests):
assert len(result.content) > 0
async def test_abatch(self, model: BaseChatModel) -> None:
"""Test to verify that `await model.abatch([messages])` works.
"""Test to verify that ``await model.abatch([messages])`` works.
This should pass for all integrations. Tests the model's ability to process
multiple prompts in a single batch asynchronously.
@@ -849,12 +852,13 @@ class ChatModelIntegrationTests(ChatModelTests):
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_batch`
and
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_ainvoke`
-because `abatch` has a default implementation that calls `ainvoke` for each
-message in the batch.
+because ``abatch`` has a default implementation that calls ``ainvoke`` for
+each message in the batch.
-If those tests pass but not this one, you should make sure your `abatch`
+If those tests pass but not this one, you should make sure your ``abatch``
method does not raise any exceptions, and that it returns a list of valid
:class:`~langchain_core.messages.AIMessage` objects.
"""
batch_results = await model.abatch(["Hello", "Hey"])
assert batch_results is not None
@@ -877,18 +881,20 @@ class ChatModelIntegrationTests(ChatModelTests):
First, debug
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`
-because this test also uses `model.invoke()`.
+because this test also uses ``model.invoke()``.
If that test passes but not this one, you should verify that:
1. Your model correctly processes the message history
2. The model maintains appropriate context from previous messages
3. The response is a valid :class:`~langchain_core.messages.AIMessage`
"""
messages = [
HumanMessage("hello"),
AIMessage("hello"),
HumanMessage("how are you"),
]
result = model.invoke(messages)
assert result is not None
assert isinstance(result, AIMessage)
@@ -906,17 +912,17 @@ class ChatModelIntegrationTests(ChatModelTests):
First, debug
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`
-because this test also uses `model.invoke()`.
+because this test also uses ``model.invoke()``.
Second, debug
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_conversation`
because this test is the "basic case" without double messages.
If those tests pass but not this one, you should verify that:
-1. Your model API can handle double messages, or the integration should
-merge messages before sending them to the API.
+1. Your model API can handle double messages, or the integration should merge messages before sending them to the API.
2. The response is a valid :class:`~langchain_core.messages.AIMessage`
"""
""" # noqa: E501
messages = [
SystemMessage("hello"),
SystemMessage("hello"),
@@ -926,6 +932,7 @@ class ChatModelIntegrationTests(ChatModelTests):
AIMessage("hello"),
HumanMessage("how are you"),
]
result = model.invoke(messages)
assert result is not None
assert isinstance(result, AIMessage)
@@ -940,13 +947,14 @@ class ChatModelIntegrationTests(ChatModelTests):
.. versionchanged:: 0.3.17
-Additionally check for the presence of `model_name` in the response
+Additionally check for the presence of ``model_name`` in the response
metadata, which is needed for usage tracking in callback handlers.
.. dropdown:: Configuration
By default, this test is run.
-To disable this feature, set `returns_usage_metadata` to False in your
+To disable this feature, set ``returns_usage_metadata`` to ``False`` in your
test class:
.. code-block:: python
@@ -957,7 +965,7 @@ class ChatModelIntegrationTests(ChatModelTests):
return False
This test can also check the format of specific kinds of usage metadata
-based on the `supported_usage_metadata_details` property. This property
+based on the ``supported_usage_metadata_details`` property. This property
should be configured as follows with the types of tokens that the model
supports tracking:
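For example, a hedged sketch (the key and token-type names are assumed from
the property's documentation):

.. code-block:: python

    @property
    def supported_usage_metadata_details(self) -> dict:
        return {
            "invoke": ["cache_read_input", "cache_creation_input"],
            "stream": ["reasoning_output"],
        }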
@@ -988,7 +996,7 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, first verify that your model returns
:class:`~langchain_core.messages.ai.UsageMetadata` dicts
-attached to the returned AIMessage object in `_generate`:
+attached to the returned AIMessage object in ``_generate``:
.. code-block:: python
@@ -1020,9 +1028,11 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.returns_usage_metadata:
pytest.skip("Not implemented.")
result = model.invoke("Hello")
assert result is not None
assert isinstance(result, AIMessage)
assert result.usage_metadata is not None
assert isinstance(result.usage_metadata["input_tokens"], int)
assert isinstance(result.usage_metadata["output_tokens"], int)
@@ -1106,13 +1116,13 @@ class ChatModelIntegrationTests(ChatModelTests):
.. versionchanged:: 0.3.17
-Additionally check for the presence of `model_name` in the response
+Additionally check for the presence of ``model_name`` in the response
metadata, which is needed for usage tracking in callback handlers.
.. dropdown:: Configuration
By default, this test is run.
-To disable this feature, set `returns_usage_metadata` to False in your
+To disable this feature, set ``returns_usage_metadata`` to ``False`` in your
test class:
.. code-block:: python
@@ -1123,7 +1133,7 @@ class ChatModelIntegrationTests(ChatModelTests):
return False
This test can also check the format of specific kinds of usage metadata
-based on the `supported_usage_metadata_details` property. This property
+based on the ``supported_usage_metadata_details`` property. This property
should be configured as follows with the types of tokens that the model
supports tracking:
@@ -1153,16 +1163,16 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, first verify that your model yields
:class:`~langchain_core.messages.ai.UsageMetadata` dicts
-attached to the returned AIMessage object in `_stream`
+attached to the returned AIMessage object in ``_stream``
that sum up to the total usage metadata.
-Note that `input_tokens` should only be included on one of the chunks
-(typically the first or the last chunk), and the rest should have 0 or None
-to avoid counting input tokens multiple times.
+Note that ``input_tokens`` should only be included on one of the chunks
+(typically the first or the last chunk), and the rest should have ``0`` or
+``None`` to avoid counting input tokens multiple times.
-`output_tokens` typically count the number of tokens in each chunk, not the
-sum. This test will pass as long as the sum of `output_tokens` across all
-chunks is not 0.
+``output_tokens`` typically counts the number of tokens in each chunk, not
+the sum. This test will pass as long as the sum of ``output_tokens`` across
+all chunks is not ``0``.
.. code-block:: python
@@ -1198,6 +1208,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.returns_usage_metadata:
pytest.skip("Not implemented.")
full: Optional[AIMessageChunk] = None
for chunk in model.stream("Write me 2 haikus. Only include the haikus."):
assert isinstance(chunk, AIMessageChunk)
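# Illustrative only, a hedged sketch: adding chunks merges ``usage_metadata``,
# which is why ``input_tokens`` should appear on a single chunk.
example_sum = AIMessageChunk(
    content="",
    usage_metadata={"input_tokens": 5, "output_tokens": 1, "total_tokens": 6},
) + AIMessageChunk(
    content="",
    usage_metadata={"input_tokens": 0, "output_tokens": 1, "total_tokens": 1},
)
assert example_sum.usage_metadata == {
    "input_tokens": 5,
    "output_tokens": 2,
    "total_tokens": 7,
}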
@@ -1262,7 +1273,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""Test that model does not fail when invoked with the ``stop`` parameter,
which is a standard parameter for stopping generation at a certain token.
-More on standard parameters here: https://python.langchain.com/docs/concepts/chat_models/#standard-parameters
+`More on standard parameters <https://python.langchain.com/docs/concepts/chat_models/#standard-parameters>`__
This should pass for all integrations.
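For example, a hedged sketch of the kind of call this test makes:

.. code-block:: python

    result = model.invoke("hi", stop=["you"])
    assert isinstance(result, AIMessage)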
@@ -1336,6 +1347,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.has_tool_calling:
pytest.skip("Test requires tool calling.")
tool_choice_value = None if not self.has_tool_choice else "any"
# Emit warning if tool_choice_value property is overridden
if inspect.getattr_static(
@@ -1410,6 +1422,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.has_tool_calling:
pytest.skip("Test requires tool calling.")
tool_choice_value = None if not self.has_tool_choice else "any"
model_with_tools = model.bind_tools(
[magic_function], tool_choice=tool_choice_value
@@ -1519,10 +1532,10 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, check that:
assert tool_call.get("type") == "tool_call"
-1. The model can correctly handle message histories that include AIMessage objects with ``""`` content.
-2. The ``tool_calls`` attribute on AIMessage objects is correctly handled and passed to the model in an appropriate format.
-3. The model can correctly handle ToolMessage objects with string content and arbitrary string values for ``tool_call_id``.
+1. The model can correctly handle message histories that include ``AIMessage`` objects with ``""`` content.
+2. The ``tool_calls`` attribute on ``AIMessage`` objects is correctly handled and passed to the model in an appropriate format.
+3. The model can correctly handle ``ToolMessage`` objects with string content and arbitrary string values for ``tool_call_id``.
You can ``xfail`` the test if tool calling is implemented but this format
is not supported.
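For example, a hedged sketch of such an override in a test subclass:

.. code-block:: python

    @pytest.mark.xfail(reason="Not implemented.")
    def test_tool_message_histories_string_content(self, *args: Any) -> None:
        super().test_tool_message_histories_string_content(*args)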
@@ -1535,6 +1548,7 @@ class ChatModelIntegrationTests(ChatModelTests):
""" # noqa: E501
if not self.has_tool_calling:
pytest.skip("Test requires tool calling.")
model_with_tools = model.bind_tools([my_adder_tool])
function_name = "my_adder_tool"
function_args = {"a": "1", "b": "2"}
@@ -1570,7 +1584,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""Test that message histories are compatible with list tool contents
(e.g. Anthropic format).
-These message histories will include AIMessage objects with "tool use" and
+These message histories will include ``AIMessage`` objects with "tool use" and
content blocks, e.g.,
.. code-block:: python
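    # A hedged illustration of Anthropic-style "tool use" content blocks
    # (ids and values are hypothetical):
    [
        {"type": "text", "text": "Let me call that tool."},
        {
            "type": "tool_use",
            "id": "toolu_abc123",
            "name": "my_adder_tool",
            "input": {"a": 1, "b": 2},
        },
    ]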
@@ -1604,8 +1618,8 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, check that:
-1. The model can correctly handle message histories that include AIMessage objects with list content.
-2. The ``tool_calls`` attribute on AIMessage objects is correctly handled and passed to the model in an appropriate format.
+1. The model can correctly handle message histories that include ``AIMessage`` objects with list content.
+2. The ``tool_calls`` attribute on ``AIMessage`` objects is correctly handled and passed to the model in an appropriate format.
3. The model can correctly handle ToolMessage objects with string content and arbitrary string values for ``tool_call_id``.
You can ``xfail`` the test if tool calling is implemented but this format
@@ -1620,6 +1634,7 @@ class ChatModelIntegrationTests(ChatModelTests):
""" # noqa: E501
if not self.has_tool_calling:
pytest.skip("Test requires tool calling.")
model_with_tools = model.bind_tools([my_adder_tool])
function_name = "my_adder_tool"
function_args = {"a": 1, "b": 2}
@@ -1692,7 +1707,7 @@ class ChatModelIntegrationTests(ChatModelTests):
pytest.skip("Test requires tool choice.")
@tool
-def get_weather(location: str) -> str: # pylint: disable=unused-argument
+def get_weather(location: str) -> str:
"""Get weather at a location."""
return "It's sunny."
@@ -1750,6 +1765,7 @@ class ChatModelIntegrationTests(ChatModelTests):
""" # noqa: E501
if not self.has_tool_calling:
pytest.skip("Test requires tool calling.")
tool_choice_value = None if not self.has_tool_choice else "any"
model_with_tools = model.bind_tools(
[magic_function_no_args], tool_choice=tool_choice_value
@@ -1767,7 +1783,7 @@ class ChatModelIntegrationTests(ChatModelTests):
def test_tool_message_error_status(
self, model: BaseChatModel, my_adder_tool: BaseTool
) -> None:
"""Test that ToolMessage with ``status="error"`` can be handled.
"""Test that ``ToolMessage`` with ``status="error"`` can be handled.
These messages may take the form:
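.. code-block:: python

    # A hedged sketch of such a message (the id value is hypothetical):
    ToolMessage(
        "Error: Missing required argument 'b'.",
        name="my_adder_tool",
        tool_call_id="abc123",
        status="error",
    )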
@@ -1806,6 +1822,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.has_tool_calling:
pytest.skip("Test requires tool calling.")
model_with_tools = model.bind_tools([my_adder_tool])
messages = [
HumanMessage("What is 1 + 2"),
@@ -1860,8 +1877,9 @@ class ChatModelIntegrationTests(ChatModelTests):
.. dropdown:: Troubleshooting
-This test uses a utility function in ``langchain_core`` to generate a
-sequence of messages representing "few-shot" examples: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.tool_example_to_messages.html
+This test uses `a utility function <https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.tool_example_to_messages.html>`__
+in ``langchain_core`` to generate a sequence of messages representing
+"few-shot" examples.
If this test fails, check that the model can correctly handle this
sequence of messages.
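For reference, a hedged sketch of calling that utility (the signature is
assumed from the linked API reference; the ``add`` schema is hypothetical):

.. code-block:: python

    from langchain_core.utils.function_calling import tool_example_to_messages
    from pydantic import BaseModel

    class add(BaseModel):
        a: int
        b: int

    few_shot_messages = tool_example_to_messages(
        input="What is 1 + 2",
        tool_calls=[add(a=1, b=2)],
        tool_outputs=["3"],
    )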
@@ -1878,6 +1896,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.has_tool_calling:
pytest.skip("Test requires tool calling.")
model_with_tools = model.bind_tools([my_adder_tool], tool_choice="any")
function_result = json.dumps({"result": 3})
@@ -1921,10 +1940,12 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, ensure that the model's ``bind_tools`` method
properly handles both JSON Schema and Pydantic V2 models.
-``langchain_core`` implements a utility function that will accommodate
-most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
-See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
+``langchain_core`` implements `a utility function <https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html>`__
+that will accommodate most formats.
+See `example implementation <https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output>`__
+of ``with_structured_output``.
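For reference, a hedged sketch of that utility in use:

.. code-block:: python

    from langchain_core.utils.function_calling import convert_to_openai_tool
    from pydantic import BaseModel

    class Joke(BaseModel):
        setup: str
        punchline: str

    tool = convert_to_openai_tool(Joke)
    assert tool["type"] == "function"
    assert tool["function"]["name"] == "Joke"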
"""
if not self.has_structured_output:
@@ -2000,10 +2021,12 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, ensure that the model's ``bind_tools`` method
properly handles both JSON Schema and Pydantic V2 models.
-``langchain_core`` implements a utility function that will accommodate
-most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
-See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
+``langchain_core`` implements `a utility function <https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html>`__
+that will accommodate most formats.
+See `example implementation <https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output>`__
+of ``with_structured_output``.
"""
if not self.has_structured_output:
@@ -2052,10 +2075,9 @@ class ChatModelIntegrationTests(ChatModelTests):
@pytest.mark.skipif(PYDANTIC_MAJOR_VERSION != 2, reason="Test requires pydantic 2.")
def test_structured_output_pydantic_2_v1(self, model: BaseChatModel) -> None:
"""Test to verify we can generate structured output using
pydantic.v1.BaseModel.
"""Test to verify we can generate structured output using ``pydantic.v1.BaseModel``.
pydantic.v1.BaseModel is available in the pydantic 2 package.
``pydantic.v1.BaseModel`` is available in the Pydantic 2 package.
This test is optional and should be skipped if the model does not support
structured output (see Configuration below).
@@ -2079,12 +2101,14 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, ensure that the model's ``bind_tools`` method
properly handles both JSON Schema and Pydantic V1 models.
-``langchain_core`` implements a utility function that will accommodate
-most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
-See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
-"""
+``langchain_core`` implements `a utility function <https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html>`__
+that will accommodate most formats.
+See `example implementation <https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output>`__
+of ``with_structured_output``.
+""" # noqa: E501
if not self.has_structured_output:
pytest.skip("Test requires structured output.")
@@ -2141,10 +2165,12 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, ensure that the model's ``bind_tools`` method
properly handles Pydantic V2 models with optional parameters.
-``langchain_core`` implements a utility function that will accommodate
-most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
-See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
+``langchain_core`` implements `a utility function <https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html>`__
+that will accommodate most formats.
+See `example implementation <https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output>`__
+of ``with_structured_output``.
"""
if not self.has_structured_output:
@@ -2225,7 +2251,7 @@ class ChatModelIntegrationTests(ChatModelTests):
# Type ignoring since the interface only officially supports pydantic 1
# or pydantic.v1.BaseModel but not pydantic.BaseModel from pydantic 2.
# We'll need to do a pass updating the type signatures.
-chat = model.with_structured_output(Joke, method="json_mode") # type: ignore[arg-type]
+chat = model.with_structured_output(Joke, method="json_mode")
msg = (
"Tell me a joke about cats. Return the result as a JSON with 'setup' and "
"'punchline' keys. Return nothing other than JSON."
@@ -2288,6 +2314,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.supports_pdf_inputs:
pytest.skip("Model does not support PDF inputs.")
url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
pdf_data = base64.b64encode(httpx.get(url).content).decode("utf-8")
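# Illustrative only, a hedged sketch: a base64 file content block built from
# ``pdf_data``, assuming the cross-provider "file" block format:
example_pdf_block = {
    "type": "file",
    "source_type": "base64",
    "mime_type": "application/pdf",
    "data": pdf_data,
}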
@@ -2364,6 +2391,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.supports_audio_inputs:
pytest.skip("Model does not support audio inputs.")
url = "https://upload.wikimedia.org/wikipedia/commons/3/3d/Alcal%C3%A1_de_Henares_%28RPS_13-04-2024%29_canto_de_ruise%C3%B1or_%28Luscinia_megarhynchos%29_en_el_Soto_del_Henares.wav"
audio_data = base64.b64encode(httpx.get(url).content).decode("utf-8")
@@ -2465,6 +2493,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.supports_image_inputs:
pytest.skip("Model does not support image message.")
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
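# Illustrative only, a hedged sketch: an OpenAI-style image content block
# built from ``image_data``:
example_image_block = {
    "type": "image_url",
    "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
}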
@@ -2572,6 +2601,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.supports_image_tool_message:
pytest.skip("Model does not support image tool message.")
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
@@ -2687,7 +2717,7 @@ class ChatModelIntegrationTests(ChatModelTests):
1. The model can correctly handle message histories that include message objects with list content.
2. The ``tool_calls`` attribute on AIMessage objects is correctly handled and passed to the model in an appropriate format.
-3. HumanMessages with "tool_result" content blocks are correctly handled.
+3. ``HumanMessage``s with "tool_result" content blocks are correctly handled.
Otherwise, if Anthropic tool call and result formats are not supported,
set the ``supports_anthropic_inputs`` property to False.
@@ -2793,7 +2823,7 @@ class ChatModelIntegrationTests(ChatModelTests):
assert isinstance(response, AIMessage)
def test_message_with_name(self, model: BaseChatModel) -> None:
"""Test that HumanMessage with values for the ``name`` field can be handled.
"""Test that ``HumanMessage`` with values for the ``name`` field can be handled.
These messages may take the form:
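.. code-block:: python

    # A hedged sketch of such a message (the name value is hypothetical):
    HumanMessage("hello", name="example_user")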
@@ -2842,7 +2872,7 @@ class ChatModelIntegrationTests(ChatModelTests):
chat model.
Check also that all required information (e.g., tool calling identifiers)
-from AIMessage objects is propagated correctly to model payloads.
+from ``AIMessage`` objects is propagated correctly to model payloads.
This test may fail if the chat model does not consistently generate tool
calls in response to an appropriate query. In these cases you can ``xfail``
@@ -2859,7 +2889,7 @@ class ChatModelIntegrationTests(ChatModelTests):
pytest.skip("Test requires tool calling.")
@tool
-def get_weather(location: str) -> str: # pylint: disable=unused-argument
+def get_weather(location: str) -> str:
"""Call to surf the web."""
return "It's sunny."
@@ -2953,12 +2983,13 @@ class ChatModelIntegrationTests(ChatModelTests):
Args:
model: The chat model to test
-tool_choice: Tool choice parameter to pass to bind_tools (provider-specific)
-force_tool_call: Whether to force a tool call (use tool_choice=True if None)
+tool_choice: Tool choice parameter to pass to ``bind_tools()`` (provider-specific)
+force_tool_call: Whether to force a tool call (use ``tool_choice=True`` if None)
Tests that Unicode characters in tool call arguments are preserved correctly,
-not escaped as \\uXXXX sequences.
-"""
+not escaped as ``\\uXXXX`` sequences.
+""" # noqa: E501
if not self.has_tool_calling:
pytest.skip("Test requires tool calling support.")