From 81c4f21b5297b31b2cd82ccefdd134aa0e59efa0 Mon Sep 17 00:00:00 2001 From: ccurme Date: Sat, 1 Nov 2025 16:38:20 -0400 Subject: [PATCH] fix(standard-tests): update multimodal tests (#33781) --- .../integration_tests/test_chat_models.py | 10 +-- .../chat_models/test_base.py | 4 +- .../integration_tests/chat_models.py | 77 +++++++++++++++---- .../langchain_tests/unit_tests/chat_models.py | 10 +++ 4 files changed, 77 insertions(+), 24 deletions(-) diff --git a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py index 7041563c348..b4c6c373c23 100644 --- a/libs/partners/anthropic/tests/integration_tests/test_chat_models.py +++ b/libs/partners/anthropic/tests/integration_tests/test_chat_models.py @@ -1078,14 +1078,14 @@ def test_image_tool_calling() -> None: "text": "what's your favorite color in this image", }, ] - image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + image_url = "https://raw.githubusercontent.com/langchain-ai/docs/4d11d08b6b0e210bd456943f7a22febbd168b543/src/images/agentic-rag-output.png" image_data = b64encode(httpx.get(image_url).content).decode("utf-8") human_content.append( { "type": "image", "source": { "type": "base64", - "media_type": "image/jpeg", + "media_type": "image/png", "data": image_data, }, }, @@ -1098,7 +1098,7 @@ def test_image_tool_calling() -> None: {"type": "text", "text": "Hmm let me think about that"}, { "type": "tool_use", - "input": {"fav_color": "green"}, + "input": {"fav_color": "purple"}, "id": "foo", "name": "color_picker", }, @@ -1112,7 +1112,7 @@ def test_image_tool_calling() -> None: "content": [ { "type": "text", - "text": "green is a great pick! that's my sister's favorite color", # noqa: E501 + "text": "purple is a great pick! that's my sister's favorite color", # noqa: E501 }, ], "is_error": False, @@ -1122,7 +1122,7 @@ def test_image_tool_calling() -> None: ), ] llm = ChatAnthropic(model=MODEL_NAME) # type: ignore[call-arg] - llm.bind_tools([color_picker]).invoke(messages) + _ = llm.bind_tools([color_picker]).invoke(messages) @pytest.mark.default_cassette("test_web_search.yaml.gz") diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py index b35249e2a19..01214fff790 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py @@ -709,7 +709,7 @@ async def test_openai_response_headers_async(use_responses_api: bool) -> None: def test_image_token_counting_jpeg() -> None: model = ChatOpenAI(model="gpt-4o", temperature=0) - image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + image_url = "https://raw.githubusercontent.com/langchain-ai/docs/9f99bb977307a1bd5efeb8dc6b67eb13904c4af1/src/oss/images/checkpoints.jpg" message = HumanMessage( content=[ {"type": "text", "text": "describe the weather in this image"}, @@ -741,7 +741,7 @@ def test_image_token_counting_jpeg() -> None: def test_image_token_counting_png() -> None: model = ChatOpenAI(model="gpt-4o", temperature=0) - image_url = "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png" + image_url = "https://raw.githubusercontent.com/langchain-ai/docs/4d11d08b6b0e210bd456943f7a22febbd168b543/src/images/agentic-rag-output.png" message = HumanMessage( content=[ {"type": "text", "text": "how many dice are in this image"}, diff --git a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py index 6f80704d1a1..332237349ff 100644 --- a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py +++ b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py @@ -4,6 +4,8 @@ from __future__ import annotations import base64 import json +import os +import warnings from typing import Annotated, Any, Literal from unittest.mock import MagicMock @@ -134,6 +136,21 @@ def _validate_tool_call_message_no_args(message: BaseMessage) -> None: assert tool_call.get("type") == "tool_call" +def _get_base64_from_url(url: str) -> str: + user_agent = os.environ.get("LANGCHAIN_TESTS_USER_AGENT") + if not user_agent: + warning_message = ( + "LANGCHAIN_TESTS_USER_AGENT environment variable not set. " + "langchain-tests pulls (CC0 License) audio data from wikimedia.org. " + "Consider setting a user agent to identify your requests. See " + "https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy" + ) + warnings.warn(warning_message, stacklevel=2) + headers = {"User-Agent": user_agent} if user_agent else {} + httpx_response = httpx.get(url, headers=headers).content + return base64.b64encode(httpx_response).decode("utf-8") + + @tool def unicode_customer(customer_name: str, description: str) -> str: """Tool for creating a customer with Unicode name. @@ -405,6 +422,16 @@ class ChatModelIntegrationTests(ChatModelTests): return True ``` + Note: this test downloads audio data from wikimedia.org. You may need to set + the `LANGCHAIN_TESTS_USER_AGENT` environment variable to identify these + requests, e.g., + + ```bash + export LANGCHAIN_TESTS_USER_AGENT="CoolBot/0.0 (https://example.org/coolbot/; coolbot@example.org) generic-library/0.0" + ``` + + Refer to the [Wikimedia Foundation User-Agent Policy](https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy). + ??? info "`supports_video_inputs`" Boolean property indicating whether the chat model supports image inputs. @@ -2429,6 +2456,16 @@ class ChatModelIntegrationTests(ChatModelTests): } ``` + Note: this test downloads audio data from wikimedia.org. You may need to set + the `LANGCHAIN_TESTS_USER_AGENT` environment variable to identify these + requests, e.g., + + ```bash + export LANGCHAIN_TESTS_USER_AGENT="CoolBot/0.0 (https://example.org/coolbot/; coolbot@example.org) generic-library/0.0" + ``` + + Refer to the [Wikimedia Foundation User-Agent Policy](https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy). + ??? note "Configuration" To disable this test, set `supports_audio_inputs` to False in your @@ -2447,12 +2484,14 @@ class ChatModelIntegrationTests(ChatModelTests): with audio content blocks, specifically base64-encoded files. Otherwise, set the `supports_audio_inputs` property to False. - """ + """ # noqa: E501 if not self.supports_audio_inputs: pytest.skip("Model does not support audio inputs.") - url = "https://upload.wikimedia.org/wikipedia/commons/3/3d/Alcal%C3%A1_de_Henares_%28RPS_13-04-2024%29_canto_de_ruise%C3%B1or_%28Luscinia_megarhynchos%29_en_el_Soto_del_Henares.wav" - audio_data = base64.b64encode(httpx.get(url).content).decode("utf-8") + # https://commons.wikimedia.org/wiki/File:Northern_Flicker_202280456.wav + # License: CC0 1.0 Universal + url = "https://upload.wikimedia.org/wikipedia/commons/6/6a/Northern_Flicker_202280456.wav" + audio_data = _get_base64_from_url(url) message = HumanMessage( [ @@ -2551,16 +2590,16 @@ class ChatModelIntegrationTests(ChatModelTests): if not self.supports_image_inputs: pytest.skip("Model does not support image message.") - image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + image_url = "https://raw.githubusercontent.com/langchain-ai/docs/4d11d08b6b0e210bd456943f7a22febbd168b543/src/images/agentic-rag-output.png" image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8") # OpenAI CC format, base64 data message = HumanMessage( content=[ - {"type": "text", "text": "describe the weather in this image"}, + {"type": "text", "text": "Give a concise description of this image."}, { "type": "image_url", - "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}, + "image_url": {"url": f"data:image/png;base64,{image_data}"}, }, ], ) @@ -2569,11 +2608,11 @@ class ChatModelIntegrationTests(ChatModelTests): # Standard LangChain format, base64 data message = HumanMessage( content=[ - {"type": "text", "text": "describe the weather in this image"}, + {"type": "text", "text": "Give a concise description of this image."}, { "type": "image", "base64": image_data, - "mime_type": "image/jpeg", + "mime_type": "image/png", }, ], ) @@ -2583,7 +2622,10 @@ class ChatModelIntegrationTests(ChatModelTests): if self.supports_image_urls: message = HumanMessage( content=[ - {"type": "text", "text": "describe the weather in this image"}, + { + "type": "text", + "text": "Give a concise description of this image.", + }, { "type": "image", "url": image_url, @@ -2654,7 +2696,7 @@ class ChatModelIntegrationTests(ChatModelTests): if not self.supports_image_tool_message: pytest.skip("Model does not support image tool message.") - image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + image_url = "https://raw.githubusercontent.com/langchain-ai/docs/4d11d08b6b0e210bd456943f7a22febbd168b543/src/images/agentic-rag-output.png" image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8") # OpenAI CC format, base64 data @@ -2662,7 +2704,7 @@ class ChatModelIntegrationTests(ChatModelTests): content=[ { "type": "image_url", - "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}, + "image_url": {"url": f"data:image/png;base64,{image_data}"}, }, ], tool_call_id="1", @@ -2675,7 +2717,7 @@ class ChatModelIntegrationTests(ChatModelTests): { "type": "image", "base64": image_data, - "mime_type": "image/jpeg", + "mime_type": "image/png", }, ], tool_call_id="1", @@ -2685,7 +2727,8 @@ class ChatModelIntegrationTests(ChatModelTests): for tool_message in [oai_format_message, standard_format_message]: messages = [ HumanMessage( - "get a random image using the tool and describe the weather" + "get a random diagram using the tool and give it a concise " + "description" ), AIMessage( [], @@ -2888,14 +2931,14 @@ class ChatModelIntegrationTests(ChatModelTests): }, ] if self.supports_image_inputs: - image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + image_url = "https://raw.githubusercontent.com/langchain-ai/docs/4d11d08b6b0e210bd456943f7a22febbd168b543/src/images/agentic-rag-output.png" image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8") human_content.append( { "type": "image", "source": { "type": "base64", - "media_type": "image/jpeg", + "media_type": "image/png", "data": image_data, }, } @@ -2908,7 +2951,7 @@ class ChatModelIntegrationTests(ChatModelTests): {"type": "text", "text": "Hmm let me think about that"}, { "type": "tool_use", - "input": {"fav_color": "green"}, + "input": {"fav_color": "purple"}, "id": "foo", "name": "color_picker", }, @@ -2916,7 +2959,7 @@ class ChatModelIntegrationTests(ChatModelTests): tool_calls=[ { "name": "color_picker", - "args": {"fav_color": "green"}, + "args": {"fav_color": "purple"}, "id": "foo", "type": "tool_call", } diff --git a/libs/standard-tests/langchain_tests/unit_tests/chat_models.py b/libs/standard-tests/langchain_tests/unit_tests/chat_models.py index e54c70ccac2..4ac585c27bd 100644 --- a/libs/standard-tests/langchain_tests/unit_tests/chat_models.py +++ b/libs/standard-tests/langchain_tests/unit_tests/chat_models.py @@ -515,6 +515,16 @@ class ChatModelUnitTests(ChatModelTests): return True ``` + Note: this test downloads audio data from wikimedia.org. You may need to set + the `LANGCHAIN_TESTS_USER_AGENT` environment variable to identify these + requests, e.g., + + ```bash + export LANGCHAIN_TESTS_USER_AGENT="CoolBot/0.0 (https://example.org/coolbot/; coolbot@example.org) generic-library/0.0" + ``` + + Refer to the [Wikimedia Foundation User-Agent Policy](https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy). + ??? info "`supports_video_inputs`" Boolean property indicating whether the chat model supports image inputs.