fix(core): fix tracing for PDFs in v1 messages (#32434)

2025-08-16 08:06:14 +00:00 · 2025-08-11 13:18:32 -03:00 · 2025-08-11 13:18:32 -03:00 · 45a067509f
commit 45a067509f
parent 23c3fa65d4
17 changed files with 639 additions and 674 deletions
--- a/libs/core/langchain_core/messages/content_blocks.py
+++ b/libs/core/langchain_core/messages/content_blocks.py
@ -979,8 +979,11 @@ def convert_to_openai_data_block(block: dict) -> dict:
            file = {"file_data": f"data:{block['mime_type']};base64,{base64_data}"}
            if filename := block.get("filename"):
                file["filename"] = filename
-            elif (metadata := block.get("metadata")) and ("filename" in metadata):
+            elif (extras := block.get("extras")) and ("filename" in extras):
-                file["filename"] = metadata["filename"]
+                file["filename"] = extras["filename"]
            elif (extras := block.get("metadata")) and ("filename" in extras):
                # Backward compat
                file["filename"] = extras["filename"]
            else:
                warnings.warn(
                    "OpenAI may require a filename for file inputs. Specify a filename "
--- a/libs/core/langchain_core/v1/chat_models.py
+++ b/libs/core/langchain_core/v1/chat_models.py
@ -44,6 +44,7 @@ from langchain_core.language_models.base import (
 )
 from langchain_core.load import dumpd
 from langchain_core.messages import (
    convert_to_openai_data_block,
    convert_to_openai_image_block,
    get_buffer_string,
    is_data_content_block,
@ -132,6 +133,30 @@ def _format_for_tracing(messages: Sequence[MessageV1]) -> list[MessageV1]:
                # TODO: for tracing purposes we store non-standard types (OpenAI format)
                # in message content. Consider typing these block formats.
                message_to_trace.content[idx] = convert_to_openai_image_block(block)  # type: ignore[arg-type, call-overload]
            elif (
                block.get("type") == "file"
                and is_data_content_block(block)  # type: ignore[arg-type]  # permit unnecessary runtime check
                and "base64" in block
            ):
                if message_to_trace is message:
                    # Shallow copy
                    message_to_trace = copy.copy(message)
                    message_to_trace.content = list(message_to_trace.content)
                message_to_trace.content[idx] = convert_to_openai_data_block(block)  # type: ignore[arg-type, call-overload]
            elif len(block) == 1 and "type" not in block:
                # Tracing assumes all content blocks have a "type" key. Here
                # we add this key if it is missing, and there's an obvious
                # choice for the type (e.g., a single key in the block).
                if message_to_trace is message:
                    # Shallow copy
                    message_to_trace = copy.copy(message)
                    message_to_trace.content = list(message_to_trace.content)
                key = next(iter(block))
                message_to_trace.content[idx] = {  # type: ignore[call-overload]
                    "type": key,
                    key: block[key],  # type: ignore[literal-required]
                }
            else:
                pass
        messages_to_trace.append(message_to_trace)
--- a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py
+++ b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py
@ -13,10 +13,14 @@ from langchain_core.language_models import (
    FakeListChatModel,
    ParrotFakeChatModel,
 )
-from langchain_core.language_models._utils import _normalize_messages
+from langchain_core.language_models._utils import (
    _normalize_messages,
    _normalize_messages_v1,
 )
 from langchain_core.language_models.fake_chat_models import (
    FakeListChatModelError,
    GenericFakeChatModelV1,
    ParrotFakeChatModelV1,
 )
 from langchain_core.messages import (
    AIMessage,
@ -33,6 +37,7 @@ from langchain_core.tracers.context import collect_runs
 from langchain_core.tracers.event_stream import _AstreamEventsCallbackHandler
 from langchain_core.tracers.schemas import Run
 from langchain_core.v1.messages import AIMessageChunk as AIMessageChunkV1
 from langchain_core.v1.messages import HumanMessage as HumanMessageV1
 from tests.unit_tests.fake.callbacks import (
    BaseFakeCallbackHandler,
    FakeAsyncCallbackHandler,
@ -430,9 +435,10 @@ class FakeChatModelStartTracer(FakeTracer):
        )
-def test_trace_images_in_openai_format() -> None:
+@pytest.mark.parametrize("llm_class", [ParrotFakeChatModel, ParrotFakeChatModelV1])
 def test_trace_images_in_openai_format(llm_class: Any) -> None:
    """Test that images are traced in OpenAI format."""
-    llm = ParrotFakeChatModel()
+    llm = llm_class()
    messages = [
        {
            "role": "user",
@ -456,7 +462,8 @@ def test_trace_images_in_openai_format() -> None:
                            "type": "image_url",
                            "image_url": {"url": "https://example.com/image.png"},
                        }
-                    ]
+                    ],
                    id=tracer.messages[0][0][0].id,
                )
            ]
        ]
@ -471,9 +478,10 @@ def test_trace_images_in_openai_format() -> None:
    ]
-def test_trace_content_blocks_with_no_type_key() -> None:
+@pytest.mark.parametrize("llm_class", [ParrotFakeChatModel, ParrotFakeChatModelV1])
 def test_trace_content_blocks_with_no_type_key(llm_class: Any) -> None:
    """Test that we add a ``type`` key to certain content blocks that don't have one."""
-    llm = ParrotFakeChatModel()
+    llm = llm_class()
    messages = [
        {
            "role": "user",
@ -503,7 +511,8 @@ def test_trace_content_blocks_with_no_type_key() -> None:
                            "type": "cachePoint",
                            "cachePoint": {"type": "default"},
                        },
-                    ]
+                    ],
                    id=tracer.messages[0][0][0].id,
                )
            ]
        ]
@ -520,9 +529,10 @@ def test_trace_content_blocks_with_no_type_key() -> None:
    ]
-def test_extend_support_to_openai_multimodal_formats() -> None:
+@pytest.mark.parametrize("llm_class", [ParrotFakeChatModel, ParrotFakeChatModelV1])
 def test_extend_support_to_openai_multimodal_formats(llm_class: Any) -> None:
    """Test that chat models normalize OpenAI file and audio inputs."""
-    llm = ParrotFakeChatModel()
+    llm = llm_class()
    messages = [
        {
            "role": "user",
@ -660,6 +670,34 @@ def test_normalize_messages_edge_cases() -> None:
    assert messages == _normalize_messages(messages)
 def test_normalize_messages_edge_cases_v1() -> None:
    """Check that provider-shaped blocks pass through v1 normalization unchanged."""
    # Every block below has a ``type`` that looks like a convertible input but
    # carries a plain string payload, so normalization must leave it untouched.
    message = HumanMessageV1(
        content=[
            {  # type: ignore[list-item]
                "type": "file",
                "file": "uri",
            },
            {  # type: ignore[list-item]
                "type": "input_file",
                "file_data": "uri",
                "filename": "file-name",
            },
            {  # type: ignore[list-item]
                "type": "input_audio",
                "input_audio": "uri",
            },
            {  # type: ignore[list-item]
                "type": "input_image",
                "image_url": "uri",
            },
        ]
    )
    originals = [message]
    assert originals == _normalize_messages_v1(originals)
 def test_streaming_v1() -> None:
    chunks = [
        AIMessageChunkV1(
--- a/libs/partners/ollama/tests/integration_tests/v1/chat_models/test_chat_models_standard_v1.py
+++ b/libs/partners/ollama/tests/integration_tests/v1/chat_models/test_chat_models_standard_v1.py
@ -4,11 +4,11 @@ from unittest.mock import MagicMock, patch
 import pytest
 from httpx import ConnectError
-from langchain_core.messages.content_blocks import ToolCallChunk, is_reasoning_block
+from langchain_core.messages.content_blocks import ToolCallChunk
 from langchain_core.tools import tool
 from langchain_core.v1.chat_models import BaseChatModel
-from langchain_core.v1.messages import AIMessage, AIMessageChunk, HumanMessage
+from langchain_core.v1.messages import AIMessageChunk, HumanMessage
-from langchain_tests.integration_tests.chat_models_v1 import ChatModelV1IntegrationTests
+from langchain_tests.v1.integration_tests.chat_models import ChatModelIntegrationTests
 from ollama import ResponseError
 from pydantic import ValidationError
@ -26,7 +26,7 @@ def get_current_weather(location: str) -> dict:
    return {"temperature": "unknown", "conditions": "unknown"}
-class TestChatOllamaV1(ChatModelV1IntegrationTests):
+class TestChatOllamaV1(ChatModelIntegrationTests):
    @property
    def chat_model_class(self) -> type[ChatOllama]:
        return ChatOllama
@ -195,39 +195,39 @@ class TestChatOllamaV1(ChatModelV1IntegrationTests):
    #         "reasoning."
    #     )
-    @pytest.mark.xfail(
+    # @pytest.mark.xfail(
-        reason=(
+    #     reason=(
-            f"{DEFAULT_MODEL_NAME} does not support reasoning. Override uses "
+    #         f"{DEFAULT_MODEL_NAME} does not support reasoning. Override uses "
-            "reasoning-capable model with `reasoning=True` enabled."
+    #         "reasoning-capable model with `reasoning=True` enabled."
-        ),
+    #     ),
-        strict=False,
+    #     strict=False,
-    )
+    # )
-    def test_reasoning_content_blocks_basic(self, model: BaseChatModel) -> None:
+    # def test_reasoning_content_blocks_basic(self, model: BaseChatModel) -> None:
-        """Test that the model can generate ``ReasoningContentBlock``.
+    #     """Test that the model can generate ``ReasoningContentBlock``.
-        This test overrides the default model to use a reasoning-capable model
+    #     This test overrides the default model to use a reasoning-capable model
-        with reasoning mode explicitly enabled.
+    #     with reasoning mode explicitly enabled.
-        """
+    #     """
-        if not self.supports_reasoning_content_blocks:
+    #     if not self.supports_reasoning_content_blocks:
-            pytest.skip("Model does not support ReasoningContentBlock.")
+    #         pytest.skip("Model does not support ReasoningContentBlock.")
-        reasoning_enabled_model = ChatOllama(
+    #     reasoning_enabled_model = ChatOllama(
-            model=REASONING_MODEL_NAME, reasoning=True, validate_model_on_init=True
+    #         model=REASONING_MODEL_NAME, reasoning=True, validate_model_on_init=True
-        )
+    #     )
-        message = HumanMessage("Think step by step: What is 2 + 2?")
+    #     message = HumanMessage("Think step by step: What is 2 + 2?")
-        result = reasoning_enabled_model.invoke([message])
+    #     result = reasoning_enabled_model.invoke([message])
-        assert isinstance(result, AIMessage)
+    #     assert isinstance(result, AIMessage)
-        if isinstance(result.content, list):
+    #     if isinstance(result.content, list):
-            reasoning_blocks = [
+    #         reasoning_blocks = [
-                block
+    #             block
-                for block in result.content
+    #             for block in result.content
-                if isinstance(block, dict) and is_reasoning_block(block)
+    #             if isinstance(block, dict) and is_reasoning_block(block)
-            ]
+    #         ]
-            assert len(reasoning_blocks) > 0, (
+    #         assert len(reasoning_blocks) > 0, (
-                "Expected reasoning content blocks but found none. "
+    #             "Expected reasoning content blocks but found none. "
-                f"Content blocks: {[block.get('type') for block in result.content]}"
+    #             f"Content blocks: {[block.get('type') for block in result.content]}"
-            )
+    #         )
    # Additional Ollama reasoning tests in v1/chat_models/test_chat_models_v1.py
--- a/libs/partners/ollama/tests/unit_tests/v1/test_chat_models.py
+++ b/libs/partners/ollama/tests/unit_tests/v1/test_chat_models.py
@ -13,7 +13,7 @@ from langchain_core.messages.content_blocks import (
    create_text_block,
 )
 from langchain_core.v1.messages import AIMessage, HumanMessage, MessageV1, SystemMessage
-from langchain_tests.unit_tests.chat_models_v1 import ChatModelV1UnitTests
+from langchain_tests.v1.unit_tests.chat_models import ChatModelUnitTests
 from langchain_ollama._compat import (
    _convert_chunk_to_v1,
@ -240,7 +240,7 @@ class TestMessageConversion:
        assert result["images"] == []
-class TestChatOllama(ChatModelV1UnitTests):
+class TestChatOllama(ChatModelUnitTests):
    """Test `ChatOllama`."""
    @property
--- a/libs/partners/openai/langchain_openai/v1/chat_models/base.py
+++ b/libs/partners/openai/langchain_openai/v1/chat_models/base.py
@ -186,7 +186,7 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> MessageV1:
        raise ValueError(error_message)
-def _format_message_content(content: Any, responses_api: bool = False) -> Any:
+def _format_message_content(content: Any, responses_ai_msg: bool = False) -> Any:
    """Format message content."""
    if content and isinstance(content, list):
        formatted_content = []
@ -201,7 +201,9 @@ def _format_message_content(content: Any, responses_api: bool = False) -> Any:
            elif (
                isinstance(block, dict)
                and is_data_content_block(block)
-                and not responses_api
+                # Responses API messages handled separately in _compat (parsed into
                # image generation calls)
                and not responses_ai_msg
            ):
                formatted_content.append(convert_to_openai_data_block(block))
            # Anthropic image blocks
@ -235,7 +237,9 @@ def _format_message_content(content: Any, responses_api: bool = False) -> Any:
    return formatted_content
-def _convert_message_to_dict(message: MessageV1, responses_api: bool = False) -> dict:
+def _convert_message_to_dict(
    message: MessageV1, responses_ai_msg: bool = False
 ) -> dict:
    """Convert a LangChain message to a dictionary.
    Args:
@ -245,7 +249,9 @@ def _convert_message_to_dict(message: MessageV1, responses_api: bool = False) ->
        The dictionary.
    """
    message_dict: dict[str, Any] = {
-        "content": _format_message_content(message.content, responses_api=responses_api)
+        "content": _format_message_content(
            message.content, responses_ai_msg=responses_ai_msg
        )
    }
    if name := message.name:
        message_dict["name"] = name
@ -273,7 +279,7 @@ def _convert_message_to_dict(message: MessageV1, responses_api: bool = False) ->
            if (
                block.get("type") == "audio"
                and (id_ := block.get("id"))
-                and not responses_api
+                and not responses_ai_msg
            ):
                # openai doesn't support passing the data back - only the id
                # https://platform.openai.com/docs/guides/audio/multi-turn-conversations
@ -2992,14 +2998,13 @@ def _oai_structured_outputs_parser(
        else:
            return parsed
    elif any(
-        block["type"] == "non_standard" and block["value"].get("type") == "refusal"
+        block["type"] == "non_standard" and "refusal" in block["value"]
        for block in ai_msg.content
    ):
        refusal = next(
-            block["value"]["text"]
+            block["value"]["refusal"]
            for block in ai_msg.content
-            if block["type"] == "non_standard"
+            if block["type"] == "non_standard" and "refusal" in block["value"]
-            and block["value"].get("type") == "refusal"
        )
        raise OpenAIRefusalError(refusal)
    elif ai_msg.tool_calls:
@ -3246,12 +3251,13 @@ def _construct_responses_api_input(messages: Sequence[MessageV1]) -> list:
    """Construct the input for the OpenAI Responses API."""
    input_ = []
    for lc_msg in messages:
-        msg = _convert_message_to_dict(lc_msg, responses_api=True)
        if isinstance(lc_msg, AIMessageV1):
            msg = _convert_message_to_dict(lc_msg, responses_ai_msg=True)
            msg["content"] = _convert_from_v1_to_responses(
                msg["content"], lc_msg.tool_calls
            )
        else:
            msg = _convert_message_to_dict(lc_msg)
            # Get content from non-standard content blocks
            for i, block in enumerate(msg["content"]):
                if block.get("type") == "non_standard":
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard_v1.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard_v1.py
@ -0,0 +1,163 @@
 """Standard LangChain interface tests"""
 import base64
 from pathlib import Path
 from typing import Literal, cast
 import httpx
 import pytest
 from langchain_core.v1.chat_models import BaseChatModel
 from langchain_core.v1.messages import AIMessage, HumanMessage
 from langchain_tests.v1.integration_tests import ChatModelIntegrationTests
 from langchain_openai.v1 import ChatOpenAI
 REPO_ROOT_DIR = Path(__file__).parents[6]
 class TestOpenAIStandardV1(ChatModelIntegrationTests):
    """Run the v1 standard integration test suite against ``ChatOpenAI``."""

    @property
    def chat_model_class(self) -> type[BaseChatModel]:
        """Chat model class exercised by the suite."""
        return ChatOpenAI

    @property
    def chat_model_params(self) -> dict:
        """Constructor keyword arguments for the model under test."""
        return {
            "model": "gpt-5-nano",
            "stream_usage": True,
            "reasoning_effort": "minimal",
        }

    @property
    def supports_image_inputs(self) -> bool:
        """Opt in to the image-input tests."""
        return True

    @property
    def supports_image_urls(self) -> bool:
        """Opt in to the image-URL tests."""
        return True

    @property
    def supports_json_mode(self) -> bool:
        """Opt in to the JSON-mode tests."""
        return True

    @property
    def supports_anthropic_inputs(self) -> bool:
        """Opt in to the Anthropic-style input tests."""
        return True

    @property
    def supported_usage_metadata_details(
        self,
    ) -> dict[
        Literal["invoke", "stream"],
        list[
            Literal[
                "audio_input",
                "audio_output",
                "reasoning_output",
                "cache_read_input",
                "cache_creation_input",
            ]
        ],
    ]:
        """Usage-metadata detail keys to check, per call mode."""
        return {"invoke": ["reasoning_output", "cache_read_input"], "stream": []}

    @property
    def enable_vcr_tests(self) -> bool:
        """Opt in to the VCR-backed tests."""
        return True

    def invoke_with_cache_read_input(self, *, stream: bool = False) -> AIMessage:
        """Send the same long prompt twice and return the second response.

        The prompt embeds the repository README. Per the original comment, the
        first call exists so that the second one is served from the cache.
        """
        # Explicit encoding: the default depends on the machine's locale and
        # can fail to decode non-ASCII characters in the README.
        readme = (REPO_ROOT_DIR / "README.md").read_text(encoding="utf-8")

        input_ = f"""What's langchain? Here's the langchain README:
        {readme}
        """
        llm = ChatOpenAI(model="gpt-4o-mini", stream_usage=True)
        _invoke(llm, input_, stream)
        # invoke twice so first invocation is cached
        return _invoke(llm, input_, stream)

    def invoke_with_reasoning_output(self, *, stream: bool = False) -> AIMessage:
        """Invoke a reasoning model and return its response."""
        llm = ChatOpenAI(model="o1-mini", stream_usage=True, temperature=1)
        input_ = (
            "explain  the relationship between the 2008/9 economic crisis and the "
            "startup ecosystem in the early 2010s"
        )
        return _invoke(llm, input_, stream)

    @property
    def supports_pdf_inputs(self) -> bool:
        """Opt out of the generic PDF test; see ``test_openai_pdf_inputs``."""
        # OpenAI requires a filename for PDF inputs
        # For now, we test with filename in OpenAI-specific tests
        return False

    def test_openai_pdf_inputs(self, model: BaseChatModel) -> None:
        """Test that the model can process PDF inputs."""
        url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
        response = httpx.get(url)
        # Fail on a bad download instead of base64-encoding an error page and
        # sending it to the model as if it were a PDF.
        response.raise_for_status()
        pdf_data = base64.b64encode(response.content).decode("utf-8")

        # Standard v1 file block, with the filename carried in ``extras``
        message = HumanMessage(
            [
                {"type": "text", "text": "What is the title of this document?"},
                {
                    "type": "file",
                    "mime_type": "application/pdf",
                    "base64": pdf_data,
                    "extras": {"filename": "my-pdf"},  # OpenAI requires a filename
                },
            ]
        )
        _ = model.invoke([message])

        # Test OpenAI Chat Completions format
        message = HumanMessage(
            [
                {"type": "text", "text": "What is the title of this document?"},
                {  # type: ignore[list-item]
                    "type": "file",
                    "file": {
                        "filename": "test file.pdf",
                        "file_data": f"data:application/pdf;base64,{pdf_data}",
                    },
                },
            ]
        )
        _ = model.invoke([message])
 def _invoke(llm: ChatOpenAI, input_: str, stream: bool) -> AIMessage:
    """Call ``llm`` on ``input_`` and return a single message.

    When ``stream`` is true, the streamed chunks are added together in order
    and the merged result is returned; otherwise ``llm.invoke`` is used.
    """
    if not stream:
        return cast(AIMessage, llm.invoke(input_))

    merged = None
    for piece in llm.stream(input_):
        merged = merged + piece if merged else piece  # type: ignore[operator]
    return cast(AIMessage, merged)
@pytest.mark.skip()  # Test either finishes in 5 seconds or 5 minutes.
 def test_audio_model() -> None:
    class AudioModelTests(ChatModelIntegrationTests):
        @property
        def chat_model_class(self) -> type[ChatOpenAI]:
            return ChatOpenAI
        @property
        def chat_model_params(self) -> dict:
            return {
                "model": "gpt-4o-audio-preview",
                "temperature": 0,
                "model_kwargs": {
                    "modalities": ["text", "audio"],
                    "audio": {"voice": "alloy", "format": "wav"},
                },
            }
        @property
        def supports_audio_inputs(self) -> bool:
            return True
    test_instance = AudioModelTests()
    model = test_instance.chat_model_class(**test_instance.chat_model_params)
    AudioModelTests().test_audio_inputs(model)
--- a/libs/standard-tests/QUICK_START.md
+++ b/libs/standard-tests/QUICK_START.md
@ -13,7 +13,7 @@ New imports:
 from langchain_tests.unit_tests.chat_models import ChatModelUnitTests
 # v1
-from langchain_tests.unit_tests.chat_models_v1 import ChatModelV1UnitTests
+from langchain_tests.v1.unit_tests.chat_models import ChatModelUnitTests as ChatModelV1UnitTests
 ```
 ### 2. Minimal Configuration
@ -72,10 +72,6 @@ class TestAdvancedModelV1(ChatModelV1UnitTests):
        """Model provides source citations"""
        return True
    @property
    def supports_tool_calls(self):
        """Tool calling with metadata"""
        return True
 ```
 ## 📋 Feature Reference
@ -163,7 +159,7 @@ for testing chat models that support the enhanced content blocks system.
 from typing import Any
-from langchain_core.language_models.v1.chat_models import BaseChatModelV1
+from langchain_core.v1.chat_models import BaseChatModel as BaseChatModelV1
 from langchain_core.language_models import GenericFakeChatModel
 from langchain_core.messages import BaseMessage
 from langchain_core.messages.content_blocks import TextContentBlock
@ -276,7 +272,7 @@ from typing import Any
 import pytest
 from langchain_core.language_models import BaseChatModel, GenericFakeChatModel
-from langchain_tests.integration_tests.chat_models_v1 import ChatModelV1IntegrationTests
+from langchain_tests.v1.integration_tests.chat_models import ChatModelIntegrationTests as ChatModelV1IntegrationTests
 # Example fake model for demonstration (replace with real model in practice)
@ -341,11 +337,6 @@ class TestFakeChatModelV1Integration(ChatModelV1IntegrationTests):
        """Disable web search for this fake model."""
        return False
    @property
    def supports_tool_calls(self) -> bool:
        """Enable tool calling tests."""
        return True
    @property
    def has_tool_calling(self) -> bool:
        """Enable tool calling tests."""
--- a/libs/standard-tests/README.md
+++ b/libs/standard-tests/README.md
@ -92,4 +92,4 @@ as required is optional.
 For chat models that support the new content blocks v1 format (multimodal content, reasoning blocks, citations, etc.), use the v1 test suite instead:
 - See `QUICK_START.md` and `README_V1.md` for v1 testing documentation
- Use `ChatModelV1Tests` from `langchain_tests.unit_tests.chat_models_v1`
+- Use `ChatModelTests` from `langchain_tests.v1.unit_tests.chat_models`
--- a/libs/standard-tests/README_V1.md
+++ b/libs/standard-tests/README_V1.md
@ -14,10 +14,10 @@ The standard tests v1 package provides comprehensive testing for chat models tha
 ### Basic Unit Tests
 ```python
-from langchain_tests.unit_tests.chat_models_v1 import ChatModelV1UnitTests
+from langchain_tests.v1.unit_tests.chat_models import ChatModelUnitTests
 from your_package import YourChatModel
-class TestYourChatModelV1(ChatModelV1UnitTests):
+class TestYourChatModelV1(ChatModelUnitTests):
    @property
    def chat_model_class(self):
        return YourChatModel
@ -43,10 +43,10 @@ class TestYourChatModelV1(ChatModelV1UnitTests):
 ### Integration Tests
 ```python
-from langchain_tests.integration_tests.chat_models_v1 import ChatModelV1IntegrationTests
+from langchain_tests.v1.integration_tests.chat_models import ChatModelIntegrationTests
 from your_package import YourChatModel
-class TestYourChatModelV1Integration(ChatModelV1IntegrationTests):
+class TestYourChatModelV1Integration(ChatModelIntegrationTests):
    @property
    def chat_model_class(self):
        return YourChatModel
@ -81,14 +81,10 @@ class TestYourChatModelV1Integration(ChatModelV1IntegrationTests):
 - `supports_image_content_blocks`: `ImageContentBlock`s (v1 format)
 - `supports_video_content_blocks`: `VideoContentBlock`s (v1 format)
 - `supports_audio_content_blocks`: `AudioContentBlock`s (v1 format)
 - `supports_plaintext_content_blocks`: `PlainTextContentBlock`s (plaintext from documents)
 - `supports_file_content_blocks`: `FileContentBlock`s
 ### Tool Calling
- `supports_tool_calls`: Tool calling with content blocks
+- `has_tool_calling`: Tool calling with content blocks
 - `supports_invalid_tool_calls`: Error handling for invalid tool calls
 - `supports_tool_call_chunks`: Streaming tool call support
 ### Advanced Features
@ -99,7 +95,7 @@ class TestYourChatModelV1Integration(ChatModelV1IntegrationTests):
 ## Test Categories
-### Unit Tests (`ChatModelV1Tests`)
+### Unit Tests (`ChatModelTests`)
 - Content block format validation
 - Ser/deserialization
@ -108,7 +104,7 @@ class TestYourChatModelV1Integration(ChatModelV1IntegrationTests):
 - Error handling for invalid blocks
 - Backward compatibility with string content
-### Integration Tests (`ChatModelV1IntegrationTests`)
+### Integration Tests (`ChatModelIntegrationTests`)
 - Real multimodal content processing
 - Advanced reasoning with content blocks
@ -130,7 +126,7 @@ class TestYourChatModelV1Integration(ChatModelV1IntegrationTests):
   from langchain_tests.unit_tests.chat_models import ChatModelUnitTests
   # v1
-   from langchain_tests.unit_tests.chat_models_v1 import ChatModelV1UnitTests
+   from langchain_tests.v1.unit_tests.chat_models import ChatModelUnitTests as ChatModelV1UnitTests
   ```
 2. **Configure content blocks support**:
--- a/libs/standard-tests/langchain_tests/integration_tests/init.py
+++ b/libs/standard-tests/langchain_tests/integration_tests/init.py
@ -20,7 +20,6 @@ for module in modules:
 from .base_store import BaseStoreAsyncTests, BaseStoreSyncTests
 from .cache import AsyncCacheTestSuite, SyncCacheTestSuite
 from .chat_models import ChatModelIntegrationTests
 from .chat_models_v1 import ChatModelV1IntegrationTests
 from .embeddings import EmbeddingsIntegrationTests
 from .retrievers import RetrieversIntegrationTests
 from .tools import ToolsIntegrationTests
@ -31,7 +30,6 @@ __all__ = [
    "BaseStoreAsyncTests",
    "BaseStoreSyncTests",
    "ChatModelIntegrationTests",
    "ChatModelV1IntegrationTests",
    "EmbeddingsIntegrationTests",
    "RetrieversIntegrationTests",
    "SyncCacheTestSuite",
--- a/libs/standard-tests/langchain_tests/v1/init.py
+++ b/libs/standard-tests/langchain_tests/v1/init.py
@ -0,0 +1,9 @@
 """Base Test classes for standard testing.
 To learn how to use these classes, see the
 `integration standard testing <https://python.langchain.com/docs/contributing/how_to/integrations/standard_tests/>`__
 guide.
 This package provides both the original test suites and the v1 test suites that support
 the new content blocks system introduced in ``langchain_core.messages.content_blocks``.
 """
--- a/libs/standard-tests/langchain_tests/v1/integration_tests/init.py
+++ b/libs/standard-tests/langchain_tests/v1/integration_tests/init.py
@ -0,0 +1,16 @@
 # ruff: noqa: E402
 # E402 is silenced because the import of ``.chat_models`` below is placed
 # after the registration loop rather than at the top of the file.
 import pytest
 # Rewrite assert statements for test suite so that implementations can
 # see the full error message from failed asserts.
 # https://docs.pytest.org/en/7.1.x/how-to/writing_plugins.html#assertion-rewriting
 modules = ["chat_models"]
 for module in modules:
    # Register the fully qualified name of each module in this v1 package.
    pytest.register_assert_rewrite(f"langchain_tests.v1.integration_tests.{module}")
 # Import only after registration: per the pytest docs linked above, rewriting
 # applies to modules that have not been imported yet.
 from .chat_models import ChatModelIntegrationTests
 # Public API of this package.
 __all__ = [
    "ChatModelIntegrationTests",
 ]
--- a/libs/standard-tests/langchain_tests/v1/integration_tests/chat_models.py
+++ b/libs/standard-tests/langchain_tests/v1/integration_tests/chat_models.py
--- a/libs/standard-tests/langchain_tests/v1/unit_tests/init.py
+++ b/libs/standard-tests/langchain_tests/v1/unit_tests/init.py
@ -0,0 +1,14 @@
 # ruff: noqa: E402
 import pytest
 # Rewrite assert statements for test suite so that implementations can
 # see the full error message from failed asserts.
 # https://docs.pytest.org/en/7.1.x/how-to/writing_plugins.html#assertion-rewriting
 modules = ["chat_models"]
 for module in modules:
    # The name must be this package's own dotted path (note the ``v1``
    # segment). Registering ``langchain_tests.unit_tests.*`` would target the
    # v0 suite and leave the v1 suite's asserts without rewriting.
    pytest.register_assert_rewrite(f"langchain_tests.v1.unit_tests.{module}")
 # Import only after registration: rewriting applies to modules that have not
 # been imported yet.
 from .chat_models import ChatModelUnitTests
 __all__ = ["ChatModelUnitTests"]
--- a/libs/standard-tests/langchain_tests/v1/unit_tests/chat_models.py
+++ b/libs/standard-tests/langchain_tests/v1/unit_tests/chat_models.py
@ -72,7 +72,7 @@ if PYDANTIC_MAJOR_VERSION == 2:
    TEST_PYDANTIC_MODELS.append(generate_schema_pydantic_v1_from_2())
-class ChatModelV1Tests(BaseStandardTests):
+class ChatModelTests(BaseStandardTests):
    """Test suite for v1 chat models.
    This class provides comprehensive testing for the new message system introduced in
@ -139,15 +139,6 @@ class ChatModelV1Tests(BaseStandardTests):
        """Whether the model supports tool calling."""
        return self.chat_model_class.bind_tools is not BaseChatModel.bind_tools
    @property
    def tool_choice_value(self) -> Optional[str]:
        """(None or str) To use for tool choice when used in tests.
        Not required.
        """
        return None
    @property
    def has_tool_choice(self) -> bool:
        """Whether the model supports forcing tool calling via ``tool_choice``."""
@ -184,6 +175,35 @@ class ChatModelV1Tests(BaseStandardTests):
        """
        return False
    @property
    def supports_image_inputs(self) -> bool:
        """Whether the chat model supports image inputs.

        Defaults to ``False``.
        """
        return False
    @property
    def supports_image_urls(self) -> bool:
        """Whether the chat model supports image inputs from URLs.

        Defaults to ``False``.
        """
        return False
    @property
    def supports_pdf_inputs(self) -> bool:
        """(bool) whether the chat model supports PDF inputs, defaults to ``False``."""
        return False
    @property
    def supports_audio_inputs(self) -> bool:
        """Whether the chat model supports audio inputs.

        Defaults to ``False``.
        """
        return False
    @property
    def supports_video_inputs(self) -> bool:
        """(bool) whether the chat model supports video inputs, defaults to ``False``.
        No current tests are written for this feature.
        """
        return False
    # Content Block Support Properties
    @property
    def supports_content_blocks_v1(self) -> bool:
@ -198,14 +218,10 @@ class ChatModelV1Tests(BaseStandardTests):
        support. Each defaults to False:
        - ``supports_reasoning_content_blocks``
        - ``supports_plaintext_content_blocks``
        - ``supports_file_content_blocks``
        - ``supports_image_content_blocks``
        - ``supports_audio_content_blocks``
        - ``supports_video_content_blocks``
        - ``supports_citations``
        - ``supports_web_search_blocks``
        - ``supports_invalid_tool_calls``
        """
        return True
@ -238,48 +254,6 @@ class ChatModelV1Tests(BaseStandardTests):
        """
        return False
    @property
    def supports_plaintext_content_blocks(self) -> bool:
        """Whether the model supports ``PlainTextContentBlock``.
        Defaults to False.
        """
        return False
    @property
    def supports_file_content_blocks(self) -> bool:
        """Whether the model supports ``FileContentBlock``.
        Replaces ``supports_pdf_inputs`` from v0.
        Defaults to False.
        """
        return False
    @property
    def supports_image_content_blocks(self) -> bool:
        """Whether the model supports ``ImageContentBlock``.
        Replaces ``supports_image_inputs`` from v0.
        Defaults to False.
        """
        return False
    @property
    def supports_audio_content_blocks(self) -> bool:
        """Whether the model supports ``AudioContentBlock``.
        Replaces ``supports_audio_inputs`` from v0.
        Defaults to False.
        """
        return False
    @property
    def supports_video_content_blocks(self) -> bool:
        """Whether the model supports ``VideoContentBlock``.
@ -294,10 +268,7 @@ class ChatModelV1Tests(BaseStandardTests):
    @property
    def supports_multimodal_reasoning(self) -> bool:
        """Whether the model can reason about multimodal content."""
-        return (
+        return self.supports_image_inputs and self.supports_reasoning_content_blocks
            self.supports_image_content_blocks
            and self.supports_reasoning_content_blocks
        )
    @property
    def supports_citations(self) -> bool:
@ -308,11 +279,6 @@ class ChatModelV1Tests(BaseStandardTests):
        """
        return False
    @property
    def supports_structured_citations(self) -> bool:
        """Whether the model supports structured citation generation."""
        return self.supports_citations
    @property
    def supports_web_search_blocks(self) -> bool:
        """Whether the model supports ``WebSearchCall``/``WebSearchResult`` blocks.
@ -331,15 +297,6 @@ class ChatModelV1Tests(BaseStandardTests):
        """
        return False
    @property
    def supports_invalid_tool_calls(self) -> bool:
        """Whether the model can handle ``InvalidToolCall`` blocks.
        Defaults to False.
        """
        return False
    @property
    def returns_usage_metadata(self) -> bool:
        """Whether the model returns usage metadata on invoke and streaming.
@ -391,7 +348,7 @@ class ChatModelV1Tests(BaseStandardTests):
        return {"invoke": [], "stream": []}
-class ChatModelV1UnitTests(ChatModelV1Tests):
+class ChatModelUnitTests(ChatModelTests):
    """Base class for chat model v1 unit tests.
    These tests run in isolation without external dependencies.
@ -406,11 +363,11 @@ class ChatModelV1UnitTests(ChatModelV1Tests):
        from typing import Type
-        from langchain_tests.unit_tests import ChatModelV1UnitTests
+        from langchain_tests.v1.unit_tests import ChatModelUnitTests
-        from my_package.chat_models import MyChatModel
+        from my_package.v1.chat_models import MyChatModel
-        class TestMyChatModelUnit(ChatModelV1UnitTests):
+        class TestMyChatModelUnit(ChatModelUnitTests):
            @property
            def chat_model_class(self) -> Type[MyChatModel]:
                # Return the chat model class to test here
--- a/libs/standard-tests/tests/unit_tests/test_custom_chat_model_v1.py
+++ b/libs/standard-tests/tests/unit_tests/test_custom_chat_model_v1.py
@ -2,12 +2,12 @@
 import pytest
-from langchain_tests.unit_tests.chat_models_v1 import ChatModelV1UnitTests
+from langchain_tests.v1.unit_tests.chat_models import ChatModelUnitTests
 from .custom_chat_model_v1 import ChatParrotLinkV1
-class TestChatParrotLinkV1Unit(ChatModelV1UnitTests):
+class TestChatParrotLinkV1Unit(ChatModelUnitTests):
    """Unit tests for ``ChatParrotLinkV1`` using the standard v1 test suite."""
    @property
@ -66,16 +66,6 @@ class TestChatParrotLinkV1Unit(ChatModelV1UnitTests):
        """``ChatParrotLinkV1`` does not generate ``ReasoningContentBlock``."""
        return False
    @property
    def supports_plaintext_content_blocks(self) -> bool:
        """``ChatParrotLinkV1`` does not support ``PlainTextContentBlock``."""
        return False
    @property
    def supports_file_content_blocks(self) -> bool:
        """``ChatParrotLinkV1`` does not support ``FileContentBlock``."""
        return False
    @property
    def supports_image_content_blocks(self) -> bool:
        """``ChatParrotLinkV1`` does not support ``ImageContentBlock``."""
@ -100,18 +90,3 @@ class TestChatParrotLinkV1Unit(ChatModelV1UnitTests):
    def supports_web_search_blocks(self) -> bool:
        """``ChatParrotLinkV1`` does not support web search blocks."""
        return False
    @property
    def supports_tool_calls(self) -> bool:
        """``ChatParrotLinkV1`` does not support tool calls."""
        return False
    @property
    def supports_invalid_tool_calls(self) -> bool:
        """``ChatParrotLinkV1`` does not support ``InvalidToolCall`` handling."""
        return False
    @property
    def supports_tool_call_chunks(self) -> bool:
        """``ChatParrotLinkV1`` does not support ``ToolCallChunk`` blocks."""
        return False