integration tests

2025-08-14 23:26:34 +00:00 · 2025-08-01 12:47:08 -04:00 · 2025-08-01 12:47:08 -04:00 · eae4d1db43
commit eae4d1db43
parent 6820723177
2 changed files with 477 additions and 2 deletions
--- a/libs/core/langchain_core/messages/content_blocks.py
+++ b/libs/core/langchain_core/messages/content_blocks.py
@ -1241,8 +1241,7 @@ def create_file_block(
 def create_plaintext_block(
-    text: str,
+    text: Optional[str] = None,
    *,
    url: Optional[str] = None,
    base64: Optional[str] = None,
    file_id: Optional[str] = None,
--- a/libs/standard-tests/langchain_tests/integration_tests/chat_models_v1.py
+++ b/libs/standard-tests/langchain_tests/integration_tests/chat_models_v1.py
@ -0,0 +1,476 @@
 """Integration tests for v1 chat models.
 This module provides comprehensive integration tests for the new messages and standard
 content block system introduced in ``langchain_core.messages.content_blocks``.
 """
 from typing import Any, Union, cast
 import langchain_core.messages.content_blocks as types
 import pytest
 from langchain_core.callbacks import BaseCallbackHandler
 from langchain_core.language_models.v1.chat_models import BaseChatModelV1
 from langchain_core.messages.base import BaseMessage
 from langchain_core.messages.content_blocks import (
    AudioContentBlock,
    Citation,
    CodeInterpreterCall,
    CodeInterpreterOutput,
    CodeInterpreterResult,
    FileContentBlock,
    ImageContentBlock,
    InvalidToolCall,
    NonStandardContentBlock,
    PlainTextContentBlock,
    ReasoningContentBlock,
    TextContentBlock,
    ToolCall,
    ToolCallChunk,
    VideoContentBlock,
    WebSearchCall,
    WebSearchResult,
    create_audio_block,
    create_image_block,
    create_plaintext_block,
    create_text_block,
    create_video_block,
 )
 from langchain_core.messages.v1 import AIMessage, AIMessageChunk, HumanMessage
 from langchain_core.tools import tool
 from langchain_tests.unit_tests.chat_models_v1 import ChatModelV1Tests
 # Union of every standard content block type imported above. Within this module
 # it annotates the mixed list of blocks that
 # ``test_complex_multimodal_reasoning`` assembles before sending it to the model.
 ContentBlock = Union[
    TextContentBlock,
    ImageContentBlock,
    VideoContentBlock,
    AudioContentBlock,
    PlainTextContentBlock,
    FileContentBlock,
    ReasoningContentBlock,
    NonStandardContentBlock,
    ToolCall,
    InvalidToolCall,
    ToolCallChunk,
    WebSearchCall,
    WebSearchResult,
    Citation,
    CodeInterpreterCall,
    CodeInterpreterOutput,
    CodeInterpreterResult,
 ]
 def _get_test_image_base64() -> str:
    """Get a small test image as base64 for testing."""
    # 1x1 pixel transparent PNG
    return "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=="  # noqa: E501
 def _get_test_audio_base64() -> str:
    """Get a small test audio file as base64 for testing."""
    # Minimal WAV file (1 second of silence)
    return (
        "UklGRjIAAABXQVZFZm10IBAAAAABAAEAQB8AAEAfAAABAAgAZGF0YQ4AAAAAAAAAAAAAAAAAAA=="
    )
 def _get_test_video_base64() -> str:
    """Get a small test video file as base64 for testing."""
    # Minimal valid video file would be much larger; for testing we use a placeholder
    return "PLACEHOLDER_VIDEO_DATA"
 def _validate_tool_call_message(message: AIMessage) -> None:
    """Validate that a message carries well-formed tool calls as content blocks.
    Args:
        message: The model response to inspect.
    Raises:
        AssertionError: If ``message.content`` is not a list, if it holds no
            ``tool_call`` block, or if any such block lacks ``name``, ``args``
            or ``id``.
    """
    # Fail loudly when the content is not a list of blocks; previously that case
    # asserted nothing, so a response without any tool call passed unnoticed.
    # TODO: review whether a fallback to the legacy ``tool_calls`` attribute is
    # necessary.
    assert isinstance(message.content, list), (
        "Expected message content to be a list of content blocks, "
        f"got {type(message.content).__name__}"
    )
    tool_call_blocks = [
        block
        for block in message.content
        if isinstance(block, dict) and block.get("type") == "tool_call"
    ]
    assert tool_call_blocks, "Expected at least one 'tool_call' content block"
    # Every tool call must be structurally complete, not only the first one.
    for tool_call in tool_call_blocks:
        for key in ("name", "args", "id"):
            assert key in tool_call, f"Tool call block is missing '{key}'"
 def _validate_multimodal_content_blocks(
    message: BaseMessage, expected_types: list[str]
 ) -> None:
    """Assert that every expected block type appears in the message content.
    Args:
        message: Message to inspect; it must be a ``HumanMessage`` or
            ``AIMessage`` whose content is a list.
        expected_types: Values of the ``type`` key that must each be present.
    """
    assert isinstance(message, (HumanMessage, AIMessage))
    assert isinstance(message.content, list)
    # Only dict blocks that actually carry a ``type`` key are considered.
    present = [
        item["type"]
        for item in message.content
        if isinstance(item, dict) and "type" in item
    ]
    for wanted in expected_types:
        assert wanted in present, f"Expected content block type '{wanted}' not found"
 class ChatModelV1IntegrationTests(ChatModelV1Tests):
    """Integration tests for v1 chat models with standard content blocks support.
    Inherits from ``ChatModelV1Tests`` to provide comprehensive testing of content
    block functionality with real external services.
    """
    # Additional multimodal support properties for integration testing
    @property
    def supports_multimodal_reasoning(self) -> bool:
        """Report whether multimodal reasoning tests apply to the model.
        Truthy only when both image content blocks and reasoning content blocks
        are supported.
        """
        handles_images = self.supports_image_content_blocks
        # ``and`` short-circuits, so the reasoning flag is read only when needed.
        return handles_images and self.supports_reasoning_content_blocks
    @property
    def supports_code_interpreter(self) -> bool:
        """Report whether code interpreter block tests apply to the model.
        Always ``False`` here, which makes ``test_code_interpreter_blocks`` skip.
        """
        return False
    @property
    def supports_structured_citations(self) -> bool:
        """Report whether structured citation tests apply to the model.
        Mirrors the value of ``supports_citations``.
        """
        citations_enabled = self.supports_citations
        return citations_enabled
    @property
    def requires_api_key(self) -> bool:
        """Report whether running these integration tests needs an API key.
        Always ``True`` here.
        """
        return True
    # Multimodal testing
    def test_image_content_blocks_with_analysis(self, model: BaseChatModelV1) -> None:
        """Send a text prompt plus an ``ImageContentBlock`` and check the reply.
        The reply must be an ``AIMessage`` holding at least one ``text`` block;
        when it exposes text, that text must be longer than 10 characters.
        Skipped when image content blocks are unsupported.
        """
        if not self.supports_image_content_blocks:
            pytest.skip("Model does not support image inputs.")
        picture = create_image_block(
            base64=_get_test_image_base64(),
            mime_type="image/png",
        )
        prompt = create_text_block("Analyze this image in detail.")
        request = HumanMessage([prompt, picture])
        response = model.invoke([request])
        assert isinstance(response, AIMessage)
        # At least one plain text block must be present in the reply.
        assert any(
            isinstance(item, dict) and item.get("type") == "text"
            for item in response.content
        )
        reply_text = response.text
        if reply_text:
            assert len(reply_text) > 10  # Substantial response
    def test_video_content_blocks(self, model: BaseChatModelV1) -> None:
        """Send a text prompt plus a video block and check the reply.
        The reply must be an ``AIMessage``; when it exposes text, that text must
        be longer than 10 characters. Skipped when video blocks are unsupported.
        NOTE(review): ``_get_test_video_base64`` returns a placeholder string
        rather than real video data -- confirm providers accept it.
        """
        if not self.supports_video_content_blocks:
            pytest.skip("Model does not support video inputs.")
        clip = create_video_block(
            base64=_get_test_video_base64(),
            mime_type="video/mp4",
        )
        prompt = create_text_block("Describe what you see in this video.")
        request = HumanMessage([prompt, clip])
        response = model.invoke([request])
        assert isinstance(response, AIMessage)
        reply_text = response.text
        if reply_text:
            assert len(reply_text) > 10  # Substantial response
    def test_audio_content_blocks_processing(self, model: BaseChatModelV1) -> None:
        """Send a transcription prompt plus an audio block and check the reply.
        The reply must be an ``AIMessage``; when it exposes text, that text must
        be longer than 10 characters. Skipped when audio blocks are unsupported.
        """
        if not self.supports_audio_content_blocks:
            pytest.skip("Model does not support audio inputs.")
        recording = create_audio_block(
            base64=_get_test_audio_base64(),
            mime_type="audio/wav",
        )
        prompt = create_text_block("Transcribe this audio file.")
        request = HumanMessage([prompt, recording])
        response = model.invoke([request])
        assert isinstance(response, AIMessage)
        reply_text = response.text
        if reply_text:
            assert len(reply_text) > 10  # Substantial response
    def test_complex_multimodal_reasoning(self, model: BaseChatModelV1) -> None:
        """Exercise a prompt mixing text, image and (optionally) audio blocks.
        The reply must be an ``AIMessage``; when reasoning content blocks are
        supported it must also contain at least one ``reasoning`` block.
        Skipped when multimodal reasoning is unsupported.
        """
        # TODO: come back to this, seems like a unique scenario
        if not self.supports_multimodal_reasoning:
            pytest.skip("Model does not support multimodal reasoning.")
        instruction = create_text_block(
            "Compare these media files and provide reasoning analysis:"
        )
        picture = create_image_block(
            base64=_get_test_image_base64(),
            mime_type="image/png",
        )
        content_blocks: list[ContentBlock] = [instruction, picture]
        # Audio is attached only for models that accept it.
        if self.supports_audio_content_blocks:
            recording = create_audio_block(
                base64=_get_test_audio_base64(),
                mime_type="audio/wav",
            )
            content_blocks.append(recording)
        request = HumanMessage(content=cast("list[types.ContentBlock]", content_blocks))
        response = model.invoke([request])
        assert isinstance(response, AIMessage)
        if not self.supports_reasoning_content_blocks:
            return
        # Check for reasoning blocks in response
        assert any(
            isinstance(item, dict) and item.get("type") == "reasoning"
            for item in response.content
        )
    def test_citation_generation_with_sources(self, model: BaseChatModelV1) -> None:
        """Check that the reply contains text blocks annotated with citations.
        At least one ``text`` block must carry a ``citation`` annotation, and each
        such annotation must expose ``cited_text``, ``start_index`` and
        ``end_index``. Skipped when structured citations are unsupported.
        """
        if not self.supports_structured_citations:
            pytest.skip("Model does not support structured citations.")
        request = HumanMessage(
            "Provide factual information about the distance to the moon with proper "
            "citations to scientific sources."
        )
        response = model.invoke([request])
        assert isinstance(response, AIMessage)
        # Gather the text blocks that carry at least one citation annotation.
        cited_blocks = []
        for item in response.content:
            if not isinstance(item, dict):
                continue
            if item.get("type") != "text" or "annotations" not in item:
                continue
            notes = cast("list[dict[str, Any]]", item.get("annotations", []))
            if any(
                isinstance(note, dict) and note.get("type") == "citation"
                for note in notes
            ):
                cited_blocks.append(item)
        assert len(cited_blocks) > 0
        # Validate citation structure
        # TODO: evaluate these since none are *technically* required
        # This may be a test that needs adjustment on per-integration basis
        required_fields = ("cited_text", "start_index", "end_index")
        for item in cited_blocks:
            notes = cast("list[dict[str, Any]]", item.get("annotations", []))
            for note in notes:
                if note.get("type") != "citation":
                    continue
                for field_name in required_fields:
                    assert field_name in note
    def test_web_search_integration(self, model: BaseChatModelV1) -> None:
        """Check that a search-style prompt yields web search content blocks.
        The reply must be an ``AIMessage`` containing at least one
        ``web_search_call`` or ``web_search_result`` block. Skipped when web
        search blocks are unsupported.
        """
        if not self.supports_web_search_blocks:
            pytest.skip("Model does not support web search blocks.")
        request = HumanMessage(
            "Search for the latest developments in quantum computing."
        )
        response = model.invoke([request])
        assert isinstance(response, AIMessage)
        # Either kind of web search block satisfies the check.
        search_types = ("web_search_call", "web_search_result")
        # TODO: should this be one or the other or both?
        assert any(
            isinstance(item, dict) and item.get("type") in search_types
            for item in response.content
        )
    def test_code_interpreter_blocks(self, model: BaseChatModelV1) -> None:
        """Check that a coding prompt yields code interpreter content blocks.
        The reply must be an ``AIMessage`` containing at least one block whose
        type is a code interpreter call, output or result. Skipped when the code
        interpreter is unsupported.
        """
        if not self.supports_code_interpreter:
            pytest.skip("Model does not support code interpreter blocks.")
        request = HumanMessage("Calculate the factorial of 10 using Python code.")
        response = model.invoke([request])
        assert isinstance(response, AIMessage)
        # Check for code interpreter blocks
        interpreter_types = [
            "code_interpreter_call",
            "code_interpreter_output",
            "code_interpreter_result",
        ]
        matches = []
        for item in response.content:
            if isinstance(item, dict) and item.get("type") in interpreter_types:
                matches.append(item)
        # TODO: should we require all three types or just an output/result?
        assert len(matches) > 0
    def test_tool_calling_with_content_blocks(self, model: BaseChatModelV1) -> None:
        """Bind a simple tool and check the reply carries a tool call block.
        The reply is checked with ``_validate_tool_call_message``. Skipped when
        enhanced tool calls are unsupported.
        """
        if not self.supports_enhanced_tool_calls:
            pytest.skip("Model does not support tool calls.")
        # The tool's name, parameters and docstring are left untouched because
        # the ``@tool`` decorator consumes them.
        @tool
        def calculate_area(length: float, width: float) -> str:
            """Calculate the area of a rectangle."""
            return f"The area is {length * width} square units."
        request = HumanMessage(
            "Calculate the area of a rectangle with length 5 and width 3."
        )
        tool_model = model.bind_tools([calculate_area])
        response = tool_model.invoke([request])
        _validate_tool_call_message(response)
    def test_plaintext_content_blocks_from_documents(
        self, model: BaseChatModelV1
    ) -> None:
        """Send a lone ``PlainTextContentBlock`` and check an ``AIMessage`` returns.
        Skipped when plaintext content blocks are unsupported.
        """
        if not self.supports_plaintext_content_blocks:
            pytest.skip("Model does not support PlainTextContentBlock.")
        # Plaintext as it would be extracted from a document.
        document_text = create_plaintext_block(
            text="This is plaintext content extracted from a document.",
            file_id="doc_123",
        )
        blocks = cast("list[types.ContentBlock]", [document_text])
        request = HumanMessage(content=blocks)
        response = model.invoke([request])
        assert isinstance(response, AIMessage)
        # TODO expand
    def test_content_block_streaming_integration(self, model: BaseChatModelV1) -> None:
        """Stream a reply and check the chunks add up to list-based content.
        Every streamed item must be an ``AIMessage`` or ``AIMessageChunk``, more
        than one item must arrive, and the sum of all items must have list
        content. Skipped when content blocks v1 are unsupported.
        """
        if not self.supports_content_blocks_v1:
            pytest.skip("Model does not support content blocks v1.")
        prompt_block = {
            "type": "text",
            "text": "Write a detailed explanation of machine learning.",
        }
        request = HumanMessage(content=[prompt_block])
        received = []
        for piece in model.stream([request]):
            received.append(piece)
            assert isinstance(piece, (AIMessage, AIMessageChunk))
        assert len(received) > 1  # Should receive multiple chunks
        # Aggregate chunks: start from the first and add the rest in order.
        remaining = iter(received)
        combined = next(remaining)
        for piece in remaining:
            combined = combined + piece
        assert isinstance(combined.content, list)
    def test_error_handling_with_invalid_content_blocks(
        self, model: BaseChatModelV1
    ) -> None:
        """Feed malformed content blocks and check the model copes with each.
        For every malformed block the model must either return an ``AIMessage``
        or raise ``ValueError``, ``TypeError`` or ``KeyError`` with a non-empty
        message. Skipped when content blocks v1 are unsupported.
        """
        if not self.supports_content_blocks_v1:
            pytest.skip("Model does not support content blocks v1.")
        malformed_blocks = [
            {"type": "text"},  # Missing text field
            {"type": "image"},  # Missing url/mime_type
            {"type": "tool_call", "name": "test"},  # Missing args/id
        ]
        for bad_block in malformed_blocks:
            request = HumanMessage([bad_block])  # type: ignore[list-item]
            # Should either handle gracefully or raise appropriate error
            try:
                response = model.invoke([request])
            except (ValueError, TypeError, KeyError) as exc:
                # Acceptable to raise validation errors
                assert len(str(exc)) > 0
            else:
                assert isinstance(response, AIMessage)
    async def test_async_content_blocks_processing(
        self, model: BaseChatModelV1
    ) -> None:
        """Invoke the model through ``ainvoke`` and check an ``AIMessage`` returns.
        Skipped when content blocks v1 are unsupported.
        """
        if not self.supports_content_blocks_v1:
            pytest.skip("Model does not support content blocks v1.")
        request = HumanMessage("Generate a creative story about space exploration.")
        response = await model.ainvoke([request])
        assert isinstance(response, AIMessage)
    def test_content_blocks_with_callbacks(self, model: BaseChatModelV1) -> None:
        """Check that a callback handler observes messages with list content.
        A handler recording everything passed to ``on_chat_model_start`` is
        attached through ``config``; after the call it must have seen at least
        one item, and at least one seen item must have list-typed ``content``.
        Skipped when content blocks v1 are unsupported.
        """
        if not self.supports_content_blocks_v1:
            pytest.skip("Model does not support content blocks v1.")
        class ContentBlockCallbackHandler(BaseCallbackHandler):
            # Collects whatever ``on_chat_model_start`` receives as ``messages``.
            def __init__(self) -> None:
                self.messages_seen: list[BaseMessage] = []
            def on_chat_model_start(
                self,
                serialized: Any,  # noqa: ARG002
                messages: Any,
                **kwargs: Any,  # noqa: ARG002
            ) -> None:
                self.messages_seen.extend(messages)
        recorder = ContentBlockCallbackHandler()
        request = HumanMessage("Test message for callback handling.")
        response = model.invoke([request], config={"callbacks": [recorder]})
        assert isinstance(response, AIMessage)
        observed = recorder.messages_seen
        assert len(observed) > 0
        assert any(
            hasattr(entry, "content") and isinstance(entry.content, list)
            for entry in observed
        )