integration tests

This commit is contained in:
Mason Daugherty 2025-08-01 12:47:08 -04:00
parent 6820723177
commit eae4d1db43
No known key found for this signature in database
2 changed files with 477 additions and 2 deletions

View File

@ -1241,8 +1241,7 @@ def create_file_block(
def create_plaintext_block( def create_plaintext_block(
text: str, text: Optional[str] = None,
*,
url: Optional[str] = None, url: Optional[str] = None,
base64: Optional[str] = None, base64: Optional[str] = None,
file_id: Optional[str] = None, file_id: Optional[str] = None,

View File

@ -0,0 +1,476 @@
"""Integration tests for v1 chat models.
This module provides comprehensive integration tests for the new messages and standard
content block system introduced in ``langchain_core.messages.content_blocks``.
"""
from typing import Any, Union, cast
import langchain_core.messages.content_blocks as types
import pytest
from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.language_models.v1.chat_models import BaseChatModelV1
from langchain_core.messages.base import BaseMessage
from langchain_core.messages.content_blocks import (
AudioContentBlock,
Citation,
CodeInterpreterCall,
CodeInterpreterOutput,
CodeInterpreterResult,
FileContentBlock,
ImageContentBlock,
InvalidToolCall,
NonStandardContentBlock,
PlainTextContentBlock,
ReasoningContentBlock,
TextContentBlock,
ToolCall,
ToolCallChunk,
VideoContentBlock,
WebSearchCall,
WebSearchResult,
create_audio_block,
create_image_block,
create_plaintext_block,
create_text_block,
create_video_block,
)
from langchain_core.messages.v1 import AIMessage, AIMessageChunk, HumanMessage
from langchain_core.tools import tool
from langchain_tests.unit_tests.chat_models_v1 import ChatModelV1Tests
# Union of the content block types imported above. It annotates the mixed block
# list built in ``test_complex_multimodal_reasoning``, which is then cast to
# ``list[types.ContentBlock]`` before being handed to ``HumanMessage``.
ContentBlock = Union[
TextContentBlock,
ImageContentBlock,
VideoContentBlock,
AudioContentBlock,
PlainTextContentBlock,
FileContentBlock,
ReasoningContentBlock,
NonStandardContentBlock,
ToolCall,
InvalidToolCall,
ToolCallChunk,
WebSearchCall,
WebSearchResult,
Citation,
CodeInterpreterCall,
CodeInterpreterOutput,
CodeInterpreterResult,
]
def _get_test_image_base64() -> str:
    """Return the base64 text of a tiny PNG used as image test input.

    Returns:
        The base64-encoded bytes of a 1x1 pixel PNG.
    """
    # Kept inline (split across two literals only to respect the line length)
    # so the tests need no fixture files on disk.
    png_b64 = (
        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA"
        "60e6kgAAAABJRU5ErkJggg=="
    )
    return png_b64
def _get_test_audio_base64() -> str:
    """Return the base64 text of a tiny WAV file used as audio test input.

    Returns:
        The base64-encoded bytes of a minimal RIFF/WAVE payload.
    """
    # The payload is a PCM WAV header followed by only a handful of zero-valued
    # sample bytes (far less than a second of audio), which keeps requests tiny.
    wav_b64 = (
        "UklGRjIAAABXQVZFZm10IBAAAAABAAEAQB8AAEAfAAABAAgAZGF0YQ4AAAAAAAAAAAAAAAAAAA=="
    )
    return wav_b64
def _get_test_video_base64() -> str:
    """Return the stand-in payload used as video test input.

    Returns:
        A fixed placeholder string; it is not an encoded video file.
    """
    # A genuinely valid video would be far larger than is practical to inline,
    # so the tests pass this marker string through instead.
    placeholder = "PLACEHOLDER_VIDEO_DATA"
    return placeholder
def _validate_tool_call_message(message: AIMessage) -> None:
    """Assert that ``message`` carries at least one well-formed tool call block.

    Only list-valued ``content`` is inspected. The first dict block whose
    ``"type"`` is ``"tool_call"`` must expose ``"name"``, ``"args"`` and ``"id"``.

    Args:
        message: The model response to check.

    Raises:
        AssertionError: If ``content`` is a list that holds no tool call block,
            or if the first tool call block lacks one of the required keys.
    """
    content = message.content
    if not isinstance(content, list):
        # TODO: review whether non-list content should instead fall back to the
        # legacy ``tool_calls`` attribute; for now it is accepted without checks.
        return

    tool_calls = []
    for candidate in content:
        if isinstance(candidate, dict) and candidate.get("type") == "tool_call":
            tool_calls.append(candidate)
    assert len(tool_calls) >= 1

    first_call = tool_calls[0]
    for required_key in ("name", "args", "id"):
        assert required_key in first_call
def _validate_multimodal_content_blocks(
    message: BaseMessage, expected_types: list[str]
) -> None:
    """Assert that ``message`` contains every expected content block type.

    Args:
        message: A ``HumanMessage`` or ``AIMessage`` whose ``content`` is a list.
        expected_types: Values of the ``"type"`` key that must each appear on at
            least one dict block in the message content.

    Raises:
        AssertionError: If the message is of another class, its content is not
            a list, or any expected type is absent.
    """
    assert isinstance(message, (HumanMessage, AIMessage))
    blocks = message.content
    assert isinstance(blocks, list)

    # Only dict blocks that actually declare a type take part in the check.
    present = [
        entry["type"] for entry in blocks if isinstance(entry, dict) and "type" in entry
    ]
    for type_ in expected_types:
        assert type_ in present, f"Expected content block type '{type_}' not found"
def _content_blocks_of_type(
    message: AIMessage, *block_types: str
) -> list[types.ContentBlock]:
    """Return the dict content blocks of ``message`` whose type is accepted.

    Args:
        message: The model response whose ``content`` is scanned.
        *block_types: Accepted values of a block's ``"type"`` key.

    Returns:
        The matching blocks in their original order. Entries of ``content`` that
        are not dicts are skipped.
    """
    return [
        block
        for block in message.content
        if isinstance(block, dict) and block.get("type") in block_types
    ]


def _citation_annotations(block: types.ContentBlock) -> list[dict[str, Any]]:
    """Return the ``"citation"`` annotations attached to a content block.

    Args:
        block: A dict content block, typically of type ``"text"``.

    Returns:
        The dict annotations whose ``"type"`` is ``"citation"``; an empty list
        when the block has no (or empty/``None``) ``"annotations"``.
    """
    annotations = cast("list[dict[str, Any]]", block.get("annotations") or [])
    return [
        annotation
        for annotation in annotations
        if isinstance(annotation, dict) and annotation.get("type") == "citation"
    ]


class ChatModelV1IntegrationTests(ChatModelV1Tests):
    """Integration tests for v1 chat models with standard content blocks support.

    Inherits from ``ChatModelV1Tests`` to provide comprehensive testing of content
    block functionality with real external services.

    Every test receives a ``model`` argument and skips itself when the
    corresponding ``supports_*`` capability property is falsy.
    """

    # Additional multimodal support properties for integration testing

    @property
    def supports_multimodal_reasoning(self) -> bool:
        """Whether the model can reason about multimodal content."""
        return (
            self.supports_image_content_blocks
            and self.supports_reasoning_content_blocks
        )

    @property
    def supports_code_interpreter(self) -> bool:
        """Whether the model supports code interpreter blocks."""
        return False

    @property
    def supports_structured_citations(self) -> bool:
        """Whether the model supports structured citation generation."""
        return self.supports_citations

    @property
    def requires_api_key(self) -> bool:
        """Whether integration tests require an API key."""
        return True

    # Multimodal testing

    def test_image_content_blocks_with_analysis(self, model: BaseChatModelV1) -> None:
        """Test image analysis using ``ImageContentBlock``s."""
        if not self.supports_image_content_blocks:
            pytest.skip("Model does not support image inputs.")

        image_block = create_image_block(
            base64=_get_test_image_base64(),
            mime_type="image/png",
        )
        text_block = create_text_block("Analyze this image in detail.")

        result = model.invoke([HumanMessage([text_block, image_block])])

        assert isinstance(result, AIMessage)
        assert len(_content_blocks_of_type(result, "text")) > 0
        if result.text:
            assert len(result.text) > 10  # Substantial response

    def test_video_content_blocks(self, model: BaseChatModelV1) -> None:
        """Test video content block processing."""
        if not self.supports_video_content_blocks:
            pytest.skip("Model does not support video inputs.")

        video_block = create_video_block(
            base64=_get_test_video_base64(),
            mime_type="video/mp4",
        )
        text_block = create_text_block("Describe what you see in this video.")

        result = model.invoke([HumanMessage([text_block, video_block])])

        assert isinstance(result, AIMessage)
        if result.text:
            assert len(result.text) > 10  # Substantial response

    def test_audio_content_blocks_processing(self, model: BaseChatModelV1) -> None:
        """Test audio content block processing with transcription."""
        if not self.supports_audio_content_blocks:
            pytest.skip("Model does not support audio inputs.")

        audio_block = create_audio_block(
            base64=_get_test_audio_base64(),
            mime_type="audio/wav",
        )
        text_block = create_text_block("Transcribe this audio file.")

        result = model.invoke([HumanMessage([text_block, audio_block])])

        assert isinstance(result, AIMessage)
        if result.text:
            assert len(result.text) > 10  # Substantial response

    def test_complex_multimodal_reasoning(self, model: BaseChatModelV1) -> None:
        """Test complex reasoning with multiple content types."""
        # TODO: come back to this, seems like a unique scenario
        if not self.supports_multimodal_reasoning:
            pytest.skip("Model does not support multimodal reasoning.")

        content_blocks: list[ContentBlock] = [
            create_text_block(
                "Compare these media files and provide reasoning analysis:"
            ),
            create_image_block(
                base64=_get_test_image_base64(),
                mime_type="image/png",
            ),
        ]
        # Audio is optional: only attach it when the model accepts audio input.
        if self.supports_audio_content_blocks:
            content_blocks.append(
                create_audio_block(
                    base64=_get_test_audio_base64(),
                    mime_type="audio/wav",
                )
            )

        message = HumanMessage(content=cast("list[types.ContentBlock]", content_blocks))
        result = model.invoke([message])

        assert isinstance(result, AIMessage)
        # Check for reasoning blocks in response
        if self.supports_reasoning_content_blocks:
            assert len(_content_blocks_of_type(result, "reasoning")) > 0

    def test_citation_generation_with_sources(self, model: BaseChatModelV1) -> None:
        """Test that the model can generate ``Citations`` with source links."""
        if not self.supports_structured_citations:
            pytest.skip("Model does not support structured citations.")

        message = HumanMessage(
            "Provide factual information about the distance to the moon with proper "
            "citations to scientific sources."
        )
        result = model.invoke([message])

        assert isinstance(result, AIMessage)
        # Collect the citation annotations of every text block that has any.
        citations_per_block = [
            citations
            for block in _content_blocks_of_type(result, "text")
            if (citations := _citation_annotations(block))
        ]
        assert len(citations_per_block) > 0

        # Validate citation structure
        for citations in citations_per_block:
            for annotation in citations:
                # TODO: evaluate these since none are *technically* required
                # This may be a test that needs adjustment on per-integration basis
                assert "cited_text" in annotation
                assert "start_index" in annotation
                assert "end_index" in annotation

    def test_web_search_integration(self, model: BaseChatModelV1) -> None:
        """Test web search content blocks integration."""
        if not self.supports_web_search_blocks:
            pytest.skip("Model does not support web search blocks.")

        message = HumanMessage(
            "Search for the latest developments in quantum computing."
        )
        result = model.invoke([message])

        assert isinstance(result, AIMessage)
        # Either a search call or a search result block satisfies the check.
        # TODO: should this be one or the other or both?
        search_blocks = _content_blocks_of_type(
            result, "web_search_call", "web_search_result"
        )
        assert len(search_blocks) > 0

    def test_code_interpreter_blocks(self, model: BaseChatModelV1) -> None:
        """Test code interpreter content blocks."""
        if not self.supports_code_interpreter:
            pytest.skip("Model does not support code interpreter blocks.")

        message = HumanMessage("Calculate the factorial of 10 using Python code.")
        result = model.invoke([message])

        assert isinstance(result, AIMessage)
        # Any one of the three code interpreter block kinds is accepted.
        # TODO: should we require all three types or just an output/result?
        code_blocks = _content_blocks_of_type(
            result,
            "code_interpreter_call",
            "code_interpreter_output",
            "code_interpreter_result",
        )
        assert len(code_blocks) > 0

    def test_tool_calling_with_content_blocks(self, model: BaseChatModelV1) -> None:
        """Test tool calling with content blocks."""
        if not self.supports_enhanced_tool_calls:
            pytest.skip("Model does not support tool calls.")

        @tool
        def calculate_area(length: float, width: float) -> str:
            """Calculate the area of a rectangle."""
            area = length * width
            return f"The area is {area} square units."

        model_with_tools = model.bind_tools([calculate_area])
        message = HumanMessage(
            "Calculate the area of a rectangle with length 5 and width 3."
        )
        result = model_with_tools.invoke([message])

        _validate_tool_call_message(result)

    def test_plaintext_content_blocks_from_documents(
        self, model: BaseChatModelV1
    ) -> None:
        """Test PlainTextContentBlock for document plaintext content."""
        if not self.supports_plaintext_content_blocks:
            pytest.skip("Model does not support PlainTextContentBlock.")

        # Test with PlainTextContentBlock (plaintext from document)
        plaintext_block = create_plaintext_block(
            text="This is plaintext content extracted from a document.",
            file_id="doc_123",
        )
        message = HumanMessage(
            content=cast("list[types.ContentBlock]", [plaintext_block])
        )
        result = model.invoke([message])

        assert isinstance(result, AIMessage)
        # TODO expand

    def test_content_block_streaming_integration(self, model: BaseChatModelV1) -> None:
        """Test streaming with content blocks."""
        if not self.supports_content_blocks_v1:
            pytest.skip("Model does not support content blocks v1.")

        message = HumanMessage(
            content=[
                {
                    "type": "text",
                    "text": "Write a detailed explanation of machine learning.",
                }
            ]
        )

        chunks = []
        for chunk in model.stream([message]):
            chunks.append(chunk)
            assert isinstance(chunk, (AIMessage, AIMessageChunk))
        assert len(chunks) > 1  # Should receive multiple chunks

        # Aggregate chunks
        final_message = chunks[0]
        for chunk in chunks[1:]:
            final_message = final_message + chunk
        assert isinstance(final_message.content, list)

    def test_error_handling_with_invalid_content_blocks(
        self, model: BaseChatModelV1
    ) -> None:
        """Test error handling with various invalid content block configurations."""
        if not self.supports_content_blocks_v1:
            pytest.skip("Model does not support content blocks v1.")

        test_cases = [
            {"type": "text"},  # Missing text field
            {"type": "image"},  # Missing url/mime_type
            {"type": "tool_call", "name": "test"},  # Missing args/id
        ]
        for invalid_block in test_cases:
            message = HumanMessage([invalid_block])  # type: ignore[list-item]
            # Should either handle gracefully or raise appropriate error
            try:
                result = model.invoke([message])
                assert isinstance(result, AIMessage)
            except (ValueError, TypeError, KeyError) as e:
                # Acceptable to raise validation errors
                assert len(str(e)) > 0

    async def test_async_content_blocks_processing(
        self, model: BaseChatModelV1
    ) -> None:
        """Test asynchronous processing of content blocks."""
        if not self.supports_content_blocks_v1:
            pytest.skip("Model does not support content blocks v1.")

        message = HumanMessage("Generate a creative story about space exploration.")
        result = await model.ainvoke([message])

        assert isinstance(result, AIMessage)

    def test_content_blocks_with_callbacks(self, model: BaseChatModelV1) -> None:
        """Test that content blocks work correctly with callback handlers."""
        if not self.supports_content_blocks_v1:
            pytest.skip("Model does not support content blocks v1.")

        class ContentBlockCallbackHandler(BaseCallbackHandler):
            """Record every message passed to ``on_chat_model_start``."""

            def __init__(self) -> None:
                super().__init__()
                self.messages_seen: list[BaseMessage] = []

            def on_chat_model_start(
                self,
                serialized: Any,  # noqa: ARG002
                messages: Any,
                **kwargs: Any,  # noqa: ARG002
            ) -> None:
                # NOTE(review): assumes ``messages`` is a flat sequence of
                # message objects; if it is nested per prompt, the content check
                # below would see lists instead of messages - confirm.
                self.messages_seen.extend(messages)

        callback_handler = ContentBlockCallbackHandler()
        message = HumanMessage("Test message for callback handling.")
        result = model.invoke([message], config={"callbacks": [callback_handler]})

        assert isinstance(result, AIMessage)
        assert len(callback_handler.messages_seen) > 0
        assert any(
            hasattr(msg, "content") and isinstance(msg.content, list)
            for msg in callback_handler.messages_seen
        )