diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index 727e2cd1085..d426339156d 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -979,8 +979,11 @@ def convert_to_openai_data_block(block: dict) -> dict: file = {"file_data": f"data:{block['mime_type']};base64,{base64_data}"} if filename := block.get("filename"): file["filename"] = filename - elif (metadata := block.get("metadata")) and ("filename" in metadata): - file["filename"] = metadata["filename"] + elif (extras := block.get("extras")) and ("filename" in extras): + file["filename"] = extras["filename"] + elif (extras := block.get("metadata")) and ("filename" in extras): + # Backward compat + file["filename"] = extras["filename"] else: warnings.warn( "OpenAI may require a filename for file inputs. Specify a filename " diff --git a/libs/core/langchain_core/v1/chat_models.py b/libs/core/langchain_core/v1/chat_models.py index d51607c8efa..77331e70423 100644 --- a/libs/core/langchain_core/v1/chat_models.py +++ b/libs/core/langchain_core/v1/chat_models.py @@ -44,6 +44,7 @@ from langchain_core.language_models.base import ( ) from langchain_core.load import dumpd from langchain_core.messages import ( + convert_to_openai_data_block, convert_to_openai_image_block, get_buffer_string, is_data_content_block, @@ -132,6 +133,30 @@ def _format_for_tracing(messages: Sequence[MessageV1]) -> list[MessageV1]: # TODO: for tracing purposes we store non-standard types (OpenAI format) # in message content. Consider typing these block formats. 
message_to_trace.content[idx] = convert_to_openai_image_block(block) # type: ignore[arg-type, call-overload] + elif ( + block.get("type") == "file" + and is_data_content_block(block) # type: ignore[arg-type] # permit unnecessary runtime check + and "base64" in block + ): + if message_to_trace is message: + # Shallow copy + message_to_trace = copy.copy(message) + message_to_trace.content = list(message_to_trace.content) + + message_to_trace.content[idx] = convert_to_openai_data_block(block) # type: ignore[arg-type, call-overload] + elif len(block) == 1 and "type" not in block: + # Tracing assumes all content blocks have a "type" key. Here + # we add this key if it is missing, and there's an obvious + # choice for the type (e.g., a single key in the block). + if message_to_trace is message: + # Shallow copy + message_to_trace = copy.copy(message) + message_to_trace.content = list(message_to_trace.content) + key = next(iter(block)) + message_to_trace.content[idx] = { # type: ignore[call-overload] + "type": key, + key: block[key], # type: ignore[literal-required] + } else: pass messages_to_trace.append(message_to_trace) diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py index 5d1d4f581a2..8d828cf8943 100644 --- a/libs/core/tests/unit_tests/language_models/chat_models/test_base.py +++ b/libs/core/tests/unit_tests/language_models/chat_models/test_base.py @@ -13,10 +13,14 @@ from langchain_core.language_models import ( FakeListChatModel, ParrotFakeChatModel, ) -from langchain_core.language_models._utils import _normalize_messages +from langchain_core.language_models._utils import ( + _normalize_messages, + _normalize_messages_v1, +) from langchain_core.language_models.fake_chat_models import ( FakeListChatModelError, GenericFakeChatModelV1, + ParrotFakeChatModelV1, ) from langchain_core.messages import ( AIMessage, @@ -33,6 +37,7 @@ from langchain_core.tracers.context 
import collect_runs from langchain_core.tracers.event_stream import _AstreamEventsCallbackHandler from langchain_core.tracers.schemas import Run from langchain_core.v1.messages import AIMessageChunk as AIMessageChunkV1 +from langchain_core.v1.messages import HumanMessage as HumanMessageV1 from tests.unit_tests.fake.callbacks import ( BaseFakeCallbackHandler, FakeAsyncCallbackHandler, @@ -430,9 +435,10 @@ class FakeChatModelStartTracer(FakeTracer): ) -def test_trace_images_in_openai_format() -> None: +@pytest.mark.parametrize("llm_class", [ParrotFakeChatModel, ParrotFakeChatModelV1]) +def test_trace_images_in_openai_format(llm_class: Any) -> None: """Test that images are traced in OpenAI format.""" - llm = ParrotFakeChatModel() + llm = llm_class() messages = [ { "role": "user", @@ -456,7 +462,8 @@ def test_trace_images_in_openai_format() -> None: "type": "image_url", "image_url": {"url": "https://example.com/image.png"}, } - ] + ], + id=tracer.messages[0][0][0].id, ) ] ] @@ -471,9 +478,10 @@ def test_trace_images_in_openai_format() -> None: ] -def test_trace_content_blocks_with_no_type_key() -> None: +@pytest.mark.parametrize("llm_class", [ParrotFakeChatModel, ParrotFakeChatModelV1]) +def test_trace_content_blocks_with_no_type_key(llm_class: Any) -> None: """Test that we add a ``type`` key to certain content blocks that don't have one.""" - llm = ParrotFakeChatModel() + llm = llm_class() messages = [ { "role": "user", @@ -503,7 +511,8 @@ def test_trace_content_blocks_with_no_type_key() -> None: "type": "cachePoint", "cachePoint": {"type": "default"}, }, - ] + ], + id=tracer.messages[0][0][0].id, ) ] ] @@ -520,9 +529,10 @@ def test_trace_content_blocks_with_no_type_key() -> None: ] -def test_extend_support_to_openai_multimodal_formats() -> None: +@pytest.mark.parametrize("llm_class", [ParrotFakeChatModel, ParrotFakeChatModelV1]) +def test_extend_support_to_openai_multimodal_formats(llm_class: Any) -> None: """Test that chat models normalize OpenAI file and audio 
inputs.""" - llm = ParrotFakeChatModel() + llm = llm_class() messages = [ { "role": "user", @@ -660,6 +670,34 @@ def test_normalize_messages_edge_cases() -> None: assert messages == _normalize_messages(messages) +def test_normalize_messages_edge_cases_v1() -> None: + # Test some blocks that should pass through + messages = [ + HumanMessageV1( + content=[ + { # type: ignore[list-item] + "type": "file", + "file": "uri", + }, + { # type: ignore[list-item] + "type": "input_file", + "file_data": "uri", + "filename": "file-name", + }, + { # type: ignore[list-item] + "type": "input_audio", + "input_audio": "uri", + }, + { # type: ignore[list-item] + "type": "input_image", + "image_url": "uri", + }, + ] + ) + ] + assert messages == _normalize_messages_v1(messages) + + def test_streaming_v1() -> None: chunks = [ AIMessageChunkV1( diff --git a/libs/partners/ollama/tests/integration_tests/v1/chat_models/test_chat_models_standard_v1.py b/libs/partners/ollama/tests/integration_tests/v1/chat_models/test_chat_models_standard_v1.py index fb69084460b..41a5f4b20a7 100644 --- a/libs/partners/ollama/tests/integration_tests/v1/chat_models/test_chat_models_standard_v1.py +++ b/libs/partners/ollama/tests/integration_tests/v1/chat_models/test_chat_models_standard_v1.py @@ -4,11 +4,11 @@ from unittest.mock import MagicMock, patch import pytest from httpx import ConnectError -from langchain_core.messages.content_blocks import ToolCallChunk, is_reasoning_block +from langchain_core.messages.content_blocks import ToolCallChunk from langchain_core.tools import tool from langchain_core.v1.chat_models import BaseChatModel -from langchain_core.v1.messages import AIMessage, AIMessageChunk, HumanMessage -from langchain_tests.integration_tests.chat_models_v1 import ChatModelV1IntegrationTests +from langchain_core.v1.messages import AIMessageChunk, HumanMessage +from langchain_tests.v1.integration_tests.chat_models import ChatModelIntegrationTests from ollama import ResponseError from pydantic import 
ValidationError @@ -26,7 +26,7 @@ def get_current_weather(location: str) -> dict: return {"temperature": "unknown", "conditions": "unknown"} -class TestChatOllamaV1(ChatModelV1IntegrationTests): +class TestChatOllamaV1(ChatModelIntegrationTests): @property def chat_model_class(self) -> type[ChatOllama]: return ChatOllama @@ -195,39 +195,39 @@ class TestChatOllamaV1(ChatModelV1IntegrationTests): # "reasoning." # ) - @pytest.mark.xfail( - reason=( - f"{DEFAULT_MODEL_NAME} does not support reasoning. Override uses " - "reasoning-capable model with `reasoning=True` enabled." - ), - strict=False, - ) - def test_reasoning_content_blocks_basic(self, model: BaseChatModel) -> None: - """Test that the model can generate ``ReasoningContentBlock``. + # @pytest.mark.xfail( + # reason=( + # f"{DEFAULT_MODEL_NAME} does not support reasoning. Override uses " + # "reasoning-capable model with `reasoning=True` enabled." + # ), + # strict=False, + # ) + # def test_reasoning_content_blocks_basic(self, model: BaseChatModel) -> None: + # """Test that the model can generate ``ReasoningContentBlock``. - This test overrides the default model to use a reasoning-capable model - with reasoning mode explicitly enabled. - """ - if not self.supports_reasoning_content_blocks: - pytest.skip("Model does not support ReasoningContentBlock.") + # This test overrides the default model to use a reasoning-capable model + # with reasoning mode explicitly enabled. 
+ # """ + # if not self.supports_reasoning_content_blocks: + # pytest.skip("Model does not support ReasoningContentBlock.") - reasoning_enabled_model = ChatOllama( - model=REASONING_MODEL_NAME, reasoning=True, validate_model_on_init=True - ) + # reasoning_enabled_model = ChatOllama( + # model=REASONING_MODEL_NAME, reasoning=True, validate_model_on_init=True + # ) - message = HumanMessage("Think step by step: What is 2 + 2?") - result = reasoning_enabled_model.invoke([message]) - assert isinstance(result, AIMessage) - if isinstance(result.content, list): - reasoning_blocks = [ - block - for block in result.content - if isinstance(block, dict) and is_reasoning_block(block) - ] - assert len(reasoning_blocks) > 0, ( - "Expected reasoning content blocks but found none. " - f"Content blocks: {[block.get('type') for block in result.content]}" - ) + # message = HumanMessage("Think step by step: What is 2 + 2?") + # result = reasoning_enabled_model.invoke([message]) + # assert isinstance(result, AIMessage) + # if isinstance(result.content, list): + # reasoning_blocks = [ + # block + # for block in result.content + # if isinstance(block, dict) and is_reasoning_block(block) + # ] + # assert len(reasoning_blocks) > 0, ( + # "Expected reasoning content blocks but found none. 
" + # f"Content blocks: {[block.get('type') for block in result.content]}" + # ) # Additional Ollama reasoning tests in v1/chat_models/test_chat_models_v1.py diff --git a/libs/partners/ollama/tests/unit_tests/v1/test_chat_models.py b/libs/partners/ollama/tests/unit_tests/v1/test_chat_models.py index 0eaed6d3fe4..f2c0b5d55c2 100644 --- a/libs/partners/ollama/tests/unit_tests/v1/test_chat_models.py +++ b/libs/partners/ollama/tests/unit_tests/v1/test_chat_models.py @@ -13,7 +13,7 @@ from langchain_core.messages.content_blocks import ( create_text_block, ) from langchain_core.v1.messages import AIMessage, HumanMessage, MessageV1, SystemMessage -from langchain_tests.unit_tests.chat_models_v1 import ChatModelV1UnitTests +from langchain_tests.v1.unit_tests.chat_models import ChatModelUnitTests from langchain_ollama._compat import ( _convert_chunk_to_v1, @@ -240,7 +240,7 @@ class TestMessageConversion: assert result["images"] == [] -class TestChatOllama(ChatModelV1UnitTests): +class TestChatOllama(ChatModelUnitTests): """Test `ChatOllama`.""" @property diff --git a/libs/partners/openai/langchain_openai/v1/chat_models/base.py b/libs/partners/openai/langchain_openai/v1/chat_models/base.py index 48a2dbf2664..d3cd0b482f1 100644 --- a/libs/partners/openai/langchain_openai/v1/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/v1/chat_models/base.py @@ -186,7 +186,7 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> MessageV1: raise ValueError(error_message) -def _format_message_content(content: Any, responses_api: bool = False) -> Any: +def _format_message_content(content: Any, responses_ai_msg: bool = False) -> Any: """Format message content.""" if content and isinstance(content, list): formatted_content = [] @@ -201,7 +201,9 @@ def _format_message_content(content: Any, responses_api: bool = False) -> Any: elif ( isinstance(block, dict) and is_data_content_block(block) - and not responses_api + # Responses API messages handled separately in _compat 
(parsed into + # image generation calls) + and not responses_ai_msg ): formatted_content.append(convert_to_openai_data_block(block)) # Anthropic image blocks @@ -235,7 +237,9 @@ def _format_message_content(content: Any, responses_api: bool = False) -> Any: return formatted_content -def _convert_message_to_dict(message: MessageV1, responses_api: bool = False) -> dict: +def _convert_message_to_dict( + message: MessageV1, responses_ai_msg: bool = False +) -> dict: """Convert a LangChain message to a dictionary. Args: @@ -245,7 +249,9 @@ def _convert_message_to_dict(message: MessageV1, responses_api: bool = False) -> The dictionary. """ message_dict: dict[str, Any] = { - "content": _format_message_content(message.content, responses_api=responses_api) + "content": _format_message_content( + message.content, responses_ai_msg=responses_ai_msg + ) } if name := message.name: message_dict["name"] = name @@ -273,7 +279,7 @@ def _convert_message_to_dict(message: MessageV1, responses_api: bool = False) -> if ( block.get("type") == "audio" and (id_ := block.get("id")) - and not responses_api + and not responses_ai_msg ): # openai doesn't support passing the data back - only the id # https://platform.openai.com/docs/guides/audio/multi-turn-conversations @@ -2992,14 +2998,13 @@ def _oai_structured_outputs_parser( else: return parsed elif any( - block["type"] == "non_standard" and block["value"].get("type") == "refusal" + block["type"] == "non_standard" and "refusal" in block["value"] for block in ai_msg.content ): refusal = next( - block["value"]["text"] + block["value"]["refusal"] for block in ai_msg.content - if block["type"] == "non_standard" - and block["value"].get("type") == "refusal" + if block["type"] == "non_standard" and "refusal" in block["value"] ) raise OpenAIRefusalError(refusal) elif ai_msg.tool_calls: @@ -3246,12 +3251,13 @@ def _construct_responses_api_input(messages: Sequence[MessageV1]) -> list: """Construct the input for the OpenAI Responses API.""" input_ = [] 
for lc_msg in messages: - msg = _convert_message_to_dict(lc_msg, responses_api=True) if isinstance(lc_msg, AIMessageV1): + msg = _convert_message_to_dict(lc_msg, responses_ai_msg=True) msg["content"] = _convert_from_v1_to_responses( msg["content"], lc_msg.tool_calls ) else: + msg = _convert_message_to_dict(lc_msg) # Get content from non-standard content blocks for i, block in enumerate(msg["content"]): if block.get("type") == "non_standard": diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard_v1.py b/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard_v1.py new file mode 100644 index 00000000000..01c68882f64 --- /dev/null +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard_v1.py @@ -0,0 +1,163 @@ +"""Standard LangChain interface tests""" + +import base64 +from pathlib import Path +from typing import Literal, cast + +import httpx +import pytest +from langchain_core.v1.chat_models import BaseChatModel +from langchain_core.v1.messages import AIMessage, HumanMessage +from langchain_tests.v1.integration_tests import ChatModelIntegrationTests + +from langchain_openai.v1 import ChatOpenAI + +REPO_ROOT_DIR = Path(__file__).parents[6] + + +class TestOpenAIStandardV1(ChatModelIntegrationTests): + @property + def chat_model_class(self) -> type[BaseChatModel]: + return ChatOpenAI + + @property + def chat_model_params(self) -> dict: + return { + "model": "gpt-5-nano", + "stream_usage": True, + "reasoning_effort": "minimal", + } + + @property + def supports_image_inputs(self) -> bool: + return True + + @property + def supports_image_urls(self) -> bool: + return True + + @property + def supports_json_mode(self) -> bool: + return True + + @property + def supports_anthropic_inputs(self) -> bool: + return True + + @property + def supported_usage_metadata_details( + self, + ) -> dict[ + Literal["invoke", "stream"], + list[ + Literal[ + "audio_input", + "audio_output", + "reasoning_output", + 
"cache_read_input", + "cache_creation_input", + ] + ], + ]: + return {"invoke": ["reasoning_output", "cache_read_input"], "stream": []} + + @property + def enable_vcr_tests(self) -> bool: + return True + + def invoke_with_cache_read_input(self, *, stream: bool = False) -> AIMessage: + with open(REPO_ROOT_DIR / "README.md") as f: + readme = f.read() + + input_ = f"""What's langchain? Here's the langchain README: + + {readme} + """ + llm = ChatOpenAI(model="gpt-4o-mini", stream_usage=True) + _invoke(llm, input_, stream) + # invoke twice so first invocation is cached + return _invoke(llm, input_, stream) + + def invoke_with_reasoning_output(self, *, stream: bool = False) -> AIMessage: + llm = ChatOpenAI(model="o1-mini", stream_usage=True, temperature=1) + input_ = ( + "explain the relationship between the 2008/9 economic crisis and the " + "startup ecosystem in the early 2010s" + ) + return _invoke(llm, input_, stream) + + @property + def supports_pdf_inputs(self) -> bool: + # OpenAI requires a filename for PDF inputs + # For now, we test with filename in OpenAI-specific tests + return False + + def test_openai_pdf_inputs(self, model: BaseChatModel) -> None: + """Test that the model can process PDF inputs.""" + url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" + pdf_data = base64.b64encode(httpx.get(url).content).decode("utf-8") + + message = HumanMessage( + [ + {"type": "text", "text": "What is the title of this document?"}, + { + "type": "file", + "mime_type": "application/pdf", + "base64": pdf_data, + "extras": {"filename": "my-pdf"}, # OpenAI requires a filename + }, + ] + ) + _ = model.invoke([message]) + + # Test OpenAI Chat Completions format + message = HumanMessage( + [ + {"type": "text", "text": "What is the title of this document?"}, + { # type: ignore[list-item] + "type": "file", + "file": { + "filename": "test file.pdf", + "file_data": f"data:application/pdf;base64,{pdf_data}", + }, + }, + ] + ) + _ = model.invoke([message]) 
+ + +def _invoke(llm: ChatOpenAI, input_: str, stream: bool) -> AIMessage: + if stream: + full = None + for chunk in llm.stream(input_): + full = full + chunk if full else chunk # type: ignore[operator] + return cast(AIMessage, full) + else: + return cast(AIMessage, llm.invoke(input_)) + + +@pytest.mark.skip() # Test either finishes in 5 seconds or 5 minutes. +def test_audio_model() -> None: + class AudioModelTests(ChatModelIntegrationTests): + @property + def chat_model_class(self) -> type[ChatOpenAI]: + return ChatOpenAI + + @property + def chat_model_params(self) -> dict: + return { + "model": "gpt-4o-audio-preview", + "temperature": 0, + "model_kwargs": { + "modalities": ["text", "audio"], + "audio": {"voice": "alloy", "format": "wav"}, + }, + } + + @property + def supports_audio_inputs(self) -> bool: + return True + + test_instance = AudioModelTests() + model = test_instance.chat_model_class(**test_instance.chat_model_params) + AudioModelTests().test_audio_inputs(model) diff --git a/libs/standard-tests/QUICK_START.md b/libs/standard-tests/QUICK_START.md index e3fd85ad900..e9fdf473ab3 100644 --- a/libs/standard-tests/QUICK_START.md +++ b/libs/standard-tests/QUICK_START.md @@ -13,7 +13,7 @@ New imports: from langchain_tests.unit_tests.chat_models import ChatModelUnitTests # v1 -from langchain_tests.unit_tests.chat_models_v1 import ChatModelV1UnitTests +from langchain_tests.v1.unit_tests.chat_models import ChatModelUnitTests as ChatModelV1UnitTests ``` ### 2. Minimal Configuration @@ -72,10 +72,6 @@ class TestAdvancedModelV1(ChatModelV1UnitTests): """Model provides source citations""" return True - @property - def supports_tool_calls(self): - """Tool calling with metadata""" - return True ``` ## 📋 Feature Reference @@ -163,7 +159,7 @@ for testing chat models that support the enhanced content blocks system. 
from typing import Any -from langchain_core.language_models.v1.chat_models import BaseChatModelV1 +from langchain_core.v1.chat_models import BaseChatModel as BaseChatModelV1 from langchain_core.language_models import GenericFakeChatModel from langchain_core.messages import BaseMessage from langchain_core.messages.content_blocks import TextContentBlock @@ -276,7 +272,7 @@ from typing import Any import pytest from langchain_core.language_models import BaseChatModel, GenericFakeChatModel -from langchain_tests.integration_tests.chat_models_v1 import ChatModelV1IntegrationTests +from langchain_tests.v1.integration_tests.chat_models import ChatModelIntegrationTests as ChatModelV1IntegrationTests # Example fake model for demonstration (replace with real model in practice) @@ -341,11 +337,6 @@ class TestFakeChatModelV1Integration(ChatModelV1IntegrationTests): """Disable web search for this fake model.""" return False - @property - def supports_tool_calls(self) -> bool: - """Enable tool calling tests.""" - return True - @property def has_tool_calling(self) -> bool: """Enable tool calling tests.""" diff --git a/libs/standard-tests/README.md b/libs/standard-tests/README.md index 8355f3c4f23..0b9738a3b07 100644 --- a/libs/standard-tests/README.md +++ b/libs/standard-tests/README.md @@ -92,4 +92,4 @@ as required is optional. 
For chat models that support the new content blocks v1 format (multimodal content, reasoning blocks, citations, etc.), use the v1 test suite instead: - See `QUICK_START.md` and `README_V1.md` for v1 testing documentation -- Use `ChatModelV1Tests` from `langchain_tests.unit_tests.chat_models_v1` +- Use `ChatModelTests` from `langchain_tests.v1.unit_tests.chat_models` diff --git a/libs/standard-tests/README_V1.md b/libs/standard-tests/README_V1.md index 11c653dc3c1..e1dc9113da5 100644 --- a/libs/standard-tests/README_V1.md +++ b/libs/standard-tests/README_V1.md @@ -14,10 +14,10 @@ The standard tests v1 package provides comprehensive testing for chat models tha ### Basic Unit Tests ```python -from langchain_tests.unit_tests.chat_models_v1 import ChatModelV1UnitTests +from langchain_tests.v1.unit_tests.chat_models import ChatModelUnitTests from your_package import YourChatModel -class TestYourChatModelV1(ChatModelV1UnitTests): +class TestYourChatModelV1(ChatModelUnitTests): @property def chat_model_class(self): return YourChatModel @@ -43,10 +43,10 @@ class TestYourChatModelV1(ChatModelV1UnitTests): ### Integration Tests ```python -from langchain_tests.integration_tests.chat_models_v1 import ChatModelV1IntegrationTests +from langchain_tests.v1.integration_tests.chat_models import ChatModelIntegrationTests from your_package import YourChatModel -class TestYourChatModelV1Integration(ChatModelV1IntegrationTests): +class TestYourChatModelV1Integration(ChatModelIntegrationTests): @property def chat_model_class(self): return YourChatModel @@ -81,14 +81,10 @@ class TestYourChatModelV1Integration(ChatModelV1IntegrationTests): - `supports_image_content_blocks`: `ImageContentBlock`s (v1 format) - `supports_video_content_blocks`: `VideoContentBlock`s (v1 format) - `supports_audio_content_blocks`: `AudioContentBlock`s (v1 format) -- `supports_plaintext_content_blocks`: `PlainTextContentBlock`s (plaintext from documents) -- `supports_file_content_blocks`: `FileContentBlock`s ### 
### Tool Calling -- `supports_tool_calls`: Tool calling with content blocks -- `supports_invalid_tool_calls`: Error handling for invalid tool calls -- `supports_tool_call_chunks`: Streaming tool call support +- `has_tool_calling`: Tool calling with content blocks ### Advanced Features @@ -99,7 +95,7 @@ class TestYourChatModelV1Integration(ChatModelV1IntegrationTests): ## Test Categories -### Unit Tests (`ChatModelV1Tests`) +### Unit Tests (`ChatModelTests`) - Content block format validation - Ser/deserialization @@ -108,7 +104,7 @@ class TestYourChatModelV1Integration(ChatModelV1IntegrationTests): - Error handling for invalid blocks - Backward compatibility with string content -### Integration Tests (`ChatModelV1IntegrationTests`) +### Integration Tests (`ChatModelIntegrationTests`) - Real multimodal content processing - Advanced reasoning with content blocks @@ -130,7 +126,7 @@ class TestYourChatModelV1Integration(ChatModelV1IntegrationTests): from langchain_tests.unit_tests.chat_models import ChatModelUnitTests # v1 - from langchain_tests.unit_tests.chat_models_v1 import ChatModelV1UnitTests + from langchain_tests.v1.unit_tests.chat_models import ChatModelUnitTests as ChatModelV1UnitTests ``` 2. 
**Configure content blocks support**: diff --git a/libs/standard-tests/langchain_tests/integration_tests/__init__.py b/libs/standard-tests/langchain_tests/integration_tests/__init__.py index 02979aa789f..fbe4888d1e5 100644 --- a/libs/standard-tests/langchain_tests/integration_tests/__init__.py +++ b/libs/standard-tests/langchain_tests/integration_tests/__init__.py @@ -20,7 +20,6 @@ for module in modules: from .base_store import BaseStoreAsyncTests, BaseStoreSyncTests from .cache import AsyncCacheTestSuite, SyncCacheTestSuite from .chat_models import ChatModelIntegrationTests -from .chat_models_v1 import ChatModelV1IntegrationTests from .embeddings import EmbeddingsIntegrationTests from .retrievers import RetrieversIntegrationTests from .tools import ToolsIntegrationTests @@ -31,7 +30,6 @@ __all__ = [ "BaseStoreAsyncTests", "BaseStoreSyncTests", "ChatModelIntegrationTests", - "ChatModelV1IntegrationTests", "EmbeddingsIntegrationTests", "RetrieversIntegrationTests", "SyncCacheTestSuite", diff --git a/libs/standard-tests/langchain_tests/v1/__init__.py b/libs/standard-tests/langchain_tests/v1/__init__.py new file mode 100644 index 00000000000..3677a77a697 --- /dev/null +++ b/libs/standard-tests/langchain_tests/v1/__init__.py @@ -0,0 +1,9 @@ +"""Base Test classes for standard testing. + +To learn how to use these classes, see the +`integration standard testing `__ +guide. + +This package provides both the original test suites and the v1 test suites that support +the new content blocks system introduced in ``langchain_core.messages.content_blocks``. 
+""" diff --git a/libs/standard-tests/langchain_tests/v1/integration_tests/__init__.py b/libs/standard-tests/langchain_tests/v1/integration_tests/__init__.py new file mode 100644 index 00000000000..18f5766bd5d --- /dev/null +++ b/libs/standard-tests/langchain_tests/v1/integration_tests/__init__.py @@ -0,0 +1,16 @@ +# ruff: noqa: E402 +import pytest + +# Rewrite assert statements for test suite so that implementations can +# see the full error message from failed asserts. +# https://docs.pytest.org/en/7.1.x/how-to/writing_plugins.html#assertion-rewriting +modules = ["chat_models"] + +for module in modules: + pytest.register_assert_rewrite(f"langchain_tests.v1.integration_tests.{module}") + +from .chat_models import ChatModelIntegrationTests + +__all__ = [ + "ChatModelIntegrationTests", +] diff --git a/libs/standard-tests/langchain_tests/integration_tests/chat_models_v1.py b/libs/standard-tests/langchain_tests/v1/integration_tests/chat_models.py similarity index 83% rename from libs/standard-tests/langchain_tests/integration_tests/chat_models_v1.py rename to libs/standard-tests/langchain_tests/v1/integration_tests/chat_models.py index 693896a8746..23d7a30ba36 100644 --- a/libs/standard-tests/langchain_tests/integration_tests/chat_models_v1.py +++ b/libs/standard-tests/langchain_tests/v1/integration_tests/chat_models.py @@ -11,7 +11,6 @@ from typing import Annotated, Any, Literal, Optional, TypedDict, Union, cast from unittest.mock import MagicMock import httpx -import langchain_core.messages.content_blocks as types import pytest from langchain_core.callbacks import BaseCallbackHandler from langchain_core.language_models.fake_chat_models import GenericFakeChatModel @@ -34,14 +33,8 @@ from langchain_core.messages.content_blocks import ( WebSearchCall, WebSearchResult, create_audio_block, - create_file_block, create_image_block, - create_non_standard_block, - create_plaintext_block, create_text_block, - create_tool_call, - is_reasoning_block, - is_text_block, 
is_tool_call_block, ) from langchain_core.output_parsers.string import StrOutputParser @@ -64,7 +57,7 @@ from pydantic import BaseModel, Field from pytest_benchmark.fixture import BenchmarkFixture # type: ignore[import-untyped] from vcr.cassette import Cassette -from langchain_tests.unit_tests.chat_models_v1 import ChatModelV1Tests +from langchain_tests.v1.unit_tests.chat_models import ChatModelTests # Content block type definitions for testing ContentBlock = Union[ @@ -204,7 +197,7 @@ def unicode_customer(customer_name: str, description: str) -> str: return f"Created customer: {customer_name} - {description}" -class ChatModelV1IntegrationTests(ChatModelV1Tests): +class ChatModelIntegrationTests(ChatModelTests): """Base class for v1 chat model integration tests. TODO: verify this entire docstring! @@ -219,11 +212,11 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): from typing import Type - from langchain_tests.integration_tests import ChatModelV1IntegrationTests - from my_package.chat_models import MyChatModel + from langchain_tests.v1.integration_tests import ChatModelIntegrationTests + from my_package.v1.chat_models import MyV1ChatModel - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property def chat_model_class(self) -> Type[MyV1ChatModel]: # Return the chat model class to test here @@ -489,7 +482,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. dropdown:: Troubleshooting First, debug - :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_invoke`. + :meth:`~langchain_tests.v1.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`. because ``ainvoke`` has a default implementation that calls ``invoke`` in an async context. @@ -512,7 +505,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. 
dropdown:: Troubleshooting First, debug - :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_invoke`. + :meth:`~langchain_tests.v1.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`. because ``stream`` has a default implementation that calls ``invoke`` and yields the result as a single chunk. @@ -538,9 +531,9 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. dropdown:: Troubleshooting First, debug - :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_stream`. + :meth:`~langchain_tests.v1.integration_tests.chat_models.ChatModelIntegrationTests.test_stream`. and - :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_ainvoke`. + :meth:`~langchain_tests.v1.integration_tests.chat_models.ChatModelIntegrationTests.test_ainvoke`. because ``astream`` has a default implementation that calls ``_stream`` in an async context if it is implemented, or ``ainvoke`` and yields the result as a single ``AIMessageChunk`` chunk if not. @@ -571,7 +564,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. dropdown:: Troubleshooting First, debug - :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_invoke` + :meth:`~langchain_tests.v1.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke` because ``batch`` has a default implementation that calls ``invoke`` for each message in the batch. @@ -607,9 +600,9 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. 
dropdown:: Troubleshooting First, debug - :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_batch` + :meth:`~langchain_tests.v1.integration_tests.chat_models.ChatModelIntegrationTests.test_batch` and - :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_ainvoke` + :meth:`~langchain_tests.v1.integration_tests.chat_models.ChatModelIntegrationTests.test_ainvoke` because ``abatch`` has a default implementation that calls ``ainvoke`` for each message in the batch. @@ -640,7 +633,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. dropdown:: Troubleshooting First, debug - :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_invoke` + :meth:`~langchain_tests.v1.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke` because this test also uses ``model.invoke()``. If that test passes but not this one, you should verify that: @@ -672,11 +665,11 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. dropdown:: Troubleshooting First, debug - :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_invoke` + :meth:`~langchain_tests.v1.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke` because this test also uses ``model.invoke()``. Second, debug - :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_conversation` + :meth:`~langchain_tests.v1.integration_tests.chat_models.ChatModelIntegrationTests.test_conversation` because this test is the "basic case" without double messages. If that test passes those but not this one, you should verify that: @@ -718,7 +711,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. 
code-block:: python - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property def returns_usage_metadata(self) -> bool: return False @@ -732,7 +725,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. code-block:: python - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property def supported_usage_metadata_details(self) -> dict: return { @@ -860,7 +853,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. code-block:: python - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property def returns_usage_metadata(self) -> bool: return False @@ -874,7 +867,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. code-block:: python - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property def supported_usage_metadata_details(self) -> dict: return { @@ -1003,7 +996,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. code-block:: python - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property def has_tool_calling(self) -> bool: return False @@ -1022,7 +1015,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. code-block:: python @pytest.mark.xfail(reason=("Does not support tool_choice.")) - def test_tool_calling(self, model: BaseChatModelV1) -> None: + def test_tool_calling(self, model: BaseChatModel) -> None: super().test_tool_calling(model) Otherwise, in the case that only one tool is bound, ensure that @@ -1062,7 +1055,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. 
code-block:: python - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property def has_tool_calling(self) -> bool: return False @@ -1081,7 +1074,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. code-block:: python @pytest.mark.xfail(reason=("Does not support tool_choice.")) - async def test_tool_calling_async(self, model: BaseChatModelV1) -> None: + async def test_tool_calling_async(self, model: BaseChatModel) -> None: await super().test_tool_calling_async(model) Otherwise, in the case that only one tool is bound, ensure that @@ -1121,7 +1114,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. code-block:: python - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property def has_tool_calling(self) -> bool: return False @@ -1140,7 +1133,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. code-block:: python @pytest.mark.xfail(reason=("Does not support tool_choice.")) - def test_bind_runnables_as_tools(self, model: BaseChatModelV1) -> None: + def test_bind_runnables_as_tools(self, model: BaseChatModel) -> None: super().test_bind_runnables_as_tools(model) Otherwise, ensure that the ``tool_choice_value`` property is correctly @@ -1209,7 +1202,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. code-block:: python - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property def has_tool_calling(self) -> bool: return False @@ -1288,7 +1281,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. code-block:: python - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property def has_tool_choice(self) -> bool: return False @@ -1341,7 +1334,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. 
code-block:: python - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property def has_tool_calling(self) -> bool: return False @@ -1361,7 +1354,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. code-block:: python @pytest.mark.xfail(reason=("Does not support tool_choice.")) - def test_tool_calling_with_no_arguments(self, model: BaseChatModelV1) -> None: + def test_tool_calling_with_no_arguments(self, model: BaseChatModel) -> None: super().test_tool_calling_with_no_arguments(model) Otherwise, in the case that only one tool is bound, ensure that @@ -1415,7 +1408,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. code-block:: python - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property def has_tool_calling(self) -> bool: return False @@ -1432,9 +1425,17 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): model_with_tools = model.bind_tools([my_adder_tool]) messages = [ HumanMessage("What is 1 + 2?"), - create_tool_call( - "my_adder_tool", {"a": 1}, id="abc123" - ), # Missing required argument 'b' + AIMessage( + "", + tool_calls=[ + { + "name": "my_adder_tool", + "args": {"a": 1}, + "id": "abc123", + "type": "tool_call", + }, + ], + ), ToolMessage( "Error: Missing required argument 'b'.", tool_call_id="abc123", @@ -1468,7 +1469,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. code-block:: python - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property def has_tool_calling(self) -> bool: return False @@ -1527,7 +1528,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. 
code-block:: python - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property def has_structured_output(self) -> bool: return False @@ -1608,7 +1609,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. code-block:: python - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property def has_structured_output(self) -> bool: return False @@ -1686,7 +1687,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. code-block:: python - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property def has_structured_output(self) -> bool: return False @@ -1758,7 +1759,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. code-block:: python - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property def supports_json_mode(self) -> bool: return False @@ -1833,7 +1834,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. 
code-block:: python - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property def supports_pdf_inputs(self) -> bool: @@ -1848,45 +1849,43 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): """ pytest.xfail("Test not implemented yet.") - # TODO - # if not self.supports_pdf_inputs: - # pytest.skip("Model does not support PDF inputs.") - # url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" - # pdf_data = base64.b64encode(httpx.get(url).content).decode("utf-8") + if not self.supports_pdf_inputs: + pytest.skip("Model does not support PDF inputs.") + url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" + pdf_data = base64.b64encode(httpx.get(url).content).decode("utf-8") - # message = HumanMessage( - # [ - # { - # "type": "text", - # "text": "Summarize this document:", - # }, - # { - # "type": "file", - # "source_type": "base64", - # "mime_type": "application/pdf", - # "data": pdf_data, - # }, - # ] - # ) - # _ = model.invoke([message]) + message = HumanMessage( + [ + { + "type": "text", + "text": "Summarize this document:", + }, + { + "type": "file", + "mime_type": "application/pdf", + "base64": pdf_data, + }, + ] + ) + _ = model.invoke([message]) - # # Test OpenAI Chat Completions format - # message = HumanMessage( - # [ - # { - # "type": "text", - # "text": "Summarize this document:", - # }, - # { - # "type": "file", - # "file": { - # "filename": "test file.pdf", - # "file_data": f"data:application/pdf;base64,{pdf_data}", - # }, - # }, - # ] - # ) - # _ = model.invoke([message]) + # Test OpenAI Chat Completions format + message = HumanMessage( + [ + { + "type": "text", + "text": "Summarize this document:", + }, + { + "type": "file", + "file": { + "filename": "test file.pdf", + "file_data": f"data:application/pdf;base64,{pdf_data}", + }, + }, + ] + ) + _ = model.invoke([message]) def test_audio_inputs(self, model: 
BaseChatModel) -> None: """Test that the model can process audio inputs. @@ -1907,25 +1906,25 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. dropdown:: Configuration - To disable this test, set ``supports_audio_content_blocks`` to False in your + To disable this test, set ``supports_audio_inputs`` to False in your test class: .. code-block:: python - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property - def supports_audio_content_blocks(self) -> bool: + def supports_audio_inputs(self) -> bool: return False .. dropdown:: Troubleshooting If this test fails, check that the model can correctly handle messages - with audio content blocks. Otherwise, set the ``supports_audio_content_blocks`` + with audio content blocks. Otherwise, set the ``supports_audio_inputs`` property to False. """ # noqa: E501 - if not self.supports_audio_content_blocks: + if not self.supports_audio_inputs: pytest.skip("Model does not support AudioContentBlock inputs.") url = "https://upload.wikimedia.org/wikipedia/commons/3/3d/Alcal%C3%A1_de_Henares_%28RPS_13-04-2024%29_canto_de_ruise%C3%B1or_%28Luscinia_megarhynchos%29_en_el_Soto_del_Henares.wav" @@ -1942,21 +1941,20 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): ) _ = model.invoke([message]) - # TODO? # Test OpenAI Chat Completions format - # message = HumanMessage( - # [ - # { - # "type": "text", - # "text": "Describe this audio:", - # }, - # { - # "type": "input_audio", - # "input_audio": {"data": audio_data, "format": "wav"}, - # }, - # ] - # ) - # _ = model.invoke([message]) + message = HumanMessage( + [ + { + "type": "text", + "text": "Describe this audio:", + }, + { # type: ignore[list-item] + "type": "input_audio", + "input_audio": {"data": audio_data, "format": "wav"}, + }, + ] + ) + _ = model.invoke([message]) def test_image_inputs(self, model: BaseChatModel) -> None: """Test that the model can process image inputs. 
@@ -1991,14 +1989,14 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. dropdown:: Configuration - To disable this test, set ``supports_image_content_blocks`` to False in your + To disable this test, set ``supports_image_inputs`` to False in your test class: .. code-block:: python - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property - def supports_image_content_blocks(self) -> bool: + def supports_image_inputs(self) -> bool: return False # Can also explicitly disable testing image URLs: @@ -2010,10 +2008,10 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): If this test fails, check that the model can correctly handle messages with image content blocks, including base64-encoded images. Otherwise, set - the ``supports_image_content_blocks`` property to False. + the ``supports_image_inputs`` property to False. """ - if not self.supports_image_content_blocks: + if not self.supports_image_inputs: pytest.skip("Model does not support image message.") image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" @@ -2108,7 +2106,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. code-block:: python - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property def supports_image_tool_message(self) -> bool: return False @@ -2232,7 +2230,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. code-block:: python - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property def supports_anthropic_inputs(self) -> bool: return False @@ -2392,7 +2390,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): .. 
code-block:: python - class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + class TestMyV1ChatModelIntegration(ChatModelIntegrationTests): @property def has_tool_calling(self) -> bool: return False @@ -2420,7 +2418,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): if not self.has_tool_calling: pytest.skip("Test requires tool calling.") - @tool + @tool(message_version="v1") def get_weather(location: str) -> str: """Call to surf the web.""" return "It's sunny." @@ -2450,7 +2448,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): ) -> None: """Test that streaming does not introduce undue overhead. - See ``enable_vcr_tests`` dropdown :class:`above <ChatModelV1IntegrationTests>` + See ``enable_vcr_tests`` dropdown :class:`above <ChatModelIntegrationTests>` for more information. .. dropdown:: Configuration @@ -2468,7 +2466,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): VCR will by default record authentication headers and other sensitive information in cassettes. See ``enable_vcr_tests`` dropdown - :class:`above <ChatModelV1IntegrationTests>` for how to configure what + :class:`above <ChatModelIntegrationTests>` for how to configure what information is recorded in cassettes. """ @@ -2621,7 +2619,7 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): # ), # ] - # if self.supports_audio_content_blocks: + # if self.supports_audio_inputs: # content_blocks.append( # create_audio_block( # base64=_get_test_audio_base64(), @@ -2642,374 +2640,150 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): # ] # assert len(reasoning_blocks) > 0 - def test_citation_generation_with_sources(self, model: BaseChatModel) -> None: - """Test that the model can generate ``Citations`` with source links. - - TODO: expand docstring - - """ - if not self.supports_structured_citations: - pytest.skip("Model does not support structured citations.") - - message = HumanMessage( - "Provide factual information about the distance to the moon with proper " - "citations to scientific sources." 
- ) - result = model.invoke([message]) - - assert isinstance(result, AIMessage) - - # Check for text blocks with citations - text_blocks_with_citations = [] - for block in result.content: - if ( - isinstance(block, dict) - and is_text_block(block) - and "annotations" in block - ): - annotations = cast("list[dict[str, Any]]", block.get("annotations", [])) - citations = [ - ann - for ann in annotations - if isinstance(ann, dict) and ann.get("type") == "citation" - ] - if citations: - text_blocks_with_citations.append(block) - assert len(text_blocks_with_citations) > 0 - - # Validate citation structure - for block in text_blocks_with_citations: - annotations = cast("list[dict[str, Any]]", block.get("annotations", [])) - for annotation in annotations: - if annotation.get("type") == "citation": - # TODO: evaluate these since none are *technically* required - # This may be a test that needs adjustment on per-integration basis - assert "cited_text" in annotation - assert "start_index" in annotation - assert "end_index" in annotation - - def test_web_search_integration(self, model: BaseChatModel) -> None: - """Test web search content blocks integration. - - TODO: expand docstring - - """ - if not self.supports_web_search_blocks: - pytest.skip("Model does not support web search blocks.") - - message = HumanMessage( - "Search for the latest developments in quantum computing." - ) - result = model.invoke([message]) - - assert isinstance(result, AIMessage) - - # Check for web search blocks - search_call_blocks = [ - block - for block in result.content - if isinstance(block, dict) and block.get("type") == "web_search_call" - ] - search_result_blocks = [ - block - for block in result.content - if isinstance(block, dict) and block.get("type") == "web_search_result" - ] - # TODO: should this be one or the other or both? 
- assert len(search_call_blocks) > 0 or len(search_result_blocks) > 0 - - def test_code_interpreter_blocks(self, model: BaseChatModel) -> None: - """Test code interpreter content blocks. - - TODO: expand docstring - - """ - if not self.supports_code_interpreter: - pytest.skip("Model does not support code interpreter blocks.") - - message = HumanMessage("Calculate the factorial of 10 using Python code.") - result = model.invoke([message]) - - assert isinstance(result, AIMessage) - - # Check for code interpreter blocks - code_blocks = [ - block - for block in result.content - if isinstance(block, dict) - and block.get("type") - in [ - "code_interpreter_call", - "code_interpreter_output", - "code_interpreter_result", - ] - ] - # TODO: should we require all three types or just an output/result? - assert len(code_blocks) > 0 - - def test_tool_calling_with_content_blocks(self, model: BaseChatModel) -> None: - """Test tool calling with content blocks. - - TODO: expand docstring - - """ - if not self.has_tool_calling: - pytest.skip("Model does not support tool calls.") - - @tool - def calculate_area(length: float, width: float) -> str: - """Calculate the area of a rectangle.""" - area = length * width - return f"The area is {area} square units." - - model_with_tools = model.bind_tools([calculate_area]) - message = HumanMessage( - "Calculate the area of a rectangle with length 5 and width 3." - ) - - result = model_with_tools.invoke([message]) - _validate_tool_call_message(result) - - def test_plaintext_content_blocks_from_documents( - self, model: BaseChatModel - ) -> None: - """Test PlainTextContentBlock for document plaintext content. 
- - TODO: expand docstring - - """ - if not self.supports_plaintext_content_blocks: - pytest.skip("Model does not support PlainTextContentBlock.") - - # Test with PlainTextContentBlock (plaintext from document) - plaintext_block = create_plaintext_block( - text="This is plaintext content extracted from a document.", - file_id="doc_123", - ) - - message = HumanMessage( - content=cast("list[types.ContentBlock]", [plaintext_block]) - ) - result = model.invoke([message]) - - assert isinstance(result, AIMessage) - # TODO expand - - def test_content_block_streaming_integration(self, model: BaseChatModel) -> None: - """Test streaming with content blocks. - - TODO: expand docstring - - """ - if not self.supports_content_blocks_v1: - pytest.skip("Model does not support content blocks v1.") - - message = HumanMessage( - content=[ - { - "type": "text", - "text": "Write a detailed explanation of machine learning.", - } - ] - ) - - chunks = [] - for chunk in model.stream([message]): - chunks.append(chunk) - assert isinstance(chunk, (AIMessage, AIMessageChunk)) - - assert len(chunks) > 1 # Should receive multiple chunks - - # Aggregate chunks - final_message = chunks[0] - for chunk in chunks[1:]: - final_message = final_message + chunk - - assert isinstance(final_message.content, list) - - def test_error_handling_with_invalid_content_blocks( - self, model: BaseChatModel - ) -> None: - """Test error handling with various invalid content block configurations. 
- - TODO: expand docstring - - """ - if not self.supports_content_blocks_v1: - pytest.skip("Model does not support content blocks v1.") - - test_cases = [ - {"type": "text"}, # Missing text field - {"type": "image"}, # Missing url/mime_type - {"type": "tool_call", "name": "test"}, # Missing args/id - ] - - for invalid_block in test_cases: - message = HumanMessage([invalid_block]) # type: ignore[list-item] - - # Should either handle gracefully or raise appropriate error - try: - result = model.invoke([message]) - assert isinstance(result, AIMessage) - except (ValueError, TypeError, KeyError) as e: - # Acceptable to raise validation errors - assert len(str(e)) > 0 - - async def test_async_content_blocks_processing(self, model: BaseChatModel) -> None: - """Test asynchronous processing of content blocks. - - TODO: expand docstring - - """ - if not self.supports_content_blocks_v1: - pytest.skip("Model does not support content blocks v1.") - - message = HumanMessage("Generate a creative story about space exploration.") - - result = await model.ainvoke([message]) - assert isinstance(result, AIMessage) - - def test_input_conversion_string(self, model: BaseChatModel) -> None: - """Test that string input is properly converted to messages. - - TODO: expand docstring - - """ - result = model.invoke("Test string input") - assert isinstance(result, AIMessage) - assert result.content is not None - - def test_input_conversion_empty_string(self, model: BaseChatModel) -> None: - """Test that empty string input is handled gracefully. - - TODO: expand docstring - - """ - result = model.invoke("") - assert isinstance(result, AIMessage) - - def test_input_conversion_message_v1_list(self, model: BaseChatModel) -> None: - """Test that v1 message list input is handled correctly. 
- - TODO: expand docstring - - """ - messages = [HumanMessage("Test message")] - result = model.invoke(messages) - assert isinstance(result, AIMessage) - assert result.content is not None - - def test_text_content_blocks_basic(self, model: BaseChatModel) -> None: - """Test that the model can handle the ``TextContentBlock`` format.""" - if not self.supports_text_content_blocks: - pytest.skip("Model does not support TextContentBlock (rare!)") - - text_block = create_text_block("Hello, world!") - message = HumanMessage(content=[text_block]) - - result = model.invoke([message]) - assert isinstance(result, AIMessage) - assert result.content is not None - - def test_mixed_content_blocks_basic(self, model: BaseChatModel) -> None: - """Test that the model can handle messages with mixed content blocks.""" - if not ( - self.supports_text_content_blocks and self.supports_image_content_blocks - ): - pytest.skip( - "Model doesn't support mixed content blocks (concurrent text and image)" - ) - - content_blocks: list[types.ContentBlock] = [ - create_text_block("Describe this image:"), - create_image_block( - base64="iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==", - mime_type="image/png", - ), - ] - - message = HumanMessage(content=content_blocks) - result = model.invoke([message]) - - assert isinstance(result, AIMessage) - assert result.content is not None - - def test_reasoning_content_blocks_basic(self, model: BaseChatModel) -> None: - """Test that the model can generate ``ReasoningContentBlock``. - - If your integration requires a reasoning parameter to be explicitly set, you - will need to override this test to set it appropriately. 
- - """ - if not self.supports_reasoning_content_blocks: - pytest.skip("Model does not support ReasoningContentBlock.") - - message = HumanMessage("Think step by step: What is 2 + 2?") - result = model.invoke([message]) - - assert isinstance(result, AIMessage) - if isinstance(result.content, list): - reasoning_blocks = [ - block - for block in result.content - if isinstance(block, dict) and is_reasoning_block(block) - ] - assert len(reasoning_blocks) > 0, ( - "Expected reasoning content blocks but found none. " - f"Content blocks: {[block.get('type') for block in result.content]}" - ) - - def test_non_standard_content_blocks_basic(self, model: BaseChatModel) -> None: - """Test that the model can handle ``NonStandardContentBlock``.""" - if not self.supports_non_standard_blocks: - pytest.skip("Model does not support NonStandardContentBlock.") - - non_standard_block = create_non_standard_block( - { - "custom_field": "custom_value", - "data": [1, 2, 3], - } - ) - - message = HumanMessage(content=[non_standard_block]) - - # Should not raise an error - result = model.invoke([message]) - assert isinstance(result, AIMessage) - - def test_invalid_tool_call_handling_basic(self, model: BaseChatModel) -> None: - """Test that the model can handle ``InvalidToolCall`` blocks gracefully.""" - if not self.supports_invalid_tool_calls: - pytest.skip("Model does not support InvalidToolCall handling.") - - invalid_tool_call: InvalidToolCall = { - "type": "invalid_tool_call", - "name": "nonexistent_tool", - "args": None, - "id": "invalid_123", - "error": "Tool not found", - } - - # Create a message with invalid tool call in history - ai_message = AIMessage(content=[invalid_tool_call]) - follow_up = HumanMessage("Please try again with a valid approach.") - - result = model.invoke([ai_message, follow_up]) - assert isinstance(result, AIMessage) - assert result.content is not None - - def test_file_content_blocks_basic(self, model: BaseChatModel) -> None: - """Test that the model can handle 
``FileContentBlock``.""" - if not self.supports_file_content_blocks: - pytest.skip("Model does not support FileContentBlock.") - - file_block = create_file_block( - base64="SGVsbG8sIHdvcmxkIQ==", # "Hello, world!" - mime_type="text/plain", - ) - - message = HumanMessage(content=[file_block]) - result = model.invoke([message]) - - assert isinstance(result, AIMessage) - assert result.content is not None + # def test_citation_generation_with_sources(self, model: BaseChatModel) -> None: + # """Test that the model can generate ``Citations`` with source links. + + # TODO: expand docstring + + # """ + # if not self.supports_citations: + # pytest.skip("Model does not support citations.") + + # message = HumanMessage( + # "Provide factual information about the distance to the moon with proper " + # "citations to scientific sources." + # ) + # result = model.invoke([message]) + + # assert isinstance(result, AIMessage) + + # # Check for text blocks with citations + # text_blocks_with_citations = [] + # for block in result.content: + # if ( + # isinstance(block, dict) + # and is_text_block(block) + # and "annotations" in block + # ): + # annotations = cast("list[dict[str, Any]]", block.get("annotations", [])) # noqa: E501 + # citations = [ + # ann + # for ann in annotations + # if isinstance(ann, dict) and ann.get("type") == "citation" + # ] + # if citations: + # text_blocks_with_citations.append(block) + # assert len(text_blocks_with_citations) > 0 + + # # Validate citation structure + # for block in text_blocks_with_citations: + # annotations = cast("list[dict[str, Any]]", block.get("annotations", [])) + # for annotation in annotations: + # if annotation.get("type") == "citation": + # # TODO: evaluate these since none are *technically* required + # # This may need adjustment on per-integration basis + # assert "cited_text" in annotation + # assert "start_index" in annotation + # assert "end_index" in annotation + + # def test_web_search_integration(self, model: 
BaseChatModel) -> None: + # """Test web search content blocks integration. + + # TODO: expand docstring + + # """ + # if not self.supports_web_search_blocks: + # pytest.skip("Model does not support web search blocks.") + + # message = HumanMessage( + # "Search for the latest developments in quantum computing." + # ) + # result = model.invoke([message]) + + # assert isinstance(result, AIMessage) + + # # Check for web search blocks + # search_call_blocks = [ + # block + # for block in result.content + # if isinstance(block, dict) and block.get("type") == "web_search_call" + # ] + # search_result_blocks = [ + # block + # for block in result.content + # if isinstance(block, dict) and block.get("type") == "web_search_result" + # ] + # # TODO: should this be one or the other or both? + # assert len(search_call_blocks) > 0 or len(search_result_blocks) > 0 + + # def test_code_interpreter_blocks(self, model: BaseChatModel) -> None: + # """Test code interpreter content blocks. + + # TODO: expand docstring + + # """ + # if not self.supports_code_interpreter: + # pytest.skip("Model does not support code interpreter blocks.") + + # message = HumanMessage("Calculate the factorial of 10 using Python code.") + # result = model.invoke([message]) + + # assert isinstance(result, AIMessage) + + # # Check for code interpreter blocks + # code_blocks = [ + # block + # for block in result.content + # if isinstance(block, dict) + # and block.get("type") + # in [ + # "code_interpreter_call", + # "code_interpreter_output", + # "code_interpreter_result", + # ] + # ] + # # TODO: should we require all three types or just an output/result? + # assert len(code_blocks) > 0 + + # def test_reasoning_content_blocks_basic(self, model: BaseChatModel) -> None: + # """Test that the model can generate ``ReasoningContentBlock``. + + # If your integration requires a reasoning parameter to be explicitly set, you + # will need to override this test to set it appropriately. 
+ + # """ + # if not self.supports_reasoning_content_blocks: + # pytest.skip("Model does not support ReasoningContentBlock.") + + # message = HumanMessage("Think step by step: What is 2 + 2?") + # result = model.invoke([message]) + + # assert isinstance(result, AIMessage) + # if isinstance(result.content, list): + # reasoning_blocks = [ + # block + # for block in result.content + # if isinstance(block, dict) and is_reasoning_block(block) + # ] + # assert len(reasoning_blocks) > 0, ( + # "Expected reasoning content blocks but found none. " + # f"Content blocks: {[block.get('type') for block in result.content]}" + # ) + + # def test_non_standard_content_blocks_basic(self, model: BaseChatModel) -> None: + # """Test that the model can handle ``NonStandardContentBlock``.""" + # if not self.supports_non_standard_blocks: + # pytest.skip("Model does not support NonStandardContentBlock.") + + # non_standard_block = create_non_standard_block( + # { + # "custom_field": "custom_value", + # "data": [1, 2, 3], + # } + # ) + + # message = HumanMessage(content=[non_standard_block]) + + # # Should not raise an error + # result = model.invoke([message]) + # assert isinstance(result, AIMessage) diff --git a/libs/standard-tests/langchain_tests/v1/unit_tests/__init__.py b/libs/standard-tests/langchain_tests/v1/unit_tests/__init__.py new file mode 100644 index 00000000000..b7fcd93d3f0 --- /dev/null +++ b/libs/standard-tests/langchain_tests/v1/unit_tests/__init__.py @@ -0,0 +1,14 @@ +# ruff: noqa: E402 +import pytest + +# Rewrite assert statements for test suite so that implementations can +# see the full error message from failed asserts. 
+# https://docs.pytest.org/en/7.1.x/how-to/writing_plugins.html#assertion-rewriting +modules = ["chat_models"] + +for module in modules: + pytest.register_assert_rewrite(f"langchain_tests.unit_tests.{module}") + +from .chat_models import ChatModelUnitTests + +__all__ = ["ChatModelUnitTests"] diff --git a/libs/standard-tests/langchain_tests/unit_tests/chat_models_v1.py b/libs/standard-tests/langchain_tests/v1/unit_tests/chat_models.py similarity index 93% rename from libs/standard-tests/langchain_tests/unit_tests/chat_models_v1.py rename to libs/standard-tests/langchain_tests/v1/unit_tests/chat_models.py index 26e3afcf640..01228a8ced4 100644 --- a/libs/standard-tests/langchain_tests/unit_tests/chat_models_v1.py +++ b/libs/standard-tests/langchain_tests/v1/unit_tests/chat_models.py @@ -72,7 +72,7 @@ if PYDANTIC_MAJOR_VERSION == 2: TEST_PYDANTIC_MODELS.append(generate_schema_pydantic_v1_from_2()) -class ChatModelV1Tests(BaseStandardTests): +class ChatModelTests(BaseStandardTests): """Test suite for v1 chat models. This class provides comprehensive testing for the new message system introduced in @@ -139,15 +139,6 @@ class ChatModelV1Tests(BaseStandardTests): """Whether the model supports tool calling.""" return self.chat_model_class.bind_tools is not BaseChatModel.bind_tools - @property - def tool_choice_value(self) -> Optional[str]: - """(None or str) To use for tool choice when used in tests. - - Not required. 
- - """ - return None - @property def has_tool_choice(self) -> bool: """Whether the model supports forcing tool calling via ``tool_choice``.""" @@ -184,6 +175,35 @@ class ChatModelV1Tests(BaseStandardTests): """ return False + @property + def supports_image_inputs(self) -> bool: + """(bool) whether the chat model supports image inputs, defaults to ``False``.""" # noqa: E501 + return False + + @property + def supports_image_urls(self) -> bool: + """(bool) whether the chat model supports image inputs from URLs, defaults to ``False``.""" # noqa: E501 + return False + + @property + def supports_pdf_inputs(self) -> bool: + """(bool) whether the chat model supports PDF inputs, defaults to ``False``.""" + return False + + @property + def supports_audio_inputs(self) -> bool: + """(bool) whether the chat model supports audio inputs, defaults to ``False``.""" # noqa: E501 + return False + + @property + def supports_video_inputs(self) -> bool: + """(bool) whether the chat model supports video inputs, defaults to ``False``. + + No current tests are written for this feature. + + """ + return False + # Content Block Support Properties @property def supports_content_blocks_v1(self) -> bool: @@ -198,14 +218,10 @@ class ChatModelV1Tests(BaseStandardTests): support. Each defaults to False: - ``supports_reasoning_content_blocks`` - - ``supports_plaintext_content_blocks`` - - ``supports_file_content_blocks`` - ``supports_image_content_blocks`` - - ``supports_audio_content_blocks`` - ``supports_video_content_blocks`` - ``supports_citations`` - ``supports_web_search_blocks`` - - ``supports_invalid_tool_calls`` """ return True @@ -238,48 +254,6 @@ class ChatModelV1Tests(BaseStandardTests): """ return False - @property - def supports_plaintext_content_blocks(self) -> bool: - """Whether the model supports ``PlainTextContentBlock``. - - Defaults to False. 
- - """ - return False - - @property - def supports_file_content_blocks(self) -> bool: - """Whether the model supports ``FileContentBlock``. - - Replaces ``supports_pdf_inputs`` from v0. - - Defaults to False. - - """ - return False - - @property - def supports_image_content_blocks(self) -> bool: - """Whether the model supports ``ImageContentBlock``. - - Replaces ``supports_image_inputs`` from v0. - - Defaults to False. - - """ - return False - - @property - def supports_audio_content_blocks(self) -> bool: - """Whether the model supports ``AudioContentBlock``. - - Replaces ``supports_audio_inputs`` from v0. - - Defaults to False. - - """ - return False - @property def supports_video_content_blocks(self) -> bool: """Whether the model supports ``VideoContentBlock``. @@ -294,10 +268,7 @@ class ChatModelV1Tests(BaseStandardTests): @property def supports_multimodal_reasoning(self) -> bool: """Whether the model can reason about multimodal content.""" - return ( - self.supports_image_content_blocks - and self.supports_reasoning_content_blocks - ) + return self.supports_image_inputs and self.supports_reasoning_content_blocks @property def supports_citations(self) -> bool: @@ -308,11 +279,6 @@ class ChatModelV1Tests(BaseStandardTests): """ return False - @property - def supports_structured_citations(self) -> bool: - """Whether the model supports structured citation generation.""" - return self.supports_citations - @property def supports_web_search_blocks(self) -> bool: """Whether the model supports ``WebSearchCall``/``WebSearchResult`` blocks. @@ -331,15 +297,6 @@ class ChatModelV1Tests(BaseStandardTests): """ return False - @property - def supports_invalid_tool_calls(self) -> bool: - """Whether the model can handle ``InvalidToolCall`` blocks. - - Defaults to False. - - """ - return False - @property def returns_usage_metadata(self) -> bool: """Whether the model returns usage metadata on invoke and streaming. 
@@ -391,7 +348,7 @@ class ChatModelV1Tests(BaseStandardTests): return {"invoke": [], "stream": []} -class ChatModelV1UnitTests(ChatModelV1Tests): +class ChatModelUnitTests(ChatModelTests): """Base class for chat model v1 unit tests. These tests run in isolation without external dependencies. @@ -406,11 +363,11 @@ class ChatModelV1UnitTests(ChatModelV1Tests): from typing import Type - from langchain_tests.unit_tests import ChatModelV1UnitTests - from my_package.chat_models import MyChatModel + from langchain_tests.v1.unit_tests import ChatModelUnitTests + from my_package.v1.chat_models import MyChatModel - class TestMyChatModelUnit(ChatModelV1UnitTests): + class TestMyChatModelUnit(ChatModelUnitTests): @property def chat_model_class(self) -> Type[MyChatModel]: # Return the chat model class to test here diff --git a/libs/standard-tests/tests/unit_tests/test_custom_chat_model_v1.py b/libs/standard-tests/tests/unit_tests/test_custom_chat_model_v1.py index 7b6dc556a06..eb49b6729f0 100644 --- a/libs/standard-tests/tests/unit_tests/test_custom_chat_model_v1.py +++ b/libs/standard-tests/tests/unit_tests/test_custom_chat_model_v1.py @@ -2,12 +2,12 @@ import pytest -from langchain_tests.unit_tests.chat_models_v1 import ChatModelV1UnitTests +from langchain_tests.v1.unit_tests.chat_models import ChatModelUnitTests from .custom_chat_model_v1 import ChatParrotLinkV1 -class TestChatParrotLinkV1Unit(ChatModelV1UnitTests): +class TestChatParrotLinkV1Unit(ChatModelUnitTests): """Unit tests for ``ChatParrotLinkV1`` using the standard v1 test suite.""" @property @@ -66,16 +66,6 @@ class TestChatParrotLinkV1Unit(ChatModelV1UnitTests): """``ChatParrotLinkV1`` does not generate ``ReasoningContentBlock``.""" return False - @property - def supports_plaintext_content_blocks(self) -> bool: - """``ChatParrotLinkV1`` does not support ``PlainTextContentBlock``.""" - return False - - @property - def supports_file_content_blocks(self) -> bool: - """``ChatParrotLinkV1`` does not support 
``FileContentBlock``.""" - return False - @property def supports_image_content_blocks(self) -> bool: """``ChatParrotLinkV1`` does not support ``ImageContentBlock``.""" @@ -100,18 +90,3 @@ class TestChatParrotLinkV1Unit(ChatModelV1UnitTests): def supports_web_search_blocks(self) -> bool: """``ChatParrotLinkV1`` does not support web search blocks.""" return False - - @property - def supports_tool_calls(self) -> bool: - """``ChatParrotLinkV1`` does not support tool calls.""" - return False - - @property - def supports_invalid_tool_calls(self) -> bool: - """``ChatParrotLinkV1`` does not support ``InvalidToolCall`` handling.""" - return False - - @property - def supports_tool_call_chunks(self) -> bool: - """``ChatParrotLinkV1`` does not support ``ToolCallChunk`` blocks.""" - return False