diff --git a/libs/standard-tests/QUICK_START.md b/libs/standard-tests/QUICK_START.md new file mode 100644 index 00000000000..b563c8d1eca --- /dev/null +++ b/libs/standard-tests/QUICK_START.md @@ -0,0 +1,451 @@ +# Standard Tests V1 - Quick Start Guide + +This guide shows you how to quickly get started with the new content blocks v1 test suite. + +## 🚀 Quick Usage + +### 1. Basic Setup + +New imports: + +```python +# v0 +from langchain_tests.unit_tests.chat_models import ChatModelUnitTests + +# v1 +from langchain_tests.unit_tests.chat_models_v1 import ChatModelV1UnitTests +``` + +### 2. Minimal Configuration + +```python +class TestMyChatModelV1(ChatModelV1UnitTests): + @property + def chat_model_class(self): + return MyChatModelV1 + + # Enable content blocks support + @property + def supports_content_blocks_v1(self): + return True + + # The rest should be the same + @property + def chat_model_params(self): + return {"api_key": "test-key"} +``` + +### 3. Run Tests + +```bash +uv run --group test pytest tests/unit_tests/test_my_model_v1.py -v +``` + +## ⚙️ Feature Configuration + +Like before, only enable the features your model supports: + +```python +class TestAdvancedModelV1(ChatModelV1UnitTests): + # REQUIRED + @property + def supports_content_blocks_v1(self): + return True + + # Multimodal features + @property + def supports_image_content_blocks(self): + return True # ✅ Enable if supported + + @property + def supports_video_content_blocks(self): + return False # ❌ Disable if not supported, but will default to False if not explicitly set + + # Advanced features + @property + def supports_reasoning_content_blocks(self): + """Model generates reasoning steps""" + return True + + @property + def supports_citations(self): + """Model provides source citations""" + return True + + @property + def supports_enhanced_tool_calls(self): + """Enhanced tool calling with metadata""" + return True +``` + +## 📋 Feature Reference + +| Property | Description | Default | 
+|----------|-------------|---------| +| `supports_content_blocks_v1` | Core content blocks support | `True` | +| `supports_text_content_blocks` | Basic text blocks | `True` | +| `supports_image_content_blocks` | Image content blocks (v1) | `False` | +| `supports_video_content_blocks` | Video content blocks (v1) | `False` | +| `supports_audio_content_blocks` | Audio content blocks (v1) | `False` | +| `supports_file_content_blocks` | File content blocks | `False` | +| `supports_reasoning_content_blocks` | Reasoning/thinking blocks | `False` | +| `supports_citations` | Citation annotations | `False` | +| `supports_web_search_blocks` | Web search integration | `False` | +| `supports_enhanced_tool_calls` | Enhanced tool calling | `False` | +| `supports_non_standard_blocks` | Custom content blocks | `True` | + +**Note:** These defaults are provided by the base test class. You only need to override properties where your model's capabilities differ from the default. + +## 🔧 Common Patterns + +### For Text-Only Models + +```python +@property +def supports_content_blocks_v1(self): + return True + +# All multimodal features inherit False defaults from base class +# No need to override them unless your model supports them +``` + +### For Multimodal Models + +Set the v1 content block features that your model supports: + +- `supports_image_content_blocks` +- `supports_video_content_blocks` +- `supports_audio_content_blocks` + +### For Advanced AI Models + +Set the features that your model supports, including reasoning and citations: + +- `supports_reasoning_content_blocks` +- `supports_citations` +- `supports_web_search_blocks` + +## 🚨 Troubleshooting + +### Tests Failing? + +1. **Check feature flags** - Only enable what your model actually supports +2. **Verify API keys** - Integration tests may need credentials +3. **Check model parameters** - Make sure initialization params are correct + +### Tests Skipping? + +This is normal! 
Tests skip automatically when features aren't supported. Only tests for enabled features will run. + +## 🏃‍♂️ Migration Checklist + +- [ ] Update test base class imports +- [ ] Add `supports_content_blocks_v1 = True` +- [ ] Configure feature flags based on model capabilities +- [ ] Run tests to verify configuration +- [ ] Adjust any failing/skipping tests as needed + +## 📚 Next Steps + +- Read `README_V1.md` for complete feature documentation +- Look at `tests/unit_tests/test_chat_models_v1.py` for working examples + +# Example Files + +## Unit Tests + +```python +"""Example test implementation using ``ChatModelV1UnitTests``. + +This file demonstrates how to use the new content blocks v1 test suite +for testing chat models that support the enhanced content blocks system. +""" + +from typing import Any + +from langchain_core.language_models.v1.chat_models import BaseChatModelV1 +from langchain_core.language_models import GenericFakeChatModel +from langchain_core.messages import BaseMessage +from langchain_core.messages.content_blocks import TextContentBlock + +from langchain_tests.unit_tests.chat_models_v1 import ChatModelV1UnitTests + + +class FakeChatModelV1(GenericFakeChatModel): + """Fake chat model that supports content blocks v1 format. + + This is a test implementation that demonstrates content blocks support. 
+ """ + + def _call(self, messages: Any, stop: Any = None, **kwargs: Any) -> BaseMessage: + """Override to handle content blocks format.""" + # Process messages and handle content blocks + response = super()._call(messages, stop, **kwargs) + + # Convert response to content blocks format if needed + if isinstance(response.content, str): + # Convert string response to TextContentBlock format + from langchain_core.messages import AIMessage + + text_block: TextContentBlock = {"type": "text", "text": response.content} + return AIMessage(content=[text_block]) + + return response + + +class TestFakeChatModelV1(ChatModelV1UnitTests): + """Test implementation using the new content blocks v1 test suite.""" + + @property + def chat_model_class(self) -> type[BaseChatModelV1]: + """Return the fake chat model class for testing.""" + return FakeChatModelV1 + + @property + def chat_model_params(self) -> dict[str, Any]: + """Parameters for initializing the fake chat model.""" + return { + "messages": iter( + [ + "This is a test response with content blocks support.", + "Another test response for validation.", + "Final test response for comprehensive testing.", + ] + ) + } + + # Content blocks v1 support configuration + @property + def supports_content_blocks_v1(self) -> bool: + """This fake model supports content blocks v1.""" + return True + + @property + def supports_text_content_blocks(self) -> bool: + """This fake model supports TextContentBlock.""" + return True + + @property + def supports_reasoning_content_blocks(self) -> bool: + """This fake model does not support ReasoningContentBlock.""" + return False + + @property + def supports_citations(self) -> bool: + """This fake model does not support citations.""" + return False + + @property + def supports_enhanced_tool_calls(self) -> bool: + """This fake model supports enhanced tool calls.""" + return True + + @property + def has_tool_calling(self) -> bool: + """Enable tool calling tests.""" + return True + + @property + def 
supports_image_content_blocks(self) -> bool: + """This fake model does not support image content blocks.""" + return False + + @property + def supports_non_standard_blocks(self) -> bool: + """This fake model supports non-standard blocks.""" + return True +``` + +## Integration Tests + +```python +"""Example integration test implementation using ChatModelV1IntegrationTests. + +This file demonstrates how to use the new content blocks v1 integration test suite +for testing real chat models that support the enhanced content blocks system. + +Note: This is a template/example. Real implementations should replace +FakeChatModelV1 with actual chat model classes. +""" + +import os +from typing import Any + +import pytest +from langchain_core.language_models import BaseChatModel, GenericFakeChatModel + +from langchain_tests.integration_tests.chat_models_v1 import ChatModelV1IntegrationTests + + +# Example fake model for demonstration (replace with real model in practice) +class FakeChatModelV1Integration(GenericFakeChatModel): + """Fake chat model for integration testing demonstration.""" + + @property + def _llm_type(self) -> str: + return "fake_chat_model_v1_integration" + + +class TestFakeChatModelV1Integration(ChatModelV1IntegrationTests): + """Example integration test using content blocks v1 test suite. + + In practice, this would test a real chat model that supports content blocks. + Replace FakeChatModelV1Integration with your actual chat model class. 
+ """ + + @property + def chat_model_class(self) -> type[BaseChatModel]: + """Return the chat model class to test.""" + return FakeChatModelV1Integration + + @property + def chat_model_params(self) -> dict[str, Any]: + """Parameters for initializing the chat model.""" + return { + "messages": iter( + [ + "Integration test response with content blocks.", + "Multimodal content analysis response.", + "Tool calling response with structured output.", + "Citation-enhanced response with sources.", + "Web search integration response.", + ] + ) + } + + # Content blocks v1 support configuration + @property + def supports_content_blocks_v1(self) -> bool: + """Enable content blocks v1 testing.""" + return True + + @property + def supports_text_content_blocks(self) -> bool: + """Enable TextContentBlock testing.""" + return True + + @property + def supports_reasoning_content_blocks(self) -> bool: + """Disable reasoning blocks for this fake model.""" + return False + + @property + def supports_citations(self) -> bool: + """Disable citations for this fake model.""" + return False + + @property + def supports_web_search_blocks(self) -> bool: + """Disable web search for this fake model.""" + return False + + @property + def supports_enhanced_tool_calls(self) -> bool: + """Enable enhanced tool calling tests.""" + return True + + @property + def has_tool_calling(self) -> bool: + """Enable tool calling tests.""" + return True + + @property + def supports_image_inputs(self) -> bool: + """Disable image inputs for this fake model.""" + return False + + @property + def supports_video_inputs(self) -> bool: + """Disable video inputs for this fake model.""" + return False + + @property + def supports_audio_inputs(self) -> bool: + """Disable audio inputs for this fake model.""" + return False + + @property + def supports_file_content_blocks(self) -> bool: + """Disable file content blocks for this fake model.""" + return False + + @property + def supports_non_standard_blocks(self) -> bool: + 
"""Enable non-standard blocks support.""" + return True + + @property + def requires_api_key(self) -> bool: + """This fake model doesn't require an API key.""" + return False + + +# Example of a more realistic integration test configuration +# that would require API keys and external services +class TestRealChatModelV1IntegrationTemplate(ChatModelV1IntegrationTests): + """Template for testing real chat models with content blocks v1. + + This class shows how you would configure tests for a real model + that requires API keys and supports various content block features. + """ + + @pytest.fixture(scope="class", autouse=True) + def check_api_key(self) -> None: + """Check that required API key is available.""" + if not os.getenv("YOUR_MODEL_API_KEY"): + pytest.skip("YOUR_MODEL_API_KEY not set, skipping integration tests") + + @property + def chat_model_class(self) -> type[BaseChatModel]: + """Return your actual chat model class.""" + # Replace with your actual model, e.g.: + # from your_package import YourChatModel + # return YourChatModel + return FakeChatModelV1Integration # Placeholder + + @property + def chat_model_params(self) -> dict[str, Any]: + """Parameters for your actual chat model.""" + return { + # "api_key": os.getenv("YOUR_MODEL_API_KEY"), + # "model": "your-model-name", + # "temperature": 0.1, + # Add your model's specific parameters + } + + # Configure which features your model supports + @property + def supports_content_blocks_v1(self) -> bool: + return True # Set based on your model's capabilities + + @property + def supports_image_inputs(self) -> bool: + return True # Set based on your model's capabilities + + @property + def supports_reasoning_content_blocks(self) -> bool: + return True # Set based on your model's capabilities + + @property + def supports_citations(self) -> bool: + return True # Set based on your model's capabilities + + @property + def supports_web_search_blocks(self) -> bool: + return False # Set based on your model's capabilities 
+ + @property + def supports_enhanced_tool_calls(self) -> bool: + return True # Set based on your model's capabilities + + @property + def has_tool_calling(self) -> bool: + return True # Set based on your model's capabilities + + # Add any model-specific test overrides or skips + @pytest.mark.skip(reason="Template class - not for actual testing") + def test_all_inherited_tests(self) -> None: + """This template class should not run actual tests.""" + pass + +``` diff --git a/libs/standard-tests/README.md b/libs/standard-tests/README.md index 77f6780062a..85fcc4f4179 100644 --- a/libs/standard-tests/README.md +++ b/libs/standard-tests/README.md @@ -80,3 +80,11 @@ as required is optional. - `chat_model_params`: The keyword arguments to pass to the chat model constructor - `chat_model_has_tool_calling`: Whether the chat model can call tools. By default, this is set to `hasattr(chat_model_class, 'bind_tools)` - `chat_model_has_structured_output`: Whether the chat model can structured output. By default, this is set to `hasattr(chat_model_class, 'with_structured_output')` + +## Content Blocks V1 Support + +For chat models that support the new content blocks v1 format (multimodal content, reasoning blocks, citations, etc.), use the v1 test suite instead: + +- See `QUICK_START.md` and `README_V1.md` for v1 testing documentation +- Use `ChatModelV1UnitTests` from `langchain_tests.unit_tests.chat_models_v1` +- V1 tests support `BaseChatModelV1` models with enhanced content block features diff --git a/libs/standard-tests/README_V1.md b/libs/standard-tests/README_V1.md new file mode 100644 index 00000000000..615b5c94703 --- /dev/null +++ b/libs/standard-tests/README_V1.md @@ -0,0 +1,179 @@ +# Standard Tests V1 - Content Blocks Support + +## Overview + +The standard tests v1 package provides comprehensive testing for chat models that support the new content blocks format. 
This includes: + +- **Streaming support**: Content blocks in streaming responses +- **Multimodal content**: Text, images, video, audio, and file content blocks +- **Reasoning content**: Structured reasoning steps as content blocks +- **Enhanced tool calling**: Tool calls as content blocks with richer metadata +- **Structured annotations**: Citations, reasoning blocks, and custom annotations +- **Provider-specific extensions**: Non-standard content blocks for custom functionality + +## Usage + +### Basic Unit Tests + +```python +from langchain_tests.unit_tests.chat_models_v1 import ChatModelV1UnitTests +from your_package import YourChatModel + +class TestYourChatModelV1(ChatModelV1UnitTests): + @property + def chat_model_class(self): + return YourChatModel + + @property + def chat_model_params(self): + return {"api_key": "test-key", "model": "your-model"} + + # Configure supported features + @property + def supports_content_blocks_v1(self): + return True + + @property + def supports_image_content_blocks(self): + return True + + @property + def supports_reasoning_content_blocks(self): + return True +``` + +### Integration Tests + +```python +from langchain_tests.integration_tests.chat_models_v1 import ChatModelV1IntegrationTests +from your_package import YourChatModel + +class TestYourChatModelV1Integration(ChatModelV1IntegrationTests): + @property + def chat_model_class(self): + return YourChatModel + + @property + def chat_model_params(self): + return { + "api_key": os.getenv("YOUR_API_KEY"), + "model": "your-model-name" + } + + # Configure which features to test + @property + def supports_citations(self): + return True + + @property + def supports_web_search_blocks(self): + return False # If your model doesn't support this +``` + +## Configuration Properties + +### Core Content Blocks Support + +- `supports_content_blocks_v1`: Enable content blocks v1 testing **(required)** +- `supports_text_content_blocks`: `TextContentBlock` support - very unlikely this will be 
set to `False` +- `supports_reasoning_content_blocks`: `ReasoningContentBlock` support, e.g. for reasoning models + +### Multimodal Support + +- `supports_image_content_blocks`: `ImageContentBlock`s (v1 format) +- `supports_video_content_blocks`: `VideoContentBlock`s (v1 format) +- `supports_audio_content_blocks`: `AudioContentBlock`s (v1 format) +- `supports_plaintext_content_blocks`: `PlainTextContentBlock`s (plaintext from documents) +- `supports_file_content_blocks`: `FileContentBlock`s + +### Tool Calling + +- `supports_enhanced_tool_calls`: Enhanced tool calling with content blocks +- `supports_invalid_tool_calls`: Error handling for invalid tool calls +- `supports_tool_call_chunks`: Streaming tool call support + +### Advanced Features + +- `supports_citations`: Citation annotations +- `supports_web_search_blocks`: Built-in web search +- `supports_code_interpreter`: Code execution blocks +- `supports_non_standard_blocks`: Custom content blocks + +## Test Categories + +### Unit Tests (`ChatModelV1UnitTests`) + +- Content block format validation +- Ser/deserialization +- Multimodal content handling +- Tool calling with content blocks +- Error handling for invalid blocks +- Backward compatibility with string content + +### Integration Tests (`ChatModelV1IntegrationTests`) + +- Real multimodal content processing +- Advanced reasoning with content blocks +- Citation generation with external sources +- Web search integration +- File processing and analysis +- Performance benchmarking +- Streaming content blocks +- Asynchronous processing + +## Migration from Standard Tests + +### For Test Authors + +1. **Inherit from new base classes**: + + ```python + # v0 + from langchain_tests.unit_tests.chat_models import ChatModelUnitTests + + # v1 + from langchain_tests.unit_tests.chat_models_v1 import ChatModelV1UnitTests + ``` + +2. 
**Configure content blocks support**: + + ```python + @property + def supports_content_blocks_v1(self): + return True # Enable v1 features + ``` + +3. **Set feature flags** based on your model's capabilities + +## Backward Compatibility + +The v1 tests maintain full backward compatibility: + +- Legacy string content is still tested +- Mixed message formats (legacy + content blocks) are validated +- All original test functionality is preserved +- Models can gradually adopt content blocks features + +## Examples + +See the test files in `tests/unit_tests/test_chat_models_v1.py` and `tests/integration_tests/test_chat_models_v1.py` for complete examples of how to implement tests for your chat model. + +## Best Practices + +1. **Start with basic content blocks** (text) and gradually enable advanced features +2. **Use feature flags** to selectively enable tests based on your model's capabilities +3. **Test error handling** for unsupported content block types +4. **Validate serialization** to persist message histories (passing back in content blocks) +5. **Benchmark performance** with content blocks vs. legacy format +6. **Test streaming** if your model supports it with content blocks + +## Contributing + +When new content block types or features are added: + +1. Add the content block type to the imports +2. Create test helper methods for the new type +3. Add configuration properties for the feature +4. Implement corresponding test methods +5. Update this documentation +6. Add examples in the test files diff --git a/libs/standard-tests/langchain_tests/__init__.py b/libs/standard-tests/langchain_tests/__init__.py index b03553e9cd7..3677a77a697 100644 --- a/libs/standard-tests/langchain_tests/__init__.py +++ b/libs/standard-tests/langchain_tests/__init__.py @@ -3,4 +3,7 @@ To learn how to use these classes, see the `integration standard testing `__ guide. 
+ +This package provides both the original test suites and the v1 test suites that support +the new content blocks system introduced in ``langchain_core.messages.content_blocks``. """ diff --git a/libs/standard-tests/langchain_tests/unit_tests/chat_models_v1.py b/libs/standard-tests/langchain_tests/unit_tests/chat_models_v1.py new file mode 100644 index 00000000000..47209bd7539 --- /dev/null +++ b/libs/standard-tests/langchain_tests/unit_tests/chat_models_v1.py @@ -0,0 +1,551 @@ +""":autodoc-options: autoproperty. + +Standard unit tests for chat models supporting v1 messages. + +This module provides updated test patterns for the new messages introduced in +``langchain_core.messages.content_blocks``. Notably, this includes the standardized +content blocks system. +""" + +from typing import cast + +import pytest +from langchain_core.language_models.v1.chat_models import BaseChatModelV1 +from langchain_core.load import dumpd, load +from langchain_core.messages.content_blocks import ( + ContentBlock, + InvalidToolCall, + TextContentBlock, + create_file_block, + create_image_block, + create_non_standard_block, + create_text_block, +) +from langchain_core.messages.v1 import AIMessage, HumanMessage +from langchain_core.tools import tool + +from langchain_tests.base import BaseStandardTests + + +class ChatModelV1Tests(BaseStandardTests): + """Test suite for v1 chat models. + + This class provides comprehensive testing for the new message system introduced in + LangChain v1, including the standardized content block format. 
+ + :private: + """ + + # Core Model Properties - these should be implemented by subclasses + @property + def has_tool_calling(self) -> bool: + """Whether the model supports tool calling.""" + return False + + @property + def has_structured_output(self) -> bool: + """Whether the model supports structured output.""" + return False + + @property + def supports_json_mode(self) -> bool: + """Whether the model supports JSON mode.""" + return False + + # Content Block Support Properties + @property + def supports_content_blocks_v1(self) -> bool: + """Whether the model supports content blocks v1 format.""" + return True + + @property + def supports_non_standard_blocks(self) -> bool: + """Whether the model supports ``NonStandardContentBlock``.""" + return True + + @property + def supports_text_content_blocks(self) -> bool: + """Whether the model supports ``TextContentBlock``.""" + return self.supports_content_blocks_v1 + + @property + def supports_reasoning_content_blocks(self) -> bool: + """Whether the model supports ``ReasoningContentBlock``.""" + return False + + @property + def supports_plaintext_content_blocks(self) -> bool: + """Whether the model supports ``PlainTextContentBlock``.""" + return False + + @property + def supports_file_content_blocks(self) -> bool: + """Whether the model supports ``FileContentBlock``.""" + return False + + @property + def supports_image_content_blocks(self) -> bool: + """Whether the model supports ``ImageContentBlock``.""" + return False + + @property + def supports_audio_content_blocks(self) -> bool: + """Whether the model supports ``AudioContentBlock``.""" + return False + + @property + def supports_video_content_blocks(self) -> bool: + """Whether the model supports ``VideoContentBlock``.""" + return False + + @property + def supports_citations(self) -> bool: + """Whether the model supports ``Citation`` annotations.""" + return False + + @property + def supports_web_search_blocks(self) -> bool: + """Whether the model supports 
``WebSearchCall``/``WebSearchResult`` blocks.""" + return False + + @property + def supports_enhanced_tool_calls(self) -> bool: + """Whether the model supports ``ToolCall`` format with content blocks.""" + return self.has_tool_calling and self.supports_content_blocks_v1 + + @property + def supports_invalid_tool_calls(self) -> bool: + """Whether the model can handle ``InvalidToolCall`` blocks.""" + return False + + @property + def supports_tool_call_chunks(self) -> bool: + """Whether the model supports streaming ``ToolCallChunk`` blocks.""" + return self.supports_enhanced_tool_calls + + +class ChatModelV1UnitTests(ChatModelV1Tests): + """Unit tests for chat models with content blocks v1 support. + + These tests run in isolation without external dependencies. + """ + + # Core Method Tests + def test_invoke_basic(self, model: BaseChatModelV1) -> None: + """Test basic invoke functionality with simple string input.""" + result = model.invoke("Hello, world!") + assert isinstance(result, AIMessage) + assert result.content is not None + + def test_invoke_with_message_list(self, model: BaseChatModelV1) -> None: + """Test invoke with list of messages.""" + messages = [HumanMessage("Hello, world!")] + result = model.invoke(messages) + assert isinstance(result, AIMessage) + assert result.content is not None + + async def test_ainvoke_basic(self, model: BaseChatModelV1) -> None: + """Test basic async invoke functionality.""" + result = await model.ainvoke("Hello, world!") + assert isinstance(result, AIMessage) + assert result.content is not None + + def test_stream_basic(self, model: BaseChatModelV1) -> None: + """Test basic streaming functionality.""" + chunks = [] + for chunk in model.stream("Hello, world!"): + chunks.append(chunk) + assert hasattr(chunk, "content") + + assert len(chunks) > 0 + # Verify chunks can be aggregated + if chunks: + final_message = chunks[0] + for chunk in chunks[1:]: + final_message = final_message + chunk + assert isinstance(final_message.content, 
(str, list)) + + async def test_astream_basic(self, model: BaseChatModelV1) -> None: + """Test basic async streaming functionality.""" + chunks = [] + async for chunk in model.astream("Hello, world!"): + chunks.append(chunk) + assert hasattr(chunk, "content") + + assert len(chunks) > 0 + # Verify chunks can be aggregated + if chunks: + final_message = chunks[0] + for chunk in chunks[1:]: + final_message = final_message + chunk + assert isinstance(final_message.content, (str, list)) + + # Property Tests + def test_llm_type_property(self, model: BaseChatModelV1) -> None: + """Test that ``_llm_type`` property is implemented and returns a string.""" + llm_type = model._llm_type + assert isinstance(llm_type, str) + assert len(llm_type) > 0 + + def test_identifying_params_property(self, model: BaseChatModelV1) -> None: + """Test that ``_identifying_params`` property returns a mapping.""" + params = model._identifying_params + assert isinstance(params, dict) # Should be dict-like mapping + + # Token Counting Tests + def test_get_token_ids(self, model: BaseChatModelV1) -> None: + """Test that ``get_token_ids`` returns a list of integers.""" + text = "Hello, world!" + token_ids = model.get_token_ids(text) + assert isinstance(token_ids, list) + assert all(isinstance(token_id, int) for token_id in token_ids) + assert len(token_ids) > 0 + + def test_get_num_tokens(self, model: BaseChatModelV1) -> None: + """Test that ``get_num_tokens`` returns a positive integer.""" + text = "Hello, world!" 
+ num_tokens = model.get_num_tokens(text) + assert isinstance(num_tokens, int) + assert num_tokens > 0 + + def test_get_num_tokens_from_messages(self, model: BaseChatModelV1) -> None: + """Test that ``get_num_tokens_from_messages`` returns a positive integer.""" + messages = [HumanMessage("Hello, world!")] + num_tokens = model.get_num_tokens_from_messages(messages) # type: ignore[arg-type] + assert isinstance(num_tokens, int) + assert num_tokens > 0 + + def test_token_counting_consistency(self, model: BaseChatModelV1) -> None: + """Test that token counting methods are consistent with each other.""" + text = "Hello, world!" + token_ids = model.get_token_ids(text) + num_tokens = model.get_num_tokens(text) + + # Number of tokens should match length of token IDs list + assert len(token_ids) == num_tokens + + # Serialization Tests + def test_dump_serialization(self, model: BaseChatModelV1) -> None: + """Test that ``dump()`` returns proper serialization.""" + dumped = model.dump() + assert isinstance(dumped, dict) + assert "_type" in dumped + assert dumped["_type"] == model._llm_type + + # Should contain identifying parameters + for key, value in model._identifying_params.items(): + assert key in dumped + assert dumped[key] == value + + # Input Conversion Tests + def test_input_conversion_string(self, model: BaseChatModelV1) -> None: + """Test that string input is properly converted to messages.""" + # This test verifies the _convert_input method works correctly + result = model.invoke("Test string input") + assert isinstance(result, AIMessage) + assert result.content is not None + + def test_input_conversion_empty_string(self, model: BaseChatModelV1) -> None: + """Test that empty string input is handled gracefully.""" + result = model.invoke("") + assert isinstance(result, AIMessage) + # Content might be empty or some default response + + def test_input_conversion_message_v1_list(self, model: BaseChatModelV1) -> None: + """Test that v1 message list input is handled 
correctly.""" + messages = [HumanMessage("Test message")] + result = model.invoke(messages) + assert isinstance(result, AIMessage) + assert result.content is not None + + # Batch Processing Tests + def test_batch_basic(self, model: BaseChatModelV1) -> None: + """Test basic batch processing functionality.""" + inputs = ["Hello", "How are you?", "Goodbye"] + results = model.batch(inputs) # type: ignore[arg-type] + + assert isinstance(results, list) + assert len(results) == len(inputs) + for result in results: + assert isinstance(result, AIMessage) + assert result.content is not None + + async def test_abatch_basic(self, model: BaseChatModelV1) -> None: + """Test basic async batch processing functionality.""" + inputs = ["Hello", "How are you?", "Goodbye"] + results = await model.abatch(inputs) # type: ignore[arg-type] + + assert isinstance(results, list) + assert len(results) == len(inputs) + for result in results: + assert isinstance(result, AIMessage) + assert result.content is not None + + # Content Block Tests + def test_text_content_blocks(self, model: BaseChatModelV1) -> None: + """Test that the model can handle the ``TextContentBlock`` format. + + This test verifies that the model correctly processes messages containing + ``TextContentBlock`` objects instead of plain strings. 
+ """ + if not self.supports_text_content_blocks: + pytest.skip("Model does not support TextContentBlock.") + + text_block = create_text_block("Hello, world!") + message = HumanMessage(content=[text_block]) + + result = model.invoke([message]) + assert isinstance(result, AIMessage) + assert result.content is not None + + def test_mixed_content_blocks(self, model: BaseChatModelV1) -> None: + """Test that the model can handle messages with mixed content blocks.""" + if not ( + self.supports_text_content_blocks and self.supports_image_content_blocks + ): + pytest.skip("Model does not support mixed content blocks.") + + content_blocks: list[ContentBlock] = [ + create_text_block("Describe this image:"), + create_image_block( + base64="iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==", + mime_type="image/png", + ), + ] + + message = HumanMessage(content=content_blocks) + result = model.invoke([message]) + + assert isinstance(result, AIMessage) + assert result.content is not None + + def test_reasoning_content_blocks(self, model: BaseChatModelV1) -> None: + """Test that the model can generate ``ReasoningContentBlock``.""" + if not self.supports_reasoning_content_blocks: + pytest.skip("Model does not support ReasoningContentBlock.") + + message = HumanMessage("Think step by step: What is 2 + 2?") + result = model.invoke([message]) + + assert isinstance(result, AIMessage) + if isinstance(result.content, list): + reasoning_blocks = [ + block + for block in result.content + if isinstance(block, dict) and block.get("type") == "reasoning" + ] + assert len(reasoning_blocks) > 0 + + def test_citations_in_response(self, model: BaseChatModelV1) -> None: + """Test that the model can generate ``Citations`` in text blocks.""" + if not self.supports_citations: + pytest.skip("Model does not support citations.") + + message = HumanMessage("Provide information about Python with citations.") + result = model.invoke([message]) + + assert 
isinstance(result, AIMessage) + if isinstance(result.content, list): + content_list = result.content + text_blocks_with_citations: list[TextContentBlock] = [] + for block in content_list: + if ( + isinstance(block, dict) + and block.get("type") == "text" + and "annotations" in block + and isinstance(block.get("annotations"), list) + and len(cast(list, block.get("annotations", []))) > 0 + ): + text_block = cast(TextContentBlock, block) + text_blocks_with_citations.append(text_block) + assert len(text_blocks_with_citations) > 0 + + # Verify that at least one known citation type is present + has_citation = any( + "citation" in annotation.get("type", "") + for block in text_blocks_with_citations + for annotation in block.get("annotations", []) + ) or any( + "non_standard_annotation" in annotation.get("type", "") + for block in text_blocks_with_citations + for annotation in block.get("annotations", []) + ) + assert has_citation, "No citations found in text blocks." + + def test_non_standard_content_blocks(self, model: BaseChatModelV1) -> None: + """Test that the model can handle ``NonStandardContentBlock``.""" + if not self.supports_non_standard_blocks: + pytest.skip("Model does not support NonStandardContentBlock.") + + non_standard_block = create_non_standard_block( + { + "custom_field": "custom_value", + "data": [1, 2, 3], + } + ) + + message = HumanMessage(content=[non_standard_block]) + + # Should not raise an error + result = model.invoke([message]) + assert isinstance(result, AIMessage) + + def test_enhanced_tool_calls_with_content_blocks( + self, model: BaseChatModelV1 + ) -> None: + """Test enhanced tool calling with content blocks format.""" + if not self.supports_enhanced_tool_calls: + pytest.skip("Model does not support enhanced tool calls.") + + @tool + def sample_tool(query: str) -> str: + """A sample tool for testing.""" + return f"Result for: {query}" + + model_with_tools = model.bind_tools([sample_tool]) + message = HumanMessage("Use the sample tool 
with query 'test'") + + result = model_with_tools.invoke([message]) + assert isinstance(result, AIMessage) + + # Check if tool calls are in content blocks format + if isinstance(result.content, list): + tool_call_blocks = [ + block + for block in result.content + if isinstance(block, dict) and block.get("type") == "tool_call" + ] + assert len(tool_call_blocks) > 0 + # Backwards compat? + # else: + # # Fallback to legacy tool_calls attribute + # assert hasattr(result, "tool_calls") and result.tool_calls + + def test_invalid_tool_call_handling(self, model: BaseChatModelV1) -> None: + """Test that the model can handle ``InvalidToolCall`` blocks gracefully.""" + if not self.supports_invalid_tool_calls: + pytest.skip("Model does not support InvalidToolCall handling.") + + invalid_tool_call: InvalidToolCall = { + "type": "invalid_tool_call", + "name": "nonexistent_tool", + "args": None, + "id": "invalid_123", + "error": "Tool not found", + } + + # Create a message with invalid tool call in history + ai_message = AIMessage(content=[invalid_tool_call]) + follow_up = HumanMessage("Please try again with a valid approach.") + + result = model.invoke([ai_message, follow_up]) + assert isinstance(result, AIMessage) + assert result.content is not None + # TODO: enhance/double check this + + def test_web_search_content_blocks(self, model: BaseChatModelV1) -> None: + """Test generating ``WebSearchCall``/``WebSearchResult`` blocks.""" + if not self.supports_web_search_blocks: + pytest.skip("Model does not support web search blocks.") + + message = HumanMessage("Search for recent news about AI developments.") + result = model.invoke([message]) + + assert isinstance(result, AIMessage) + if isinstance(result.content, list): + search_blocks = [ + block + for block in result.content + if isinstance(block, dict) + and block.get("type") in ["web_search_call", "web_search_result"] + ] + assert len(search_blocks) > 0 + + def test_file_content_blocks(self, model: BaseChatModelV1) -> None: + 
"""Test that the model can handle ``FileContentBlock``.""" + if not self.supports_file_content_blocks: + pytest.skip("Model does not support FileContentBlock.") + + file_block = create_file_block( + base64="SGVsbG8sIHdvcmxkIQ==", # "Hello, world!" + mime_type="text/plain", + ) + + message = HumanMessage(content=[file_block]) + result = model.invoke([message]) + + assert isinstance(result, AIMessage) + assert result.content is not None + # TODO: make more robust? + + def test_content_block_streaming(self, model: BaseChatModelV1) -> None: + """Test that content blocks work correctly with streaming.""" + if not self.supports_content_blocks_v1: + pytest.skip("Model does not support content blocks v1.") + + text_block = create_text_block("Generate a short story.") + message = HumanMessage(content=[text_block]) + + chunks = [] + for chunk in model.stream([message]): + chunks.append(chunk) + assert hasattr(chunk, "content") + + assert len(chunks) > 0 + + # Verify final aggregated message + final_message = chunks[0] + for chunk in chunks[1:]: + final_message = final_message + chunk + + assert isinstance(final_message.content, (str, list)) + + def test_content_block_serialization(self, model: BaseChatModelV1) -> None: + """Test that messages with content blocks can be serialized/deserialized.""" + if not self.supports_content_blocks_v1: + pytest.skip("Model does not support content blocks v1.") + + text_block = create_text_block("Test serialization") + message = HumanMessage(content=[text_block]) + + # Test serialization + serialized = dumpd(message) + assert isinstance(serialized, dict) + + # Test deserialization + deserialized = load(serialized) + assert isinstance(deserialized, HumanMessage) + assert deserialized.content == message.content + # TODO: make more robust + + def test_backwards_compatibility(self, model: BaseChatModelV1) -> None: + """Test that models still work with legacy string content.""" + # This should work regardless of content blocks support + 
legacy_message = HumanMessage("Hello, world!") + result = model.invoke([legacy_message]) + + assert isinstance(result, AIMessage) + assert result.content is not None + + legacy_message_named_param = HumanMessage(content="Hello, world!") + result_named_param = model.invoke([legacy_message_named_param]) + + assert isinstance(result_named_param, AIMessage) + assert result_named_param.content is not None + + def test_content_block_validation(self, model: BaseChatModelV1) -> None: + """Test that invalid content blocks are handled gracefully.""" + if not self.supports_content_blocks_v1: + pytest.skip("Model does not support content blocks v1.") + + # Test with invalid content block structure + invalid_block = {"type": "invalid_type", "invalid_field": "value"} + message = HumanMessage(content=[invalid_block]) # type: ignore[list-item] + + # Should handle gracefully (either convert to NonStandardContentBlock or reject) + try: + result = model.invoke([message]) + assert isinstance(result, AIMessage) + except (ValueError, TypeError) as e: + # Acceptable to raise validation errors for truly invalid blocks + assert "invalid" in str(e).lower() or "unknown" in str(e).lower() diff --git a/libs/standard-tests/tests/unit_tests/custom_chat_model.py b/libs/standard-tests/tests/unit_tests/custom_chat_model.py index cc9be763989..737745ec27e 100644 --- a/libs/standard-tests/tests/unit_tests/custom_chat_model.py +++ b/libs/standard-tests/tests/unit_tests/custom_chat_model.py @@ -1,15 +1,9 @@ from collections.abc import Iterator from typing import Any, Optional -from langchain_core.callbacks import ( - CallbackManagerForLLMRun, -) +from langchain_core.callbacks import CallbackManagerForLLMRun from langchain_core.language_models import BaseChatModel -from langchain_core.messages import ( - AIMessage, - AIMessageChunk, - BaseMessage, -) +from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage from langchain_core.messages.ai import UsageMetadata from langchain_core.outputs 
class ChatParrotLinkV1(BaseChatModelV1):
    """A custom v1 chat model that echoes input with content blocks support.

    This model is designed for testing the v1 message format and content blocks.
    Echoes the first ``parrot_buffer_length`` characters of the input and returns
    them as proper v1 content blocks.

    Example:
        .. code-block:: python

            model = ChatParrotLinkV1(parrot_buffer_length=10, model="parrot-v1")
            result = model.invoke([HumanMessage(content="hello world")])
            # Returns AIMessage with content blocks format
    """

    model_name: str = Field(alias="model")
    """The name of the model."""
    temperature: Optional[float] = None
    max_tokens: Optional[int] = None
    timeout: Optional[int] = None
    stop: Optional[list[str]] = None
    max_retries: int = 2

    parrot_buffer_length: int = Field(default=50)
    """The number of characters from the last message to echo."""

    @staticmethod
    def _extract_text(message: MessageV1) -> str:
        """Concatenate the text of every ``text`` content block in ``message``.

        Shared by ``_invoke`` and ``_stream`` so the extraction logic exists in
        exactly one place.
        """
        return "".join(
            str(block.get("text", ""))
            for block in message.content
            if isinstance(block, dict) and block.get("type") == "text"
        )

    @staticmethod
    def _count_content_chars(messages: list[MessageV1]) -> int:
        """Approximate input size as total characters across all message content.

        Used as a stand-in for input token counts in usage metadata.
        """
        total = 0
        for msg in messages:
            if isinstance(msg.content, str):
                total += len(str(msg.content))
            elif isinstance(msg.content, list):
                total += sum(len(str(block)) for block in msg.content)
        return total

    def _invoke(
        self,
        messages: list[MessageV1],
        **kwargs: Any,
    ) -> AIMessage:
        """Generate a response by echoing the input as content blocks.

        Args:
            messages: List of v1 messages to process.
            **kwargs: Additional generation parameters (ignored).

        Returns:
            AIMessage with content blocks format.
        """
        _ = kwargs  # Accepted for interface compatibility; unused.

        if not messages:
            return AIMessage("No input provided")

        # Echo the first parrot_buffer_length characters of the last message.
        echoed_text = self._extract_text(messages[-1])[: self.parrot_buffer_length]
        total_input_chars = self._count_content_chars(messages)

        usage_metadata = UsageMetadata(
            input_tokens=total_input_chars,
            output_tokens=len(echoed_text),
            total_tokens=total_input_chars + len(echoed_text),
        )

        return AIMessage(
            content=echoed_text,
            response_metadata=cast(
                Any,
                {
                    "model_name": self.model_name,
                    "time_in_seconds": 0.1,
                },
            ),
            usage_metadata=usage_metadata,
        )

    def _stream(
        self,
        messages: list[MessageV1],
        stop: Optional[list[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[AIMessageChunk]:
        """Stream the response by yielding one chunk per echoed character.

        Args:
            messages: List of v1 messages to process.
            stop: Stop sequences (unused in this implementation).
            run_manager: Callback manager for the LLM run.
            **kwargs: Additional generation parameters (ignored).

        Yields:
            AIMessageChunk objects with individual characters, then a final
            chunk carrying only the response metadata.
        """
        _ = stop  # Accepted for interface compatibility; unused.
        _ = kwargs

        if not messages:
            yield AIMessageChunk("No input provided")
            return

        echoed_text = self._extract_text(messages[-1])[: self.parrot_buffer_length]
        total_input_chars = self._count_content_chars(messages)

        for i, char in enumerate(echoed_text):
            # Input tokens are attributed to the first chunk only.
            usage_metadata = UsageMetadata(
                input_tokens=total_input_chars if i == 0 else 0,
                output_tokens=1,
                total_tokens=total_input_chars + 1 if i == 0 else 1,
            )

            chunk = AIMessageChunk(
                content=char,
                usage_metadata=usage_metadata,
            )

            if run_manager:
                run_manager.on_llm_new_token(char, chunk=chunk)

            yield chunk

        # Final chunk with response metadata only (no content).
        yield AIMessageChunk(
            content=[],
            response_metadata=cast(
                Any,
                {
                    "model_name": self.model_name,
                    "time_in_seconds": 0.1,
                },
            ),
        )

    async def _ainvoke(
        self,
        messages: list[MessageV1],
        **kwargs: Any,
    ) -> AIMessage:
        """Async generate a response (delegates to the sync implementation).

        Args:
            messages: List of v1 messages to process.
            **kwargs: Additional generation parameters.

        Returns:
            AIMessage with content blocks format.
        """
        return self._invoke(messages, **kwargs)

    async def _astream(
        self,
        messages: list[MessageV1],
        stop: Optional[list[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> AsyncIterator[AIMessageChunk]:
        """Async stream the response (delegates to the sync implementation).

        NOTE(review): ``run_manager`` is not forwarded because the sync
        ``_stream`` expects a sync callback manager; async token callbacks are
        therefore not emitted by this test model — confirm this is acceptable
        for the test suite.

        Args:
            messages: List of v1 messages to process.
            stop: Stop sequences (unused in this implementation).
            run_manager: Async callback manager for the LLM run (not forwarded).
            **kwargs: Additional generation parameters.

        Yields:
            AIMessageChunk objects with individual characters.
        """
        for chunk in self._stream(messages, stop, None, **kwargs):
            yield chunk

    @property
    def _llm_type(self) -> str:
        """Get the type of language model used by this chat model."""
        return "parrot-chat-model-v1"

    @property
    def _identifying_params(self) -> dict[str, Any]:
        """Return a dictionary of identifying parameters."""
        return {
            "model_name": self.model_name,
            "parrot_buffer_length": self.parrot_buffer_length,
        }

    def get_token_ids(self, text: str) -> list[int]:
        """Convert text to token IDs using simple character-based tokenization.

        For testing purposes each character maps to its Unicode code point.

        Args:
            text: The text to tokenize.

        Returns:
            List of token IDs (character code points).
        """
        return [ord(char) for char in text]

    def get_num_tokens(self, text: str) -> int:
        """Get the number of tokens in the text.

        Args:
            text: The text to count tokens for.

        Returns:
            Number of tokens (characters in this simple implementation).
        """
        return len(text)
class TestChatParrotLinkV1Unit(ChatModelV1UnitTests):
    """Unit tests for ``ChatParrotLinkV1`` using the standard v1 test suite."""

    @property
    def chat_model_class(self) -> type[ChatParrotLinkV1]:
        """The chat model class under test."""
        return ChatParrotLinkV1

    @property
    def chat_model_params(self) -> dict:
        """Initialization parameters for the chat model."""
        return {
            "model": "parrot-v1-test",
            "parrot_buffer_length": 20,
            "temperature": 0.0,
        }

    @pytest.fixture
    def model(self) -> ChatParrotLinkV1:
        """Provide a fresh model instance for each test."""
        return self.chat_model_class(**self.chat_model_params)

    # Capability flags: declare exactly what ChatParrotLinkV1 implements.
    @property
    def has_tool_calling(self) -> bool:
        """Tool calling is not implemented by ``ChatParrotLinkV1``."""
        return False

    @property
    def has_structured_output(self) -> bool:
        """Structured output is not implemented by ``ChatParrotLinkV1``."""
        return False

    @property
    def supports_json_mode(self) -> bool:
        """JSON mode is not implemented by ``ChatParrotLinkV1``."""
        return False

    @property
    def supports_content_blocks_v1(self) -> bool:
        """The v1 content blocks format is supported."""
        return True

    @property
    def supports_text_content_blocks(self) -> bool:
        """``TextContentBlock`` input is supported."""
        return True

    @property
    def supports_non_standard_blocks(self) -> bool:
        """``NonStandardContentBlock`` input is handled gracefully."""
        return True

    # Everything below is unsupported by ChatParrotLinkV1.
    @property
    def supports_reasoning_content_blocks(self) -> bool:
        """``ReasoningContentBlock`` output is not generated."""
        return False

    @property
    def supports_plaintext_content_blocks(self) -> bool:
        """``PlainTextContentBlock`` is not supported."""
        return False

    @property
    def supports_file_content_blocks(self) -> bool:
        """``FileContentBlock`` is not supported."""
        return False

    @property
    def supports_image_content_blocks(self) -> bool:
        """``ImageContentBlock`` is not supported."""
        return False

    @property
    def supports_audio_content_blocks(self) -> bool:
        """``AudioContentBlock`` is not supported."""
        return False

    @property
    def supports_video_content_blocks(self) -> bool:
        """``VideoContentBlock`` is not supported."""
        return False

    @property
    def supports_citations(self) -> bool:
        """Citation annotations are not supported."""
        return False

    @property
    def supports_web_search_blocks(self) -> bool:
        """Web search blocks are not supported."""
        return False

    @property
    def supports_enhanced_tool_calls(self) -> bool:
        """Enhanced tool calls are not supported."""
        return False

    @property
    def supports_invalid_tool_calls(self) -> bool:
        """``InvalidToolCall`` handling is not supported."""
        return False

    @property
    def supports_tool_call_chunks(self) -> bool:
        """``ToolCallChunk`` blocks are not supported."""
        return False