# Provider V1 Content Support Implementation Plan

Implementation plan for adding messages with standard content block types (v1) to any chat model provider.

## Architecture Overview

### V1 Chat Model Foundation

The v1 architecture uses:

- **Separate File Structure**: Create `base_v1.py` alongside existing `base.py`
- **BaseChatModelV1 Inheritance**: Inherit from `langchain_core.language_models.v1.chat_models.BaseChatModelV1`
- **Native V1 Messages**: Use `AIMessageV1`, `HumanMessageV1`, etc. throughout
- **No `output_version` Field**: Always return v1 format (no conditional logic); this is a deviation from our prior plan
- **Standardized Content Blocks**: Native support for `TextContentBlock`, `ImageContentBlock`, `ToolCallContentBlock`, etc. (defined in `libs/core/langchain_core/messages/content_blocks.py`)

### Implementation Pattern

Based on successful implementations, the v0 and v1 chat models live side by side:

```python
# Existing v0 implementation (unchanged):
# libs/partners/{provider}/langchain_{provider}/chat_models/base.py
class ChatProvider(BaseChatModel):  # v0 base class
    output_version: str = "v0"  # conditional v1 support (open question: remove this field?)


# NEW FILE: v1 implementation:
# libs/partners/{provider}/langchain_{provider}/chat_models/base_v1.py
class ChatProviderV1(BaseChatModelV1):  # v1 base class
    ...  # Inherits from BaseChatModelV1
```

### V1 Message Flow Pattern

```python
from langchain_core.messages.v1 import HumanMessage as HumanMessageV1
from langchain_core.messages.content_blocks import TextContentBlock, ImageContentBlock

user_message = HumanMessageV1(content=[
    TextContentBlock(type="text", text="Hello"),
    ImageContentBlock(type="image", mime_type="image/jpeg", base64="...")
])

# Internal: Convert v1 to provider's native format
# (For API calls, either direct or via SDK)
provider_request = convert_from_v1_to_provider_api(user_message)
provider_response = provider_api.chat(provider_request)

# Output: Convert provider response to v1 format
ai_message_v1 = convert_to_v1_from_provider_response(provider_response)
```

## Implementation Framework

### Phase 1: Core Infrastructure

#### 1.1 Create Separate V1 File Structure

**New File:** `langchain_{provider}/chat_models/base_v1.py`

```python
"""Provider chat model v1 implementation.

As much as possible, this should be a direct copy of the v0
implementation, but with the following changes:

- Inherit from BaseChatModelV1
- Use native v1 messages (AIMessageV1, HumanMessageV1, etc.)
- `content` is a list of ContentBlock objects (TextContentBlock, ImageContentBlock, etc.)
"""

from collections.abc import AsyncIterator, Iterator
from typing import Any, Optional

from langchain_core.language_models.v1.chat_models import (
    BaseChatModelV1,
    agenerate_from_stream,
    generate_from_stream,
)
from langchain_core.messages import content_blocks as types
from langchain_core.messages.v1 import (
    AIMessage as AIMessageV1,
    AIMessageChunk as AIMessageChunkV1,
    HumanMessage as HumanMessageV1,
    MessageV1,
    ResponseMetadata,
    SystemMessage as SystemMessageV1,
    ToolMessage as ToolMessageV1,
)
from pydantic import Field


class BaseChatProviderV1(BaseChatModelV1):
    """Base class for provider v1 chat models."""

    model_name: str = Field(default="default-model", alias="model")
    """Model name to use."""

    # Provider-specific configuration fields
    # ... (copy from existing base.py but adapt for v1 where applicable)


class ChatProviderV1(BaseChatProviderV1):
    """Provider chat model with v1 messages."""

    def _generate_stream(
        self,
        messages: list[MessageV1],
        stop: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> Iterator[AIMessageChunkV1]:
        """Generate streaming response with v1 messages."""
        ...

    def _agenerate_stream(
        self,
        messages: list[MessageV1],
        stop: Optional[list[str]] = None,
        **kwargs: Any,
    ) -> AsyncIterator[AIMessageChunkV1]:
        """Generate async streaming response with v1 messages."""
        ...
```

#### 1.2 Update Package Exports

Include the new v1 chat model in the package exports:

**File:** `langchain_{provider}/chat_models/__init__.py`

```python
from langchain_{provider}.chat_models.base import ChatProvider
from langchain_{provider}.chat_models.base_v1 import ChatProviderV1

__all__ = ["ChatProvider", "ChatProviderV1"]
```

#### 1.3 Create Conversion Utilities

**File:** `langchain_{provider}/chat_models/_compat.py`

```python
"""V1 message conversion utilities."""

from typing import Any, cast

from langchain_core.messages import content_blocks as types
from langchain_core.messages.v1 import (
    AIMessage as AIMessageV1,
    HumanMessage as HumanMessageV1,
    MessageV1,
    SystemMessage as SystemMessageV1,
    ToolMessage as ToolMessageV1,
)


def _convert_from_v1_to_provider_format(message: MessageV1) -> dict[str, Any]:
    """Convert v1 message to provider API format."""
    ...


def _convert_to_v1_from_provider_format(response: dict[str, Any]) -> AIMessageV1:
    """Convert provider API response to v1 message(s)."""
    ...


def _format_v1_message_content(content: list[types.ContentBlock]) -> Any:
    """Format v1 content blocks for provider API."""
    ...
```

### Phase 2: Message Processing

#### 2.1 Input Message Handling

```python
# Continues _compat.py (imports shown in 1.3). The image/audio/tool helpers
# referenced below are provider-specific and defined elsewhere in the file.
def _convert_from_v1_to_provider_format(message: MessageV1) -> dict[str, Any]:
    """Convert v1 message to provider API format."""
    if isinstance(message, HumanMessageV1):
        return _convert_human_message_v1(message)
    elif isinstance(message, AIMessageV1):
        return _convert_ai_message_v1(message)
    elif isinstance(message, SystemMessageV1):
        return _convert_system_message_v1(message)
    elif isinstance(message, ToolMessageV1):
        return _convert_tool_message_v1(message)
    else:
        raise ValueError(f"Unsupported message type: {type(message)}")


def _convert_content_blocks_to_provider_format(
    content: list[types.ContentBlock],
) -> list[dict[str, Any]]:
    """Convert v1 content blocks to provider API format.

    Shared across all message types since they all support the same content blocks.
    """
    content_parts = []

    for block in content:
        block_type = block.get("type")
        if block_type == "text":
            # The format here will vary depending on the provider's API
            content_parts.append({
                "type": "text",
                "text": block.get("text", "")
            })
        elif block_type == "image":
            content_parts.append(_convert_image_block_to_provider(block))
        elif block_type == "audio":
            content_parts.append(_convert_audio_block_to_provider(block))
        elif block_type == "tool_call":
            # Skip tool calls - handled separately via tool_calls property
            continue
        # Add other content block types...

    return content_parts


def _convert_human_message_v1(message: HumanMessageV1) -> dict[str, Any]:
    """Convert HumanMessageV1 to provider format."""
    # The format here will vary depending on the provider's API
    return {
        "role": "user",
        "content": _convert_content_blocks_to_provider_format(message.content),
        "name": message.name,
    }


def _convert_ai_message_v1(message: AIMessageV1) -> dict[str, Any]:
    """Convert AIMessageV1 to provider format."""
    # Extract text content for main content field
    # The format here will vary depending on the provider's API
    text_content = ""
    for block in message.content:
        if block.get("type") == "text":
            text_content += block.get("text", "")

    return {
        "role": "assistant",
        "content": text_content,
        "tool_calls": [_convert_tool_call_to_provider(tc) for tc in message.tool_calls],
        "name": message.name,
    }
```

#### 2.2 Output Message Generation

Convert provider responses directly to v1 format:

```python
from typing import Any

from langchain_core.messages.content_blocks import (
    AudioContentBlock,
    ContentBlock,
    ReasoningContentBlock,
    TextContentBlock,
    ToolCall,
    # ... other block types as needed
)
from langchain_core.messages.v1 import AIMessage as AIMessageV1, ResponseMetadata


def _convert_to_v1_from_provider_format(response: dict[str, Any]) -> AIMessageV1:
    """Convert provider response to AIMessageV1."""
    # The format here will vary depending on the provider's API
    # (This is a dummy implementation)
    content: list[ContentBlock] = []

    if text_content := response.get("content"):
        # For example, if the text response from the provider comes in as `content`:
        if isinstance(text_content, str) and text_content:
            content.append(TextContentBlock(type="text", text=text_content))
        elif isinstance(text_content, list):
            # If the content is a list of text items
            for item in text_content:
                if item.get("type") == "text":
                    content.append(TextContentBlock(
                        type="text",
                        text=item.get("text", "")
                    ))

    if tool_calls := response.get("tool_calls"):
        # Similarly, if the provider returns tool calls under `tool_calls`:
        for tool_call in tool_calls:
            content.append(ToolCall(
                type="tool_call",
                id=tool_call.get("id", ""),
                name=tool_call.get("function", {}).get("name", ""),
                args=tool_call.get("function", {}).get("arguments", {}),
            ))

    # Some providers call this `reasoning`, `thoughts`, or `thinking`
    if reasoning := response.get("reasoning"):
        # May opt to insert reasoning in a specific order depending on API design
        content.insert(0, ReasoningContentBlock(
            type="reasoning",
            reasoning=reasoning
        ))

    if audio := response.get("audio"):
        content.append(AudioContentBlock(
            type="audio",
            # Provider-specific fields via PEP 728 TypedDict extra items (common for multimodal)
        ))

    return AIMessageV1(
        content=content,
        response_metadata=ResponseMetadata(
            model_name=response.get("model"),  # or whatever key the provider uses for model name
            usage=response.get("usage", {}),
            # Other provider-specific metadata
        ),
    )
```

### Phase 3: Streaming Implementation

Implement streaming that yields `AIMessageChunkV1` directly:

```python
# Methods below live on ChatProviderV1 (base_v1.py, imports shown in 1.1);
# conversion helpers live in _compat.py.
def _generate_stream(
    self,
    messages: list[MessageV1],
    stop: Optional[list[str]] = None,
    **kwargs: Any,
) -> Iterator[AIMessageChunkV1]:
    """Generate streaming response with native v1 chunks."""

    # Convert v1 messages to provider format
    provider_messages = [
        _convert_from_v1_to_provider_format(msg) for msg in messages
    ]

    # Stream from provider API
    for chunk in self._provider_stream(provider_messages, **kwargs):
        # Convert each chunk to v1 format
        v1_chunk = _convert_chunk_to_v1(chunk)
        yield v1_chunk


def _convert_chunk_to_v1(chunk: dict[str, Any]) -> AIMessageChunkV1:
    """Convert provider chunk to AIMessageChunkV1."""
    content: list[types.ContentBlock] = []

    if delta := chunk.get("delta"):
        if text := delta.get("content"):
            content.append(types.TextContentBlock(type="text", text=text))

        if tool_calls := delta.get("tool_calls"):
            for tool_call in tool_calls:
                if tool_call.get("id"):
                    content.append(types.ToolCallContentBlock(
                        type="tool_call",
                        id=tool_call["id"],
                        name=tool_call.get("function", {}).get("name", ""),
                        args=tool_call.get("function", {}).get("arguments", ""),
                    ))

    return AIMessageChunkV1(
        content=content,
        response_metadata=ResponseMetadata(
            model_name=chunk.get("model"),
        ),
    )


# Note: _convert_chunk_to_v1 does NOT handle summing - that's handled by
# AIMessageChunkV1.__add__ automatically.
```

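Since merging is delegated to `AIMessageChunkV1.__add__`, downstream code can rebuild the full response by summing the stream. A minimal sketch, assuming an instantiated `ChatProviderV1` named `llm` and a list of v1 `messages`:

```python
from typing import Optional

from langchain_core.messages.v1 import AIMessageChunk as AIMessageChunkV1

# `llm` (a ChatProviderV1) and `messages` (a list of v1 messages) are assumed to exist.
full: Optional[AIMessageChunkV1] = None
for chunk in llm.stream(messages):
    full = chunk if full is None else full + chunk  # merge via AIMessageChunkV1.__add__

if full is not None:
    print(full.content)  # merged list of v1 content blocks
```
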
### Phase 4: Content Block Support

#### 4.1 Standard Content Block Types

Support all standard v1 content blocks as defined in `libs/core/langchain_core/messages/content_blocks.py`.

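For reference, a minimal sketch of constructing the standard blocks used throughout this plan (field values are illustrative; block names and fields follow the `content_blocks.py` usage shown above):

```python
from langchain_core.messages.content_blocks import (
    AudioContentBlock,
    ImageContentBlock,
    TextContentBlock,
    ToolCall,
)

# Standard v1 content blocks are TypedDicts, i.e. plain dicts at runtime.
text = TextContentBlock(type="text", text="Hello")
image = ImageContentBlock(type="image", mime_type="image/jpeg", base64="<base64 data>")
audio = AudioContentBlock(type="audio", mime_type="audio/wav", base64="<base64 data>")
tool_call = ToolCall(type="tool_call", id="call_1", name="get_weather", args={"city": "Paris"})
```
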
#### 4.2 Provider-Specific Extensions

Use **PEP 728 TypedDict with Typed Extra Items** for provider-specific content within standard blocks:

```python
# Provider-specific fields within standard content blocks
# PEP 728 allows extra keys beyond the defined TypedDict structure
text_block_with_extras: TextContentBlock = {
    "type": "text",
    "text": "Hello world",
    "provider_confidence": 0.95,  # Extra field: provider-specific confidence score
    "provider_metadata": {  # Extra field: nested provider metadata
        "model_tier": "premium",
        "processing_time_ms": 150
    }
}
```

**About [PEP 728](https://peps.python.org/pep-0728/):** PEP 728 extends TypedDict to support typed extra items beyond the explicitly defined keys. This allows providers to add custom fields while maintaining type safety for the standard fields.

**Extra Item Types:** Standard content blocks support extra keys via PEP 728 with `extra_items=Any`, meaning provider-specific fields can be of any type. This means:

- **Core fields** (like `type`, `text`, `id`) are **strongly typed** according to the TypedDict definition
- **Extra fields** (provider-specific extensions) are typed as `Any`, allowing complete flexibility
- **Type safety** is maintained for the standard fields while allowing arbitrary extensions

This is the most flexible approach: providers can add any kind of metadata, configuration, or custom data they need without breaking the type system or requiring changes to the core LangChain types.

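For illustration only, a block declared with PEP 728 typed extra items could look like the sketch below; this is not the actual LangChain definition, and it assumes a `typing_extensions` release with PEP 728 support:

```python
from typing import Any, Literal

from typing_extensions import TypedDict


class ExampleTextBlock(TypedDict, extra_items=Any):  # hypothetical, for illustration
    """Sketch of a PEP 728 TypedDict: extra keys are allowed and typed as Any."""

    type: Literal["text"]
    text: str


block: ExampleTextBlock = {
    "type": "text",
    "text": "Hello",
    "provider_confidence": 0.95,  # extra item, typed as Any
}
```

Incoming provider content that has no standard equivalent can instead be wrapped in a `NonStandardContentBlock`, as in the handler below:
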
```python
def _handle_provider_specific_content(block: dict[str, Any]) -> types.ContentBlock:
    """Handle provider-specific content blocks."""

    # For known provider extensions, create typed blocks
    if block.get("type") == "provider_specific_type":
        return cast(types.ContentBlock, ProviderSpecificContentBlock(...))

    # For unknown types, use NonStandardContentBlock
    return cast(types.ContentBlock, types.NonStandardContentBlock(
        type="non_standard",
        content=block
    ))
```

### Phase 5: Testing Framework

#### 5.1 V1 Tests

Create comprehensive tests for v1 functionality:

```python
from typing import Optional

from langchain_core.messages.content_blocks import (
    AudioContentBlock,
    ImageContentBlock,
    TextContentBlock,
)
from langchain_core.messages.v1 import (
    AIMessage as AIMessageV1,
    AIMessageChunk as AIMessageChunkV1,
    HumanMessage as HumanMessageV1,
)

from langchain_{provider}.chat_models import ChatProviderV1


def test_v1_native_message_handling():
    """Test native v1 message processing."""
    llm = ChatProviderV1(model="test-model")

    message = HumanMessageV1(content=[
        TextContentBlock(type="text", text="Hello"),
        ImageContentBlock(type="image", mime_type="image/jpeg", base64="base64data...")
    ])

    response = llm.invoke([message])

    assert isinstance(response, AIMessageV1)
    assert isinstance(response.content, list)
    # Content blocks are TypedDicts, so check structure rather than isinstance
    assert all(isinstance(block, dict) and "type" in block for block in response.content)

    # Verify content block structure and content
    text_blocks = [b for b in response.content if b.get("type") == "text"]
    assert len(text_blocks) >= 1, "Response should contain at least one text block"
    assert text_blocks[0]["text"], "Text block should contain non-empty text content"
    assert isinstance(text_blocks[0]["text"], str), "Text content should be a string"


def test_v1_streaming_consistency():
    """Test that streaming and non-streaming produce equivalent results."""
    llm = ChatProviderV1(model="test-model")

    message = HumanMessageV1(content=[
        TextContentBlock(type="text", text="Hello"),
    ])

    # Non-streaming
    non_stream = llm.invoke([message])

    # Streaming: merge chunks with the `+` operator
    stream_chunks = list(llm.stream([message]))
    stream_combined: Optional[AIMessageChunkV1] = None
    for chunk in stream_chunks:
        stream_combined = chunk if stream_combined is None else stream_combined + chunk

    # Should be equivalent
    assert stream_combined is not None
    assert non_stream.content == stream_combined.content


def test_v1_content_block_types():
    """Test all supported content block types."""
    llm = ChatProviderV1(model="test-model")

    # Test each content block type
    test_cases = [
        TextContentBlock(type="text", text="Hello"),
        ImageContentBlock(type="image", mime_type="image/jpeg", base64="base64data..."),
        AudioContentBlock(type="audio", mime_type="audio/wav", base64="audiodata..."),
        # ...
    ]

    for block in test_cases:
        message = HumanMessageV1(content=[block])
        response = llm.invoke([message])
        assert isinstance(response, AIMessageV1)
```

#### 5.2 Migration Tests

Test compatibility between v0 and v1 implementations:

```python
from langchain_core.messages.content_blocks import TextContentBlock
from langchain_core.messages.v1 import HumanMessage as HumanMessageV1

from langchain_{provider}.chat_models import ChatProvider, ChatProviderV1


def test_v0_v1_feature_parity():
    """Test that v1 implementation has feature parity with v0."""
    llm_v0 = ChatProvider(model="test-model")
    llm_v1 = ChatProviderV1(model="test-model")

    # Test basic functionality
    v0_response = llm_v0.invoke("Hello")
    # Equivalent ways to construct the v1 input:
    v1_response = llm_v1.invoke([HumanMessageV1("Hello")])
    v1_response = llm_v1.invoke([HumanMessageV1(content=[
        TextContentBlock(type="text", text="Hello")
    ])])

    # Extract text content for comparison
    v0_text = v0_response.content
    v1_text = "".join(
        block.get("text", "") for block in v1_response.content
        if block.get("type") == "text"
    )

    # Should produce equivalent text output
    assert v0_text == v1_text
```

### Phase 6: Documentation and Migration

#### 6.1 V1 Documentation

Document the v1 implementation separately:

```python
class ChatProviderV1(BaseChatProviderV1):
    """Provider chat model with native v1 content block support.

    This implementation provides native support for structured content blocks
    and always returns AIMessageV1 format responses.

    Examples:
        Basic text conversation:

        .. code-block:: python

            from langchain_{provider}.chat_models import ChatProviderV1
            from langchain_core.messages.v1 import HumanMessage
            from langchain_core.messages.content_blocks import TextContentBlock

            llm = ChatProviderV1(model="provider-model")
            response = llm.invoke([
                HumanMessage(content=[
                    TextContentBlock(type="text", text="Hello!")
                ])
            ])

            # Response content is always structured
            print(response.content)
            # [{"type": "text", "text": "Hello! How can I help?"}]  (a TextContentBlock)

        Multi-modal input:

        .. code-block:: python

            from langchain_core.messages.content_blocks import ImageContentBlock

            response = llm.invoke([
                HumanMessage(content=[
                    TextContentBlock(type="text", text="Describe this image:"),
                    ImageContentBlock(
                        type="image",
                        mime_type="image/jpeg",
                        base64="base64_encoded_image"
                    )
                ])
            ])
    """
```

#### 6.2 Migration Guide

````markdown
# Migrating to V1 Chat Models

## Overview

V1 chat models provide native support for standard content blocks and always return `AIMessageV1` format responses.

## Key Differences

### Import Changes

```python
# V0 implementation (conditional v1 support)
from langchain_{provider} import ChatProvider
llm = ChatProvider(output_version="v1")

# V1 implementation (native v1 support)
from langchain_{provider}.chat_models import ChatProviderV1
llm = ChatProviderV1()
```

### Message Format

```python
# V0 mixed format (strings or lists)
message = HumanMessage(content="Hello")  # or content=[...]

# V1 structured format (always lists)
from langchain_core.messages.v1 import HumanMessage as HumanMessageV1
message = HumanMessageV1(content=[{"type": "text", "text": "Hello"}])
```
````

## Checklist

```txt
Core Infrastructure
- [ ] Create `base_v1.py` file with `BaseChatModelV1` inheritance
- [ ] Implement `ChatProviderV1` class with native v1 support
- [ ] Create `_compat.py` with v1 conversion utilities
- [ ] Update package `__init__.py` exports

Message Processing
- [ ] Implement `_convert_from_v1_to_provider_format()` for API requests
- [ ] Implement `_convert_to_v1_from_provider_format()` for responses
- [ ] Add streaming support with `AIMessageChunkV1`
- [ ] Handle all standard content block types

Content Block Support
- [ ] Support `TextContentBlock` for text content
- [ ] Support `ImageContentBlock` for images (where applicable)
- [ ] Support `AudioContentBlock` for audio (where applicable)
- [ ] Support `ToolCallContentBlock` for tool calls
- [ ] Support `ReasoningContentBlock` for reasoning (where applicable)
- [ ] Support other multimodal content blocks (where applicable)
- [ ] Handle provider-specific fields with `extra_items`
- [ ] Handle provider-specific blocks by returning `NonStandardContentBlock`

Testing
- [ ] Create comprehensive unit tests for v1 functionality
- [ ] Add streaming vs non-streaming consistency tests
- [ ] Test all supported content block types
- [ ] Add migration compatibility tests
- [ ] Performance benchmarks vs v0 implementation

Documentation
- [ ] Update class docstrings with v1 examples
- [ ] Create migration guide from v0 to v1
- [ ] Document provider-specific content block support
- [ ] Add troubleshooting section
```

## Success Criteria

1. **Native V1 Support**: Full `BaseChatModelV1` implementation
2. **Zero Conversion Overhead**: No runtime v0-to-v1 message format conversion
3. **Feature Complete**: All provider capabilities available natively
4. **Type Safe**: Full typing for all content blocks and operations
5. **Well Documented**: Clear migration path and usage examples
6. **Performance Optimized**: Better performance than the v0 conditional approach

## Provider-Specific Considerations

When implementing v1 support for your specific provider:

### Content Block Mapping

- Map your provider's native content types to standard v1 content blocks (see the sketch after this list)
- Use `NonStandardContentBlock` for provider-specific content that doesn't map to standard types
- Leverage PEP 728 TypedDict extra items for provider-specific fields/metadata within LangChain's standard blocks

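One way to organize this mapping is a small dispatch table from provider part types to converter functions. A minimal sketch, where the provider part shapes and type names (`"output_text"`, `"output_image"`) are hypothetical and the block fields follow the ones used in this plan:

```python
from typing import Any, Callable

from langchain_core.messages import content_blocks as types


def _text_from_provider(part: dict[str, Any]) -> types.ContentBlock:
    return types.TextContentBlock(type="text", text=part.get("text", ""))


def _image_from_provider(part: dict[str, Any]) -> types.ContentBlock:
    return types.ImageContentBlock(
        type="image",
        mime_type=part.get("mime_type", ""),
        base64=part.get("data", ""),
    )


# Hypothetical provider part types keyed to converter functions.
_BLOCK_CONVERTERS: dict[str, Callable[[dict[str, Any]], types.ContentBlock]] = {
    "output_text": _text_from_provider,
    "output_image": _image_from_provider,
}


def _map_provider_part(part: dict[str, Any]) -> types.ContentBlock:
    converter = _BLOCK_CONVERTERS.get(part.get("type", ""))
    if converter is not None:
        return converter(part)
    # Anything unrecognized is preserved rather than dropped.
    return types.NonStandardContentBlock(type="non_standard", content=part)
```
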
### Tool Call Handling

- Map your provider's tool calling format to v1 `ToolCall` content blocks
- Handle both standard function calls and provider-specific built-in tools
- Preserve provider-specific tool call metadata using extra fields (see the sketch after this list)

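For example, provider-specific tool call metadata can be carried as PEP 728 extra items on the standard `ToolCall` block. A minimal sketch, assuming a hypothetical provider payload shaped like the streaming examples above:

```python
import json
from typing import Any

from langchain_core.messages.content_blocks import ToolCall


def _tool_call_from_provider(raw: dict[str, Any]) -> ToolCall:
    """Map a hypothetical provider tool-call payload to a v1 ToolCall block."""
    args = raw.get("function", {}).get("arguments", "{}")
    if isinstance(args, str):
        args = json.loads(args) if args else {}
    tool_call = ToolCall(
        type="tool_call",
        id=raw.get("id", ""),
        name=raw.get("function", {}).get("name", ""),
        args=args,
    )
    # Preserve provider-specific metadata as a PEP 728 extra item.
    if "index" in raw:
        tool_call["provider_index"] = raw["index"]
    return tool_call
```
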
### Streaming Implementation

- Ensure streaming chunks are properly typed as `AIMessageChunkV1`
- Implement proper chunk merging using the `+` operator
- Handle provider-specific streaming features (like reasoning) appropriately