feat(core): content block factories + ids + docs + tests (#32316)

## Benefits

1. **Type Safety**: Compile-time validation of required fields and
proper type setting
2. **Less Boilerplate**: No need to manually set the `type` field or
generate IDs
3. **Input Validation**: Runtime validation prevents common errors
(e.g., base64 without MIME type)
4. **Consistent Patterns**: Standardized creation patterns across all
block types
5. **Better Developer Experience**: Cleaner, more intuitive API than
manual TypedDict construction. It also follows other established patterns
(e.g. `create_react_agent`, `init_chat_model`).
This commit is contained in:
Mason Daugherty 2025-07-31 11:12:00 -04:00 committed by GitHub
parent 740d9d3e7e
commit 44bd6fe837
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 3101 additions and 433 deletions

View File

@ -20,6 +20,7 @@ jobs:
codspeed:
name: 'Benchmark'
runs-on: ubuntu-latest
if: ${{ !contains(github.event.pull_request.labels.*.name, 'codspeed-ignore') }}
strategy:
matrix:
include:

View File

@ -217,7 +217,11 @@ def _load_package_modules(
# Get the full namespace of the module
namespace = str(relative_module_name).replace(".py", "").replace("/", ".")
# Keep only the top level namespace
top_namespace = namespace.split(".")[0]
# (but make special exception for content_blocks and messages.v1)
if namespace == "messages.content_blocks" or namespace == "messages.v1":
top_namespace = namespace # Keep full namespace for content_blocks
else:
top_namespace = namespace.split(".")[0]
try:
# If submodule is present, we need to construct the paths in a slightly
@ -283,7 +287,7 @@ def _construct_doc(
.. toctree::
:hidden:
:maxdepth: 2
"""
index_autosummary = """
"""
@ -365,9 +369,9 @@ def _construct_doc(
module_doc += f"""\
:template: {template}
{class_["qualified_name"]}
"""
index_autosummary += f"""
{class_["qualified_name"]}
@ -550,8 +554,8 @@ def _build_index(dirs: List[str]) -> None:
integrations = sorted(dir_ for dir_ in dirs if dir_ not in main_)
doc = """# LangChain Python API Reference
Welcome to the LangChain Python API reference. This is a reference for all
`langchain-x` packages.
Welcome to the LangChain Python API reference. This is a reference for all
`langchain-x` packages.
For user guides see [https://python.langchain.com](https://python.langchain.com).

View File

@ -124,6 +124,47 @@ start "" htmlcov/index.html || open htmlcov/index.html
```
## Snapshot Testing
Some tests use [syrupy](https://github.com/tophat/syrupy) for snapshot testing, which captures the output of functions and compares them to stored snapshots. This is particularly useful for testing JSON schema generation and other structured outputs.
### Updating Snapshots
To update snapshots when the expected output has legitimately changed:
```bash
uv run --group test pytest path/to/test.py --snapshot-update
```
### Pydantic Version Compatibility Issues
Pydantic generates different JSON schemas across versions, which can cause snapshot test failures in CI when tests run with different Pydantic versions than what was used to generate the snapshots.
**Symptoms:**
- CI fails with snapshot mismatches showing differences like missing or extra fields.
- Tests pass locally but fail in CI with different Pydantic versions.
**Solution:**
Locally update snapshots using the same Pydantic version that CI uses:
1. **Identify the failing Pydantic version** from CI logs (e.g., `2.7.0`, `2.8.0`, `2.9.0`)
2. **Update snapshots with that version:**
```bash
uv run --with "pydantic==2.9.0" --group test pytest tests/unit_tests/path/to/test.py::test_name --snapshot-update
```
3. **Verify compatibility across supported versions:**
```bash
# Test with the version you used to update
uv run --with "pydantic==2.9.0" --group test pytest tests/unit_tests/path/to/test.py::test_name
# Test with other supported versions
uv run --with "pydantic==2.8.0" --group test pytest tests/unit_tests/path/to/test.py::test_name
```
**Note:** Some tests use `@pytest.mark.skipif` decorators to only run with specific Pydantic version ranges (e.g., `PYDANTIC_VERSION_AT_LEAST_210`). Make sure to understand these constraints when updating snapshots.
## Coverage
Code coverage (i.e. the amount of code that is covered by unit tests) helps identify areas of the code that are potentially more or less brittle.

View File

@ -8,11 +8,7 @@ from typing import Any, Literal, Optional, Union, cast
from pydantic import model_validator
from typing_extensions import NotRequired, Self, TypedDict, override
from langchain_core.messages.base import (
BaseMessage,
BaseMessageChunk,
merge_content,
)
from langchain_core.messages.base import BaseMessage, BaseMessageChunk, merge_content
from langchain_core.messages.tool import (
InvalidToolCall,
ToolCall,
@ -20,15 +16,9 @@ from langchain_core.messages.tool import (
default_tool_chunk_parser,
default_tool_parser,
)
from langchain_core.messages.tool import (
invalid_tool_call as create_invalid_tool_call,
)
from langchain_core.messages.tool import (
tool_call as create_tool_call,
)
from langchain_core.messages.tool import (
tool_call_chunk as create_tool_call_chunk,
)
from langchain_core.messages.tool import invalid_tool_call as create_invalid_tool_call
from langchain_core.messages.tool import tool_call as create_tool_call
from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk
from langchain_core.utils._merge import merge_dicts, merge_lists
from langchain_core.utils.json import parse_partial_json
from langchain_core.utils.usage import _dict_int_op
@ -37,6 +27,16 @@ logger = logging.getLogger(__name__)
_LC_ID_PREFIX = "run-"
"""Internal tracing/callback system identifier.
Used for:
- Tracing. Every LangChain operation (LLM call, chain execution, tool use, etc.)
gets a unique run_id (UUID)
- Enables tracking parent-child relationships between operations
"""
_LC_AUTO_PREFIX = "lc_"
"""LangChain auto-generated ID prefix for messages and content blocks."""
class InputTokenDetails(TypedDict, total=False):
@ -428,17 +428,27 @@ def add_ai_message_chunks(
chunk_id = None
candidates = [left.id] + [o.id for o in others]
# first pass: pick the first non-run-* id
# first pass: pick the first provider-assigned id (non-run-* and non-lc_*)
for id_ in candidates:
if id_ and not id_.startswith(_LC_ID_PREFIX):
if (
id_
and not id_.startswith(_LC_ID_PREFIX)
and not id_.startswith(_LC_AUTO_PREFIX)
):
chunk_id = id_
break
else:
# second pass: no provider-assigned id found, just take the first non-null
# second pass: prefer lc_* ids over run-* ids
for id_ in candidates:
if id_:
if id_ and id_.startswith(_LC_AUTO_PREFIX):
chunk_id = id_
break
else:
# third pass: take any remaining id (run-* ids)
for id_ in candidates:
if id_:
chunk_id = id_
break
return left.__class__(
example=left.example,

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,8 @@
"""LangChain 1.0 message format."""
"""LangChain v1.0.0 message format.
Each message has content that may be comprised of content blocks, defined under
``langchain_core.messages.content_blocks``.
"""
import json
import uuid
@ -9,14 +13,15 @@ from pydantic import BaseModel
from typing_extensions import TypedDict
import langchain_core.messages.content_blocks as types
from langchain_core.messages.ai import _LC_ID_PREFIX, UsageMetadata, add_usage
from langchain_core.messages.ai import (
_LC_AUTO_PREFIX,
_LC_ID_PREFIX,
UsageMetadata,
add_usage,
)
from langchain_core.messages.base import merge_content
from langchain_core.messages.tool import (
ToolCallChunk,
)
from langchain_core.messages.tool import (
invalid_tool_call as create_invalid_tool_call,
)
from langchain_core.messages.tool import ToolCallChunk
from langchain_core.messages.tool import invalid_tool_call as create_invalid_tool_call
from langchain_core.messages.tool import tool_call as create_tool_call
from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk
from langchain_core.utils._merge import merge_dicts, merge_lists
@ -26,13 +31,16 @@ from langchain_core.utils.json import parse_partial_json
def _ensure_id(id_val: Optional[str]) -> str:
"""Ensure the ID is a valid string, generating a new UUID if not provided.
Auto-generated UUIDs are prefixed by ``'lc_'`` to indicate they are
LangChain-generated IDs.
Args:
id_val: Optional string ID value to validate.
Returns:
A valid string ID, either the provided value or a new UUID.
"""
return id_val or str(uuid.uuid4())
return id_val or str(f"{_LC_AUTO_PREFIX}{uuid.uuid4()}")
class ResponseMetadata(TypedDict, total=False):
@ -41,7 +49,9 @@ class ResponseMetadata(TypedDict, total=False):
Contains additional information returned by the provider, such as
response headers, service tiers, log probabilities, system fingerprints, etc.
Extra keys are permitted from what is typed here.
Extra keys are permitted from what is typed here (via `total=False`), allowing
for provider-specific metadata to be included without breaking the type
definition.
"""
model_provider: str
@ -70,6 +80,11 @@ class AIMessage:
"""
type: Literal["ai"] = "ai"
"""The type of the message. Must be a string that is unique to the message type.
The purpose of this field is to allow for easy identification of the message type
when deserializing messages.
"""
name: Optional[str] = None
"""An optional name for the message.
@ -83,13 +98,15 @@ class AIMessage:
id: Optional[str] = None
"""Unique identifier for the message.
If the provider assigns a meaningful ID, it should be used here.
If the provider assigns a meaningful ID, it should be used here. Otherwise, a
LangChain-generated ID will be used.
"""
lc_version: str = "v1"
"""Encoding version for the message."""
"""Encoding version for the message. Used for serialization."""
content: list[types.ContentBlock] = field(default_factory=list)
"""Message content as a list of content blocks."""
usage_metadata: Optional[UsageMetadata] = None
"""If provided, usage metadata for a message, such as token counts."""
@ -138,7 +155,7 @@ class AIMessage:
else:
self.content = content
self.id = id
self.id = _ensure_id(id)
self.name = name
self.lc_version = lc_version
self.usage_metadata = usage_metadata
@ -205,8 +222,19 @@ class AIMessageChunk(AIMessage):
"""
type: Literal["ai_chunk"] = "ai_chunk" # type: ignore[assignment]
"""The type of the message. Must be a string that is unique to the message type.
The purpose of this field is to allow for easy identification of the message type
when deserializing messages.
"""
tool_call_chunks: list[types.ToolCallChunk] = field(init=False)
"""List of partial tool call data.
Emitted by the model during streaming, this field contains
tool call chunks that may not yet be complete. It is used to reconstruct
tool calls from the streamed content.
"""
def __init__(
self,
@ -236,7 +264,7 @@ class AIMessageChunk(AIMessage):
else:
self.content = content
self.id = id
self.id = _ensure_id(id)
self.name = name
self.lc_version = lc_version
self.usage_metadata = usage_metadata
@ -430,17 +458,27 @@ def add_ai_message_chunks(
chunk_id = None
candidates = [left.id] + [o.id for o in others]
# first pass: pick the first non-run-* id
# first pass: pick the first provider-assigned id (non-`run-*` and non-`lc_*`)
for id_ in candidates:
if id_ and not id_.startswith(_LC_ID_PREFIX):
if (
id_
and not id_.startswith(_LC_ID_PREFIX)
and not id_.startswith(_LC_AUTO_PREFIX)
):
chunk_id = id_
break
else:
# second pass: no provider-assigned id found, just take the first non-null
# second pass: prefer lc_* ids over run-* ids
for id_ in candidates:
if id_:
if id_ and id_.startswith(_LC_AUTO_PREFIX):
chunk_id = id_
break
else:
# third pass: take any remaining id (run-* ids)
for id_ in candidates:
if id_:
chunk_id = id_
break
return left.__class__(
content=cast("list[types.ContentBlock]", content),
@ -467,7 +505,22 @@ class HumanMessage:
"""
id: str
"""Used for serialization.
If the provider assigns a meaningful ID, it should be used here. Otherwise, a
LangChain-generated ID will be used.
"""
content: list[types.ContentBlock]
"""Message content as a list of content blocks."""
type: Literal["human"] = "human"
"""The type of the message. Must be a string that is unique to the message type.
The purpose of this field is to allow for easy identification of the message type
when deserializing messages.
"""
name: Optional[str] = None
"""An optional name for the message.
@ -476,12 +529,6 @@ class HumanMessage:
Usage of this field is optional, and whether it's used or not is up to the
model implementation.
"""
type: Literal["human"] = "human"
"""The type of the message. Must be a string that is unique to the message type.
The purpose of this field is to allow for easy identification of the message type
when deserializing messages.
"""
def __init__(
self,
@ -529,8 +576,21 @@ class SystemMessage:
"""
id: str
"""Used for serialization.
If the provider assigns a meaningful ID, it should be used here. Otherwise, a
LangChain-generated ID will be used.
"""
content: list[types.ContentBlock]
"""Message content as a list of content blocks."""
type: Literal["system"] = "system"
"""The type of the message. Must be a string that is unique to the message type.
The purpose of this field is to allow for easy identification of the message type
when deserializing messages.
"""
name: Optional[str] = None
"""An optional name for the message.
@ -598,9 +658,26 @@ class ToolMessage:
"""
id: str
"""Used for serialization."""
tool_call_id: str
"""ID of the tool call this message responds to.
This should match the ID of the tool call that this message is responding to.
"""
content: list[types.ContentBlock]
artifact: Optional[Any] = None # App-side payload not for the model
"""Message content as a list of content blocks."""
type: Literal["tool"] = "tool"
"""The type of the message. Must be a string that is unique to the message type.
The purpose of this field is to allow for easy identification of the message type
when deserializing messages.
"""
artifact: Optional[Any] = None
"""App-side payload not for the model."""
name: Optional[str] = None
"""An optional name for the message.
@ -612,7 +689,11 @@ class ToolMessage:
"""
status: Literal["success", "error"] = "success"
type: Literal["tool"] = "tool"
"""Execution status of the tool call.
Indicates whether the tool call was successful or encountered an error.
Defaults to "success".
"""
def __init__(
self,

View File

@ -13,7 +13,7 @@ dependencies = [
"PyYAML>=5.3",
"typing-extensions>=4.7",
"packaging>=23.2",
"pydantic>=2.7.4",
"pydantic>=2.11.7",
]
name = "langchain-core"
version = "0.3.72"

View File

@ -0,0 +1,974 @@
"""Unit tests for ContentBlock factory functions."""
from uuid import UUID
import pytest
from langchain_core.messages.content_blocks import (
CodeInterpreterCall,
CodeInterpreterOutput,
CodeInterpreterResult,
InvalidToolCall,
ToolCallChunk,
WebSearchCall,
WebSearchResult,
create_audio_block,
create_citation,
create_file_block,
create_image_block,
create_non_standard_block,
create_plaintext_block,
create_reasoning_block,
create_text_block,
create_tool_call,
create_video_block,
)
def _validate_lc_uuid(id_value: str) -> None:
    """Check that an ID is a LangChain-generated ``lc_``-prefixed UUID.

    Args:
        id_value: The ID string to validate.

    Raises:
        AssertionError: If the ``lc_`` prefix is missing.
        ValueError: If the text after the prefix is not a valid UUID.
    """
    prefix = "lc_"
    assert id_value.startswith(prefix), (
        f"ID should start with 'lc_' but got: {id_value}"
    )
    # UUID() raises ValueError when the suffix is not a well-formed UUID.
    UUID(id_value[len(prefix) :])
class TestTextBlockFactory:
    """Tests for the ``create_text_block`` factory."""

    def test_basic_creation(self) -> None:
        """A text block carries its text, type, and an auto-generated ID."""
        blk = create_text_block("Hello world")
        assert blk["type"] == "text"
        assert blk.get("text") == "Hello world"
        assert "id" in blk
        generated_id = blk.get("id")
        assert generated_id is not None, "block id is None"
        _validate_lc_uuid(generated_id)

    def test_with_custom_id(self) -> None:
        """A caller-supplied ID is stored verbatim."""
        blk = create_text_block("Hello", id="custom-123")
        assert blk.get("id") == "custom-123"

    def test_with_annotations(self) -> None:
        """Annotations passed in are attached unchanged."""
        note = create_citation(url="https://example.com", title="Example")
        blk = create_text_block("Hello", annotations=[note])
        assert blk.get("annotations") == [note]

    def test_with_index(self) -> None:
        """An explicit index is preserved."""
        assert create_text_block("Hello", index=42).get("index") == 42

    def test_optional_fields_not_present_when_none(self) -> None:
        """Omitted optional fields do not appear as keys at all."""
        blk = create_text_block("Hello")
        for key in ("annotations", "index"):
            assert key not in blk
class TestImageBlockFactory:
    """Tests for the ``create_image_block`` factory."""

    def test_with_url(self) -> None:
        """URL-sourced image blocks keep the URL, type, and generated ID."""
        blk = create_image_block(url="https://example.com/image.jpg")
        assert blk["type"] == "image"
        assert blk.get("url") == "https://example.com/image.jpg"
        assert "id" in blk
        generated_id = blk.get("id")
        assert generated_id is not None, "block id is None"
        _validate_lc_uuid(generated_id)

    def test_with_base64(self) -> None:
        """Base64 data and its MIME type are stored together."""
        payload = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJ"
        blk = create_image_block(base64=payload, mime_type="image/png")
        assert blk.get("base64") == payload
        assert blk.get("mime_type") == "image/png"

    def test_with_file_id(self) -> None:
        """A provider file reference is stored under ``file_id``."""
        assert create_image_block(file_id="file-123").get("file_id") == "file-123"

    def test_no_source_raises_error(self) -> None:
        """Omitting url, base64, and file_id is rejected."""
        with pytest.raises(
            ValueError, match="Must provide one of: url, base64, or file_id"
        ):
            create_image_block()

    def test_base64_without_mime_type_raises_error(self) -> None:
        """Base64 data without a MIME type is rejected."""
        with pytest.raises(
            ValueError, match="mime_type is required when using base64 data"
        ):
            create_image_block(base64="iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJ")

    def test_with_index(self) -> None:
        """An explicit index is preserved."""
        blk = create_image_block(url="https://example.com/image.jpg", index=1)
        assert blk.get("index") == 1

    def test_optional_fields_not_present_when_not_provided(self) -> None:
        """Unused source/metadata keys are absent, not None."""
        blk = create_image_block(url="https://example.com/image.jpg")
        for key in ("base64", "file_id", "mime_type", "index"):
            assert key not in blk
class TestVideoBlockFactory:
    """Tests for the ``create_video_block`` factory."""

    def test_with_url(self) -> None:
        """URL-sourced video blocks keep the URL and type."""
        blk = create_video_block(url="https://example.com/video.mp4")
        assert blk["type"] == "video"
        assert blk.get("url") == "https://example.com/video.mp4"

    def test_with_base64(self) -> None:
        """Base64 data and its MIME type are stored together."""
        payload = "UklGRnoGAABXQVZFZm10IBAAAAABAAEA"
        blk = create_video_block(base64=payload, mime_type="video/mp4")
        assert blk.get("base64") == payload
        assert blk.get("mime_type") == "video/mp4"

    def test_no_source_raises_error(self) -> None:
        """Omitting url, base64, and file_id is rejected."""
        with pytest.raises(
            ValueError, match="Must provide one of: url, base64, or file_id"
        ):
            create_video_block()

    def test_base64_without_mime_type_raises_error(self) -> None:
        """Base64 data without a MIME type is rejected."""
        with pytest.raises(
            ValueError, match="mime_type is required when using base64 data"
        ):
            create_video_block(base64="UklGRnoGAABXQVZFZm10IBAAAAABAAEA")
class TestAudioBlockFactory:
    """Tests for the ``create_audio_block`` factory."""

    def test_with_url(self) -> None:
        """URL-sourced audio blocks keep the URL and type."""
        blk = create_audio_block(url="https://example.com/audio.mp3")
        assert blk["type"] == "audio"
        assert blk.get("url") == "https://example.com/audio.mp3"

    def test_with_base64(self) -> None:
        """Base64 audio data and its MIME type are stored together."""
        payload = "UklGRnoGAABXQVZFZm10IBAAAAABAAEA"
        blk = create_audio_block(base64=payload, mime_type="audio/mp3")
        assert blk.get("base64") == payload
        assert blk.get("mime_type") == "audio/mp3"

    def test_no_source_raises_error(self) -> None:
        """Omitting url, base64, and file_id is rejected."""
        with pytest.raises(
            ValueError, match="Must provide one of: url, base64, or file_id"
        ):
            create_audio_block()
class TestFileBlockFactory:
    """Tests for the ``create_file_block`` factory."""

    def test_with_url(self) -> None:
        """URL-sourced file blocks keep the URL and type."""
        blk = create_file_block(url="https://example.com/document.pdf")
        assert blk["type"] == "file"
        assert blk.get("url") == "https://example.com/document.pdf"

    def test_with_base64(self) -> None:
        """Base64 file data and its MIME type are stored together."""
        payload = "JVBERi0xLjQKJdPr6eEKMSAwIG9iago8PAovVHlwZSAvQ2F0YWxvZwo="
        blk = create_file_block(base64=payload, mime_type="application/pdf")
        assert blk.get("base64") == payload
        assert blk.get("mime_type") == "application/pdf"

    def test_no_source_raises_error(self) -> None:
        """Omitting url, base64, and file_id is rejected."""
        with pytest.raises(
            ValueError, match="Must provide one of: url, base64, or file_id"
        ):
            create_file_block()
class TestPlainTextBlockFactory:
    """Tests for the ``create_plaintext_block`` factory."""

    def test_basic_creation(self) -> None:
        """A plaintext block gets text/plain MIME type and a generated ID."""
        blk = create_plaintext_block("This is plain text content.")
        assert blk["type"] == "text-plain"
        assert blk.get("mime_type") == "text/plain"
        assert blk.get("text") == "This is plain text content."
        assert "id" in blk
        generated_id = blk.get("id")
        assert generated_id is not None, "block id is None"
        _validate_lc_uuid(generated_id)

    def test_with_title_and_context(self) -> None:
        """Title and context metadata are stored as given."""
        blk = create_plaintext_block(
            "Document content here.",
            title="Important Document",
            context="This document contains important information.",
        )
        assert blk.get("title") == "Important Document"
        assert blk.get("context") == "This document contains important information."

    def test_with_url(self) -> None:
        """A source URL is stored as given."""
        blk = create_plaintext_block("Content", url="https://example.com/document.txt")
        assert blk.get("url") == "https://example.com/document.txt"
class TestToolCallFactory:
    """Tests for the ``create_tool_call`` factory."""

    def test_basic_creation(self) -> None:
        """A tool call stores name, args, type, and a generated ID."""
        blk = create_tool_call("search", {"query": "python"})
        assert blk["type"] == "tool_call"
        assert blk["name"] == "search"
        assert blk["args"] == {"query": "python"}
        assert "id" in blk
        generated_id = blk.get("id")
        assert generated_id is not None, "block id is None"
        _validate_lc_uuid(generated_id)

    def test_with_custom_id(self) -> None:
        """A caller-supplied ID is stored verbatim."""
        blk = create_tool_call("search", {"query": "python"}, id="tool-123")
        assert blk.get("id") == "tool-123"

    def test_with_index(self) -> None:
        """An explicit index is preserved."""
        blk = create_tool_call("search", {"query": "python"}, index=2)
        assert blk.get("index") == 2
class TestReasoningBlockFactory:
    """Tests for the ``create_reasoning_block`` factory."""

    def test_basic_creation(self) -> None:
        """A reasoning block stores its text, type, and a generated ID."""
        blk = create_reasoning_block("Let me think about this problem...")
        assert blk["type"] == "reasoning"
        assert blk.get("reasoning") == "Let me think about this problem..."
        assert "id" in blk
        generated_id = blk.get("id")
        assert generated_id is not None, "block id is None"
        _validate_lc_uuid(generated_id)

    @pytest.mark.xfail(reason="Optional fields not implemented yet")
    def test_with_signatures(self) -> None:
        """Signature fields, once implemented, should round-trip unchanged."""
        blk = create_reasoning_block(
            "Thinking...",
            thought_signature="thought-sig-123",  # type: ignore[call-arg]
            signature="auth-sig-456",  # type: ignore[call-arg, unused-ignore]
        )
        assert blk.get("thought_signature") == "thought-sig-123"
        assert blk.get("signature") == "auth-sig-456"

    def test_with_index(self) -> None:
        """An explicit index is preserved."""
        assert create_reasoning_block("Thinking...", index=3).get("index") == 3
class TestCitationFactory:
    """Tests for the ``create_citation`` factory."""

    def test_basic_creation(self) -> None:
        """An empty citation still gets a type and generated ID."""
        blk = create_citation()
        assert blk["type"] == "citation"
        assert "id" in blk
        generated_id = blk.get("id")
        assert generated_id is not None, "block id is None"
        _validate_lc_uuid(generated_id)

    def test_with_all_fields(self) -> None:
        """Every optional citation field round-trips unchanged."""
        expected = {
            "url": "https://example.com/source",
            "title": "Source Document",
            "start_index": 10,
            "end_index": 50,
            "cited_text": "This is the cited text.",
        }
        blk = create_citation(**expected)
        for key, value in expected.items():
            assert blk.get(key) == value

    def test_optional_fields_not_present_when_none(self) -> None:
        """Omitted optional fields do not appear as keys at all."""
        blk = create_citation()
        for key in ("url", "title", "start_index", "end_index", "cited_text"):
            assert key not in blk
class TestNonStandardBlockFactory:
    """Tests for the ``create_non_standard_block`` factory."""

    def test_basic_creation(self) -> None:
        """The payload dict is stored under ``value`` with a generated ID."""
        payload = {"custom_field": "custom_value", "number": 42}
        blk = create_non_standard_block(payload)
        assert blk["type"] == "non_standard"
        assert blk["value"] == payload
        assert "id" in blk
        generated_id = blk.get("id")
        assert generated_id is not None, "block id is None"
        _validate_lc_uuid(generated_id)

    def test_with_index(self) -> None:
        """An explicit index is preserved."""
        assert create_non_standard_block({"data": "test"}, index=5).get("index") == 5

    def test_optional_fields_not_present_when_none(self) -> None:
        """Omitted optional fields do not appear as keys at all."""
        assert "index" not in create_non_standard_block({"data": "test"})
class TestUUIDValidation:
    """Tests for ID generation and validation behavior."""

    def test_custom_id_bypasses_lc_prefix_requirement(self) -> None:
        """Caller-provided IDs may use any format, not only ``lc_``."""
        blk = create_text_block("Hello", id="custom-123")
        assert blk.get("id") == "custom-123"

    def test_generated_ids_are_unique(self) -> None:
        """Repeated calls never reuse an ID, and each one is lc_-prefixed."""
        generated = [create_text_block("test").get("id") for _ in range(10)]
        assert len(set(generated)) == len(generated)
        for id_value in generated:
            _validate_lc_uuid(id_value or "")

    def test_empty_string_id_generates_new_uuid(self) -> None:
        """An empty-string ID is treated as missing and replaced."""
        id_value: str = create_text_block("Hello", id="").get("id", "")
        assert id_value != ""
        _validate_lc_uuid(id_value)

    def test_generated_id_length(self) -> None:
        """Generated IDs are exactly ``lc_`` (3) + UUID4 (36) = 39 chars."""
        id_value = create_text_block("Hello").get("id")
        assert id_value is not None
        expected_length = 36 + 3
        assert len(id_value) == expected_length, (
            f"Expected length {expected_length}, got {len(id_value)}"
        )
        _validate_lc_uuid(id_value)
class TestFactoryTypeConsistency:
    """Test that factory functions return correctly typed objects."""

    def test_factories_return_correct_types(self) -> None:
        """Each factory yields a dict tagged with its expected ``type``."""
        blocks_and_types = [
            (create_text_block("test"), "text"),
            (create_image_block(url="https://example.com/image.jpg"), "image"),
            (create_video_block(url="https://example.com/video.mp4"), "video"),
            (create_audio_block(url="https://example.com/audio.mp3"), "audio"),
            (create_file_block(url="https://example.com/file.pdf"), "file"),
            (create_plaintext_block("content"), "text-plain"),
            (create_tool_call("tool", {"arg": "value"}), "tool_call"),
            (create_reasoning_block("reasoning"), "reasoning"),
            (create_citation(), "citation"),
            (create_non_standard_block({"data": "value"}), "non_standard"),
        ]
        for blk, expected_type in blocks_and_types:
            assert isinstance(blk, dict)
            assert blk["type"] == expected_type
class TestExtraItems:
"""Test that content blocks support extra items via __extra_items__ field."""
def test_text_block_extra_items(self) -> None:
"""Test that TextContentBlock can store extra provider-specific fields."""
block = create_text_block("Hello world")
block["openai_metadata"] = {"model": "gpt-4", "temperature": 0.7} # type: ignore[typeddict-unknown-key]
block["anthropic_usage"] = {"input_tokens": 10, "output_tokens": 20} # type: ignore[typeddict-unknown-key]
block["custom_field"] = "any value" # type: ignore[typeddict-unknown-key]
assert block["type"] == "text"
assert block["text"] == "Hello world"
assert "id" in block
assert block.get("openai_metadata") == {"model": "gpt-4", "temperature": 0.7}
assert block.get("anthropic_usage") == {"input_tokens": 10, "output_tokens": 20}
assert block.get("custom_field") == "any value"
def test_text_block_extras_field(self) -> None:
"""Test that TextContentBlock properly supports the explicit extras field."""
block = create_text_block("Hello world")
# Test direct assignment to extras field
block["extras"] = {
"openai_metadata": {"model": "gpt-4", "temperature": 0.7},
"anthropic_usage": {"input_tokens": 10, "output_tokens": 20},
"custom_field": "any value",
}
assert block["type"] == "text"
assert block["text"] == "Hello world"
assert "id" in block
assert "extras" in block
extras = block.get("extras", {})
assert extras.get("openai_metadata") == {"model": "gpt-4", "temperature": 0.7}
expected_usage = {"input_tokens": 10, "output_tokens": 20}
assert extras.get("anthropic_usage") == expected_usage
assert extras.get("custom_field") == "any value"
def test_mixed_extra_items_types(self) -> None:
"""Test that extra items can be various types (str, int, bool, dict, list)."""
block = create_text_block("Test content")
# Add various types of extra fields
block["string_field"] = "string value" # type: ignore[typeddict-unknown-key]
block["int_field"] = 42 # type: ignore[typeddict-unknown-key]
block["float_field"] = 3.14 # type: ignore[typeddict-unknown-key]
block["bool_field"] = True # type: ignore[typeddict-unknown-key]
block["list_field"] = ["item1", "item2", "item3"] # type: ignore[typeddict-unknown-key]
block["dict_field"] = {"nested": {"deeply": "nested value"}} # type: ignore[typeddict-unknown-key]
block["none_field"] = None # type: ignore[typeddict-unknown-key]
# Verify all types are preserved
assert block.get("string_field") == "string value"
assert block.get("int_field") == 42
assert block.get("float_field") == 3.14
assert block.get("bool_field") is True
assert block.get("list_field") == ["item1", "item2", "item3"]
dict_field = block.get("dict_field", {})
assert isinstance(dict_field, dict)
nested = dict_field.get("nested", {})
assert isinstance(nested, dict)
assert nested.get("deeply") == "nested value"
assert block.get("none_field") is None
def test_extra_items_do_not_interfere_with_standard_fields(self) -> None:
    """Test that extra items don't interfere with standard field access."""
    block = create_text_block("Original text", index=1)
    # Add many extra fields
    for i in range(10):
        block[f"extra_field_{i}"] = f"value_{i}"  # type: ignore[literal-required]
    # Standard fields should still work correctly
    assert block["type"] == "text"
    assert block["text"] == "Original text"
    # Fix: the previous `assert block["index"] == 1 if "index" in block else None`
    # parsed as `assert (expr if cond else None)`, which would assert the falsy
    # `None` (i.e. always fail) whenever "index" was missing. Assert the intended
    # invariant directly: index was set to 1 at creation and must survive.
    assert block.get("index") == 1
    assert "id" in block
    # Extra fields should also be accessible
    for i in range(10):
        assert block.get(f"extra_field_{i}") == f"value_{i}"
def test_extra_items_can_be_modified(self) -> None:
    """Test that extra items can be modified after creation."""
    block = create_image_block(url="https://example.com/image.jpg")
    # An extra key can be written, read back, and then overwritten.
    for expected_status in ("pending", "processed"):
        block["status"] = expected_status  # type: ignore[typeddict-unknown-key]
        assert block.get("status") == expected_status
    # Nested extra values can be added...
    block["metadata"] = {"version": 1}  # type: ignore[typeddict-unknown-key]
    meta = block.get("metadata", {})
    assert isinstance(meta, dict)
    assert meta.get("version") == 1
    # ...and mutated in place after the fact.
    block["metadata"]["version"] = 2  # type: ignore[typeddict-item]
    meta = block.get("metadata", {})
    assert isinstance(meta, dict)
    assert meta.get("version") == 2
def test_all_content_blocks_support_extra_items(self) -> None:
    """Test that all content block types support extra items."""
    # One (block, marker) pair per factory-produced block type; each block must
    # accept an ad-hoc extra key and return it unchanged.
    cases = [
        (create_text_block("test"), "text_extra"),
        (create_image_block(url="https://example.com/image.jpg"), "image_extra"),
        (create_video_block(url="https://example.com/video.mp4"), "video_extra"),
        (create_audio_block(url="https://example.com/audio.mp3"), "audio_extra"),
        (create_file_block(url="https://example.com/file.pdf"), "file_extra"),
        (create_plaintext_block("content"), "plaintext_extra"),
        (create_tool_call("tool", {"arg": "value"}), "tool_extra"),
        (create_reasoning_block("reasoning"), "reasoning_extra"),
        (create_non_standard_block({"data": "value"}), "non_standard_extra"),
    ]
    for block, marker in cases:
        block["extra"] = marker  # type: ignore[typeddict-unknown-key]
        assert block.get("extra") == marker
class TestExtrasField:
    """Test the explicit extras field across all content block types.

    Every standard and special content block carries an optional ``extras``
    key for provider-specific metadata; these tests exercise presence,
    absence, mutation, and value-type preservation of that key.
    """

    def test_all_content_blocks_support_extras_field(self) -> None:
        """Test that all content block types support the explicit extras field."""
        # Shared payload assigned verbatim to each block type below.
        provider_metadata = {
            "provider": "openai",
            "model": "gpt-4",
            "temperature": 0.7,
            "usage": {"input_tokens": 10, "output_tokens": 20},
        }
        # Test TextContentBlock
        text_block = create_text_block("test")
        text_block["extras"] = provider_metadata
        assert text_block.get("extras") == provider_metadata
        assert text_block["type"] == "text"
        # Test ImageContentBlock
        image_block = create_image_block(url="https://example.com/image.jpg")
        image_block["extras"] = provider_metadata
        assert image_block.get("extras") == provider_metadata
        assert image_block["type"] == "image"
        # Test VideoContentBlock
        video_block = create_video_block(url="https://example.com/video.mp4")
        video_block["extras"] = provider_metadata
        assert video_block.get("extras") == provider_metadata
        assert video_block["type"] == "video"
        # Test AudioContentBlock
        audio_block = create_audio_block(url="https://example.com/audio.mp3")
        audio_block["extras"] = provider_metadata
        assert audio_block.get("extras") == provider_metadata
        assert audio_block["type"] == "audio"
        # Test FileContentBlock
        file_block = create_file_block(url="https://example.com/file.pdf")
        file_block["extras"] = provider_metadata
        assert file_block.get("extras") == provider_metadata
        assert file_block["type"] == "file"
        # Test PlainTextContentBlock
        plain_text_block = create_plaintext_block("content")
        plain_text_block["extras"] = provider_metadata
        assert plain_text_block.get("extras") == provider_metadata
        assert plain_text_block["type"] == "text-plain"
        # Test ToolCall
        tool_call = create_tool_call("tool", {"arg": "value"})
        tool_call["extras"] = provider_metadata
        assert tool_call.get("extras") == provider_metadata
        assert tool_call["type"] == "tool_call"
        # Test ReasoningContentBlock
        reasoning_block = create_reasoning_block("reasoning")
        reasoning_block["extras"] = provider_metadata
        assert reasoning_block.get("extras") == provider_metadata
        assert reasoning_block["type"] == "reasoning"
        # Test Citation
        citation = create_citation()
        citation["extras"] = provider_metadata
        assert citation.get("extras") == provider_metadata
        assert citation["type"] == "citation"

    def test_extras_field_is_optional(self) -> None:
        """Test that the extras field is optional and blocks work without it."""
        # Create blocks without extras
        text_block = create_text_block("test")
        image_block = create_image_block(url="https://example.com/image.jpg")
        tool_call = create_tool_call("tool", {"arg": "value"})
        reasoning_block = create_reasoning_block("reasoning")
        citation = create_citation()
        # Verify blocks work correctly without extras
        assert text_block["type"] == "text"
        assert image_block["type"] == "image"
        assert tool_call["type"] == "tool_call"
        assert reasoning_block["type"] == "reasoning"
        assert citation["type"] == "citation"
        # Verify extras field is not present when not set
        # (the factories must not inject an empty extras dict)
        assert "extras" not in text_block
        assert "extras" not in image_block
        assert "extras" not in tool_call
        assert "extras" not in reasoning_block
        assert "extras" not in citation

    def test_extras_field_can_be_modified(self) -> None:
        """Test that the extras field can be modified after creation."""
        block = create_text_block("test")
        # Add extras
        block["extras"] = {"initial": "value"}
        assert block.get("extras") == {"initial": "value"}
        # Modify extras: reassignment replaces the dict, so "initial" is gone
        block["extras"] = {"updated": "value", "count": 42}
        extras = block.get("extras", {})
        assert extras.get("updated") == "value"
        assert extras.get("count") == 42
        assert "initial" not in extras
        # Update nested values in extras
        if "extras" in block:
            block["extras"]["nested"] = {"deep": "value"}
        extras = block.get("extras", {})
        nested = extras.get("nested", {})
        assert isinstance(nested, dict)
        assert nested.get("deep") == "value"

    def test_extras_field_supports_various_data_types(self) -> None:
        """Test that the extras field can store various data types."""
        block = create_text_block("test")
        # Cover scalars, None, lists (with nested dicts), and deep nesting.
        complex_extras = {
            "string_val": "test string",
            "int_val": 42,
            "float_val": 3.14,
            "bool_val": True,
            "none_val": None,
            "list_val": ["item1", "item2", {"nested": "in_list"}],
            "dict_val": {"nested": {"deeply": {"nested": "value"}}},
        }
        block["extras"] = complex_extras
        extras = block.get("extras", {})
        assert extras.get("string_val") == "test string"
        assert extras.get("int_val") == 42
        assert extras.get("float_val") == 3.14
        assert extras.get("bool_val") is True
        assert extras.get("none_val") is None
        list_val = extras.get("list_val", [])
        assert isinstance(list_val, list)
        assert len(list_val) == 3
        assert list_val[0] == "item1"
        assert list_val[1] == "item2"
        assert isinstance(list_val[2], dict)
        assert list_val[2].get("nested") == "in_list"
        dict_val = extras.get("dict_val", {})
        assert isinstance(dict_val, dict)
        nested = dict_val.get("nested", {})
        assert isinstance(nested, dict)
        deeply = nested.get("deeply", {})
        assert isinstance(deeply, dict)
        assert deeply.get("nested") == "value"

    def test_extras_field_does_not_interfere_with_standard_fields(self) -> None:
        """Test that the extras field doesn't interfere with standard fields."""
        # Create a complex block with all standard fields
        block = create_text_block(
            "Test content",
            annotations=[create_citation(url="https://example.com")],
            index=42,
        )
        # Add extensive extras
        large_extras = {f"field_{i}": f"value_{i}" for i in range(100)}
        block["extras"] = large_extras
        # Verify all standard fields still work
        assert block["type"] == "text"
        assert block["text"] == "Test content"
        assert block.get("index") == 42
        assert "id" in block
        assert "annotations" in block
        annotations = block.get("annotations", [])
        assert len(annotations) == 1
        assert annotations[0]["type"] == "citation"
        # Verify extras field works
        extras = block.get("extras", {})
        assert len(extras) == 100
        for i in range(100):
            assert extras.get(f"field_{i}") == f"value_{i}"

    def test_special_content_blocks_support_extras_field(self) -> None:
        """Test that special content blocks support extras field.

        These blocks have no factory helpers in this test, so each is built as
        a TypedDict literal with ``extras`` included inline.
        """
        provider_metadata = {
            "provider": "openai",
            "request_id": "req_12345",
            "timing": {"start": 1234567890, "end": 1234567895},
        }
        # Test ToolCallChunk
        tool_call_chunk: ToolCallChunk = {
            "type": "tool_call_chunk",
            "id": "tool_123",
            "name": "search",
            "args": '{"query": "test"}',
            "index": 0,
            "extras": provider_metadata,
        }
        assert tool_call_chunk.get("extras") == provider_metadata
        assert tool_call_chunk["type"] == "tool_call_chunk"
        # Test InvalidToolCall
        invalid_tool_call: InvalidToolCall = {
            "type": "invalid_tool_call",
            "id": "invalid_123",
            "name": "bad_tool",
            "args": "invalid json",
            "error": "JSON parse error",
            "extras": provider_metadata,
        }
        assert invalid_tool_call.get("extras") == provider_metadata
        assert invalid_tool_call["type"] == "invalid_tool_call"
        # Test WebSearchCall
        web_search_call: WebSearchCall = {
            "type": "web_search_call",
            "id": "search_123",
            "query": "python langchain",
            "index": 0,
            "extras": provider_metadata,
        }
        assert web_search_call.get("extras") == provider_metadata
        assert web_search_call["type"] == "web_search_call"
        # Test WebSearchResult
        web_search_result: WebSearchResult = {
            "type": "web_search_result",
            "id": "result_123",
            "urls": ["https://example.com", "https://test.com"],
            "index": 0,
            "extras": provider_metadata,
        }
        assert web_search_result.get("extras") == provider_metadata
        assert web_search_result["type"] == "web_search_result"
        # Test CodeInterpreterCall
        code_interpreter_call: CodeInterpreterCall = {
            "type": "code_interpreter_call",
            "id": "code_123",
            "language": "python",
            "code": "print('hello world')",
            "index": 0,
            "extras": provider_metadata,
        }
        assert code_interpreter_call.get("extras") == provider_metadata
        assert code_interpreter_call["type"] == "code_interpreter_call"
        # Test CodeInterpreterOutput
        code_interpreter_output: CodeInterpreterOutput = {
            "type": "code_interpreter_output",
            "id": "output_123",
            "return_code": 0,
            "stderr": "",
            "stdout": "hello world\n",
            "file_ids": ["file_123"],
            "index": 0,
            "extras": provider_metadata,
        }
        assert code_interpreter_output.get("extras") == provider_metadata
        assert code_interpreter_output["type"] == "code_interpreter_output"
        # Test CodeInterpreterResult (embeds the output block built above)
        code_interpreter_result: CodeInterpreterResult = {
            "type": "code_interpreter_result",
            "id": "result_123",
            "output": [code_interpreter_output],
            "index": 0,
            "extras": provider_metadata,
        }
        assert code_interpreter_result.get("extras") == provider_metadata
        assert code_interpreter_result["type"] == "code_interpreter_result"

    def test_extras_field_is_not_required_for_special_blocks(self) -> None:
        """Test that extras field is optional for all special content blocks."""
        # Create blocks without extras field
        # NOTE(review): unlike every other literal in this method, this one
        # omits the "type" key, and the assertion below checks "name" instead
        # of "type" — confirm whether that omission is intentional.
        tool_call_chunk: ToolCallChunk = {
            "id": "tool_123",
            "name": "search",
            "args": '{"query": "test"}',
            "index": 0,
        }
        invalid_tool_call: InvalidToolCall = {
            "type": "invalid_tool_call",
            "id": "invalid_123",
            "name": "bad_tool",
            "args": "invalid json",
            "error": "JSON parse error",
        }
        web_search_call: WebSearchCall = {
            "type": "web_search_call",
            "query": "python langchain",
        }
        web_search_result: WebSearchResult = {
            "type": "web_search_result",
            "urls": ["https://example.com"],
        }
        code_interpreter_call: CodeInterpreterCall = {
            "type": "code_interpreter_call",
            "code": "print('hello')",
        }
        code_interpreter_output: CodeInterpreterOutput = {
            "type": "code_interpreter_output",
            "stdout": "hello\n",
        }
        code_interpreter_result: CodeInterpreterResult = {
            "type": "code_interpreter_result",
            "output": [code_interpreter_output],
        }
        # Verify they work without extras
        assert tool_call_chunk.get("name") == "search"
        assert invalid_tool_call["type"] == "invalid_tool_call"
        assert web_search_call["type"] == "web_search_call"
        assert web_search_result["type"] == "web_search_result"
        assert code_interpreter_call["type"] == "code_interpreter_call"
        assert code_interpreter_output["type"] == "code_interpreter_output"
        assert code_interpreter_result["type"] == "code_interpreter_result"
        # Verify extras field is not present
        assert "extras" not in tool_call_chunk
        assert "extras" not in invalid_tool_call
        assert "extras" not in web_search_call
        assert "extras" not in web_search_result
        assert "extras" not in code_interpreter_call
        assert "extras" not in code_interpreter_output
        assert "extras" not in code_interpreter_result

View File

@ -0,0 +1,343 @@
"""Unit tests for ResponseMetadata TypedDict."""
from langchain_core.messages.v1 import AIMessage, AIMessageChunk, ResponseMetadata
class TestResponseMetadata:
    """Test the ResponseMetadata TypedDict functionality.

    ResponseMetadata declares ``model_provider`` / ``model_name`` but is
    exercised here as an open mapping that also accepts arbitrary
    provider-specific keys.
    """

    def test_response_metadata_basic_fields(self) -> None:
        """Test ResponseMetadata with basic required fields."""
        metadata: ResponseMetadata = {
            "model_provider": "openai",
            "model_name": "gpt-4",
        }
        assert metadata.get("model_provider") == "openai"
        assert metadata.get("model_name") == "gpt-4"

    def test_response_metadata_is_optional(self) -> None:
        """Test that ResponseMetadata fields are optional due to total=False."""
        # Should be able to create empty ResponseMetadata
        metadata: ResponseMetadata = {}
        assert metadata == {}
        # Should be able to create with just one field
        metadata_partial: ResponseMetadata = {"model_provider": "anthropic"}
        assert metadata_partial.get("model_provider") == "anthropic"
        assert "model_name" not in metadata_partial

    def test_response_metadata_supports_extra_fields(self) -> None:
        """Test that ResponseMetadata supports provider-specific extra fields."""
        metadata: ResponseMetadata = {
            "model_provider": "openai",
            "model_name": "gpt-4-turbo",
            # Extra fields should be allowed
            "system_fingerprint": "fp_12345",
            "logprobs": None,
            "finish_reason": "stop",
            "request_id": "req_abc123",
        }
        assert metadata.get("model_provider") == "openai"
        assert metadata.get("model_name") == "gpt-4-turbo"
        assert metadata.get("system_fingerprint") == "fp_12345"
        assert metadata.get("logprobs") is None
        assert metadata.get("finish_reason") == "stop"
        assert metadata.get("request_id") == "req_abc123"

    def test_response_metadata_various_data_types(self) -> None:
        """Test that ResponseMetadata can store various data types in extra fields."""
        metadata: ResponseMetadata = {
            "model_provider": "anthropic",
            "model_name": "claude-3-sonnet",
            "string_field": "test_value",
            "int_field": 42,
            "float_field": 3.14,
            "bool_field": True,
            "none_field": None,
            "list_field": [1, 2, 3, "test"],
            "dict_field": {"nested": {"deeply": "nested_value"}},
        }
        assert metadata.get("string_field") == "test_value"
        assert metadata.get("int_field") == 42
        assert metadata.get("float_field") == 3.14
        assert metadata.get("bool_field") is True
        assert metadata.get("none_field") is None
        list_field = metadata.get("list_field")
        assert isinstance(list_field, list)
        assert list_field == [1, 2, 3, "test"]
        dict_field = metadata.get("dict_field")
        assert isinstance(dict_field, dict)
        nested = dict_field.get("nested")
        assert isinstance(nested, dict)
        assert nested.get("deeply") == "nested_value"

    def test_response_metadata_can_be_modified(self) -> None:
        """Test that ResponseMetadata can be modified after creation."""
        metadata: ResponseMetadata = {
            "model_provider": "openai",
            "model_name": "gpt-3.5-turbo",
        }
        # Modify existing fields
        metadata["model_name"] = "gpt-4"
        assert metadata.get("model_name") == "gpt-4"
        # Add new fields
        metadata["request_id"] = "req_12345"
        assert metadata.get("request_id") == "req_12345"
        # Modify nested structures
        metadata["headers"] = {"x-request-id": "abc123"}
        metadata["headers"]["x-rate-limit"] = "100"  # type: ignore[typeddict-item]
        headers = metadata.get("headers")
        assert isinstance(headers, dict)
        assert headers.get("x-request-id") == "abc123"
        assert headers.get("x-rate-limit") == "100"

    def test_response_metadata_provider_specific_examples(self) -> None:
        """Test ResponseMetadata with realistic provider-specific examples."""
        # OpenAI-style metadata
        openai_metadata: ResponseMetadata = {
            "model_provider": "openai",
            "model_name": "gpt-4-turbo-2024-04-09",
            "system_fingerprint": "fp_abc123",
            "created": 1234567890,
            "logprobs": None,
            "finish_reason": "stop",
        }
        assert openai_metadata.get("model_provider") == "openai"
        assert openai_metadata.get("system_fingerprint") == "fp_abc123"
        # Anthropic-style metadata
        anthropic_metadata: ResponseMetadata = {
            "model_provider": "anthropic",
            "model_name": "claude-3-sonnet-20240229",
            "stop_reason": "end_turn",
            "stop_sequence": None,
        }
        assert anthropic_metadata.get("model_provider") == "anthropic"
        assert anthropic_metadata.get("stop_reason") == "end_turn"
        # Custom provider metadata
        custom_metadata: ResponseMetadata = {
            "model_provider": "custom_llm_service",
            "model_name": "custom-model-v1",
            "service_tier": "premium",
            "rate_limit_info": {
                "requests_remaining": 100,
                "reset_time": "2024-01-01T00:00:00Z",
            },
            "response_time_ms": 1250,
        }
        assert custom_metadata.get("service_tier") == "premium"
        rate_limit = custom_metadata.get("rate_limit_info")
        assert isinstance(rate_limit, dict)
        assert rate_limit.get("requests_remaining") == 100
class TestResponseMetadataWithAIMessages:
    """Test ResponseMetadata integration with AI message classes."""

    def test_ai_message_with_response_metadata(self) -> None:
        """Test AIMessage with ResponseMetadata."""
        metadata: ResponseMetadata = {
            "model_provider": "openai",
            "model_name": "gpt-4",
            "system_fingerprint": "fp_xyz789",
        }
        message = AIMessage(content="Hello, world!", response_metadata=metadata)
        assert message.response_metadata == metadata
        assert message.response_metadata.get("model_provider") == "openai"
        assert message.response_metadata.get("model_name") == "gpt-4"
        assert message.response_metadata.get("system_fingerprint") == "fp_xyz789"

    def test_ai_message_chunk_with_response_metadata(self) -> None:
        """Test AIMessageChunk with ResponseMetadata."""
        metadata: ResponseMetadata = {
            "model_provider": "anthropic",
            "model_name": "claude-3-sonnet",
            "stream_id": "stream_12345",
        }
        chunk = AIMessageChunk(content="Hello", response_metadata=metadata)
        assert chunk.response_metadata == metadata
        assert chunk.response_metadata.get("stream_id") == "stream_12345"

    def test_ai_message_default_empty_response_metadata(self) -> None:
        """Test that AIMessage creates empty ResponseMetadata by default."""
        message = AIMessage(content="Test message")
        # Should have empty dict as default
        assert message.response_metadata == {}
        assert isinstance(message.response_metadata, dict)

    def test_ai_message_chunk_default_empty_response_metadata(self) -> None:
        """Test that AIMessageChunk creates empty ResponseMetadata by default."""
        chunk = AIMessageChunk(content="Test chunk")
        # Should have empty dict as default
        assert chunk.response_metadata == {}
        assert isinstance(chunk.response_metadata, dict)

    def test_response_metadata_merging_in_chunks(self) -> None:
        """Test that ResponseMetadata is properly merged when adding AIMessageChunks."""
        metadata1: ResponseMetadata = {
            "model_provider": "openai",
            "model_name": "gpt-4",
            "request_id": "req_123",
            "system_fingerprint": "fp_abc",
        }
        metadata2: ResponseMetadata = {
            "stream_chunk": 1,
            "finish_reason": "length",
        }
        chunk1 = AIMessageChunk(content="Hello ", response_metadata=metadata1)
        chunk2 = AIMessageChunk(content="world!", response_metadata=metadata2)
        # Chunk addition is the streaming accumulation path.
        merged = chunk1 + chunk2
        # Should have merged response_metadata: keys from both sides survive.
        assert merged.response_metadata.get("model_provider") == "openai"
        assert merged.response_metadata.get("model_name") == "gpt-4"
        assert merged.response_metadata.get("request_id") == "req_123"
        assert merged.response_metadata.get("stream_chunk") == 1
        assert merged.response_metadata.get("system_fingerprint") == "fp_abc"
        assert merged.response_metadata.get("finish_reason") == "length"

    def test_response_metadata_modification_after_message_creation(self) -> None:
        """Test that ResponseMetadata can be modified after message creation."""
        message = AIMessage(
            content="Initial message",
            response_metadata={"model_provider": "openai", "model_name": "gpt-3.5"},
        )
        # Modify existing field
        message.response_metadata["model_name"] = "gpt-4"
        assert message.response_metadata.get("model_name") == "gpt-4"
        # Add new field
        message.response_metadata["finish_reason"] = "stop"
        assert message.response_metadata.get("finish_reason") == "stop"

    def test_response_metadata_with_none_values(self) -> None:
        """Test ResponseMetadata handling of None values."""
        metadata: ResponseMetadata = {
            "model_provider": "openai",
            "model_name": "gpt-4",
            "system_fingerprint": None,
            "logprobs": None,
        }
        message = AIMessage(content="Test", response_metadata=metadata)
        # None values are stored, not dropped: key presence is asserted too.
        assert message.response_metadata.get("system_fingerprint") is None
        assert message.response_metadata.get("logprobs") is None
        assert "system_fingerprint" in message.response_metadata
        assert "logprobs" in message.response_metadata
class TestResponseMetadataEdgeCases:
    """Test edge cases and error conditions for ResponseMetadata."""

    def test_response_metadata_with_complex_nested_structures(self) -> None:
        """Test ResponseMetadata with deeply nested and complex structures."""
        metadata: ResponseMetadata = {
            "model_provider": "custom",
            "model_name": "complex-model",
            "complex_data": {
                "level1": {
                    "level2": {
                        "level3": {
                            "deeply_nested": "value",
                            "array": [
                                {"item": 1, "metadata": {"nested": True}},
                                {"item": 2, "metadata": {"nested": False}},
                            ],
                        }
                    }
                }
            },
        }
        # Walk the structure level by level, re-checking the type at each step.
        complex_data = metadata.get("complex_data")
        assert isinstance(complex_data, dict)
        level1 = complex_data.get("level1")
        assert isinstance(level1, dict)
        level2 = level1.get("level2")
        assert isinstance(level2, dict)
        level3 = level2.get("level3")
        assert isinstance(level3, dict)
        assert level3.get("deeply_nested") == "value"
        array = level3.get("array")
        assert isinstance(array, list)
        assert len(array) == 2
        assert array[0]["item"] == 1
        assert array[0]["metadata"]["nested"] is True

    def test_response_metadata_large_data(self) -> None:
        """Test ResponseMetadata with large amounts of data."""
        # Create metadata with many fields
        large_metadata: ResponseMetadata = {
            "model_provider": "test_provider",
            "model_name": "test_model",
        }
        # Add 100 extra fields
        for i in range(100):
            large_metadata[f"field_{i}"] = f"value_{i}"  # type: ignore[literal-required]
        message = AIMessage(content="Test", response_metadata=large_metadata)
        # Verify all fields are accessible
        assert message.response_metadata.get("model_provider") == "test_provider"
        for i in range(100):
            assert message.response_metadata.get(f"field_{i}") == f"value_{i}"

    def test_response_metadata_empty_vs_none(self) -> None:
        """Test the difference between empty ResponseMetadata and None."""
        # Message with empty metadata
        message_empty = AIMessage(content="Test", response_metadata={})
        assert message_empty.response_metadata == {}
        assert isinstance(message_empty.response_metadata, dict)
        # Message with None metadata (should become empty dict)
        message_none = AIMessage(content="Test", response_metadata=None)
        assert message_none.response_metadata == {}
        assert isinstance(message_none.response_metadata, dict)
        # Default message (no metadata specified)
        message_default = AIMessage(content="Test")
        assert message_default.response_metadata == {}
        assert isinstance(message_default.response_metadata, dict)

    def test_response_metadata_preserves_original_dict_type(self) -> None:
        """Test that ResponseMetadata preserves the original dict when passed."""
        original_dict: ResponseMetadata = {
            "model_provider": "openai",
            "model_name": "gpt-4",
            "custom_field": "custom_value",
        }
        message = AIMessage(content="Test", response_metadata=original_dict)
        # Should be the same dict object (identity, not just equality)
        assert message.response_metadata is original_dict
        # Modifications to the message's response_metadata should affect original
        message.response_metadata["new_field"] = "new_value"
        assert original_dict.get("new_field") == "new_value"

View File

@ -0,0 +1,361 @@
"""Unit tests for ResponseMetadata TypedDict."""
from langchain_core.messages.v1 import AIMessage, AIMessageChunk, ResponseMetadata
class TestResponseMetadata:
    """Test the ResponseMetadata TypedDict functionality.

    This variant annotates every non-declared key with ``# type: ignore``
    pragmas, exercising a ResponseMetadata whose extra keys are rejected by
    the type checker but accepted at runtime.
    """

    def test_response_metadata_basic_fields(self) -> None:
        """Test ResponseMetadata with basic required fields."""
        metadata: ResponseMetadata = {
            "model_provider": "openai",
            "model_name": "gpt-4",
        }
        assert metadata.get("model_provider") == "openai"
        assert metadata.get("model_name") == "gpt-4"

    def test_response_metadata_is_optional(self) -> None:
        """Test that ResponseMetadata fields are optional due to total=False."""
        # Should be able to create empty ResponseMetadata
        metadata: ResponseMetadata = {}
        assert metadata == {}
        # Should be able to create with just one field
        metadata_partial: ResponseMetadata = {"model_provider": "anthropic"}
        assert metadata_partial.get("model_provider") == "anthropic"
        assert "model_name" not in metadata_partial

    def test_response_metadata_supports_extra_fields(self) -> None:
        """Test that ResponseMetadata supports provider-specific extra fields."""
        metadata: ResponseMetadata = {
            "model_provider": "openai",
            "model_name": "gpt-4-turbo",
            # Extra fields should be allowed
            "usage": {"input_tokens": 100, "output_tokens": 50},
            "system_fingerprint": "fp_12345",
            "logprobs": None,
            "finish_reason": "stop",
        }
        assert metadata.get("model_provider") == "openai"
        assert metadata.get("model_name") == "gpt-4-turbo"
        assert metadata.get("usage") == {"input_tokens": 100, "output_tokens": 50}
        assert metadata.get("system_fingerprint") == "fp_12345"
        assert metadata.get("logprobs") is None
        assert metadata.get("finish_reason") == "stop"

    def test_response_metadata_various_data_types(self) -> None:
        """Test that ResponseMetadata can store various data types in extra fields."""
        metadata: ResponseMetadata = {
            "model_provider": "anthropic",
            "model_name": "claude-3-sonnet",
            "string_field": "test_value",  # type: ignore[typeddict-unknown-key]
            "int_field": 42,  # type: ignore[typeddict-unknown-key]
            "float_field": 3.14,  # type: ignore[typeddict-unknown-key]
            "bool_field": True,  # type: ignore[typeddict-unknown-key]
            "none_field": None,  # type: ignore[typeddict-unknown-key]
            "list_field": [1, 2, 3, "test"],  # type: ignore[typeddict-unknown-key]
            "dict_field": {  # type: ignore[typeddict-unknown-key]
                "nested": {"deeply": "nested_value"}
            },
        }
        assert metadata.get("string_field") == "test_value"  # type: ignore[typeddict-item]
        assert metadata.get("int_field") == 42  # type: ignore[typeddict-item]
        assert metadata.get("float_field") == 3.14  # type: ignore[typeddict-item]
        assert metadata.get("bool_field") is True  # type: ignore[typeddict-item]
        assert metadata.get("none_field") is None  # type: ignore[typeddict-item]
        list_field = metadata.get("list_field")  # type: ignore[typeddict-item]
        assert isinstance(list_field, list)
        assert list_field == [1, 2, 3, "test"]
        dict_field = metadata.get("dict_field")  # type: ignore[typeddict-item]
        assert isinstance(dict_field, dict)
        nested = dict_field.get("nested")  # type: ignore[union-attr]
        assert isinstance(nested, dict)
        assert nested.get("deeply") == "nested_value"  # type: ignore[union-attr]

    def test_response_metadata_can_be_modified(self) -> None:
        """Test that ResponseMetadata can be modified after creation."""
        metadata: ResponseMetadata = {
            "model_provider": "openai",
            "model_name": "gpt-3.5-turbo",
        }
        # Modify existing fields
        metadata["model_name"] = "gpt-4"
        assert metadata.get("model_name") == "gpt-4"
        # Add new fields
        metadata["request_id"] = "req_12345"  # type: ignore[typeddict-unknown-key]
        assert metadata.get("request_id") == "req_12345"  # type: ignore[typeddict-item]
        # Modify nested structures
        metadata["usage"] = {"input_tokens": 10}  # type: ignore[typeddict-unknown-key]
        metadata["usage"]["output_tokens"] = 20  # type: ignore[typeddict-item]
        usage = metadata.get("usage")  # type: ignore[typeddict-item]
        assert isinstance(usage, dict)
        assert usage.get("input_tokens") == 10  # type: ignore[union-attr]
        assert usage.get("output_tokens") == 20  # type: ignore[union-attr]

    def test_response_metadata_provider_specific_examples(self) -> None:
        """Test ResponseMetadata with realistic provider-specific examples."""
        # OpenAI-style metadata
        openai_metadata: ResponseMetadata = {
            "model_provider": "openai",
            "model_name": "gpt-4-turbo-2024-04-09",
            "usage": {  # type: ignore[typeddict-unknown-key]
                "prompt_tokens": 50,
                "completion_tokens": 25,
                "total_tokens": 75,
            },
            "system_fingerprint": "fp_abc123",  # type: ignore[typeddict-unknown-key]
            "created": 1234567890,  # type: ignore[typeddict-unknown-key]
            "logprobs": None,  # type: ignore[typeddict-unknown-key]
            "finish_reason": "stop",  # type: ignore[typeddict-unknown-key]
        }
        assert openai_metadata.get("model_provider") == "openai"
        assert openai_metadata.get("system_fingerprint") == "fp_abc123"  # type: ignore[typeddict-item]
        # Anthropic-style metadata
        anthropic_metadata: ResponseMetadata = {
            "model_provider": "anthropic",
            "model_name": "claude-3-sonnet-20240229",
            "usage": {  # type: ignore[typeddict-unknown-key]
                "input_tokens": 75,
                "output_tokens": 30,
            },
            "stop_reason": "end_turn",  # type: ignore[typeddict-unknown-key]
            "stop_sequence": None,  # type: ignore[typeddict-unknown-key]
        }
        assert anthropic_metadata.get("model_provider") == "anthropic"
        assert anthropic_metadata.get("stop_reason") == "end_turn"  # type: ignore[typeddict-item]
        # Custom provider metadata
        custom_metadata: ResponseMetadata = {
            "model_provider": "custom_llm_service",
            "model_name": "custom-model-v1",
            "service_tier": "premium",  # type: ignore[typeddict-unknown-key]
            "rate_limit_info": {  # type: ignore[typeddict-unknown-key]
                "requests_remaining": 100,
                "reset_time": "2024-01-01T00:00:00Z",
            },
            "response_time_ms": 1250,  # type: ignore[typeddict-unknown-key]
        }
        assert custom_metadata.get("service_tier") == "premium"  # type: ignore[typeddict-item]
        rate_limit = custom_metadata.get("rate_limit_info")  # type: ignore[typeddict-item]
        assert isinstance(rate_limit, dict)
        assert rate_limit.get("requests_remaining") == 100  # type: ignore[union-attr]
class TestResponseMetadataWithAIMessages:
"""Test ResponseMetadata integration with AI message classes."""
def test_ai_message_with_response_metadata(self) -> None:
    """Test AIMessage with ResponseMetadata."""
    metadata: ResponseMetadata = {
        "model_provider": "openai",
        "model_name": "gpt-4",
        "usage": {"input_tokens": 10, "output_tokens": 5},  # type: ignore[typeddict-unknown-key]
    }
    message = AIMessage(content="Hello, world!", response_metadata=metadata)
    # The message exposes the metadata mapping it was constructed with.
    returned = message.response_metadata
    assert returned == metadata
    assert returned.get("model_provider") == "openai"
    assert returned.get("model_name") == "gpt-4"
    token_usage = returned.get("usage")  # type: ignore[typeddict-item]
    assert isinstance(token_usage, dict)
    assert token_usage.get("input_tokens") == 10  # type: ignore[union-attr]
def test_ai_message_chunk_with_response_metadata(self) -> None:
    """Test AIMessageChunk with ResponseMetadata."""
    stream_metadata: ResponseMetadata = {
        "model_provider": "anthropic",
        "model_name": "claude-3-sonnet",
        "stream_id": "stream_12345",  # type: ignore[typeddict-unknown-key]
    }
    chunk = AIMessageChunk(content="Hello", response_metadata=stream_metadata)
    # The chunk carries the mapping unchanged, extra key included.
    assert chunk.response_metadata == stream_metadata
    assert chunk.response_metadata.get("stream_id") == "stream_12345"  # type: ignore[typeddict-item]
def test_ai_message_default_empty_response_metadata(self) -> None:
"""Test that AIMessage creates empty ResponseMetadata by default."""
message = AIMessage(content="Test message")
# Should have empty dict as default
assert message.response_metadata == {}
assert isinstance(message.response_metadata, dict)
def test_ai_message_chunk_default_empty_response_metadata(self) -> None:
"""Test that AIMessageChunk creates empty ResponseMetadata by default."""
chunk = AIMessageChunk(content="Test chunk")
# Should have empty dict as default
assert chunk.response_metadata == {}
assert isinstance(chunk.response_metadata, dict)
def test_response_metadata_merging_in_chunks(self) -> None:
"""Test that ResponseMetadata is properly merged when adding AIMessageChunks."""
metadata1: ResponseMetadata = {
"model_provider": "openai",
"model_name": "gpt-4",
"request_id": "req_123", # type: ignore[typeddict-unknown-key]
"usage": {"input_tokens": 10}, # type: ignore[typeddict-unknown-key]
}
metadata2: ResponseMetadata = {
"stream_chunk": 1, # type: ignore[typeddict-unknown-key]
"usage": {"output_tokens": 5}, # type: ignore[typeddict-unknown-key]
}
chunk1 = AIMessageChunk(content="Hello ", response_metadata=metadata1)
chunk2 = AIMessageChunk(content="world!", response_metadata=metadata2)
merged = chunk1 + chunk2
# Should have merged response_metadata
assert merged.response_metadata.get("model_provider") == "openai"
assert merged.response_metadata.get("model_name") == "gpt-4"
assert merged.response_metadata.get("request_id") == "req_123" # type: ignore[typeddict-item]
assert merged.response_metadata.get("stream_chunk") == 1 # type: ignore[typeddict-item]
# Usage should be merged (from merge_dicts behavior)
merged_usage = merged.response_metadata.get("usage") # type: ignore[typeddict-item]
assert isinstance(merged_usage, dict)
assert merged_usage.get("input_tokens") == 10 # type: ignore[union-attr]
assert merged_usage.get("output_tokens") == 5 # type: ignore[union-attr]
def test_response_metadata_modification_after_message_creation(self) -> None:
"""Test that ResponseMetadata can be modified after message creation."""
message = AIMessage(
content="Initial message",
response_metadata={"model_provider": "openai", "model_name": "gpt-3.5"},
)
# Modify existing field
message.response_metadata["model_name"] = "gpt-4"
assert message.response_metadata.get("model_name") == "gpt-4"
# Add new field
message.response_metadata["finish_reason"] = "stop" # type: ignore[typeddict-unknown-key]
assert message.response_metadata.get("finish_reason") == "stop" # type: ignore[typeddict-item]
def test_response_metadata_with_none_values(self) -> None:
"""Test ResponseMetadata handling of None values."""
metadata: ResponseMetadata = {
"model_provider": "openai",
"model_name": "gpt-4",
"system_fingerprint": None, # type: ignore[typeddict-unknown-key]
"logprobs": None, # type: ignore[typeddict-unknown-key]
}
message = AIMessage(content="Test", response_metadata=metadata)
assert message.response_metadata.get("system_fingerprint") is None # type: ignore[typeddict-item]
assert message.response_metadata.get("logprobs") is None # type: ignore[typeddict-item]
assert "system_fingerprint" in message.response_metadata
assert "logprobs" in message.response_metadata
class TestResponseMetadataEdgeCases:
    """Edge-case and error-condition tests for ResponseMetadata."""

    def test_response_metadata_with_complex_nested_structures(self) -> None:
        """Deeply nested dicts and lists survive storage and lookup unchanged."""
        meta: ResponseMetadata = {
            "model_provider": "custom",
            "model_name": "complex-model",
            "complex_data": {  # type: ignore[typeddict-unknown-key]
                "level1": {
                    "level2": {
                        "level3": {
                            "deeply_nested": "value",
                            "array": [
                                {"item": 1, "metadata": {"nested": True}},
                                {"item": 2, "metadata": {"nested": False}},
                            ],
                        }
                    }
                }
            },
        }
        # Walk down one nesting level at a time, checking the type at each step.
        node = meta.get("complex_data")  # type: ignore[typeddict-item]
        assert isinstance(node, dict)
        for level in ("level1", "level2", "level3"):
            node = node.get(level)  # type: ignore[union-attr]
            assert isinstance(node, dict)
        assert node.get("deeply_nested") == "value"  # type: ignore[union-attr]
        entries = node.get("array")  # type: ignore[union-attr]
        assert isinstance(entries, list)
        assert len(entries) == 2  # type: ignore[arg-type]
        assert entries[0]["item"] == 1  # type: ignore[index, typeddict-item]
        assert entries[0]["metadata"]["nested"] is True  # type: ignore[index, typeddict-item]

    def test_response_metadata_large_data(self) -> None:
        """A metadata dict with many extra keys is stored and read back fully."""
        bulk: ResponseMetadata = {
            "model_provider": "test_provider",
            "model_name": "test_model",
        }
        # Pad with 100 synthetic provider-specific keys.
        for idx in range(100):
            bulk[f"field_{idx}"] = f"value_{idx}"  # type: ignore[literal-required]
        msg = AIMessage(content="Test", response_metadata=bulk)
        # Every key — standard and synthetic — must round-trip intact.
        assert msg.response_metadata.get("model_provider") == "test_provider"
        for idx in range(100):
            assert msg.response_metadata.get(f"field_{idx}") == f"value_{idx}"  # type: ignore[typeddict-item]

    def test_response_metadata_empty_vs_none(self) -> None:
        """Empty dict, explicit None, and omitted metadata all normalize to {}."""
        for msg in (
            AIMessage(content="Test", response_metadata={}),
            AIMessage(content="Test", response_metadata=None),
            AIMessage(content="Test"),
        ):
            assert isinstance(msg.response_metadata, dict)
            assert msg.response_metadata == {}

    def test_response_metadata_preserves_original_dict_type(self) -> None:
        """The message holds the caller's dict by reference, not a copy."""
        source = {
            "model_provider": "openai",
            "model_name": "gpt-4",
            "custom_field": "custom_value",
        }
        msg = AIMessage(content="Test", response_metadata=source)
        # Identity, not just equality: mutations are visible from both sides.
        assert msg.response_metadata is source
        msg.response_metadata["new_field"] = "new_value"  # type: ignore[typeddict-unknown-key]
        assert source.get("new_field") == "new_value"  # type: ignore[typeddict-item]

View File

@ -726,7 +726,7 @@
'description': '''
Allowance for errors made by LLM.
Here we add an `error` key to surface errors made during generation
Here we add an ``error`` key to surface errors made during generation
(e.g., invalid JSON arguments.)
''',
'properties': dict({
@ -752,6 +752,10 @@
]),
'title': 'Error',
}),
'extras': dict({
'title': 'Extras',
'type': 'object',
}),
'id': dict({
'anyOf': list([
dict({
@ -763,10 +767,6 @@
]),
'title': 'Id',
}),
'index': dict({
'title': 'Index',
'type': 'integer',
}),
'name': dict({
'anyOf': list([
dict({
@ -785,11 +785,11 @@
}),
}),
'required': list([
'type',
'id',
'name',
'args',
'id',
'error',
'type',
]),
'title': 'InvalidToolCall',
'type': 'object',
@ -1003,12 +1003,23 @@
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
.. note::
``create_tool_call`` may also be used as a factory to create a
``ToolCall``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'args': dict({
'title': 'Args',
'type': 'object',
}),
'extras': dict({
'title': 'Extras',
'type': 'object',
}),
'id': dict({
'anyOf': list([
dict({
@ -1035,10 +1046,10 @@
}),
}),
'required': list([
'type',
'id',
'name',
'args',
'id',
'type',
]),
'title': 'ToolCall',
'type': 'object',
@ -1047,9 +1058,9 @@
'description': '''
A chunk of a tool call (e.g., as part of a stream).
When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
values of `index` are equal and not None.
values of ``index`` are equal and not ``None``.
Example:
@ -1075,6 +1086,10 @@
]),
'title': 'Args',
}),
'extras': dict({
'title': 'Extras',
'type': 'object',
}),
'id': dict({
'anyOf': list([
dict({
@ -1115,9 +1130,9 @@
}),
}),
'required': list([
'id',
'name',
'args',
'id',
'index',
]),
'title': 'ToolCallChunk',
@ -2168,7 +2183,7 @@
'description': '''
Allowance for errors made by LLM.
Here we add an `error` key to surface errors made during generation
Here we add an ``error`` key to surface errors made during generation
(e.g., invalid JSON arguments.)
''',
'properties': dict({
@ -2194,6 +2209,10 @@
]),
'title': 'Error',
}),
'extras': dict({
'title': 'Extras',
'type': 'object',
}),
'id': dict({
'anyOf': list([
dict({
@ -2205,10 +2224,6 @@
]),
'title': 'Id',
}),
'index': dict({
'title': 'Index',
'type': 'integer',
}),
'name': dict({
'anyOf': list([
dict({
@ -2227,11 +2242,11 @@
}),
}),
'required': list([
'type',
'id',
'name',
'args',
'id',
'error',
'type',
]),
'title': 'InvalidToolCall',
'type': 'object',
@ -2445,12 +2460,23 @@
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
.. note::
``create_tool_call`` may also be used as a factory to create a
``ToolCall``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'args': dict({
'title': 'Args',
'type': 'object',
}),
'extras': dict({
'title': 'Extras',
'type': 'object',
}),
'id': dict({
'anyOf': list([
dict({
@ -2477,10 +2503,10 @@
}),
}),
'required': list([
'type',
'id',
'name',
'args',
'id',
'type',
]),
'title': 'ToolCall',
'type': 'object',
@ -2489,9 +2515,9 @@
'description': '''
A chunk of a tool call (e.g., as part of a stream).
When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
values of `index` are equal and not None.
values of ``index`` are equal and not ``None``.
Example:
@ -2517,6 +2543,10 @@
]),
'title': 'Args',
}),
'extras': dict({
'title': 'Extras',
'type': 'object',
}),
'id': dict({
'anyOf': list([
dict({
@ -2557,9 +2587,9 @@
}),
}),
'required': list([
'id',
'name',
'args',
'id',
'index',
]),
'title': 'ToolCallChunk',

View File

@ -406,12 +406,25 @@
'data': dict({
'$defs': dict({
'AudioContentBlock': dict({
'description': 'Content block for audio data.',
'description': '''
Audio data.
.. note::
``create_audio_block`` may also be used as a factory to create an
``AudioContentBlock``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'base64': dict({
'title': 'Base64',
'type': 'string',
}),
'extras': dict({
'title': 'Extras',
'type': 'object',
}),
'file_id': dict({
'title': 'File Id',
'type': 'string',
@ -608,6 +621,13 @@
``start/end`` indices refer to the **response text**,
not the source text. This means that the indices are relative to the model's
response, not the original document (as specified in the ``url``).
.. note::
``create_citation`` may also be used as a factory to create a ``Citation``.
Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'cited_text': dict({
@ -618,6 +638,10 @@
'title': 'End Index',
'type': 'integer',
}),
'extras': dict({
'title': 'Extras',
'type': 'object',
}),
'id': dict({
'title': 'Id',
'type': 'string',
@ -647,12 +671,16 @@
'type': 'object',
}),
'CodeInterpreterCall': dict({
'description': 'Content block for a built-in code interpreter tool call.',
'description': 'Built-in code interpreter tool call.',
'properties': dict({
'code': dict({
'title': 'Code',
'type': 'string',
}),
'extras': dict({
'title': 'Extras',
'type': 'object',
}),
'id': dict({
'title': 'Id',
'type': 'string',
@ -679,12 +707,16 @@
}),
'CodeInterpreterOutput': dict({
'description': '''
Content block for the output of a singular code interpreter tool call.
Output of a singular code interpreter tool call.
Full output of a code interpreter tool call is represented by
``CodeInterpreterResult`` which is a list of these blocks.
''',
'properties': dict({
'extras': dict({
'title': 'Extras',
'type': 'object',
}),
'file_ids': dict({
'items': dict({
'type': 'string',
@ -725,8 +757,12 @@
'type': 'object',
}),
'CodeInterpreterResult': dict({
'description': 'Content block for the result of a code interpreter tool call.',
'description': 'Result of a code interpreter tool call.',
'properties': dict({
'extras': dict({
'title': 'Extras',
'type': 'object',
}),
'id': dict({
'title': 'Id',
'type': 'string',
@ -757,7 +793,7 @@
}),
'FileContentBlock': dict({
'description': '''
Content block for file data.
File data that doesn't fit into other multimodal blocks.
This block is intended for files that are not images, audio, or plaintext. For
example, it can be used for PDFs, Word documents, etc.
@ -765,12 +801,23 @@
If the file is an image, audio, or plaintext, you should use the corresponding
content block type (e.g., ``ImageContentBlock``, ``AudioContentBlock``,
``PlainTextContentBlock``).
.. note::
``create_file_block`` may also be used as a factory to create a
``FileContentBlock``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'base64': dict({
'title': 'Base64',
'type': 'string',
}),
'extras': dict({
'title': 'Extras',
'type': 'object',
}),
'file_id': dict({
'title': 'File Id',
'type': 'string',
@ -1014,12 +1061,25 @@
'type': 'object',
}),
'ImageContentBlock': dict({
'description': 'Content block for image data.',
'description': '''
Image data.
.. note::
``create_image_block`` may also be used as a factory to create a
``ImageContentBlock``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'base64': dict({
'title': 'Base64',
'type': 'string',
}),
'extras': dict({
'title': 'Extras',
'type': 'object',
}),
'file_id': dict({
'title': 'File Id',
'type': 'string',
@ -1093,7 +1153,7 @@
'description': '''
Allowance for errors made by LLM.
Here we add an `error` key to surface errors made during generation
Here we add an ``error`` key to surface errors made during generation
(e.g., invalid JSON arguments.)
''',
'properties': dict({
@ -1119,6 +1179,10 @@
]),
'title': 'Error',
}),
'extras': dict({
'title': 'Extras',
'type': 'object',
}),
'id': dict({
'anyOf': list([
dict({
@ -1130,10 +1194,6 @@
]),
'title': 'Id',
}),
'index': dict({
'title': 'Index',
'type': 'integer',
}),
'name': dict({
'anyOf': list([
dict({
@ -1152,11 +1212,11 @@
}),
}),
'required': list([
'type',
'id',
'name',
'args',
'id',
'error',
'type',
]),
'title': 'InvalidToolCall',
'type': 'object',
@ -1187,7 +1247,7 @@
}),
'NonStandardContentBlock': dict({
'description': '''
Content block provider-specific data.
Provider-specific data.
This block contains data for which there is not yet a standard type.
@ -1195,6 +1255,13 @@
If a provider's non-standard output includes reasoning and tool calls, it should be
the adapter's job to parse that payload and emit the corresponding standard
ReasoningContentBlock and ToolCallContentBlocks.
.. note::
``create_non_standard_block`` may also be used as a factory to create a
``NonStandardContentBlock``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'id': dict({
@ -1254,11 +1321,18 @@
}),
'PlainTextContentBlock': dict({
'description': '''
Content block for plaintext data (e.g., from a document).
Plaintext data (e.g., from a document).
.. note::
Title and context are optional fields that may be passed to the model. See
Anthropic `example <https://docs.anthropic.com/en/docs/build-with-claude/citations#citable-vs-non-citable-content>`__.
.. note::
``create_plaintext_block`` may also be used as a factory to create a
``PlainTextContentBlock``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'base64': dict({
@ -1269,6 +1343,10 @@
'title': 'Context',
'type': 'string',
}),
'extras': dict({
'title': 'Extras',
'type': 'object',
}),
'file_id': dict({
'title': 'File Id',
'type': 'string',
@ -1312,8 +1390,21 @@
'type': 'object',
}),
'ReasoningContentBlock': dict({
'description': 'Content block for reasoning output.',
'description': '''
Reasoning output from a LLM.
.. note::
``create_reasoning_block`` may also be used as a factory to create a
``ReasoningContentBlock``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'extras': dict({
'title': 'Extras',
'type': 'object',
}),
'id': dict({
'title': 'Id',
'type': 'string',
@ -1326,14 +1417,6 @@
'title': 'Reasoning',
'type': 'string',
}),
'signature': dict({
'title': 'Signature',
'type': 'string',
}),
'thought_signature': dict({
'title': 'Thought Signature',
'type': 'string',
}),
'type': dict({
'const': 'reasoning',
'title': 'Type',
@ -1353,7 +1436,9 @@
Contains additional information returned by the provider, such as
response headers, service tiers, log probabilities, system fingerprints, etc.
Extra keys are permitted from what is typed here.
Extra keys are permitted from what is typed here (via `total=False`), allowing
for provider-specific metadata to be included without breaking the type
definition.
''',
'properties': dict({
'model_name': dict({
@ -1440,10 +1525,17 @@
}),
'TextContentBlock': dict({
'description': '''
Content block for text output.
Text output from a LLM.
This typically represents the main text content of a message, such as the response
from a language model or the text of a user message.
.. note::
``create_text_block`` may also be used as a factory to create a
``TextContentBlock``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'annotations': dict({
@ -1460,6 +1552,10 @@
'title': 'Annotations',
'type': 'array',
}),
'extras': dict({
'title': 'Extras',
'type': 'object',
}),
'id': dict({
'title': 'Id',
'type': 'string',
@ -1501,12 +1597,23 @@
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
.. note::
``create_tool_call`` may also be used as a factory to create a
``ToolCall``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'args': dict({
'title': 'Args',
'type': 'object',
}),
'extras': dict({
'title': 'Extras',
'type': 'object',
}),
'id': dict({
'anyOf': list([
dict({
@ -1533,10 +1640,10 @@
}),
}),
'required': list([
'type',
'id',
'name',
'args',
'id',
'type',
]),
'title': 'ToolCall',
'type': 'object',
@ -1545,9 +1652,9 @@
'description': '''
A chunk of a tool call (e.g., as part of a stream).
When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
values of `index` are equal and not None.
values of ``index`` are equal and not ``None``.
Example:
@ -1573,6 +1680,10 @@
]),
'title': 'Args',
}),
'extras': dict({
'title': 'Extras',
'type': 'object',
}),
'id': dict({
'anyOf': list([
dict({
@ -1613,9 +1724,9 @@
}),
}),
'required': list([
'id',
'name',
'args',
'id',
'index',
]),
'title': 'ToolCallChunk',
@ -1761,12 +1872,25 @@
'type': 'object',
}),
'VideoContentBlock': dict({
'description': 'Content block for video data.',
'description': '''
Video data.
.. note::
``create_video_block`` may also be used as a factory to create a
``VideoContentBlock``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'base64': dict({
'title': 'Base64',
'type': 'string',
}),
'extras': dict({
'title': 'Extras',
'type': 'object',
}),
'file_id': dict({
'title': 'File Id',
'type': 'string',
@ -1800,8 +1924,12 @@
'type': 'object',
}),
'WebSearchCall': dict({
'description': 'Content block for a built-in web search tool call.',
'description': 'Built-in web search tool call.',
'properties': dict({
'extras': dict({
'title': 'Extras',
'type': 'object',
}),
'id': dict({
'title': 'Id',
'type': 'string',
@ -1827,8 +1955,12 @@
'type': 'object',
}),
'WebSearchResult': dict({
'description': 'Content block for the result of a built-in web search tool call.',
'description': 'Result of a built-in web search tool call.',
'properties': dict({
'extras': dict({
'title': 'Extras',
'type': 'object',
}),
'id': dict({
'title': 'Id',
'type': 'string',

View File

@ -2674,7 +2674,7 @@
'description': '''
Allowance for errors made by LLM.
Here we add an `error` key to surface errors made during generation
Here we add an ``error`` key to surface errors made during generation
(e.g., invalid JSON arguments.)
''',
'properties': dict({
@ -2711,10 +2711,6 @@
]),
'title': 'Id',
}),
'index': dict({
'title': 'Index',
'type': 'integer',
}),
'name': dict({
'anyOf': list([
dict({
@ -2732,11 +2728,11 @@
}),
}),
'required': list([
'type',
'id',
'name',
'args',
'id',
'error',
'type',
]),
'title': 'InvalidToolCall',
'type': 'object',
@ -2948,6 +2944,13 @@
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
.. note::
``create_tool_call`` may also be used as a factory to create a
``ToolCall``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'args': dict({
@ -2979,10 +2982,10 @@
}),
}),
'required': list([
'type',
'id',
'name',
'args',
'id',
'type',
]),
'title': 'ToolCall',
'type': 'object',
@ -2991,9 +2994,9 @@
'description': '''
A chunk of a tool call (e.g., as part of a stream).
When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
values of `index` are equal and not None.
values of ``index`` are equal and not ``None``.
Example:
@ -3058,9 +3061,9 @@
}),
}),
'required': list([
'id',
'name',
'args',
'id',
'index',
]),
'title': 'ToolCallChunk',
@ -4160,7 +4163,7 @@
'description': '''
Allowance for errors made by LLM.
Here we add an `error` key to surface errors made during generation
Here we add an ``error`` key to surface errors made during generation
(e.g., invalid JSON arguments.)
''',
'properties': dict({
@ -4197,10 +4200,6 @@
]),
'title': 'Id',
}),
'index': dict({
'title': 'Index',
'type': 'integer',
}),
'name': dict({
'anyOf': list([
dict({
@ -4218,11 +4217,11 @@
}),
}),
'required': list([
'type',
'id',
'name',
'args',
'id',
'error',
'type',
]),
'title': 'InvalidToolCall',
'type': 'object',
@ -4453,6 +4452,13 @@
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
.. note::
``create_tool_call`` may also be used as a factory to create a
``ToolCall``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'args': dict({
@ -4484,10 +4490,10 @@
}),
}),
'required': list([
'type',
'id',
'name',
'args',
'id',
'type',
]),
'title': 'ToolCall',
'type': 'object',
@ -4496,9 +4502,9 @@
'description': '''
A chunk of a tool call (e.g., as part of a stream).
When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
values of `index` are equal and not None.
values of ``index`` are equal and not ``None``.
Example:
@ -4563,9 +4569,9 @@
}),
}),
'required': list([
'id',
'name',
'args',
'id',
'index',
]),
'title': 'ToolCallChunk',
@ -5677,7 +5683,7 @@
'description': '''
Allowance for errors made by LLM.
Here we add an `error` key to surface errors made during generation
Here we add an ``error`` key to surface errors made during generation
(e.g., invalid JSON arguments.)
''',
'properties': dict({
@ -5714,10 +5720,6 @@
]),
'title': 'Id',
}),
'index': dict({
'title': 'Index',
'type': 'integer',
}),
'name': dict({
'anyOf': list([
dict({
@ -5735,11 +5737,11 @@
}),
}),
'required': list([
'type',
'id',
'name',
'args',
'id',
'error',
'type',
]),
'title': 'InvalidToolCall',
'type': 'object',
@ -5970,6 +5972,13 @@
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
.. note::
``create_tool_call`` may also be used as a factory to create a
``ToolCall``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'args': dict({
@ -6001,10 +6010,10 @@
}),
}),
'required': list([
'type',
'id',
'name',
'args',
'id',
'type',
]),
'title': 'ToolCall',
'type': 'object',
@ -6013,9 +6022,9 @@
'description': '''
A chunk of a tool call (e.g., as part of a stream).
When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
values of `index` are equal and not None.
values of ``index`` are equal and not ``None``.
Example:
@ -6080,9 +6089,9 @@
}),
}),
'required': list([
'id',
'name',
'args',
'id',
'index',
]),
'title': 'ToolCallChunk',
@ -7069,7 +7078,7 @@
'description': '''
Allowance for errors made by LLM.
Here we add an `error` key to surface errors made during generation
Here we add an ``error`` key to surface errors made during generation
(e.g., invalid JSON arguments.)
''',
'properties': dict({
@ -7106,10 +7115,6 @@
]),
'title': 'Id',
}),
'index': dict({
'title': 'Index',
'type': 'integer',
}),
'name': dict({
'anyOf': list([
dict({
@ -7127,11 +7132,11 @@
}),
}),
'required': list([
'type',
'id',
'name',
'args',
'id',
'error',
'type',
]),
'title': 'InvalidToolCall',
'type': 'object',
@ -7343,6 +7348,13 @@
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
.. note::
``create_tool_call`` may also be used as a factory to create a
``ToolCall``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'args': dict({
@ -7374,10 +7386,10 @@
}),
}),
'required': list([
'type',
'id',
'name',
'args',
'id',
'type',
]),
'title': 'ToolCall',
'type': 'object',
@ -7386,9 +7398,9 @@
'description': '''
A chunk of a tool call (e.g., as part of a stream).
When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
values of `index` are equal and not None.
values of ``index`` are equal and not ``None``.
Example:
@ -7453,9 +7465,9 @@
}),
}),
'required': list([
'id',
'name',
'args',
'id',
'index',
]),
'title': 'ToolCallChunk',
@ -8597,7 +8609,7 @@
'description': '''
Allowance for errors made by LLM.
Here we add an `error` key to surface errors made during generation
Here we add an ``error`` key to surface errors made during generation
(e.g., invalid JSON arguments.)
''',
'properties': dict({
@ -8634,10 +8646,6 @@
]),
'title': 'Id',
}),
'index': dict({
'title': 'Index',
'type': 'integer',
}),
'name': dict({
'anyOf': list([
dict({
@ -8655,11 +8663,11 @@
}),
}),
'required': list([
'type',
'id',
'name',
'args',
'id',
'error',
'type',
]),
'title': 'InvalidToolCall',
'type': 'object',
@ -8890,6 +8898,13 @@
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
.. note::
``create_tool_call`` may also be used as a factory to create a
``ToolCall``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'args': dict({
@ -8921,10 +8936,10 @@
}),
}),
'required': list([
'type',
'id',
'name',
'args',
'id',
'type',
]),
'title': 'ToolCall',
'type': 'object',
@ -8933,9 +8948,9 @@
'description': '''
A chunk of a tool call (e.g., as part of a stream).
When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
values of `index` are equal and not None.
values of ``index`` are equal and not ``None``.
Example:
@ -9000,9 +9015,9 @@
}),
}),
'required': list([
'id',
'name',
'args',
'id',
'index',
]),
'title': 'ToolCallChunk',
@ -9334,7 +9349,16 @@
]),
'definitions': dict({
'AudioContentBlock': dict({
'description': 'Content block for audio data.',
'description': '''
Audio data.
.. note::
``create_audio_block`` may also be used as a factory to create an
``AudioContentBlock``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'base64': dict({
'title': 'Base64',
@ -9533,6 +9557,13 @@
``start/end`` indices refer to the **response text**,
not the source text. This means that the indices are relative to the model's
response, not the original document (as specified in the ``url``).
.. note::
``create_citation`` may also be used as a factory to create a ``Citation``.
Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'cited_text': dict({
@ -9571,7 +9602,7 @@
'type': 'object',
}),
'CodeInterpreterCall': dict({
'description': 'Content block for a built-in code interpreter tool call.',
'description': 'Built-in code interpreter tool call.',
'properties': dict({
'code': dict({
'title': 'Code',
@ -9602,7 +9633,7 @@
}),
'CodeInterpreterOutput': dict({
'description': '''
Content block for the output of a singular code interpreter tool call.
Output of a singular code interpreter tool call.
Full output of a code interpreter tool call is represented by
``CodeInterpreterResult`` which is a list of these blocks.
@ -9647,7 +9678,7 @@
'type': 'object',
}),
'CodeInterpreterResult': dict({
'description': 'Content block for the result of a code interpreter tool call.',
'description': 'Result of a code interpreter tool call.',
'properties': dict({
'id': dict({
'title': 'Id',
@ -9678,7 +9709,7 @@
}),
'FileContentBlock': dict({
'description': '''
Content block for file data.
File data that doesn't fit into other multimodal blocks.
This block is intended for files that are not images, audio, or plaintext. For
example, it can be used for PDFs, Word documents, etc.
@ -9686,6 +9717,13 @@
If the file is an image, audio, or plaintext, you should use the corresponding
content block type (e.g., ``ImageContentBlock``, ``AudioContentBlock``,
``PlainTextContentBlock``).
.. note::
``create_file_block`` may also be used as a factory to create a
``FileContentBlock``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'base64': dict({
@ -9931,7 +9969,16 @@
'type': 'object',
}),
'ImageContentBlock': dict({
'description': 'Content block for image data.',
'description': '''
Image data.
.. note::
``create_image_block`` may also be used as a factory to create a
``ImageContentBlock``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'base64': dict({
'title': 'Base64',
@ -10009,7 +10056,7 @@
'description': '''
Allowance for errors made by LLM.
Here we add an `error` key to surface errors made during generation
Here we add an ``error`` key to surface errors made during generation
(e.g., invalid JSON arguments.)
''',
'properties': dict({
@ -10046,10 +10093,6 @@
]),
'title': 'Id',
}),
'index': dict({
'title': 'Index',
'type': 'integer',
}),
'name': dict({
'anyOf': list([
dict({
@ -10067,11 +10110,11 @@
}),
}),
'required': list([
'type',
'id',
'name',
'args',
'id',
'error',
'type',
]),
'title': 'InvalidToolCall',
'type': 'object',
@ -10101,7 +10144,7 @@
}),
'NonStandardContentBlock': dict({
'description': '''
Content block provider-specific data.
Provider-specific data.
This block contains data for which there is not yet a standard type.
@ -10109,6 +10152,13 @@
If a provider's non-standard output includes reasoning and tool calls, it should be
the adapter's job to parse that payload and emit the corresponding standard
ReasoningContentBlock and ToolCallContentBlocks.
.. note::
``create_non_standard_block`` may also be used as a factory to create a
``NonStandardContentBlock``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'id': dict({
@ -10167,11 +10217,18 @@
}),
'PlainTextContentBlock': dict({
'description': '''
Content block for plaintext data (e.g., from a document).
Plaintext data (e.g., from a document).
.. note::
Title and context are optional fields that may be passed to the model. See
Anthropic `example <https://docs.anthropic.com/en/docs/build-with-claude/citations#citable-vs-non-citable-content>`__.
.. note::
``create_plaintext_block`` may also be used as a factory to create a
``PlainTextContentBlock``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'base64': dict({
@ -10223,7 +10280,16 @@
'type': 'object',
}),
'ReasoningContentBlock': dict({
'description': 'Content block for reasoning output.',
'description': '''
Reasoning output from a LLM.
.. note::
``create_reasoning_block`` may also be used as a factory to create a
``ReasoningContentBlock``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'id': dict({
'title': 'Id',
@ -10237,14 +10303,6 @@
'title': 'Reasoning',
'type': 'string',
}),
'signature': dict({
'title': 'Signature',
'type': 'string',
}),
'thought_signature': dict({
'title': 'Thought Signature',
'type': 'string',
}),
'type': dict({
'const': 'reasoning',
'title': 'Type',
@ -10263,7 +10321,9 @@
Contains additional information returned by the provider, such as
response headers, service tiers, log probabilities, system fingerprints, etc.
Extra keys are permitted from what is typed here.
Extra keys are permitted from what is typed here (via `total=False`), allowing
for provider-specific metadata to be included without breaking the type
definition.
''',
'properties': dict({
'model_name': dict({
@ -10349,10 +10409,17 @@
}),
'TextContentBlock': dict({
'description': '''
Content block for text output.
Text output from a LLM.
This typically represents the main text content of a message, such as the response
from a language model or the text of a user message.
.. note::
``create_text_block`` may also be used as a factory to create a
``TextContentBlock``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'annotations': dict({
@ -10409,6 +10476,13 @@
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
.. note::
``create_tool_call`` may also be used as a factory to create a
``ToolCall``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'args': dict({
@ -10440,10 +10514,10 @@
}),
}),
'required': list([
'type',
'id',
'name',
'args',
'id',
'type',
]),
'title': 'ToolCall',
'type': 'object',
@ -10452,9 +10526,9 @@
'description': '''
A chunk of a tool call (e.g., as part of a stream).
When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
values of `index` are equal and not None.
values of ``index`` are equal and not ``None``.
Example:
@ -10519,9 +10593,9 @@
}),
}),
'required': list([
'id',
'name',
'args',
'id',
'index',
]),
'title': 'ToolCallChunk',
@ -10666,7 +10740,16 @@
'type': 'object',
}),
'VideoContentBlock': dict({
'description': 'Content block for video data.',
'description': '''
Video data.
.. note::
``create_video_block`` may also be used as a factory to create a
``VideoContentBlock``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'base64': dict({
'title': 'Base64',
@ -10704,7 +10787,7 @@
'type': 'object',
}),
'WebSearchCall': dict({
'description': 'Content block for a built-in web search tool call.',
'description': 'Built-in web search tool call.',
'properties': dict({
'id': dict({
'title': 'Id',
@ -10730,7 +10813,7 @@
'type': 'object',
}),
'WebSearchResult': dict({
'description': 'Content block for the result of a built-in web search tool call.',
'description': 'Result of a built-in web search tool call.',
'properties': dict({
'id': dict({
'title': 'Id',
@ -12604,7 +12687,7 @@
'description': '''
Allowance for errors made by LLM.
Here we add an `error` key to surface errors made during generation
Here we add an ``error`` key to surface errors made during generation
(e.g., invalid JSON arguments.)
''',
'properties': dict({
@ -12641,10 +12724,6 @@
]),
'title': 'Id',
}),
'index': dict({
'title': 'Index',
'type': 'integer',
}),
'name': dict({
'anyOf': list([
dict({
@ -12662,11 +12741,11 @@
}),
}),
'required': list([
'type',
'id',
'name',
'args',
'id',
'error',
'type',
]),
'title': 'InvalidToolCall',
'type': 'object',
@ -12908,6 +12987,13 @@
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
.. note::
``create_tool_call`` may also be used as a factory to create a
``ToolCall``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'args': dict({
@ -12939,10 +13025,10 @@
}),
}),
'required': list([
'type',
'id',
'name',
'args',
'id',
'type',
]),
'title': 'ToolCall',
'type': 'object',
@ -12951,9 +13037,9 @@
'description': '''
A chunk of a tool call (e.g., as part of a stream).
When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
values of `index` are equal and not None.
values of ``index`` are equal and not ``None``.
Example:
@ -13018,9 +13104,9 @@
}),
}),
'required': list([
'id',
'name',
'args',
'id',
'index',
]),
'title': 'ToolCallChunk',
@ -14082,7 +14168,7 @@
'description': '''
Allowance for errors made by LLM.
Here we add an `error` key to surface errors made during generation
Here we add an ``error`` key to surface errors made during generation
(e.g., invalid JSON arguments.)
''',
'properties': dict({
@ -14119,10 +14205,6 @@
]),
'title': 'Id',
}),
'index': dict({
'title': 'Index',
'type': 'integer',
}),
'name': dict({
'anyOf': list([
dict({
@ -14140,11 +14222,11 @@
}),
}),
'required': list([
'type',
'id',
'name',
'args',
'id',
'error',
'type',
]),
'title': 'InvalidToolCall',
'type': 'object',
@ -14375,6 +14457,13 @@
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
.. note::
``create_tool_call`` may also be used as a factory to create a
``ToolCall``. Benefits include:
* Automatic ID generation (when not provided)
* Required arguments strictly validated at creation time
''',
'properties': dict({
'args': dict({
@ -14406,10 +14495,10 @@
}),
}),
'required': list([
'type',
'id',
'name',
'args',
'id',
'type',
]),
'title': 'ToolCall',
'type': 'object',
@ -14418,9 +14507,9 @@
'description': '''
A chunk of a tool call (e.g., as part of a stream).
When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
values of `index` are equal and not None.
values of ``index`` are equal and not ``None``.
Example:
@ -14485,9 +14574,9 @@
}),
}),
'required': list([
'id',
'name',
'args',
'id',
'index',
]),
'title': 'ToolCallChunk',

View File

@ -14,9 +14,7 @@ from langchain_core.runnables import RunnableConfig
from langchain_core.runnables.base import Runnable
from langchain_core.runnables.graph import Edge, Graph, Node
from langchain_core.runnables.graph_mermaid import _escape_node_label
from langchain_core.utils.pydantic import (
PYDANTIC_VERSION,
)
from langchain_core.utils.pydantic import PYDANTIC_VERSION
from tests.unit_tests.pydantic_utils import _normalize_schema

View File

@ -6,13 +6,7 @@ import warnings
from collections.abc import AsyncIterator, Awaitable, Iterator, Sequence
from functools import partial
from operator import itemgetter
from typing import (
Any,
Callable,
Optional,
Union,
cast,
)
from typing import Any, Callable, Optional, Union, cast
from uuid import UUID
import pytest
@ -37,11 +31,7 @@ from langchain_core.language_models import (
)
from langchain_core.load import dumpd, dumps
from langchain_core.load.load import loads
from langchain_core.messages import (
AIMessageChunk,
HumanMessage,
SystemMessage,
)
from langchain_core.messages import AIMessageChunk, HumanMessage, SystemMessage
from langchain_core.messages.base import BaseMessage
from langchain_core.output_parsers import (
BaseOutputParser,
@ -90,9 +80,7 @@ from langchain_core.tracers import (
RunLogPatch,
)
from langchain_core.tracers.context import collect_runs
from langchain_core.utils.pydantic import (
PYDANTIC_VERSION,
)
from langchain_core.utils.pydantic import PYDANTIC_VERSION
from tests.unit_tests.pydantic_utils import _normalize_schema, _schema
from tests.unit_tests.stubs import AnyStr, _any_id_ai_message, _any_id_ai_message_chunk
@ -243,7 +231,11 @@ def test_schemas(snapshot: SnapshotAssertion) -> None:
}
assert fake.get_config_jsonschema(include=["tags", "metadata", "run_name"]) == {
"properties": {
"metadata": {"default": None, "title": "Metadata", "type": "object"},
"metadata": {
"default": None,
"title": "Metadata",
"type": "object",
},
"run_name": {"default": None, "title": "Run Name", "type": "string"},
"tags": {
"default": None,

View File

@ -223,6 +223,7 @@ def test_message_chunks_v1() -> None:
create_tool_call_chunk(name=None, args='ue}"', id=None, index=0)
],
)
result = one + two + three
expected = AIMessageChunkV1(
[],
tool_call_chunks=[
@ -230,11 +231,12 @@ def test_message_chunks_v1() -> None:
name="tool1", args='{"arg1": "value}"', id="1", index=0
)
],
id=result.id, # Use the same ID as the result
)
result = one + two + three
assert result == expected
assert result.to_message() == AIMessageV1(
converted_message = result.to_message()
assert converted_message == AIMessageV1(
content=[
{
"name": "tool1",
@ -242,29 +244,31 @@ def test_message_chunks_v1() -> None:
"id": "1",
"type": "tool_call",
}
]
],
id=converted_message.id, # Use the same ID as the converted message
)
assert (
AIMessageChunkV1(
[],
tool_call_chunks=[
create_tool_call_chunk(name="tool1", args="", id="1", index=0)
],
)
+ AIMessageChunkV1(
[],
tool_call_chunks=[
create_tool_call_chunk(name="tool1", args="a", id=None, index=1)
],
)
# Don't merge if `index` field does not match.
) == AIMessageChunkV1(
chunk1 = AIMessageChunkV1(
[],
tool_call_chunks=[
create_tool_call_chunk(name="tool1", args="", id="1", index=0)
],
)
chunk2 = AIMessageChunkV1(
[],
tool_call_chunks=[
create_tool_call_chunk(name="tool1", args="a", id=None, index=1)
],
)
# Don't merge if `index` field does not match.
merge_result = chunk1 + chunk2
assert merge_result == AIMessageChunkV1(
[],
tool_call_chunks=[
create_tool_call_chunk(name="tool1", args="", id="1", index=0),
create_tool_call_chunk(name="tool1", args="a", id=None, index=1),
],
id=merge_result.id, # Use the same ID as the merge result
)
ai_msg_chunk = AIMessageChunkV1([])
@ -274,8 +278,14 @@ def test_message_chunks_v1() -> None:
create_tool_call_chunk(name="tool1", args="a", id=None, index=1)
],
)
assert ai_msg_chunk + tool_calls_msg_chunk == tool_calls_msg_chunk
assert tool_calls_msg_chunk + ai_msg_chunk == tool_calls_msg_chunk
# These assertions test that adding empty chunks preserves the non-empty chunk
result1 = ai_msg_chunk + tool_calls_msg_chunk
assert result1.tool_call_chunks == tool_calls_msg_chunk.tool_call_chunks
assert result1.content == tool_calls_msg_chunk.content
result2 = tool_calls_msg_chunk + ai_msg_chunk
assert result2.tool_call_chunks == tool_calls_msg_chunk.tool_call_chunks
assert result2.content == tool_calls_msg_chunk.content
ai_msg_chunk = AIMessageChunkV1(
[],
@ -294,15 +304,26 @@ def test_message_chunks_v1() -> None:
[],
usage_metadata={"input_tokens": 4, "output_tokens": 5, "total_tokens": 9},
)
assert left + right == AIMessageChunkV1(
usage_result = left + right
expected_usage = AIMessageChunkV1(
content=[],
usage_metadata={"input_tokens": 5, "output_tokens": 7, "total_tokens": 12},
id=usage_result.id, # Use the same ID as the result
)
assert AIMessageChunkV1(content=[]) + left == left
assert right + AIMessageChunkV1(content=[]) == right
assert usage_result == expected_usage
# Test adding empty chunks preserves the original
left_result = AIMessageChunkV1(content=[]) + left
assert left_result.usage_metadata == left.usage_metadata
assert left_result.content == left.content
right_result = right + AIMessageChunkV1(content=[])
assert right_result.usage_metadata == right.usage_metadata
assert right_result.content == right.content
# Test ID order of precedence
null_id = AIMessageChunkV1(content=[], id=None)
# Note: AIMessageChunkV1 always generates an ID if none provided
auto_id = AIMessageChunkV1(content=[]) # Gets auto-generated lc_* ID
default_id = AIMessageChunkV1(
content=[], id="run-abc123"
) # LangChain-assigned run ID
@ -310,14 +331,21 @@ def test_message_chunks_v1() -> None:
content=[], id="msg_def456"
) # provider-assigned ID
assert (null_id + default_id).id == "run-abc123"
assert (default_id + null_id).id == "run-abc123"
# Provider-assigned IDs (non-run-* and non-lc_*) have highest precedence
# Provider-assigned IDs always win over LangChain-generated IDs
assert (auto_id + meaningful_id).id == "msg_def456" # provider-assigned wins
assert (meaningful_id + auto_id).id == "msg_def456" # provider-assigned wins
assert (null_id + meaningful_id).id == "msg_def456"
assert (meaningful_id + null_id).id == "msg_def456"
assert (
default_id + meaningful_id
).id == "msg_def456" # meaningful_id is provider-assigned
assert (
meaningful_id + default_id
).id == "msg_def456" # meaningful_id is provider-assigned
assert (default_id + meaningful_id).id == "msg_def456"
assert (meaningful_id + default_id).id == "msg_def456"
# Between auto-generated and run-* IDs, auto-generated wins (since lc_ != run-)
assert (auto_id + default_id).id == auto_id.id
assert (default_id + auto_id).id == auto_id.id
def test_chat_message_chunks() -> None:
@ -332,7 +360,7 @@ def test_chat_message_chunks() -> None:
):
ChatMessageChunk(role="User", content="I am") + ChatMessageChunk(
role="Assistant", content=" indeed."
)
) # type: ignore[reportUnusedExpression, unused-ignore]
assert ChatMessageChunk(role="User", content="I am") + AIMessageChunk(
content=" indeed."
@ -441,7 +469,7 @@ def test_function_message_chunks() -> None:
):
FunctionMessageChunk(name="hello", content="I am") + FunctionMessageChunk(
name="bye", content=" indeed."
)
) # type: ignore[reportUnusedExpression, unused-ignore]
def test_ai_message_chunks() -> None:
@ -457,7 +485,7 @@ def test_ai_message_chunks() -> None:
):
AIMessageChunk(example=True, content="I am") + AIMessageChunk(
example=False, content=" indeed."
)
) # type: ignore[reportUnusedExpression, unused-ignore]
class TestGetBufferString(unittest.TestCase):

View File

@ -1040,7 +1040,7 @@ requires-dist = [
{ name = "jsonpatch", specifier = ">=1.33,<2.0" },
{ name = "langsmith", specifier = ">=0.3.45" },
{ name = "packaging", specifier = ">=23.2" },
{ name = "pydantic", specifier = ">=2.7.4" },
{ name = "pydantic", specifier = ">=2.11.7" },
{ name = "pyyaml", specifier = ">=5.3" },
{ name = "tenacity", specifier = ">=8.1.0,!=8.4.0,<10.0.0" },
{ name = "typing-extensions", specifier = ">=4.7" },

10
uv.lock
View File

@ -181,7 +181,7 @@ wheels = [
[[package]]
name = "anthropic"
version = "0.57.1"
version = "0.60.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@ -192,9 +192,9 @@ dependencies = [
{ name = "sniffio" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/d7/75/6261a1a8d92aed47e27d2fcfb3a411af73b1435e6ae1186da02b760565d0/anthropic-0.57.1.tar.gz", hash = "sha256:7815dd92245a70d21f65f356f33fc80c5072eada87fb49437767ea2918b2c4b0", size = 423775, upload-time = "2025-07-03T16:57:35.932Z" }
sdist = { url = "https://files.pythonhosted.org/packages/4e/03/3334921dc54ed822b3dd993ae72d823a7402588521bbba3e024b3333a1fd/anthropic-0.60.0.tar.gz", hash = "sha256:a22ba187c6f4fd5afecb2fc913b960feccf72bc0d25c1b7ce0345e87caede577", size = 425983, upload-time = "2025-07-28T19:53:47.685Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e5/cf/ca0ba77805aec6171629a8b665c7dc224dab374539c3d27005b5d8c100a0/anthropic-0.57.1-py3-none-any.whl", hash = "sha256:33afc1f395af207d07ff1bffc0a3d1caac53c371793792569c5d2f09283ea306", size = 292779, upload-time = "2025-07-03T16:57:34.636Z" },
{ url = "https://files.pythonhosted.org/packages/da/bb/d84f287fb1c217b30c328af987cf8bbe3897edf0518dcc5fa39412f794ec/anthropic-0.60.0-py3-none-any.whl", hash = "sha256:65ad1f088a960217aaf82ba91ff743d6c89e9d811c6d64275b9a7c59ee9ac3c6", size = 293116, upload-time = "2025-07-28T19:53:45.944Z" },
]
[[package]]
@ -2354,7 +2354,7 @@ typing = [
[[package]]
name = "langchain-anthropic"
version = "0.3.17"
version = "0.3.18"
source = { editable = "libs/partners/anthropic" }
dependencies = [
{ name = "anthropic" },
@ -2364,7 +2364,7 @@ dependencies = [
[package.metadata]
requires-dist = [
{ name = "anthropic", specifier = ">=0.57.0,<1" },
{ name = "anthropic", specifier = ">=0.60.0,<1" },
{ name = "langchain-core", editable = "libs/core" },
{ name = "pydantic", specifier = ">=2.7.4,<3.0.0" },
]