From 42c1159991657f40bb8161eb4cfae2e14abc2ab9 Mon Sep 17 00:00:00 2001
From: Mason Daugherty <mason@langchain.dev>
Date: Thu, 7 Aug 2025 12:16:31 -0400
Subject: [PATCH] feat: add TextAccessor, deprecate `.text()` as method
 (#32441)

Adds backward compatibility for calling `.text()` as a method on v1
messages while keeping `.text` property access.

_The kicker:_

Any previous use of `.text()` will now need a `# type: ignore[operator]`
to silence type checkers. However, it will still behave as expected at
runtime. Deprecating in v0.4.0, to be removed in v2.0.0.
---
 libs/core/langchain_core/v1/messages.py       | 185 ++++++++++++++++--
 .../runnables/__snapshots__/test_graph.ambr   |  14 +-
 .../__snapshots__/test_runnable.ambr          |  14 +-
 libs/core/tests/unit_tests/test_messages.py   |  56 ++++++
 4 files changed, 247 insertions(+), 22 deletions(-)

diff --git a/libs/core/langchain_core/v1/messages.py b/libs/core/langchain_core/v1/messages.py
index 5c20ad552c8..451926b8351 100644
--- a/libs/core/langchain_core/v1/messages.py
+++ b/libs/core/langchain_core/v1/messages.py
@@ -2,6 +2,7 @@
 
 Each message has content that may be comprised of content blocks, defined under
 ``langchain_core.messages.content_blocks``.
+
 """
 
 import uuid
@@ -12,6 +13,7 @@ from pydantic import BaseModel
 from typing_extensions import TypedDict
 
 import langchain_core.messages.content_blocks as types
+from langchain_core._api.deprecation import warn_deprecated
 from langchain_core.messages.ai import (
     _LC_AUTO_PREFIX,
     _LC_ID_PREFIX,
@@ -26,6 +28,57 @@ from langchain_core.utils._merge import merge_dicts
 from langchain_core.utils.json import parse_partial_json
 
 
+class TextAccessor(str):
+    """String-like object that supports both property and method access patterns.
+
+    Exists to maintain backward compatibility while transitioning from method-based to
+    property-based text access in message objects. In LangChain <v0.4, message text was
+    accessed via ``.text()`` method calls. In v0.4 and later, the preferred pattern is
+    property access via ``.text``.
+
+    Rather than breaking existing code immediately, ``TextAccessor`` allows both
+    patterns:
+    - Modern property access: ``message.text`` (returns string directly)
+    - Legacy method access: ``message.text()`` (callable, emits deprecation warning)
+
+    Examples:
+        >>> msg = AIMessage("Hello world")
+        >>> text = msg.text  # Preferred: property access
+        >>> text = msg.text()  # Deprecated: method access (shows warning)
+
+    """
+
+    __slots__ = ()
+
+    def __new__(cls, value: str) -> "TextAccessor":
+        """Create new TextAccessor instance."""
+        return str.__new__(cls, value)
+
+    def __call__(self) -> str:
+        """Enable method-style text access for backward compatibility.
+
+        This method exists solely to support legacy code that calls ``.text()``
+        as a method. New code should use property access (``.text``) instead.
+
+        .. deprecated:: 0.4.0
+            Calling .text() as a method is deprecated. Use .text as a property instead.
+            This method will be removed in 2.0.0.
+
+        Returns:
+            The string content, identical to property access.
+
+        """
+        warn_deprecated(
+            since="0.4.0",
+            message=(
+                "Calling .text() as a method is deprecated. "
+                "Use .text as a property instead (e.g., message.text)."
+            ),
+            removal="2.0.0",
+        )
+        return str(self)
+
+
 def _ensure_id(id_val: Optional[str]) -> str:
     """Ensure the ID is a valid string, generating a new UUID if not provided.
 
@@ -37,6 +90,7 @@ def _ensure_id(id_val: Optional[str]) -> str:
 
     Returns:
         A valid string ID, either the provided value or a new UUID.
+
     """
     return id_val or str(f"{_LC_AUTO_PREFIX}{uuid.uuid4()}")
 
@@ -47,9 +101,18 @@ class ResponseMetadata(TypedDict, total=False):
     Contains additional information returned by the provider, such as
     response headers, service tiers, log probabilities, system fingerprints, etc.
 
-    Extra keys are permitted from what is typed here (via ``total=False``), allowing
-    for provider-specific metadata to be included without breaking the type
-    definition.
+    **Extensibility Design:**
+
+    This uses ``total=False`` to allow arbitrary additional keys beyond the typed
+    fields below. This enables provider-specific metadata without breaking type safety:
+
+    - OpenAI might include: ``{"system_fingerprint": "fp_123", "logprobs": {...}}``
+    - Anthropic might include: ``{"stop_reason": "stop_sequence", "usage": {...}}``
+    - Custom providers can add their own fields
+
+    The common fields (``model_provider``, ``model_name``) provide a baseline
+    contract while preserving flexibility for provider innovations.
+
     """
 
     model_provider: str
@@ -75,6 +138,7 @@ class AIMessage:
         tool_calls: Optional list of tool calls made by the AI.
         invalid_tool_calls: Optional list of tool calls that failed validation.
         usage: Optional dictionary containing usage statistics.
+
     """
 
     type: Literal["ai"] = "ai"
@@ -82,6 +146,7 @@ class AIMessage:
 
     The purpose of this field is to allow for easy identification of the message type
     when deserializing messages.
+
     """
 
     name: Optional[str] = None
@@ -91,6 +156,7 @@ class AIMessage:
 
     Usage of this field is optional, and whether it's used or not is up to the
     model implementation.
+
     """
 
     id: Optional[str] = None
@@ -98,6 +164,7 @@ class AIMessage:
 
     If the provider assigns a meaningful ID, it should be used here. Otherwise, a
     LangChain-generated ID will be used.
+
     """
 
     lc_version: str = "v1"
@@ -116,6 +183,7 @@ class AIMessage:
 
     This field should include non-standard data returned by the provider, such as
     response headers, service tiers, or log probabilities.
+
     """
 
     parsed: Optional[Union[dict[str, Any], BaseModel]] = None
@@ -147,6 +215,7 @@ class AIMessage:
                 be added to the content list.
             invalid_tool_calls: Optional list of tool calls that failed validation.
             parsed: Optional auto-parsed message contents, if applicable.
+
         """
         if isinstance(content, str):
             self.content = [types.create_text_block(content)]
@@ -196,10 +265,19 @@ class AIMessage:
 
     @property
     def text(self) -> str:
-        """Extract all text content from the AI message as a string."""
-        return "".join(
+        """Extract all text content from the AI message as a string.
+
+        Can be used as both property (``message.text``) and method (``message.text()``).
+
+        .. deprecated:: 0.4.0
+            Calling .text() as a method is deprecated. Use .text as a property instead.
+            This method will be removed in 2.0.0.
+
+        """
+        text_value = "".join(
             block["text"] for block in self.content if types.is_text_block(block)
         )
+        return cast("str", TextAccessor(text_value))
 
     @property
     def tool_calls(self) -> list[types.ToolCall]:
@@ -232,7 +310,26 @@ class AIMessageChunk(AIMessage):
     """A partial chunk of an AI message during streaming.
 
     Represents a portion of an AI response that is delivered incrementally
-    during streaming generation. Contains partial content and metadata.
+    during streaming generation. When AI providers stream responses token-by-token,
+    each chunk contains partial content that gets accumulated into a complete message.
+
+    **Streaming Workflow:**
+
+    1. Provider streams partial responses as ``AIMessageChunk`` objects
+    2. Chunks are accumulated: ``chunk1 + chunk2 + ...``
+    3. Final accumulated chunk can be converted to ``AIMessage`` via ``.to_message()``
+
+    **Tool Call Handling:**
+
+    During streaming, tool calls arrive as ``ToolCallChunk`` objects with partial
+    JSON. When chunks are accumulated, the final chunk (marked with
+    ``chunk_position="last"``) triggers parsing of complete tool calls from the
+    accumulated JSON strings.
+
+    **Content Merging:**
+
+    Content blocks are merged intelligently - text blocks combine their strings,
+    tool call chunks accumulate arguments, and other blocks are concatenated.
 
     Attributes:
         type: Message type identifier, always ``'ai_chunk'``.
@@ -241,6 +338,7 @@ class AIMessageChunk(AIMessage):
         content: List of content blocks containing partial message data.
         tool_call_chunks: Optional list of partial tool call data.
         usage_metadata: Optional metadata about token usage and costs.
+
     """
 
     type: Literal["ai_chunk"] = "ai_chunk"  # type: ignore[assignment]
@@ -248,6 +346,7 @@ class AIMessageChunk(AIMessage):
 
     The purpose of this field is to allow for easy identification of the message type
     when deserializing messages.
+
     """
 
     def __init__(
@@ -274,8 +373,9 @@ class AIMessageChunk(AIMessage):
             usage_metadata: Optional metadata about token usage.
             tool_call_chunks: Optional list of partial tool call data.
             parsed: Optional auto-parsed message contents, if applicable.
-            chunk_position: Optional position of the chunk in the stream. If "last",
+            chunk_position: Optional position of the chunk in the stream. If ``'last'``,
                 tool calls will be parsed when aggregated into a stream.
+
         """
         if isinstance(content, str):
             self.content = [{"type": "text", "text": content, "index": 0}]
@@ -506,6 +606,7 @@ class HumanMessage:
         id: Unique identifier for the message.
         content: List of content blocks containing the user's input.
         name: Optional human-readable name for the message.
+
     """
 
     id: str
@@ -513,6 +614,7 @@ class HumanMessage:
 
     If the provider assigns a meaningful ID, it should be used here. Otherwise, a
     LangChain-generated ID will be used.
+
     """
 
     content: list[types.ContentBlock]
@@ -523,6 +625,7 @@ class HumanMessage:
 
     The purpose of this field is to allow for easy identification of the message type
     when deserializing messages.
+
     """
 
     name: Optional[str] = None
@@ -532,6 +635,7 @@ class HumanMessage:
 
     Usage of this field is optional, and whether it's used or not is up to the
     model implementation.
+
     """
 
     def __init__(
@@ -547,6 +651,7 @@ class HumanMessage:
             content: Message content as string or list of content blocks.
             id: Optional unique identifier for the message.
             name: Optional human-readable name for the message.
+
         """
         self.id = _ensure_id(id)
         if isinstance(content, str):
@@ -555,15 +660,21 @@ class HumanMessage:
             self.content = content
         self.name = name
 
+    @property
     def text(self) -> str:
-        """Extract all text content from the message.
+        """Extract all text content from the message as a string.
+
+        Can be used as both property (``message.text``) and method (``message.text()``).
+
+        .. deprecated:: 0.4.0
+            Calling .text() as a method is deprecated. Use .text as a property instead.
+            This method will be removed in 2.0.0.
 
-        Returns:
-            Concatenated string of all text blocks in the message.
         """
-        return "".join(
+        text_value = "".join(
             block["text"] for block in self.content if types.is_text_block(block)
         )
+        return cast("str", TextAccessor(text_value))
 
 
 @dataclass
@@ -577,6 +688,7 @@ class SystemMessage:
         type: Message type identifier, always ``'system'``.
         id: Unique identifier for the message.
         content: List of content blocks containing system instructions.
+
     """
 
     id: str
@@ -584,6 +696,7 @@ class SystemMessage:
 
     If the provider assigns a meaningful ID, it should be used here. Otherwise, a
     LangChain-generated ID will be used.
+
     """
 
     content: list[types.ContentBlock]
@@ -594,6 +707,7 @@ class SystemMessage:
 
     The purpose of this field is to allow for easy identification of the message type
     when deserializing messages.
+
     """
 
     name: Optional[str] = None
@@ -603,6 +717,7 @@ class SystemMessage:
 
     Usage of this field is optional, and whether it's used or not is up to the
     model implementation.
+
     """
 
     custom_role: Optional[str] = None
@@ -612,6 +727,7 @@ class SystemMessage:
 
     Integration packages may use this field to assign the system message role if it
     contains a recognized value.
+
     """
 
     def __init__(
@@ -629,6 +745,7 @@ class SystemMessage:
             id: Optional unique identifier for the message.
             custom_role: If provided, a custom role for the system message.
             name: Optional human-readable name for the message.
+
         """
         self.id = _ensure_id(id)
         if isinstance(content, str):
@@ -638,11 +755,21 @@ class SystemMessage:
         self.custom_role = custom_role
         self.name = name
 
+    @property
     def text(self) -> str:
-        """Extract all text content from the system message."""
-        return "".join(
+        """Extract all text content from the system message as a string.
+
+        Can be used as both property (``message.text``) and method (``message.text()``).
+
+        .. deprecated:: 0.4.0
+            Calling .text() as a method is deprecated. Use .text as a property instead.
+            This method will be removed in 2.0.0.
+
+        """
+        text_value = "".join(
             block["text"] for block in self.content if types.is_text_block(block)
         )
+        return cast("str", TextAccessor(text_value))
 
 
 @dataclass
@@ -659,6 +786,7 @@ class ToolMessage(ToolOutputMixin):
         content: The result content from tool execution.
         artifact: Optional app-side payload not intended for the model.
         status: Execution status ("success" or "error").
+
     """
 
     id: str
@@ -668,6 +796,7 @@ class ToolMessage(ToolOutputMixin):
     """ID of the tool call this message responds to.
 
     This should match the ID of the tool call that this message is responding to.
+
     """
 
     content: list[types.ContentBlock]
@@ -678,10 +807,21 @@ class ToolMessage(ToolOutputMixin):
 
     The purpose of this field is to allow for easy identification of the message type
     when deserializing messages.
+
     """
 
     artifact: Optional[Any] = None
-    """App-side payload not for the model."""
+    """App-side payload not intended for the model.
+
+    Artifacts contain data that your application needs but should not be sent
+    to the AI model. Example use cases:
+
+    - File handles or database connections used by the tool
+    - Raw binary data (images, documents) alongside text summaries
+    - Internal debugging information or execution traces
+    - Computed results that supplement the text content
+
+    """
 
     name: Optional[str] = None
     """An optional name for the message.
@@ -690,6 +830,7 @@ class ToolMessage(ToolOutputMixin):
 
     Usage of this field is optional, and whether it's used or not is up to the
     model implementation.
+
     """
 
     status: Literal["success", "error"] = "success"
@@ -697,6 +838,7 @@ class ToolMessage(ToolOutputMixin):
 
     Indicates whether the tool call was successful or encountered an error.
     Defaults to "success".
+
     """
 
     def __init__(
@@ -718,6 +860,7 @@ class ToolMessage(ToolOutputMixin):
             name: Optional human-readable name for the message.
             artifact: Optional app-side payload not intended for the model.
             status: Execution status (``'success'`` or ``'error'``).
+
         """
         self.id = _ensure_id(id)
         self.tool_call_id = tool_call_id
@@ -731,15 +874,25 @@ class ToolMessage(ToolOutputMixin):
 
     @property
     def text(self) -> str:
-        """Extract all text content from the tool message."""
-        return "".join(
+        """Extract all text content from the tool message as a string.
+
+        Can be used as both property (``message.text``) and method (``message.text()``).
+
+        .. deprecated:: 0.4.0
+            Calling .text() as a method is deprecated. Use .text as a property instead.
+            This method will be removed in 2.0.0.
+
+        """
+        text_value = "".join(
             block["text"] for block in self.content if types.is_text_block(block)
         )
+        return cast("str", TextAccessor(text_value))
 
     def __post_init__(self) -> None:
         """Initialize computed fields after dataclass creation.
 
         Ensures the tool message has a valid ID.
+
         """
         self.id = _ensure_id(self.id)
 
diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr
index d9f1ec1204d..56df846c956 100644
--- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr
+++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr
@@ -1439,9 +1439,17 @@
                 Contains additional information returned by the provider, such as
                 response headers, service tiers, log probabilities, system fingerprints, etc.
                 
-                Extra keys are permitted from what is typed here (via ``total=False``), allowing
-                for provider-specific metadata to be included without breaking the type
-                definition.
+                **Extensibility Design:**
+                
+                This uses ``total=False`` to allow arbitrary additional keys beyond the typed
+                fields below. This enables provider-specific metadata without breaking type safety:
+                
+                - OpenAI might include: ``{"system_fingerprint": "fp_123", "logprobs": {...}}``
+                - Anthropic might include: ``{"stop_reason": "stop_sequence", "usage": {...}}``
+                - Custom providers can add their own fields
+                
+                The common fields (``model_provider``, ``model_name``) provide a baseline
+                contract while preserving flexibility for provider innovations.
               ''',
               'properties': dict({
                 'model_name': dict({
diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr
index dadbdb103da..7a2c7b40621 100644
--- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr
+++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr
@@ -10444,9 +10444,17 @@
           Contains additional information returned by the provider, such as
           response headers, service tiers, log probabilities, system fingerprints, etc.
           
-          Extra keys are permitted from what is typed here (via ``total=False``), allowing
-          for provider-specific metadata to be included without breaking the type
-          definition.
+          **Extensibility Design:**
+          
+          This uses ``total=False`` to allow arbitrary additional keys beyond the typed
+          fields below. This enables provider-specific metadata without breaking type safety:
+          
+          - OpenAI might include: ``{"system_fingerprint": "fp_123", "logprobs": {...}}``
+          - Anthropic might include: ``{"stop_reason": "stop_sequence", "usage": {...}}``
+          - Custom providers can add their own fields
+          
+          The common fields (``model_provider``, ``model_name``) provide a baseline
+          contract while preserving flexibility for provider innovations.
         ''',
         'properties': dict({
           'model_name': dict({
diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py
index ea3ac1fb7d5..89c77414dc0 100644
--- a/libs/core/tests/unit_tests/test_messages.py
+++ b/libs/core/tests/unit_tests/test_messages.py
@@ -38,6 +38,9 @@ from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chu
 from langchain_core.utils._merge import merge_lists
 from langchain_core.v1.messages import AIMessage as AIMessageV1
 from langchain_core.v1.messages import AIMessageChunk as AIMessageChunkV1
+from langchain_core.v1.messages import HumanMessage as HumanMessageV1
+from langchain_core.v1.messages import SystemMessage as SystemMessageV1
+from langchain_core.v1.messages import ToolMessage as ToolMessageV1
 
 
 def test_message_init() -> None:
@@ -1385,3 +1388,56 @@ def test_known_block_types() -> None:
         for t in expected
     }
     assert expected == KNOWN_BLOCK_TYPES
+
+
+def test_v1_text_accessor() -> None:
+    """Test that v1 message.text property and .text() method return the same value."""
+    # Test HumanMessage
+    human_msg = HumanMessageV1(content="Hello world")
+    assert human_msg.text == "Hello world"
+    assert human_msg.text() == "Hello world"  # type: ignore[operator]
+    assert str(human_msg.text) == human_msg.text()  # type: ignore[operator]
+
+    # Test SystemMessage
+    system_msg = SystemMessageV1(content="You are a helpful assistant")
+    assert system_msg.text == "You are a helpful assistant"
+    assert system_msg.text() == "You are a helpful assistant"  # type: ignore[operator]
+    assert str(system_msg.text) == system_msg.text()  # type: ignore[operator]
+
+    # Test AIMessage
+    ai_msg = AIMessageV1(content="I can help you with that")
+    assert ai_msg.text == "I can help you with that"
+    assert ai_msg.text() == "I can help you with that"  # type: ignore[operator]
+    assert str(ai_msg.text) == ai_msg.text()  # type: ignore[operator]
+
+    # Test ToolMessage
+    tool_msg = ToolMessageV1(content="Task completed", tool_call_id="tool_1")
+    assert tool_msg.text == "Task completed"
+    assert tool_msg.text() == "Task completed"  # type: ignore[operator]
+    assert str(tool_msg.text) == tool_msg.text()  # type: ignore[operator]
+
+    # Test with complex content (list of content blocks)
+    complex_msg = HumanMessageV1(
+        content=[{"type": "text", "text": "Hello "}, {"type": "text", "text": "world"}]
+    )
+    assert complex_msg.text == "Hello world"
+    assert complex_msg.text() == "Hello world"  # type: ignore[operator]
+    assert str(complex_msg.text) == complex_msg.text()  # type: ignore[operator]
+
+    # Test with mixed content (text and non-text blocks)
+    mixed_msg = AIMessageV1(
+        content=[
+            {"type": "text", "text": "The answer is "},
+            {"type": "tool_call", "name": "calculate", "args": {"x": 2}, "id": "1"},
+            {"type": "text", "text": "42"},
+        ]
+    )
+    assert mixed_msg.text == "The answer is 42"
+    assert mixed_msg.text() == "The answer is 42"  # type: ignore[operator]
+    assert str(mixed_msg.text) == mixed_msg.text()  # type: ignore[operator]
+
+    # Test empty content
+    empty_msg = HumanMessageV1(content=[])
+    assert empty_msg.text == ""
+    assert empty_msg.text() == ""  # type: ignore[operator]
+    assert str(empty_msg.text) == empty_msg.text()  # type: ignore[operator]