feat: add TextAccessor, deprecate .text() as method (#32441)

Adds backward compatibility for calling `.text()` as a method on messages while
keeping `.text` property access

_The kicker:_

Any previous use of `.text()` will now need a `# type: ignore[operator]`
to silence type checkers, but it will still behave as expected at runtime.
Deprecated in v0.4.0, to be removed in v2.0.0.
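
For illustration, a minimal sketch of what this looks like from calling code, assuming the v1 message classes touched in this diff (e.g. `langchain_core.v1.messages.AIMessage`):

```python
from langchain_core.v1.messages import AIMessage

msg = AIMessage("Hello world")

# Preferred going forward: plain property access.
text = msg.text

# Legacy call style: still returns the same string at runtime, but now emits a
# deprecation warning and is flagged by type checkers, hence the ignore comment.
legacy = msg.text()  # type: ignore[operator]

assert text == legacy == "Hello world"
```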
Mason Daugherty 2025-08-07 12:16:31 -04:00 committed by GitHub
parent cc6139860c
commit 42c1159991
4 changed files with 247 additions and 22 deletions

View File

@@ -2,6 +2,7 @@
Each message has content that may be comprised of content blocks, defined under
``langchain_core.messages.content_blocks``.
"""

import uuid
@@ -12,6 +13,7 @@ from pydantic import BaseModel
from typing_extensions import TypedDict

import langchain_core.messages.content_blocks as types
+from langchain_core._api.deprecation import warn_deprecated
from langchain_core.messages.ai import (
    _LC_AUTO_PREFIX,
    _LC_ID_PREFIX,
@@ -26,6 +28,57 @@ from langchain_core.utils._merge import merge_dicts
from langchain_core.utils.json import parse_partial_json


+class TextAccessor(str):
+    """String-like object that supports both property and method access patterns.
+
+    Exists to maintain backward compatibility while transitioning from method-based to
+    property-based text access in message objects. In LangChain <v0.4, message text was
+    accessed via the ``.text()`` method; in v0.4 and later, the preferred pattern is
+    property access via ``.text``.
+
+    Rather than breaking existing code immediately, ``TextAccessor`` allows both
+    patterns:
+
+    - Modern property access: ``message.text`` (returns string directly)
+    - Legacy method access: ``message.text()`` (callable, emits deprecation warning)
+
+    Examples:
+        >>> msg = AIMessage("Hello world")
+        >>> text = msg.text  # Preferred: property access
+        >>> text = msg.text()  # Deprecated: method access (shows warning)
+    """
+
+    __slots__ = ()
+
+    def __new__(cls, value: str) -> "TextAccessor":
+        """Create new TextAccessor instance."""
+        return str.__new__(cls, value)
+
+    def __call__(self) -> str:
+        """Enable method-style text access for backward compatibility.
+
+        This method exists solely to support legacy code that calls ``.text()``
+        as a method. New code should use property access (``.text``) instead.
+
+        .. deprecated:: 0.4.0
+            Calling .text() as a method is deprecated. Use .text as a property instead.
+            This method will be removed in 2.0.0.
+
+        Returns:
+            The string content, identical to property access.
+        """
+        warn_deprecated(
+            since="0.4.0",
+            message=(
+                "Calling .text() as a method is deprecated. "
+                "Use .text as a property instead (e.g., message.text)."
+            ),
+            removal="2.0.0",
+        )
+        return str(self)
+
+
def _ensure_id(id_val: Optional[str]) -> str:
    """Ensure the ID is a valid string, generating a new UUID if not provided.
@@ -37,6 +90,7 @@ def _ensure_id(id_val: Optional[str]) -> str:
    Returns:
        A valid string ID, either the provided value or a new UUID.
    """
    return id_val or str(f"{_LC_AUTO_PREFIX}{uuid.uuid4()}")
@@ -47,9 +101,18 @@ class ResponseMetadata(TypedDict, total=False):
    Contains additional information returned by the provider, such as
    response headers, service tiers, log probabilities, system fingerprints, etc.

-    Extra keys are permitted from what is typed here (via ``total=False``), allowing
-    for provider-specific metadata to be included without breaking the type
-    definition.
+    **Extensibility Design:**
+
+    This uses ``total=False`` to allow arbitrary additional keys beyond the typed
+    fields below. This enables provider-specific metadata without breaking type safety:
+
+    - OpenAI might include: ``{"system_fingerprint": "fp_123", "logprobs": {...}}``
+    - Anthropic might include: ``{"stop_reason": "stop_sequence", "usage": {...}}``
+    - Custom providers can add their own fields
+
+    The common fields (``model_provider``, ``model_name``) provide a baseline
+    contract while preserving flexibility for provider innovations.
    """

    model_provider: str
@@ -75,6 +138,7 @@ class AIMessage:
        tool_calls: Optional list of tool calls made by the AI.
        invalid_tool_calls: Optional list of tool calls that failed validation.
        usage: Optional dictionary containing usage statistics.
    """

    type: Literal["ai"] = "ai"
@@ -82,6 +146,7 @@ class AIMessage:
    The purpose of this field is to allow for easy identification of the message type
    when deserializing messages.
    """

    name: Optional[str] = None
@@ -91,6 +156,7 @@ class AIMessage:
    Usage of this field is optional, and whether it's used or not is up to the
    model implementation.
    """

    id: Optional[str] = None
@@ -98,6 +164,7 @@ class AIMessage:
    If the provider assigns a meaningful ID, it should be used here. Otherwise, a
    LangChain-generated ID will be used.
    """

    lc_version: str = "v1"
@@ -116,6 +183,7 @@ class AIMessage:
    This field should include non-standard data returned by the provider, such as
    response headers, service tiers, or log probabilities.
    """

    parsed: Optional[Union[dict[str, Any], BaseModel]] = None
@@ -147,6 +215,7 @@ class AIMessage:
                be added to the content list.
            invalid_tool_calls: Optional list of tool calls that failed validation.
            parsed: Optional auto-parsed message contents, if applicable.
        """
        if isinstance(content, str):
            self.content = [types.create_text_block(content)]
@@ -196,10 +265,19 @@
    @property
    def text(self) -> str:
-        """Extract all text content from the AI message as a string."""
-        return "".join(
+        """Extract all text content from the AI message as a string.
+
+        Can be used as both property (``message.text``) and method (``message.text()``).
+
+        .. deprecated:: 0.4.0
+            Calling .text() as a method is deprecated. Use .text as a property instead.
+            This method will be removed in 2.0.0.
+        """
+        text_value = "".join(
            block["text"] for block in self.content if types.is_text_block(block)
        )
+        return cast("str", TextAccessor(text_value))

    @property
    def tool_calls(self) -> list[types.ToolCall]:
@@ -232,7 +310,26 @@ class AIMessageChunk(AIMessage):
    """A partial chunk of an AI message during streaming.

    Represents a portion of an AI response that is delivered incrementally
-    during streaming generation. Contains partial content and metadata.
+    during streaming generation. When AI providers stream responses token-by-token,
+    each chunk contains partial content that gets accumulated into a complete message.
+
+    **Streaming Workflow:**
+
+    1. Provider streams partial responses as ``AIMessageChunk`` objects
+    2. Chunks are accumulated: ``chunk1 + chunk2 + ...``
+    3. Final accumulated chunk can be converted to ``AIMessage`` via ``.to_message()``
+
+    **Tool Call Handling:**
+
+    During streaming, tool calls arrive as ``ToolCallChunk`` objects with partial
+    JSON. When chunks are accumulated, the final chunk (marked with
+    ``chunk_position="last"``) triggers parsing of complete tool calls from the
+    accumulated JSON strings.
+
+    **Content Merging:**
+
+    Content blocks are merged intelligently - text blocks combine their strings,
+    tool call chunks accumulate arguments, and other blocks are concatenated.

    Attributes:
        type: Message type identifier, always ``'ai_chunk'``.
@@ -241,6 +338,7 @@ class AIMessageChunk(AIMessage):
        content: List of content blocks containing partial message data.
        tool_call_chunks: Optional list of partial tool call data.
        usage_metadata: Optional metadata about token usage and costs.
    """

    type: Literal["ai_chunk"] = "ai_chunk"  # type: ignore[assignment]
@@ -248,6 +346,7 @@ class AIMessageChunk(AIMessage):
    The purpose of this field is to allow for easy identification of the message type
    when deserializing messages.
    """

    def __init__(
@@ -274,8 +373,9 @@ class AIMessageChunk(AIMessage):
            usage_metadata: Optional metadata about token usage.
            tool_call_chunks: Optional list of partial tool call data.
            parsed: Optional auto-parsed message contents, if applicable.
-            chunk_position: Optional position of the chunk in the stream. If "last",
+            chunk_position: Optional position of the chunk in the stream. If ``'last'``,
                tool calls will be parsed when aggregated into a stream.
        """
        if isinstance(content, str):
            self.content = [{"type": "text", "text": content, "index": 0}]
@@ -506,6 +606,7 @@ class HumanMessage:
        id: Unique identifier for the message.
        content: List of content blocks containing the user's input.
        name: Optional human-readable name for the message.
    """

    id: str
@@ -513,6 +614,7 @@ class HumanMessage:
    If the provider assigns a meaningful ID, it should be used here. Otherwise, a
    LangChain-generated ID will be used.
    """

    content: list[types.ContentBlock]
@@ -523,6 +625,7 @@ class HumanMessage:
    The purpose of this field is to allow for easy identification of the message type
    when deserializing messages.
    """

    name: Optional[str] = None
@@ -532,6 +635,7 @@ class HumanMessage:
    Usage of this field is optional, and whether it's used or not is up to the
    model implementation.
    """

    def __init__(
@@ -547,6 +651,7 @@ class HumanMessage:
            content: Message content as string or list of content blocks.
            id: Optional unique identifier for the message.
            name: Optional human-readable name for the message.
        """
        self.id = _ensure_id(id)
        if isinstance(content, str):
@@ -555,15 +660,21 @@
        self.content = content
        self.name = name

+    @property
    def text(self) -> str:
-        """Extract all text content from the message.
-
-        Returns:
-            Concatenated string of all text blocks in the message.
+        """Extract all text content from the message as a string.
+
+        Can be used as both property (``message.text``) and method (``message.text()``).
+
+        .. deprecated:: 0.4.0
+            Calling .text() as a method is deprecated. Use .text as a property instead.
+            This method will be removed in 2.0.0.
        """
-        return "".join(
+        text_value = "".join(
            block["text"] for block in self.content if types.is_text_block(block)
        )
+        return cast("str", TextAccessor(text_value))


@dataclass
@@ -577,6 +688,7 @@ class SystemMessage:
        type: Message type identifier, always ``'system'``.
        id: Unique identifier for the message.
        content: List of content blocks containing system instructions.
    """

    id: str
@@ -584,6 +696,7 @@ class SystemMessage:
    If the provider assigns a meaningful ID, it should be used here. Otherwise, a
    LangChain-generated ID will be used.
    """

    content: list[types.ContentBlock]
@@ -594,6 +707,7 @@ class SystemMessage:
    The purpose of this field is to allow for easy identification of the message type
    when deserializing messages.
    """

    name: Optional[str] = None
@@ -603,6 +717,7 @@ class SystemMessage:
    Usage of this field is optional, and whether it's used or not is up to the
    model implementation.
    """

    custom_role: Optional[str] = None
@@ -612,6 +727,7 @@ class SystemMessage:
    Integration packages may use this field to assign the system message role if it
    contains a recognized value.
    """

    def __init__(
@@ -629,6 +745,7 @@ class SystemMessage:
            id: Optional unique identifier for the message.
            custom_role: If provided, a custom role for the system message.
            name: Optional human-readable name for the message.
        """
        self.id = _ensure_id(id)
        if isinstance(content, str):
@@ -638,11 +755,21 @@
        self.custom_role = custom_role
        self.name = name

+    @property
    def text(self) -> str:
-        """Extract all text content from the system message."""
-        return "".join(
+        """Extract all text content from the system message as a string.
+
+        Can be used as both property (``message.text``) and method (``message.text()``).
+
+        .. deprecated:: 0.4.0
+            Calling .text() as a method is deprecated. Use .text as a property instead.
+            This method will be removed in 2.0.0.
+        """
+        text_value = "".join(
            block["text"] for block in self.content if types.is_text_block(block)
        )
+        return cast("str", TextAccessor(text_value))


@dataclass
@@ -659,6 +786,7 @@ class ToolMessage(ToolOutputMixin):
        content: The result content from tool execution.
        artifact: Optional app-side payload not intended for the model.
        status: Execution status ("success" or "error").
    """

    id: str
@@ -668,6 +796,7 @@ class ToolMessage(ToolOutputMixin):
    """ID of the tool call this message responds to.

    This should match the ID of the tool call that this message is responding to.
    """

    content: list[types.ContentBlock]
@@ -678,10 +807,21 @@ class ToolMessage(ToolOutputMixin):
    The purpose of this field is to allow for easy identification of the message type
    when deserializing messages.
    """

    artifact: Optional[Any] = None
-    """App-side payload not for the model."""
+    """App-side payload not intended for the model.
+
+    Artifacts contain data that your application needs but should not be sent
+    to the AI model. Example use cases:
+
+    - File handles or database connections used by the tool
+    - Raw binary data (images, documents) alongside text summaries
+    - Internal debugging information or execution traces
+    - Computed results that supplement the text content
+    """

    name: Optional[str] = None
    """An optional name for the message.
@@ -690,6 +830,7 @@ class ToolMessage(ToolOutputMixin):
    Usage of this field is optional, and whether it's used or not is up to the
    model implementation.
    """

    status: Literal["success", "error"] = "success"
@@ -697,6 +838,7 @@ class ToolMessage(ToolOutputMixin):
    Indicates whether the tool call was successful or encountered an error.
    Defaults to "success".
    """

    def __init__(
@@ -718,6 +860,7 @@ class ToolMessage(ToolOutputMixin):
            name: Optional human-readable name for the message.
            artifact: Optional app-side payload not intended for the model.
            status: Execution status (``'success'`` or ``'error'``).
        """
        self.id = _ensure_id(id)
        self.tool_call_id = tool_call_id
@@ -731,15 +874,25 @@
    @property
    def text(self) -> str:
-        """Extract all text content from the tool message."""
-        return "".join(
+        """Extract all text content from the tool message as a string.
+
+        Can be used as both property (``message.text``) and method (``message.text()``).
+
+        .. deprecated:: 0.4.0
+            Calling .text() as a method is deprecated. Use .text as a property instead.
+            This method will be removed in 2.0.0.
+        """
+        text_value = "".join(
            block["text"] for block in self.content if types.is_text_block(block)
        )
+        return cast("str", TextAccessor(text_value))

    def __post_init__(self) -> None:
        """Initialize computed fields after dataclass creation.

        Ensures the tool message has a valid ID.
        """
        self.id = _ensure_id(self.id)
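
As an aside on the streaming workflow described in the `AIMessageChunk` docstring above, a rough sketch of the accumulation flow; the `+` accumulation and `.to_message()` conversion come from that docstring, while the exact constructor arguments here are assumptions:

```python
from langchain_core.v1.messages import AIMessageChunk

# Partial pieces of one streamed response, roughly as a provider might emit them.
chunks = [
    AIMessageChunk("The answer "),
    AIMessageChunk("is 42.", chunk_position="last"),
]

# Accumulate left to right; per the docstring, text blocks merge their strings
# and the chunk marked "last" triggers parsing of any buffered tool calls.
accumulated = chunks[0]
for chunk in chunks[1:]:
    accumulated = accumulated + chunk

final = accumulated.to_message()  # complete AIMessage once streaming is done
print(final.text)  # expected: "The answer is 42."
```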

View File

@@ -1439,9 +1439,17 @@
    Contains additional information returned by the provider, such as
    response headers, service tiers, log probabilities, system fingerprints, etc.

-    Extra keys are permitted from what is typed here (via ``total=False``), allowing
-    for provider-specific metadata to be included without breaking the type
-    definition.
+    **Extensibility Design:**
+
+    This uses ``total=False`` to allow arbitrary additional keys beyond the typed
+    fields below. This enables provider-specific metadata without breaking type safety:
+
+    - OpenAI might include: ``{"system_fingerprint": "fp_123", "logprobs": {...}}``
+    - Anthropic might include: ``{"stop_reason": "stop_sequence", "usage": {...}}``
+    - Custom providers can add their own fields
+
+    The common fields (``model_provider``, ``model_name``) provide a baseline
+    contract while preserving flexibility for provider innovations.
    ''',
    'properties': dict({
      'model_name': dict({
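
To make the `total=False` point in the docstring above concrete, a small illustrative sketch with a stand-in TypedDict (the class name and values below are hypothetical, not from the codebase):

```python
from typing_extensions import TypedDict


class ProviderMetadata(TypedDict, total=False):
    """Stand-in for ResponseMetadata: with total=False, every typed key is optional."""

    model_provider: str
    model_name: str


# Providers populate only the fields they actually have...
meta: ProviderMetadata = {"model_provider": "openai", "model_name": "gpt-4o"}

# ...and since a TypedDict is a plain dict at runtime, provider-specific extras
# such as a system fingerprint can ride along untouched.
extras = dict(meta, system_fingerprint="fp_123")
assert extras["model_name"] == "gpt-4o"
```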

View File

@@ -10444,9 +10444,17 @@
    Contains additional information returned by the provider, such as
    response headers, service tiers, log probabilities, system fingerprints, etc.

-    Extra keys are permitted from what is typed here (via ``total=False``), allowing
-    for provider-specific metadata to be included without breaking the type
-    definition.
+    **Extensibility Design:**
+
+    This uses ``total=False`` to allow arbitrary additional keys beyond the typed
+    fields below. This enables provider-specific metadata without breaking type safety:
+
+    - OpenAI might include: ``{"system_fingerprint": "fp_123", "logprobs": {...}}``
+    - Anthropic might include: ``{"stop_reason": "stop_sequence", "usage": {...}}``
+    - Custom providers can add their own fields
+
+    The common fields (``model_provider``, ``model_name``) provide a baseline
+    contract while preserving flexibility for provider innovations.
    ''',
    'properties': dict({
      'model_name': dict({

View File

@@ -38,6 +38,9 @@ from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chu
from langchain_core.utils._merge import merge_lists
from langchain_core.v1.messages import AIMessage as AIMessageV1
from langchain_core.v1.messages import AIMessageChunk as AIMessageChunkV1
+from langchain_core.v1.messages import HumanMessage as HumanMessageV1
+from langchain_core.v1.messages import SystemMessage as SystemMessageV1
+from langchain_core.v1.messages import ToolMessage as ToolMessageV1


def test_message_init() -> None:
@@ -1385,3 +1388,56 @@ def test_known_block_types() -> None:
        for t in expected
    }
    assert expected == KNOWN_BLOCK_TYPES
+
+
+def test_v1_text_accessor() -> None:
+    """Test that v1 message.text property and .text() method return the same value."""
+    # Test HumanMessage
+    human_msg = HumanMessageV1(content="Hello world")
+    assert human_msg.text == "Hello world"
+    assert human_msg.text() == "Hello world"  # type: ignore[operator]
+    assert str(human_msg.text) == human_msg.text()  # type: ignore[operator]
+
+    # Test SystemMessage
+    system_msg = SystemMessageV1(content="You are a helpful assistant")
+    assert system_msg.text == "You are a helpful assistant"
+    assert system_msg.text() == "You are a helpful assistant"  # type: ignore[operator]
+    assert str(system_msg.text) == system_msg.text()  # type: ignore[operator]
+
+    # Test AIMessage
+    ai_msg = AIMessageV1(content="I can help you with that")
+    assert ai_msg.text == "I can help you with that"
+    assert ai_msg.text() == "I can help you with that"  # type: ignore[operator]
+    assert str(ai_msg.text) == ai_msg.text()  # type: ignore[operator]
+
+    # Test ToolMessage
+    tool_msg = ToolMessageV1(content="Task completed", tool_call_id="tool_1")
+    assert tool_msg.text == "Task completed"
+    assert tool_msg.text() == "Task completed"  # type: ignore[operator]
+    assert str(tool_msg.text) == tool_msg.text()  # type: ignore[operator]
+
+    # Test with complex content (list of content blocks)
+    complex_msg = HumanMessageV1(
+        content=[{"type": "text", "text": "Hello "}, {"type": "text", "text": "world"}]
+    )
+    assert complex_msg.text == "Hello world"
+    assert complex_msg.text() == "Hello world"  # type: ignore[operator]
+    assert str(complex_msg.text) == complex_msg.text()  # type: ignore[operator]
+
+    # Test with mixed content (text and non-text blocks)
+    mixed_msg = AIMessageV1(
+        content=[
+            {"type": "text", "text": "The answer is "},
+            {"type": "tool_call", "name": "calculate", "args": {"x": 2}, "id": "1"},
+            {"type": "text", "text": "42"},
+        ]
+    )
+    assert mixed_msg.text == "The answer is 42"
+    assert mixed_msg.text() == "The answer is 42"  # type: ignore[operator]
+    assert str(mixed_msg.text) == mixed_msg.text()  # type: ignore[operator]
+
+    # Test empty content
+    empty_msg = HumanMessageV1(content=[])
+    assert empty_msg.text == ""
+    assert empty_msg.text() == ""  # type: ignore[operator]
+    assert str(empty_msg.text) == empty_msg.text()  # type: ignore[operator]
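
One thing the test above does not exercise is the `artifact` field documented in the first file; a hedged sketch of how it is meant to be used (field values made up, constructor keywords assumed from the `__init__` docstring above):

```python
from langchain_core.v1.messages import ToolMessage

# The text content goes to the model; the raw rows stay app-side in the artifact.
result = ToolMessage(
    content="Found 3 matching rows.",
    tool_call_id="tool_1",
    artifact={"rows": [{"id": 1}, {"id": 2}, {"id": 3}]},
)

assert result.text == "Found 3 matching rows."
assert result.artifact is not None  # available to the app, never sent to the model
assert result.status == "success"  # default status per the dataclass definition
```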