From 42c1159991657f40bb8161eb4cfae2e14abc2ab9 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Thu, 7 Aug 2025 12:16:31 -0400 Subject: [PATCH] feat: add TextAccessor, deprecate `.text()` as method (#32441) Adds backward compat for `.text()` on messages while keeping `.text` access _The kicker:_ Any previous use of `.text()` will now need a `# type: ignore[operator]` to silence type checkers. However, it will still behave as expected at runtime. Deprecating in v0.4.0, to be removed in v2.0.0. --- libs/core/langchain_core/v1/messages.py | 185 ++++++++++++++++-- .../runnables/__snapshots__/test_graph.ambr | 14 +- .../__snapshots__/test_runnable.ambr | 14 +- libs/core/tests/unit_tests/test_messages.py | 56 ++++++ 4 files changed, 247 insertions(+), 22 deletions(-) diff --git a/libs/core/langchain_core/v1/messages.py b/libs/core/langchain_core/v1/messages.py index 5c20ad552c8..451926b8351 100644 --- a/libs/core/langchain_core/v1/messages.py +++ b/libs/core/langchain_core/v1/messages.py @@ -2,6 +2,7 @@ Each message has content that may be comprised of content blocks, defined under ``langchain_core.messages.content_blocks``. + """ import uuid @@ -12,6 +13,7 @@ from pydantic import BaseModel from typing_extensions import TypedDict import langchain_core.messages.content_blocks as types +from langchain_core._api.deprecation import warn_deprecated from langchain_core.messages.ai import ( _LC_AUTO_PREFIX, _LC_ID_PREFIX, @@ -26,6 +28,57 @@ from langchain_core.utils._merge import merge_dicts from langchain_core.utils.json import parse_partial_json +class TextAccessor(str): + """String-like object that supports both property and method access patterns. + + Exists to maintain backward compatibility while transitioning from method-based to + property-based text access in message objects. 
In LangChain versions before 0.4, + message text was accessed via ``.text()`` method calls. From 0.4 onward, the + preferred pattern is property access via ``.text``. + + Rather than breaking existing code immediately, ``TextAccessor`` allows both + patterns: + - Modern property access: ``message.text`` (a ``str`` subclass instance) + - Legacy method access: ``message.text()`` (callable; emits a deprecation warning) + + Examples: + >>> msg = AIMessage("Hello world") + >>> text = msg.text # Preferred: property access + >>> text = msg.text() # Deprecated: method access (shows warning) + + """ + + __slots__ = () + + def __new__(cls, value: str) -> "TextAccessor": + """Create new TextAccessor instance.""" + return str.__new__(cls, value) + + def __call__(self) -> str: + """Enable method-style text access for backward compatibility. + + This method exists solely to support legacy code that calls ``.text()`` + as a method. New code should use property access (``.text``) instead. + + .. deprecated:: 0.4.0 + Calling .text() as a method is deprecated. Use .text as a property instead. + This method will be removed in 2.0.0. + + Returns: + The string content, identical to property access. + + """ + warn_deprecated( + since="0.4.0", + message=( + "Calling .text() as a method is deprecated. " + "Use .text as a property instead (e.g., message.text)." + ), + removal="2.0.0", + ) + return str(self) + + def _ensure_id(id_val: Optional[str]) -> str: """Ensure the ID is a valid string, generating a new UUID if not provided. @@ -37,6 +90,7 @@ def _ensure_id(id_val: Optional[str]) -> str: Returns: A valid string ID, either the provided value or a new UUID. + """ return id_val or str(f"{_LC_AUTO_PREFIX}{uuid.uuid4()}") @@ -47,9 +101,18 @@ class ResponseMetadata(TypedDict, total=False): Contains additional information returned by the provider, such as response headers, service tiers, log probabilities, system fingerprints, etc. - Extra keys are permitted from what is typed here (via ``total=False``), allowing - for provider-specific metadata to be included without breaking the type - definition. + **Extensibility Design:** + + This uses ``total=False`` to allow arbitrary additional keys beyond the typed + fields below. 
This enables provider-specific metadata without breaking type safety: + + - OpenAI might include: ``{"system_fingerprint": "fp_123", "logprobs": {...}}`` + - Anthropic might include: ``{"stop_reason": "stop_sequence", "usage": {...}}`` + - Custom providers can add their own fields + + The common fields (``model_provider``, ``model_name``) provide a baseline + contract while preserving flexibility for provider innovations. + """ model_provider: str @@ -75,6 +138,7 @@ class AIMessage: tool_calls: Optional list of tool calls made by the AI. invalid_tool_calls: Optional list of tool calls that failed validation. usage: Optional dictionary containing usage statistics. + """ type: Literal["ai"] = "ai" @@ -82,6 +146,7 @@ class AIMessage: The purpose of this field is to allow for easy identification of the message type when deserializing messages. + """ name: Optional[str] = None @@ -91,6 +156,7 @@ class AIMessage: Usage of this field is optional, and whether it's used or not is up to the model implementation. + """ id: Optional[str] = None @@ -98,6 +164,7 @@ class AIMessage: If the provider assigns a meaningful ID, it should be used here. Otherwise, a LangChain-generated ID will be used. + """ lc_version: str = "v1" @@ -116,6 +183,7 @@ class AIMessage: This field should include non-standard data returned by the provider, such as response headers, service tiers, or log probabilities. + """ parsed: Optional[Union[dict[str, Any], BaseModel]] = None @@ -147,6 +215,7 @@ class AIMessage: be added to the content list. invalid_tool_calls: Optional list of tool calls that failed validation. parsed: Optional auto-parsed message contents, if applicable. + """ if isinstance(content, str): self.content = [types.create_text_block(content)] @@ -196,10 +265,19 @@ class AIMessage: @property def text(self) -> str: - """Extract all text content from the AI message as a string.""" - return "".join( + """Extract all text content from the AI message as a string. 
+ + Can be used as both property (``message.text``) and method (``message.text()``). + + .. deprecated:: 0.4.0 + Calling .text() as a method is deprecated. Use .text as a property instead. + This method will be removed in 2.0.0. + + """ + text_value = "".join( block["text"] for block in self.content if types.is_text_block(block) ) + return cast("str", TextAccessor(text_value)) @property def tool_calls(self) -> list[types.ToolCall]: @@ -232,7 +310,26 @@ class AIMessageChunk(AIMessage): """A partial chunk of an AI message during streaming. Represents a portion of an AI response that is delivered incrementally - during streaming generation. Contains partial content and metadata. + during streaming generation. When AI providers stream responses token-by-token, + each chunk contains partial content that gets accumulated into a complete message. + + **Streaming Workflow:** + + 1. Provider streams partial responses as ``AIMessageChunk`` objects + 2. Chunks are accumulated: ``chunk1 + chunk2 + ...`` + 3. Final accumulated chunk can be converted to ``AIMessage`` via ``.to_message()`` + + **Tool Call Handling:** + + During streaming, tool calls arrive as ``ToolCallChunk`` objects with partial + JSON. When chunks are accumulated, the final chunk (marked with + ``chunk_position="last"``) triggers parsing of complete tool calls from the + accumulated JSON strings. + + **Content Merging:** + + Content blocks are merged intelligently - text blocks combine their strings, + tool call chunks accumulate arguments, and other blocks are concatenated. Attributes: type: Message type identifier, always ``'ai_chunk'``. @@ -241,6 +338,7 @@ class AIMessageChunk(AIMessage): content: List of content blocks containing partial message data. tool_call_chunks: Optional list of partial tool call data. usage_metadata: Optional metadata about token usage and costs. 
+ """ type: Literal["ai_chunk"] = "ai_chunk" # type: ignore[assignment] @@ -248,6 +346,7 @@ class AIMessageChunk(AIMessage): The purpose of this field is to allow for easy identification of the message type when deserializing messages. + """ def __init__( @@ -274,8 +373,9 @@ class AIMessageChunk(AIMessage): usage_metadata: Optional metadata about token usage. tool_call_chunks: Optional list of partial tool call data. parsed: Optional auto-parsed message contents, if applicable. - chunk_position: Optional position of the chunk in the stream. If "last", + chunk_position: Optional position of the chunk in the stream. If ``'last'``, tool calls will be parsed when aggregated into a stream. + """ if isinstance(content, str): self.content = [{"type": "text", "text": content, "index": 0}] @@ -506,6 +606,7 @@ class HumanMessage: id: Unique identifier for the message. content: List of content blocks containing the user's input. name: Optional human-readable name for the message. + """ id: str @@ -513,6 +614,7 @@ class HumanMessage: If the provider assigns a meaningful ID, it should be used here. Otherwise, a LangChain-generated ID will be used. + """ content: list[types.ContentBlock] @@ -523,6 +625,7 @@ class HumanMessage: The purpose of this field is to allow for easy identification of the message type when deserializing messages. + """ name: Optional[str] = None @@ -532,6 +635,7 @@ class HumanMessage: Usage of this field is optional, and whether it's used or not is up to the model implementation. + """ def __init__( @@ -547,6 +651,7 @@ class HumanMessage: content: Message content as string or list of content blocks. id: Optional unique identifier for the message. name: Optional human-readable name for the message. + """ self.id = _ensure_id(id) if isinstance(content, str): @@ -555,15 +660,21 @@ class HumanMessage: self.content = content self.name = name + @property def text(self) -> str: - """Extract all text content from the message. 
+ """Extract all text content from the message as a string. + + Can be used as both property (``message.text``) and method (``message.text()``). + + .. deprecated:: 0.4.0 + Calling .text() as a method is deprecated. Use .text as a property instead. + This method will be removed in 2.0.0. - Returns: - Concatenated string of all text blocks in the message. """ - return "".join( + text_value = "".join( block["text"] for block in self.content if types.is_text_block(block) ) + return cast("str", TextAccessor(text_value)) @dataclass @@ -577,6 +688,7 @@ class SystemMessage: type: Message type identifier, always ``'system'``. id: Unique identifier for the message. content: List of content blocks containing system instructions. + """ id: str @@ -584,6 +696,7 @@ class SystemMessage: If the provider assigns a meaningful ID, it should be used here. Otherwise, a LangChain-generated ID will be used. + """ content: list[types.ContentBlock] @@ -594,6 +707,7 @@ class SystemMessage: The purpose of this field is to allow for easy identification of the message type when deserializing messages. + """ name: Optional[str] = None @@ -603,6 +717,7 @@ class SystemMessage: Usage of this field is optional, and whether it's used or not is up to the model implementation. + """ custom_role: Optional[str] = None @@ -612,6 +727,7 @@ class SystemMessage: Integration packages may use this field to assign the system message role if it contains a recognized value. + """ def __init__( @@ -629,6 +745,7 @@ class SystemMessage: id: Optional unique identifier for the message. custom_role: If provided, a custom role for the system message. name: Optional human-readable name for the message. 
+ """ self.id = _ensure_id(id) if isinstance(content, str): @@ -638,11 +755,21 @@ class SystemMessage: self.custom_role = custom_role self.name = name + @property def text(self) -> str: - """Extract all text content from the system message.""" - return "".join( + """Extract all text content from the system message as a string. + + Can be used as both property (``message.text``) and method (``message.text()``). + + .. deprecated:: 0.4.0 + Calling .text() as a method is deprecated. Use .text as a property instead. + This method will be removed in 2.0.0. + + """ + text_value = "".join( block["text"] for block in self.content if types.is_text_block(block) ) + return cast("str", TextAccessor(text_value)) @dataclass @@ -659,6 +786,7 @@ class ToolMessage(ToolOutputMixin): content: The result content from tool execution. artifact: Optional app-side payload not intended for the model. status: Execution status ("success" or "error"). + """ id: str @@ -668,6 +796,7 @@ class ToolMessage(ToolOutputMixin): """ID of the tool call this message responds to. This should match the ID of the tool call that this message is responding to. + """ content: list[types.ContentBlock] @@ -678,10 +807,21 @@ class ToolMessage(ToolOutputMixin): The purpose of this field is to allow for easy identification of the message type when deserializing messages. + """ artifact: Optional[Any] = None - """App-side payload not for the model.""" + """App-side payload not intended for the model. + + Artifacts contain data that your application needs but should not be sent + to the AI model. Example use cases: + + - File handles or database connections used by the tool + - Raw binary data (images, documents) alongside text summaries + - Internal debugging information or execution traces + - Computed results that supplement the text content + + """ name: Optional[str] = None """An optional name for the message. 
@@ -690,6 +830,7 @@ class ToolMessage(ToolOutputMixin): Usage of this field is optional, and whether it's used or not is up to the model implementation. + """ status: Literal["success", "error"] = "success" @@ -697,6 +838,7 @@ class ToolMessage(ToolOutputMixin): Indicates whether the tool call was successful or encountered an error. Defaults to "success". + """ def __init__( @@ -718,6 +860,7 @@ class ToolMessage(ToolOutputMixin): name: Optional human-readable name for the message. artifact: Optional app-side payload not intended for the model. status: Execution status (``'success'`` or ``'error'``). + """ self.id = _ensure_id(id) self.tool_call_id = tool_call_id @@ -731,15 +874,25 @@ class ToolMessage(ToolOutputMixin): @property def text(self) -> str: - """Extract all text content from the tool message.""" - return "".join( + """Extract all text content from the tool message as a string. + + Can be used as both property (``message.text``) and method (``message.text()``). + + .. deprecated:: 0.4.0 + Calling .text() as a method is deprecated. Use .text as a property instead. + This method will be removed in 2.0.0. + + """ + text_value = "".join( block["text"] for block in self.content if types.is_text_block(block) ) + return cast("str", TextAccessor(text_value)) def __post_init__(self) -> None: """Initialize computed fields after dataclass creation. Ensures the tool message has a valid ID. + """ self.id = _ensure_id(self.id) diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr index d9f1ec1204d..56df846c956 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr @@ -1439,9 +1439,17 @@ Contains additional information returned by the provider, such as response headers, service tiers, log probabilities, system fingerprints, etc. 
- Extra keys are permitted from what is typed here (via ``total=False``), allowing - for provider-specific metadata to be included without breaking the type - definition. + **Extensibility Design:** + + This uses ``total=False`` to allow arbitrary additional keys beyond the typed + fields below. This enables provider-specific metadata without breaking type safety: + + - OpenAI might include: ``{"system_fingerprint": "fp_123", "logprobs": {...}}`` + - Anthropic might include: ``{"stop_reason": "stop_sequence", "usage": {...}}`` + - Custom providers can add their own fields + + The common fields (``model_provider``, ``model_name``) provide a baseline + contract while preserving flexibility for provider innovations. ''', 'properties': dict({ 'model_name': dict({ diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr index dadbdb103da..7a2c7b40621 100644 --- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr +++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr @@ -10444,9 +10444,17 @@ Contains additional information returned by the provider, such as response headers, service tiers, log probabilities, system fingerprints, etc. - Extra keys are permitted from what is typed here (via ``total=False``), allowing - for provider-specific metadata to be included without breaking the type - definition. + **Extensibility Design:** + + This uses ``total=False`` to allow arbitrary additional keys beyond the typed + fields below. 
This enables provider-specific metadata without breaking type safety: + + - OpenAI might include: ``{"system_fingerprint": "fp_123", "logprobs": {...}}`` + - Anthropic might include: ``{"stop_reason": "stop_sequence", "usage": {...}}`` + - Custom providers can add their own fields + + The common fields (``model_provider``, ``model_name``) provide a baseline + contract while preserving flexibility for provider innovations. ''', 'properties': dict({ 'model_name': dict({ diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index ea3ac1fb7d5..89c77414dc0 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -38,6 +38,9 @@ from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chu from langchain_core.utils._merge import merge_lists from langchain_core.v1.messages import AIMessage as AIMessageV1 from langchain_core.v1.messages import AIMessageChunk as AIMessageChunkV1 +from langchain_core.v1.messages import HumanMessage as HumanMessageV1 +from langchain_core.v1.messages import SystemMessage as SystemMessageV1 +from langchain_core.v1.messages import ToolMessage as ToolMessageV1 def test_message_init() -> None: @@ -1385,3 +1388,56 @@ def test_known_block_types() -> None: for t in expected } assert expected == KNOWN_BLOCK_TYPES + + +def test_v1_text_accessor() -> None: + """Test that v1 message.text property and .text() method return the same value.""" + # Test HumanMessage + human_msg = HumanMessageV1(content="Hello world") + assert human_msg.text == "Hello world" + assert human_msg.text() == "Hello world" # type: ignore[operator] + assert str(human_msg.text) == human_msg.text() # type: ignore[operator] + + # Test SystemMessage + system_msg = SystemMessageV1(content="You are a helpful assistant") + assert system_msg.text == "You are a helpful assistant" + assert system_msg.text() == "You are a helpful assistant" # type: ignore[operator] + assert 
str(system_msg.text) == system_msg.text() # type: ignore[operator] + + # Test AIMessage + ai_msg = AIMessageV1(content="I can help you with that") + assert ai_msg.text == "I can help you with that" + assert ai_msg.text() == "I can help you with that" # type: ignore[operator] + assert str(ai_msg.text) == ai_msg.text() # type: ignore[operator] + + # Test ToolMessage + tool_msg = ToolMessageV1(content="Task completed", tool_call_id="tool_1") + assert tool_msg.text == "Task completed" + assert tool_msg.text() == "Task completed" # type: ignore[operator] + assert str(tool_msg.text) == tool_msg.text() # type: ignore[operator] + + # Test with complex content (list of content blocks) + complex_msg = HumanMessageV1( + content=[{"type": "text", "text": "Hello "}, {"type": "text", "text": "world"}] + ) + assert complex_msg.text == "Hello world" + assert complex_msg.text() == "Hello world" # type: ignore[operator] + assert str(complex_msg.text) == complex_msg.text() # type: ignore[operator] + + # Test with mixed content (text and non-text blocks) + mixed_msg = AIMessageV1( + content=[ + {"type": "text", "text": "The answer is "}, + {"type": "tool_call", "name": "calculate", "args": {"x": 2}, "id": "1"}, + {"type": "text", "text": "42"}, + ] + ) + assert mixed_msg.text == "The answer is 42" + assert mixed_msg.text() == "The answer is 42" # type: ignore[operator] + assert str(mixed_msg.text) == mixed_msg.text() # type: ignore[operator] + + # Test empty content + empty_msg = HumanMessageV1(content=[]) + assert empty_msg.text == "" + assert empty_msg.text() == "" # type: ignore[operator] + assert str(empty_msg.text) == empty_msg.text() # type: ignore[operator]