From d8996810403bdf4639738e044e209417caf7c401 Mon Sep 17 00:00:00 2001
From: Weichen Zhao <61238101+SeasonPilot@users.noreply.github.com>
Date: Fri, 23 Jan 2026 02:33:08 +0800
Subject: [PATCH] feat(core): add XML format option for `get_buffer_string`
(#34802)
## Summary
Add XML format option for `get_buffer_string()` to provide unambiguous
message serialization. This fixes role prefix ambiguity when message
content contains strings like "Human:" or "AI:".
Fixes #34786
## Changes
- Add `format="xml"` parameter with proper XML escaping using
`quoteattr()` for attributes
- Add explicit validation for format parameter (raises `ValueError` for
invalid values)
- Add comprehensive tests for XML format edge cases
---------
Co-authored-by: Mason Daugherty
Co-authored-by: Mason Daugherty
---
libs/core/langchain_core/messages/utils.py | 357 ++++++-
.../tests/unit_tests/messages/test_utils.py | 883 ++++++++++++++++++
2 files changed, 1229 insertions(+), 11 deletions(-)
diff --git a/libs/core/langchain_core/messages/utils.py b/libs/core/langchain_core/messages/utils.py
index 609d8045ac2..d4659bf0bb9 100644
--- a/libs/core/langchain_core/messages/utils.py
+++ b/libs/core/langchain_core/messages/utils.py
@@ -28,6 +28,7 @@ from typing import (
cast,
overload,
)
+from xml.sax.saxutils import escape, quoteattr
from pydantic import Discriminator, Field, Tag
@@ -98,11 +99,199 @@ AnyMessage = Annotated[
"""A type representing any defined `Message` or `MessageChunk` type."""
+def _has_base64_data(block: dict) -> bool:
+ """Check if a content block contains base64 encoded data.
+
+ Args:
+ block: A content block dictionary.
+
+ Returns:
+ Whether the block contains base64 data.
+ """
+ # Check for explicit base64 field (standard content blocks)
+ if block.get("base64"):
+ return True
+
+ # Check for data: URL in url field
+ url = block.get("url", "")
+ if isinstance(url, str) and url.startswith("data:"):
+ return True
+
+ # Check for OpenAI-style image_url with data: URL
+ image_url = block.get("image_url", {})
+ if isinstance(image_url, dict):
+ url = image_url.get("url", "")
+ if isinstance(url, str) and url.startswith("data:"):
+ return True
+
+ return False
+
+
+_XML_CONTENT_BLOCK_MAX_LEN = 500
+
+
+def _truncate(text: str, max_len: int = _XML_CONTENT_BLOCK_MAX_LEN) -> str:
+ """Truncate text to `max_len` characters, adding ellipsis if truncated."""
+ if len(text) <= max_len:
+ return text
+ return text[:max_len] + "..."
+
+
+def _format_content_block_xml(block: dict) -> str | None:
+ """Format a content block as XML.
+
+ Args:
+ block: A LangChain content block.
+
+ Returns:
+ XML string representation of the block, or `None` if the block should be
+ skipped.
+
+ Note:
+ Plain text document content, server tool call arguments, and server tool
+ result outputs are truncated to 500 characters.
+ """
+ block_type = block.get("type", "")
+
+ # Skip blocks with base64 encoded data
+ if _has_base64_data(block):
+ return None
+
+ # Text blocks
+ if block_type == "text":
+ text = block.get("text", "")
+ return escape(text) if text else None
+
+ # Reasoning blocks
+ if block_type == "reasoning":
+ reasoning = block.get("reasoning", "")
+ if reasoning:
+ return f"{escape(reasoning)}"
+ return None
+
+ # Image blocks (URL only, base64 already filtered)
+ if block_type == "image":
+ url = block.get("url")
+ file_id = block.get("file_id")
+ if url:
+ return f""
+ if file_id:
+ return f""
+ return None
+
+ # OpenAI-style image_url blocks
+ if block_type == "image_url":
+ image_url = block.get("image_url", {})
+ if isinstance(image_url, dict):
+ url = image_url.get("url", "")
+ if url and not url.startswith("data:"):
+ return f""
+ return None
+
+ # Audio blocks (URL only)
+ if block_type == "audio":
+ url = block.get("url")
+ file_id = block.get("file_id")
+ if url:
+ return f""
+ if file_id:
+ return f""
+ return None
+
+ # Video blocks (URL only)
+ if block_type == "video":
+ url = block.get("url")
+ file_id = block.get("file_id")
+ if url:
+ return f""
+ if file_id:
+ return f""
+ return None
+
+ # Plain text document blocks
+ if block_type == "text-plain":
+ text = block.get("text", "")
+ return escape(_truncate(text)) if text else None
+
+ # Server tool call blocks (from AI messages)
+ if block_type == "server_tool_call":
+ tc_id = quoteattr(str(block.get("id") or ""))
+ tc_name = quoteattr(str(block.get("name") or ""))
+ tc_args_json = json.dumps(block.get("args", {}), ensure_ascii=False)
+ tc_args = escape(_truncate(tc_args_json))
+ return (
+ f"{tc_args}"
+ )
+
+ # Server tool result blocks
+ if block_type == "server_tool_result":
+ tool_call_id = quoteattr(str(block.get("tool_call_id") or ""))
+ status = quoteattr(str(block.get("status") or ""))
+ output = block.get("output")
+ if output:
+ output_json = json.dumps(output, ensure_ascii=False)
+ output_str = escape(_truncate(output_json))
+ else:
+ output_str = ""
+ return (
+ f""
+ f"{output_str}"
+ )
+
+ # Unknown block type - skip silently
+ return None
+
+
+def _get_message_type_str(
+ m: BaseMessage,
+ human_prefix: str,
+ ai_prefix: str,
+ system_prefix: str,
+ function_prefix: str,
+ tool_prefix: str,
+) -> str:
+ """Get the type string for XML message element.
+
+ Args:
+ m: The message to get the type string for.
+ human_prefix: The prefix to use for `HumanMessage`.
+ ai_prefix: The prefix to use for `AIMessage`.
+ system_prefix: The prefix to use for `SystemMessage`.
+ function_prefix: The prefix to use for `FunctionMessage`.
+ tool_prefix: The prefix to use for `ToolMessage`.
+
+ Returns:
+ The type string for the message element.
+
+ Raises:
+ ValueError: If an unsupported message type is encountered.
+ """
+ if isinstance(m, HumanMessage):
+ return human_prefix.lower()
+ if isinstance(m, AIMessage):
+ return ai_prefix.lower()
+ if isinstance(m, SystemMessage):
+ return system_prefix.lower()
+ if isinstance(m, FunctionMessage):
+ return function_prefix.lower()
+ if isinstance(m, ToolMessage):
+ return tool_prefix.lower()
+ if isinstance(m, ChatMessage):
+ return m.role
+ msg = f"Got unsupported message type: {m}"
+ raise ValueError(msg)
+
+
def get_buffer_string(
messages: Sequence[BaseMessage],
human_prefix: str = "Human",
ai_prefix: str = "AI",
+ *,
+ system_prefix: str = "System",
+ function_prefix: str = "Function",
+ tool_prefix: str = "Tool",
message_separator: str = "\n",
+ format: Literal["prefix", "xml"] = "prefix", # noqa: A002
) -> str:
r"""Convert a sequence of messages to strings and concatenate them into one string.
@@ -110,7 +299,15 @@ def get_buffer_string(
messages: Messages to be converted to strings.
human_prefix: The prefix to prepend to contents of `HumanMessage`s.
ai_prefix: The prefix to prepend to contents of `AIMessage`.
+ system_prefix: The prefix to prepend to contents of `SystemMessage`s.
+ function_prefix: The prefix to prepend to contents of `FunctionMessage`s.
+ tool_prefix: The prefix to prepend to contents of `ToolMessage`s.
message_separator: The separator to use between messages.
+ format: The output format. `'prefix'` uses `Role: content` format (default).
+
+ `'xml'` uses XML-style `<message type="role">` format with proper character
+ escaping, which is useful when message content may contain role-like
+ prefixes that could cause ambiguity.
Returns:
A single string concatenation of all input messages.
@@ -123,9 +320,33 @@ def get_buffer_string(
and a function call under `additional_kwargs["function_call"]`, only the tool
calls will be appended to the string representation.
+ When using `format='xml'`:
+
+ - All messages use uniform `<message type="role">content</message>` format.
+ - The `type` attribute uses `human_prefix` (lowercased) for `HumanMessage`,
+ `ai_prefix` (lowercased) for `AIMessage`, `system_prefix` (lowercased)
+ for `SystemMessage`, `function_prefix` (lowercased) for `FunctionMessage`,
+ `tool_prefix` (lowercased) for `ToolMessage`, and the original role
+ (unchanged) for `ChatMessage`.
+ - Message content is escaped using `xml.sax.saxutils.escape()`.
+ - Attribute values are escaped using `xml.sax.saxutils.quoteattr()`.
+ - AI messages with tool calls use nested structure with `<content>` and
+ `<tool_call>` elements.
+ - For multi-modal content (list of content blocks), supported block types
+ are: `text`, `reasoning`, `image` (URL/file_id only), `image_url`
+ (OpenAI-style, URL only), `audio` (URL/file_id only), `video` (URL/file_id
+ only), `text-plain`, `server_tool_call`, and `server_tool_result`.
+ - Content blocks with base64-encoded data are skipped (including blocks
+ with `base64` field or `data:` URLs).
+ - Unknown block types are skipped.
+ - Plain text document content (`text-plain`), server tool call arguments,
+ and server tool result outputs are truncated to 500 characters.
+
Example:
+ Default prefix format:
+
```python
- from langchain_core import AIMessage, HumanMessage
+ from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string
messages = [
HumanMessage(content="Hi, how are you?"),
@@ -134,7 +355,54 @@ def get_buffer_string(
get_buffer_string(messages)
# -> "Human: Hi, how are you?\nAI: Good, how are you?"
```
+
+ XML format (useful when content contains role-like prefixes):
+
+ ```python
+ messages = [
+ HumanMessage(content="Example: Human: some text"),
+ AIMessage(content="I see the example."),
+ ]
+ get_buffer_string(messages, format="xml")
+ # -> '<message type="human">Example: Human: some text</message>\\n'
+ # -> '<message type="ai">I see the example.</message>'
+ ```
+
+ XML format with special characters (automatically escaped):
+
+ ```python
+ messages = [
+ HumanMessage(content="Is 5 < 10 & 10 > 5?"),
+ ]
+ get_buffer_string(messages, format="xml")
+ # -> '<message type="human">Is 5 &lt; 10 &amp; 10 &gt; 5?</message>'
+ ```
+
+ XML format with tool calls:
+
+ ```python
+ messages = [
+ AIMessage(
+ content="I'll search for that.",
+ tool_calls=[
+ {"id": "call_123", "name": "search", "args": {"query": "weather"}}
+ ],
+ ),
+ ]
+ get_buffer_string(messages, format="xml")
+ # -> '\\n'
+ # -> ' I\\'ll search for that.\\n'
+ # -> ' '
+ # -> '{"query": "weather"}\\n'
+ # -> ''
+ ```
"""
+ if format not in ("prefix", "xml"):
+ msg = (
+ f"Unrecognized format={format!r}. Supported formats are 'prefix' and 'xml'."
+ )
+ raise ValueError(msg)
+
string_messages = []
for m in messages:
if isinstance(m, HumanMessage):
@@ -142,25 +410,92 @@ def get_buffer_string(
elif isinstance(m, AIMessage):
role = ai_prefix
elif isinstance(m, SystemMessage):
- role = "System"
+ role = system_prefix
elif isinstance(m, FunctionMessage):
- role = "Function"
+ role = function_prefix
elif isinstance(m, ToolMessage):
- role = "Tool"
+ role = tool_prefix
elif isinstance(m, ChatMessage):
role = m.role
else:
msg = f"Got unsupported message type: {m}"
raise ValueError(msg) # noqa: TRY004
- message = f"{role}: {m.text}"
+ if format == "xml":
+ msg_type = _get_message_type_str(
+ m, human_prefix, ai_prefix, system_prefix, function_prefix, tool_prefix
+ )
- if isinstance(m, AIMessage):
- if m.tool_calls:
- message += f"{m.tool_calls}"
- elif "function_call" in m.additional_kwargs:
- # Legacy behavior assumes only one function call per message
- message += f"{m.additional_kwargs['function_call']}"
+ # Format content blocks
+ if isinstance(m.content, str):
+ content_parts = [escape(m.content)] if m.content else []
+ else:
+ # List of content blocks
+ content_parts = []
+ for block in m.content:
+ if isinstance(block, str):
+ if block:
+ content_parts.append(escape(block))
+ else:
+ formatted = _format_content_block_xml(block)
+ if formatted:
+ content_parts.append(formatted)
+
+ # Check if this is an AIMessage with tool calls
+ has_tool_calls = isinstance(m, AIMessage) and m.tool_calls
+ has_function_call = (
+ isinstance(m, AIMessage)
+ and not m.tool_calls
+ and "function_call" in m.additional_kwargs
+ )
+
+ if has_tool_calls or has_function_call:
+ # Use nested structure for AI messages with tool calls
+ # Type narrowing: at this point m is AIMessage (verified above)
+ ai_msg = cast("AIMessage", m)
+ parts = [f""]
+ if content_parts:
+ parts.append(f" {' '.join(content_parts)}")
+
+ if has_tool_calls:
+ for tc in ai_msg.tool_calls:
+ tc_id = quoteattr(str(tc.get("id") or ""))
+ tc_name = quoteattr(str(tc.get("name") or ""))
+ tc_args = escape(
+ json.dumps(tc.get("args", {}), ensure_ascii=False)
+ )
+ parts.append(
+ f" "
+ f"{tc_args}"
+ )
+ elif has_function_call:
+ fc = ai_msg.additional_kwargs["function_call"]
+ fc_name = quoteattr(str(fc.get("name") or ""))
+ fc_args = escape(str(fc.get("arguments") or "{}"))
+ parts.append(
+ f" {fc_args}"
+ )
+
+ parts.append("")
+ message = "\n".join(parts)
+ else:
+ # Simple structure for messages without tool calls
+ joined_content = " ".join(content_parts)
+ message = (
+ f"{joined_content}"
+ )
+ else: # format == "prefix"
+ content = m.text
+ message = f"{role}: {content}"
+ tool_info = ""
+ if isinstance(m, AIMessage):
+ if m.tool_calls:
+ tool_info = str(m.tool_calls)
+ elif "function_call" in m.additional_kwargs:
+ # Legacy behavior assumes only one function call per message
+ tool_info = str(m.additional_kwargs["function_call"])
+ if tool_info:
+ message += tool_info # Preserve original behavior
string_messages.append(message)
diff --git a/libs/core/tests/unit_tests/messages/test_utils.py b/libs/core/tests/unit_tests/messages/test_utils.py
index 72a7ed46483..f206db83cd6 100644
--- a/libs/core/tests/unit_tests/messages/test_utils.py
+++ b/libs/core/tests/unit_tests/messages/test_utils.py
@@ -11,6 +11,8 @@ from langchain_core.language_models.fake_chat_models import FakeChatModel
from langchain_core.messages import (
AIMessage,
BaseMessage,
+ ChatMessage,
+ FunctionMessage,
HumanMessage,
SystemMessage,
ToolCall,
@@ -1778,3 +1780,884 @@ def test_convert_to_openai_messages_reasoning_content() -> None:
],
}
assert mixed_result == expected_mixed
+
+
+# Tests for get_buffer_string XML format
+
+
+def test_get_buffer_string_xml_empty_messages_list() -> None:
+ """Test XML format with empty messages list."""
+ messages: list[BaseMessage] = []
+ result = get_buffer_string(messages, format="xml")
+ expected = ""
+ assert result == expected
+
+
+def test_get_buffer_string_xml_basic() -> None:
+ """Test XML format output with all message types."""
+ messages = [
+ SystemMessage(content="System message"),
+ HumanMessage(content="Human message"),
+ AIMessage(content="AI message"),
+ FunctionMessage(content="Function result", name="test_fn"),
+ ToolMessage(content="Tool result", tool_call_id="123"),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ expected = (
+ 'System message\n'
+ 'Human message\n'
+ 'AI message\n'
+ 'Function result\n'
+ 'Tool result'
+ )
+ assert result == expected
+
+
+def test_get_buffer_string_xml_custom_prefixes() -> None:
+ """Test XML format with custom human and ai prefixes."""
+ messages = [
+ HumanMessage(content="Hello"),
+ AIMessage(content="Hi there"),
+ ]
+ result = get_buffer_string(
+ messages, human_prefix="User", ai_prefix="Assistant", format="xml"
+ )
+ expected = (
+ 'Hello\n'
+ 'Hi there'
+ )
+ assert result == expected
+
+
+def test_get_buffer_string_xml_custom_separator() -> None:
+ """Test XML format with custom message separator."""
+ messages = [
+ HumanMessage(content="Hello"),
+ AIMessage(content="Hi there"),
+ ]
+ result = get_buffer_string(messages, format="xml", message_separator="\n\n")
+ expected = (
+ 'Hello\n\nHi there'
+ )
+ assert result == expected
+
+
+def test_get_buffer_string_prefix_custom_separator() -> None:
+ """Test prefix format with custom message separator."""
+ messages = [
+ HumanMessage(content="Hello"),
+ AIMessage(content="Hi there"),
+ ]
+ result = get_buffer_string(messages, format="prefix", message_separator=" | ")
+ expected = "Human: Hello | AI: Hi there"
+ assert result == expected
+
+
+def test_get_buffer_string_xml_escaping() -> None:
+ """Test XML format properly escapes special characters in content."""
+ messages = [
+ HumanMessage(content="Is 5 < 10 & 10 > 5?"),
+ AIMessage(content='Yes, and here\'s a "quote"'),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ # xml.sax.saxutils.escape escapes <, >, & (not quotes in content)
+ expected = (
+ 'Is 5 < 10 & 10 > 5?\n'
+ 'Yes, and here\'s a "quote"'
+ )
+ assert result == expected
+
+
+def test_get_buffer_string_xml_unicode_content() -> None:
+ """Test XML format with Unicode content."""
+ messages = [
+ HumanMessage(content="你好世界"), # Chinese: Hello World
+ AIMessage(content="こんにちは"), # Japanese: Hello
+ ]
+ result = get_buffer_string(messages, format="xml")
+ expected = (
+ '你好世界\n'
+ 'こんにちは'
+ )
+ assert result == expected
+
+
+def test_get_buffer_string_xml_chat_message_valid_role() -> None:
+ """Test XML format with `ChatMessage` having valid XML tag name role."""
+ messages = [
+ ChatMessage(content="Hello", role="Assistant"),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ # Role is used directly as the type attribute value
+ expected = 'Hello'
+ assert result == expected
+
+ # Spaces in role
+ messages = [
+ ChatMessage(content="Hello", role="my custom role"),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ # Custom roles with spaces use quoteattr for proper escaping
+ expected = 'Hello'
+ assert result == expected
+
+ # Special characters in role
+ messages = [
+ ChatMessage(content="Hello", role='role"with'),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ # quoteattr handles escaping of special characters in attribute values
+ # Note: quoteattr uses single quotes when the string contains double quotes
+ expected = """Hello"""
+ assert result == expected
+
+
+def test_get_buffer_string_xml_empty_content() -> None:
+ """Test XML format with empty content."""
+ messages = [
+ HumanMessage(content=""),
+ AIMessage(content=""),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ expected = '\n'
+ assert result == expected
+
+
+def test_get_buffer_string_xml_tool_calls_with_content() -> None:
+ """Test XML format with `AIMessage` having both `content` and `tool_calls`."""
+ messages = [
+ AIMessage(
+ content="Let me check that",
+ tool_calls=[
+ {
+ "name": "get_weather",
+ "args": {"city": "NYC"},
+ "id": "call_1",
+ "type": "tool_call",
+ }
+ ],
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ # Nested structure with content and tool_call elements
+ expected = (
+ '\n'
+ " Let me check that\n"
+ ' {"city": "NYC"}\n'
+ ""
+ )
+ assert result == expected
+
+
+def test_get_buffer_string_xml_tool_calls_empty_content() -> None:
+ """Test XML format with `AIMessage` having empty `content` and `tool_calls`."""
+ messages = [
+ AIMessage(
+ content="",
+ tool_calls=[
+ {
+ "name": "search",
+ "args": {"query": "test"},
+ "id": "call_2",
+ "type": "tool_call",
+ }
+ ],
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ # No content element when content is empty
+ expected = (
+ '\n'
+ ' {"query": "test"}\n'
+ ""
+ )
+ assert result == expected
+
+
+def test_get_buffer_string_xml_tool_calls_escaping() -> None:
+ """Test XML format escapes special characters in tool calls."""
+ messages = [
+ AIMessage(
+ content="",
+ tool_calls=[
+ {
+ "name": "calculate",
+ "args": {"expression": "5 < 10 & 10 > 5"},
+ "id": "call_3",
+ "type": "tool_call",
+ }
+ ],
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ # Special characters in tool_calls args should be escaped
+ assert "<" in result
+ assert ">" in result
+ assert "&" in result
+ # Verify overall structure
+ assert result.startswith('')
+ assert result.endswith("")
+
+
+def test_get_buffer_string_xml_function_call_legacy() -> None:
+ """Test XML format with legacy `function_call` in `additional_kwargs`."""
+ messages = [
+ AIMessage(
+ content="Calling function",
+ additional_kwargs={
+ "function_call": {"name": "test_fn", "arguments": '{"x": 1}'}
+ },
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ # Nested structure with function_call element
+ # Note: arguments is a string, so quotes inside are escaped
+ expected = (
+ '\n'
+ " Calling function\n"
+ ' {"x": 1}\n'
+ ""
+ )
+ assert result == expected
+
+
+def test_get_buffer_string_xml_structured_content() -> None:
+ """Test XML format with structured content (list content blocks)."""
+ messages = [
+ HumanMessage(content=[{"type": "text", "text": "Hello, world!"}]),
+ AIMessage(content=[{"type": "text", "text": "Hi there!"}]),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ # message.text property should extract text from structured content
+ expected = (
+ 'Hello, world!\n'
+ 'Hi there!'
+ )
+ assert result == expected
+
+
+def test_get_buffer_string_xml_multiline_content() -> None:
+ """Test XML format with multiline content."""
+ messages = [
+ HumanMessage(content="Line 1\nLine 2\nLine 3"),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ expected = 'Line 1\nLine 2\nLine 3'
+ assert result == expected
+
+
+def test_get_buffer_string_xml_tool_calls_preferred_over_function_call() -> None:
+ """Test that `tool_calls` takes precedence over legacy `function_call` in XML."""
+ messages = [
+ AIMessage(
+ content="Calling tools",
+ tool_calls=[
+ {
+ "name": "modern_tool",
+ "args": {"key": "value"},
+ "id": "call_3",
+ "type": "tool_call",
+ }
+ ],
+ additional_kwargs={
+ "function_call": {"name": "legacy_function", "arguments": "{}"}
+ },
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ assert "modern_tool" in result
+ assert "legacy_function" not in result
+ # Should use tool_call element, not function_call
+ assert " None:
+ """Test XML format with `AIMessage` having multiple `tool_calls`."""
+ messages = [
+ AIMessage(
+ content="I'll help with that",
+ tool_calls=[
+ {
+ "name": "get_weather",
+ "args": {"city": "NYC"},
+ "id": "call_1",
+ "type": "tool_call",
+ },
+ {
+ "name": "get_time",
+ "args": {"timezone": "EST"},
+ "id": "call_2",
+ "type": "tool_call",
+ },
+ ],
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ # Should have nested structure with multiple tool_call elements
+ expected = (
+ '\n'
+ " I'll help with that\n"
+ ' {"city": "NYC"}\n'
+ ' {"timezone": "EST"}\n'
+ ""
+ )
+ assert result == expected
+
+
+def test_get_buffer_string_xml_tool_call_special_chars_in_attrs() -> None:
+ """Test that tool call attributes with quotes are properly escaped."""
+ messages: list[BaseMessage] = [
+ AIMessage(
+ content="",
+ tool_calls=[
+ {
+ "name": 'search"with"quotes',
+ "args": {"query": "test"},
+ "id": 'call"id',
+ "type": "tool_call",
+ },
+ ],
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ # quoteattr uses single quotes when value contains double quotes
+ assert "name='search\"with\"quotes'" in result
+ assert "id='call\"id'" in result
+
+
+def test_get_buffer_string_xml_tool_call_none_id() -> None:
+ """Test that tool calls with `None` id are handled correctly."""
+ messages: list[BaseMessage] = [
+ AIMessage(
+ content="",
+ tool_calls=[
+ {
+ "name": "search",
+ "args": {},
+ "id": None,
+ "type": "tool_call",
+ },
+ ],
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ # Should handle None by converting to empty string
+ assert 'id=""' in result
+
+
+def test_get_buffer_string_xml_function_call_special_chars_in_name() -> None:
+ """Test that `function_call` name with quotes is properly escaped."""
+ messages: list[BaseMessage] = [
+ AIMessage(
+ content="",
+ additional_kwargs={
+ "function_call": {
+ "name": 'func"name',
+ "arguments": "{}",
+ }
+ },
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ # quoteattr uses single quotes when value contains double quotes
+ assert "name='func\"name'" in result
+
+
+def test_get_buffer_string_invalid_format() -> None:
+ """Test that invalid format values raise `ValueError`."""
+ messages: list[BaseMessage] = [HumanMessage(content="Hello")]
+ with pytest.raises(ValueError, match="Unrecognized format"):
+ get_buffer_string(messages, format="xm") # type: ignore[arg-type]
+ with pytest.raises(ValueError, match="Unrecognized format"):
+ get_buffer_string(messages, format="invalid") # type: ignore[arg-type]
+ with pytest.raises(ValueError, match="Unrecognized format"):
+ get_buffer_string(messages, format="") # type: ignore[arg-type]
+
+
+def test_get_buffer_string_xml_image_url_block() -> None:
+ """Test XML format with image content block containing URL."""
+ messages: list[BaseMessage] = [
+ HumanMessage(
+ content=[
+ {"type": "text", "text": "What is in this image?"},
+ {"type": "image", "url": "https://example.com/image.png"},
+ ]
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ assert '' in result
+ assert "What is in this image?" in result
+ assert '' in result
+
+
+def test_get_buffer_string_xml_image_file_id_block() -> None:
+ """Test XML format with image content block containing `file_id`."""
+ messages: list[BaseMessage] = [
+ HumanMessage(
+ content=[
+ {"type": "text", "text": "Describe this:"},
+ {"type": "image", "file_id": "file-abc123"},
+ ]
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ assert '' in result
+
+
+def test_get_buffer_string_xml_image_base64_skipped() -> None:
+ """Test XML format skips image blocks with base64 data."""
+ messages: list[BaseMessage] = [
+ HumanMessage(
+ content=[
+ {"type": "text", "text": "What is this?"},
+ {"type": "image", "base64": "iVBORw0KGgo...", "mime_type": "image/png"},
+ ]
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ assert "What is this?" in result
+ assert "base64" not in result
+ assert "iVBORw0KGgo" not in result
+
+
+def test_get_buffer_string_xml_image_data_url_skipped() -> None:
+ """Test XML format skips image blocks with data: URLs."""
+ messages: list[BaseMessage] = [
+ HumanMessage(
+ content=[
+ {"type": "text", "text": "Check this:"},
+ {"type": "image", "url": "data:image/png;base64,iVBORw0KGgo..."},
+ ]
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ assert "Check this:" in result
+ assert "data:image" not in result
+
+
+def test_get_buffer_string_xml_openai_image_url_block() -> None:
+ """Test XML format with OpenAI-style `image_url` block."""
+ messages: list[BaseMessage] = [
+ HumanMessage(
+ content=[
+ {"type": "text", "text": "Analyze this:"},
+ {
+ "type": "image_url",
+ "image_url": {"url": "https://example.com/photo.jpg"},
+ },
+ ]
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ assert "Analyze this:" in result
+ assert '' in result
+
+
+def test_get_buffer_string_xml_openai_image_url_data_skipped() -> None:
+ """Test XML format skips OpenAI-style `image_url` blocks with data: URLs."""
+ messages: list[BaseMessage] = [
+ HumanMessage(
+ content=[
+ {"type": "text", "text": "See this:"},
+ {
+ "type": "image_url",
+ "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQ..."},
+ },
+ ]
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ assert "See this:" in result
+ assert "data:image" not in result
+ assert "/9j/4AAQ" not in result
+
+
+def test_get_buffer_string_xml_audio_url_block() -> None:
+ """Test XML format with audio content block containing URL."""
+ messages: list[BaseMessage] = [
+ HumanMessage(
+ content=[
+ {"type": "text", "text": "Transcribe this:"},
+ {"type": "audio", "url": "https://example.com/audio.mp3"},
+ ]
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ assert "Transcribe this:" in result
+ assert '' in result
+
+
+def test_get_buffer_string_xml_audio_base64_skipped() -> None:
+ """Test XML format skips audio blocks with base64 data."""
+ messages: list[BaseMessage] = [
+ HumanMessage(
+ content=[
+ {"type": "text", "text": "Listen:"},
+ {"type": "audio", "base64": "UklGRi...", "mime_type": "audio/wav"},
+ ]
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ assert "Listen:" in result
+ assert "UklGRi" not in result
+
+
+def test_get_buffer_string_xml_video_url_block() -> None:
+ """Test XML format with video content block containing URL."""
+ messages: list[BaseMessage] = [
+ HumanMessage(
+ content=[
+ {"type": "text", "text": "Describe this video:"},
+ {"type": "video", "url": "https://example.com/video.mp4"},
+ ]
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ assert "Describe this video:" in result
+ assert '' in result
+
+
+def test_get_buffer_string_xml_video_base64_skipped() -> None:
+ """Test XML format skips video blocks with base64 data."""
+ messages: list[BaseMessage] = [
+ HumanMessage(
+ content=[
+ {"type": "text", "text": "Watch:"},
+ {"type": "video", "base64": "AAAAFGZ0eXA...", "mime_type": "video/mp4"},
+ ]
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ assert "Watch:" in result
+ assert "AAAAFGZ0eXA" not in result
+
+
+def test_get_buffer_string_xml_reasoning_block() -> None:
+ """Test XML format with reasoning content block."""
+ messages: list[BaseMessage] = [
+ AIMessage(
+ content=[
+ {"type": "reasoning", "reasoning": "Let me think about this..."},
+ {"type": "text", "text": "The answer is 42."},
+ ]
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ assert "Let me think about this..." in result
+ assert "The answer is 42." in result
+
+
+def test_get_buffer_string_xml_text_plain_block() -> None:
+ """Test XML format with text-plain content block."""
+ messages: list[BaseMessage] = [
+ HumanMessage(
+ content=[
+ {"type": "text", "text": "Here is a document:"},
+ {
+ "type": "text-plain",
+ "text": "Document content here.",
+ "mime_type": "text/plain",
+ },
+ ]
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ assert "Here is a document:" in result
+ assert "Document content here." in result
+
+
+def test_get_buffer_string_xml_server_tool_call_block() -> None:
+ """Test XML format with server_tool_call content block."""
+ messages: list[BaseMessage] = [
+ AIMessage(
+ content=[
+ {"type": "text", "text": "Let me search for that."},
+ {
+ "type": "server_tool_call",
+ "id": "call_123",
+ "name": "web_search",
+ "args": {"query": "weather today"},
+ },
+ ]
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ assert "Let me search for that." in result
+ assert '' in result
+ assert '{"query": "weather today"}' in result
+ assert "" in result
+
+
+def test_get_buffer_string_xml_server_tool_result_block() -> None:
+ """Test XML format with server_tool_result content block."""
+ messages: list[BaseMessage] = [
+ HumanMessage(
+ content=[
+ {
+ "type": "server_tool_result",
+ "tool_call_id": "call_123",
+ "status": "success",
+ "output": {"temperature": 72, "conditions": "sunny"},
+ },
+ ]
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ assert '' in result
+ assert '"temperature": 72' in result
+ assert "" in result
+
+
+def test_get_buffer_string_xml_unknown_block_type_skipped() -> None:
+ """Test XML format silently skips unknown block types."""
+ messages: list[BaseMessage] = [
+ HumanMessage(
+ content=[
+ {"type": "text", "text": "Hello"},
+ {"type": "unknown_type", "data": "some data"},
+ ]
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ assert "Hello" in result
+ assert "unknown_type" not in result
+ assert "some data" not in result
+
+
+def test_get_buffer_string_xml_mixed_content_blocks() -> None:
+ """Test XML format with multiple different content block types."""
+ messages: list[BaseMessage] = [
+ HumanMessage(
+ content=[
+ {"type": "text", "text": "Look at this image and document:"},
+ {"type": "image", "url": "https://example.com/img.png"},
+ {
+ "type": "text-plain",
+ "text": "Doc content",
+ "mime_type": "text/plain",
+ },
+ # This should be skipped (base64)
+ {"type": "image", "base64": "abc123", "mime_type": "image/png"},
+ ]
+ ),
+ ]
+ result = get_buffer_string(messages, format="xml")
+ assert "Look at this image and document:" in result
+ assert '' in result
+ assert "Doc content" in result
+ assert "abc123" not in result
+
+
+def test_get_buffer_string_xml_escaping_in_content_blocks() -> None:
+    """Test that `<`, `>` and `&` in block text reach the XML output escaped."""
+    messages: list[BaseMessage] = [
+        HumanMessage(
+            content=[
+                {"type": "text", "text": "Is 5 < 10 & 10 > 5?"},
+                {"type": "reasoning", "reasoning": "Let's check: <value> & </value>"},
+            ]
+        ),
+    ]
+    result = get_buffer_string(messages, format="xml")
+    assert "Is 5 &lt; 10 &amp; 10 &gt; 5?" in result
+    assert "&lt;value&gt; &amp; &lt;/value&gt;" in result
+
+
+def test_get_buffer_string_xml_url_with_special_chars() -> None:
+    """Test that a `&` inside an image URL is entity-escaped in the XML output."""
+    messages: list[BaseMessage] = [
+        HumanMessage(
+            content=[
+                {"type": "image", "url": "https://example.com/img?a=1&b=2"},
+            ]
+        ),
+    ]
+    result = get_buffer_string(messages, format="xml")
+    # quoteattr rewrites the & in the URL as &amp; inside the attribute value
+    assert "https://example.com/img?a=1&amp;b=2" in result
+
+
+def test_get_buffer_string_xml_text_plain_truncation() -> None:
+    """Check that a 600-char text-plain block is cut to 500 chars plus an ellipsis."""
+    oversized = "x" * 600
+    conversation: list[BaseMessage] = [
+        HumanMessage(
+            content=[
+                {"type": "text-plain", "text": oversized, "mime_type": "text/plain"},
+            ]
+        ),
+    ]
+    rendered = get_buffer_string(conversation, format="xml")
+    # 500 characters followed by the marker must appear; a 501st must not.
+    assert f"{'x' * 500}..." in rendered
+    assert "x" * 501 not in rendered
+
+
+def test_get_buffer_string_xml_server_tool_call_args_truncation() -> None:
+    """Check that oversized server_tool_call args are shortened in XML output."""
+    oversized = "y" * 600
+    conversation: list[BaseMessage] = [
+        AIMessage(
+            content=[
+                {
+                    "type": "server_tool_call",
+                    "id": "call_1",
+                    "name": "test_tool",
+                    "args": {"data": oversized},
+                },
+            ]
+        ),
+    ]
+    rendered = get_buffer_string(conversation, format="xml")
+    assert "..." in rendered
+    # The untruncated 600-char argument must be absent from the output.
+    assert oversized not in rendered
+
+
+def test_get_buffer_string_xml_server_tool_result_output_truncation() -> None:
+    """Check that oversized server_tool_result output is shortened in XML output."""
+    oversized = "z" * 600
+    conversation: list[BaseMessage] = [
+        HumanMessage(
+            content=[
+                {
+                    "type": "server_tool_result",
+                    "tool_call_id": "call_1",
+                    "status": "success",
+                    "output": {"result": oversized},
+                },
+            ]
+        ),
+    ]
+    rendered = get_buffer_string(conversation, format="xml")
+    assert "..." in rendered
+    # The untruncated 600-char output must be absent from the result.
+    assert oversized not in rendered
+
+
+def test_get_buffer_string_xml_no_truncation_under_limit() -> None:
+    """Check that a 400-char text-plain block appears whole, with no "..." marker."""
+    compact = "a" * 400
+    conversation: list[BaseMessage] = [
+        HumanMessage(
+            content=[
+                {"type": "text-plain", "text": compact, "mime_type": "text/plain"},
+            ]
+        ),
+    ]
+    rendered = get_buffer_string(conversation, format="xml")
+    assert compact in rendered
+    assert "..." not in rendered
+
+
+def test_get_buffer_string_custom_system_prefix() -> None:
+    """Check that a custom `system_prefix` labels the system message line."""
+    conversation: list[BaseMessage] = [
+        SystemMessage(content="You are a helpful assistant."),
+        HumanMessage(content="Hello"),
+    ]
+    expected = "Instructions: You are a helpful assistant.\nHuman: Hello"
+    assert get_buffer_string(conversation, system_prefix="Instructions") == expected
+
+
+def test_get_buffer_string_custom_function_prefix() -> None:
+    """Check that a custom `function_prefix` labels the function message line."""
+    conversation: list[BaseMessage] = [
+        HumanMessage(content="Call a function"),
+        FunctionMessage(name="test_func", content="Function result"),
+    ]
+    expected = "Human: Call a function\nFunc: Function result"
+    assert get_buffer_string(conversation, function_prefix="Func") == expected
+
+
+def test_get_buffer_string_custom_tool_prefix() -> None:
+    """Check that a custom `tool_prefix` labels the tool message line."""
+    conversation: list[BaseMessage] = [
+        HumanMessage(content="Use a tool"),
+        ToolMessage(tool_call_id="call_123", content="Tool result"),
+    ]
+    expected = "Human: Use a tool\nToolResult: Tool result"
+    assert get_buffer_string(conversation, tool_prefix="ToolResult") == expected
+
+
+def test_get_buffer_string_all_custom_prefixes() -> None:
+    """Check the default-format output when all five role prefixes are overridden."""
+    conversation: list[BaseMessage] = [
+        SystemMessage(content="System says hello"),
+        HumanMessage(content="Human says hello"),
+        AIMessage(content="AI says hello"),
+        FunctionMessage(name="func", content="Function says hello"),
+        ToolMessage(tool_call_id="call_1", content="Tool says hello"),
+    ]
+    rendered = get_buffer_string(
+        conversation,
+        human_prefix="User",
+        ai_prefix="Assistant",
+        system_prefix="Sys",
+        function_prefix="Fn",
+        tool_prefix="T",
+    )
+    expected_lines = (
+        "Sys: System says hello",
+        "User: Human says hello",
+        "Assistant: AI says hello",
+        "Fn: Function says hello",
+        "T: Tool says hello",
+    )
+    assert rendered == "\n".join(expected_lines)
+
+
+def test_get_buffer_string_xml_custom_system_prefix() -> None:
+ """Test `get_buffer_string` XML format with custom `system_prefix`."""
+ messages: list[BaseMessage] = [
+ SystemMessage(content="You are a helpful assistant."),
+ ]
+ result = get_buffer_string(messages, system_prefix="Instructions", format="xml")
+ assert (
+ result == 'You are a helpful assistant.'  # NOTE(review): markup looks stripped
+ )
+
+
+def test_get_buffer_string_xml_custom_function_prefix() -> None:
+ """Test `get_buffer_string` XML format with custom `function_prefix`."""
+ messages: list[BaseMessage] = [
+ FunctionMessage(name="test_func", content="Function result"),
+ ]
+ result = get_buffer_string(messages, function_prefix="Fn", format="xml")
+ assert result == 'Function result'  # NOTE(review): markup looks stripped
+
+
+def test_get_buffer_string_xml_custom_tool_prefix() -> None:
+ """Test `get_buffer_string` XML format with custom `tool_prefix`."""
+ messages: list[BaseMessage] = [
+ ToolMessage(tool_call_id="call_123", content="Tool result"),
+ ]
+ result = get_buffer_string(messages, tool_prefix="ToolOutput", format="xml")
+ assert result == 'Tool result'  # NOTE(review): markup looks stripped
+
+
+def test_get_buffer_string_xml_all_custom_prefixes() -> None:
+ """Test `get_buffer_string` XML format with all custom prefixes."""
+ messages: list[BaseMessage] = [
+ SystemMessage(content="System message"),
+ HumanMessage(content="Human message"),
+ AIMessage(content="AI message"),
+ FunctionMessage(name="func", content="Function message"),
+ ToolMessage(tool_call_id="call_1", content="Tool message"),
+ ]
+ result = get_buffer_string(
+ messages,
+ human_prefix="User",
+ ai_prefix="Assistant",
+ system_prefix="Sys",
+ function_prefix="Fn",
+ tool_prefix="T",
+ format="xml",
+ )
+ # The messages are processed in order, not by type
+ assert 'System message' in result  # NOTE(review): custom prefixes never asserted
+ assert 'Human message' in result
+ assert 'AI message' in result
+ assert 'Function message' in result
+ assert 'Tool message' in result