fix(langchain): switch summary format (#38171)

Part of https://github.com/langchain-ai/deepagents/issues/2873

---

`SummarizationMiddleware` now serializes the history passed to the
summarizer with XML formatting so URL-backed multimodal content remains
available in the prompt. The previous behavior avoided dumping raw
message metadata into the token budget, but its prefix serialization
path dropped image/audio/video URL blocks, so the summary model never
saw them.

## Changes

- Update `SummarizationMiddleware._create_summary` and
`SummarizationMiddleware._acreate_summary` to call
`get_buffer_string(..., format="xml")` for trimmed conversation history
- Preserve URL-backed multimodal blocks in the summary prompt while
still avoiding raw message metadata expansion
- Add sync and async unit coverage with a prompt-capturing chat model to
assert image URLs survive summarization input serialization

---------

Co-authored-by: Mason Daugherty <mason@langchain.dev>
Co-authored-by: Mason Daugherty <github@mdrxy.com>
This commit is contained in:
Nishitha M
2026-06-18 13:34:42 -04:00
committed by GitHub
parent 9ac8882a2c
commit 15c38c8555
2 changed files with 78 additions and 6 deletions

View File

@@ -797,9 +797,9 @@ class SummarizationMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, R
if not trimmed_messages:
return "Previous conversation was too long to summarize."
# Format messages to avoid token inflation from metadata when str() is called on
# message objects
formatted_messages = get_buffer_string(trimmed_messages)
# Serialize as XML so URL-based multimodal blocks remain visible in the summary
# prompt while excluding raw message metadata from the token budget.
formatted_messages = get_buffer_string(trimmed_messages, format="xml")
try:
response = self.model.invoke(
@@ -823,9 +823,9 @@ class SummarizationMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, R
if not trimmed_messages:
return "Previous conversation was too long to summarize."
# Format messages to avoid token inflation from metadata when str() is called on
# message objects
formatted_messages = get_buffer_string(trimmed_messages)
# Serialize as XML so URL-based multimodal blocks remain visible in the summary
# prompt while excluding raw message metadata from the token budget.
formatted_messages = get_buffer_string(trimmed_messages, format="xml")
try:
response = await self.model.ainvoke(

View File

@@ -1763,6 +1763,78 @@ def test_create_summary_uses_get_buffer_string_format() -> None:
)
class PromptCapturingModel(BaseChatModel):
    """Chat-model test double that records every prompt it is asked to answer.

    `invoke` and `ainvoke` are overridden so that they append the raw input to
    `captured_inputs` and return a fixed `AIMessage` whose content is
    `"Summary"`, without delegating to the base implementation. Tests can then
    inspect exactly what was handed to the model.
    """

    # Inputs received by `invoke`/`ainvoke`, in call order. Declared with
    # `exclude=True` so the captured prompts are not treated as model config.
    captured_inputs: list[LanguageModelInput] = Field(default_factory=list, exclude=True)

    @override
    def invoke(
        self,
        input: LanguageModelInput,
        config: RunnableConfig | None = None,
        *,
        stop: list[str] | None = None,
        **kwargs: Any,
    ) -> AIMessage:
        """Record `input` and return the canned summary message.

        `config`, `stop` and `kwargs` are accepted for signature compatibility
        and otherwise ignored.
        """
        reply = AIMessage(content="Summary")
        self.captured_inputs.append(input)
        return reply

    @override
    async def ainvoke(
        self,
        input: LanguageModelInput,
        config: RunnableConfig | None = None,
        *,
        stop: list[str] | None = None,
        **kwargs: Any,
    ) -> AIMessage:
        """Async counterpart of `invoke`: record `input`, return the canned message."""
        reply = AIMessage(content="Summary")
        self.captured_inputs.append(input)
        return reply

    @override
    def _generate(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: CallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Return a single fixed generation; nothing is captured on this path.

        NOTE(review): presumably present only to satisfy the `BaseChatModel`
        interface, since `invoke`/`ainvoke` above never call it.
        """
        generation = ChatGeneration(message=AIMessage(content="Summary"))
        return ChatResult(generations=[generation])

    @property
    def _llm_type(self) -> str:
        """Identifier string for this mock model type."""
        return "prompt-capturing"
@pytest.mark.parametrize("use_async", [False, True], ids=["sync", "async"])
async def test_create_summary_preserves_image_urls(use_async: bool) -> None:  # noqa: FBT001
    """Check that an image URL in the history reaches the summarizer prompt.

    Parametrized over `_create_summary` (sync) and `_acreate_summary` (async).
    """
    url = "https://example.com/shared-image.png"
    question = HumanMessage(
        content=[
            {"type": "text", "text": "What is in this image?"},
            {"type": "image_url", "image_url": {"url": url}},
        ]
    )
    history: list[AnyMessage] = [question, AIMessage(content="The image shows a cat.")]

    recorder = PromptCapturingModel()
    summarizer = SummarizationMiddleware(model=recorder, trigger=("tokens", 1000))

    result = (
        await summarizer._acreate_summary(history)
        if use_async
        else summarizer._create_summary(history)
    )

    # The mock model always answers with this fixed content.
    assert result == "Summary"

    captured = recorder.captured_inputs[0]
    assert isinstance(captured, str)
    # The serialized history handed to the summarizer must still contain the URL.
    assert url in captured
@pytest.mark.requires("langchain_anthropic")
def test_usage_metadata_trigger() -> None:
model = init_chat_model("anthropic:claude-sonnet-4-5")