fix(langchain): switch summary format (#38171)

Part of https://github.com/langchain-ai/deepagents/issues/2873

---

`SummarizationMiddleware` now serializes the history passed to the summarizer with XML formatting so URL-backed multimodal content remains available in the prompt. The existing behavior avoided dumping raw message metadata into the token budget, but the prefix serialization path omitted image/audio/video URL blocks before the summary model saw them.

## Changes

- Update `SummarizationMiddleware._create_summary` and `SummarizationMiddleware._acreate_summary` to call `get_buffer_string(..., format="xml")` for trimmed conversation history
- Preserve URL-backed multimodal blocks in the summary prompt while still avoiding raw message metadata expansion
- Add sync and async unit coverage with a prompt-capturing chat model to assert image URLs survive summarization input serialization

---------

Co-authored-by: Mason Daugherty <mason@langchain.dev>
Co-authored-by: Mason Daugherty <github@mdrxy.com>
2026-07-01 22:59:06 +00:00 · 2026-06-18 13:34:42 -04:00
parent 9ac8882a2c
commit 15c38c8555
2 changed files with 78 additions and 6 deletions
--- a/libs/langchain_v1/langchain/agents/middleware/summarization.py
+++ b/libs/langchain_v1/langchain/agents/middleware/summarization.py
@@ -797,9 +797,9 @@ class SummarizationMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, R
        if not trimmed_messages:
            return "Previous conversation was too long to summarize."

-        # Format messages to avoid token inflation from metadata when str() is called on
-        # message objects
-        formatted_messages = get_buffer_string(trimmed_messages)
+        # Serialize as XML so URL-based multimodal blocks remain visible in the summary
+        # prompt while excluding raw message metadata from the token budget.
+        formatted_messages = get_buffer_string(trimmed_messages, format="xml")

        try:
            response = self.model.invoke(
@@ -823,9 +823,9 @@ class SummarizationMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, R
        if not trimmed_messages:
            return "Previous conversation was too long to summarize."

-        # Format messages to avoid token inflation from metadata when str() is called on
-        # message objects
-        formatted_messages = get_buffer_string(trimmed_messages)
+        # Serialize as XML so URL-based multimodal blocks remain visible in the summary
+        # prompt while excluding raw message metadata from the token budget.
+        formatted_messages = get_buffer_string(trimmed_messages, format="xml")

        try:
            response = await self.model.ainvoke(
--- a/libs/langchain_v1/tests/unit_tests/agents/middleware/implementations/test_summarization.py
+++ b/libs/langchain_v1/tests/unit_tests/agents/middleware/implementations/test_summarization.py
@@ -1763,6 +1763,78 @@ def test_create_summary_uses_get_buffer_string_format() -> None:
    )


+class PromptCapturingModel(BaseChatModel):
+    """Mock chat model that records every invoke/ainvoke input and replies with `Summary`."""
+
+    captured_inputs: list[LanguageModelInput] = Field(default_factory=list, exclude=True)
+
+    @override
+    def invoke(
+        self,
+        input: LanguageModelInput,
+        config: RunnableConfig | None = None,
+        *,
+        stop: list[str] | None = None,
+        **kwargs: Any,
+    ) -> AIMessage:
+        self.captured_inputs.append(input)  # keep the raw prompt so tests can inspect it
+        return AIMessage(content="Summary")  # canned reply; config/stop/kwargs are ignored
+
+    @override
+    async def ainvoke(
+        self,
+        input: LanguageModelInput,
+        config: RunnableConfig | None = None,
+        *,
+        stop: list[str] | None = None,
+        **kwargs: Any,
+    ) -> AIMessage:
+        self.captured_inputs.append(input)  # same capture as the sync path
+        return AIMessage(content="Summary")  # canned reply; config/stop/kwargs are ignored
+
+    @override
+    def _generate(
+        self,
+        messages: list[BaseMessage],
+        stop: list[str] | None = None,
+        run_manager: CallbackManagerForLLMRun | None = None,
+        **kwargs: Any,
+    ) -> ChatResult:  # NOTE(review): presumably only needed to satisfy BaseChatModel
+        return ChatResult(generations=[ChatGeneration(message=AIMessage(content="Summary"))])
+
+    @property
+    def _llm_type(self) -> str:
+        return "prompt-capturing"
+
+
+@pytest.mark.parametrize("use_async", [False, True], ids=["sync", "async"])
+async def test_create_summary_preserves_image_urls(use_async: bool) -> None:  # noqa: FBT001
+    """Check that an image URL in the history reaches the summarizer prompt (sync and async)."""
+    model = PromptCapturingModel()  # records whatever the middleware sends to the model
+    middleware = SummarizationMiddleware(model=model, trigger=("tokens", 1000))
+    image_url = "https://example.com/shared-image.png"
+    messages: list[AnyMessage] = [
+        HumanMessage(
+            content=[
+                {"type": "text", "text": "What is in this image?"},
+                {"type": "image_url", "image_url": {"url": image_url}},
+            ]
+        ),
+        AIMessage(content="The image shows a cat."),
+    ]
+
+    if use_async:  # exercise whichever entry point the parametrization selects
+        summary = await middleware._acreate_summary(messages)
+    else:
+        summary = middleware._create_summary(messages)
+
+    assert summary == "Summary"
+    prompt = model.captured_inputs[0]  # first input the mock model received
+    assert isinstance(prompt, str)  # the prompt is expected to be one formatted string
+    # The URL must appear verbatim in the serialized history passed to the summarizer.
+    assert image_url in prompt
+
+
@pytest.mark.requires("langchain_anthropic")
 def test_usage_metadata_trigger() -> None:
    model = init_chat_model("anthropic:claude-sonnet-4-5")