From 15c38c8555471df979bf6117f886e0558e15b9ee Mon Sep 17 00:00:00 2001 From: Nishitha M <32355027+imnishitha@users.noreply.github.com> Date: Thu, 18 Jun 2026 13:34:42 -0400 Subject: [PATCH] fix(langchain): switch summary format (#38171) Part of https://github.com/langchain-ai/deepagents/issues/2873 --- `SummarizationMiddleware` now serializes the history passed to the summarizer with XML formatting so URL-backed multimodal content remains available in the prompt. The previous behavior avoided dumping raw message metadata into the token budget, but the default prefix-format serialization dropped image/audio/video URL blocks, so the summary model never saw them. ## Changes - Update `SummarizationMiddleware._create_summary` and `SummarizationMiddleware._acreate_summary` to call `get_buffer_string(..., format="xml")` for trimmed conversation history - Preserve URL-backed multimodal blocks in the summary prompt while still avoiding raw message metadata expansion - Add sync and async unit coverage with a prompt-capturing chat model to assert image URLs survive summarization input serialization --------- Co-authored-by: Mason Daugherty Co-authored-by: Mason Daugherty --- .../agents/middleware/summarization.py | 12 ++-- .../implementations/test_summarization.py | 72 +++++++++++++++++++ 2 files changed, 78 insertions(+), 6 deletions(-) diff --git a/libs/langchain_v1/langchain/agents/middleware/summarization.py b/libs/langchain_v1/langchain/agents/middleware/summarization.py index 5dd88182ef1..81c47f48622 100644 --- a/libs/langchain_v1/langchain/agents/middleware/summarization.py +++ b/libs/langchain_v1/langchain/agents/middleware/summarization.py @@ -797,9 +797,9 @@ class SummarizationMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, R if not trimmed_messages: return "Previous conversation was too long to summarize." 
- # Format messages to avoid token inflation from metadata when str() is called on - # message objects - formatted_messages = get_buffer_string(trimmed_messages) + # Serialize as XML so URL-based multimodal blocks remain visible in the summary + # prompt while excluding raw message metadata from the token budget. + formatted_messages = get_buffer_string(trimmed_messages, format="xml") try: response = self.model.invoke( @@ -823,9 +823,9 @@ class SummarizationMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, R if not trimmed_messages: return "Previous conversation was too long to summarize." - # Format messages to avoid token inflation from metadata when str() is called on - # message objects - formatted_messages = get_buffer_string(trimmed_messages) + # Serialize as XML so URL-based multimodal blocks remain visible in the summary + # prompt while excluding raw message metadata from the token budget. + formatted_messages = get_buffer_string(trimmed_messages, format="xml") try: response = await self.model.ainvoke( diff --git a/libs/langchain_v1/tests/unit_tests/agents/middleware/implementations/test_summarization.py b/libs/langchain_v1/tests/unit_tests/agents/middleware/implementations/test_summarization.py index 4d4f1254a84..460bf513f8b 100644 --- a/libs/langchain_v1/tests/unit_tests/agents/middleware/implementations/test_summarization.py +++ b/libs/langchain_v1/tests/unit_tests/agents/middleware/implementations/test_summarization.py @@ -1763,6 +1763,78 @@ def test_create_summary_uses_get_buffer_string_format() -> None: ) +class PromptCapturingModel(BaseChatModel): + """Mock model that captures the prompt input passed to invoke/ainvoke.""" + + captured_inputs: list[LanguageModelInput] = Field(default_factory=list, exclude=True) + + @override + def invoke( + self, + input: LanguageModelInput, + config: RunnableConfig | None = None, + *, + stop: list[str] | None = None, + **kwargs: Any, + ) -> AIMessage: + self.captured_inputs.append(input) + return 
AIMessage(content="Summary") + + @override + async def ainvoke( + self, + input: LanguageModelInput, + config: RunnableConfig | None = None, + *, + stop: list[str] | None = None, + **kwargs: Any, + ) -> AIMessage: + self.captured_inputs.append(input) + return AIMessage(content="Summary") + + @override + def _generate( + self, + messages: list[BaseMessage], + stop: list[str] | None = None, + run_manager: CallbackManagerForLLMRun | None = None, + **kwargs: Any, + ) -> ChatResult: + return ChatResult(generations=[ChatGeneration(message=AIMessage(content="Summary"))]) + + @property + def _llm_type(self) -> str: + return "prompt-capturing" + + +@pytest.mark.parametrize("use_async", [False, True], ids=["sync", "async"]) +async def test_create_summary_preserves_image_urls(use_async: bool) -> None: # noqa: FBT001 + """Test that URL-backed image content is serialized into the summary prompt.""" + model = PromptCapturingModel() + middleware = SummarizationMiddleware(model=model, trigger=("tokens", 1000)) + image_url = "https://example.com/shared-image.png" + messages: list[AnyMessage] = [ + HumanMessage( + content=[ + {"type": "text", "text": "What is in this image?"}, + {"type": "image_url", "image_url": {"url": image_url}}, + ] + ), + AIMessage(content="The image shows a cat."), + ] + + if use_async: + summary = await middleware._acreate_summary(messages) + else: + summary = middleware._create_summary(messages) + + assert summary == "Summary" + prompt = model.captured_inputs[0] + assert isinstance(prompt, str) + # Preserve the URL in the serialized history passed to the summarizer. + assert image_url in prompt + + @pytest.mark.requires("langchain_anthropic") def test_usage_metadata_trigger() -> None: model = init_chat_model("anthropic:claude-sonnet-4-5")