mirror of
https://github.com/hwchase17/langchain.git
synced 2026-07-01 22:59:06 +00:00
fix(langchain): switch summary format (#38171)
Part of https://github.com/langchain-ai/deepagents/issues/2873

---

`SummarizationMiddleware` now serializes the history passed to the summarizer with XML formatting so URL-backed multimodal content remains available in the prompt. The existing behavior avoided dumping raw message metadata into the token budget, but the prefix serialization path omitted image/audio/video URL blocks before the summary model saw them.

## Changes

- Update `SummarizationMiddleware._create_summary` and `SummarizationMiddleware._acreate_summary` to call `get_buffer_string(..., format="xml")` for trimmed conversation history
- Preserve URL-backed multimodal blocks in the summary prompt while still avoiding raw message metadata expansion
- Add sync and async unit coverage with a prompt-capturing chat model to assert image URLs survive summarization input serialization

---------

Co-authored-by: Mason Daugherty <mason@langchain.dev>
Co-authored-by: Mason Daugherty <github@mdrxy.com>
This commit is contained in:
@@ -797,9 +797,9 @@ class SummarizationMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, R
|
||||
if not trimmed_messages:
|
||||
return "Previous conversation was too long to summarize."
|
||||
|
||||
# Format messages to avoid token inflation from metadata when str() is called on
|
||||
# message objects
|
||||
formatted_messages = get_buffer_string(trimmed_messages)
|
||||
# Serialize as XML so URL-based multimodal blocks remain visible in the summary
|
||||
# prompt while excluding raw message metadata from the token budget.
|
||||
formatted_messages = get_buffer_string(trimmed_messages, format="xml")
|
||||
|
||||
try:
|
||||
response = self.model.invoke(
|
||||
@@ -823,9 +823,9 @@ class SummarizationMiddleware(AgentMiddleware[AgentState[ResponseT], ContextT, R
|
||||
if not trimmed_messages:
|
||||
return "Previous conversation was too long to summarize."
|
||||
|
||||
# Format messages to avoid token inflation from metadata when str() is called on
|
||||
# message objects
|
||||
formatted_messages = get_buffer_string(trimmed_messages)
|
||||
# Serialize as XML so URL-based multimodal blocks remain visible in the summary
|
||||
# prompt while excluding raw message metadata from the token budget.
|
||||
formatted_messages = get_buffer_string(trimmed_messages, format="xml")
|
||||
|
||||
try:
|
||||
response = await self.model.ainvoke(
|
||||
|
||||
@@ -1763,6 +1763,78 @@ def test_create_summary_uses_get_buffer_string_format() -> None:
|
||||
)
|
||||
|
||||
|
||||
class PromptCapturingModel(BaseChatModel):
    """Chat-model test double that records the prompts it is invoked with.

    Both `invoke` and `ainvoke` are overridden so that the raw input handed to
    the model is stored in `captured_inputs` and a fixed `"Summary"` reply is
    returned, letting a test inspect exactly what was sent to the model.
    """

    # Every input seen by invoke/ainvoke, in call order. Excluded from
    # serialization via `exclude=True`.
    captured_inputs: list[LanguageModelInput] = Field(default_factory=list, exclude=True)

    def _capture_and_reply(self, prompt: LanguageModelInput) -> AIMessage:
        """Store `prompt` and build the canned reply shared by invoke/ainvoke."""
        self.captured_inputs.append(prompt)
        return AIMessage(content="Summary")

    @override
    def invoke(
        self,
        input: LanguageModelInput,
        config: RunnableConfig | None = None,
        *,
        stop: list[str] | None = None,
        **kwargs: Any,
    ) -> AIMessage:
        """Record `input` and return the fixed summary message.

        `config`, `stop` and `kwargs` are accepted only to match the overridden
        signature; they are not used.
        """
        return self._capture_and_reply(input)

    @override
    async def ainvoke(
        self,
        input: LanguageModelInput,
        config: RunnableConfig | None = None,
        *,
        stop: list[str] | None = None,
        **kwargs: Any,
    ) -> AIMessage:
        """Async counterpart of `invoke`: record `input`, return the fixed reply."""
        return self._capture_and_reply(input)

    @override
    def _generate(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: CallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Return a single-generation result holding the fixed summary message.

        Unlike `invoke`/`ainvoke`, this does not record anything.
        """
        canned_generation = ChatGeneration(message=AIMessage(content="Summary"))
        return ChatResult(generations=[canned_generation])

    @property
    def _llm_type(self) -> str:
        """Identifier string for this mock model."""
        return "prompt-capturing"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("use_async", [False, True], ids=["sync", "async"])
async def test_create_summary_preserves_image_urls(use_async: bool) -> None:  # noqa: FBT001
    """Check that an image URL in the history reaches the summarizer prompt.

    Runs once through `_create_summary` and once through `_acreate_summary`,
    then inspects the first prompt captured by the mock model.
    """
    image_url = "https://example.com/shared-image.png"
    capturing_model = PromptCapturingModel()
    middleware = SummarizationMiddleware(model=capturing_model, trigger=("tokens", 1000))

    # A multimodal user turn (text + URL-backed image) followed by a reply.
    user_turn = HumanMessage(
        content=[
            {"type": "text", "text": "What is in this image?"},
            {"type": "image_url", "image_url": {"url": image_url}},
        ]
    )
    assistant_turn = AIMessage(content="The image shows a cat.")
    history: list[AnyMessage] = [user_turn, assistant_turn]

    if not use_async:
        summary = middleware._create_summary(history)
    else:
        summary = await middleware._acreate_summary(history)

    assert summary == "Summary"

    first_prompt = capturing_model.captured_inputs[0]
    assert isinstance(first_prompt, str)
    # The URL must survive serialization of the history sent to the summarizer.
    assert image_url in first_prompt
|
||||
|
||||
|
||||
@pytest.mark.requires("langchain_anthropic")
|
||||
def test_usage_metadata_trigger() -> None:
|
||||
model = init_chat_model("anthropic:claude-sonnet-4-5")
|
||||
|
||||
Reference in New Issue
Block a user