From c01467b1f4f9beae8f1edb105b17aa4f36bf6573 Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Mon, 3 Jun 2024 09:46:40 -0700 Subject: [PATCH] core[patch]: RFC: Allow concatenation of messages with multi part content (#22002) Anthropic's streaming treats tool calls as different content parts (streamed back with a different index) from normal content in the `content`. This means that we need to update our chunk-merging logic to handle chunks with multi-part content. The alternative is coercing Anthropic's responses into a string, but we generally like to preserve model provider responses faithfully when we can. This will also likely be useful for multimodal outputs in the future. This current PR does unfortunately make `index` a magic field within content parts, but Anthropic and OpenAI both use it at the moment to determine order anyway. To avoid cases where we have content arrays with holes and to simplify the logic, I've also restricted merging to chunks in order. TODO: tests CC @baskaryan @ccurme @efriis --- libs/core/langchain_core/messages/base.py | 9 +++-- libs/core/tests/unit_tests/test_messages.py | 42 +++++++++++++++++++++ 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py index 60c57220033..d73b4d526b3 100644 --- a/libs/core/langchain_core/messages/base.py +++ b/libs/core/langchain_core/messages/base.py @@ -1,11 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Union, cast from langchain_core.load.serializable import Serializable from langchain_core.pydantic_v1 import Extra, Field from langchain_core.utils import get_bolded_text -from langchain_core.utils._merge import merge_dicts +from langchain_core.utils._merge import merge_dicts, merge_lists from langchain_core.utils.interactive_env import is_interactive_env if 
TYPE_CHECKING: @@ -95,9 +95,10 @@ def merge_content( else: return_list: List[Union[str, Dict]] = [first_content] return return_list + second_content - # If both are lists, merge them naively elif isinstance(second_content, List): - return first_content + second_content + # If both are lists + merged_list = merge_lists(first_content, second_content) + return cast(list, merged_list) # If the first content is a list, and the second content is a string else: # If the last element of the first content is a string diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index 21884cf1e83..c893f8ced57 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -162,6 +162,48 @@ def test_chat_message_chunks() -> None: ), "Other MessageChunk + ChatMessageChunk should be a MessageChunk as the left side" +def test_complex_ai_message_chunks() -> None: + assert AIMessageChunk(content=["I am"], id="ai4") + AIMessageChunk( + content=[" indeed."] + ) == AIMessageChunk( + id="ai4", content=["I am", " indeed."] + ), "Content concatenation with arrays of strings should naively combine" + + assert AIMessageChunk(content=[{"index": 0, "text": "I am"}]) + AIMessageChunk( + content=" indeed." 
+ ) == AIMessageChunk( + content=[{"index": 0, "text": "I am"}, " indeed."] + ), "Concatenating mixed content arrays should naively combine them" + + assert ( + AIMessageChunk(content=[{"index": 0, "text": "I am"}]) + + AIMessageChunk(content=[{"index": 0, "text": " indeed."}]) + == AIMessageChunk(content=[{"index": 0, "text": "I am indeed."}]) + ), "Concatenating when both content arrays are dicts with the same index should merge" # noqa: E501 + + assert AIMessageChunk(content=[{"index": 0, "text": "I am"}]) + AIMessageChunk( + content=[{"text": " indeed."}] + ) == AIMessageChunk( + content=[{"index": 0, "text": "I am"}, {"text": " indeed."}] + ), "Concatenating when one chunk is missing an index should not merge or throw" # noqa: E501 + + assert ( + AIMessageChunk(content=[{"index": 0, "text": "I am"}]) + + AIMessageChunk(content=[{"index": 2, "text": " indeed."}]) + == AIMessageChunk( + content=[{"index": 0, "text": "I am"}, {"index": 2, "text": " indeed."}] + ) + ), "Concatenating when both content arrays are dicts with a gap between indexes should not result in a holey array" # noqa: E501 + + assert ( + AIMessageChunk(content=[{"index": 0, "text": "I am"}]) + + AIMessageChunk(content=[{"index": 1, "text": " indeed."}]) + == AIMessageChunk( + content=[{"index": 0, "text": "I am"}, {"index": 1, "text": " indeed."}] + ) + ), "Concatenating when both content arrays are dicts with separate indexes should not merge" # noqa: E501 + + def test_function_message_chunks() -> None: assert FunctionMessageChunk( name="hello", content="I am", id="ai5"