core[patch]: RFC: Allow concatenation of messages with multi part content (#22002)

Anthropic's streaming treats tool calls as separate content parts from normal
text, streamed back with a different index within the message's `content`.

This means we need to update our chunk-merging logic to handle chunks with
multi-part content. The alternative is coercing Anthropic's responses into a
single string, but we generally prefer to preserve model provider responses
faithfully when we can. This will also likely be useful for multimodal outputs
in the future.
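
Concretely, the merging behavior looks like the following sketch (it mirrors
the new tests added in this commit; the chunk values are just illustrative):

```python
from langchain_core.messages import AIMessageChunk

# Two streamed deltas for the same content block (same "index") merge into
# a single part, with their text concatenated.
left = AIMessageChunk(content=[{"index": 0, "text": "I am"}])
right = AIMessageChunk(content=[{"index": 0, "text": " indeed."}])
assert (left + right).content == [{"index": 0, "text": "I am indeed."}]

# A part without an "index" can't be matched, so it is appended as-is.
assert (left + AIMessageChunk(content=[{"text": " indeed."}])).content == [
    {"index": 0, "text": "I am"},
    {"text": " indeed."},
]
```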

This PR does unfortunately make `index` a magic field within content parts,
but Anthropic and OpenAI both already use it to determine ordering. To avoid
content arrays with holes and to simplify the logic, I've also restricted
merging to chunks that arrive in order.
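
For example (again mirroring the tests below), parts with different indexes
stay as separate list elements, and a gap in indexes simply appends the new
part rather than producing a sparse array:

```python
from langchain_core.messages import AIMessageChunk

# Indexes 0 and 2: no merge and no hole; the second part is appended after the first.
merged = AIMessageChunk(content=[{"index": 0, "text": "I am"}]) + AIMessageChunk(
    content=[{"index": 2, "text": " indeed."}]
)
assert merged.content == [
    {"index": 0, "text": "I am"},
    {"index": 2, "text": " indeed."},
]
```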

TODO: tests

CC @baskaryan @ccurme @efriis
Jacob Lee 2024-06-03 09:46:40 -07:00 committed by GitHub
parent 86509161b0
commit c01467b1f4
2 changed files with 47 additions and 4 deletions


@@ -1,11 +1,11 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Union, cast
 
 from langchain_core.load.serializable import Serializable
 from langchain_core.pydantic_v1 import Extra, Field
 from langchain_core.utils import get_bolded_text
-from langchain_core.utils._merge import merge_dicts
+from langchain_core.utils._merge import merge_dicts, merge_lists
 from langchain_core.utils.interactive_env import is_interactive_env
 
 if TYPE_CHECKING:
@@ -95,9 +95,10 @@ def merge_content(
         else:
             return_list: List[Union[str, Dict]] = [first_content]
             return return_list + second_content
-    # If both are lists, merge them naively
     elif isinstance(second_content, List):
-        return first_content + second_content
+        # If both are lists
+        merged_list = merge_lists(first_content, second_content)
+        return cast(list, merged_list)
     # If the first content is a list, and the second content is a string
     else:
         # If the last element of the first content is a string
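
For context, the heavy lifting above is delegated to `merge_lists` from
`langchain_core.utils._merge`, which is not shown in this diff. A simplified
sketch of what it is assumed to do with indexed content parts (an illustrative
stand-in, not the actual implementation):

```python
from typing import Any, Dict, List, Union

from langchain_core.utils._merge import merge_dicts


def merge_lists_sketch(
    left: List[Union[str, Dict[str, Any]]],
    right: List[Union[str, Dict[str, Any]]],
) -> List[Union[str, Dict[str, Any]]]:
    """Illustrative stand-in: merge dict parts that share an integer "index"."""
    merged = list(left)
    for part in right:
        if isinstance(part, dict) and isinstance(part.get("index"), int):
            matches = [
                i
                for i, existing in enumerate(merged)
                if isinstance(existing, dict)
                and existing.get("index") == part["index"]
            ]
            if matches:
                # Same index: merge field-by-field (text fields concatenate).
                merged[matches[0]] = merge_dicts(merged[matches[0]], part)
                continue
        # No index or no matching index: append as a new part.
        merged.append(part)
    return merged
```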


@@ -162,6 +162,48 @@ def test_chat_message_chunks() -> None:
     ), "Other MessageChunk + ChatMessageChunk should be a MessageChunk as the left side"
 
 
+def test_complex_ai_message_chunks() -> None:
+    assert AIMessageChunk(content=["I am"], id="ai4") + AIMessageChunk(
+        content=[" indeed."]
+    ) == AIMessageChunk(
+        id="ai4", content=["I am", " indeed."]
+    ), "Content concatenation with arrays of strings should naively combine"
+
+    assert AIMessageChunk(content=[{"index": 0, "text": "I am"}]) + AIMessageChunk(
+        content=" indeed."
+    ) == AIMessageChunk(
+        content=[{"index": 0, "text": "I am"}, " indeed."]
+    ), "Concatenating mixed content arrays should naively combine them"
+
+    assert (
+        AIMessageChunk(content=[{"index": 0, "text": "I am"}])
+        + AIMessageChunk(content=[{"index": 0, "text": " indeed."}])
+        == AIMessageChunk(content=[{"index": 0, "text": "I am indeed."}])
+    ), "Concatenating when both content arrays are dicts with the same index should merge"  # noqa: E501
+
+    assert AIMessageChunk(content=[{"index": 0, "text": "I am"}]) + AIMessageChunk(
+        content=[{"text": " indeed."}]
+    ) == AIMessageChunk(
+        content=[{"index": 0, "text": "I am"}, {"text": " indeed."}]
+    ), "Concatenating when one chunk is missing an index should not merge or throw"  # noqa: E501
+
+    assert (
+        AIMessageChunk(content=[{"index": 0, "text": "I am"}])
+        + AIMessageChunk(content=[{"index": 2, "text": " indeed."}])
+        == AIMessageChunk(
+            content=[{"index": 0, "text": "I am"}, {"index": 2, "text": " indeed."}]
+        )
+    ), "Concatenating when both content arrays are dicts with a gap between indexes should not result in a holey array"  # noqa: E501
+
+    assert (
+        AIMessageChunk(content=[{"index": 0, "text": "I am"}])
+        + AIMessageChunk(content=[{"index": 1, "text": " indeed."}])
+        == AIMessageChunk(
+            content=[{"index": 0, "text": "I am"}, {"index": 1, "text": " indeed."}]
+        )
+    ), "Concatenating when both content arrays are dicts with separate indexes should not merge"  # noqa: E501
+
+
 def test_function_message_chunks() -> None:
     assert FunctionMessageChunk(
         name="hello", content="I am", id="ai5"