mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-11 05:45:01 +00:00
core[patch]: RFC: Allow concatenation of messages with multi part content (#22002)
Anthropic's streaming treats tool calls as different content parts (streamed back with a different index) from normal content in the `content` field. This means that we need to update our chunk-merging logic to handle chunks with multi-part content. The alternative is coercing Anthropic's responses into a string, but we generally like to preserve model provider responses faithfully when we can. This will also likely be useful for multimodal outputs in the future. This current PR does unfortunately make `index` a magic field within content parts, but Anthropic and OpenAI both use it at the moment to determine order anyway. To avoid cases where we have content arrays with holes and to simplify the logic, I've also restricted merging to chunks in order. TODO: tests CC @baskaryan @ccurme @efriis
This commit is contained in:
parent
86509161b0
commit
c01467b1f4
@ -1,11 +1,11 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Union
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Union, cast
|
||||
|
||||
from langchain_core.load.serializable import Serializable
|
||||
from langchain_core.pydantic_v1 import Extra, Field
|
||||
from langchain_core.utils import get_bolded_text
|
||||
from langchain_core.utils._merge import merge_dicts
|
||||
from langchain_core.utils._merge import merge_dicts, merge_lists
|
||||
from langchain_core.utils.interactive_env import is_interactive_env
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@ -95,9 +95,10 @@ def merge_content(
|
||||
else:
|
||||
return_list: List[Union[str, Dict]] = [first_content]
|
||||
return return_list + second_content
|
||||
# If both are lists, merge them naively
|
||||
elif isinstance(second_content, List):
|
||||
return first_content + second_content
|
||||
# If both are lists
|
||||
merged_list = merge_lists(first_content, second_content)
|
||||
return cast(list, merged_list)
|
||||
# If the first content is a list, and the second content is a string
|
||||
else:
|
||||
# If the last element of the first content is a string
|
||||
|
@ -162,6 +162,48 @@ def test_chat_message_chunks() -> None:
|
||||
), "Other MessageChunk + ChatMessageChunk should be a MessageChunk as the left side"
|
||||
|
||||
|
||||
def test_complex_ai_message_chunks() -> None:
    """Check how adding ``AIMessageChunk`` objects combines multi-part content.

    Each case adds two chunks and compares the sum with the expected chunk:
    lists of strings, a list plus a plain string, and lists of dict parts
    whose ``index`` values are equal, missing, non-adjacent, or adjacent.
    """
    # Case 1: both contents are lists of plain strings.
    combined = AIMessageChunk(content=["I am"], id="ai4") + AIMessageChunk(
        content=[" indeed."]
    )
    expected = AIMessageChunk(id="ai4", content=["I am", " indeed."])
    assert (
        combined == expected
    ), "Content concatenation with arrays of strings should naively combine"

    # Case 2: a list containing a dict part, followed by a plain string.
    combined = AIMessageChunk(content=[{"index": 0, "text": "I am"}]) + AIMessageChunk(
        content=" indeed."
    )
    expected = AIMessageChunk(content=[{"index": 0, "text": "I am"}, " indeed."])
    assert (
        combined == expected
    ), "Concatenating mixed content arrays should naively combine them"

    # Case 3: dict parts that carry the same index.
    combined = AIMessageChunk(content=[{"index": 0, "text": "I am"}]) + AIMessageChunk(
        content=[{"index": 0, "text": " indeed."}]
    )
    expected = AIMessageChunk(content=[{"index": 0, "text": "I am indeed."}])
    assert combined == expected, (
        "Concatenating when both content arrays are dicts with the same index should merge"  # noqa: E501
    )

    # Case 4: the second dict part has no index at all.
    combined = AIMessageChunk(content=[{"index": 0, "text": "I am"}]) + AIMessageChunk(
        content=[{"text": " indeed."}]
    )
    expected = AIMessageChunk(
        content=[{"index": 0, "text": "I am"}, {"text": " indeed."}]
    )
    assert combined == expected, (
        "Concatenating when one chunk is missing an index should not merge or throw"  # noqa: E501
    )

    # Case 5: indexes 0 and 2, i.e. a gap between the two parts.
    combined = AIMessageChunk(content=[{"index": 0, "text": "I am"}]) + AIMessageChunk(
        content=[{"index": 2, "text": " indeed."}]
    )
    expected = AIMessageChunk(
        content=[{"index": 0, "text": "I am"}, {"index": 2, "text": " indeed."}]
    )
    assert combined == expected, (
        "Concatenating when both content arrays are dicts with a gap between indexes should not result in a holey array"  # noqa: E501
    )

    # Case 6: indexes 0 and 1, i.e. distinct adjacent parts.
    combined = AIMessageChunk(content=[{"index": 0, "text": "I am"}]) + AIMessageChunk(
        content=[{"index": 1, "text": " indeed."}]
    )
    expected = AIMessageChunk(
        content=[{"index": 0, "text": "I am"}, {"index": 1, "text": " indeed."}]
    )
    assert combined == expected, (
        "Concatenating when both content arrays are dicts with separate indexes should not merge"  # noqa: E501
    )
|
||||
|
||||
|
||||
def test_function_message_chunks() -> None:
|
||||
assert FunctionMessageChunk(
|
||||
name="hello", content="I am", id="ai5"
|
||||
|
Loading…
Reference in New Issue
Block a user