core[patch]: RFC: Allow concatenation of messages with multi part content (#22002)

Anthropic's streaming treats tool calls as different content parts
(streamed back with a different index) from normal content in the
`content` field.

This means that we need to update our chunk-merging logic to handle
chunks with multi-part content. The alternative is coercing Anthropic's
responses into a string, but we generally like to preserve model
provider responses faithfully when we can. This will also likely be
useful for multimodal outputs in the future.

This current PR does unfortunately make `index` a magic field within
content parts, but Anthropic and OpenAI both use it at the moment to
determine order anyway. To avoid cases where we have content arrays with
holes and to simplify the logic, I've also restricted merging to chunks
in order.

TODO: tests

CC @baskaryan @ccurme @efriis
This commit is contained in:
Jacob Lee
2024-06-03 09:46:40 -07:00
committed by GitHub
parent 86509161b0
commit c01467b1f4
2 changed files with 47 additions and 4 deletions

View File

@@ -162,6 +162,48 @@ def test_chat_message_chunks() -> None:
), "Other MessageChunk + ChatMessageChunk should be a MessageChunk as the left side"
def test_complex_ai_message_chunks() -> None:
    """Check ``+`` on AIMessageChunks whose ``content`` is a list of parts.

    Each case adds two chunks and compares the result with the expected
    merged chunk. Dict parts are only expected to merge when both carry the
    same ``index``; every other combination is expected to be appended as-is.
    """
    # Lists of plain strings: parts are appended in order, left-hand id kept.
    merged = AIMessageChunk(content=["I am"], id="ai4") + AIMessageChunk(
        content=[" indeed."]
    )
    expected = AIMessageChunk(id="ai4", content=["I am", " indeed."])
    assert (
        merged == expected
    ), "Content concatenation with arrays of strings should naively combine"

    # List content on the left, plain string content on the right.
    merged = AIMessageChunk(content=[{"index": 0, "text": "I am"}]) + AIMessageChunk(
        content=" indeed."
    )
    expected = AIMessageChunk(content=[{"index": 0, "text": "I am"}, " indeed."])
    assert (
        merged == expected
    ), "Concatenating mixed content arrays should naively combine them"

    # Same index on both sides: the two dict parts collapse into one.
    merged = AIMessageChunk(content=[{"index": 0, "text": "I am"}]) + AIMessageChunk(
        content=[{"index": 0, "text": " indeed."}]
    )
    expected = AIMessageChunk(content=[{"index": 0, "text": "I am indeed."}])
    assert (
        merged == expected
    ), "Concatenating when both content arrays are dicts with the same index should merge"  # noqa: E501

    # Right-hand part has no index: nothing to match on, so it is appended.
    merged = AIMessageChunk(content=[{"index": 0, "text": "I am"}]) + AIMessageChunk(
        content=[{"text": " indeed."}]
    )
    expected = AIMessageChunk(
        content=[{"index": 0, "text": "I am"}, {"text": " indeed."}]
    )
    assert (
        merged == expected
    ), "Concatenating when one chunk is missing an index should not merge or throw"  # noqa: E501

    # Non-adjacent indexes (0 and 2): result stays dense, no placeholder part.
    merged = AIMessageChunk(content=[{"index": 0, "text": "I am"}]) + AIMessageChunk(
        content=[{"index": 2, "text": " indeed."}]
    )
    expected = AIMessageChunk(
        content=[{"index": 0, "text": "I am"}, {"index": 2, "text": " indeed."}]
    )
    assert (
        merged == expected
    ), "Concatenating when both content arrays are dicts with a gap between indexes should not result in a holey array"  # noqa: E501

    # Adjacent but different indexes (0 and 1): kept as two separate parts.
    merged = AIMessageChunk(content=[{"index": 0, "text": "I am"}]) + AIMessageChunk(
        content=[{"index": 1, "text": " indeed."}]
    )
    expected = AIMessageChunk(
        content=[{"index": 0, "text": "I am"}, {"index": 1, "text": " indeed."}]
    )
    assert (
        merged == expected
    ), "Concatenating when both content arrays are dicts with separate indexes should not merge"  # noqa: E501
def test_function_message_chunks() -> None:
assert FunctionMessageChunk(
name="hello", content="I am", id="ai5"