mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-22 11:00:37 +00:00
core[patch]: add option to specify the chunk separator in merge_message_runs
(#24783)
**Description:** An LLM will stop generating text, even in the middle of a sentence, if `finish_reason` is `length` (for OpenAI) or `stop_reason` is `max_tokens` (for Anthropic). To obtain longer outputs from an LLM, we have to call the message generation API multiple times and merge the results into a single text to work around the API's output token limit. When merging such messages seamlessly, the extra line breaks forced by the `merge_message_runs` function can be annoying, so I added an option to specify the chunk separator. **Issue:** No corresponding issue. **Dependencies:** No dependencies required. **Twitter handle:** @hanama_chem https://x.com/hanama_chem --------- Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
@@ -30,6 +30,30 @@ def test_merge_message_runs_str(msg_cls: Type[BaseMessage]) -> None:
|
||||
assert messages == messages_copy
|
||||
|
||||
|
||||
@pytest.mark.parametrize("msg_cls", [HumanMessage, AIMessage, SystemMessage])
def test_merge_message_runs_str_with_specified_separator(
    msg_cls: Type[BaseMessage],
) -> None:
    """Check that a run of string messages is joined with a custom separator."""
    originals = [msg_cls(text) for text in ("foo", "bar", "baz")]
    # Deep-copied snapshot, used below to verify the inputs were not mutated.
    snapshot = [message.copy(deep=True) for message in originals]

    merged = merge_message_runs(originals, chunk_separator="<sep>")

    assert merged == [msg_cls("foo<sep>bar<sep>baz")]
    assert originals == snapshot
|
||||
|
||||
|
||||
@pytest.mark.parametrize("msg_cls", [HumanMessage, AIMessage, SystemMessage])
def test_merge_message_runs_str_without_separator(
    msg_cls: Type[BaseMessage],
) -> None:
    """Check that an empty separator concatenates string contents directly."""
    originals = [msg_cls(text) for text in ("foo", "bar", "baz")]
    # Deep-copied snapshot, used below to verify the inputs were not mutated.
    snapshot = [message.copy(deep=True) for message in originals]

    merged = merge_message_runs(originals, chunk_separator="")

    assert merged == [msg_cls("foobarbaz")]
    assert originals == snapshot
|
||||
|
||||
|
||||
def test_merge_message_runs_content() -> None:
|
||||
messages = [
|
||||
AIMessage("foo", id="1"),
|
||||
|
Reference in New Issue
Block a user