mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-24 12:00:52 +00:00
core[minor]: Add msg content formatting util
This commit is contained in:
parent
d6c4803ab0
commit
49f7c8cdd8
@ -9,8 +9,10 @@ Some examples of what you can do with these functions include:
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import inspect
|
||||
import json
|
||||
import re
|
||||
from functools import partial
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
@ -37,6 +39,9 @@ from langchain_core.messages.human import HumanMessage, HumanMessageChunk
|
||||
from langchain_core.messages.modifier import RemoveMessage
|
||||
from langchain_core.messages.system import SystemMessage, SystemMessageChunk
|
||||
from langchain_core.messages.tool import ToolMessage, ToolMessageChunk
|
||||
from langchain_core.messages.tool import (
|
||||
tool_call as create_tool_call,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain_text_splitters import TextSplitter
|
||||
@ -252,7 +257,9 @@ def _create_message_from_message_type(
|
||||
return message
|
||||
|
||||
|
||||
def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage:
|
||||
def _convert_to_message(
|
||||
message: MessageLikeRepresentation, *, copy: bool = False
|
||||
) -> BaseMessage:
|
||||
"""Instantiate a message from a variety of message formats.
|
||||
|
||||
The message format can be one of the following:
|
||||
@ -274,7 +281,10 @@ def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage:
|
||||
ValueError: if the message dict does not contain the required keys.
|
||||
"""
|
||||
if isinstance(message, BaseMessage):
|
||||
_message = message
|
||||
if copy:
|
||||
_message = message.__class__(**message.dict())
|
||||
else:
|
||||
_message = message
|
||||
elif isinstance(message, str):
|
||||
_message = _create_message_from_message_type("human", message)
|
||||
elif isinstance(message, Sequence) and len(message) == 2:
|
||||
@ -305,6 +315,8 @@ def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage:
|
||||
|
||||
def convert_to_messages(
|
||||
messages: Union[Iterable[MessageLikeRepresentation], PromptValue],
|
||||
*,
|
||||
copy: bool = False,
|
||||
) -> List[BaseMessage]:
|
||||
"""Convert a sequence of messages to a list of messages.
|
||||
|
||||
@ -319,35 +331,87 @@ def convert_to_messages(
|
||||
|
||||
if isinstance(messages, PromptValue):
|
||||
return messages.to_messages()
|
||||
return [_convert_to_message(m) for m in messages]
|
||||
return [_convert_to_message(m, copy=copy) for m in messages]
|
||||
|
||||
|
||||
def _runnable_support(*args: Callable, supports_single: bool = False) -> Callable:
    """Let a message utility be called directly or used as a Runnable factory.

    Can be applied bare (``@_runnable_support``) or with keyword arguments
    (``@_runnable_support(supports_single=True)``).

    The decorated function accepts an optional first argument ``messages``:

    * when ``messages`` is not ``None`` the underlying function is called
      immediately with ``messages`` and the keyword arguments;
    * when ``messages`` is ``None`` a ``RunnableLambda`` is returned that has the
      keyword arguments bound and is named after the underlying function.

    Args:
        *args: The function to decorate when the decorator is applied bare;
            empty when the decorator is called with keyword arguments only.
        supports_single: Selects which typing overloads are attached to the
            wrapper (a single message-like input/output is advertised when True).
            The runtime dispatch is the same in both cases.

    Returns:
        The wrapped function if a function was passed positionally, otherwise the
        decorator to apply.
    """

    def _call_or_defer(func: Callable, messages: Any, kwargs: Dict[str, Any]) -> Any:
        # Imported at call time rather than module import time.
        # NOTE(review): presumably to avoid a circular import - confirm.
        from langchain_core.runnables.base import RunnableLambda

        if messages is not None:
            return func(messages, **kwargs)
        return RunnableLambda(partial(func, **kwargs), name=func.__name__)

    if supports_single:

        def runnable_support(func: Callable) -> Callable:
            @overload
            def wrapped(
                messages: Literal[None] = None, **kwargs: Any
            ) -> Runnable[
                Union[MessageLikeRepresentation, Sequence[MessageLikeRepresentation]],
                Union[BaseMessage, List[BaseMessage]],
            ]: ...

            @overload
            def wrapped(
                messages: Sequence[Union[BaseMessage, Dict, Tuple]], **kwargs: Any
            ) -> List[BaseMessage]: ...

            @overload
            def wrapped(
                messages: MessageLikeRepresentation, **kwargs: Any
            ) -> BaseMessage: ...

            def wrapped(
                messages: Union[
                    MessageLikeRepresentation, Sequence[MessageLikeRepresentation], None
                ] = None,
                **kwargs: Any,
            ) -> Union[
                BaseMessage,
                List[BaseMessage],
                Runnable[
                    Union[
                        MessageLikeRepresentation, Sequence[MessageLikeRepresentation]
                    ],
                    Union[BaseMessage, List[BaseMessage]],
                ],
            ]:
                return _call_or_defer(func, messages, kwargs)

            wrapped.__doc__ = func.__doc__
            return wrapped

    else:

        def runnable_support(func: Callable) -> Callable:
            @overload
            def wrapped(
                messages: Literal[None] = None, **kwargs: Any
            ) -> Runnable[Sequence[MessageLikeRepresentation], List[BaseMessage]]: ...

            @overload
            def wrapped(
                messages: Sequence[MessageLikeRepresentation], **kwargs: Any
            ) -> List[BaseMessage]: ...

            def wrapped(
                messages: Union[Sequence[MessageLikeRepresentation], None] = None,
                **kwargs: Any,
            ) -> Union[
                Runnable[Sequence[MessageLikeRepresentation], List[BaseMessage]],
                List[BaseMessage],
            ]:
                return _call_or_defer(func, messages, kwargs)

            wrapped.__doc__ = func.__doc__
            return wrapped

    # Bare use passes the function positionally; parameterised use passes nothing
    # and expects the decorator itself back.
    return runnable_support(*args) if args else cast(Callable, runnable_support)
|
||||
|
||||
|
||||
@_runnable_support
|
||||
@ -845,6 +909,571 @@ def trim_messages(
|
||||
)
|
||||
|
||||
|
||||
@_runnable_support(supports_single=True)
def format_content_as(
    messages: Union[MessageLikeRepresentation, Iterable[MessageLikeRepresentation]],
    *,
    format: Literal["openai", "anthropic"],
    text: Literal["string", "block"],
) -> Union[BaseMessage, List[BaseMessage]]:
    """Convert message contents into a standard format.

    .. versionadded:: 0.2.36

    Args:
        messages: Message-like object or iterable of objects whose contents are
            already in OpenAI, Anthropic, Bedrock Converse, or VertexAI formats.
            A ``BaseMessage``, ``dict`` or ``str`` is treated as one single
            message; anything else is treated as an iterable of messages.
        format: Format to convert message contents to. Matched
            case-insensitively.
        text: How to format text contents. If ``text='string'`` then any string
            contents are left as strings. If a message has content blocks that
            are all of type 'text', these are joined with a newline to make a
            single string. If a message has content blocks and at least one isn't
            of type 'text', then all blocks are left as dicts. If
            ``text='block'`` then all contents are turned into a list of dicts.

    Returns:
        A single BaseMessage if a single message-like object was passed in, else
        a list of BaseMessages. The inputs are copied before being formatted.

    Raises:
        ValueError: If ``format`` is not one of ``'openai'`` or ``'anthropic'``.

    .. dropdown:: Basic usage
        :open:

        .. code-block:: python

            from langchain_core.messages import (
                AIMessage,
                HumanMessage,
                SystemMessage,
                format_content_as,
            )

            messages = [
                SystemMessage("System message"),
                HumanMessage(
                    [
                        {"type": "text", "text": "Hello"},
                        {"type": "text", "text": "World"},
                    ]
                ),
                AIMessage("Hi there"),
            ]
            # The human message content becomes "Hello\\nWorld".
            oai_strings = format_content_as(
                messages, format="openai", text="string"
            )
            # Every message content becomes a list of block dicts.
            anthropic_blocks = format_content_as(
                messages, format="anthropic", text="block"
            )

    .. dropdown:: Chain usage
        :open:

        .. code-block:: python

            from langchain_core.messages import format_content_as
            from langchain.chat_models import init_chat_model

            formatter = format_content_as(format="openai", text="block")
            llm = init_chat_model() | formatter

            llm.invoke(
                [{"role": "user", "content": "how are you"}],
                config={"model": "gpt-4o"},
            )
            # -> AIMessage([{"type": "text", "text": ""}], ...)

            llm.invoke(
                [{"role": "user", "content": "whats your name"}],
                config={"model": "claude-3-5-sonnet-20240620"})
            # -> AIMessage([{"type": "text", "text": ""}], ...)

    .. note:: Doesn't support streaming

        This util does not support formatting streamed chunks on the fly (i.e.
        "transforming" chunks). This means if you pipe the outputs of a model to
        this formatter in a chain, the chain will not have token-level streaming
        when using ``chain.stream()/.astream()``. You'll still see the token
        stream when using ``chat.astream_events()`` but the message chunks will
        not yet be formatted.

        .. code-block:: python

            from langchain_core.messages import format_content_as
            from langchain.chat_models import init_chat_model

            formatter = format_content_as(format="openai", text="block")
            llm = init_chat_model() | formatter

            # Will contain a single, completed chunk.
            list(llm.stream(
                [{"role": "user", "content": "how are you"}],
                config={"model": "gpt-4o"},
            ))

            # Will include token-level events, but the streamed chunks will not
            # yet be formatted.
            async for chunk in llm.astream_events(
                [{"role": "user", "content": "how are you"}],
                config={"model": "gpt-4o"},
                version="v2",
            ):
                ...

    """
    # A bare string is one message; without this check it would be iterated
    # character by character and produce one message per character.
    if is_single := isinstance(messages, (BaseMessage, dict, str)):
        messages = [messages]
    # The formatters below mutate their inputs, so always work on copies.
    messages = convert_to_messages(messages, copy=True)
    if format.lower() == "openai":
        formatted = _format_contents_as_openai(messages, text=text)
    elif format.lower() == "anthropic":
        formatted = _format_contents_as_anthropic(messages, text=text)
    else:
        raise ValueError(
            f"Unrecognized {format=}. Expected one of ('openai', 'anthropic')."
        )
    if is_single:
        return formatted[0]
    else:
        return formatted
|
||||
|
||||
|
||||
def _format_contents_as_openai(
    messages: Sequence[BaseMessage], *, text: Literal["string", "block"]
) -> List[BaseMessage]:
    """Mutates messages so their contents match OpenAI messages API.

    Content blocks in OpenAI, Anthropic, Bedrock Converse and VertexAI shapes
    are recognised. The messages are modified in place.

    Args:
        messages: Messages whose ``content`` should be rewritten.
        text: With ``"string"``, string content is kept as-is and a content list
            made up only of text is joined with newlines into one string. With
            any other value, content always ends up as a list of block dicts.

    Returns:
        The rewritten messages. Anthropic ``tool_use`` blocks are removed from the
        content and recorded in ``message.tool_calls`` (when no tool call with the
        same id exists yet). Each Anthropic ``tool_result`` block is lifted out
        into its own ``ToolMessage`` placed directly after the message it was
        found in.

    Raises:
        ValueError: If a content block is missing required keys, carries
            non-image media, or matches none of the supported formats.
    """
    updated_messages: list = []
    for i, message in enumerate(messages):
        # ToolMessages created from 'tool_result' blocks of this message.
        tool_messages: list = []
        if not message.content:
            message.content = "" if text == "string" else []
        elif isinstance(message.content, str):
            if text != "string":
                message.content = [{"type": "text", "text": message.content}]
        elif text == "string" and all(
            isinstance(block, str) or block.get("type") == "text"
            for block in message.content
        ):
            message.content = "\n".join(
                block if isinstance(block, str) else block["text"]
                for block in message.content
            )
        else:
            content: List[dict] = []
            for j, block in enumerate(message.content):
                # OpenAI format
                if isinstance(block, str):
                    content.append({"type": "text", "text": block})
                elif block.get("type") == "text":
                    if missing := [k for k in ("text",) if k not in block]:
                        raise ValueError(
                            f"Unrecognized content block at "
                            f"messages[{i}].content[{j}] has 'type': 'text' "
                            f"but is missing expected key(s) "
                            f"{missing}. Full content block:\n\n{block}"
                        )
                    content.append({"type": block["type"], "text": block["text"]})
                elif block.get("type") == "image_url":
                    if missing := [k for k in ("image_url",) if k not in block]:
                        raise ValueError(
                            f"Unrecognized content block at "
                            f"messages[{i}].content[{j}] has 'type': 'image_url' "
                            f"but is missing expected key(s) "
                            f"{missing}. Full content block:\n\n{block}"
                        )
                    content.append(
                        {"type": "image_url", "image_url": block["image_url"]}
                    )
                # Anthropic and Bedrock converse format
                elif (block.get("type") == "image") or "image" in block:
                    # Anthropic
                    if source := block.get("source"):
                        if missing := [
                            k
                            for k in ("media_type", "type", "data")
                            if k not in source
                        ]:
                            raise ValueError(
                                f"Unrecognized content block at "
                                f"messages[{i}].content[{j}] has 'type': 'image' "
                                f"but 'source' is missing expected key(s) "
                                f"{missing}. Full content block:\n\n{block}"
                            )
                        content.append(
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": (
                                        f"data:{source['media_type']};"
                                        f"{source['type']},{source['data']}"
                                    )
                                },
                            }
                        )
                    # Bedrock converse
                    elif image := block.get("image"):
                        if missing := [
                            k for k in ("source", "format") if k not in image
                        ]:
                            raise ValueError(
                                f"Unrecognized content block at "
                                f"messages[{i}].content[{j}] has key 'image', "
                                f"but 'image' is missing expected key(s) "
                                f"{missing}. Full content block:\n\n{block}"
                            )
                        b64_image = _bytes_to_b64_str(image["source"]["bytes"])
                        content.append(
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": (
                                        f"data:image/{image['format']};"
                                        f"base64,{b64_image}"
                                    )
                                },
                            }
                        )
                    else:
                        raise ValueError(
                            f"Unrecognized content block at "
                            f"messages[{i}].content[{j}] has 'type': 'image' "
                            f"but does not have a 'source' or 'image' key. Full "
                            f"content block:\n\n{block}"
                        )
                elif block.get("type") == "tool_use":
                    if missing := [
                        k for k in ("id", "name", "input") if k not in block
                    ]:
                        raise ValueError(
                            f"Unrecognized content block at "
                            f"messages[{i}].content[{j}] has 'type': 'tool_use', "
                            f"but is missing expected key(s) "
                            f"{missing}. Full content block:\n\n{block}"
                        )
                    # The block is not copied into the content; it is recorded
                    # as a tool call unless one with this id is already there.
                    if not any(
                        tool_call["id"] == block["id"]
                        for tool_call in cast(AIMessage, message).tool_calls
                    ):
                        cast(AIMessage, message).tool_calls.append(
                            create_tool_call(
                                name=block["name"],
                                id=block["id"],
                                args=block["input"],
                            )
                        )
                elif block.get("type") == "tool_result":
                    if missing := [
                        k for k in ("content", "tool_use_id") if k not in block
                    ]:
                        raise ValueError(
                            f"Unrecognized content block at "
                            f"messages[{i}].content[{j}] has 'type': "
                            f"'tool_result', but is missing expected key(s) "
                            f"{missing}. Full content block:\n\n{block}"
                        )
                    tool_message = ToolMessage(
                        block["content"],
                        tool_call_id=block["tool_use_id"],
                        status="error" if block.get("is_error") else "success",
                    )
                    # Recurse to make sure tool message contents are OpenAI format.
                    tool_messages.extend(
                        _format_contents_as_openai([tool_message], text=text)
                    )
                elif (block.get("type") == "json") or "json" in block:
                    if "json" not in block:
                        raise ValueError(
                            f"Unrecognized content block at "
                            f"messages[{i}].content[{j}] has 'type': 'json' "
                            f"but does not have a 'json' key. Full "
                            f"content block:\n\n{block}"
                        )
                    content.append(
                        {"type": "text", "text": json.dumps(block["json"])}
                    )
                elif (
                    block.get("type") == "guard_content"
                ) or "guard_content" in block:
                    if (
                        "guard_content" not in block
                        or "text" not in block["guard_content"]
                    ):
                        raise ValueError(
                            f"Unrecognized content block at "
                            f"messages[{i}].content[{j}] has 'type': "
                            f"'guard_content' but does not have a "
                            f"messages[{i}].content[{j}]['guard_content']['text'] "
                            f"key. Full content block:\n\n{block}"
                        )
                    # Use a dedicated local: rebinding the ``text`` parameter
                    # here would change how every later message is formatted.
                    guard_text = block["guard_content"]["text"]
                    if isinstance(guard_text, dict):
                        guard_text = guard_text["text"]
                    content.append({"type": "text", "text": guard_text})
                # VertexAI format
                elif block.get("type") == "media":
                    if missing := [
                        k for k in ("mime_type", "data") if k not in block
                    ]:
                        raise ValueError(
                            f"Unrecognized content block at "
                            f"messages[{i}].content[{j}] has 'type': "
                            f"'media' but does not have key(s) {missing}. Full "
                            f"content block:\n\n{block}"
                        )
                    if "image" not in block["mime_type"]:
                        raise ValueError(
                            f"OpenAI messages can only support text and image data."
                            f" Received content block with media of type:"
                            f" {block['mime_type']}"
                        )
                    b64_image = _bytes_to_b64_str(block["data"])
                    content.append(
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": (
                                    f"data:{block['mime_type']};base64,{b64_image}"
                                )
                            },
                        }
                    )
                else:
                    raise ValueError(
                        f"Unrecognized content block at "
                        f"messages[{i}].content[{j}] does not match OpenAI, "
                        f"Anthropic, Bedrock Converse, or VertexAI format. Full "
                        f"content block:\n\n{block}"
                    )
            message.content = content  # type: ignore[assignment]
        updated_messages.extend([message, *tool_messages])
    return updated_messages
|
||||
|
||||
|
||||
# Optional keys that are copied from a source content block onto the converted
# Anthropic block whenever they are present on the source.
_OPTIONAL_ANTHROPIC_KEYS = ("cache_control", "is_error")
|
||||
|
||||
|
||||
def _format_contents_as_anthropic(
    messages: Sequence[BaseMessage], *, text: Literal["string", "block"]
) -> List[BaseMessage]:
    """Mutates messages so their contents match Anthropic messages API.

    Content blocks in OpenAI, Anthropic, Bedrock Converse and VertexAI shapes
    are recognised. The messages are modified in place.

    Args:
        messages: Messages whose ``content`` should be rewritten.
        text: With ``"string"``, string content is kept as-is and a content list
            made up only of text (without ``cache_control``) is joined with
            newlines into one string. With any other value, content always ends
            up as a list of block dicts.

    Returns:
        The rewritten messages after being passed through
        ``merge_message_runs``. Every ``ToolMessage`` is turned into a
        ``tool_result`` block that is appended to the preceding ``HumanMessage``
        if there is one, or wrapped in a new ``HumanMessage`` otherwise.

    Raises:
        ValueError: If a content block is missing required keys, carries
            non-image media, or matches none of the supported formats.
    """
    updated_messages: List = []
    for i, message in enumerate(messages):
        if isinstance(message, ToolMessage):
            tool_result_block = {
                "type": "tool_result",
                "content": message.content,
                "tool_use_id": message.tool_call_id,
                "is_error": message.status == "error",
            }
            if updated_messages and isinstance(updated_messages[-1], HumanMessage):
                previous = updated_messages[-1]
                # Promote string content to a block list so the tool result
                # can be appended next to it.
                if isinstance(previous.content, str):
                    previous.content = [{"type": "text", "text": previous.content}]
                previous.content.append(tool_result_block)
            else:
                updated_messages.append(HumanMessage([tool_result_block]))
            continue
        elif not message.content:
            message.content = "" if text == "string" else []
        elif isinstance(message.content, str):
            if text != "string":
                message.content = [{"type": "text", "text": message.content}]
        elif text == "string" and all(
            isinstance(block, str)
            or (block.get("type") == "text" and "cache_control" not in block)
            for block in message.content
        ):
            message.content = "\n".join(
                block if isinstance(block, str) else block["text"]
                for block in message.content
            )
        else:
            content = []
            for j, block in enumerate(message.content):
                # OpenAI format
                if isinstance(block, str):
                    content.append({"type": "text", "text": block})
                    continue
                # Computed for every dict block so that each branch below sees
                # the optional keys of *this* block (never an earlier one).
                block_extra = {
                    k: block[k] for k in _OPTIONAL_ANTHROPIC_KEYS if k in block
                }
                if block.get("type") == "text":
                    if missing := [k for k in ("text",) if k not in block]:
                        raise ValueError(
                            f"Unrecognized content block at "
                            f"messages[{i}].content[{j}] has 'type': 'text' "
                            f"but is missing expected key(s) "
                            f"{missing}. Full content block:\n\n{block}"
                        )
                    content.append(
                        {"type": "text", "text": block["text"], **block_extra}
                    )
                elif block.get("type") == "image_url":
                    if missing := [k for k in ("image_url",) if k not in block]:
                        raise ValueError(
                            f"Unrecognized content block at "
                            f"messages[{i}].content[{j}] has 'type': 'image_url' "
                            f"but is missing expected key(s) "
                            f"{missing}. Full content block:\n\n{block}"
                        )
                    content.append(
                        {**_openai_image_to_anthropic(block), **block_extra}
                    )
                # Anthropic and Bedrock converse format
                elif (block.get("type") == "image") or "image" in block:
                    # Anthropic
                    if source := block.get("source"):
                        if missing := [
                            k
                            for k in ("media_type", "type", "data")
                            if k not in source
                        ]:
                            raise ValueError(
                                f"Unrecognized content block at "
                                f"messages[{i}].content[{j}] has 'type': 'image' "
                                f"but 'source' is missing expected key(s) "
                                f"{missing}. Full content block:\n\n{block}"
                            )
                        content.append(
                            {
                                "type": "image",
                                "source": block["source"],
                                **block_extra,
                            }
                        )
                    # Bedrock converse
                    elif image := block.get("image"):
                        if missing := [
                            k for k in ("source", "format") if k not in image
                        ]:
                            raise ValueError(
                                f"Unrecognized content block at "
                                f"messages[{i}].content[{j}] has key 'image', "
                                f"but 'image' is missing expected key(s) "
                                f"{missing}. Full content block:\n\n{block}"
                            )
                        content.append(
                            {
                                **_bedrock_converse_image_to_anthropic(
                                    block["image"]
                                ),
                                **block_extra,
                            }
                        )
                    else:
                        raise ValueError(
                            f"Unrecognized content block at "
                            f"messages[{i}].content[{j}] has 'type': 'image' "
                            f"but does not have a 'source' or 'image' key. Full "
                            f"content block:\n\n{block}"
                        )
                elif block.get("type") == "tool_use":
                    if missing := [
                        k for k in ("id", "name", "input") if k not in block
                    ]:
                        raise ValueError(
                            f"Unrecognized content block at "
                            f"messages[{i}].content[{j}] has 'type': 'tool_use', "
                            f"but is missing expected key(s) "
                            f"{missing}. Full content block:\n\n{block}"
                        )
                    content.append(
                        {
                            "type": "tool_use",
                            "name": block["name"],
                            "id": block["id"],
                            "input": block["input"],
                            **block_extra,
                        }
                    )
                    # Also record the call in tool_calls unless a call with this
                    # id is already there.
                    if not any(
                        tool_call["id"] == block["id"]
                        for tool_call in cast(AIMessage, message).tool_calls
                    ):
                        cast(AIMessage, message).tool_calls.append(
                            create_tool_call(
                                name=block["name"],
                                id=block["id"],
                                args=block["input"],
                            )
                        )
                elif block.get("type") == "tool_result":
                    if missing := [
                        k for k in ("content", "tool_use_id") if k not in block
                    ]:
                        raise ValueError(
                            f"Unrecognized content block at "
                            f"messages[{i}].content[{j}] has 'type': "
                            f"'tool_result', but is missing expected key(s) "
                            f"{missing}. Full content block:\n\n{block}"
                        )
                    content.append(
                        {
                            "type": "tool_result",
                            "content": block["content"],
                            "tool_use_id": block["tool_use_id"],
                            **block_extra,
                        }
                    )
                elif (block.get("type") == "json") or "json" in block:
                    if "json" not in block:
                        raise ValueError(
                            f"Unrecognized content block at "
                            f"messages[{i}].content[{j}] has 'type': 'json' "
                            f"but does not have a 'json' key. Full "
                            f"content block:\n\n{block}"
                        )
                    content.append(
                        {
                            "type": "text",
                            "text": json.dumps(block["json"]),
                            **block_extra,
                        }
                    )
                elif (
                    block.get("type") == "guard_content"
                ) or "guard_content" in block:
                    if (
                        "guard_content" not in block
                        or "text" not in block["guard_content"]
                    ):
                        raise ValueError(
                            f"Unrecognized content block at "
                            f"messages[{i}].content[{j}] has 'type': "
                            f"'guard_content' but does not have a "
                            f"messages[{i}].content[{j}]['guard_content']['text'] "
                            f"key. Full content block:\n\n{block}"
                        )
                    # Use a dedicated local: rebinding the ``text`` parameter
                    # here would change how every later message is formatted.
                    guard_text = block["guard_content"]["text"]
                    if isinstance(guard_text, dict):
                        guard_text = guard_text["text"]
                    content.append(
                        {"type": "text", "text": guard_text, **block_extra}
                    )
                # VertexAI format
                elif block.get("type") == "media":
                    if missing := [
                        k for k in ("mime_type", "data") if k not in block
                    ]:
                        raise ValueError(
                            f"Unrecognized content block at "
                            f"messages[{i}].content[{j}] has 'type': "
                            f"'media' but does not have key(s) {missing}. Full "
                            f"content block:\n\n{block}"
                        )
                    if "image" not in block["mime_type"]:
                        raise ValueError(
                            f"Anthropic messages can only support text and image "
                            f"data. Received content block with media of type: "
                            f"{block['mime_type']}"
                        )
                    content.append(
                        {**_vertexai_image_to_anthropic(block), **block_extra}
                    )
                else:
                    raise ValueError(
                        f"Unrecognized content block at "
                        f"messages[{i}].content[{j}] does not match OpenAI, "
                        f"Anthropic, Bedrock Converse, or VertexAI format. Full "
                        f"content block:\n\n{block}"
                    )
            message.content = content  # type: ignore[assignment]
        updated_messages.append(message)
    return merge_message_runs(updated_messages)
|
||||
|
||||
|
||||
def _first_max_tokens(
|
||||
messages: Sequence[BaseMessage],
|
||||
*,
|
||||
@ -1012,3 +1641,59 @@ def _is_message_type(
|
||||
types_types = tuple(t for t in types if isinstance(t, type))
|
||||
|
||||
return message.type in types_str or isinstance(message, types_types)
|
||||
|
||||
|
||||
def _bytes_to_b64_str(bytes_: bytes) -> str:
|
||||
return base64.b64encode(bytes_).decode("utf-8")
|
||||
|
||||
|
||||
def _openai_image_to_anthropic(image: dict) -> Dict:
|
||||
"""
|
||||
Formats an image of format data:image/jpeg;base64,{b64_string}
|
||||
to a dict for anthropic api
|
||||
|
||||
{
|
||||
"type": "base64",
|
||||
"media_type": "image/jpeg",
|
||||
"data": "/9j/4AAQSkZJRg...",
|
||||
}
|
||||
|
||||
And throws an error if it's not a b64 image
|
||||
"""
|
||||
regex = r"^data:(?P<media_type>image/.+);base64,(?P<data>.+)$"
|
||||
match = re.match(regex, image["image_url"])
|
||||
if match is None:
|
||||
raise ValueError(
|
||||
"Anthropic only supports base64-encoded images currently."
|
||||
" Example: data:image/png;base64,'/9j/4AAQSk'..."
|
||||
)
|
||||
return {
|
||||
"type": "image",
|
||||
"source": {
|
||||
"type": "base64",
|
||||
"media_type": match.group("media_type"),
|
||||
"data": match.group("data"),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _bedrock_converse_image_to_anthropic(image: dict) -> dict:
    """Build an Anthropic image block from a Bedrock Converse image payload.

    Reads ``image["format"]`` for the media subtype and base64-encodes
    ``image["source"]["bytes"]`` for the data.
    """
    media_type = f"image/{image['format']}"
    encoded = _bytes_to_b64_str(image["source"]["bytes"])
    source = {"media_type": media_type, "type": "base64", "data": encoded}
    return {"type": "image", "source": source}
|
||||
|
||||
|
||||
def _vertexai_image_to_anthropic(image: dict) -> dict:
    """Build an Anthropic image block from a VertexAI ``media`` content block.

    Uses ``image["mime_type"]`` as the media type and base64-encodes
    ``image["data"]`` for the data.
    """
    media_type = image["mime_type"]
    encoded = _bytes_to_b64_str(image["data"])
    source = {"media_type": media_type, "type": "base64", "data": encoded}
    return {"type": "image", "source": source}
|
||||
|
@ -13,8 +13,10 @@ from langchain_core.messages import (
|
||||
ToolMessage,
|
||||
)
|
||||
from langchain_core.messages.utils import (
|
||||
_bytes_to_b64_str,
|
||||
convert_to_messages,
|
||||
filter_messages,
|
||||
format_content_as,
|
||||
merge_message_runs,
|
||||
trim_messages,
|
||||
)
|
||||
@ -556,3 +558,222 @@ def test_convert_to_messages() -> None:
|
||||
@pytest.mark.xfail(reason="AI message does not support refusal key yet.")
def test_convert_to_messages_openai_refusal() -> None:
    """Convert an assistant dict that only carries a ``refusal`` key (expected to fail)."""
    refusal_message = {"role": "assistant", "refusal": "9.1"}
    convert_to_messages([refusal_message])
|
||||
|
||||
|
||||
def create_base64_image(format: str = "jpeg") -> str:
    """Return a fixed base64 image payload wrapped in a ``data:image/<format>`` URL.

    Only the media subtype in the URL prefix changes with ``format``; the base64
    payload is always the same literal.
    """
    return f"data:image/{format};base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAABAAEDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD3+iiigD//2Q=="  # noqa: E501
|
||||
|
||||
|
||||
def test_format_content_as_single_message() -> None:
    """A lone message goes in, a lone message (not a list) comes out."""
    formatted = format_content_as(
        HumanMessage(content="Hello"), format="openai", text="string"
    )
    assert isinstance(formatted, BaseMessage)
    assert formatted.content == "Hello"
|
||||
|
||||
|
||||
def test_format_content_as_multiple_messages() -> None:
    """A list of string-content messages comes back as a list with contents intact."""
    expected_contents = ["System message", "Human message", "AI message"]
    system_text, human_text, ai_text = expected_contents
    result = format_content_as(
        [
            SystemMessage(content=system_text),
            HumanMessage(content=human_text),
            AIMessage(content=ai_text),
        ],
        format="openai",
        text="string",
    )
    assert isinstance(result, list)
    assert len(result) == 3
    assert all(isinstance(msg, BaseMessage) for msg in result)
    assert [msg.content for msg in result] == expected_contents
|
||||
|
||||
|
||||
def test_format_content_as_openai_string() -> None:
    """With ``text="string"``, all-text block lists collapse to newline-joined strings."""
    human = HumanMessage(
        content=[
            {"type": "text", "text": "Hello"},
            {"type": "text", "text": "World"},
        ]
    )
    ai = AIMessage(
        content=[{"type": "text", "text": "Hi"}, {"type": "text", "text": "there"}]
    )
    result = format_content_as([human, ai], format="openai", text="string")
    contents = [msg.content for msg in result]
    assert contents == ["Hello\nWorld", "Hi\nthere"]
|
||||
|
||||
|
||||
def test_format_content_as_openai_block() -> None:
    """With ``text="block"``, string contents are wrapped into single text blocks."""
    result = format_content_as(
        [HumanMessage(content="Hello"), AIMessage(content="Hi there")],
        format="openai",
        text="block",
    )
    contents = [msg.content for msg in result]
    assert contents == [
        [{"type": "text", "text": "Hello"}],
        [{"type": "text", "text": "Hi there"}],
    ]
|
||||
|
||||
|
||||
def test_format_content_as_anthropic_string() -> None:
    """Anthropic format with ``text="string"`` joins all-text block lists with newlines."""
    human = HumanMessage(
        content=[
            {"type": "text", "text": "Hello"},
            {"type": "text", "text": "World"},
        ]
    )
    ai = AIMessage(
        content=[{"type": "text", "text": "Hi"}, {"type": "text", "text": "there"}]
    )
    result = format_content_as([human, ai], format="anthropic", text="string")
    contents = [msg.content for msg in result]
    assert contents == ["Hello\nWorld", "Hi\nthere"]
|
||||
|
||||
|
||||
def test_format_content_as_anthropic_block() -> None:
    """Anthropic format with ``text="block"`` wraps string contents into text blocks."""
    result = format_content_as(
        [HumanMessage(content="Hello"), AIMessage(content="Hi there")],
        format="anthropic",
        text="block",
    )
    contents = [msg.content for msg in result]
    assert contents == [
        [{"type": "text", "text": "Hello"}],
        [{"type": "text", "text": "Hi there"}],
    ]
|
||||
|
||||
|
||||
def test_format_content_as_invalid_format() -> None:
    """An unknown ``format`` value is rejected with a ValueError."""
    messages = [HumanMessage(content="Hello")]
    with pytest.raises(ValueError, match="Unrecognized format="):
        format_content_as(messages, format="invalid", text="string")
|
||||
|
||||
|
||||
def test_format_content_as_openai_image() -> None:
    """An OpenAI-style image_url block keeps its type and URL for OpenAI."""
    image_url = create_base64_image()
    conversation = [
        HumanMessage(
            content=[
                {"type": "text", "text": "Here's an image:"},
                {"type": "image_url", "image_url": {"url": image_url}},
            ]
        )
    ]

    formatted = format_content_as(conversation, format="openai", text="block")

    # The image is the second block of the only message.
    converted_image = formatted[0].content[1]
    assert converted_image["type"] == "image_url"
    assert converted_image["image_url"]["url"] == image_url
|
||||
|
||||
|
||||
def test_format_content_as_anthropic_image() -> None:
    """An image_url block given as a bare string becomes an Anthropic image block."""
    image_url = create_base64_image()
    conversation = [
        HumanMessage(
            content=[
                {"type": "text", "text": "Here's an image:"},
                {"type": "image_url", "image_url": image_url},
            ]
        )
    ]

    formatted = format_content_as(conversation, format="anthropic", text="block")

    # The image is the second block; check its type and base64 source metadata.
    converted_image = formatted[0].content[1]
    assert converted_image["type"] == "image"
    assert converted_image["source"]["type"] == "base64"
    assert converted_image["source"]["media_type"] == "image/jpeg"
|
||||
|
||||
|
||||
def test_format_content_as_tool_message() -> None:
    """A ToolMessage keeps its class and tool_call_id; content becomes a block."""
    original = ToolMessage(content="Tool result", tool_call_id="123")

    formatted = format_content_as([original], format="openai", text="block")

    converted = formatted[0]
    assert isinstance(converted, ToolMessage)
    assert converted.content == [{"type": "text", "text": "Tool result"}]
    assert converted.tool_call_id == "123"
|
||||
|
||||
|
||||
def test_format_content_as_tool_use() -> None:
    """A tool_use content block surfaces as a tool call on the formatted message."""
    tool_use_block = {
        "type": "tool_use",
        "id": "123",
        "name": "calculator",
        "input": "2+2",
    }
    conversation = [AIMessage(content=[tool_use_block])]

    formatted = format_content_as(conversation, format="openai", text="block")

    # The block's id/name/input should map onto the tool call's id/name/args.
    tool_call = formatted[0].tool_calls[0]
    assert tool_call["id"] == "123"
    assert tool_call["name"] == "calculator"
    assert tool_call["args"] == "2+2"
|
||||
|
||||
|
||||
def test_format_content_as_json() -> None:
    """A json content block is emitted as a text block holding serialized JSON."""
    payload = {"key": "value"}
    conversation = [HumanMessage(content=[{"type": "json", "json": payload}])]

    formatted = format_content_as(conversation, format="openai", text="block")

    converted = formatted[0].content[0]
    assert converted["type"] == "text"
    # Compare the parsed text rather than the raw string so key order and
    # whitespace in the serialization do not matter.
    assert json.loads(converted["text"]) == payload
|
||||
|
||||
|
||||
def test_format_content_as_guard_content() -> None:
    """A guard_content block is flattened to a text block carrying its text."""
    guard_block = {
        "type": "guard_content",
        "guard_content": {"text": "Protected content"},
    }
    conversation = [HumanMessage(content=[guard_block])]

    formatted = format_content_as(conversation, format="openai", text="block")

    converted = formatted[0].content[0]
    assert converted["type"] == "text"
    assert converted["text"] == "Protected content"
|
||||
|
||||
|
||||
def test_format_content_as_vertexai_image() -> None:
    """A media block with raw bytes becomes an OpenAI image_url data URL."""
    media_block = {"type": "media", "mime_type": "image/jpeg", "data": b"image_bytes"}
    conversation = [HumanMessage(content=[media_block])]

    formatted = format_content_as(conversation, format="openai", text="block")

    converted = formatted[0].content[0]
    assert converted["type"] == "image_url"

    # The URL should embed the mime type and the base64 form of the same bytes.
    actual_url = converted["image_url"]["url"]
    expected_url = f"data:image/jpeg;base64,{_bytes_to_b64_str(b'image_bytes')}"
    assert actual_url == expected_url
|
||||
|
||||
|
||||
def test_format_content_as_invalid_block() -> None:
    """An unknown content block type is rejected for both target formats."""
    conversation = [HumanMessage(content=[{"type": "invalid", "foo": "bar"}])]

    # OpenAI target.
    with pytest.raises(ValueError, match="Unrecognized content block"):
        format_content_as(conversation, format="openai", text="block")

    # Anthropic target.
    with pytest.raises(ValueError, match="Unrecognized content block"):
        format_content_as(conversation, format="anthropic", text="block")
|
||||
|
||||
|
||||
def test_format_content_as_empty_message() -> None:
    """A single message with empty string content keeps empty content."""
    empty_message = HumanMessage(content="")

    # A lone message (not a list) is passed in, and a lone message is read back.
    formatted = format_content_as(empty_message, format="openai", text="string")

    assert formatted.content == ""
|
||||
|
||||
|
||||
def test_format_content_as_empty_list() -> None:
    """Formatting an empty list of messages yields an empty list."""
    formatted = format_content_as([], format="openai", text="string")

    assert formatted == []
|
||||
|
||||
|
||||
def test_format_content_as_mixed_content_types() -> None:
    """Bare strings, text blocks and images in one message all become dicts."""
    conversation = [
        HumanMessage(
            content=[
                "Text message",
                {"type": "text", "text": "Structured text"},
                {"type": "image_url", "image_url": create_base64_image()},
            ]
        )
    ]

    formatted = format_content_as(conversation, format="openai", text="block")

    # Three input items should map to three output blocks, each a dict.
    blocks = formatted[0].content
    assert len(blocks) == 3
    for position in range(3):
        assert isinstance(blocks[position], dict)
|
||||
|
Loading…
Reference in New Issue
Block a user