mirror of
https://github.com/hwchase17/langchain.git
synced 2025-04-27 19:46:55 +00:00
partners: ChatAnthropic supports urls (#30809)
**Description:** partners-anthropic: ChatAnthropic supports b64 and urls in the part[image_url][url] message variable **Issue**: ChatAnthropic right now only supports b64 encoded images in the part[image_url][url] message variable. This PR enables ChatAnthropic to also accept image urls in said variable and makes it compatible with OpenAI messages to make model switching easier. --------- Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
parent
d0cd115356
commit
017c8079e1
@ -103,31 +103,47 @@ def _is_builtin_tool(tool: Any) -> bool:
|
||||
return any(tool_type.startswith(prefix) for prefix in _builtin_tool_prefixes)
|
||||
|
||||
|
||||
def _format_image(image_url: str) -> dict:
|
||||
def _format_image(url: str) -> dict:
|
||||
"""
|
||||
Formats an image of format data:image/jpeg;base64,{b64_string}
|
||||
to a dict for anthropic api
|
||||
|
||||
Converts part["image_url"]["url"] strings (OpenAI format)
|
||||
to the correct Anthropic format:
|
||||
{
|
||||
"type": "base64",
|
||||
"media_type": "image/jpeg",
|
||||
"data": "/9j/4AAQSkZJRg...",
|
||||
}
|
||||
|
||||
And throws an error if it's not a b64 image
|
||||
"""
|
||||
regex = r"^data:(?P<media_type>image/.+);base64,(?P<data>.+)$"
|
||||
match = re.match(regex, image_url)
|
||||
if match is None:
|
||||
raise ValueError(
|
||||
"Anthropic only supports base64-encoded images currently."
|
||||
" Example: data:image/png;base64,'/9j/4AAQSk'..."
|
||||
)
|
||||
return {
|
||||
"type": "base64",
|
||||
"media_type": match.group("media_type"),
|
||||
"data": match.group("data"),
|
||||
Or
|
||||
{
|
||||
"type": "url",
|
||||
"url": "https://example.com/image.jpg",
|
||||
}
|
||||
"""
|
||||
# Base64 encoded image
|
||||
base64_regex = r"^data:(?P<media_type>image/.+);base64,(?P<data>.+)$"
|
||||
base64_match = re.match(base64_regex, url)
|
||||
|
||||
if base64_match:
|
||||
return {
|
||||
"type": "base64",
|
||||
"media_type": base64_match.group("media_type"),
|
||||
"data": base64_match.group("data"),
|
||||
}
|
||||
|
||||
# Url
|
||||
url_regex = r"^https?://.*$"
|
||||
url_match = re.match(url_regex, url)
|
||||
|
||||
if url_match:
|
||||
return {
|
||||
"type": "url",
|
||||
"url": url,
|
||||
}
|
||||
|
||||
raise ValueError(
|
||||
"Malformed url parameter."
|
||||
" Must be either an image URL (https://example.com/image.jpg)"
|
||||
" or base64 encoded string (data:image/png;base64,'/9j/4AAQSk'...)"
|
||||
)
|
||||
|
||||
|
||||
def _merge_messages(
|
||||
@ -578,20 +594,37 @@ class ChatAnthropic(BaseChatModel):
|
||||
See ``ChatAnthropic.with_structured_output()`` for more.
|
||||
|
||||
Image input:
|
||||
See `multimodal guides <https://python.langchain.com/docs/how_to/multimodal_inputs/>`_
|
||||
for more detail.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import base64
|
||||
|
||||
import httpx
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
from langchain_core.messages import HumanMessage
|
||||
|
||||
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
|
||||
image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
|
||||
|
||||
llm = ChatAnthropic(model="claude-3-5-sonnet-latest")
|
||||
message = HumanMessage(
|
||||
content=[
|
||||
{"type": "text", "text": "describe the weather in this image"},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
|
||||
"type": "text",
|
||||
"text": "Can you highlight the differences between these two images?",
|
||||
},
|
||||
{
|
||||
"type": "image",
|
||||
"source_type": "base64",
|
||||
"data": image_data,
|
||||
"mime_type": "image/jpeg",
|
||||
},
|
||||
{
|
||||
"type": "image",
|
||||
"source_type": "url",
|
||||
"url": image_url,
|
||||
},
|
||||
],
|
||||
)
|
||||
@ -600,9 +633,12 @@ class ChatAnthropic(BaseChatModel):
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
"The image depicts a sunny day with a partly cloudy sky. The sky is a brilliant blue color with scattered white clouds drifting across. The lighting and cloud patterns suggest pleasant, mild weather conditions. The scene shows a grassy field or meadow with a wooden boardwalk trail leading through it, indicating an outdoor setting on a nice day well-suited for enjoying nature."
|
||||
"After examining both images carefully, I can see that they are actually identical."
|
||||
|
||||
PDF input:
|
||||
See `multimodal guides <https://python.langchain.com/docs/how_to/multimodal_inputs/>`_
|
||||
for more detail.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from base64 import b64encode
|
||||
@ -620,12 +656,10 @@ class ChatAnthropic(BaseChatModel):
|
||||
[
|
||||
"Summarize this document.",
|
||||
{
|
||||
"type": "document",
|
||||
"source": {
|
||||
"type": "base64",
|
||||
"data": data,
|
||||
"media_type": "application/pdf",
|
||||
},
|
||||
"type": "file",
|
||||
"source_type": "base64",
|
||||
"mime_type": "application/pdf",
|
||||
"data": data,
|
||||
},
|
||||
]
|
||||
)
|
||||
|
@ -13,6 +13,7 @@ from pytest import CaptureFixture, MonkeyPatch
|
||||
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
from langchain_anthropic.chat_models import (
|
||||
_format_image,
|
||||
_format_messages,
|
||||
_merge_messages,
|
||||
convert_to_anthropic_tool,
|
||||
@ -296,6 +297,12 @@ def test__merge_messages_mutation() -> None:
|
||||
assert messages == original_messages
|
||||
|
||||
|
||||
def test__format_image() -> None:
    """A URL without an http(s) scheme or base64 data prefix is rejected."""
    scheme_less_url = "dummyimage.com/600x400/000/fff"
    with pytest.raises(ValueError):
        _format_image(scheme_less_url)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def pydantic() -> type[BaseModel]:
|
||||
class dummy_function(BaseModel):
|
||||
@ -770,6 +777,56 @@ def test__format_messages_with_citations() -> None:
|
||||
assert actual_messages == expected_messages
|
||||
|
||||
|
||||
def test__format_messages_openai_image_format() -> None:
    """OpenAI-style ``image_url`` parts are translated to Anthropic image blocks.

    Covers both accepted forms: a base64 data URI and a plain https URL.
    """
    prompt = "Can you highlight the differences between these two images?"
    base64_url = "data:image/jpeg;base64,<base64 data>"
    remote_url = "https://<image url>"

    message = HumanMessage(
        content=[
            {"type": "text", "text": prompt},
            {"type": "image_url", "image_url": {"url": base64_url}},
            {"type": "image_url", "image_url": {"url": remote_url}},
        ],
    )

    # Expected Anthropic payload, one block per input part, in the same order.
    expected_text = {"type": "text", "text": prompt}
    expected_base64_image = {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": "image/jpeg",
            "data": "<base64 data>",
        },
    }
    expected_url_image = {
        "type": "image",
        "source": {
            "type": "url",
            "url": "https://<image url>",
        },
    }
    expected_messages = [
        {
            "role": "user",
            "content": [expected_text, expected_base64_image, expected_url_image],
        }
    ]

    actual_system, actual_messages = _format_messages([message])

    assert actual_system is None
    assert actual_messages == expected_messages
|
||||
|
||||
|
||||
def test__format_messages_with_multiple_system() -> None:
|
||||
messages = [
|
||||
HumanMessage("baz"),
|
||||
|
Loading…
Reference in New Issue
Block a user