mirror of
https://github.com/hwchase17/langchain.git
synced 2025-05-01 21:35:34 +00:00
partners: ChatAnthropic supports urls (#30809)
**Description:** partners-anthropic: ChatAnthropic now accepts both base64-encoded images and image URLs in the `part["image_url"]["url"]` message field. **Issue:** Until now, ChatAnthropic supported only base64-encoded images in the `part["image_url"]["url"]` message field. This PR enables ChatAnthropic to also accept image URLs in that field, making it compatible with OpenAI-format messages so that switching between models is easier. --------- Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
parent
d0cd115356
commit
017c8079e1
libs/partners/anthropic
@ -103,31 +103,47 @@ def _is_builtin_tool(tool: Any) -> bool:
|
|||||||
return any(tool_type.startswith(prefix) for prefix in _builtin_tool_prefixes)
|
return any(tool_type.startswith(prefix) for prefix in _builtin_tool_prefixes)
|
||||||
|
|
||||||
|
|
||||||
def _format_image(image_url: str) -> dict:
|
def _format_image(url: str) -> dict:
|
||||||
"""
|
"""
|
||||||
Formats an image of format data:image/jpeg;base64,{b64_string}
|
Converts part["image_url"]["url"] strings (OpenAI format)
|
||||||
to a dict for anthropic api
|
to the correct Anthropic format:
|
||||||
|
|
||||||
{
|
{
|
||||||
"type": "base64",
|
"type": "base64",
|
||||||
"media_type": "image/jpeg",
|
"media_type": "image/jpeg",
|
||||||
"data": "/9j/4AAQSkZJRg...",
|
"data": "/9j/4AAQSkZJRg...",
|
||||||
}
|
}
|
||||||
|
Or
|
||||||
And throws an error if it's not a b64 image
|
{
|
||||||
"""
|
"type": "url",
|
||||||
regex = r"^data:(?P<media_type>image/.+);base64,(?P<data>.+)$"
|
"url": "https://example.com/image.jpg",
|
||||||
match = re.match(regex, image_url)
|
|
||||||
if match is None:
|
|
||||||
raise ValueError(
|
|
||||||
"Anthropic only supports base64-encoded images currently."
|
|
||||||
" Example: data:image/png;base64,'/9j/4AAQSk'..."
|
|
||||||
)
|
|
||||||
return {
|
|
||||||
"type": "base64",
|
|
||||||
"media_type": match.group("media_type"),
|
|
||||||
"data": match.group("data"),
|
|
||||||
}
|
}
|
||||||
|
"""
|
||||||
|
# Base64 encoded image
|
||||||
|
base64_regex = r"^data:(?P<media_type>image/.+);base64,(?P<data>.+)$"
|
||||||
|
base64_match = re.match(base64_regex, url)
|
||||||
|
|
||||||
|
if base64_match:
|
||||||
|
return {
|
||||||
|
"type": "base64",
|
||||||
|
"media_type": base64_match.group("media_type"),
|
||||||
|
"data": base64_match.group("data"),
|
||||||
|
}
|
||||||
|
|
||||||
|
# Url
|
||||||
|
url_regex = r"^https?://.*$"
|
||||||
|
url_match = re.match(url_regex, url)
|
||||||
|
|
||||||
|
if url_match:
|
||||||
|
return {
|
||||||
|
"type": "url",
|
||||||
|
"url": url,
|
||||||
|
}
|
||||||
|
|
||||||
|
raise ValueError(
|
||||||
|
"Malformed url parameter."
|
||||||
|
" Must be either an image URL (https://example.com/image.jpg)"
|
||||||
|
" or base64 encoded string (data:image/png;base64,'/9j/4AAQSk'...)"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _merge_messages(
|
def _merge_messages(
|
||||||
@ -578,20 +594,37 @@ class ChatAnthropic(BaseChatModel):
|
|||||||
See ``ChatAnthropic.with_structured_output()`` for more.
|
See ``ChatAnthropic.with_structured_output()`` for more.
|
||||||
|
|
||||||
Image input:
|
Image input:
|
||||||
|
See `multimodal guides <https://python.langchain.com/docs/how_to/multimodal_inputs/>`_
|
||||||
|
for more detail.
|
||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
from langchain_anthropic import ChatAnthropic
|
||||||
from langchain_core.messages import HumanMessage
|
from langchain_core.messages import HumanMessage
|
||||||
|
|
||||||
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
|
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
|
||||||
image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
|
image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
|
||||||
|
|
||||||
|
llm = ChatAnthropic(model="claude-3-5-sonnet-latest")
|
||||||
message = HumanMessage(
|
message = HumanMessage(
|
||||||
content=[
|
content=[
|
||||||
{"type": "text", "text": "describe the weather in this image"},
|
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "text",
|
||||||
"image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
|
"text": "Can you highlight the differences between these two images?",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "image",
|
||||||
|
"source_type": "base64",
|
||||||
|
"data": image_data,
|
||||||
|
"mime_type": "image/jpeg",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "image",
|
||||||
|
"source_type": "url",
|
||||||
|
"url": image_url,
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
@ -600,9 +633,12 @@ class ChatAnthropic(BaseChatModel):
|
|||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
"The image depicts a sunny day with a partly cloudy sky. The sky is a brilliant blue color with scattered white clouds drifting across. The lighting and cloud patterns suggest pleasant, mild weather conditions. The scene shows a grassy field or meadow with a wooden boardwalk trail leading through it, indicating an outdoor setting on a nice day well-suited for enjoying nature."
|
"After examining both images carefully, I can see that they are actually identical."
|
||||||
|
|
||||||
PDF input:
|
PDF input:
|
||||||
|
See `multimodal guides <https://python.langchain.com/docs/how_to/multimodal_inputs/>`_
|
||||||
|
for more detail.
|
||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
from base64 import b64encode
|
from base64 import b64encode
|
||||||
@ -620,12 +656,10 @@ class ChatAnthropic(BaseChatModel):
|
|||||||
[
|
[
|
||||||
"Summarize this document.",
|
"Summarize this document.",
|
||||||
{
|
{
|
||||||
"type": "document",
|
"type": "file",
|
||||||
"source": {
|
"source_type": "base64",
|
||||||
"type": "base64",
|
"mime_type": "application/pdf",
|
||||||
"data": data,
|
"data": data,
|
||||||
"media_type": "application/pdf",
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
@ -13,6 +13,7 @@ from pytest import CaptureFixture, MonkeyPatch
|
|||||||
|
|
||||||
from langchain_anthropic import ChatAnthropic
|
from langchain_anthropic import ChatAnthropic
|
||||||
from langchain_anthropic.chat_models import (
|
from langchain_anthropic.chat_models import (
|
||||||
|
_format_image,
|
||||||
_format_messages,
|
_format_messages,
|
||||||
_merge_messages,
|
_merge_messages,
|
||||||
convert_to_anthropic_tool,
|
convert_to_anthropic_tool,
|
||||||
@ -296,6 +297,12 @@ def test__merge_messages_mutation() -> None:
|
|||||||
assert messages == original_messages
|
assert messages == original_messages
|
||||||
|
|
||||||
|
|
||||||
|
def test__format_image() -> None:
|
||||||
|
url = "dummyimage.com/600x400/000/fff"
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
_format_image(url)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture()
|
@pytest.fixture()
|
||||||
def pydantic() -> type[BaseModel]:
|
def pydantic() -> type[BaseModel]:
|
||||||
class dummy_function(BaseModel):
|
class dummy_function(BaseModel):
|
||||||
@ -770,6 +777,56 @@ def test__format_messages_with_citations() -> None:
|
|||||||
assert actual_messages == expected_messages
|
assert actual_messages == expected_messages
|
||||||
|
|
||||||
|
|
||||||
|
def test__format_messages_openai_image_format() -> None:
|
||||||
|
message = HumanMessage(
|
||||||
|
content=[
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": "Can you highlight the differences between these two images?",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": {"url": "data:image/jpeg;base64,<base64 data>"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": {"url": "https://<image url>"},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
actual_system, actual_messages = _format_messages([message])
|
||||||
|
assert actual_system is None
|
||||||
|
expected_messages = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": (
|
||||||
|
"Can you highlight the differences between these two images?"
|
||||||
|
),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "image",
|
||||||
|
"source": {
|
||||||
|
"type": "base64",
|
||||||
|
"media_type": "image/jpeg",
|
||||||
|
"data": "<base64 data>",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "image",
|
||||||
|
"source": {
|
||||||
|
"type": "url",
|
||||||
|
"url": "https://<image url>",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
]
|
||||||
|
assert actual_messages == expected_messages
|
||||||
|
|
||||||
|
|
||||||
def test__format_messages_with_multiple_system() -> None:
|
def test__format_messages_with_multiple_system() -> None:
|
||||||
messages = [
|
messages = [
|
||||||
HumanMessage("baz"),
|
HumanMessage("baz"),
|
||||||
|
Loading…
Reference in New Issue
Block a user