Merge remote-tracking branch 'upstream/master' into pprados/pdf-router

Philippe Prados 2025-04-15 16:22:32 +02:00
commit b5221f2476
18 changed files with 861 additions and 31 deletions

View File

@@ -14,8 +14,8 @@ on:
jobs:
  codspeed:
    name: Run benchmarks
-    if: (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'run-benchmarks'))
+    if: (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'run-codspeed-benchmarks')) || github.event_name == 'workflow_dispatch' || github.event_name == 'push'
-    runs-on: codspeed-macro
+    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
@@ -41,3 +41,4 @@ jobs:
          run: |
            cd libs/core
            uv run --no-sync pytest ./tests/benchmarks --codspeed
+          mode: walltime

View File

@@ -15,7 +15,7 @@
    "\n",
    "To build a production application, you will need to do more work to keep track of application state appropriately.\n",
    "\n",
-   "We recommend using `langgraph` for powering such a capability. For more details, please see this [guide](https://langchain-ai.github.io/langgraph/how-tos/human-in-the-loop/).\n",
+   "We recommend using `langgraph` for powering such a capability. For more details, please see this [guide](https://langchain-ai.github.io/langgraph/concepts/human_in_the_loop/).\n",
    ":::\n"
   ]
  },
@@ -209,7 +209,7 @@
  "metadata": {},
  "outputs": [
   {
-   "name": "stdin",
+   "name": "stdout",
    "output_type": "stream",
    "text": [
     "Do you approve of the following tool invocations\n",
@@ -252,7 +252,7 @@
  "metadata": {},
  "outputs": [
   {
-   "name": "stdin",
+   "name": "stdout",
    "output_type": "stream",
    "text": [
     "Do you approve of the following tool invocations\n",

View File

@@ -53,6 +53,8 @@ from langchain_core.messages import (
    BaseMessageChunk,
    HumanMessage,
    convert_to_messages,
+    convert_to_openai_image_block,
+    is_data_content_block,
    message_chunk_to_message,
)
from langchain_core.outputs import (
@@ -103,6 +105,41 @@ def _generate_response_from_error(error: BaseException) -> list[ChatGeneration]:
    return generations


+def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
+    """Format messages for tracing in on_chat_model_start.
+
+    For backward compatibility, we update image content blocks to OpenAI Chat
+    Completions format.
+
+    Args:
+        messages: List of messages to format.
+
+    Returns:
+        List of messages formatted for tracing.
+    """
+    messages_to_trace = []
+    for message in messages:
+        message_to_trace = message
+        if isinstance(message.content, list):
+            for idx, block in enumerate(message.content):
+                if (
+                    isinstance(block, dict)
+                    and block.get("type") == "image"
+                    and is_data_content_block(block)
+                ):
+                    if message_to_trace is message:
+                        message_to_trace = message.model_copy()
+                        # Also shallow-copy content
+                        message_to_trace.content = list(message_to_trace.content)
+                    message_to_trace.content[idx] = (  # type: ignore[index]  # mypy confused by .model_copy
+                        convert_to_openai_image_block(block)
+                    )
+        messages_to_trace.append(message_to_trace)
+
+    return messages_to_trace
+
+
def generate_from_stream(stream: Iterator[ChatGenerationChunk]) -> ChatResult:
    """Generate from a stream.
@@ -439,7 +476,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
        )
        (run_manager,) = callback_manager.on_chat_model_start(
            self._serialized,
-            [messages],
+            [_format_for_tracing(messages)],
            invocation_params=params,
            options=options,
            name=config.get("run_name"),
@@ -524,7 +561,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
        )
        (run_manager,) = await callback_manager.on_chat_model_start(
            self._serialized,
-            [messages],
+            [_format_for_tracing(messages)],
            invocation_params=params,
            options=options,
            name=config.get("run_name"),
@@ -703,9 +740,12 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
            inheritable_metadata,
            self.metadata,
        )
+        messages_to_trace = [
+            _format_for_tracing(message_list) for message_list in messages
+        ]
        run_managers = callback_manager.on_chat_model_start(
            self._serialized,
-            messages,
+            messages_to_trace,
            invocation_params=params,
            options=options,
            name=run_name,
@@ -812,9 +852,12 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
            self.metadata,
        )
+        messages_to_trace = [
+            _format_for_tracing(message_list) for message_list in messages
+        ]
        run_managers = await callback_manager.on_chat_model_start(
            self._serialized,
-            messages,
+            messages_to_trace,
            invocation_params=params,
            options=options,
            name=run_name,
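
To make the tracing change concrete, here is a small illustrative sketch (a hypothetical snippet against this branch; `_format_for_tracing` is a private helper and is imported here only to show the traced shape):

from langchain_core.language_models.chat_models import _format_for_tracing
from langchain_core.messages import HumanMessage

msg = HumanMessage(
    content=[{"type": "image", "source_type": "url", "url": "https://example.com/cat.png"}]
)
traced = _format_for_tracing([msg])
# The traced copy carries the OpenAI Chat Completions image format...
assert traced[0].content == [
    {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}}
]
# ...while the original message is left unmutated.
assert msg.content == [
    {"type": "image", "source_type": "url", "url": "https://example.com/cat.png"}
]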

View File

@@ -31,6 +31,10 @@ if TYPE_CHECKING:
        messages_to_dict,
    )
    from langchain_core.messages.chat import ChatMessage, ChatMessageChunk
+    from langchain_core.messages.content_blocks import (
+        convert_to_openai_image_block,
+        is_data_content_block,
+    )
    from langchain_core.messages.function import FunctionMessage, FunctionMessageChunk
    from langchain_core.messages.human import HumanMessage, HumanMessageChunk
    from langchain_core.messages.modifier import RemoveMessage
@@ -78,8 +82,10 @@ __all__ = [
    "ToolMessageChunk",
    "RemoveMessage",
    "_message_from_dict",
+    "convert_to_openai_image_block",
    "convert_to_messages",
    "get_buffer_string",
+    "is_data_content_block",
    "merge_content",
    "message_chunk_to_message",
    "message_to_dict",
@@ -117,9 +123,11 @@ _dynamic_imports = {
    "MessageLikeRepresentation": "utils",
    "_message_from_dict": "utils",
    "convert_to_messages": "utils",
+    "convert_to_openai_image_block": "content_blocks",
    "convert_to_openai_messages": "utils",
    "filter_messages": "utils",
    "get_buffer_string": "utils",
+    "is_data_content_block": "content_blocks",
    "merge_message_runs": "utils",
    "message_chunk_to_message": "utils",
    "messages_from_dict": "utils",

View File

@@ -0,0 +1,112 @@
+"""Types for content blocks."""
+
+from typing import Any, Literal, Union
+
+from pydantic import TypeAdapter, ValidationError
+from typing_extensions import NotRequired, TypedDict
+
+
+class BaseDataContentBlock(TypedDict):
+    """Base class for data content blocks."""
+
+    mime_type: NotRequired[str]
+    """MIME type of the content block (if needed)."""
+    metadata: NotRequired[dict]
+    """Provider-specific metadata such as citations or filenames."""
+
+
+class URLContentBlock(BaseDataContentBlock):
+    """Content block for data from a URL."""
+
+    type: Literal["image", "audio", "file"]
+    """Type of the content block."""
+    source_type: Literal["url"]
+    """Source type (url)."""
+    url: str
+    """URL for data."""
+
+
+class Base64ContentBlock(BaseDataContentBlock):
+    """Content block for inline data from a base64 string."""
+
+    type: Literal["image", "audio", "file"]
+    """Type of the content block."""
+    source_type: Literal["base64"]
+    """Source type (base64)."""
+    data: str
+    """Data as a base64 string."""
+
+
+class PlainTextContentBlock(BaseDataContentBlock):
+    """Content block for plain text data (e.g., from a document)."""
+
+    type: Literal["file"]
+    """Type of the content block."""
+    source_type: Literal["text"]
+    """Source type (text)."""
+    text: str
+    """Text data."""
+
+
+class IDContentBlock(TypedDict):
+    """Content block for data specified by an identifier."""
+
+    type: Literal["image", "audio", "file"]
+    """Type of the content block."""
+    source_type: Literal["id"]
+    """Source type (id)."""
+    id: str
+    """Identifier for data source."""
+
+
+DataContentBlock = Union[
+    URLContentBlock,
+    Base64ContentBlock,
+    PlainTextContentBlock,
+    IDContentBlock,
+]
+
+_DataContentBlockAdapter: TypeAdapter[DataContentBlock] = TypeAdapter(DataContentBlock)
+
+
+def is_data_content_block(
+    content_block: dict,
+) -> bool:
+    """Check if the content block is a standard data content block.
+
+    Args:
+        content_block: The content block to check.
+
+    Returns:
+        True if the content block is a data content block, False otherwise.
+    """
+    try:
+        _ = _DataContentBlockAdapter.validate_python(content_block)
+    except ValidationError:
+        return False
+    else:
+        return True
+
+
+def convert_to_openai_image_block(content_block: dict[str, Any]) -> dict:
+    """Convert image content block to format expected by OpenAI Chat Completions API."""
+    if content_block["source_type"] == "url":
+        return {
+            "type": "image_url",
+            "image_url": {
+                "url": content_block["url"],
+            },
+        }
+    if content_block["source_type"] == "base64":
+        if "mime_type" not in content_block:
+            error_message = "mime_type key is required for base64 data."
+            raise ValueError(error_message)
+        mime_type = content_block["mime_type"]
+        return {
+            "type": "image_url",
+            "image_url": {
+                "url": f"data:{mime_type};base64,{content_block['data']}",
+            },
+        }
+    error_message = "Unsupported source type. Only 'url' and 'base64' are supported."
+    raise ValueError(error_message)
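
A minimal usage sketch of the two helpers this new module exports (the base64 payload is a placeholder):

from langchain_core.messages import convert_to_openai_image_block, is_data_content_block

block = {
    "type": "image",
    "source_type": "base64",
    "data": "<base64 data>",  # placeholder payload
    "mime_type": "image/png",
}
assert is_data_content_block(block)
assert not is_data_content_block({"type": "text", "text": "foo"})
assert convert_to_openai_image_block(block) == {
    "type": "image_url",
    "image_url": {"url": "data:image/png;base64,<base64 data>"},
}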

View File

@@ -46,7 +46,7 @@ class MyCustomAsyncHandler(AsyncCallbackHandler):
@pytest.mark.benchmark
async def test_async_callbacks_in_sync(benchmark: BenchmarkFixture) -> None:
-    infinite_cycle = cycle([AIMessage(content=" ".join(["hello", "goodbye"] * 500))])
+    infinite_cycle = cycle([AIMessage(content=" ".join(["hello", "goodbye"] * 5))])
    model = GenericFakeChatModel(messages=infinite_cycle)

    @benchmark  # type: ignore[misc]

View File

@@ -8,7 +8,11 @@ import pytest
from typing_extensions import override

from langchain_core.callbacks import CallbackManagerForLLMRun
-from langchain_core.language_models import BaseChatModel, FakeListChatModel
+from langchain_core.language_models import (
+    BaseChatModel,
+    FakeListChatModel,
+    ParrotFakeChatModel,
+)
from langchain_core.language_models.fake_chat_models import FakeListChatModelError
from langchain_core.messages import (
    AIMessage,
@@ -396,3 +400,58 @@ async def test_disable_streaming_no_streaming_model_async(
    async for c in model.astream([], tools=[{}]):
        assert c.content == "invoke"
        break
+
+
+class FakeChatModelStartTracer(FakeTracer):
+    def __init__(self) -> None:
+        super().__init__()
+        self.messages: list = []
+
+    def on_chat_model_start(self, *args: Any, **kwargs: Any) -> Run:
+        _, messages = args
+        self.messages.append(messages)
+        return super().on_chat_model_start(
+            *args,
+            **kwargs,
+        )
+
+
+def test_trace_images_in_openai_format() -> None:
+    """Test that images are traced in OpenAI format."""
+    llm = ParrotFakeChatModel()
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image",
+                    "source_type": "url",
+                    "url": "https://example.com/image.png",
+                }
+            ],
+        }
+    ]
+    tracer = FakeChatModelStartTracer()
+    response = llm.invoke(messages, config={"callbacks": [tracer]})
+    assert tracer.messages == [
+        [
+            [
+                HumanMessage(
+                    content=[
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": "https://example.com/image.png"},
+                        }
+                    ]
+                )
+            ]
+        ]
+    ]
+    # Test no mutation
+    assert response.content == [
+        {
+            "type": "image",
+            "source_type": "url",
+            "url": "https://example.com/image.png",
+        }
+    ]

View File

@@ -24,6 +24,7 @@ EXPECTED_ALL = [
    "RemoveMessage",
    "convert_to_messages",
    "get_buffer_string",
+    "is_data_content_block",
    "merge_content",
    "message_chunk_to_message",
    "message_to_dict",
@@ -32,6 +33,7 @@ EXPECTED_ALL = [
    "filter_messages",
    "merge_message_runs",
    "trim_messages",
+    "convert_to_openai_image_block",
    "convert_to_openai_messages",
]

View File

@@ -21,7 +21,9 @@ from langchain_core.messages import (
    SystemMessage,
    ToolMessage,
    convert_to_messages,
+    convert_to_openai_image_block,
    get_buffer_string,
+    is_data_content_block,
    merge_content,
    message_chunk_to_message,
    message_to_dict,
@@ -1087,3 +1089,86 @@ def test_message_text() -> None:
        ).text()
        == ""
    )
+
+
+def test_is_data_content_block() -> None:
+    assert is_data_content_block(
+        {
+            "type": "image",
+            "source_type": "url",
+            "url": "https://...",
+        }
+    )
+    assert is_data_content_block(
+        {
+            "type": "image",
+            "source_type": "base64",
+            "data": "<base64 data>",
+            "mime_type": "image/jpeg",
+        }
+    )
+    assert is_data_content_block(
+        {
+            "type": "image",
+            "source_type": "base64",
+            "data": "<base64 data>",
+            "mime_type": "image/jpeg",
+            "metadata": {"cache_control": {"type": "ephemeral"}},
+        }
+    )
+    assert not is_data_content_block(
+        {
+            "type": "text",
+            "text": "foo",
+        }
+    )
+    assert not is_data_content_block(
+        {
+            "type": "image_url",
+            "image_url": {"url": "https://..."},
+        }
+    )
+    assert not is_data_content_block(
+        {
+            "type": "image",
+            "source_type": "base64",
+        }
+    )
+    assert not is_data_content_block(
+        {
+            "type": "image",
+            "source": "<base64 data>",
+        }
+    )
+
+
+def test_convert_to_openai_image_block() -> None:
+    input_block = {
+        "type": "image",
+        "source_type": "url",
+        "url": "https://...",
+        "metadata": {"cache_control": {"type": "ephemeral"}},
+    }
+    expected = {
+        "type": "image_url",
+        "image_url": {"url": "https://..."},
+    }
+    result = convert_to_openai_image_block(input_block)
+    assert result == expected
+
+    input_block = {
+        "type": "image",
+        "source_type": "base64",
+        "data": "<base64 data>",
+        "mime_type": "image/jpeg",
+        "metadata": {"cache_control": {"type": "ephemeral"}},
+    }
+    expected = {
+        "type": "image_url",
+        "image_url": {
+            "url": "data:image/jpeg;base64,<base64 data>",
+        },
+    }
+    result = convert_to_openai_image_block(input_block)
+    assert result == expected

View File

@@ -35,6 +35,7 @@ from langchain_core.messages import (
    SystemMessage,
    ToolCall,
    ToolMessage,
+    is_data_content_block,
)
from langchain_core.messages.ai import InputTokenDetails, UsageMetadata
from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk
@@ -177,8 +178,78 @@ def _merge_messages(
    return merged


+def _format_data_content_block(block: dict) -> dict:
+    """Format standard data content block to format expected by Anthropic."""
+    if block["type"] == "image":
+        if block["source_type"] == "url":
+            if block["url"].startswith("data:"):
+                # Data URI
+                formatted_block = {
+                    "type": "image",
+                    "source": _format_image(block["url"]),
+                }
+            else:
+                formatted_block = {
+                    "type": "image",
+                    "source": {"type": "url", "url": block["url"]},
+                }
+        elif block["source_type"] == "base64":
+            formatted_block = {
+                "type": "image",
+                "source": {
+                    "type": "base64",
+                    "media_type": block["mime_type"],
+                    "data": block["data"],
+                },
+            }
+        else:
+            raise ValueError(
+                "Anthropic only supports 'url' and 'base64' source_type for image "
+                "content blocks."
+            )
+    elif block["type"] == "file":
+        if block["source_type"] == "url":
+            formatted_block = {
+                "type": "document",
+                "source": {
+                    "type": "url",
+                    "url": block["url"],
+                },
+            }
+        elif block["source_type"] == "base64":
+            formatted_block = {
+                "type": "document",
+                "source": {
+                    "type": "base64",
+                    "media_type": block.get("mime_type") or "application/pdf",
+                    "data": block["data"],
+                },
+            }
+        elif block["source_type"] == "text":
+            formatted_block = {
+                "type": "document",
+                "source": {
+                    "type": "text",
+                    "media_type": block.get("mime_type") or "text/plain",
+                    "data": block["text"],
+                },
+            }
+    else:
+        raise ValueError(f"Block of type {block['type']} is not supported.")
+
+    if formatted_block and (metadata := block.get("metadata")):
+        if "cache_control" in metadata:
+            formatted_block["cache_control"] = metadata["cache_control"]
+        if "citations" in metadata:
+            formatted_block["citations"] = metadata["citations"]
+
+    return formatted_block
+
+
def _format_messages(
-    messages: list[BaseMessage],
+    messages: Sequence[BaseMessage],
) -> tuple[Union[str, list[dict], None], list[dict]]:
    """Format messages for anthropic."""
@@ -233,6 +304,8 @@ def _format_messages(
                    # convert format
                    source = _format_image(block["image_url"]["url"])
                    content.append({"type": "image", "source": source})
+                elif is_data_content_block(block):
+                    content.append(_format_data_content_block(block))
                elif block["type"] == "tool_use":
                    # If a tool_call with the same id as a tool_use content block
                    # exists, the tool_call is preferred.
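
To illustrate the new branch, a sketch of what `_format_messages` now produces for a standard image block (`_format_messages` is a private helper, imported here only to make the mapping visible):

from langchain_anthropic.chat_models import _format_messages
from langchain_core.messages import HumanMessage

system, formatted = _format_messages(
    [
        HumanMessage(
            content=[
                {"type": "image", "source_type": "url", "url": "https://example.com/a.png"}
            ]
        )
    ]
)
assert system is None
assert formatted == [
    {
        "role": "user",
        "content": [
            {
                "type": "image",
                "source": {"type": "url", "url": "https://example.com/a.png"},
            }
        ],
    }
]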

View File

@@ -25,6 +25,14 @@ class TestAnthropicStandard(ChatModelIntegrationTests):
    def supports_image_inputs(self) -> bool:
        return True

+    @property
+    def supports_image_urls(self) -> bool:
+        return True
+
+    @property
+    def supports_pdf_inputs(self) -> bool:
+        return True
+
    @property
    def supports_image_tool_message(self) -> bool:
        return True

View File

@@ -690,6 +690,85 @@ def test__format_messages_with_cache_control() -> None:
    assert expected_system == actual_system
    assert expected_messages == actual_messages

+    # Test standard multi-modal format
+    messages = [
+        HumanMessage(
+            [
+                {
+                    "type": "text",
+                    "text": "Summarize this document:",
+                },
+                {
+                    "type": "file",
+                    "source_type": "base64",
+                    "mime_type": "application/pdf",
+                    "data": "<base64 data>",
+                    "metadata": {"cache_control": {"type": "ephemeral"}},
+                },
+            ]
+        )
+    ]
+    actual_system, actual_messages = _format_messages(messages)
+    assert actual_system is None
+    expected_messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "Summarize this document:",
+                },
+                {
+                    "type": "document",
+                    "source": {
+                        "type": "base64",
+                        "media_type": "application/pdf",
+                        "data": "<base64 data>",
+                    },
+                    "cache_control": {"type": "ephemeral"},
+                },
+            ],
+        }
+    ]
+    assert actual_messages == expected_messages
+
+
+def test__format_messages_with_citations() -> None:
+    input_messages = [
+        HumanMessage(
+            content=[
+                {
+                    "type": "file",
+                    "source_type": "text",
+                    "text": "The grass is green. The sky is blue.",
+                    "mime_type": "text/plain",
+                    "metadata": {"citations": {"enabled": True}},
+                },
+                {"type": "text", "text": "What color is the grass and sky?"},
+            ]
+        )
+    ]
+    expected_messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "document",
+                    "source": {
+                        "type": "text",
+                        "media_type": "text/plain",
+                        "data": "The grass is green. The sky is blue.",
+                    },
+                    "citations": {"enabled": True},
+                },
+                {"type": "text", "text": "What color is the grass and sky?"},
+            ],
+        }
+    ]
+    actual_system, actual_messages = _format_messages(input_messages)
+    assert actual_system is None
+    assert actual_messages == expected_messages
+
+
def test__format_messages_with_multiple_system() -> None:
    messages = [
def test__format_messages_with_multiple_system() -> None: def test__format_messages_with_multiple_system() -> None:
messages = [ messages = [

View File

@@ -61,6 +61,8 @@ from langchain_core.messages import (
    ToolCall,
    ToolMessage,
    ToolMessageChunk,
+    convert_to_openai_image_block,
+    is_data_content_block,
)
from langchain_core.messages.ai import (
    InputTokenDetails,
@@ -184,6 +186,32 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
    return ChatMessage(content=_dict.get("content", ""), role=role, id=id_)  # type: ignore[arg-type]


+def _format_data_content_block(block: dict) -> dict:
+    """Format standard data content block to format expected by OpenAI."""
+    if block["type"] == "image":
+        formatted_block = convert_to_openai_image_block(block)
+    elif block["type"] == "file":
+        if block["source_type"] == "base64":
+            file = {"file_data": f"data:{block['mime_type']};base64,{block['data']}"}
+            if (metadata := block.get("metadata")) and ("filename" in metadata):
+                file["filename"] = metadata["filename"]
+            else:
+                warnings.warn(
+                    "OpenAI may require a filename for file inputs. Specify a filename "
+                    "in the metadata: {'type': 'file', 'source_type': 'base64', "
+                    "'mime_type': 'application/pdf', 'data': '...', "
+                    "'metadata': {'filename': 'my-pdf'}}"
+                )
+            formatted_block = {"type": "file", "file": file}
+        elif block["source_type"] == "id":
+            formatted_block = {"type": "file", "file": {"file_id": block["id"]}}
+    else:
+        raise ValueError(f"Block of type {block['type']} is not supported.")
+
+    return formatted_block
+
+
def _format_message_content(content: Any) -> Any:
    """Format message content."""
    if content and isinstance(content, list):
@@ -196,6 +224,8 @@ def _format_message_content(content: Any) -> Any:
                and block["type"] in ("tool_use", "thinking")
            ):
                continue
+            elif isinstance(block, dict) and is_data_content_block(block):
+                formatted_content.append(_format_data_content_block(block))
            # Anthropic image blocks
            elif (
                isinstance(block, dict)
@@ -3122,6 +3152,9 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list:
                if block["image_url"].get("detail"):
                    new_block["detail"] = block["image_url"]["detail"]
                new_blocks.append(new_block)
+            elif block["type"] == "file":
+                new_block = {"type": "input_file", **block["file"]}
+                new_blocks.append(new_block)
            elif block["type"] in ("input_text", "input_image", "input_file"):
                new_blocks.append(block)
            else:
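
Taken together, these changes let callers hand standard file blocks straight to `ChatOpenAI`. A hedged end-to-end sketch (the model name and data are placeholders, and a valid `OPENAI_API_KEY` is assumed):

from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")  # placeholder model choice
message = HumanMessage(
    content=[
        {"type": "text", "text": "Summarize this document:"},
        {
            "type": "file",
            "source_type": "base64",
            "mime_type": "application/pdf",
            "data": "<base64 data>",  # placeholder payload
            "metadata": {"filename": "my-pdf"},  # OpenAI may require a filename
        },
    ]
)
response = llm.invoke([message])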

View File

@@ -30,6 +30,10 @@ class TestAzureOpenAIStandard(ChatModelIntegrationTests):
    def supports_image_inputs(self) -> bool:
        return True

+    @property
+    def supports_image_urls(self) -> bool:
+        return True
+
    @property
    def supports_json_mode(self) -> bool:
        return True

View File

@@ -1,10 +1,12 @@
"""Standard LangChain interface tests"""

+import base64
from pathlib import Path
from typing import Literal, cast

+import httpx
from langchain_core.language_models import BaseChatModel
-from langchain_core.messages import AIMessage
+from langchain_core.messages import AIMessage, HumanMessage
from langchain_tests.integration_tests import ChatModelIntegrationTests

from langchain_openai import ChatOpenAI
@@ -25,6 +27,10 @@ class TestOpenAIStandard(ChatModelIntegrationTests):
    def supports_image_inputs(self) -> bool:
        return True

+    @property
+    def supports_image_urls(self) -> bool:
+        return True
+
    @property
    def supports_json_mode(self) -> bool:
        return True
@@ -71,6 +77,31 @@ class TestOpenAIStandard(ChatModelIntegrationTests):
        )
        return _invoke(llm, input_, stream)

+    @property
+    def supports_pdf_inputs(self) -> bool:
+        # OpenAI requires a filename for PDF inputs
+        # For now, we test with filename in OpenAI-specific tests
+        return False
+
+    def test_openai_pdf_inputs(self, model: BaseChatModel) -> None:
+        """Test that the model can process PDF inputs."""
+        url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
+        pdf_data = base64.b64encode(httpx.get(url).content).decode("utf-8")
+        message = HumanMessage(
+            [
+                {"type": "text", "text": "Summarize this document:"},
+                {
+                    "type": "file",
+                    "source_type": "base64",
+                    "mime_type": "application/pdf",
+                    "data": pdf_data,
+                    "metadata": {"filename": "my-pdf"},  # OpenAI requires a filename
+                },
+            ]
+        )
+        _ = model.invoke([message])
+

def _invoke(llm: ChatOpenAI, input_: str, stream: bool) -> AIMessage:
    if stream:

View File

@@ -649,6 +649,51 @@ def test_format_message_content() -> None:
    ]
    assert [{"type": "text", "text": "hello"}] == _format_message_content(content)

+    # Standard multi-modal inputs
+    content = [{"type": "image", "source_type": "url", "url": "https://..."}]
+    expected = [{"type": "image_url", "image_url": {"url": "https://..."}}]
+    assert expected == _format_message_content(content)
+
+    content = [
+        {
+            "type": "image",
+            "source_type": "base64",
+            "data": "<base64 data>",
+            "mime_type": "image/png",
+        }
+    ]
+    expected = [
+        {
+            "type": "image_url",
+            "image_url": {"url": "data:image/png;base64,<base64 data>"},
+        }
+    ]
+    assert expected == _format_message_content(content)
+
+    content = [
+        {
+            "type": "file",
+            "source_type": "base64",
+            "data": "<base64 data>",
+            "mime_type": "application/pdf",
+            "metadata": {"filename": "my_file"},
+        }
+    ]
+    expected = [
+        {
+            "type": "file",
+            "file": {
+                "filename": "my_file",
+                "file_data": "data:application/pdf;base64,<base64 data>",
+            },
+        }
+    ]
+    assert expected == _format_message_content(content)
+
+    content = [{"type": "file", "source_type": "id", "id": "file-abc123"}]
+    expected = [{"type": "file", "file": {"file_id": "file-abc123"}}]
+    assert expected == _format_message_content(content)
+

class GenerateUsername(BaseModel):
    "Get a username based on someone's name and hair color."
class GenerateUsername(BaseModel): class GenerateUsername(BaseModel):
"Get a username based on someone's name and hair color." "Get a username based on someone's name and hair color."

View File

@@ -298,13 +298,21 @@ class ChatModelIntegrationTests(ChatModelTests):
        .. code-block:: python

-           [
-               {"type": "text", "text": "describe the weather in this image"},
-               {
-                   "type": "image_url",
-                   "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
-               },
-           ]
+           {
+               "type": "image",
+               "source_type": "base64",
+               "data": "<base64 image data>",
+               "mime_type": "image/jpeg",  # or appropriate mime-type
+           }
+
+       In addition to OpenAI-style content blocks:
+
+       .. code-block:: python
+
+           {
+               "type": "image_url",
+               "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
+           }

        See https://python.langchain.com/docs/concepts/multimodality/
@@ -316,6 +324,59 @@ class ChatModelIntegrationTests(ChatModelTests):
            def supports_image_inputs(self) -> bool:
                return True

+   .. dropdown:: supports_image_urls
+
+       Boolean property indicating whether the chat model supports image inputs from
+       URLs. Defaults to ``False``.
+
+       If set to ``True``, the chat model will be tested using content blocks of the
+       form
+
+       .. code-block:: python
+
+           {
+               "type": "image",
+               "source_type": "url",
+               "url": "https://...",
+           }
+
+       See https://python.langchain.com/docs/concepts/multimodality/
+
+       Example:
+
+       .. code-block:: python
+
+           @property
+           def supports_image_urls(self) -> bool:
+               return True
+
+   .. dropdown:: supports_pdf_inputs
+
+       Boolean property indicating whether the chat model supports PDF inputs.
+       Defaults to ``False``.
+
+       If set to ``True``, the chat model will be tested using content blocks of the
+       form
+
+       .. code-block:: python
+
+           {
+               "type": "file",
+               "source_type": "base64",
+               "data": "<base64 file data>",
+               "mime_type": "application/pdf",
+           }
+
+       See https://python.langchain.com/docs/concepts/multimodality/
+
+       Example:
+
+       .. code-block:: python
+
+           @property
+           def supports_pdf_inputs(self) -> bool:
+               return True
+
   .. dropdown:: supports_video_inputs

       Boolean property indicating whether the chat model supports video inputs.
@@ -1891,11 +1952,79 @@ class ChatModelIntegrationTests(ChatModelTests):
        result = model_with_tools.invoke(messages)
        assert isinstance(result, AIMessage)

+    def test_pdf_inputs(self, model: BaseChatModel) -> None:
+        """Test that the model can process PDF inputs.
+
+        This test should be skipped (see Configuration below) if the model does not
+        support PDF inputs. These will take the form:
+
+        .. code-block:: python
+
+            {
+                "type": "file",
+                "source_type": "base64",
+                "data": "<base64 file data>",
+                "mime_type": "application/pdf",
+            }
+
+        See https://python.langchain.com/docs/concepts/multimodality/
+
+        .. dropdown:: Configuration
+
+            To disable this test, set ``supports_pdf_inputs`` to False in your
+            test class:
+
+            .. code-block:: python
+
+                class TestMyChatModelIntegration(ChatModelIntegrationTests):
+                    @property
+                    def supports_pdf_inputs(self) -> bool:
+                        return False
+
+        .. dropdown:: Troubleshooting
+
+            If this test fails, check that the model can correctly handle messages
+            with pdf content blocks, including base64-encoded files. Otherwise, set
+            the ``supports_pdf_inputs`` property to False.
+        """
+        if not self.supports_pdf_inputs:
+            pytest.skip("Model does not support PDF inputs.")
+        url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
+        pdf_data = base64.b64encode(httpx.get(url).content).decode("utf-8")
+        message = HumanMessage(
+            [
+                {
+                    "type": "text",
+                    "text": "Summarize this document:",
+                },
+                {
+                    "type": "file",
+                    "source_type": "base64",
+                    "mime_type": "application/pdf",
+                    "data": pdf_data,
+                },
+            ]
+        )
+        _ = model.invoke([message])
+
    def test_image_inputs(self, model: BaseChatModel) -> None:
        """Test that the model can process image inputs.

        This test should be skipped (see Configuration below) if the model does not
-       support image inputs These will take the form of messages with OpenAI-style
+       support image inputs. These will take the form:
+
+       .. code-block:: python
+
+           {
+               "type": "image",
+               "source_type": "base64",
+               "data": "<base64 image data>",
+               "mime_type": "image/jpeg",  # or appropriate mime-type
+           }
+
+       For backward-compatibility, we must also support OpenAI-style
        image content blocks:

        .. code-block:: python
@@ -1910,6 +2039,17 @@ class ChatModelIntegrationTests(ChatModelTests):

        See https://python.langchain.com/docs/concepts/multimodality/

+       If the property ``supports_image_urls`` is set to True, the test will also
+       check that we can process content blocks of the form:
+
+       .. code-block:: python
+
+           {
+               "type": "image",
+               "source_type": "url",
+               "url": "<url>",
+           }
+
        .. dropdown:: Configuration

            To disable this test, set ``supports_image_inputs`` to False in your
@@ -1922,16 +2062,23 @@ class ChatModelIntegrationTests(ChatModelTests):
                def supports_image_inputs(self) -> bool:
                    return False

+               # Can also explicitly disable testing image URLs:
+               @property
+               def supports_image_urls(self) -> bool:
+                   return False
+
        .. dropdown:: Troubleshooting

            If this test fails, check that the model can correctly handle messages
-           with image content blocks in OpenAI format, including base64-encoded
-           images. Otherwise, set the ``supports_image_inputs`` property to False.
+           with image content blocks, including base64-encoded images. Otherwise, set
+           the ``supports_image_inputs`` property to False.
        """
        if not self.supports_image_inputs:
            pytest.skip("Model does not support image message.")
        image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
        image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
+       # OpenAI format, base64 data
        message = HumanMessage(
            content=[
                {"type": "text", "text": "describe the weather in this image"},
@@ -1941,7 +2088,35 @@ class ChatModelIntegrationTests(ChatModelTests):
                },
            ],
        )
-       model.invoke([message])
+       _ = model.invoke([message])
+
+       # Standard format, base64 data
+       message = HumanMessage(
+           content=[
+               {"type": "text", "text": "describe the weather in this image"},
+               {
+                   "type": "image",
+                   "source_type": "base64",
+                   "mime_type": "image/jpeg",
+                   "data": image_data,
+               },
+           ],
+       )
+       _ = model.invoke([message])
+
+       # Standard format, URL
+       if self.supports_image_urls:
+           message = HumanMessage(
+               content=[
+                   {"type": "text", "text": "describe the weather in this image"},
+                   {
+                       "type": "image",
+                       "source_type": "url",
+                       "url": image_url,
+                   },
+               ],
+           )
+           _ = model.invoke([message])
    def test_image_tool_message(self, model: BaseChatModel) -> None:
        """Test that the model can process ToolMessages with image inputs.

View File

@@ -160,6 +160,17 @@ class ChatModelTests(BaseStandardTests):
        ``False``."""
        return False

+    @property
+    def supports_image_urls(self) -> bool:
+        """(bool) whether the chat model supports image inputs from URLs, defaults to
+        ``False``."""
+        return False
+
+    @property
+    def supports_pdf_inputs(self) -> bool:
+        """(bool) whether the chat model supports PDF inputs, defaults to ``False``."""
+        return False
+
    @property
    def supports_video_inputs(self) -> bool:
        """(bool) whether the chat model supports video inputs, defaults to ``False``.
@@ -373,13 +384,21 @@ class ChatModelUnitTests(ChatModelTests):
        .. code-block:: python

-           [
-               {"type": "text", "text": "describe the weather in this image"},
-               {
-                   "type": "image_url",
-                   "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
-               },
-           ]
+           {
+               "type": "image",
+               "source_type": "base64",
+               "data": "<base64 image data>",
+               "mime_type": "image/jpeg",  # or appropriate mime-type
+           }
+
+       In addition to OpenAI-style content blocks:
+
+       .. code-block:: python
+
+           {
+               "type": "image_url",
+               "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
+           }

        See https://python.langchain.com/docs/concepts/multimodality/
@@ -391,6 +410,59 @@ class ChatModelUnitTests(ChatModelTests):
            def supports_image_inputs(self) -> bool:
                return True

+   .. dropdown:: supports_image_urls
+
+       Boolean property indicating whether the chat model supports image inputs from
+       URLs. Defaults to ``False``.
+
+       If set to ``True``, the chat model will be tested using content blocks of the
+       form
+
+       .. code-block:: python
+
+           {
+               "type": "image",
+               "source_type": "url",
+               "url": "https://...",
+           }
+
+       See https://python.langchain.com/docs/concepts/multimodality/
+
+       Example:
+
+       .. code-block:: python
+
+           @property
+           def supports_image_urls(self) -> bool:
+               return True
+
+   .. dropdown:: supports_pdf_inputs
+
+       Boolean property indicating whether the chat model supports PDF inputs.
+       Defaults to ``False``.
+
+       If set to ``True``, the chat model will be tested using content blocks of the
+       form
+
+       .. code-block:: python
+
+           {
+               "type": "file",
+               "source_type": "base64",
+               "data": "<base64 file data>",
+               "mime_type": "application/pdf",
+           }
+
+       See https://python.langchain.com/docs/concepts/multimodality/
+
+       Example:
+
+       .. code-block:: python
+
+           @property
+           def supports_pdf_inputs(self) -> bool:
+               return True
+
   .. dropdown:: supports_video_inputs

       Boolean property indicating whether the chat model supports video inputs.
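
For integrators, opting into the new coverage is a matter of overriding the new properties in a test class; a minimal sketch (the test class name and the model under test are hypothetical):

from langchain_tests.integration_tests import ChatModelIntegrationTests


class TestMyChatModelIntegration(ChatModelIntegrationTests):
    # chat_model_class and other required properties omitted for brevity

    @property
    def supports_image_urls(self) -> bool:
        return True

    @property
    def supports_pdf_inputs(self) -> bool:
        return True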