mirror of https://github.com/hwchase17/langchain.git
synced 2025-08-22 02:45:49 +00:00

Merge remote-tracking branch 'upstream/master' into pprados/pdf-router

commit b5221f2476

.github/workflows/codspeed.yml (vendored, 5 changes)
@@ -14,8 +14,8 @@ on:
 jobs:
   codspeed:
     name: Run benchmarks
-    if: (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'run-benchmarks'))
-    runs-on: codspeed-macro
+    if: (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'run-codspeed-benchmarks')) || github.event_name == 'workflow_dispatch' || github.event_name == 'push'
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4

@@ -41,3 +41,4 @@ jobs:
           run: |
             cd libs/core
             uv run --no-sync pytest ./tests/benchmarks --codspeed
+          mode: walltime
@@ -15,7 +15,7 @@
     "\n",
     "To build a production application, you will need to do more work to keep track of application state appropriately.\n",
     "\n",
-    "We recommend using `langgraph` for powering such a capability. For more details, please see this [guide](https://langchain-ai.github.io/langgraph/how-tos/human-in-the-loop/).\n",
+    "We recommend using `langgraph` for powering such a capability. For more details, please see this [guide](https://langchain-ai.github.io/langgraph/concepts/human_in_the_loop/).\n",
     ":::\n"
    ]
   },
@@ -209,7 +209,7 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stdin",
+     "name": "stdout",
      "output_type": "stream",
      "text": [
       "Do you approve of the following tool invocations\n",
@@ -252,7 +252,7 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stdin",
+     "name": "stdout",
      "output_type": "stream",
      "text": [
       "Do you approve of the following tool invocations\n",
@@ -53,6 +53,8 @@ from langchain_core.messages import (
     BaseMessageChunk,
     HumanMessage,
     convert_to_messages,
+    convert_to_openai_image_block,
+    is_data_content_block,
     message_chunk_to_message,
 )
 from langchain_core.outputs import (
@@ -103,6 +105,41 @@ def _generate_response_from_error(error: BaseException) -> list[ChatGeneration]:
     return generations
 
 
+def _format_for_tracing(messages: list[BaseMessage]) -> list[BaseMessage]:
+    """Format messages for tracing in on_chat_model_start.
+
+    For backward compatibility, we update image content blocks to OpenAI Chat
+    Completions format.
+
+    Args:
+        messages: List of messages to format.
+
+    Returns:
+        List of messages formatted for tracing.
+    """
+    messages_to_trace = []
+    for message in messages:
+        message_to_trace = message
+        if isinstance(message.content, list):
+            for idx, block in enumerate(message.content):
+                if (
+                    isinstance(block, dict)
+                    and block.get("type") == "image"
+                    and is_data_content_block(block)
+                ):
+                    if message_to_trace is message:
+                        message_to_trace = message.model_copy()
+                        # Also shallow-copy content
+                        message_to_trace.content = list(message_to_trace.content)
+
+                    message_to_trace.content[idx] = (  # type: ignore[index]  # mypy confused by .model_copy
+                        convert_to_openai_image_block(block)
+                    )
+        messages_to_trace.append(message_to_trace)
+
+    return messages_to_trace
+
+
 def generate_from_stream(stream: Iterator[ChatGenerationChunk]) -> ChatResult:
     """Generate from a stream.
 
@@ -439,7 +476,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         )
         (run_manager,) = callback_manager.on_chat_model_start(
             self._serialized,
-            [messages],
+            [_format_for_tracing(messages)],
             invocation_params=params,
             options=options,
             name=config.get("run_name"),
@@ -524,7 +561,7 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
         )
         (run_manager,) = await callback_manager.on_chat_model_start(
             self._serialized,
-            [messages],
+            [_format_for_tracing(messages)],
             invocation_params=params,
             options=options,
             name=config.get("run_name"),
@@ -703,9 +740,12 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             inheritable_metadata,
             self.metadata,
         )
+        messages_to_trace = [
+            _format_for_tracing(message_list) for message_list in messages
+        ]
         run_managers = callback_manager.on_chat_model_start(
             self._serialized,
-            messages,
+            messages_to_trace,
             invocation_params=params,
             options=options,
             name=run_name,
@@ -812,9 +852,12 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
             self.metadata,
         )
 
+        messages_to_trace = [
+            _format_for_tracing(message_list) for message_list in messages
+        ]
        run_managers = await callback_manager.on_chat_model_start(
             self._serialized,
-            messages,
+            messages_to_trace,
             invocation_params=params,
             options=options,
             name=run_name,
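Taken together, these hunks route every on_chat_model_start callback through _format_for_tracing, which copies a message only when one of its image blocks actually needs rewriting, so the caller's message is never mutated. A minimal sketch of that copy-on-write behavior, using only helpers exported by this commit (the example message and URL are illustrative):

from langchain_core.messages import (
    HumanMessage,
    convert_to_openai_image_block,
    is_data_content_block,
)

message = HumanMessage(
    content=[{"type": "image", "source_type": "url", "url": "https://example.com/image.png"}]
)

traced = message
for idx, block in enumerate(message.content):
    if isinstance(block, dict) and block.get("type") == "image" and is_data_content_block(block):
        if traced is message:
            traced = message.model_copy()  # lazy copy on first rewrite
            traced.content = list(traced.content)  # shallow-copy content too
        traced.content[idx] = convert_to_openai_image_block(block)

assert traced.content == [
    {"type": "image_url", "image_url": {"url": "https://example.com/image.png"}}
]
# Original message is untouched:
assert message.content == [
    {"type": "image", "source_type": "url", "url": "https://example.com/image.png"}
]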
@@ -31,6 +31,10 @@ if TYPE_CHECKING:
         messages_to_dict,
     )
     from langchain_core.messages.chat import ChatMessage, ChatMessageChunk
+    from langchain_core.messages.content_blocks import (
+        convert_to_openai_image_block,
+        is_data_content_block,
+    )
     from langchain_core.messages.function import FunctionMessage, FunctionMessageChunk
     from langchain_core.messages.human import HumanMessage, HumanMessageChunk
     from langchain_core.messages.modifier import RemoveMessage
@@ -78,8 +82,10 @@ __all__ = [
     "ToolMessageChunk",
     "RemoveMessage",
     "_message_from_dict",
+    "convert_to_openai_image_block",
     "convert_to_messages",
     "get_buffer_string",
+    "is_data_content_block",
     "merge_content",
     "message_chunk_to_message",
     "message_to_dict",
@@ -117,9 +123,11 @@ _dynamic_imports = {
     "MessageLikeRepresentation": "utils",
     "_message_from_dict": "utils",
     "convert_to_messages": "utils",
+    "convert_to_openai_image_block": "content_blocks",
     "convert_to_openai_messages": "utils",
     "filter_messages": "utils",
     "get_buffer_string": "utils",
+    "is_data_content_block": "content_blocks",
     "merge_message_runs": "utils",
     "message_chunk_to_message": "utils",
     "messages_from_dict": "utils",
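The _dynamic_imports table maps each exported name to the submodule that defines it, so the package can resolve attributes lazily on first access. A generic sketch of that lazy-import pattern (module-level __getattr__, PEP 562); this is an assumption about the mechanism, not a quote of langchain_core's actual implementation:

import importlib
from typing import Any

_dynamic_imports = {
    "convert_to_openai_image_block": "content_blocks",
    "is_data_content_block": "content_blocks",
}


def __getattr__(name: str) -> Any:
    """Resolve exported names from their defining submodule on first access."""
    if name in _dynamic_imports:
        submodule = importlib.import_module(f"{__name__}.{_dynamic_imports[name]}")
        return getattr(submodule, name)
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")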
libs/core/langchain_core/messages/content_blocks.py (new file, 112 lines)

@@ -0,0 +1,112 @@
"""Types for content blocks."""

from typing import Any, Literal, Union

from pydantic import TypeAdapter, ValidationError
from typing_extensions import NotRequired, TypedDict


class BaseDataContentBlock(TypedDict):
    """Base class for data content blocks."""

    mime_type: NotRequired[str]
    """MIME type of the content block (if needed)."""
    metadata: NotRequired[dict]
    """Provider-specific metadata such as citations or filenames."""


class URLContentBlock(BaseDataContentBlock):
    """Content block for data from a URL."""

    type: Literal["image", "audio", "file"]
    """Type of the content block."""
    source_type: Literal["url"]
    """Source type (url)."""
    url: str
    """URL for data."""


class Base64ContentBlock(BaseDataContentBlock):
    """Content block for inline data from a base64 string."""

    type: Literal["image", "audio", "file"]
    """Type of the content block."""
    source_type: Literal["base64"]
    """Source type (base64)."""
    data: str
    """Data as a base64 string."""


class PlainTextContentBlock(BaseDataContentBlock):
    """Content block for plain text data (e.g., from a document)."""

    type: Literal["file"]
    """Type of the content block."""
    source_type: Literal["text"]
    """Source type (text)."""
    text: str
    """Text data."""


class IDContentBlock(TypedDict):
    """Content block for data specified by an identifier."""

    type: Literal["image", "audio", "file"]
    """Type of the content block."""
    source_type: Literal["id"]
    """Source type (id)."""
    id: str
    """Identifier for data source."""


DataContentBlock = Union[
    URLContentBlock,
    Base64ContentBlock,
    PlainTextContentBlock,
    IDContentBlock,
]

_DataContentBlockAdapter: TypeAdapter[DataContentBlock] = TypeAdapter(DataContentBlock)


def is_data_content_block(
    content_block: dict,
) -> bool:
    """Check if the content block is a standard data content block.

    Args:
        content_block: The content block to check.

    Returns:
        True if the content block is a data content block, False otherwise.
    """
    try:
        _ = _DataContentBlockAdapter.validate_python(content_block)
    except ValidationError:
        return False
    else:
        return True


def convert_to_openai_image_block(content_block: dict[str, Any]) -> dict:
    """Convert image content block to format expected by OpenAI Chat Completions API."""
    if content_block["source_type"] == "url":
        return {
            "type": "image_url",
            "image_url": {
                "url": content_block["url"],
            },
        }
    if content_block["source_type"] == "base64":
        if "mime_type" not in content_block:
            error_message = "mime_type key is required for base64 data."
            raise ValueError(error_message)
        mime_type = content_block["mime_type"]
        return {
            "type": "image_url",
            "image_url": {
                "url": f"data:{mime_type};base64,{content_block['data']}",
            },
        }
    error_message = "Unsupported source type. Only 'url' and 'base64' are supported."
    raise ValueError(error_message)
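A short usage sketch of the two helpers this new file exports, assuming this branch of langchain_core is installed; the block shapes and expected outputs follow directly from the definitions above (the example URL is illustrative):

from langchain_core.messages import convert_to_openai_image_block, is_data_content_block

url_block = {"type": "image", "source_type": "url", "url": "https://example.com/cat.png"}
b64_block = {
    "type": "image",
    "source_type": "base64",
    "data": "<base64 data>",
    "mime_type": "image/png",
}

assert is_data_content_block(url_block)
assert is_data_content_block(b64_block)
# OpenAI-style blocks are not standard data content blocks:
assert not is_data_content_block({"type": "image_url", "image_url": {"url": "https://..."}})

assert convert_to_openai_image_block(url_block) == {
    "type": "image_url",
    "image_url": {"url": "https://example.com/cat.png"},
}
assert convert_to_openai_image_block(b64_block) == {
    "type": "image_url",
    "image_url": {"url": "data:image/png;base64,<base64 data>"},
}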
@@ -46,7 +46,7 @@ class MyCustomAsyncHandler(AsyncCallbackHandler):
 
 @pytest.mark.benchmark
 async def test_async_callbacks_in_sync(benchmark: BenchmarkFixture) -> None:
-    infinite_cycle = cycle([AIMessage(content=" ".join(["hello", "goodbye"] * 500))])
+    infinite_cycle = cycle([AIMessage(content=" ".join(["hello", "goodbye"] * 5))])
     model = GenericFakeChatModel(messages=infinite_cycle)
 
     @benchmark  # type: ignore[misc]
@@ -8,7 +8,11 @@ import pytest
 from typing_extensions import override
 
 from langchain_core.callbacks import CallbackManagerForLLMRun
-from langchain_core.language_models import BaseChatModel, FakeListChatModel
+from langchain_core.language_models import (
+    BaseChatModel,
+    FakeListChatModel,
+    ParrotFakeChatModel,
+)
 from langchain_core.language_models.fake_chat_models import FakeListChatModelError
 from langchain_core.messages import (
     AIMessage,
@@ -396,3 +400,58 @@ async def test_disable_streaming_no_streaming_model_async(
     async for c in model.astream([], tools=[{}]):
         assert c.content == "invoke"
         break
+
+
+class FakeChatModelStartTracer(FakeTracer):
+    def __init__(self) -> None:
+        super().__init__()
+        self.messages: list = []
+
+    def on_chat_model_start(self, *args: Any, **kwargs: Any) -> Run:
+        _, messages = args
+        self.messages.append(messages)
+        return super().on_chat_model_start(
+            *args,
+            **kwargs,
+        )
+
+
+def test_trace_images_in_openai_format() -> None:
+    """Test that images are traced in OpenAI format."""
+    llm = ParrotFakeChatModel()
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image",
+                    "source_type": "url",
+                    "url": "https://example.com/image.png",
+                }
+            ],
+        }
+    ]
+    tracer = FakeChatModelStartTracer()
+    response = llm.invoke(messages, config={"callbacks": [tracer]})
+    assert tracer.messages == [
+        [
+            [
+                HumanMessage(
+                    content=[
+                        {
+                            "type": "image_url",
+                            "image_url": {"url": "https://example.com/image.png"},
+                        }
+                    ]
+                )
+            ]
+        ]
+    ]
+    # Test no mutation
+    assert response.content == [
+        {
+            "type": "image",
+            "source_type": "url",
+            "url": "https://example.com/image.png",
+        }
+    ]
@@ -24,6 +24,7 @@ EXPECTED_ALL = [
     "RemoveMessage",
     "convert_to_messages",
     "get_buffer_string",
+    "is_data_content_block",
     "merge_content",
     "message_chunk_to_message",
     "message_to_dict",
@@ -32,6 +33,7 @@ EXPECTED_ALL = [
     "filter_messages",
     "merge_message_runs",
     "trim_messages",
+    "convert_to_openai_image_block",
     "convert_to_openai_messages",
 ]
@@ -21,7 +21,9 @@ from langchain_core.messages import (
     SystemMessage,
     ToolMessage,
     convert_to_messages,
+    convert_to_openai_image_block,
     get_buffer_string,
+    is_data_content_block,
     merge_content,
     message_chunk_to_message,
     message_to_dict,
@@ -1087,3 +1089,86 @@ def test_message_text() -> None:
         ).text()
         == ""
     )
+
+
+def test_is_data_content_block() -> None:
+    assert is_data_content_block(
+        {
+            "type": "image",
+            "source_type": "url",
+            "url": "https://...",
+        }
+    )
+    assert is_data_content_block(
+        {
+            "type": "image",
+            "source_type": "base64",
+            "data": "<base64 data>",
+            "mime_type": "image/jpeg",
+        }
+    )
+    assert is_data_content_block(
+        {
+            "type": "image",
+            "source_type": "base64",
+            "data": "<base64 data>",
+            "mime_type": "image/jpeg",
+            "metadata": {"cache_control": {"type": "ephemeral"}},
+        }
+    )
+
+    assert not is_data_content_block(
+        {
+            "type": "text",
+            "text": "foo",
+        }
+    )
+    assert not is_data_content_block(
+        {
+            "type": "image_url",
+            "image_url": {"url": "https://..."},
+        }
+    )
+    assert not is_data_content_block(
+        {
+            "type": "image",
+            "source_type": "base64",
+        }
+    )
+    assert not is_data_content_block(
+        {
+            "type": "image",
+            "source": "<base64 data>",
+        }
+    )
+
+
+def test_convert_to_openai_image_block() -> None:
+    input_block = {
+        "type": "image",
+        "source_type": "url",
+        "url": "https://...",
+        "metadata": {"cache_control": {"type": "ephemeral"}},
+    }
+    expected = {
+        "type": "image_url",
+        "image_url": {"url": "https://..."},
+    }
+    result = convert_to_openai_image_block(input_block)
+    assert result == expected
+
+    input_block = {
+        "type": "image",
+        "source_type": "base64",
+        "data": "<base64 data>",
+        "mime_type": "image/jpeg",
+        "metadata": {"cache_control": {"type": "ephemeral"}},
+    }
+    expected = {
+        "type": "image_url",
+        "image_url": {
+            "url": "data:image/jpeg;base64,<base64 data>",
+        },
+    }
+    result = convert_to_openai_image_block(input_block)
+    assert result == expected
@@ -35,6 +35,7 @@ from langchain_core.messages import (
     SystemMessage,
     ToolCall,
     ToolMessage,
+    is_data_content_block,
 )
 from langchain_core.messages.ai import InputTokenDetails, UsageMetadata
 from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk
@@ -177,8 +178,78 @@ def _merge_messages(
     return merged
 
 
+def _format_data_content_block(block: dict) -> dict:
+    """Format standard data content block to format expected by Anthropic."""
+    if block["type"] == "image":
+        if block["source_type"] == "url":
+            if block["url"].startswith("data:"):
+                # Data URI
+                formatted_block = {
+                    "type": "image",
+                    "source": _format_image(block["url"]),
+                }
+            else:
+                formatted_block = {
+                    "type": "image",
+                    "source": {"type": "url", "url": block["url"]},
+                }
+        elif block["source_type"] == "base64":
+            formatted_block = {
+                "type": "image",
+                "source": {
+                    "type": "base64",
+                    "media_type": block["mime_type"],
+                    "data": block["data"],
+                },
+            }
+        else:
+            raise ValueError(
+                "Anthropic only supports 'url' and 'base64' source_type for image "
+                "content blocks."
+            )
+
+    elif block["type"] == "file":
+        if block["source_type"] == "url":
+            formatted_block = {
+                "type": "document",
+                "source": {
+                    "type": "url",
+                    "url": block["url"],
+                },
+            }
+        elif block["source_type"] == "base64":
+            formatted_block = {
+                "type": "document",
+                "source": {
+                    "type": "base64",
+                    "media_type": block.get("mime_type") or "application/pdf",
+                    "data": block["data"],
+                },
+            }
+        elif block["source_type"] == "text":
+            formatted_block = {
+                "type": "document",
+                "source": {
+                    "type": "text",
+                    "media_type": block.get("mime_type") or "text/plain",
+                    "data": block["text"],
+                },
+            }
+
+    else:
+        raise ValueError(f"Block of type {block['type']} is not supported.")
+
+    if formatted_block and (metadata := block.get("metadata")):
+        if "cache_control" in metadata:
+            formatted_block["cache_control"] = metadata["cache_control"]
+        if "citations" in metadata:
+            formatted_block["citations"] = metadata["citations"]
+
+    return formatted_block
+
+
 def _format_messages(
-    messages: list[BaseMessage],
+    messages: Sequence[BaseMessage],
 ) -> tuple[Union[str, list[dict], None], list[dict]]:
     """Format messages for anthropic."""
 
@@ -233,6 +304,8 @@ def _format_messages(
                         # convert format
                         source = _format_image(block["image_url"]["url"])
                         content.append({"type": "image", "source": source})
+                    elif is_data_content_block(block):
+                        content.append(_format_data_content_block(block))
                     elif block["type"] == "tool_use":
                         # If a tool_call with the same id as a tool_use content block
                         # exists, the tool_call is preferred.
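For reference, this is the mapping _format_data_content_block implements for a standard base64 PDF block carrying cache_control metadata; the input/output pair below is transcribed from the function and its unit test further down, not generated by importing the partner module:

# Standard (provider-agnostic) block, as accepted by langchain-core:
standard_block = {
    "type": "file",
    "source_type": "base64",
    "mime_type": "application/pdf",
    "data": "<base64 data>",
    "metadata": {"cache_control": {"type": "ephemeral"}},
}

# Anthropic-native result: the file becomes a "document", the mime type moves
# to source.media_type, and cache_control is hoisted out of metadata.
anthropic_block = {
    "type": "document",
    "source": {
        "type": "base64",
        "media_type": "application/pdf",
        "data": "<base64 data>",
    },
    "cache_control": {"type": "ephemeral"},
}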
@@ -25,6 +25,14 @@ class TestAnthropicStandard(ChatModelIntegrationTests):
     def supports_image_inputs(self) -> bool:
         return True
 
+    @property
+    def supports_image_urls(self) -> bool:
+        return True
+
+    @property
+    def supports_pdf_inputs(self) -> bool:
+        return True
+
     @property
     def supports_image_tool_message(self) -> bool:
         return True
@@ -690,6 +690,85 @@ def test__format_messages_with_cache_control() -> None:
     assert expected_system == actual_system
     assert expected_messages == actual_messages
 
+    # Test standard multi-modal format
+    messages = [
+        HumanMessage(
+            [
+                {
+                    "type": "text",
+                    "text": "Summarize this document:",
+                },
+                {
+                    "type": "file",
+                    "source_type": "base64",
+                    "mime_type": "application/pdf",
+                    "data": "<base64 data>",
+                    "metadata": {"cache_control": {"type": "ephemeral"}},
+                },
+            ]
+        )
+    ]
+    actual_system, actual_messages = _format_messages(messages)
+    assert actual_system is None
+    expected_messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "Summarize this document:",
+                },
+                {
+                    "type": "document",
+                    "source": {
+                        "type": "base64",
+                        "media_type": "application/pdf",
+                        "data": "<base64 data>",
+                    },
+                    "cache_control": {"type": "ephemeral"},
+                },
+            ],
+        }
+    ]
+    assert actual_messages == expected_messages
+
+
+def test__format_messages_with_citations() -> None:
+    input_messages = [
+        HumanMessage(
+            content=[
+                {
+                    "type": "file",
+                    "source_type": "text",
+                    "text": "The grass is green. The sky is blue.",
+                    "mime_type": "text/plain",
+                    "metadata": {"citations": {"enabled": True}},
+                },
+                {"type": "text", "text": "What color is the grass and sky?"},
+            ]
+        )
+    ]
+    expected_messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "document",
+                    "source": {
+                        "type": "text",
+                        "media_type": "text/plain",
+                        "data": "The grass is green. The sky is blue.",
+                    },
+                    "citations": {"enabled": True},
+                },
+                {"type": "text", "text": "What color is the grass and sky?"},
+            ],
+        }
+    ]
+    actual_system, actual_messages = _format_messages(input_messages)
+    assert actual_system is None
+    assert actual_messages == expected_messages
+
+
 def test__format_messages_with_multiple_system() -> None:
     messages = [
@@ -61,6 +61,8 @@ from langchain_core.messages import (
     ToolCall,
     ToolMessage,
     ToolMessageChunk,
+    convert_to_openai_image_block,
+    is_data_content_block,
 )
 from langchain_core.messages.ai import (
     InputTokenDetails,
@@ -184,6 +186,32 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
         return ChatMessage(content=_dict.get("content", ""), role=role, id=id_)  # type: ignore[arg-type]
 
 
+def _format_data_content_block(block: dict) -> dict:
+    """Format standard data content block to format expected by OpenAI."""
+    if block["type"] == "image":
+        formatted_block = convert_to_openai_image_block(block)
+
+    elif block["type"] == "file":
+        if block["source_type"] == "base64":
+            file = {"file_data": f"data:{block['mime_type']};base64,{block['data']}"}
+            if (metadata := block.get("metadata")) and ("filename" in metadata):
+                file["filename"] = metadata["filename"]
+            else:
+                warnings.warn(
+                    "OpenAI may require a filename for file inputs. Specify a filename "
+                    "in the metadata: {'type': 'file', 'source_type': 'base64', "
+                    "'mime_type': 'application/pdf', 'data': '...', "
+                    "'metadata': {'filename': 'my-pdf'}}"
+                )
+            formatted_block = {"type": "file", "file": file}
+        elif block["source_type"] == "id":
+            formatted_block = {"type": "file", "file": {"file_id": block["id"]}}
+    else:
+        raise ValueError(f"Block of type {block['type']} is not supported.")
+
+    return formatted_block
+
+
 def _format_message_content(content: Any) -> Any:
     """Format message content."""
     if content and isinstance(content, list):
@@ -196,6 +224,8 @@ def _format_message_content(content: Any) -> Any:
                 and block["type"] in ("tool_use", "thinking")
             ):
                 continue
+            elif isinstance(block, dict) and is_data_content_block(block):
+                formatted_content.append(_format_data_content_block(block))
             # Anthropic image blocks
             elif (
                 isinstance(block, dict)
@@ -3122,6 +3152,9 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list:
                     if block["image_url"].get("detail"):
                         new_block["detail"] = block["image_url"]["detail"]
                     new_blocks.append(new_block)
+                elif block["type"] == "file":
+                    new_block = {"type": "input_file", **block["file"]}
+                    new_blocks.append(new_block)
                 elif block["type"] in ("input_text", "input_image", "input_file"):
                     new_blocks.append(block)
                 else:
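The corresponding mapping on the OpenAI side, transcribed from _format_data_content_block above and its unit test below (not imported from the package): a standard base64 file block becomes a Chat Completions "file" part, with the filename pulled out of metadata and the data wrapped in a data URI.

standard_block = {
    "type": "file",
    "source_type": "base64",
    "mime_type": "application/pdf",
    "data": "<base64 data>",
    "metadata": {"filename": "my_file"},  # omitting this triggers the warning above
}

openai_block = {
    "type": "file",
    "file": {
        "filename": "my_file",
        "file_data": "data:application/pdf;base64,<base64 data>",
    },
}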
@@ -30,6 +30,10 @@ class TestAzureOpenAIStandard(ChatModelIntegrationTests):
     def supports_image_inputs(self) -> bool:
         return True
 
+    @property
+    def supports_image_urls(self) -> bool:
+        return True
+
     @property
     def supports_json_mode(self) -> bool:
         return True
@@ -1,10 +1,12 @@
 """Standard LangChain interface tests"""
 
+import base64
 from pathlib import Path
 from typing import Literal, cast
 
 import httpx
 from langchain_core.language_models import BaseChatModel
-from langchain_core.messages import AIMessage
+from langchain_core.messages import AIMessage, HumanMessage
 from langchain_tests.integration_tests import ChatModelIntegrationTests
 
 from langchain_openai import ChatOpenAI
@@ -25,6 +27,10 @@ class TestOpenAIStandard(ChatModelIntegrationTests):
     def supports_image_inputs(self) -> bool:
         return True
 
+    @property
+    def supports_image_urls(self) -> bool:
+        return True
+
     @property
     def supports_json_mode(self) -> bool:
         return True
@@ -71,6 +77,31 @@ class TestOpenAIStandard(ChatModelIntegrationTests):
         )
         return _invoke(llm, input_, stream)
 
+    @property
+    def supports_pdf_inputs(self) -> bool:
+        # OpenAI requires a filename for PDF inputs
+        # For now, we test with filename in OpenAI-specific tests
+        return False
+
+    def test_openai_pdf_inputs(self, model: BaseChatModel) -> None:
+        """Test that the model can process PDF inputs."""
+        url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
+        pdf_data = base64.b64encode(httpx.get(url).content).decode("utf-8")
+
+        message = HumanMessage(
+            [
+                {"type": "text", "text": "Summarize this document:"},
+                {
+                    "type": "file",
+                    "source_type": "base64",
+                    "mime_type": "application/pdf",
+                    "data": pdf_data,
+                    "metadata": {"filename": "my-pdf"},  # OpenAI requires a filename
+                },
+            ]
+        )
+        _ = model.invoke([message])
+
 
 def _invoke(llm: ChatOpenAI, input_: str, stream: bool) -> AIMessage:
     if stream:
@@ -649,6 +649,51 @@ def test_format_message_content() -> None:
     ]
     assert [{"type": "text", "text": "hello"}] == _format_message_content(content)
 
+    # Standard multi-modal inputs
+    content = [{"type": "image", "source_type": "url", "url": "https://..."}]
+    expected = [{"type": "image_url", "image_url": {"url": "https://..."}}]
+    assert expected == _format_message_content(content)
+
+    content = [
+        {
+            "type": "image",
+            "source_type": "base64",
+            "data": "<base64 data>",
+            "mime_type": "image/png",
+        }
+    ]
+    expected = [
+        {
+            "type": "image_url",
+            "image_url": {"url": "data:image/png;base64,<base64 data>"},
+        }
+    ]
+    assert expected == _format_message_content(content)
+
+    content = [
+        {
+            "type": "file",
+            "source_type": "base64",
+            "data": "<base64 data>",
+            "mime_type": "application/pdf",
+            "metadata": {"filename": "my_file"},
+        }
+    ]
+    expected = [
+        {
+            "type": "file",
+            "file": {
+                "filename": "my_file",
+                "file_data": "data:application/pdf;base64,<base64 data>",
+            },
+        }
+    ]
+    assert expected == _format_message_content(content)
+
+    content = [{"type": "file", "source_type": "id", "id": "file-abc123"}]
+    expected = [{"type": "file", "file": {"file_id": "file-abc123"}}]
+    assert expected == _format_message_content(content)
+
 
 class GenerateUsername(BaseModel):
     "Get a username based on someone's name and hair color."
@@ -298,13 +298,21 @@ class ChatModelIntegrationTests(ChatModelTests):
 
         .. code-block:: python
 
-            [
-                {"type": "text", "text": "describe the weather in this image"},
-                {
-                    "type": "image_url",
-                    "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
-                },
-            ]
+            {
+                "type": "image",
+                "source_type": "base64",
+                "data": "<base64 image data>",
+                "mime_type": "image/jpeg",  # or appropriate mime-type
+            }
+
+        In addition to OpenAI-style content blocks:
+
+        .. code-block:: python
+
+            {
+                "type": "image_url",
+                "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
+            }
 
         See https://python.langchain.com/docs/concepts/multimodality/
 
@@ -316,6 +324,59 @@ class ChatModelIntegrationTests(ChatModelTests):
             def supports_image_inputs(self) -> bool:
                 return True
 
+    .. dropdown:: supports_image_urls
+
+        Boolean property indicating whether the chat model supports image inputs from
+        URLs. Defaults to ``False``.
+
+        If set to ``True``, the chat model will be tested using content blocks of the
+        form
+
+        .. code-block:: python
+
+            {
+                "type": "image",
+                "source_type": "url",
+                "url": "https://...",
+            }
+
+        See https://python.langchain.com/docs/concepts/multimodality/
+
+        Example:
+
+        .. code-block:: python
+
+            @property
+            def supports_image_urls(self) -> bool:
+                return True
+
+    .. dropdown:: supports_pdf_inputs
+
+        Boolean property indicating whether the chat model supports PDF inputs.
+        Defaults to ``False``.
+
+        If set to ``True``, the chat model will be tested using content blocks of the
+        form
+
+        .. code-block:: python
+
+            {
+                "type": "file",
+                "source_type": "base64",
+                "data": "<base64 file data>",
+                "mime_type": "application/pdf",
+            }
+
+        See https://python.langchain.com/docs/concepts/multimodality/
+
+        Example:
+
+        .. code-block:: python
+
+            @property
+            def supports_pdf_inputs(self) -> bool:
+                return True
+
     .. dropdown:: supports_video_inputs
 
         Boolean property indicating whether the chat model supports video inputs.
@@ -1891,11 +1952,79 @@ class ChatModelIntegrationTests(ChatModelTests):
         result = model_with_tools.invoke(messages)
         assert isinstance(result, AIMessage)
 
+    def test_pdf_inputs(self, model: BaseChatModel) -> None:
+        """Test that the model can process PDF inputs.
+
+        This test should be skipped (see Configuration below) if the model does not
+        support PDF inputs. These will take the form:
+
+        .. code-block:: python
+
+            {
+                "type": "file",
+                "source_type": "base64",
+                "data": "<base64 file data>",
+                "mime_type": "application/pdf",
+            }
+
+        See https://python.langchain.com/docs/concepts/multimodality/
+
+        .. dropdown:: Configuration
+
+            To disable this test, set ``supports_pdf_inputs`` to False in your
+            test class:
+
+            .. code-block:: python
+
+                class TestMyChatModelIntegration(ChatModelIntegrationTests):
+
+                    @property
+                    def supports_pdf_inputs(self) -> bool:
+                        return False
+
+        .. dropdown:: Troubleshooting
+
+            If this test fails, check that the model can correctly handle messages
+            with pdf content blocks, including base64-encoded files. Otherwise, set
+            the ``supports_pdf_inputs`` property to False.
+        """
+        if not self.supports_pdf_inputs:
+            pytest.skip("Model does not support PDF inputs.")
+        url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
+        pdf_data = base64.b64encode(httpx.get(url).content).decode("utf-8")
+
+        message = HumanMessage(
+            [
+                {
+                    "type": "text",
+                    "text": "Summarize this document:",
+                },
+                {
+                    "type": "file",
+                    "source_type": "base64",
+                    "mime_type": "application/pdf",
+                    "data": pdf_data,
+                },
+            ]
+        )
+        _ = model.invoke([message])
+
     def test_image_inputs(self, model: BaseChatModel) -> None:
         """Test that the model can process image inputs.
 
         This test should be skipped (see Configuration below) if the model does not
-        support image inputs These will take the form of messages with OpenAI-style
+        support image inputs. These will take the form:
+
+        .. code-block:: python
+
+            {
+                "type": "image",
+                "source_type": "base64",
+                "data": "<base64 image data>",
+                "mime_type": "image/jpeg",  # or appropriate mime-type
+            }
+
+        For backward-compatibility, we must also support OpenAI-style
+        image content blocks:
 
         .. code-block:: python
 
@@ -1910,6 +2039,17 @@ class ChatModelIntegrationTests(ChatModelTests):
 
         See https://python.langchain.com/docs/concepts/multimodality/
 
+        If the property ``supports_image_urls`` is set to True, the test will also
+        check that we can process content blocks of the form:
+
+        .. code-block:: python
+
+            {
+                "type": "image",
+                "source_type": "url",
+                "url": "<url>",
+            }
+
         .. dropdown:: Configuration
 
             To disable this test, set ``supports_image_inputs`` to False in your
@@ -1922,16 +2062,23 @@ class ChatModelIntegrationTests(ChatModelTests):
                     def supports_image_inputs(self) -> bool:
                         return False
 
+                    # Can also explicitly disable testing image URLs:
+                    @property
+                    def supports_image_urls(self) -> bool:
+                        return False
+
         .. dropdown:: Troubleshooting
 
             If this test fails, check that the model can correctly handle messages
-            with image content blocks in OpenAI format, including base64-encoded
-            images. Otherwise, set the ``supports_image_inputs`` property to False.
+            with image content blocks, including base64-encoded images. Otherwise, set
+            the ``supports_image_inputs`` property to False.
         """
         if not self.supports_image_inputs:
             pytest.skip("Model does not support image message.")
         image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
         image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
 
+        # OpenAI format, base64 data
         message = HumanMessage(
             content=[
                 {"type": "text", "text": "describe the weather in this image"},
@@ -1941,7 +2088,35 @@ class ChatModelIntegrationTests(ChatModelTests):
                 },
             ],
         )
-        model.invoke([message])
+        _ = model.invoke([message])
+
+        # Standard format, base64 data
+        message = HumanMessage(
+            content=[
+                {"type": "text", "text": "describe the weather in this image"},
+                {
+                    "type": "image",
+                    "source_type": "base64",
+                    "mime_type": "image/jpeg",
+                    "data": image_data,
+                },
+            ],
+        )
+        _ = model.invoke([message])
+
+        # Standard format, URL
+        if self.supports_image_urls:
+            message = HumanMessage(
+                content=[
+                    {"type": "text", "text": "describe the weather in this image"},
+                    {
+                        "type": "image",
+                        "source_type": "url",
+                        "url": image_url,
+                    },
+                ],
+            )
+            _ = model.invoke([message])
 
     def test_image_tool_message(self, model: BaseChatModel) -> None:
        """Test that the model can process ToolMessages with image inputs.
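A hypothetical test class showing how a partner package would opt in to the new coverage added above; "MyChatModel" and its import are placeholders, while the property names are the ones this diff introduces:

from typing import Type

from langchain_core.language_models import BaseChatModel
from langchain_tests.integration_tests import ChatModelIntegrationTests

from my_partner_package import MyChatModel  # placeholder import


class TestMyChatModelIntegration(ChatModelIntegrationTests):
    @property
    def chat_model_class(self) -> Type[BaseChatModel]:
        return MyChatModel

    @property
    def supports_image_inputs(self) -> bool:
        return True

    @property
    def supports_image_urls(self) -> bool:
        # Also exercise the URL-sourced standard image block in test_image_inputs
        return True

    @property
    def supports_pdf_inputs(self) -> bool:
        # Enables the new test_pdf_inputs
        return True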
@@ -160,6 +160,17 @@ class ChatModelTests(BaseStandardTests):
         ``False``."""
         return False
 
+    @property
+    def supports_image_urls(self) -> bool:
+        """(bool) whether the chat model supports image inputs from URLs, defaults to
+        ``False``."""
+        return False
+
+    @property
+    def supports_pdf_inputs(self) -> bool:
+        """(bool) whether the chat model supports PDF inputs, defaults to ``False``."""
+        return False
+
     @property
     def supports_video_inputs(self) -> bool:
         """(bool) whether the chat model supports video inputs, defaults to ``False``.
@@ -373,13 +384,21 @@ class ChatModelUnitTests(ChatModelTests):
 
         .. code-block:: python
 
-            [
-                {"type": "text", "text": "describe the weather in this image"},
-                {
-                    "type": "image_url",
-                    "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
-                },
-            ]
+            {
+                "type": "image",
+                "source_type": "base64",
+                "data": "<base64 image data>",
+                "mime_type": "image/jpeg",  # or appropriate mime-type
+            }
+
+        In addition to OpenAI-style content blocks:
+
+        .. code-block:: python
+
+            {
+                "type": "image_url",
+                "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
+            }
 
         See https://python.langchain.com/docs/concepts/multimodality/
 
@@ -391,6 +410,59 @@ class ChatModelUnitTests(ChatModelTests):
             def supports_image_inputs(self) -> bool:
                 return True
 
+    .. dropdown:: supports_image_urls
+
+        Boolean property indicating whether the chat model supports image inputs from
+        URLs. Defaults to ``False``.
+
+        If set to ``True``, the chat model will be tested using content blocks of the
+        form
+
+        .. code-block:: python
+
+            {
+                "type": "image",
+                "source_type": "url",
+                "url": "https://...",
+            }
+
+        See https://python.langchain.com/docs/concepts/multimodality/
+
+        Example:
+
+        .. code-block:: python
+
+            @property
+            def supports_image_urls(self) -> bool:
+                return True
+
+    .. dropdown:: supports_pdf_inputs
+
+        Boolean property indicating whether the chat model supports PDF inputs.
+        Defaults to ``False``.
+
+        If set to ``True``, the chat model will be tested using content blocks of the
+        form
+
+        .. code-block:: python
+
+            {
+                "type": "file",
+                "source_type": "base64",
+                "data": "<base64 file data>",
+                "mime_type": "application/pdf",
+            }
+
+        See https://python.langchain.com/docs/concepts/multimodality/
+
+        Example:
+
+        .. code-block:: python
+
+            @property
+            def supports_pdf_inputs(self) -> bool:
+                return True
+
     .. dropdown:: supports_video_inputs
 
         Boolean property indicating whether the chat model supports video inputs.