mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-21 14:18:52 +00:00
multiple: permit optional fields on multimodal content blocks (#30887)
Provider-specific fields can now be set directly on the content block instead of being nested under `metadata`.
This commit is contained in:
parent
83b66cb916
commit
86d51f6be6
@ -6,13 +6,11 @@ from pydantic import TypeAdapter, ValidationError
|
|||||||
from typing_extensions import NotRequired, TypedDict
|
from typing_extensions import NotRequired, TypedDict
|
||||||
|
|
||||||
|
|
||||||
class BaseDataContentBlock(TypedDict):
|
class BaseDataContentBlock(TypedDict, total=False):
|
||||||
"""Base class for data content blocks."""
|
"""Base class for data content blocks."""
|
||||||
|
|
||||||
mime_type: NotRequired[str]
|
mime_type: NotRequired[str]
|
||||||
"""MIME type of the content block (if needed)."""
|
"""MIME type of the content block (if needed)."""
|
||||||
metadata: NotRequired[dict]
|
|
||||||
"""Provider-specific metadata such as citations or filenames."""
|
|
||||||
|
|
||||||
|
|
||||||
class URLContentBlock(BaseDataContentBlock):
|
class URLContentBlock(BaseDataContentBlock):
|
||||||
|
@ -1107,6 +1107,15 @@ def test_is_data_content_block() -> None:
|
|||||||
"mime_type": "image/jpeg",
|
"mime_type": "image/jpeg",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
assert is_data_content_block(
|
||||||
|
{
|
||||||
|
"type": "image",
|
||||||
|
"source_type": "base64",
|
||||||
|
"data": "<base64 data>",
|
||||||
|
"mime_type": "image/jpeg",
|
||||||
|
"cache_control": {"type": "ephemeral"},
|
||||||
|
}
|
||||||
|
)
|
||||||
assert is_data_content_block(
|
assert is_data_content_block(
|
||||||
{
|
{
|
||||||
"type": "image",
|
"type": "image",
|
||||||
@ -1148,7 +1157,7 @@ def test_convert_to_openai_image_block() -> None:
|
|||||||
"type": "image",
|
"type": "image",
|
||||||
"source_type": "url",
|
"source_type": "url",
|
||||||
"url": "https://...",
|
"url": "https://...",
|
||||||
"metadata": {"cache_control": {"type": "ephemeral"}},
|
"cache_control": {"type": "ephemeral"},
|
||||||
}
|
}
|
||||||
expected = {
|
expected = {
|
||||||
"type": "image_url",
|
"type": "image_url",
|
||||||
@ -1162,7 +1171,7 @@ def test_convert_to_openai_image_block() -> None:
|
|||||||
"source_type": "base64",
|
"source_type": "base64",
|
||||||
"data": "<base64 data>",
|
"data": "<base64 data>",
|
||||||
"mime_type": "image/jpeg",
|
"mime_type": "image/jpeg",
|
||||||
"metadata": {"cache_control": {"type": "ephemeral"}},
|
"cache_control": {"type": "ephemeral"},
|
||||||
}
|
}
|
||||||
expected = {
|
expected = {
|
||||||
"type": "image_url",
|
"type": "image_url",
|
||||||
|
@ -239,11 +239,12 @@ def _format_data_content_block(block: dict) -> dict:
|
|||||||
else:
|
else:
|
||||||
raise ValueError(f"Block of type {block['type']} is not supported.")
|
raise ValueError(f"Block of type {block['type']} is not supported.")
|
||||||
|
|
||||||
if formatted_block and (metadata := block.get("metadata")):
|
if formatted_block:
|
||||||
if "cache_control" in metadata:
|
for key in ["cache_control", "citations", "title", "context"]:
|
||||||
formatted_block["cache_control"] = metadata["cache_control"]
|
if key in block:
|
||||||
if "citations" in metadata:
|
formatted_block[key] = block[key]
|
||||||
formatted_block["citations"] = metadata["citations"]
|
elif (metadata := block.get("metadata")) and key in metadata:
|
||||||
|
formatted_block[key] = metadata[key]
|
||||||
|
|
||||||
return formatted_block
|
return formatted_block
|
||||||
|
|
||||||
|
@ -703,7 +703,7 @@ def test__format_messages_with_cache_control() -> None:
|
|||||||
"source_type": "base64",
|
"source_type": "base64",
|
||||||
"mime_type": "application/pdf",
|
"mime_type": "application/pdf",
|
||||||
"data": "<base64 data>",
|
"data": "<base64 data>",
|
||||||
"metadata": {"cache_control": {"type": "ephemeral"}},
|
"cache_control": {"type": "ephemeral"},
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
@ -742,7 +742,7 @@ def test__format_messages_with_citations() -> None:
|
|||||||
"source_type": "text",
|
"source_type": "text",
|
||||||
"text": "The grass is green. The sky is blue.",
|
"text": "The grass is green. The sky is blue.",
|
||||||
"mime_type": "text/plain",
|
"mime_type": "text/plain",
|
||||||
"metadata": {"citations": {"enabled": True}},
|
"citations": {"enabled": True},
|
||||||
},
|
},
|
||||||
{"type": "text", "text": "What color is the grass and sky?"},
|
{"type": "text", "text": "What color is the grass and sky?"},
|
||||||
]
|
]
|
||||||
|
@ -194,14 +194,16 @@ def _format_data_content_block(block: dict) -> dict:
|
|||||||
elif block["type"] == "file":
|
elif block["type"] == "file":
|
||||||
if block["source_type"] == "base64":
|
if block["source_type"] == "base64":
|
||||||
file = {"file_data": f"data:{block['mime_type']};base64,{block['data']}"}
|
file = {"file_data": f"data:{block['mime_type']};base64,{block['data']}"}
|
||||||
if (metadata := block.get("metadata")) and ("filename" in metadata):
|
if filename := block.get("filename"):
|
||||||
|
file["filename"] = filename
|
||||||
|
elif (metadata := block.get("metadata")) and ("filename" in metadata):
|
||||||
file["filename"] = metadata["filename"]
|
file["filename"] = metadata["filename"]
|
||||||
else:
|
else:
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
"OpenAI may require a filename for file inputs. Specify a filename "
|
"OpenAI may require a filename for file inputs. Specify a filename "
|
||||||
"in the metadata: {'type': 'file', 'source_type': 'base64', "
|
"in the content block: {'type': 'file', 'source_type': 'base64', "
|
||||||
"'mime_type': 'application/pdf', 'data': '...', "
|
"'mime_type': 'application/pdf', 'data': '...', "
|
||||||
"'metadata': {'filename': 'my-pdf'}}"
|
"'filename': 'my-pdf'}"
|
||||||
)
|
)
|
||||||
formatted_block = {"type": "file", "file": file}
|
formatted_block = {"type": "file", "file": file}
|
||||||
elif block["source_type"] == "id":
|
elif block["source_type"] == "id":
|
||||||
|
@ -96,7 +96,7 @@ class TestOpenAIStandard(ChatModelIntegrationTests):
|
|||||||
"source_type": "base64",
|
"source_type": "base64",
|
||||||
"mime_type": "application/pdf",
|
"mime_type": "application/pdf",
|
||||||
"data": pdf_data,
|
"data": pdf_data,
|
||||||
"metadata": {"filename": "my-pdf"}, # OpenAI requires a filename
|
"filename": "my-pdf", # OpenAI requires a filename
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
@ -676,7 +676,7 @@ def test_format_message_content() -> None:
|
|||||||
"source_type": "base64",
|
"source_type": "base64",
|
||||||
"data": "<base64 data>",
|
"data": "<base64 data>",
|
||||||
"mime_type": "application/pdf",
|
"mime_type": "application/pdf",
|
||||||
"metadata": {"filename": "my_file"},
|
"filename": "my_file",
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
expected = [
|
expected = [
|
||||||
|
Loading…
Reference in New Issue
Block a user