mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-21 06:14:37 +00:00
multiple: permit optional fields on multimodal content blocks (#30887)
Provider-specific fields can now go directly on the content block instead of being stuffed into `metadata`.
This commit is contained in:
parent
83b66cb916
commit
86d51f6be6
@ -6,13 +6,11 @@ from pydantic import TypeAdapter, ValidationError
|
||||
from typing_extensions import NotRequired, TypedDict
|
||||
|
||||
|
||||
class BaseDataContentBlock(TypedDict):
|
||||
class BaseDataContentBlock(TypedDict, total=False):
|
||||
"""Base class for data content blocks."""
|
||||
|
||||
mime_type: NotRequired[str]
|
||||
"""MIME type of the content block (if needed)."""
|
||||
metadata: NotRequired[dict]
|
||||
"""Provider-specific metadata such as citations or filenames."""
|
||||
|
||||
|
||||
class URLContentBlock(BaseDataContentBlock):
|
||||
|
@ -1107,6 +1107,15 @@ def test_is_data_content_block() -> None:
|
||||
"mime_type": "image/jpeg",
|
||||
}
|
||||
)
|
||||
assert is_data_content_block(
|
||||
{
|
||||
"type": "image",
|
||||
"source_type": "base64",
|
||||
"data": "<base64 data>",
|
||||
"mime_type": "image/jpeg",
|
||||
"cache_control": {"type": "ephemeral"},
|
||||
}
|
||||
)
|
||||
assert is_data_content_block(
|
||||
{
|
||||
"type": "image",
|
||||
@ -1148,7 +1157,7 @@ def test_convert_to_openai_image_block() -> None:
|
||||
"type": "image",
|
||||
"source_type": "url",
|
||||
"url": "https://...",
|
||||
"metadata": {"cache_control": {"type": "ephemeral"}},
|
||||
"cache_control": {"type": "ephemeral"},
|
||||
}
|
||||
expected = {
|
||||
"type": "image_url",
|
||||
@ -1162,7 +1171,7 @@ def test_convert_to_openai_image_block() -> None:
|
||||
"source_type": "base64",
|
||||
"data": "<base64 data>",
|
||||
"mime_type": "image/jpeg",
|
||||
"metadata": {"cache_control": {"type": "ephemeral"}},
|
||||
"cache_control": {"type": "ephemeral"},
|
||||
}
|
||||
expected = {
|
||||
"type": "image_url",
|
||||
|
@ -239,11 +239,12 @@ def _format_data_content_block(block: dict) -> dict:
|
||||
else:
|
||||
raise ValueError(f"Block of type {block['type']} is not supported.")
|
||||
|
||||
if formatted_block and (metadata := block.get("metadata")):
|
||||
if "cache_control" in metadata:
|
||||
formatted_block["cache_control"] = metadata["cache_control"]
|
||||
if "citations" in metadata:
|
||||
formatted_block["citations"] = metadata["citations"]
|
||||
if formatted_block:
|
||||
for key in ["cache_control", "citations", "title", "context"]:
|
||||
if key in block:
|
||||
formatted_block[key] = block[key]
|
||||
elif (metadata := block.get("metadata")) and key in metadata:
|
||||
formatted_block[key] = metadata[key]
|
||||
|
||||
return formatted_block
|
||||
|
||||
|
@ -703,7 +703,7 @@ def test__format_messages_with_cache_control() -> None:
|
||||
"source_type": "base64",
|
||||
"mime_type": "application/pdf",
|
||||
"data": "<base64 data>",
|
||||
"metadata": {"cache_control": {"type": "ephemeral"}},
|
||||
"cache_control": {"type": "ephemeral"},
|
||||
},
|
||||
]
|
||||
)
|
||||
@ -742,7 +742,7 @@ def test__format_messages_with_citations() -> None:
|
||||
"source_type": "text",
|
||||
"text": "The grass is green. The sky is blue.",
|
||||
"mime_type": "text/plain",
|
||||
"metadata": {"citations": {"enabled": True}},
|
||||
"citations": {"enabled": True},
|
||||
},
|
||||
{"type": "text", "text": "What color is the grass and sky?"},
|
||||
]
|
||||
|
@ -194,14 +194,16 @@ def _format_data_content_block(block: dict) -> dict:
|
||||
elif block["type"] == "file":
|
||||
if block["source_type"] == "base64":
|
||||
file = {"file_data": f"data:{block['mime_type']};base64,{block['data']}"}
|
||||
if (metadata := block.get("metadata")) and ("filename" in metadata):
|
||||
if filename := block.get("filename"):
|
||||
file["filename"] = filename
|
||||
elif (metadata := block.get("metadata")) and ("filename" in metadata):
|
||||
file["filename"] = metadata["filename"]
|
||||
else:
|
||||
warnings.warn(
|
||||
"OpenAI may require a filename for file inputs. Specify a filename "
|
||||
"in the metadata: {'type': 'file', 'source_type': 'base64', "
|
||||
"in the content block: {'type': 'file', 'source_type': 'base64', "
|
||||
"'mime_type': 'application/pdf', 'data': '...', "
|
||||
"'metadata': {'filename': 'my-pdf'}}"
|
||||
"'filename': 'my-pdf'}"
|
||||
)
|
||||
formatted_block = {"type": "file", "file": file}
|
||||
elif block["source_type"] == "id":
|
||||
|
@ -96,7 +96,7 @@ class TestOpenAIStandard(ChatModelIntegrationTests):
|
||||
"source_type": "base64",
|
||||
"mime_type": "application/pdf",
|
||||
"data": pdf_data,
|
||||
"metadata": {"filename": "my-pdf"}, # OpenAI requires a filename
|
||||
"filename": "my-pdf", # OpenAI requires a filename
|
||||
},
|
||||
]
|
||||
)
|
||||
|
@ -676,7 +676,7 @@ def test_format_message_content() -> None:
|
||||
"source_type": "base64",
|
||||
"data": "<base64 data>",
|
||||
"mime_type": "application/pdf",
|
||||
"metadata": {"filename": "my_file"},
|
||||
"filename": "my_file",
|
||||
}
|
||||
]
|
||||
expected = [
|
||||
|
Loading…
Reference in New Issue
Block a user