feat(core): Autogenerate filenames when converting file content blocks to OpenAI format (#30984)

CC @ccurme

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
Jacob Lee
2025-04-24 06:36:31 -07:00
committed by GitHub
parent 21962e2201
commit 6b0b317cb5
2 changed files with 40 additions and 8 deletions

View File

@@ -12,6 +12,7 @@ from __future__ import annotations
import base64 import base64
import inspect import inspect
import json import json
import logging
import math import math
from collections.abc import Iterable, Sequence from collections.abc import Iterable, Sequence
from functools import partial from functools import partial
@@ -47,6 +48,8 @@ if TYPE_CHECKING:
from langchain_core.prompt_values import PromptValue from langchain_core.prompt_values import PromptValue
from langchain_core.runnables.base import Runnable from langchain_core.runnables.base import Runnable
logger = logging.getLogger(__name__)
def _get_type(v: Any) -> str: def _get_type(v: Any) -> str:
"""Get the type associated with the object for serialization purposes.""" """Get the type associated with the object for serialization purposes."""
@@ -1070,7 +1073,15 @@ def convert_to_openai_messages(
) )
# Standard multi-modal content block # Standard multi-modal content block
elif is_data_content_block(block): elif is_data_content_block(block):
content.append(convert_to_openai_data_block(block)) formatted_block = convert_to_openai_data_block(block)
if (
formatted_block.get("type") == "file"
and "file" in formatted_block
and "filename" not in formatted_block["file"]
):
logger.info("Generating a fallback filename.")
formatted_block["file"]["filename"] = "LC_AUTOGENERATED"
content.append(formatted_block)
# Anthropic and Bedrock converse format # Anthropic and Bedrock converse format
elif (block.get("type") == "image") or "image" in block: elif (block.get("type") == "image") or "image" in block:
# Anthropic # Anthropic

View File

@@ -1202,12 +1202,6 @@ def test_convert_to_openai_messages_multimodal() -> None:
"data": "<base64 string>", "data": "<base64 string>",
"mime_type": "image/png", "mime_type": "image/png",
}, },
{
"type": "file",
"source_type": "base64",
"data": "<base64 string>",
"mime_type": "application/pdf",
},
{ {
"type": "file", "type": "file",
"source_type": "base64", "source_type": "base64",
@@ -1232,7 +1226,34 @@ def test_convert_to_openai_messages_multimodal() -> None:
result = convert_to_openai_messages(messages, text_format="block") result = convert_to_openai_messages(messages, text_format="block")
assert len(result) == 1 assert len(result) == 1
message = result[0] message = result[0]
assert len(message["content"]) == 7 assert len(message["content"]) == 6
# Test adding filename
messages = [
HumanMessage(
content=[
{
"type": "file",
"source_type": "base64",
"data": "<base64 string>",
"mime_type": "application/pdf",
},
]
)
]
with pytest.warns(match="filename"):
result = convert_to_openai_messages(messages, text_format="block")
assert len(result) == 1
message = result[0]
assert len(message["content"]) == 1
block = message["content"][0]
assert block == {
"type": "file",
"file": {
"file_data": "data:application/pdf;base64,<base64 string>",
"filename": "LC_AUTOGENERATED",
},
}
def test_count_tokens_approximately_empty_messages() -> None: def test_count_tokens_approximately_empty_messages() -> None: