feat(core): Autogenerate filenames when converting file content blocks to OpenAI format (#30984)

CC @ccurme

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
Jacob Lee
2025-04-24 06:36:31 -07:00
committed by GitHub
parent 21962e2201
commit 6b0b317cb5
2 changed files with 40 additions and 8 deletions

View File

@@ -12,6 +12,7 @@ from __future__ import annotations
import base64
import inspect
import json
import logging
import math
from collections.abc import Iterable, Sequence
from functools import partial
@@ -47,6 +48,8 @@ if TYPE_CHECKING:
from langchain_core.prompt_values import PromptValue
from langchain_core.runnables.base import Runnable
logger = logging.getLogger(__name__)
def _get_type(v: Any) -> str:
"""Get the type associated with the object for serialization purposes."""
@@ -1070,7 +1073,15 @@ def convert_to_openai_messages(
)
# Standard multi-modal content block
elif is_data_content_block(block):
content.append(convert_to_openai_data_block(block))
formatted_block = convert_to_openai_data_block(block)
if (
formatted_block.get("type") == "file"
and "file" in formatted_block
and "filename" not in formatted_block["file"]
):
logger.info("Generating a fallback filename.")
formatted_block["file"]["filename"] = "LC_AUTOGENERATED"
content.append(formatted_block)
# Anthropic and Bedrock converse format
elif (block.get("type") == "image") or "image" in block:
# Anthropic

View File

@@ -1202,12 +1202,6 @@ def test_convert_to_openai_messages_multimodal() -> None:
"data": "<base64 string>",
"mime_type": "image/png",
},
{
"type": "file",
"source_type": "base64",
"data": "<base64 string>",
"mime_type": "application/pdf",
},
{
"type": "file",
"source_type": "base64",
@@ -1232,7 +1226,34 @@ def test_convert_to_openai_messages_multimodal() -> None:
result = convert_to_openai_messages(messages, text_format="block")
assert len(result) == 1
message = result[0]
assert len(message["content"]) == 7
assert len(message["content"]) == 6
# Test adding filename
messages = [
HumanMessage(
content=[
{
"type": "file",
"source_type": "base64",
"data": "<base64 string>",
"mime_type": "application/pdf",
},
]
)
]
with pytest.warns(match="filename"):
result = convert_to_openai_messages(messages, text_format="block")
assert len(result) == 1
message = result[0]
assert len(message["content"]) == 1
block = message["content"][0]
assert block == {
"type": "file",
"file": {
"file_data": "data:application/pdf;base64,<base64 string>",
"filename": "LC_AUTOGENERATED",
},
}
def test_count_tokens_approximately_empty_messages() -> None: