feat(core): Autogenerate filenames when converting file content blocks to OpenAI format (#30984)

CC @ccurme --------- Co-authored-by: Chester Curme <chester.curme@gmail.com>
2025-09-26 05:48:40 +00:00 · 2025-04-24 06:36:31 -07:00
parent 21962e2201
commit 6b0b317cb5
2 changed files with 40 additions and 8 deletions
--- a/libs/core/langchain_core/messages/utils.py
+++ b/libs/core/langchain_core/messages/utils.py
@@ -12,6 +12,7 @@ from __future__ import annotations
 import base64
 import inspect
 import json
 import logging
 import math
 from collections.abc import Iterable, Sequence
 from functools import partial
@@ -47,6 +48,8 @@ if TYPE_CHECKING:
    from langchain_core.prompt_values import PromptValue
    from langchain_core.runnables.base import Runnable
 logger = logging.getLogger(__name__)
 def _get_type(v: Any) -> str:
    """Get the type associated with the object for serialization purposes."""
@@ -1070,7 +1073,15 @@ def convert_to_openai_messages(
                    )
                # Standard multi-modal content block
                elif is_data_content_block(block):
-                    content.append(convert_to_openai_data_block(block))
+                    formatted_block = convert_to_openai_data_block(block)
                    if (
                        formatted_block.get("type") == "file"
                        and "file" in formatted_block
                        and "filename" not in formatted_block["file"]
                    ):
                        logger.info("Generating a fallback filename.")
                        formatted_block["file"]["filename"] = "LC_AUTOGENERATED"
                    content.append(formatted_block)
                # Anthropic and Bedrock converse format
                elif (block.get("type") == "image") or "image" in block:
                    # Anthropic
--- a/libs/core/tests/unit_tests/messages/test_utils.py
+++ b/libs/core/tests/unit_tests/messages/test_utils.py
@@ -1202,12 +1202,6 @@ def test_convert_to_openai_messages_multimodal() -> None:
                    "data": "<base64 string>",
                    "mime_type": "image/png",
                },
                {
                    "type": "file",
                    "source_type": "base64",
                    "data": "<base64 string>",
                    "mime_type": "application/pdf",
                },
                {
                    "type": "file",
                    "source_type": "base64",
@@ -1232,7 +1226,34 @@ def test_convert_to_openai_messages_multimodal() -> None:
    result = convert_to_openai_messages(messages, text_format="block")
    assert len(result) == 1
    message = result[0]
-    assert len(message["content"]) == 7
+    assert len(message["content"]) == 6
    # Test adding filename
    messages = [
        HumanMessage(
            content=[
                {
                    "type": "file",
                    "source_type": "base64",
                    "data": "<base64 string>",
                    "mime_type": "application/pdf",
                },
            ]
        )
    ]
    with pytest.warns(match="filename"):
        result = convert_to_openai_messages(messages, text_format="block")
    assert len(result) == 1
    message = result[0]
    assert len(message["content"]) == 1
    block = message["content"][0]
    assert block == {
        "type": "file",
        "file": {
            "file_data": "data:application/pdf;base64,<base64 string>",
            "filename": "LC_AUTOGENERATED",
        },
    }
 def test_count_tokens_approximately_empty_messages() -> None: