mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-24 20:09:01 +00:00
feat(core): Autogenerate filenames when converting file content blocks to OpenAI format (#30984)
CC @ccurme --------- Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
@@ -12,6 +12,7 @@ from __future__ import annotations
|
||||
import base64
|
||||
import inspect
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
from collections.abc import Iterable, Sequence
|
||||
from functools import partial
|
||||
@@ -47,6 +48,8 @@ if TYPE_CHECKING:
|
||||
from langchain_core.prompt_values import PromptValue
|
||||
from langchain_core.runnables.base import Runnable
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _get_type(v: Any) -> str:
|
||||
"""Get the type associated with the object for serialization purposes."""
|
||||
@@ -1070,7 +1073,15 @@ def convert_to_openai_messages(
|
||||
)
|
||||
# Standard multi-modal content block
|
||||
elif is_data_content_block(block):
|
||||
content.append(convert_to_openai_data_block(block))
|
||||
formatted_block = convert_to_openai_data_block(block)
|
||||
if (
|
||||
formatted_block.get("type") == "file"
|
||||
and "file" in formatted_block
|
||||
and "filename" not in formatted_block["file"]
|
||||
):
|
||||
logger.info("Generating a fallback filename.")
|
||||
formatted_block["file"]["filename"] = "LC_AUTOGENERATED"
|
||||
content.append(formatted_block)
|
||||
# Anthropic and Bedrock converse format
|
||||
elif (block.get("type") == "image") or "image" in block:
|
||||
# Anthropic
|
||||
|
@@ -1202,12 +1202,6 @@ def test_convert_to_openai_messages_multimodal() -> None:
|
||||
"data": "<base64 string>",
|
||||
"mime_type": "image/png",
|
||||
},
|
||||
{
|
||||
"type": "file",
|
||||
"source_type": "base64",
|
||||
"data": "<base64 string>",
|
||||
"mime_type": "application/pdf",
|
||||
},
|
||||
{
|
||||
"type": "file",
|
||||
"source_type": "base64",
|
||||
@@ -1232,7 +1226,34 @@ def test_convert_to_openai_messages_multimodal() -> None:
|
||||
result = convert_to_openai_messages(messages, text_format="block")
|
||||
assert len(result) == 1
|
||||
message = result[0]
|
||||
assert len(message["content"]) == 7
|
||||
assert len(message["content"]) == 6
|
||||
|
||||
# Test adding filename
|
||||
messages = [
|
||||
HumanMessage(
|
||||
content=[
|
||||
{
|
||||
"type": "file",
|
||||
"source_type": "base64",
|
||||
"data": "<base64 string>",
|
||||
"mime_type": "application/pdf",
|
||||
},
|
||||
]
|
||||
)
|
||||
]
|
||||
with pytest.warns(match="filename"):
|
||||
result = convert_to_openai_messages(messages, text_format="block")
|
||||
assert len(result) == 1
|
||||
message = result[0]
|
||||
assert len(message["content"]) == 1
|
||||
block = message["content"][0]
|
||||
assert block == {
|
||||
"type": "file",
|
||||
"file": {
|
||||
"file_data": "data:application/pdf;base64,<base64 string>",
|
||||
"filename": "LC_AUTOGENERATED",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def test_count_tokens_approximately_empty_messages() -> None:
|
||||
|
Reference in New Issue
Block a user