Mirror of https://github.com/hwchase17/langchain.git (synced 2025-09-26 05:48:40 +00:00)
feat(core): Autogenerate filenames when converting file content blocks to OpenAI format (#30984)

CC @ccurme

Co-authored-by: Chester Curme <chester.curme@gmail.com>
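In short: when a standard file content block reaches convert_to_openai_messages without a "filename", the converted OpenAI-format block now receives a fallback filename instead of being emitted incomplete. Below is a minimal sketch of the resulting behavior, based on the test added in this commit; it assumes HumanMessage and convert_to_openai_messages are importable from langchain_core.messages.

from langchain_core.messages import HumanMessage, convert_to_openai_messages

# A base64 PDF content block with no "filename" key.
messages = [
    HumanMessage(
        content=[
            {
                "type": "file",
                "source_type": "base64",
                "data": "<base64 string>",
                "mime_type": "application/pdf",
            }
        ]
    )
]

result = convert_to_openai_messages(messages, text_format="block")
# The converted block carries a generated fallback filename:
# {
#     "type": "file",
#     "file": {
#         "file_data": "data:application/pdf;base64,<base64 string>",
#         "filename": "LC_AUTOGENERATED",
#     },
# }

The added test also expects a warning matching "filename" during conversion, so callers who want a meaningful name should set "filename" on the block themselves.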
@@ -12,6 +12,7 @@ from __future__ import annotations
 import base64
 import inspect
 import json
+import logging
 import math
 from collections.abc import Iterable, Sequence
 from functools import partial
@@ -47,6 +48,8 @@ if TYPE_CHECKING:
     from langchain_core.prompt_values import PromptValue
     from langchain_core.runnables.base import Runnable
 
+logger = logging.getLogger(__name__)
+
 
 def _get_type(v: Any) -> str:
     """Get the type associated with the object for serialization purposes."""
@@ -1070,7 +1073,15 @@ def convert_to_openai_messages(
                     )
                 # Standard multi-modal content block
                 elif is_data_content_block(block):
-                    content.append(convert_to_openai_data_block(block))
+                    formatted_block = convert_to_openai_data_block(block)
+                    if (
+                        formatted_block.get("type") == "file"
+                        and "file" in formatted_block
+                        and "filename" not in formatted_block["file"]
+                    ):
+                        logger.info("Generating a fallback filename.")
+                        formatted_block["file"]["filename"] = "LC_AUTOGENERATED"
+                    content.append(formatted_block)
                 # Anthropic and Bedrock converse format
                 elif (block.get("type") == "image") or "image" in block:
                     # Anthropic
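Aside (not part of the diff): the new branch only fires when the converted block lacks a filename. A block that already specifies one is assumed to keep it, on the assumption that convert_to_openai_data_block forwards the name into the OpenAI "file" payload; "report.pdf" below is a hypothetical value used purely for illustration.

# Hypothetical input block: "filename" is present, so the fallback
# ("LC_AUTOGENERATED") is not applied and the given name is kept.
block_with_name = {
    "type": "file",
    "source_type": "base64",
    "data": "<base64 string>",
    "mime_type": "application/pdf",
    "filename": "report.pdf",  # assumed to be forwarded to file["filename"]
}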
@@ -1202,12 +1202,6 @@ def test_convert_to_openai_messages_multimodal() -> None:
                     "data": "<base64 string>",
                     "mime_type": "image/png",
                 },
-                {
-                    "type": "file",
-                    "source_type": "base64",
-                    "data": "<base64 string>",
-                    "mime_type": "application/pdf",
-                },
                 {
                     "type": "file",
                     "source_type": "base64",
@@ -1232,7 +1226,34 @@ def test_convert_to_openai_messages_multimodal() -> None:
     result = convert_to_openai_messages(messages, text_format="block")
     assert len(result) == 1
     message = result[0]
-    assert len(message["content"]) == 7
+    assert len(message["content"]) == 6
+
+    # Test adding filename
+    messages = [
+        HumanMessage(
+            content=[
+                {
+                    "type": "file",
+                    "source_type": "base64",
+                    "data": "<base64 string>",
+                    "mime_type": "application/pdf",
+                },
+            ]
+        )
+    ]
+    with pytest.warns(match="filename"):
+        result = convert_to_openai_messages(messages, text_format="block")
+    assert len(result) == 1
+    message = result[0]
+    assert len(message["content"]) == 1
+    block = message["content"][0]
+    assert block == {
+        "type": "file",
+        "file": {
+            "file_data": "data:application/pdf;base64,<base64 string>",
+            "filename": "LC_AUTOGENERATED",
+        },
+    }
 
 
 def test_count_tokens_approximately_empty_messages() -> None:
|