From c1e742347f9701aadba8920e4d1f79a636e50b68 Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Thu, 31 Oct 2024 10:34:51 -0700 Subject: [PATCH] core[patch]: rm image loading (#27797) --- libs/core/langchain_core/prompts/image.py | 33 +++++++-------- libs/core/langchain_core/utils/image.py | 33 +++------------ .../tests/unit_tests/prompts/test_chat.py | 42 ++++++------------- 3 files changed, 35 insertions(+), 73 deletions(-) diff --git a/libs/core/langchain_core/prompts/image.py b/libs/core/langchain_core/prompts/image.py index 9336e20f60a..334e2b85aee 100644 --- a/libs/core/langchain_core/prompts/image.py +++ b/libs/core/langchain_core/prompts/image.py @@ -9,7 +9,6 @@ from langchain_core.prompts.string import ( PromptTemplateFormat, ) from langchain_core.runnables import run_in_executor -from langchain_core.utils import image as image_utils class ImagePromptTemplate(BasePromptTemplate[ImageURL]): @@ -80,8 +79,8 @@ class ImagePromptTemplate(BasePromptTemplate[ImageURL]): A formatted string. Raises: - ValueError: If the url or path is not provided. - ValueError: If the path or url is not a string. + ValueError: If the url is not provided. + ValueError: If the url is not a string. Example: @@ -98,23 +97,24 @@ class ImagePromptTemplate(BasePromptTemplate[ImageURL]): else: formatted[k] = v url = kwargs.get("url") or formatted.get("url") - path = kwargs.get("path") or formatted.get("path") - detail = kwargs.get("detail") or formatted.get("detail") - if not url and not path: - msg = "Must provide either url or path." + if kwargs.get("path") or formatted.get("path"): + msg = ( + "Loading images from 'path' has been removed as of 0.3.15 for security " + "reasons. Please specify images by 'url'." + ) raise ValueError(msg) + detail = kwargs.get("detail") or formatted.get("detail") if not url: - if not isinstance(path, str): - msg = "path must be a string." - raise ValueError(msg) - url = image_utils.image_to_data_url(path) - if not isinstance(url, str): + msg = "Must provide url." + raise ValueError(msg) + elif not isinstance(url, str): msg = "url must be a string." raise ValueError(msg) - output: ImageURL = {"url": url} - if detail: - # Don't check literal values here: let the API check them - output["detail"] = detail # type: ignore[typeddict-item] + else: + output: ImageURL = {"url": url} + if detail: + # Don't check literal values here: let the API check them + output["detail"] = detail # type: ignore[typeddict-item] return output async def aformat(self, **kwargs: Any) -> ImageURL: @@ -127,7 +127,6 @@ class ImagePromptTemplate(BasePromptTemplate[ImageURL]): A formatted string. Raises: - ValueError: If the url or path is not provided. ValueError: If the path or url is not a string. """ return await run_in_executor(None, self.format, **kwargs) diff --git a/libs/core/langchain_core/utils/image.py b/libs/core/langchain_core/utils/image.py index 708f0c7fdea..7bbc499a4ac 100644 --- a/libs/core/langchain_core/utils/image.py +++ b/libs/core/langchain_core/utils/image.py @@ -1,29 +1,8 @@ -import base64 -import mimetypes +from typing import Any -def encode_image(image_path: str) -> str: - """Get base64 string from image URI. - - Args: - image_path: The path to the image. - - Returns: - The base64 string of the image. - """ - with open(image_path, "rb") as image_file: - return base64.b64encode(image_file.read()).decode("utf-8") - - -def image_to_data_url(image_path: str) -> str: - """Get data URL from image URI. - - Args: - image_path: The path to the image. - - Returns: - The data URL of the image. - """ - encoding = encode_image(image_path) - mime_type = mimetypes.guess_type(image_path)[0] - return f"data:{mime_type};base64,{encoding}" +def __getattr__(name: str) -> Any: + if name in ("encode_image", "image_to_data_url"): + msg = f"'{name}' has been removed for security reasons." + raise ValueError(msg) + raise AttributeError(name) diff --git a/libs/core/tests/unit_tests/prompts/test_chat.py b/libs/core/tests/unit_tests/prompts/test_chat.py index eee1ac498ec..8038281b58d 100644 --- a/libs/core/tests/unit_tests/prompts/test_chat.py +++ b/libs/core/tests/unit_tests/prompts/test_chat.py @@ -719,7 +719,7 @@ async def test_chat_tmpl_from_messages_multipart_image() -> None: async def test_chat_tmpl_from_messages_multipart_formatting_with_path() -> None: - """Verify that we can pass `path` for an image as a variable.""" + """Verify that we cannot pass `path` for an image as a variable.""" in_mem = "base64mem" in_file_data = "base64file01" @@ -746,35 +746,19 @@ async def test_chat_tmpl_from_messages_multipart_formatting_with_path() -> None: ), ] ) - expected = [ - SystemMessage(content="You are an AI assistant named R2D2."), - HumanMessage( - content=[ - {"type": "text", "text": "What's in this image?"}, - { - "type": "image_url", - "image_url": {"url": f"data:image/jpeg;base64,{in_mem}"}, - }, - { - "type": "image_url", - "image_url": {"url": f"data:image/jpeg;base64,{in_file_data}"}, - }, - ] - ), - ] - messages = template.format_messages( - name="R2D2", - in_mem=in_mem, - file_path=temp_file.name, - ) - assert messages == expected + with pytest.raises(ValueError): + template.format_messages( + name="R2D2", + in_mem=in_mem, + file_path=temp_file.name, + ) - messages = await template.aformat_messages( - name="R2D2", - in_mem=in_mem, - file_path=temp_file.name, - ) - assert messages == expected + with pytest.raises(ValueError): + await template.aformat_messages( + name="R2D2", + in_mem=in_mem, + file_path=temp_file.name, + ) def test_messages_placeholder() -> None: