add prompt template

2025-08-07 12:06:43 +00:00 · 2025-04-09 14:39:24 -04:00 · 2025-04-09 14:39:24 -04:00 · 7e3caec720
commit 7e3caec720
parent 35fbe24532
5 changed files with 273 additions and 4 deletions
--- a/libs/core/langchain_core/load/mapping.py
+++ b/libs/core/langchain_core/load/mapping.py
@ -146,6 +146,12 @@ SERIALIZABLE_MAPPING: dict[tuple[str, ...], tuple[str, ...]] = {
        "image",
        "ImagePromptTemplate",
    ),
    ("langchain", "prompts", "data", "DataPromptTemplate"): (
        "langchain_core",
        "prompts",
        "data",
        "DataPromptTemplate",
    ),
    ("langchain", "schema", "agent", "AgentActionMessageLog"): (
        "langchain_core",
        "agents",
--- a/libs/core/langchain_core/prompt_values.py
+++ b/libs/core/langchain_core/prompt_values.py
@ -16,6 +16,7 @@ from langchain_core.load.serializable import Serializable
 from langchain_core.messages import (
    AnyMessage,
    BaseMessage,
    DataContentBlock,
    HumanMessage,
    get_buffer_string,
 )
@ -130,6 +131,22 @@ class ImagePromptValue(PromptValue):
        return [HumanMessage(content=[cast("dict", self.image_url)])]
 class DataPromptValue(PromptValue):
    """Prompt value for multi-modal data."""
    content_block: DataContentBlock
    """Multi-modal content block."""
    type: Literal["DataPromptValue"] = "DataPromptValue"
    def to_string(self) -> str:
        """Return source data as a string."""
        return self.content_block["source"]
    def to_messages(self) -> list[BaseMessage]:
        """Return prompt (image URL) as messages."""
        return [HumanMessage(content=[cast("dict", self.content_block)])]
 class ChatPromptValueConcrete(ChatPromptValue):
    """Chat prompt value which explicitly lists out the message types it accepts.
--- a/libs/core/langchain_core/prompts/chat.py
+++ b/libs/core/langchain_core/prompts/chat.py
@ -31,13 +31,16 @@ from langchain_core.messages import (
    AnyMessage,
    BaseMessage,
    ChatMessage,
    DataContentBlock,
    HumanMessage,
    SystemMessage,
    convert_to_messages,
    is_data_content_block,
 )
 from langchain_core.messages.base import get_msg_title_repr
 from langchain_core.prompt_values import ChatPromptValue, ImageURL, PromptValue
 from langchain_core.prompts.base import BasePromptTemplate
 from langchain_core.prompts.data import DataPromptTemplate
 from langchain_core.prompts.image import ImagePromptTemplate
 from langchain_core.prompts.prompt import PromptTemplate
 from langchain_core.prompts.string import (
@ -468,7 +471,8 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate):
    """Human message prompt template. This is a message sent from the user."""
    prompt: Union[
-        StringPromptTemplate, list[Union[StringPromptTemplate, ImagePromptTemplate]]
+        StringPromptTemplate,
        list[Union[StringPromptTemplate, ImagePromptTemplate, DataPromptTemplate]],
    ]
    """Prompt template."""
    additional_kwargs: dict = Field(default_factory=dict)
@ -479,7 +483,10 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate):
    @classmethod
    def from_template(
        cls: type[Self],
-        template: Union[str, list[Union[str, _TextTemplateParam, _ImageTemplateParam]]],
+        template: Union[
            str,
            list[Union[str, _TextTemplateParam, _ImageTemplateParam, DataContentBlock]],
        ],
        template_format: PromptTemplateFormat = "f-string",
        *,
        partial_variables: Optional[dict[str, Any]] = None,
@ -562,6 +569,23 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate):
                        msg = f"Invalid image template: {tmpl}"
                        raise ValueError(msg)
                    prompt.append(img_template_obj)
                elif isinstance(tmpl, dict) and is_data_content_block(tmpl):  # type: ignore[arg-type]
                    data_template = cast("DataContentBlock", tmpl)
                    input_variables = []
                    for key in ["source", "source_type", "mime_type"]:
                        if key in data_template:
                            input_variables.extend(
                                get_template_variables(
                                    data_template[key],  # type: ignore[literal-required]
                                    template_format,
                                )
                            )
                    data_template_obj = DataPromptTemplate(
                        input_variables=input_variables,
                        template=data_template,
                        template_format=template_format,
                    )
                    prompt.append(data_template_obj)
                else:
                    msg = f"Invalid template: {tmpl}"
                    raise ValueError(msg)
@ -639,11 +663,16 @@ class _StringImageMessagePromptTemplate(BaseMessagePromptTemplate):
        for prompt in self.prompt:
            inputs = {var: kwargs[var] for var in prompt.input_variables}
            if isinstance(prompt, StringPromptTemplate):
-                formatted: Union[str, ImageURL] = prompt.format(**inputs)
+                formatted: Union[str, ImageURL, DataContentBlock] = prompt.format(
                    **inputs
                )
                content.append({"type": "text", "text": formatted})
            elif isinstance(prompt, ImagePromptTemplate):
                formatted = prompt.format(**inputs)
                content.append({"type": "image_url", "image_url": formatted})
            elif isinstance(prompt, DataPromptTemplate):
                formatted = prompt.format(**inputs)
                content.append(formatted)
        return self._msg_class(
            content=content, additional_kwargs=self.additional_kwargs
        )
--- a/libs/core/langchain_core/prompts/data.py
+++ b/libs/core/langchain_core/prompts/data.py
@ -0,0 +1,145 @@
 """Image prompt template for a multimodal model."""
 from typing import Any, cast
 from pydantic import Field
 from langchain_core.messages import DataContentBlock
 from langchain_core.prompt_values import DataPromptValue, PromptValue
 from langchain_core.prompts.base import BasePromptTemplate
 from langchain_core.prompts.string import (
    DEFAULT_FORMATTER_MAPPING,
    PromptTemplateFormat,
 )
 from langchain_core.runnables import run_in_executor
 class DataPromptTemplate(BasePromptTemplate[DataContentBlock]):
    """Prompt template for a multi-modal model."""
    template: dict = Field(default_factory=dict)
    """Template for the prompt."""
    template_format: PromptTemplateFormat = "f-string"
    """The format of the prompt template.
    Options are: 'f-string', 'mustache', 'jinja2'."""
    def __init__(self, **kwargs: Any) -> None:
        """Create a prompt template for multi-modal data."""
        if "input_variables" not in kwargs:
            kwargs["input_variables"] = []
        overlap = set(kwargs["input_variables"]) & {
            "source",
            "source_type",
            "mime_type",
            "metadata",
        }
        if overlap:
            msg = (
                "input_variables for the template cannot contain"
                " any of 'source', 'source_type', 'mime_type', or 'metadata'."
                f" Found: {overlap}"
            )
            raise ValueError(msg)
        super().__init__(**kwargs)
    @property
    def _prompt_type(self) -> str:
        """Return the prompt type key."""
        return "data-prompt"
    @classmethod
    def get_lc_namespace(cls) -> list[str]:
        """Get the namespace of the langchain object."""
        return ["langchain", "prompts", "data"]
    def format_prompt(self, **kwargs: Any) -> PromptValue:
        """Format the prompt with the inputs.
        Args:
            kwargs: Any arguments to be passed to the prompt template.
        Returns:
            A formatted string.
        """
        return DataPromptValue(content_block=self.format(**kwargs))
    async def aformat_prompt(self, **kwargs: Any) -> PromptValue:
        """Async format the prompt with the inputs.
        Args:
            kwargs: Any arguments to be passed to the prompt template.
        Returns:
            A formatted string.
        """
        return DataPromptValue(content_block=await self.aformat(**kwargs))
    def format(
        self,
        **kwargs: Any,
    ) -> DataContentBlock:
        """Format the prompt with the inputs.
        Args:
            kwargs: Any arguments to be passed to the prompt template.
        Returns:
            A formatted string.
        Raises:
            ValueError: If the url is not provided.
            ValueError: If the url is not a string.
        Example:
            .. code-block:: python
                prompt.format(variable1="foo")
        """
        formatted = {}
        for k, v in self.template.items():
            if isinstance(v, str):
                formatted[k] = DEFAULT_FORMATTER_MAPPING[self.template_format](
                    v, **kwargs
                )
            else:
                formatted[k] = v
        block = {}
        for k in ["type", "source_type", "source", "mime_type", "metadata"]:
            value = kwargs.get(k) or formatted.get(k)
            if value:
                block[k] = value
        for required_field in ["source", "source_type"]:
            if required_field not in block:
                msg = f"Missing required field: {required_field}"
                raise ValueError(msg)
        return cast("DataContentBlock", block)
    async def aformat(self, **kwargs: Any) -> DataContentBlock:
        """Async format the prompt with the inputs.
        Args:
            kwargs: Any arguments to be passed to the prompt template.
        Returns:
            A formatted string.
        Raises:
            ValueError: If the path or url is not a string.
        """
        return await run_in_executor(None, self.format, **kwargs)
    def pretty_repr(self, html: bool = False) -> str:
        """Return a pretty representation of the prompt.
        Args:
            html: Whether to return an html formatted string.
        Returns:
            A pretty representation of the prompt.
        """
        raise NotImplementedError
--- a/libs/core/tests/unit_tests/prompts/test_chat.py
+++ b/libs/core/tests/unit_tests/prompts/test_chat.py
@ -11,7 +11,7 @@ from syrupy import SnapshotAssertion
 from langchain_core._api.deprecation import (
    LangChainPendingDeprecationWarning,
 )
-from langchain_core.load import dumpd, load
+from langchain_core.load import dump, dumpd, load, loads
 from langchain_core.messages import (
    AIMessage,
    BaseMessage,
@ -1049,3 +1049,75 @@ def test_chat_prompt_template_variable_names() -> None:
        "title": "PromptInput",
        "type": "object",
    }
 def test_data_prompt_template_deserializable() -> None:
    """Test that the image prompt template is serializable."""
    loads(
        dump.dumps(
            ChatPromptTemplate.from_messages(
                [
                    (
                        "system",
                        [{"type": "image", "source_type": "url", "source": "{url}"}],
                    )
                ]
            )
        )
    )
@pytest.mark.requires("jinja2")
@pytest.mark.parametrize(
    ("template_format", "mime_type_placeholder", "source_data_placeholder"),
    [
        ("f-string", "{media_type}", "{source_data}"),
        ("mustache", "{{media_type}}", "{{source_data}}"),
        ("jinja2", "{{ media_type }}", "{{ source_data }}"),
    ],
 )
 def test_chat_prompt_template_data_prompt_from_message(
    template_format: PromptTemplateFormat,
    mime_type_placeholder: str,
    source_data_placeholder: str,
 ) -> None:
    prompt = {
        "type": "image",
        "source_type": "base64",
        "source": f"{source_data_placeholder}",
    }
    template = ChatPromptTemplate.from_messages(
        [("human", [prompt])], template_format=template_format
    )
    assert template.format_messages(source_data="base64data") == [
        HumanMessage(
            content=[
                {
                    "type": "image",
                    "source_type": "base64",
                    "source": "base64data",
                }
            ]
        )
    ]
    # mime_type
    prompt["mime_type"] = f"{mime_type_placeholder}"
    template = ChatPromptTemplate.from_messages(
        [("human", [prompt])], template_format=template_format
    )
    assert template.format_messages(
        media_type="image/png", source_data="base64data"
    ) == [
        HumanMessage(
            content=[
                {
                    "type": "image",
                    "source_type": "base64",
                    "source": "base64data",
                    "mime_type": "image/png",
                }
            ]
        )
    ]