mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-22 19:09:57 +00:00
multiple: multi-modal content blocks (#30746)
Introduces standard content block format for images, audio, and files. ## Examples Image from url: ``` { "type": "image", "source_type": "url", "url": "https://path.to.image.png", } ``` Image, in-line data: ``` { "type": "image", "source_type": "base64", "data": "<base64 string>", "mime_type": "image/png", } ``` PDF, in-line data: ``` { "type": "file", "source_type": "base64", "data": "<base64 string>", "mime_type": "application/pdf", } ``` File from ID: ``` { "type": "file", "source_type": "id", "id": "file-abc123", } ``` Plain-text file: ``` { "type": "file", "source_type": "text", "text": "foo bar", } ```
This commit is contained in:
@@ -160,6 +160,17 @@ class ChatModelTests(BaseStandardTests):
|
||||
``False``."""
|
||||
return False
|
||||
|
||||
@property
|
||||
def supports_image_urls(self) -> bool:
|
||||
"""(bool) whether the chat model supports image inputs from URLs, defaults to
|
||||
``False``."""
|
||||
return False
|
||||
|
||||
@property
|
||||
def supports_pdf_inputs(self) -> bool:
|
||||
"""(bool) whether the chat model supports PDF inputs, defaults to ``False``."""
|
||||
return False
|
||||
|
||||
@property
|
||||
def supports_video_inputs(self) -> bool:
|
||||
"""(bool) whether the chat model supports video inputs, efaults to ``False``.
|
||||
@@ -373,13 +384,21 @@ class ChatModelUnitTests(ChatModelTests):
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
[
|
||||
{"type": "text", "text": "describe the weather in this image"},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
|
||||
},
|
||||
]
|
||||
{
|
||||
"type": "image",
|
||||
"source_type": "base64",
|
||||
"data": "<base64 image data>",
|
||||
"mime_type": "image/jpeg", # or appropriate mime-type
|
||||
}
|
||||
|
||||
In addition to OpenAI-style content blocks:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
|
||||
}
|
||||
|
||||
See https://python.langchain.com/docs/concepts/multimodality/
|
||||
|
||||
@@ -391,6 +410,59 @@ class ChatModelUnitTests(ChatModelTests):
|
||||
def supports_image_inputs(self) -> bool:
|
||||
return True
|
||||
|
||||
.. dropdown:: supports_image_urls
|
||||
|
||||
Boolean property indicating whether the chat model supports image inputs from
|
||||
URLs. Defaults to ``False``.
|
||||
|
||||
If set to ``True``, the chat model will be tested using content blocks of the
|
||||
form
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
{
|
||||
"type": "image",
|
||||
"source_type": "url",
|
||||
"url": "https://...",
|
||||
}
|
||||
|
||||
See https://python.langchain.com/docs/concepts/multimodality/
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@property
|
||||
def supports_image_urls(self) -> bool:
|
||||
return True
|
||||
|
||||
.. dropdown:: supports_pdf_inputs
|
||||
|
||||
Boolean property indicating whether the chat model supports PDF inputs.
|
||||
Defaults to ``False``.
|
||||
|
||||
If set to ``True``, the chat model will be tested using content blocks of the
|
||||
form
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
{
|
||||
"type": "file",
|
||||
"source_type": "base64",
|
||||
"data": "<base64 file data>",
|
||||
"mime_type": "application/pdf",
|
||||
}
|
||||
|
||||
See https://python.langchain.com/docs/concepts/multimodality/
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@property
|
||||
def supports_pdf_inputs(self) -> bool:
|
||||
return True
|
||||
|
||||
.. dropdown:: supports_video_inputs
|
||||
|
||||
Boolean property indicating whether the chat model supports image inputs.
|
||||
|
Reference in New Issue
Block a user