diff --git a/.github/workflows/codspeed.yml b/.github/workflows/codspeed.yml
index c6836d192c4..6d6025082df 100644
--- a/.github/workflows/codspeed.yml
+++ b/.github/workflows/codspeed.yml
@@ -20,6 +20,7 @@ jobs:
codspeed:
name: 'Benchmark'
runs-on: ubuntu-latest
+ if: ${{ !contains(github.event.pull_request.labels.*.name, 'codspeed-ignore') }}
strategy:
matrix:
include:
diff --git a/docs/api_reference/create_api_rst.py b/docs/api_reference/create_api_rst.py
index f4c5f977b3f..a37d213d725 100644
--- a/docs/api_reference/create_api_rst.py
+++ b/docs/api_reference/create_api_rst.py
@@ -217,7 +217,11 @@ def _load_package_modules(
# Get the full namespace of the module
namespace = str(relative_module_name).replace(".py", "").replace("/", ".")
# Keep only the top level namespace
- top_namespace = namespace.split(".")[0]
+        # (make a special exception for content_blocks and messages.v1)
+        if namespace in ("messages.content_blocks", "messages.v1"):
+            top_namespace = namespace  # Keep the full namespace for these modules
+        else:
+            top_namespace = namespace.split(".")[0]
try:
# If submodule is present, we need to construct the paths in a slightly
@@ -283,7 +287,7 @@ def _construct_doc(
.. toctree::
:hidden:
:maxdepth: 2
-
+
"""
index_autosummary = """
"""
@@ -365,9 +369,9 @@ def _construct_doc(
module_doc += f"""\
:template: {template}
-
+
{class_["qualified_name"]}
-
+
"""
index_autosummary += f"""
{class_["qualified_name"]}
@@ -550,8 +554,8 @@ def _build_index(dirs: List[str]) -> None:
integrations = sorted(dir_ for dir_ in dirs if dir_ not in main_)
doc = """# LangChain Python API Reference
-Welcome to the LangChain Python API reference. This is a reference for all
-`langchain-x` packages.
+Welcome to the LangChain Python API reference. This is a reference for all
+`langchain-x` packages.
For user guides see [https://python.langchain.com](https://python.langchain.com).
diff --git a/docs/docs/contributing/how_to/testing.mdx b/docs/docs/contributing/how_to/testing.mdx
index cc5a1155c32..853ad44e7e3 100644
--- a/docs/docs/contributing/how_to/testing.mdx
+++ b/docs/docs/contributing/how_to/testing.mdx
@@ -124,6 +124,47 @@ start "" htmlcov/index.html || open htmlcov/index.html
```
+## Snapshot Testing
+
+Some tests use [syrupy](https://github.com/tophat/syrupy) for snapshot testing, which captures the output of functions and compares them to stored snapshots. This is particularly useful for testing JSON schema generation and other structured outputs.
+
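+A minimal snapshot test looks roughly like the following sketch (the `snapshot` fixture is provided by the syrupy plugin; the values shown are illustrative):
+
+```python
+def test_schema_shape(snapshot) -> None:
+    """The value is compared against (and stored in) the snapshot file."""
+    schema = {"title": "ExampleModel", "type": "object"}
+    assert schema == snapshot
+```
+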
+### Updating Snapshots
+
+To update snapshots when the expected output has legitimately changed:
+
+```bash
+uv run --group test pytest path/to/test.py --snapshot-update
+```
+
+### Pydantic Version Compatibility Issues
+
+Pydantic generates different JSON schemas across versions, which can cause snapshot test failures in CI when tests run with a Pydantic version different from the one used to generate the snapshots.
+
+**Symptoms:**
+- CI fails with snapshot mismatches, showing differences such as missing or extra fields
+- Tests pass locally but fail in CI under a different Pydantic version
+
+**Solution:**
+Update snapshots locally using the same Pydantic version that CI uses:
+
+1. **Identify the failing Pydantic version** from CI logs (e.g., `2.7.0`, `2.8.0`, `2.9.0`)
+
+2. **Update snapshots with that version:**
+ ```bash
+ uv run --with "pydantic==2.9.0" --group test pytest tests/unit_tests/path/to/test.py::test_name --snapshot-update
+ ```
+
+3. **Verify compatibility across supported versions:**
+ ```bash
+ # Test with the version you used to update
+ uv run --with "pydantic==2.9.0" --group test pytest tests/unit_tests/path/to/test.py::test_name
+
+ # Test with other supported versions
+ uv run --with "pydantic==2.8.0" --group test pytest tests/unit_tests/path/to/test.py::test_name
+ ```
+
+**Note:** Some tests use `@pytest.mark.skipif` decorators to only run with specific Pydantic version ranges (e.g., `PYDANTIC_VERSION_AT_LEAST_210`). Make sure to understand these constraints when updating snapshots.
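+
+A version guard of this kind might look like the following sketch (the constant name mirrors the pattern referenced above and is shown for illustration only):
+
+```python
+import pytest
+from pydantic import VERSION as PYDANTIC_VERSION
+
+# True when the installed Pydantic is at least 2.10
+PYDANTIC_VERSION_AT_LEAST_210 = tuple(
+    int(part) for part in PYDANTIC_VERSION.split(".")[:2]
+) >= (2, 10)
+
+
+@pytest.mark.skipif(
+    not PYDANTIC_VERSION_AT_LEAST_210, reason="Requires pydantic>=2.10"
+)
+def test_schema_snapshot(snapshot) -> None:
+    assert {"title": "ExampleModel"} == snapshot
+```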
+
## Coverage
Code coverage (i.e. the amount of code that is covered by unit tests) helps identify areas of the code that are potentially more or less brittle.
diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py
index c81187dc3f6..b980761a121 100644
--- a/libs/core/langchain_core/messages/ai.py
+++ b/libs/core/langchain_core/messages/ai.py
@@ -8,11 +8,7 @@ from typing import Any, Literal, Optional, Union, cast
from pydantic import model_validator
from typing_extensions import NotRequired, Self, TypedDict, override
-from langchain_core.messages.base import (
- BaseMessage,
- BaseMessageChunk,
- merge_content,
-)
+from langchain_core.messages.base import BaseMessage, BaseMessageChunk, merge_content
from langchain_core.messages.tool import (
InvalidToolCall,
ToolCall,
@@ -20,15 +16,9 @@ from langchain_core.messages.tool import (
default_tool_chunk_parser,
default_tool_parser,
)
-from langchain_core.messages.tool import (
- invalid_tool_call as create_invalid_tool_call,
-)
-from langchain_core.messages.tool import (
- tool_call as create_tool_call,
-)
-from langchain_core.messages.tool import (
- tool_call_chunk as create_tool_call_chunk,
-)
+from langchain_core.messages.tool import invalid_tool_call as create_invalid_tool_call
+from langchain_core.messages.tool import tool_call as create_tool_call
+from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk
from langchain_core.utils._merge import merge_dicts, merge_lists
from langchain_core.utils.json import parse_partial_json
from langchain_core.utils.usage import _dict_int_op
@@ -37,6 +27,16 @@ logger = logging.getLogger(__name__)
_LC_ID_PREFIX = "run-"
+"""Internal tracing/callback system identifier.
+
+Used for:
+- Tracing. Every LangChain operation (LLM call, chain execution, tool use, etc.)
+ gets a unique run_id (UUID)
+- Enables tracking parent-child relationships between operations
+"""
+
+_LC_AUTO_PREFIX = "lc_"
+"""LangChain auto-generated ID prefix for messages and content blocks."""
class InputTokenDetails(TypedDict, total=False):
@@ -428,17 +428,27 @@ def add_ai_message_chunks(
chunk_id = None
candidates = [left.id] + [o.id for o in others]
- # first pass: pick the first non-run-* id
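+    # Precedence: provider-assigned ids > lc_* (auto-generated) > run-* (run ids)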
+ # first pass: pick the first provider-assigned id (non-run-* and non-lc_*)
for id_ in candidates:
- if id_ and not id_.startswith(_LC_ID_PREFIX):
+ if (
+ id_
+ and not id_.startswith(_LC_ID_PREFIX)
+ and not id_.startswith(_LC_AUTO_PREFIX)
+ ):
chunk_id = id_
break
else:
- # second pass: no provider-assigned id found, just take the first non-null
+ # second pass: prefer lc_* ids over run-* ids
for id_ in candidates:
- if id_:
+ if id_ and id_.startswith(_LC_AUTO_PREFIX):
chunk_id = id_
break
+ else:
+ # third pass: take any remaining id (run-* ids)
+ for id_ in candidates:
+ if id_:
+ chunk_id = id_
+ break
return left.__class__(
example=left.example,
diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py
index 4c9d79ef36e..38d05cced75 100644
--- a/libs/core/langchain_core/messages/content_blocks.py
+++ b/libs/core/langchain_core/messages/content_blocks.py
@@ -13,31 +13,46 @@ Data **not yet mapped** to a standard block may be represented using the
``NonStandardContentBlock``, which allows for provider-specific data to be included
without losing the benefits of type checking and validation.
-Furthermore, provider-specific fields *within* a standard block will be allowed as extra
-keys on the TypedDict per `PEP 728 `__. This allows
-for flexibility in the data structure while maintaining a consistent interface.
+Furthermore, provider-specific fields **within** a standard block are fully supported
+by default. However, since current type checkers do not recognize this, we temporarily
+apply ``# type: ignore`` comments to suppress warnings. In the future,
+`PEP 728 <https://peps.python.org/pep-0728/>`__ will add an ``extra_items`` class
+parameter (e.g. ``extra_items=Any``). Once type checkers support it, we will apply it
+to block signatures to signal that additional provider-specific fields are allowed.
-**Example using ``extra_items=Any``:**
+**Example with PEP 728 provider-specific fields:**
.. code-block:: python
+
+ # Note `extra_items=Any`
+ class TextContentBlock(TypedDict, extra_items=Any):
+ type: Literal["text"]
+ id: NotRequired[str]
+ text: str
+ annotations: NotRequired[list[Annotation]]
+ index: NotRequired[int]
+
+.. code-block:: python
+
from langchain_core.messages.content_blocks import TextContentBlock
- from typing import Any
my_block: TextContentBlock = {
+ # Add required fields
"type": "text",
"text": "Hello, world!",
- "extra_field": "This is allowed",
- "another_field": 42, # Any type is allowed
+ # Additional fields not specified in the TypedDict
+ # These are valid with PEP 728 and are typed as Any
+ "openai_metadata": {"model": "gpt-4", "temperature": 0.7},
+ "anthropic_usage": {"input_tokens": 10, "output_tokens": 20},
+ "custom_field": "any value",
}
- # A type checker that supports PEP 728 would validate the object above.
- # Accessing the provider-specific key is possible, and its type is 'Any'.
- block_extra_field = my_block["extra_field"]
+ openai_data = my_block["openai_metadata"] # Type: Any
-.. warning::
- Type checkers such as MyPy do not yet support `PEP 728 `__,
- so you may see type errors when using provider-specific fields. These are safe to
- ignore, as the fields are still validated at runtime.
+.. note::
+ PEP 728 is enabled with ``# type: ignore[call-arg]`` comments to suppress warnings
+ from type checkers that don't yet support it. The functionality works correctly
+ in Python 3.13+ and will be fully supported as the ecosystem catches up.
**Rationale**
@@ -54,17 +69,17 @@ blocks into the format required by its API.
The module defines several types of content blocks, including:
-- **``TextContentBlock``**: Standard text.
-- **``ImageContentBlock``**, **``AudioContentBlock``**, **``VideoContentBlock``**: For
- multimodal data.
-- **``ToolCallContentBlock``**, **``ToolOutputContentBlock``**: For function calling.
-- **``ReasoningContentBlock``**: To capture a model's thought process.
-- **``Citation``**: For annotations that link generated text to a source document.
+- ``TextContentBlock``: Standard text.
+- ``ImageContentBlock``, ``AudioContentBlock``, ``VideoContentBlock``,
+  ``PlainTextContentBlock``, ``FileContentBlock``: For multimodal data.
+- ``ToolCallContentBlock``, ``ToolOutputContentBlock``: For function calling.
+- ``ReasoningContentBlock``: To capture a model's thought process.
+- ``Citation``: For annotations that link generated text to a source document.
**Example Usage**
.. code-block:: python
+ # Direct construction:
from langchain_core.messages.content_blocks import TextContentBlock, ImageContentBlock
multimodal_message: AIMessage = [
@@ -75,14 +90,39 @@ The module defines several types of content blocks, including:
mime_type="image/png",
),
]
+
+ from langchain_core.messages.content_blocks import create_text_block, create_image_block
+
+ # Using factory functions:
+ multimodal_message: AIMessage = [
+ create_text_block("What is shown in this image?"),
+ create_image_block(
+ url="https://www.langchain.com/images/brand/langchain_logo_text_w_white.png",
+ mime_type="image/png",
+ ),
+ ]
""" # noqa: E501
import warnings
from typing import Any, Literal, Optional, Union
+from uuid import uuid4
from typing_extensions import NotRequired, TypedDict, get_args, get_origin
-# --- Text and annotations ---
+
+def _ensure_id(id_val: Optional[str]) -> str:
+ """Ensure the ID is a valid string, generating a new UUID if not provided.
+
+ Auto-generated UUIDs are prefixed by ``'lc_'`` to indicate they are
+ LangChain-generated IDs.
+
+ Args:
+ id_val: Optional string ID value to validate.
+
+ Returns:
+ A valid string ID, either the provided value or a new UUID.
+ """
+    return id_val or f"lc_{uuid4()}"
class Citation(TypedDict):
@@ -92,16 +132,24 @@ class Citation(TypedDict):
``start/end`` indices refer to the **response text**,
not the source text. This means that the indices are relative to the model's
response, not the original document (as specified in the ``url``).
+
+ .. note::
+ ``create_citation`` may also be used as a factory to create a ``Citation``.
+ Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
+
"""
type: Literal["citation"]
- """Type of the content block."""
+ """Type of the content block. Used for discrimination."""
id: NotRequired[str]
"""Content block identifier. Either:
- Generated by the provider (e.g., OpenAI's file ID)
- - Generated by LangChain upon creation (as ``UUID4``)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
"""
url: NotRequired[str]
@@ -137,18 +185,21 @@ class Citation(TypedDict):
# is difficult to reliably extract spans from the raw document text across file
# formats or encoding schemes.
+ extras: NotRequired[dict[str, Any]]
+ """Provider-specific metadata."""
+
class NonStandardAnnotation(TypedDict):
"""Provider-specific annotation format."""
type: Literal["non_standard_annotation"]
- """Type of the content block."""
+ """Type of the content block. Used for discrimination."""
id: NotRequired[str]
"""Content block identifier. Either:
- Generated by the provider (e.g., OpenAI's file ID)
- - Generated by LangChain upon creation (as ``UUID4``)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
"""
value: dict[str, Any]
@@ -159,20 +210,28 @@ Annotation = Union[Citation, NonStandardAnnotation]
class TextContentBlock(TypedDict):
- """Content block for text output.
+    """Text output from an LLM.
This typically represents the main text content of a message, such as the response
from a language model or the text of a user message.
+
+ .. note::
+ ``create_text_block`` may also be used as a factory to create a
+ ``TextContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
+
"""
type: Literal["text"]
- """Type of the content block."""
+ """Type of the content block. Used for discrimination."""
id: NotRequired[str]
"""Content block identifier. Either:
- Generated by the provider (e.g., OpenAI's file ID)
- - Generated by LangChain upon creation (as ``UUID4``)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
"""
text: str
@@ -184,8 +243,10 @@ class TextContentBlock(TypedDict):
index: NotRequired[int]
"""Index of block in aggregate response. Used during streaming."""
+ extras: NotRequired[dict[str, Any]]
+ """Provider-specific metadata."""
+
-# --- Tool calls ---
class ToolCall(TypedDict):
"""Represents a request to call a tool.
@@ -201,49 +262,46 @@ class ToolCall(TypedDict):
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
+
+ .. note::
+ ``create_tool_call`` may also be used as a factory to create a
+ ``ToolCall``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
+
"""
- name: str
- """The name of the tool to be called."""
- args: dict[str, Any]
- """The arguments to the tool call."""
+ type: Literal["tool_call"]
+ """Used for discrimination."""
+
id: Optional[str]
"""An identifier associated with the tool call.
An identifier is needed to associate a tool call request with a tool
call result in events when multiple concurrent tool calls are made.
"""
- index: NotRequired[int]
- """Index of block in aggregate response. Used during streaming."""
- type: Literal["tool_call"]
+ # TODO: Consider making this NotRequired[str] in the future.
-
-class InvalidToolCall(TypedDict):
- """Allowance for errors made by LLM.
-
- Here we add an `error` key to surface errors made during generation
- (e.g., invalid JSON arguments.)
- """
-
- name: Optional[str]
+ name: str
"""The name of the tool to be called."""
- args: Optional[str]
+
+ args: dict[str, Any]
"""The arguments to the tool call."""
- id: Optional[str]
- """An identifier associated with the tool call."""
- error: Optional[str]
- """An error message associated with the tool call."""
+
index: NotRequired[int]
"""Index of block in aggregate response. Used during streaming."""
- type: Literal["invalid_tool_call"]
+
+ extras: NotRequired[dict[str, Any]]
+ """Provider-specific metadata."""
class ToolCallChunk(TypedDict):
"""A chunk of a tool call (e.g., as part of a stream).
- When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
+ When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
- values of `index` are equal and not None.
+ values of ``index`` are equal and not ``None``.
Example:
@@ -258,33 +316,68 @@ class ToolCallChunk(TypedDict):
).tool_call_chunks == [ToolCallChunk(name='foo', args='{"a":1}', index=0)]
"""
- name: Optional[str]
- """The name of the tool to be called."""
- args: Optional[str]
- """The arguments to the tool call."""
+ # TODO: Consider making fields NotRequired[str] in the future.
+
+ type: NotRequired[Literal["tool_call_chunk"]]
+ """Used for serialization."""
+
id: Optional[str]
"""An identifier associated with the tool call."""
+
+ name: Optional[str]
+ """The name of the tool to be called."""
+
+ args: Optional[str]
+ """The arguments to the tool call."""
+
index: Optional[int]
"""The index of the tool call in a sequence."""
- type: NotRequired[Literal["tool_call_chunk"]]
+
+ extras: NotRequired[dict[str, Any]]
+ """Provider-specific metadata."""
+
+
+class InvalidToolCall(TypedDict):
+    """Allowance for errors made by the LLM.
+
+    Here we add an ``error`` key to surface errors made during generation
+    (e.g., invalid JSON arguments).
+ """
+
+ # TODO: Consider making fields NotRequired[str] in the future.
+
+ type: Literal["invalid_tool_call"]
+ """Used for discrimination."""
+
+ id: Optional[str]
+ """An identifier associated with the tool call."""
+
+ name: Optional[str]
+ """The name of the tool to be called."""
+
+ args: Optional[str]
+ """The arguments to the tool call."""
+
+ error: Optional[str]
+ """An error message associated with the tool call."""
+
+ extras: NotRequired[dict[str, Any]]
+ """Provider-specific metadata."""
-# --- Provider tool calls (built-in tools) ---
# Note: These are not standard tool calls, but rather provider-specific built-in tools.
-
-
# Web search
class WebSearchCall(TypedDict):
- """Content block for a built-in web search tool call."""
+ """Built-in web search tool call."""
type: Literal["web_search_call"]
- """Type of the content block."""
+ """Type of the content block. Used for discrimination."""
id: NotRequired[str]
"""Content block identifier. Either:
- Generated by the provider (e.g., OpenAI's file ID)
- - Generated by LangChain upon creation (as ``UUID4``)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
"""
query: NotRequired[str]
@@ -293,18 +386,21 @@ class WebSearchCall(TypedDict):
index: NotRequired[int]
"""Index of block in aggregate response. Used during streaming."""
+ extras: NotRequired[dict[str, Any]]
+ """Provider-specific metadata."""
+
class WebSearchResult(TypedDict):
- """Content block for the result of a built-in web search tool call."""
+ """Result of a built-in web search tool call."""
type: Literal["web_search_result"]
- """Type of the content block."""
+ """Type of the content block. Used for discrimination."""
id: NotRequired[str]
"""Content block identifier. Either:
- Generated by the provider (e.g., OpenAI's file ID)
- - Generated by LangChain upon creation (as ``UUID4``)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
"""
urls: NotRequired[list[str]]
@@ -313,26 +409,25 @@ class WebSearchResult(TypedDict):
index: NotRequired[int]
"""Index of block in aggregate response. Used during streaming."""
-
-# Code interpreter
+ extras: NotRequired[dict[str, Any]]
+ """Provider-specific metadata."""
-# Call
class CodeInterpreterCall(TypedDict):
- """Content block for a built-in code interpreter tool call."""
+ """Built-in code interpreter tool call."""
type: Literal["code_interpreter_call"]
- """Type of the content block."""
+ """Type of the content block. Used for discrimination."""
id: NotRequired[str]
"""Content block identifier. Either:
- Generated by the provider (e.g., OpenAI's file ID)
- - Generated by LangChain upon creation (as ``UUID4``)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
"""
language: NotRequired[str]
- """The programming language used in the code interpreter tool call."""
+ """The name of the programming language used in the code interpreter tool call."""
code: NotRequired[str]
"""The code to be executed by the code interpreter."""
@@ -340,29 +435,31 @@ class CodeInterpreterCall(TypedDict):
index: NotRequired[int]
"""Index of block in aggregate response. Used during streaming."""
+ extras: NotRequired[dict[str, Any]]
+ """Provider-specific metadata."""
+
-# Result block is CodeInterpreterResult
class CodeInterpreterOutput(TypedDict):
- """Content block for the output of a singular code interpreter tool call.
+ """Output of a singular code interpreter tool call.
Full output of a code interpreter tool call is represented by
``CodeInterpreterResult`` which is a list of these blocks.
"""
type: Literal["code_interpreter_output"]
- """Type of the content block."""
+ """Type of the content block. Used for discrimination."""
id: NotRequired[str]
"""Content block identifier. Either:
- Generated by the provider (e.g., OpenAI's file ID)
- - Generated by LangChain upon creation (as ``UUID4``)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
"""
return_code: NotRequired[int]
"""Return code of the executed code.
- Example: 0 for success, non-zero for failure.
+ Example: ``0`` for success, non-zero for failure.
"""
stderr: NotRequired[str]
@@ -377,18 +474,21 @@ class CodeInterpreterOutput(TypedDict):
index: NotRequired[int]
"""Index of block in aggregate response. Used during streaming."""
+ extras: NotRequired[dict[str, Any]]
+ """Provider-specific metadata."""
+
class CodeInterpreterResult(TypedDict):
- """Content block for the result of a code interpreter tool call."""
+ """Result of a code interpreter tool call."""
type: Literal["code_interpreter_result"]
- """Type of the content block."""
+ """Type of the content block. Used for discrimination."""
id: NotRequired[str]
"""Content block identifier. Either:
- Generated by the provider (e.g., OpenAI's file ID)
- - Generated by LangChain upon creation (as ``UUID4``)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
"""
output: list[CodeInterpreterOutput]
@@ -397,19 +497,30 @@ class CodeInterpreterResult(TypedDict):
index: NotRequired[int]
"""Index of block in aggregate response. Used during streaming."""
+ extras: NotRequired[dict[str, Any]]
+ """Provider-specific metadata."""
+
-# --- Reasoning ---
class ReasoningContentBlock(TypedDict):
- """Content block for reasoning output."""
+    """Reasoning output from an LLM.
+
+ .. note::
+ ``create_reasoning_block`` may also be used as a factory to create a
+ ``ReasoningContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
+
+ """
type: Literal["reasoning"]
- """Type of the content block."""
+ """Type of the content block. Used for discrimination."""
id: NotRequired[str]
"""Content block identifier. Either:
- Generated by the provider (e.g., OpenAI's file ID)
- - Generated by LangChain upon creation (as ``UUID4``)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
"""
reasoning: NotRequired[str]
@@ -419,48 +530,36 @@ class ReasoningContentBlock(TypedDict):
    from ``<think>`` tags in the model's response.
"""
- thought_signature: NotRequired[str]
- """Opaque state handle representation of the model's internal thought process.
-
- Maintains the context of the model's thinking across multiple interactions
- (e.g. multi-turn conversations) since many APIs are stateless.
-
- Not to be used to verify authenticity or integrity of the response (`'signature'`).
-
- Examples:
- - https://ai.google.dev/gemini-api/docs/thinking#signatures
- """
-
- signature: NotRequired[str]
- """Signature of the reasoning content block used to verify **authenticity**.
-
- Prevents from modifying or fabricating the model's reasoning process.
-
- Examples:
- - https://docs.anthropic.com/en/docs/build-with-claude/context-windows#the-context-window-with-extended-thinking-and-tool-use
- """
-
index: NotRequired[int]
"""Index of block in aggregate response. Used during streaming."""
-
-# --- Multi-modal ---
+ extras: NotRequired[dict[str, Any]]
+ """Provider-specific metadata."""
# Note: `title` and `context` are fields that could be used to provide additional
# information about the file, such as a description or summary of its content.
# E.g. with Claude, you can provide a context for a file which is passed to the model.
class ImageContentBlock(TypedDict):
- """Content block for image data."""
+ """Image data.
+
+ .. note::
+        ``create_image_block`` may also be used as a factory to create an
+ ``ImageContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
+
+ """
type: Literal["image"]
- """Type of the content block."""
+ """Type of the content block. Used for discrimination."""
id: NotRequired[str]
"""Content block identifier. Either:
- Generated by the provider (e.g., OpenAI's file ID)
- - Generated by LangChain upon creation (as ``UUID4``)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
"""
file_id: NotRequired[str]
@@ -481,24 +580,30 @@ class ImageContentBlock(TypedDict):
base64: NotRequired[str]
"""Data as a base64 string."""
- # title: NotRequired[str]
- # """Title of the image."""
-
- # context: NotRequired[str]
- # """Context for the image, e.g., a description or summary of the image's content.""" # noqa: E501
+ extras: NotRequired[dict[str, Any]]
+ """Provider-specific metadata."""
class VideoContentBlock(TypedDict):
- """Content block for video data."""
+ """Video data.
+
+ .. note::
+ ``create_video_block`` may also be used as a factory to create a
+ ``VideoContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
+
+ """
type: Literal["video"]
- """Type of the content block."""
+ """Type of the content block. Used for discrimination."""
id: NotRequired[str]
"""Content block identifier. Either:
- Generated by the provider (e.g., OpenAI's file ID)
- - Generated by LangChain upon creation (as ``UUID4``)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
"""
file_id: NotRequired[str]
@@ -519,24 +624,30 @@ class VideoContentBlock(TypedDict):
base64: NotRequired[str]
"""Data as a base64 string."""
- # title: NotRequired[str]
- # """Title of the video."""
-
- # context: NotRequired[str]
- # """Context for the video, e.g., a description or summary of the video's content.""" # noqa: E501
+ extras: NotRequired[dict[str, Any]]
+ """Provider-specific metadata."""
class AudioContentBlock(TypedDict):
- """Content block for audio data."""
+ """Audio data.
+
+ .. note::
+ ``create_audio_block`` may also be used as a factory to create an
+ ``AudioContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
+
+ """
type: Literal["audio"]
- """Type of the content block."""
+ """Type of the content block. Used for discrimination."""
id: NotRequired[str]
"""Content block identifier. Either:
- Generated by the provider (e.g., OpenAI's file ID)
- - Generated by LangChain upon creation (as ``UUID4``)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
"""
file_id: NotRequired[str]
@@ -557,29 +668,34 @@ class AudioContentBlock(TypedDict):
base64: NotRequired[str]
"""Data as a base64 string."""
- # title: NotRequired[str]
- # """Title of the audio."""
-
- # context: NotRequired[str]
- # """Context for the audio, e.g., a description or summary of the audio's content.""" # noqa: E501
+ extras: NotRequired[dict[str, Any]]
+ """Provider-specific metadata."""
class PlainTextContentBlock(TypedDict):
- """Content block for plaintext data (e.g., from a document).
+ """Plaintext data (e.g., from a document).
.. note::
Title and context are optional fields that may be passed to the model. See
Anthropic `example `__.
+
+ .. note::
+ ``create_plaintext_block`` may also be used as a factory to create a
+ ``PlainTextContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
+
"""
type: Literal["text-plain"]
- """Type of the content block."""
+ """Type of the content block. Used for discrimination."""
id: NotRequired[str]
"""Content block identifier. Either:
- Generated by the provider (e.g., OpenAI's file ID)
- - Generated by LangChain upon creation (as ``UUID4``)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
"""
file_id: NotRequired[str]
@@ -606,9 +722,12 @@ class PlainTextContentBlock(TypedDict):
context: NotRequired[str]
"""Context for the text, e.g., a description or summary of the text's content."""
+ extras: NotRequired[dict[str, Any]]
+ """Provider-specific metadata."""
+
class FileContentBlock(TypedDict):
- """Content block for file data.
+ """File data that doesn't fit into other multimodal blocks.
This block is intended for files that are not images, audio, or plaintext. For
example, it can be used for PDFs, Word documents, etc.
@@ -616,16 +735,24 @@ class FileContentBlock(TypedDict):
If the file is an image, audio, or plaintext, you should use the corresponding
content block type (e.g., ``ImageContentBlock``, ``AudioContentBlock``,
``PlainTextContentBlock``).
+
+ .. note::
+ ``create_file_block`` may also be used as a factory to create a
+ ``FileContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
+
"""
type: Literal["file"]
- """Type of the content block."""
+ """Type of the content block. Used for discrimination."""
id: NotRequired[str]
"""Content block identifier. Either:
- Generated by the provider (e.g., OpenAI's file ID)
- - Generated by LangChain upon creation (as ``UUID4``)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
"""
file_id: NotRequired[str]
@@ -646,11 +773,8 @@ class FileContentBlock(TypedDict):
base64: NotRequired[str]
"""Data as a base64 string."""
- # title: NotRequired[str]
- # """Title of the file, e.g., the name of a document or file."""
-
- # context: NotRequired[str]
- # """Context for the file, e.g., a description or summary of the file's content."""
+ extras: NotRequired[dict[str, Any]]
+ """Provider-specific metadata."""
# Future modalities to consider:
@@ -658,9 +782,8 @@ class FileContentBlock(TypedDict):
# - Tabular data
-# Non-standard
class NonStandardContentBlock(TypedDict):
- """Content block provider-specific data.
+ """Provider-specific data.
This block contains data for which there is not yet a standard type.
@@ -668,16 +791,24 @@ class NonStandardContentBlock(TypedDict):
If a provider's non-standard output includes reasoning and tool calls, it should be
the adapter's job to parse that payload and emit the corresponding standard
ReasoningContentBlock and ToolCallContentBlocks.
+
+ .. note::
+ ``create_non_standard_block`` may also be used as a factory to create a
+ ``NonStandardContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
+
"""
type: Literal["non_standard"]
- """Type of the content block."""
+ """Type of the content block. Used for discrimination."""
id: NotRequired[str]
"""Content block identifier. Either:
- Generated by the provider (e.g., OpenAI's file ID)
- - Generated by LangChain upon creation (as ``UUID4``)
+    - Generated by LangChain upon creation (``UUID4`` prefixed with ``'lc_'``)
"""
value: dict[str, Any]
@@ -830,3 +961,456 @@ def convert_to_openai_data_block(block: dict) -> dict:
raise ValueError(error_msg)
return formatted_block
+
+
+def create_text_block(
+ text: str,
+ *,
+ id: Optional[str] = None,
+ annotations: Optional[list[Annotation]] = None,
+ index: Optional[int] = None,
+) -> TextContentBlock:
+ """Create a ``TextContentBlock``.
+
+ Args:
+ text: The text content of the block.
+ id: Content block identifier. Generated automatically if not provided.
+ annotations: Citations and other annotations for the text.
+ index: Index of block in aggregate response. Used during streaming.
+
+ Returns:
+ A properly formatted ``TextContentBlock``.
+
+ .. note::
+ The ``id`` is generated automatically if not provided, using a UUID4 format
+ prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
+
+ """
+ block = TextContentBlock(
+ type="text",
+ text=text,
+ id=_ensure_id(id),
+ )
+ if annotations is not None:
+ block["annotations"] = annotations
+ if index is not None:
+ block["index"] = index
+ return block
+
+
+def create_image_block(
+ *,
+ url: Optional[str] = None,
+ base64: Optional[str] = None,
+ file_id: Optional[str] = None,
+ mime_type: Optional[str] = None,
+ id: Optional[str] = None,
+ index: Optional[int] = None,
+) -> ImageContentBlock:
+ """Create an ``ImageContentBlock``.
+
+ Args:
+ url: URL of the image.
+ base64: Base64-encoded image data.
+ file_id: ID of the image file from a file storage system.
+ mime_type: MIME type of the image. Required for base64 data.
+ id: Content block identifier. Generated automatically if not provided.
+ index: Index of block in aggregate response. Used during streaming.
+
+ Returns:
+ A properly formatted ``ImageContentBlock``.
+
+ Raises:
+ ValueError: If no image source is provided or if ``base64`` is used without
+ ``mime_type``.
+
+ .. note::
+ The ``id`` is generated automatically if not provided, using a UUID4 format
+ prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
+
+ """
+ if not any([url, base64, file_id]):
+ msg = "Must provide one of: url, base64, or file_id"
+ raise ValueError(msg)
+
+ if base64 and not mime_type:
+ msg = "mime_type is required when using base64 data"
+ raise ValueError(msg)
+
+ block = ImageContentBlock(type="image", id=_ensure_id(id))
+
+ if url is not None:
+ block["url"] = url
+ if base64 is not None:
+ block["base64"] = base64
+ if file_id is not None:
+ block["file_id"] = file_id
+ if mime_type is not None:
+ block["mime_type"] = mime_type
+ if index is not None:
+ block["index"] = index
+
+ return block
+
+
+def create_video_block(
+ *,
+ url: Optional[str] = None,
+ base64: Optional[str] = None,
+ file_id: Optional[str] = None,
+ mime_type: Optional[str] = None,
+ id: Optional[str] = None,
+ index: Optional[int] = None,
+) -> VideoContentBlock:
+ """Create a ``VideoContentBlock``.
+
+ Args:
+ url: URL of the video.
+ base64: Base64-encoded video data.
+ file_id: ID of the video file from a file storage system.
+ mime_type: MIME type of the video. Required for base64 data.
+ id: Content block identifier. Generated automatically if not provided.
+ index: Index of block in aggregate response. Used during streaming.
+
+ Returns:
+ A properly formatted ``VideoContentBlock``.
+
+ Raises:
+ ValueError: If no video source is provided or if ``base64`` is used without
+ ``mime_type``.
+
+ .. note::
+ The ``id`` is generated automatically if not provided, using a UUID4 format
+ prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
+
+ """
+ if not any([url, base64, file_id]):
+ msg = "Must provide one of: url, base64, or file_id"
+ raise ValueError(msg)
+
+ if base64 and not mime_type:
+ msg = "mime_type is required when using base64 data"
+ raise ValueError(msg)
+
+ block = VideoContentBlock(type="video", id=_ensure_id(id))
+
+ if url is not None:
+ block["url"] = url
+ if base64 is not None:
+ block["base64"] = base64
+ if file_id is not None:
+ block["file_id"] = file_id
+ if mime_type is not None:
+ block["mime_type"] = mime_type
+ if index is not None:
+ block["index"] = index
+
+ return block
+
+
+def create_audio_block(
+ *,
+ url: Optional[str] = None,
+ base64: Optional[str] = None,
+ file_id: Optional[str] = None,
+ mime_type: Optional[str] = None,
+ id: Optional[str] = None,
+ index: Optional[int] = None,
+) -> AudioContentBlock:
+ """Create an ``AudioContentBlock``.
+
+ Args:
+ url: URL of the audio.
+ base64: Base64-encoded audio data.
+ file_id: ID of the audio file from a file storage system.
+ mime_type: MIME type of the audio. Required for base64 data.
+ id: Content block identifier. Generated automatically if not provided.
+ index: Index of block in aggregate response. Used during streaming.
+
+ Returns:
+ A properly formatted ``AudioContentBlock``.
+
+ Raises:
+ ValueError: If no audio source is provided or if ``base64`` is used without
+ ``mime_type``.
+
+ .. note::
+ The ``id`` is generated automatically if not provided, using a UUID4 format
+ prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
+
+ """
+ if not any([url, base64, file_id]):
+ msg = "Must provide one of: url, base64, or file_id"
+ raise ValueError(msg)
+
+ if base64 and not mime_type:
+ msg = "mime_type is required when using base64 data"
+ raise ValueError(msg)
+
+ block = AudioContentBlock(type="audio", id=_ensure_id(id))
+
+ if url is not None:
+ block["url"] = url
+ if base64 is not None:
+ block["base64"] = base64
+ if file_id is not None:
+ block["file_id"] = file_id
+ if mime_type is not None:
+ block["mime_type"] = mime_type
+ if index is not None:
+ block["index"] = index
+
+ return block
+
+
+def create_file_block(
+ *,
+ url: Optional[str] = None,
+ base64: Optional[str] = None,
+ file_id: Optional[str] = None,
+ mime_type: Optional[str] = None,
+ id: Optional[str] = None,
+ index: Optional[int] = None,
+) -> FileContentBlock:
+ """Create a ``FileContentBlock``.
+
+ Args:
+ url: URL of the file.
+ base64: Base64-encoded file data.
+ file_id: ID of the file from a file storage system.
+ mime_type: MIME type of the file. Required for base64 data.
+ id: Content block identifier. Generated automatically if not provided.
+ index: Index of block in aggregate response. Used during streaming.
+
+ Returns:
+ A properly formatted ``FileContentBlock``.
+
+ Raises:
+ ValueError: If no file source is provided or if ``base64`` is used without
+ ``mime_type``.
+
+ .. note::
+ The ``id`` is generated automatically if not provided, using a UUID4 format
+ prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
+
+ """
+ if not any([url, base64, file_id]):
+ msg = "Must provide one of: url, base64, or file_id"
+ raise ValueError(msg)
+
+ if base64 and not mime_type:
+ msg = "mime_type is required when using base64 data"
+ raise ValueError(msg)
+
+ block = FileContentBlock(type="file", id=_ensure_id(id))
+
+ if url is not None:
+ block["url"] = url
+ if base64 is not None:
+ block["base64"] = base64
+ if file_id is not None:
+ block["file_id"] = file_id
+ if mime_type is not None:
+ block["mime_type"] = mime_type
+ if index is not None:
+ block["index"] = index
+
+ return block
+
+
+def create_plaintext_block(
+ text: str,
+ *,
+ url: Optional[str] = None,
+ base64: Optional[str] = None,
+ file_id: Optional[str] = None,
+ title: Optional[str] = None,
+ context: Optional[str] = None,
+ id: Optional[str] = None,
+ index: Optional[int] = None,
+) -> PlainTextContentBlock:
+ """Create a ``PlainTextContentBlock``.
+
+ Args:
+ text: The plaintext content.
+ url: URL of the plaintext file.
+ base64: Base64-encoded plaintext data.
+ file_id: ID of the plaintext file from a file storage system.
+ title: Title of the text data.
+ context: Context or description of the text content.
+ id: Content block identifier. Generated automatically if not provided.
+ index: Index of block in aggregate response. Used during streaming.
+
+ Returns:
+ A properly formatted ``PlainTextContentBlock``.
+
+ .. note::
+ The ``id`` is generated automatically if not provided, using a UUID4 format
+ prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
+
+ """
+ block = PlainTextContentBlock(
+ type="text-plain",
+ mime_type="text/plain",
+ text=text,
+ id=_ensure_id(id),
+ )
+
+ if url is not None:
+ block["url"] = url
+ if base64 is not None:
+ block["base64"] = base64
+ if file_id is not None:
+ block["file_id"] = file_id
+ if title is not None:
+ block["title"] = title
+ if context is not None:
+ block["context"] = context
+ if index is not None:
+ block["index"] = index
+
+ return block
+
+
+def create_tool_call(
+ name: str,
+ args: dict[str, Any],
+ *,
+ id: Optional[str] = None,
+ index: Optional[int] = None,
+) -> ToolCall:
+ """Create a ``ToolCall``.
+
+ Args:
+ name: The name of the tool to be called.
+ args: The arguments to the tool call.
+ id: An identifier for the tool call. Generated automatically if not provided.
+ index: Index of block in aggregate response. Used during streaming.
+
+ Returns:
+ A properly formatted ``ToolCall``.
+
+ .. note::
+ The ``id`` is generated automatically if not provided, using a UUID4 format
+ prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
+
+ """
+ block = ToolCall(
+ type="tool_call",
+ name=name,
+ args=args,
+ id=_ensure_id(id),
+ )
+
+ if index is not None:
+ block["index"] = index
+
+ return block
+
+
+def create_reasoning_block(
+ reasoning: Optional[str] = None,
+ id: Optional[str] = None,
+ index: Optional[int] = None,
+) -> ReasoningContentBlock:
+ """Create a ``ReasoningContentBlock``.
+
+ Args:
+ reasoning: The reasoning text or thought summary.
+ id: Content block identifier. Generated automatically if not provided.
+ index: Index of block in aggregate response. Used during streaming.
+
+ Returns:
+ A properly formatted ``ReasoningContentBlock``.
+
+ .. note::
+ The ``id`` is generated automatically if not provided, using a UUID4 format
+ prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
+
+ """
+ block = ReasoningContentBlock(
+ type="reasoning",
+ reasoning=reasoning or "",
+ id=_ensure_id(id),
+ )
+
+ if index is not None:
+ block["index"] = index
+
+ return block
+
+
+def create_citation(
+ *,
+ url: Optional[str] = None,
+ title: Optional[str] = None,
+ start_index: Optional[int] = None,
+ end_index: Optional[int] = None,
+ cited_text: Optional[str] = None,
+ id: Optional[str] = None,
+) -> Citation:
+ """Create a ``Citation``.
+
+ Args:
+ url: URL of the document source.
+ title: Source document title.
+ start_index: Start index in the response text where citation applies.
+ end_index: End index in the response text where citation applies.
+ cited_text: Excerpt of source text being cited.
+ id: Content block identifier. Generated automatically if not provided.
+
+ Returns:
+ A properly formatted ``Citation``.
+
+ .. note::
+ The ``id`` is generated automatically if not provided, using a UUID4 format
+ prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
+
+ """
+ block = Citation(type="citation", id=_ensure_id(id))
+
+ if url is not None:
+ block["url"] = url
+ if title is not None:
+ block["title"] = title
+ if start_index is not None:
+ block["start_index"] = start_index
+ if end_index is not None:
+ block["end_index"] = end_index
+ if cited_text is not None:
+ block["cited_text"] = cited_text
+
+ return block
+
+
+def create_non_standard_block(
+ value: dict[str, Any],
+ *,
+ id: Optional[str] = None,
+ index: Optional[int] = None,
+) -> NonStandardContentBlock:
+ """Create a ``NonStandardContentBlock``.
+
+ Args:
+ value: Provider-specific data.
+ id: Content block identifier. Generated automatically if not provided.
+ index: Index of block in aggregate response. Used during streaming.
+
+ Returns:
+ A properly formatted ``NonStandardContentBlock``.
+
+ .. note::
+ The ``id`` is generated automatically if not provided, using a UUID4 format
+ prefixed with ``'lc_'`` to indicate it is a LangChain-generated ID.
+
+ """
+ block = NonStandardContentBlock(
+ type="non_standard",
+ value=value,
+ id=_ensure_id(id),
+ )
+
+ if index is not None:
+ block["index"] = index
+
+ return block
diff --git a/libs/core/langchain_core/messages/v1.py b/libs/core/langchain_core/messages/v1.py
index e7147714d40..79ebbf4a72c 100644
--- a/libs/core/langchain_core/messages/v1.py
+++ b/libs/core/langchain_core/messages/v1.py
@@ -1,4 +1,8 @@
-"""LangChain 1.0 message format."""
+"""LangChain v1.0.0 message format.
+
+Each message has content that may be composed of content blocks, defined under
+``langchain_core.messages.content_blocks``.
+"""
import json
import uuid
@@ -9,14 +13,15 @@ from pydantic import BaseModel
from typing_extensions import TypedDict
import langchain_core.messages.content_blocks as types
-from langchain_core.messages.ai import _LC_ID_PREFIX, UsageMetadata, add_usage
+from langchain_core.messages.ai import (
+ _LC_AUTO_PREFIX,
+ _LC_ID_PREFIX,
+ UsageMetadata,
+ add_usage,
+)
from langchain_core.messages.base import merge_content
-from langchain_core.messages.tool import (
- ToolCallChunk,
-)
-from langchain_core.messages.tool import (
- invalid_tool_call as create_invalid_tool_call,
-)
+from langchain_core.messages.tool import ToolCallChunk
+from langchain_core.messages.tool import invalid_tool_call as create_invalid_tool_call
from langchain_core.messages.tool import tool_call as create_tool_call
from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk
from langchain_core.utils._merge import merge_dicts, merge_lists
@@ -26,13 +31,16 @@ from langchain_core.utils.json import parse_partial_json
def _ensure_id(id_val: Optional[str]) -> str:
"""Ensure the ID is a valid string, generating a new UUID if not provided.
+ Auto-generated UUIDs are prefixed by ``'lc_'`` to indicate they are
+ LangChain-generated IDs.
+
Args:
id_val: Optional string ID value to validate.
Returns:
A valid string ID, either the provided value or a new UUID.
"""
- return id_val or str(uuid.uuid4())
+    return id_val or f"{_LC_AUTO_PREFIX}{uuid.uuid4()}"
class ResponseMetadata(TypedDict, total=False):
@@ -41,7 +49,9 @@ class ResponseMetadata(TypedDict, total=False):
Contains additional information returned by the provider, such as
response headers, service tiers, log probabilities, system fingerprints, etc.
- Extra keys are permitted from what is typed here.
+    Extra keys beyond those typed here are permitted, allowing provider-specific
+    metadata to be included without breaking the type definition.
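+
+    For example (illustrative values; extra keys are provider-specific):
+
+    .. code-block:: python
+
+        metadata: ResponseMetadata = {
+            "model_provider": "openai",
+            # Extra, provider-specific keys are permitted:
+            "system_fingerprint": "fp_abc123",
+        }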
"""
model_provider: str
@@ -70,6 +80,11 @@ class AIMessage:
"""
type: Literal["ai"] = "ai"
+ """The type of the message. Must be a string that is unique to the message type.
+
+ The purpose of this field is to allow for easy identification of the message type
+ when deserializing messages.
+ """
name: Optional[str] = None
"""An optional name for the message.
@@ -83,13 +98,15 @@ class AIMessage:
id: Optional[str] = None
"""Unique identifier for the message.
- If the provider assigns a meaningful ID, it should be used here.
+ If the provider assigns a meaningful ID, it should be used here. Otherwise, a
+ LangChain-generated ID will be used.
"""
lc_version: str = "v1"
- """Encoding version for the message."""
+ """Encoding version for the message. Used for serialization."""
content: list[types.ContentBlock] = field(default_factory=list)
+ """Message content as a list of content blocks."""
usage_metadata: Optional[UsageMetadata] = None
"""If provided, usage metadata for a message, such as token counts."""
@@ -138,7 +155,7 @@ class AIMessage:
else:
self.content = content
- self.id = id
+ self.id = _ensure_id(id)
self.name = name
self.lc_version = lc_version
self.usage_metadata = usage_metadata
@@ -205,8 +222,19 @@ class AIMessageChunk(AIMessage):
"""
type: Literal["ai_chunk"] = "ai_chunk" # type: ignore[assignment]
+ """The type of the message. Must be a string that is unique to the message type.
+
+ The purpose of this field is to allow for easy identification of the message type
+ when deserializing messages.
+ """
tool_call_chunks: list[types.ToolCallChunk] = field(init=False)
+ """List of partial tool call data.
+
+ Emitted by the model during streaming, this field contains
+ tool call chunks that may not yet be complete. It is used to reconstruct
+ tool calls from the streamed content.
+ """
def __init__(
self,
@@ -236,7 +264,7 @@ class AIMessageChunk(AIMessage):
else:
self.content = content
- self.id = id
+ self.id = _ensure_id(id)
self.name = name
self.lc_version = lc_version
self.usage_metadata = usage_metadata
@@ -430,17 +458,27 @@ def add_ai_message_chunks(
chunk_id = None
candidates = [left.id] + [o.id for o in others]
- # first pass: pick the first non-run-* id
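+    # Precedence: provider-assigned ids > lc_* (auto-generated) > run-* (run ids)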
+ # first pass: pick the first provider-assigned id (non-`run-*` and non-`lc_*`)
for id_ in candidates:
- if id_ and not id_.startswith(_LC_ID_PREFIX):
+ if (
+ id_
+ and not id_.startswith(_LC_ID_PREFIX)
+ and not id_.startswith(_LC_AUTO_PREFIX)
+ ):
chunk_id = id_
break
else:
- # second pass: no provider-assigned id found, just take the first non-null
+ # second pass: prefer lc_* ids over run-* ids
for id_ in candidates:
- if id_:
+ if id_ and id_.startswith(_LC_AUTO_PREFIX):
chunk_id = id_
break
+ else:
+ # third pass: take any remaining id (run-* ids)
+ for id_ in candidates:
+ if id_:
+ chunk_id = id_
+ break
return left.__class__(
content=cast("list[types.ContentBlock]", content),
@@ -467,7 +505,22 @@ class HumanMessage:
"""
id: str
+ """Used for serialization.
+
+ If the provider assigns a meaningful ID, it should be used here. Otherwise, a
+ LangChain-generated ID will be used.
+ """
+
content: list[types.ContentBlock]
+ """Message content as a list of content blocks."""
+
+ type: Literal["human"] = "human"
+ """The type of the message. Must be a string that is unique to the message type.
+
+ The purpose of this field is to allow for easy identification of the message type
+ when deserializing messages.
+ """
+
name: Optional[str] = None
"""An optional name for the message.
@@ -476,12 +529,6 @@ class HumanMessage:
Usage of this field is optional, and whether it's used or not is up to the
model implementation.
"""
- type: Literal["human"] = "human"
- """The type of the message. Must be a string that is unique to the message type.
-
- The purpose of this field is to allow for easy identification of the message type
- when deserializing messages.
- """
def __init__(
self,
@@ -529,8 +576,21 @@ class SystemMessage:
"""
id: str
+ """Used for serialization.
+
+ If the provider assigns a meaningful ID, it should be used here. Otherwise, a
+ LangChain-generated ID will be used.
+ """
+
content: list[types.ContentBlock]
+ """Message content as a list of content blocks."""
+
type: Literal["system"] = "system"
+ """The type of the message. Must be a string that is unique to the message type.
+
+ The purpose of this field is to allow for easy identification of the message type
+ when deserializing messages.
+ """
name: Optional[str] = None
"""An optional name for the message.
@@ -598,9 +658,26 @@ class ToolMessage:
"""
id: str
+ """Used for serialization."""
+
tool_call_id: str
+ """ID of the tool call this message responds to.
+
+ This should match the ID of the tool call that this message is responding to.
+ """
+
content: list[types.ContentBlock]
- artifact: Optional[Any] = None # App-side payload not for the model
+ """Message content as a list of content blocks."""
+
+ type: Literal["tool"] = "tool"
+ """The type of the message. Must be a string that is unique to the message type.
+
+ The purpose of this field is to allow for easy identification of the message type
+ when deserializing messages.
+ """
+
+ artifact: Optional[Any] = None
+ """App-side payload not for the model."""
name: Optional[str] = None
"""An optional name for the message.
@@ -612,7 +689,11 @@ class ToolMessage:
"""
status: Literal["success", "error"] = "success"
- type: Literal["tool"] = "tool"
+ """Execution status of the tool call.
+
+ Indicates whether the tool call was successful or encountered an error.
+ Defaults to "success".
+ """
def __init__(
self,
diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml
index e54a785bf5d..48af9fea9ff 100644
--- a/libs/core/pyproject.toml
+++ b/libs/core/pyproject.toml
@@ -13,7 +13,7 @@ dependencies = [
"PyYAML>=5.3",
"typing-extensions>=4.7",
"packaging>=23.2",
- "pydantic>=2.7.4",
+ "pydantic>=2.11.7",
]
name = "langchain-core"
version = "0.3.72"
diff --git a/libs/core/tests/unit_tests/messages/test_content_block_factories.py b/libs/core/tests/unit_tests/messages/test_content_block_factories.py
new file mode 100644
index 00000000000..51b30f501eb
--- /dev/null
+++ b/libs/core/tests/unit_tests/messages/test_content_block_factories.py
@@ -0,0 +1,974 @@
+"""Unit tests for ContentBlock factory functions."""
+
+from uuid import UUID
+
+import pytest
+
+from langchain_core.messages.content_blocks import (
+ CodeInterpreterCall,
+ CodeInterpreterOutput,
+ CodeInterpreterResult,
+ InvalidToolCall,
+ ToolCallChunk,
+ WebSearchCall,
+ WebSearchResult,
+ create_audio_block,
+ create_citation,
+ create_file_block,
+ create_image_block,
+ create_non_standard_block,
+ create_plaintext_block,
+ create_reasoning_block,
+ create_text_block,
+ create_tool_call,
+ create_video_block,
+)
+
+
+def _validate_lc_uuid(id_value: str) -> None:
+    """Validate that the ID has the ``lc_`` prefix and a valid UUID suffix.
+
+    Args:
+        id_value: The ID string to validate.
+
+    Raises:
+        AssertionError: If the ID lacks the ``lc_`` prefix or the suffix is not a
+            valid UUID.
+ """
+ assert id_value.startswith("lc_"), f"ID should start with 'lc_' but got: {id_value}"
+ # Validate the UUID part after the lc_ prefix
+ UUID(id_value[3:])
+
+
+class TestTextBlockFactory:
+ """Test create_text_block factory function."""
+
+ def test_basic_creation(self) -> None:
+ """Test basic text block creation."""
+ block = create_text_block("Hello world")
+
+ assert block["type"] == "text"
+ assert block.get("text") == "Hello world"
+ assert "id" in block
+ id_value = block.get("id")
+ assert id_value is not None, "block id is None"
+ _validate_lc_uuid(id_value)
+
+ def test_with_custom_id(self) -> None:
+ """Test text block creation with custom ID."""
+ custom_id = "custom-123"
+ block = create_text_block("Hello", id=custom_id)
+
+ assert block.get("id") == custom_id
+
+ def test_with_annotations(self) -> None:
+ """Test text block creation with annotations."""
+ citation = create_citation(url="https://example.com", title="Example")
+ block = create_text_block("Hello", annotations=[citation])
+
+ assert block.get("annotations") == [citation]
+
+ def test_with_index(self) -> None:
+ """Test text block creation with index."""
+ block = create_text_block("Hello", index=42)
+
+ assert block.get("index") == 42
+
+ def test_optional_fields_not_present_when_none(self) -> None:
+ """Test that optional fields are not included when None."""
+ block = create_text_block("Hello")
+
+ assert "annotations" not in block
+ assert "index" not in block
+
+
+class TestImageBlockFactory:
+ """Test create_image_block factory function."""
+
+ def test_with_url(self) -> None:
+ """Test image block creation with URL."""
+ block = create_image_block(url="https://example.com/image.jpg")
+
+ assert block["type"] == "image"
+ assert block.get("url") == "https://example.com/image.jpg"
+ assert "id" in block
+ id_value = block.get("id")
+ assert id_value is not None, "block id is None"
+ _validate_lc_uuid(id_value)
+
+ def test_with_base64(self) -> None:
+ """Test image block creation with base64 data."""
+ block = create_image_block(
+ base64="iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJ", mime_type="image/png"
+ )
+
+ assert block.get("base64") == "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJ"
+ assert block.get("mime_type") == "image/png"
+
+ def test_with_file_id(self) -> None:
+ """Test image block creation with file ID."""
+ block = create_image_block(file_id="file-123")
+
+ assert block.get("file_id") == "file-123"
+
+ def test_no_source_raises_error(self) -> None:
+ """Test that missing all sources raises ValueError."""
+ with pytest.raises(
+ ValueError, match="Must provide one of: url, base64, or file_id"
+ ):
+ create_image_block()
+
+ def test_base64_without_mime_type_raises_error(self) -> None:
+ """Test that base64 without mime_type raises ValueError."""
+ with pytest.raises(
+ ValueError, match="mime_type is required when using base64 data"
+ ):
+ create_image_block(base64="iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJ")
+
+ def test_with_index(self) -> None:
+ """Test image block creation with index."""
+ block = create_image_block(url="https://example.com/image.jpg", index=1)
+
+ assert block.get("index") == 1
+
+ def test_optional_fields_not_present_when_not_provided(self) -> None:
+ """Test that optional fields are not included when not provided."""
+ block = create_image_block(url="https://example.com/image.jpg")
+
+ assert "base64" not in block
+ assert "file_id" not in block
+ assert "mime_type" not in block
+ assert "index" not in block
+
+
+class TestVideoBlockFactory:
+ """Test create_video_block factory function."""
+
+ def test_with_url(self) -> None:
+ """Test video block creation with URL."""
+ block = create_video_block(url="https://example.com/video.mp4")
+
+ assert block["type"] == "video"
+ assert block.get("url") == "https://example.com/video.mp4"
+
+ def test_with_base64(self) -> None:
+ """Test video block creation with base64 data."""
+ block = create_video_block(
+ base64="UklGRnoGAABXQVZFZm10IBAAAAABAAEA", mime_type="video/mp4"
+ )
+
+ assert block.get("base64") == "UklGRnoGAABXQVZFZm10IBAAAAABAAEA"
+ assert block.get("mime_type") == "video/mp4"
+
+ def test_no_source_raises_error(self) -> None:
+ """Test that missing all sources raises ValueError."""
+ with pytest.raises(
+ ValueError, match="Must provide one of: url, base64, or file_id"
+ ):
+ create_video_block()
+
+ def test_base64_without_mime_type_raises_error(self) -> None:
+ """Test that base64 without mime_type raises ValueError."""
+ with pytest.raises(
+ ValueError, match="mime_type is required when using base64 data"
+ ):
+ create_video_block(base64="UklGRnoGAABXQVZFZm10IBAAAAABAAEA")
+
+
+class TestAudioBlockFactory:
+ """Test create_audio_block factory function."""
+
+ def test_with_url(self) -> None:
+ """Test audio block creation with URL."""
+ block = create_audio_block(url="https://example.com/audio.mp3")
+
+ assert block["type"] == "audio"
+ assert block.get("url") == "https://example.com/audio.mp3"
+
+ def test_with_base64(self) -> None:
+ """Test audio block creation with base64 data."""
+ block = create_audio_block(
+ base64="UklGRnoGAABXQVZFZm10IBAAAAABAAEA", mime_type="audio/mp3"
+ )
+
+ assert block.get("base64") == "UklGRnoGAABXQVZFZm10IBAAAAABAAEA"
+ assert block.get("mime_type") == "audio/mp3"
+
+ def test_no_source_raises_error(self) -> None:
+ """Test that missing all sources raises ValueError."""
+ with pytest.raises(
+ ValueError, match="Must provide one of: url, base64, or file_id"
+ ):
+ create_audio_block()
+
+
+class TestFileBlockFactory:
+ """Test create_file_block factory function."""
+
+ def test_with_url(self) -> None:
+ """Test file block creation with URL."""
+ block = create_file_block(url="https://example.com/document.pdf")
+
+ assert block["type"] == "file"
+ assert block.get("url") == "https://example.com/document.pdf"
+
+ def test_with_base64(self) -> None:
+ """Test file block creation with base64 data."""
+ block = create_file_block(
+ base64="JVBERi0xLjQKJdPr6eEKMSAwIG9iago8PAovVHlwZSAvQ2F0YWxvZwo=",
+ mime_type="application/pdf",
+ )
+
+ assert (
+ block.get("base64")
+ == "JVBERi0xLjQKJdPr6eEKMSAwIG9iago8PAovVHlwZSAvQ2F0YWxvZwo="
+ )
+ assert block.get("mime_type") == "application/pdf"
+
+ def test_no_source_raises_error(self) -> None:
+ """Test that missing all sources raises ValueError."""
+ with pytest.raises(
+ ValueError, match="Must provide one of: url, base64, or file_id"
+ ):
+ create_file_block()
+
+
+class TestPlainTextBlockFactory:
+ """Test create_plain_text_block factory function."""
+
+ def test_basic_creation(self) -> None:
+ """Test basic plain text block creation."""
+ block = create_plaintext_block("This is plain text content.")
+
+ assert block["type"] == "text-plain"
+ assert block.get("mime_type") == "text/plain"
+ assert block.get("text") == "This is plain text content."
+ assert "id" in block
+ id_value = block.get("id")
+ assert id_value is not None, "block id is None"
+ _validate_lc_uuid(id_value)
+
+ def test_with_title_and_context(self) -> None:
+ """Test plain text block creation with title and context."""
+ block = create_plaintext_block(
+ "Document content here.",
+ title="Important Document",
+ context="This document contains important information.",
+ )
+
+ assert block.get("title") == "Important Document"
+ assert block.get("context") == "This document contains important information."
+
+ def test_with_url(self) -> None:
+ """Test plain text block creation with URL."""
+ block = create_plaintext_block(
+ "Content", url="https://example.com/document.txt"
+ )
+
+ assert block.get("url") == "https://example.com/document.txt"
+
+
+class TestToolCallFactory:
+ """Test create_tool_call factory function."""
+
+ def test_basic_creation(self) -> None:
+ """Test basic tool call creation."""
+ block = create_tool_call("search", {"query": "python"})
+
+ assert block["type"] == "tool_call"
+ assert block["name"] == "search"
+ assert block["args"] == {"query": "python"}
+ assert "id" in block
+ id_value = block.get("id")
+ assert id_value is not None, "block id is None"
+ _validate_lc_uuid(id_value)
+
+ def test_with_custom_id(self) -> None:
+ """Test tool call creation with custom ID."""
+ block = create_tool_call("search", {"query": "python"}, id="tool-123")
+
+ assert block.get("id") == "tool-123"
+
+ def test_with_index(self) -> None:
+ """Test tool call creation with index."""
+ block = create_tool_call("search", {"query": "python"}, index=2)
+
+ assert block.get("index") == 2
+
+
+class TestReasoningBlockFactory:
+ """Test create_reasoning_block factory function."""
+
+ def test_basic_creation(self) -> None:
+ """Test basic reasoning block creation."""
+ block = create_reasoning_block("Let me think about this problem...")
+
+ assert block["type"] == "reasoning"
+ assert block.get("reasoning") == "Let me think about this problem..."
+ assert "id" in block
+ id_value = block.get("id")
+ assert id_value is not None, "block id is None"
+ _validate_lc_uuid(id_value)
+
+ @pytest.mark.xfail(reason="Optional fields not implemented yet")
+ def test_with_signatures(self) -> None:
+ """Test reasoning block creation with signatures."""
+ block = create_reasoning_block(
+ "Thinking...",
+ thought_signature="thought-sig-123", # type: ignore[call-arg]
+ signature="auth-sig-456", # type: ignore[call-arg, unused-ignore]
+ )
+
+ assert block.get("thought_signature") == "thought-sig-123"
+ assert block.get("signature") == "auth-sig-456"
+
+ def test_with_index(self) -> None:
+ """Test reasoning block creation with index."""
+ block = create_reasoning_block("Thinking...", index=3)
+
+ assert block.get("index") == 3
+
+
+class TestCitationFactory:
+ """Test create_citation factory function."""
+
+ def test_basic_creation(self) -> None:
+ """Test basic citation creation."""
+ block = create_citation()
+
+ assert block["type"] == "citation"
+ assert "id" in block
+ id_value = block.get("id")
+ assert id_value is not None, "block id is None"
+ _validate_lc_uuid(id_value)
+
+ def test_with_all_fields(self) -> None:
+ """Test citation creation with all fields."""
+ block = create_citation(
+ url="https://example.com/source",
+ title="Source Document",
+ start_index=10,
+ end_index=50,
+ cited_text="This is the cited text.",
+ )
+
+ assert block.get("url") == "https://example.com/source"
+ assert block.get("title") == "Source Document"
+ assert block.get("start_index") == 10
+ assert block.get("end_index") == 50
+ assert block.get("cited_text") == "This is the cited text."
+
+ def test_optional_fields_not_present_when_none(self) -> None:
+ """Test that optional fields are not included when None."""
+ block = create_citation()
+
+ assert "url" not in block
+ assert "title" not in block
+ assert "start_index" not in block
+ assert "end_index" not in block
+ assert "cited_text" not in block
+
+
+class TestNonStandardBlockFactory:
+ """Test create_non_standard_block factory function."""
+
+ def test_basic_creation(self) -> None:
+ """Test basic non-standard block creation."""
+ value = {"custom_field": "custom_value", "number": 42}
+ block = create_non_standard_block(value)
+
+ assert block["type"] == "non_standard"
+ assert block["value"] == value
+ assert "id" in block
+ id_value = block.get("id")
+ assert id_value is not None, "block id is None"
+ _validate_lc_uuid(id_value)
+
+ def test_with_index(self) -> None:
+ """Test non-standard block creation with index."""
+ value = {"data": "test"}
+ block = create_non_standard_block(value, index=5)
+
+ assert block.get("index") == 5
+
+ def test_optional_fields_not_present_when_none(self) -> None:
+ """Test that optional fields are not included when None."""
+ value = {"data": "test"}
+ block = create_non_standard_block(value)
+
+ assert "index" not in block
+
+
+class TestUUIDValidation:
+ """Test UUID generation and validation behavior."""
+
+ def test_custom_id_bypasses_lc_prefix_requirement(self) -> None:
+ """Test that custom IDs can use any format (don't require lc_ prefix)."""
+ custom_id = "custom-123"
+ block = create_text_block("Hello", id=custom_id)
+
+ assert block.get("id") == custom_id
+ # Custom IDs should not be validated with lc_ prefix requirement
+
+ def test_generated_ids_are_unique(self) -> None:
+ """Test that multiple factory calls generate unique IDs."""
+ blocks = [create_text_block("test") for _ in range(10)]
+ ids = [block.get("id") for block in blocks]
+
+ # All IDs should be unique
+ assert len(set(ids)) == len(ids)
+
+ # All generated IDs should have lc_ prefix
+ for id_value in ids:
+ _validate_lc_uuid(id_value or "")
+
+ def test_empty_string_id_generates_new_uuid(self) -> None:
+ """Test that empty string ID generates new UUID with lc_ prefix."""
+ block = create_text_block("Hello", id="")
+
+ id_value: str = block.get("id", "")
+ assert id_value != ""
+ _validate_lc_uuid(id_value)
+
+ def test_generated_id_length(self) -> None:
+ """Test that generated IDs have correct length (UUID4 + lc_ prefix)."""
+ block = create_text_block("Hello")
+
+ id_value = block.get("id")
+ assert id_value is not None
+
+ # UUID4 string length is 36 chars, plus 3 for "lc_" prefix = 39 total
+ expected_length = 36 + 3
+ assert len(id_value) == expected_length, (
+ f"Expected length {expected_length}, got {len(id_value)}"
+ )
+
+ # Validate it's properly formatted
+ _validate_lc_uuid(id_value)
+
+
+class TestFactoryTypeConsistency:
+ """Test that factory functions return correctly typed objects."""
+
+ def test_factories_return_correct_types(self) -> None:
+ """Test that all factory functions return the expected TypedDict types."""
+ text_block = create_text_block("test")
+ assert isinstance(text_block, dict)
+ assert text_block["type"] == "text"
+
+ image_block = create_image_block(url="https://example.com/image.jpg")
+ assert isinstance(image_block, dict)
+ assert image_block["type"] == "image"
+
+ video_block = create_video_block(url="https://example.com/video.mp4")
+ assert isinstance(video_block, dict)
+ assert video_block["type"] == "video"
+
+ audio_block = create_audio_block(url="https://example.com/audio.mp3")
+ assert isinstance(audio_block, dict)
+ assert audio_block["type"] == "audio"
+
+ file_block = create_file_block(url="https://example.com/file.pdf")
+ assert isinstance(file_block, dict)
+ assert file_block["type"] == "file"
+
+ plain_text_block = create_plaintext_block("content")
+ assert isinstance(plain_text_block, dict)
+ assert plain_text_block["type"] == "text-plain"
+
+ tool_call = create_tool_call("tool", {"arg": "value"})
+ assert isinstance(tool_call, dict)
+ assert tool_call["type"] == "tool_call"
+
+ reasoning_block = create_reasoning_block("reasoning")
+ assert isinstance(reasoning_block, dict)
+ assert reasoning_block["type"] == "reasoning"
+
+ citation = create_citation()
+ assert isinstance(citation, dict)
+ assert citation["type"] == "citation"
+
+ non_standard_block = create_non_standard_block({"data": "value"})
+ assert isinstance(non_standard_block, dict)
+ assert non_standard_block["type"] == "non_standard"
+
+
+class TestExtraItems:
+ """Test that content blocks support extra items via __extra_items__ field."""
+
+ def test_text_block_extra_items(self) -> None:
+ """Test that TextContentBlock can store extra provider-specific fields."""
+ block = create_text_block("Hello world")
+
+ block["openai_metadata"] = {"model": "gpt-4", "temperature": 0.7} # type: ignore[typeddict-unknown-key]
+ block["anthropic_usage"] = {"input_tokens": 10, "output_tokens": 20} # type: ignore[typeddict-unknown-key]
+ block["custom_field"] = "any value" # type: ignore[typeddict-unknown-key]
+
+ assert block["type"] == "text"
+ assert block["text"] == "Hello world"
+ assert "id" in block
+ assert block.get("openai_metadata") == {"model": "gpt-4", "temperature": 0.7}
+ assert block.get("anthropic_usage") == {"input_tokens": 10, "output_tokens": 20}
+ assert block.get("custom_field") == "any value"
+
+ def test_text_block_extras_field(self) -> None:
+ """Test that TextContentBlock properly supports the explicit extras field."""
+ block = create_text_block("Hello world")
+
+ # Test direct assignment to extras field
+ block["extras"] = {
+ "openai_metadata": {"model": "gpt-4", "temperature": 0.7},
+ "anthropic_usage": {"input_tokens": 10, "output_tokens": 20},
+ "custom_field": "any value",
+ }
+
+ assert block["type"] == "text"
+ assert block["text"] == "Hello world"
+ assert "id" in block
+ assert "extras" in block
+
+ extras = block.get("extras", {})
+ assert extras.get("openai_metadata") == {"model": "gpt-4", "temperature": 0.7}
+ expected_usage = {"input_tokens": 10, "output_tokens": 20}
+ assert extras.get("anthropic_usage") == expected_usage
+ assert extras.get("custom_field") == "any value"
+
+ def test_mixed_extra_items_types(self) -> None:
+ """Test that extra items can be various types (str, int, bool, dict, list)."""
+ block = create_text_block("Test content")
+
+ # Add various types of extra fields
+ block["string_field"] = "string value" # type: ignore[typeddict-unknown-key]
+ block["int_field"] = 42 # type: ignore[typeddict-unknown-key]
+ block["float_field"] = 3.14 # type: ignore[typeddict-unknown-key]
+ block["bool_field"] = True # type: ignore[typeddict-unknown-key]
+ block["list_field"] = ["item1", "item2", "item3"] # type: ignore[typeddict-unknown-key]
+ block["dict_field"] = {"nested": {"deeply": "nested value"}} # type: ignore[typeddict-unknown-key]
+ block["none_field"] = None # type: ignore[typeddict-unknown-key]
+
+ # Verify all types are preserved
+ assert block.get("string_field") == "string value"
+ assert block.get("int_field") == 42
+ assert block.get("float_field") == 3.14
+ assert block.get("bool_field") is True
+ assert block.get("list_field") == ["item1", "item2", "item3"]
+ dict_field = block.get("dict_field", {})
+ assert isinstance(dict_field, dict)
+ nested = dict_field.get("nested", {})
+ assert isinstance(nested, dict)
+ assert nested.get("deeply") == "nested value"
+ assert block.get("none_field") is None
+
+ def test_extra_items_do_not_interfere_with_standard_fields(self) -> None:
+ """Test that extra items don't interfere with standard field access."""
+ block = create_text_block("Original text", index=1)
+
+ # Add many extra fields
+ for i in range(10):
+ block[f"extra_field_{i}"] = f"value_{i}" # type: ignore[literal-required]
+
+ # Standard fields should still work correctly
+ assert block["type"] == "text"
+ assert block["text"] == "Original text"
+ assert block["index"] == 1 if "index" in block else None
+ assert "id" in block
+
+ # Extra fields should also be accessible
+ for i in range(10):
+ assert block.get(f"extra_field_{i}") == f"value_{i}"
+
+ def test_extra_items_can_be_modified(self) -> None:
+ """Test that extra items can be modified after creation."""
+ block = create_image_block(url="https://example.com/image.jpg")
+
+ # Add an extra field
+ block["status"] = "pending" # type: ignore[typeddict-unknown-key]
+ assert block.get("status") == "pending"
+
+ # Modify the extra field
+ block["status"] = "processed" # type: ignore[typeddict-unknown-key]
+ assert block.get("status") == "processed"
+
+ # Add more fields
+ block["metadata"] = {"version": 1} # type: ignore[typeddict-unknown-key]
+ metadata = block.get("metadata", {})
+ assert isinstance(metadata, dict)
+ assert metadata.get("version") == 1
+
+ # Modify nested extra field
+ block["metadata"]["version"] = 2 # type: ignore[typeddict-item]
+ metadata = block.get("metadata", {})
+ assert isinstance(metadata, dict)
+ assert metadata.get("version") == 2
+
+ def test_all_content_blocks_support_extra_items(self) -> None:
+ """Test that all content block types support extra items."""
+ # Test each content block type
+ text_block = create_text_block("test")
+ text_block["extra"] = "text_extra" # type: ignore[typeddict-unknown-key]
+ assert text_block.get("extra") == "text_extra"
+
+ image_block = create_image_block(url="https://example.com/image.jpg")
+ image_block["extra"] = "image_extra" # type: ignore[typeddict-unknown-key]
+ assert image_block.get("extra") == "image_extra"
+
+ video_block = create_video_block(url="https://example.com/video.mp4")
+ video_block["extra"] = "video_extra" # type: ignore[typeddict-unknown-key]
+ assert video_block.get("extra") == "video_extra"
+
+ audio_block = create_audio_block(url="https://example.com/audio.mp3")
+ audio_block["extra"] = "audio_extra" # type: ignore[typeddict-unknown-key]
+ assert audio_block.get("extra") == "audio_extra"
+
+ file_block = create_file_block(url="https://example.com/file.pdf")
+ file_block["extra"] = "file_extra" # type: ignore[typeddict-unknown-key]
+ assert file_block.get("extra") == "file_extra"
+
+ plain_text_block = create_plaintext_block("content")
+ plain_text_block["extra"] = "plaintext_extra" # type: ignore[typeddict-unknown-key]
+ assert plain_text_block.get("extra") == "plaintext_extra"
+
+ tool_call = create_tool_call("tool", {"arg": "value"})
+ tool_call["extra"] = "tool_extra" # type: ignore[typeddict-unknown-key]
+ assert tool_call.get("extra") == "tool_extra"
+
+ reasoning_block = create_reasoning_block("reasoning")
+ reasoning_block["extra"] = "reasoning_extra" # type: ignore[typeddict-unknown-key]
+ assert reasoning_block.get("extra") == "reasoning_extra"
+
+ non_standard_block = create_non_standard_block({"data": "value"})
+ non_standard_block["extra"] = "non_standard_extra" # type: ignore[typeddict-unknown-key]
+ assert non_standard_block.get("extra") == "non_standard_extra"
+
+
+class TestExtrasField:
+ """Test the explicit extras field across all content block types."""
+
+ def test_all_content_blocks_support_extras_field(self) -> None:
+ """Test that all content block types support the explicit extras field."""
+ provider_metadata = {
+ "provider": "openai",
+ "model": "gpt-4",
+ "temperature": 0.7,
+ "usage": {"input_tokens": 10, "output_tokens": 20},
+ }
+
+ # Test TextContentBlock
+ text_block = create_text_block("test")
+ text_block["extras"] = provider_metadata
+ assert text_block.get("extras") == provider_metadata
+ assert text_block["type"] == "text"
+
+ # Test ImageContentBlock
+ image_block = create_image_block(url="https://example.com/image.jpg")
+ image_block["extras"] = provider_metadata
+ assert image_block.get("extras") == provider_metadata
+ assert image_block["type"] == "image"
+
+ # Test VideoContentBlock
+ video_block = create_video_block(url="https://example.com/video.mp4")
+ video_block["extras"] = provider_metadata
+ assert video_block.get("extras") == provider_metadata
+ assert video_block["type"] == "video"
+
+ # Test AudioContentBlock
+ audio_block = create_audio_block(url="https://example.com/audio.mp3")
+ audio_block["extras"] = provider_metadata
+ assert audio_block.get("extras") == provider_metadata
+ assert audio_block["type"] == "audio"
+
+ # Test FileContentBlock
+ file_block = create_file_block(url="https://example.com/file.pdf")
+ file_block["extras"] = provider_metadata
+ assert file_block.get("extras") == provider_metadata
+ assert file_block["type"] == "file"
+
+ # Test PlainTextContentBlock
+ plain_text_block = create_plaintext_block("content")
+ plain_text_block["extras"] = provider_metadata
+ assert plain_text_block.get("extras") == provider_metadata
+ assert plain_text_block["type"] == "text-plain"
+
+ # Test ToolCall
+ tool_call = create_tool_call("tool", {"arg": "value"})
+ tool_call["extras"] = provider_metadata
+ assert tool_call.get("extras") == provider_metadata
+ assert tool_call["type"] == "tool_call"
+
+ # Test ReasoningContentBlock
+ reasoning_block = create_reasoning_block("reasoning")
+ reasoning_block["extras"] = provider_metadata
+ assert reasoning_block.get("extras") == provider_metadata
+ assert reasoning_block["type"] == "reasoning"
+
+ # Test Citation
+ citation = create_citation()
+ citation["extras"] = provider_metadata
+ assert citation.get("extras") == provider_metadata
+ assert citation["type"] == "citation"
+
+ def test_extras_field_is_optional(self) -> None:
+ """Test that the extras field is optional and blocks work without it."""
+ # Create blocks without extras
+ text_block = create_text_block("test")
+ image_block = create_image_block(url="https://example.com/image.jpg")
+ tool_call = create_tool_call("tool", {"arg": "value"})
+ reasoning_block = create_reasoning_block("reasoning")
+ citation = create_citation()
+
+ # Verify blocks work correctly without extras
+ assert text_block["type"] == "text"
+ assert image_block["type"] == "image"
+ assert tool_call["type"] == "tool_call"
+ assert reasoning_block["type"] == "reasoning"
+ assert citation["type"] == "citation"
+
+ # Verify extras field is not present when not set
+ assert "extras" not in text_block
+ assert "extras" not in image_block
+ assert "extras" not in tool_call
+ assert "extras" not in reasoning_block
+ assert "extras" not in citation
+
+ def test_extras_field_can_be_modified(self) -> None:
+ """Test that the extras field can be modified after creation."""
+ block = create_text_block("test")
+
+ # Add extras
+ block["extras"] = {"initial": "value"}
+ assert block.get("extras") == {"initial": "value"}
+
+ # Modify extras
+ block["extras"] = {"updated": "value", "count": 42}
+ extras = block.get("extras", {})
+ assert extras.get("updated") == "value"
+ assert extras.get("count") == 42
+ assert "initial" not in extras
+
+ # Update nested values in extras
+ if "extras" in block:
+ block["extras"]["nested"] = {"deep": "value"}
+ extras = block.get("extras", {})
+ nested = extras.get("nested", {})
+ assert isinstance(nested, dict)
+ assert nested.get("deep") == "value"
+
+ def test_extras_field_supports_various_data_types(self) -> None:
+ """Test that the extras field can store various data types."""
+ block = create_text_block("test")
+
+ complex_extras = {
+ "string_val": "test string",
+ "int_val": 42,
+ "float_val": 3.14,
+ "bool_val": True,
+ "none_val": None,
+ "list_val": ["item1", "item2", {"nested": "in_list"}],
+ "dict_val": {"nested": {"deeply": {"nested": "value"}}},
+ }
+
+ block["extras"] = complex_extras
+
+ extras = block.get("extras", {})
+ assert extras.get("string_val") == "test string"
+ assert extras.get("int_val") == 42
+ assert extras.get("float_val") == 3.14
+ assert extras.get("bool_val") is True
+ assert extras.get("none_val") is None
+
+ list_val = extras.get("list_val", [])
+ assert isinstance(list_val, list)
+ assert len(list_val) == 3
+ assert list_val[0] == "item1"
+ assert list_val[1] == "item2"
+ assert isinstance(list_val[2], dict)
+ assert list_val[2].get("nested") == "in_list"
+
+ dict_val = extras.get("dict_val", {})
+ assert isinstance(dict_val, dict)
+ nested = dict_val.get("nested", {})
+ assert isinstance(nested, dict)
+ deeply = nested.get("deeply", {})
+ assert isinstance(deeply, dict)
+ assert deeply.get("nested") == "value"
+
+ def test_extras_field_does_not_interfere_with_standard_fields(self) -> None:
+ """Test that the extras field doesn't interfere with standard fields."""
+ # Create a complex block with all standard fields
+ block = create_text_block(
+ "Test content",
+ annotations=[create_citation(url="https://example.com")],
+ index=42,
+ )
+
+ # Add extensive extras
+ large_extras = {f"field_{i}": f"value_{i}" for i in range(100)}
+ block["extras"] = large_extras
+
+ # Verify all standard fields still work
+ assert block["type"] == "text"
+ assert block["text"] == "Test content"
+ assert block.get("index") == 42
+ assert "id" in block
+ assert "annotations" in block
+
+ annotations = block.get("annotations", [])
+ assert len(annotations) == 1
+ assert annotations[0]["type"] == "citation"
+
+ # Verify extras field works
+ extras = block.get("extras", {})
+ assert len(extras) == 100
+ for i in range(100):
+ assert extras.get(f"field_{i}") == f"value_{i}"
+
+ def test_special_content_blocks_support_extras_field(self) -> None:
+ """Test that special content blocks support extras field."""
+ provider_metadata = {
+ "provider": "openai",
+ "request_id": "req_12345",
+ "timing": {"start": 1234567890, "end": 1234567895},
+ }
+
+ # Test ToolCallChunk
+ tool_call_chunk: ToolCallChunk = {
+ "type": "tool_call_chunk",
+ "id": "tool_123",
+ "name": "search",
+ "args": '{"query": "test"}',
+ "index": 0,
+ "extras": provider_metadata,
+ }
+ assert tool_call_chunk.get("extras") == provider_metadata
+ assert tool_call_chunk["type"] == "tool_call_chunk"
+
+ # Test InvalidToolCall
+ invalid_tool_call: InvalidToolCall = {
+ "type": "invalid_tool_call",
+ "id": "invalid_123",
+ "name": "bad_tool",
+ "args": "invalid json",
+ "error": "JSON parse error",
+ "extras": provider_metadata,
+ }
+ assert invalid_tool_call.get("extras") == provider_metadata
+ assert invalid_tool_call["type"] == "invalid_tool_call"
+
+ # Test WebSearchCall
+ web_search_call: WebSearchCall = {
+ "type": "web_search_call",
+ "id": "search_123",
+ "query": "python langchain",
+ "index": 0,
+ "extras": provider_metadata,
+ }
+ assert web_search_call.get("extras") == provider_metadata
+ assert web_search_call["type"] == "web_search_call"
+
+ # Test WebSearchResult
+ web_search_result: WebSearchResult = {
+ "type": "web_search_result",
+ "id": "result_123",
+ "urls": ["https://example.com", "https://test.com"],
+ "index": 0,
+ "extras": provider_metadata,
+ }
+ assert web_search_result.get("extras") == provider_metadata
+ assert web_search_result["type"] == "web_search_result"
+
+ # Test CodeInterpreterCall
+ code_interpreter_call: CodeInterpreterCall = {
+ "type": "code_interpreter_call",
+ "id": "code_123",
+ "language": "python",
+ "code": "print('hello world')",
+ "index": 0,
+ "extras": provider_metadata,
+ }
+ assert code_interpreter_call.get("extras") == provider_metadata
+ assert code_interpreter_call["type"] == "code_interpreter_call"
+
+ # Test CodeInterpreterOutput
+ code_interpreter_output: CodeInterpreterOutput = {
+ "type": "code_interpreter_output",
+ "id": "output_123",
+ "return_code": 0,
+ "stderr": "",
+ "stdout": "hello world\n",
+ "file_ids": ["file_123"],
+ "index": 0,
+ "extras": provider_metadata,
+ }
+ assert code_interpreter_output.get("extras") == provider_metadata
+ assert code_interpreter_output["type"] == "code_interpreter_output"
+
+ # Test CodeInterpreterResult
+ code_interpreter_result: CodeInterpreterResult = {
+ "type": "code_interpreter_result",
+ "id": "result_123",
+ "output": [code_interpreter_output],
+ "index": 0,
+ "extras": provider_metadata,
+ }
+ assert code_interpreter_result.get("extras") == provider_metadata
+ assert code_interpreter_result["type"] == "code_interpreter_result"
+
+ def test_extras_field_is_not_required_for_special_blocks(self) -> None:
+ """Test that extras field is optional for all special content blocks."""
+ # Create blocks without extras field
+ tool_call_chunk: ToolCallChunk = {
+ "id": "tool_123",
+ "name": "search",
+ "args": '{"query": "test"}',
+ "index": 0,
+ }
+
+ invalid_tool_call: InvalidToolCall = {
+ "type": "invalid_tool_call",
+ "id": "invalid_123",
+ "name": "bad_tool",
+ "args": "invalid json",
+ "error": "JSON parse error",
+ }
+
+ web_search_call: WebSearchCall = {
+ "type": "web_search_call",
+ "query": "python langchain",
+ }
+
+ web_search_result: WebSearchResult = {
+ "type": "web_search_result",
+ "urls": ["https://example.com"],
+ }
+
+ code_interpreter_call: CodeInterpreterCall = {
+ "type": "code_interpreter_call",
+ "code": "print('hello')",
+ }
+
+ code_interpreter_output: CodeInterpreterOutput = {
+ "type": "code_interpreter_output",
+ "stdout": "hello\n",
+ }
+
+ code_interpreter_result: CodeInterpreterResult = {
+ "type": "code_interpreter_result",
+ "output": [code_interpreter_output],
+ }
+
+ # Verify they work without extras
+ assert tool_call_chunk.get("name") == "search"
+ assert invalid_tool_call["type"] == "invalid_tool_call"
+ assert web_search_call["type"] == "web_search_call"
+ assert web_search_result["type"] == "web_search_result"
+ assert code_interpreter_call["type"] == "code_interpreter_call"
+ assert code_interpreter_output["type"] == "code_interpreter_output"
+ assert code_interpreter_result["type"] == "code_interpreter_result"
+
+ # Verify extras field is not present
+ assert "extras" not in tool_call_chunk
+ assert "extras" not in invalid_tool_call
+ assert "extras" not in web_search_call
+ assert "extras" not in web_search_result
+ assert "extras" not in code_interpreter_call
+ assert "extras" not in code_interpreter_output
+ assert "extras" not in code_interpreter_result
diff --git a/libs/core/tests/unit_tests/messages/test_response_metadata.py b/libs/core/tests/unit_tests/messages/test_response_metadata.py
new file mode 100644
index 00000000000..efecf1cf2e1
--- /dev/null
+++ b/libs/core/tests/unit_tests/messages/test_response_metadata.py
@@ -0,0 +1,343 @@
+"""Unit tests for ResponseMetadata TypedDict."""
+
+from langchain_core.messages.v1 import AIMessage, AIMessageChunk, ResponseMetadata
+
+
+class TestResponseMetadata:
+ """Test the ResponseMetadata TypedDict functionality."""
+
+ def test_response_metadata_basic_fields(self) -> None:
+ """Test ResponseMetadata with basic required fields."""
+ metadata: ResponseMetadata = {
+ "model_provider": "openai",
+ "model_name": "gpt-4",
+ }
+
+ assert metadata.get("model_provider") == "openai"
+ assert metadata.get("model_name") == "gpt-4"
+
+ def test_response_metadata_is_optional(self) -> None:
+ """Test that ResponseMetadata fields are optional due to total=False."""
+ # Should be able to create empty ResponseMetadata
+ metadata: ResponseMetadata = {}
+ assert metadata == {}
+
+ # Should be able to create with just one field
+ metadata_partial: ResponseMetadata = {"model_provider": "anthropic"}
+ assert metadata_partial.get("model_provider") == "anthropic"
+ assert "model_name" not in metadata_partial
+
+ def test_response_metadata_supports_extra_fields(self) -> None:
+ """Test that ResponseMetadata supports provider-specific extra fields."""
+ metadata: ResponseMetadata = {
+ "model_provider": "openai",
+ "model_name": "gpt-4-turbo",
+ # Extra fields should be allowed
+ "system_fingerprint": "fp_12345",
+ "logprobs": None,
+ "finish_reason": "stop",
+ "request_id": "req_abc123",
+ }
+
+ assert metadata.get("model_provider") == "openai"
+ assert metadata.get("model_name") == "gpt-4-turbo"
+ assert metadata.get("system_fingerprint") == "fp_12345"
+ assert metadata.get("logprobs") is None
+ assert metadata.get("finish_reason") == "stop"
+ assert metadata.get("request_id") == "req_abc123"
+
+ def test_response_metadata_various_data_types(self) -> None:
+ """Test that ResponseMetadata can store various data types in extra fields."""
+ metadata: ResponseMetadata = {
+ "model_provider": "anthropic",
+ "model_name": "claude-3-sonnet",
+ "string_field": "test_value",
+ "int_field": 42,
+ "float_field": 3.14,
+ "bool_field": True,
+ "none_field": None,
+ "list_field": [1, 2, 3, "test"],
+ "dict_field": {"nested": {"deeply": "nested_value"}},
+ }
+
+ assert metadata.get("string_field") == "test_value"
+ assert metadata.get("int_field") == 42
+ assert metadata.get("float_field") == 3.14
+ assert metadata.get("bool_field") is True
+ assert metadata.get("none_field") is None
+
+ list_field = metadata.get("list_field")
+ assert isinstance(list_field, list)
+ assert list_field == [1, 2, 3, "test"]
+
+ dict_field = metadata.get("dict_field")
+ assert isinstance(dict_field, dict)
+ nested = dict_field.get("nested")
+ assert isinstance(nested, dict)
+ assert nested.get("deeply") == "nested_value"
+
+ def test_response_metadata_can_be_modified(self) -> None:
+ """Test that ResponseMetadata can be modified after creation."""
+ metadata: ResponseMetadata = {
+ "model_provider": "openai",
+ "model_name": "gpt-3.5-turbo",
+ }
+
+ # Modify existing fields
+ metadata["model_name"] = "gpt-4"
+ assert metadata.get("model_name") == "gpt-4"
+
+ # Add new fields
+ metadata["request_id"] = "req_12345"
+ assert metadata.get("request_id") == "req_12345"
+
+ # Modify nested structures
+ metadata["headers"] = {"x-request-id": "abc123"}
+ metadata["headers"]["x-rate-limit"] = "100" # type: ignore[typeddict-item]
+
+ headers = metadata.get("headers")
+ assert isinstance(headers, dict)
+ assert headers.get("x-request-id") == "abc123"
+ assert headers.get("x-rate-limit") == "100"
+
+ def test_response_metadata_provider_specific_examples(self) -> None:
+ """Test ResponseMetadata with realistic provider-specific examples."""
+ # OpenAI-style metadata
+ openai_metadata: ResponseMetadata = {
+ "model_provider": "openai",
+ "model_name": "gpt-4-turbo-2024-04-09",
+ "system_fingerprint": "fp_abc123",
+ "created": 1234567890,
+ "logprobs": None,
+ "finish_reason": "stop",
+ }
+
+ assert openai_metadata.get("model_provider") == "openai"
+ assert openai_metadata.get("system_fingerprint") == "fp_abc123"
+
+ # Anthropic-style metadata
+ anthropic_metadata: ResponseMetadata = {
+ "model_provider": "anthropic",
+ "model_name": "claude-3-sonnet-20240229",
+ "stop_reason": "end_turn",
+ "stop_sequence": None,
+ }
+
+ assert anthropic_metadata.get("model_provider") == "anthropic"
+ assert anthropic_metadata.get("stop_reason") == "end_turn"
+
+ # Custom provider metadata
+ custom_metadata: ResponseMetadata = {
+ "model_provider": "custom_llm_service",
+ "model_name": "custom-model-v1",
+ "service_tier": "premium",
+ "rate_limit_info": {
+ "requests_remaining": 100,
+ "reset_time": "2024-01-01T00:00:00Z",
+ },
+ "response_time_ms": 1250,
+ }
+
+ assert custom_metadata.get("service_tier") == "premium"
+ rate_limit = custom_metadata.get("rate_limit_info")
+ assert isinstance(rate_limit, dict)
+ assert rate_limit.get("requests_remaining") == 100
+
+
+class TestResponseMetadataWithAIMessages:
+ """Test ResponseMetadata integration with AI message classes."""
+
+ def test_ai_message_with_response_metadata(self) -> None:
+ """Test AIMessage with ResponseMetadata."""
+ metadata: ResponseMetadata = {
+ "model_provider": "openai",
+ "model_name": "gpt-4",
+ "system_fingerprint": "fp_xyz789",
+ }
+
+ message = AIMessage(content="Hello, world!", response_metadata=metadata)
+
+ assert message.response_metadata == metadata
+ assert message.response_metadata.get("model_provider") == "openai"
+ assert message.response_metadata.get("model_name") == "gpt-4"
+ assert message.response_metadata.get("system_fingerprint") == "fp_xyz789"
+
+ def test_ai_message_chunk_with_response_metadata(self) -> None:
+ """Test AIMessageChunk with ResponseMetadata."""
+ metadata: ResponseMetadata = {
+ "model_provider": "anthropic",
+ "model_name": "claude-3-sonnet",
+ "stream_id": "stream_12345",
+ }
+
+ chunk = AIMessageChunk(content="Hello", response_metadata=metadata)
+
+ assert chunk.response_metadata == metadata
+ assert chunk.response_metadata.get("stream_id") == "stream_12345"
+
+ def test_ai_message_default_empty_response_metadata(self) -> None:
+ """Test that AIMessage creates empty ResponseMetadata by default."""
+ message = AIMessage(content="Test message")
+
+ # Should have empty dict as default
+ assert message.response_metadata == {}
+ assert isinstance(message.response_metadata, dict)
+
+ def test_ai_message_chunk_default_empty_response_metadata(self) -> None:
+ """Test that AIMessageChunk creates empty ResponseMetadata by default."""
+ chunk = AIMessageChunk(content="Test chunk")
+
+ # Should have empty dict as default
+ assert chunk.response_metadata == {}
+ assert isinstance(chunk.response_metadata, dict)
+
+ def test_response_metadata_merging_in_chunks(self) -> None:
+ """Test that ResponseMetadata is properly merged when adding AIMessageChunks."""
+ metadata1: ResponseMetadata = {
+ "model_provider": "openai",
+ "model_name": "gpt-4",
+ "request_id": "req_123",
+ "system_fingerprint": "fp_abc",
+ }
+
+ metadata2: ResponseMetadata = {
+ "stream_chunk": 1,
+ "finish_reason": "length",
+ }
+
+ chunk1 = AIMessageChunk(content="Hello ", response_metadata=metadata1)
+ chunk2 = AIMessageChunk(content="world!", response_metadata=metadata2)
+
+ merged = chunk1 + chunk2
+
+ # Should have merged response_metadata
+ assert merged.response_metadata.get("model_provider") == "openai"
+ assert merged.response_metadata.get("model_name") == "gpt-4"
+ assert merged.response_metadata.get("request_id") == "req_123"
+ assert merged.response_metadata.get("stream_chunk") == 1
+ assert merged.response_metadata.get("system_fingerprint") == "fp_abc"
+ assert merged.response_metadata.get("finish_reason") == "length"
+
+ def test_response_metadata_modification_after_message_creation(self) -> None:
+ """Test that ResponseMetadata can be modified after message creation."""
+ message = AIMessage(
+ content="Initial message",
+ response_metadata={"model_provider": "openai", "model_name": "gpt-3.5"},
+ )
+
+ # Modify existing field
+ message.response_metadata["model_name"] = "gpt-4"
+ assert message.response_metadata.get("model_name") == "gpt-4"
+
+ # Add new field
+ message.response_metadata["finish_reason"] = "stop"
+ assert message.response_metadata.get("finish_reason") == "stop"
+
+ def test_response_metadata_with_none_values(self) -> None:
+ """Test ResponseMetadata handling of None values."""
+ metadata: ResponseMetadata = {
+ "model_provider": "openai",
+ "model_name": "gpt-4",
+ "system_fingerprint": None,
+ "logprobs": None,
+ }
+
+ message = AIMessage(content="Test", response_metadata=metadata)
+
+ assert message.response_metadata.get("system_fingerprint") is None
+ assert message.response_metadata.get("logprobs") is None
+ assert "system_fingerprint" in message.response_metadata
+ assert "logprobs" in message.response_metadata
+
+
+class TestResponseMetadataEdgeCases:
+ """Test edge cases and error conditions for ResponseMetadata."""
+
+ def test_response_metadata_with_complex_nested_structures(self) -> None:
+ """Test ResponseMetadata with deeply nested and complex structures."""
+ metadata: ResponseMetadata = {
+ "model_provider": "custom",
+ "model_name": "complex-model",
+ "complex_data": {
+ "level1": {
+ "level2": {
+ "level3": {
+ "deeply_nested": "value",
+ "array": [
+ {"item": 1, "metadata": {"nested": True}},
+ {"item": 2, "metadata": {"nested": False}},
+ ],
+ }
+ }
+ }
+ },
+ }
+
+ complex_data = metadata.get("complex_data")
+ assert isinstance(complex_data, dict)
+ level1 = complex_data.get("level1")
+ assert isinstance(level1, dict)
+ level2 = level1.get("level2")
+ assert isinstance(level2, dict)
+ level3 = level2.get("level3")
+ assert isinstance(level3, dict)
+
+ assert level3.get("deeply_nested") == "value"
+ array = level3.get("array")
+ assert isinstance(array, list)
+ assert len(array) == 2
+ assert array[0]["item"] == 1
+ assert array[0]["metadata"]["nested"] is True
+
+ def test_response_metadata_large_data(self) -> None:
+ """Test ResponseMetadata with large amounts of data."""
+ # Create metadata with many fields
+ large_metadata: ResponseMetadata = {
+ "model_provider": "test_provider",
+ "model_name": "test_model",
+ }
+
+ # Add 100 extra fields
+ for i in range(100):
+ large_metadata[f"field_{i}"] = f"value_{i}" # type: ignore[literal-required]
+
+ message = AIMessage(content="Test", response_metadata=large_metadata)
+
+ # Verify all fields are accessible
+ assert message.response_metadata.get("model_provider") == "test_provider"
+ for i in range(100):
+ assert message.response_metadata.get(f"field_{i}") == f"value_{i}"
+
+ def test_response_metadata_empty_vs_none(self) -> None:
+ """Test the difference between empty ResponseMetadata and None."""
+ # Message with empty metadata
+ message_empty = AIMessage(content="Test", response_metadata={})
+ assert message_empty.response_metadata == {}
+ assert isinstance(message_empty.response_metadata, dict)
+
+ # Message with None metadata (should become empty dict)
+ message_none = AIMessage(content="Test", response_metadata=None)
+ assert message_none.response_metadata == {}
+ assert isinstance(message_none.response_metadata, dict)
+
+ # Default message (no metadata specified)
+ message_default = AIMessage(content="Test")
+ assert message_default.response_metadata == {}
+ assert isinstance(message_default.response_metadata, dict)
+
+ def test_response_metadata_preserves_original_dict_type(self) -> None:
+ """Test that ResponseMetadata preserves the original dict when passed."""
+ original_dict: ResponseMetadata = {
+ "model_provider": "openai",
+ "model_name": "gpt-4",
+ "custom_field": "custom_value",
+ }
+
+ message = AIMessage(content="Test", response_metadata=original_dict)
+
+ # Should be the same dict object
+ assert message.response_metadata is original_dict
+
+ # Modifications to the message's response_metadata should affect original
+ message.response_metadata["new_field"] = "new_value"
+ assert original_dict.get("new_field") == "new_value"
diff --git a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr
index 7851b29c4c9..11df1714380 100644
--- a/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr
+++ b/libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr
@@ -726,7 +726,7 @@
'description': '''
Allowance for errors made by LLM.
- Here we add an `error` key to surface errors made during generation
+ Here we add an ``error`` key to surface errors made during generation
(e.g., invalid JSON arguments.)
''',
'properties': dict({
@@ -752,6 +752,10 @@
]),
'title': 'Error',
}),
+ 'extras': dict({
+ 'title': 'Extras',
+ 'type': 'object',
+ }),
'id': dict({
'anyOf': list([
dict({
@@ -763,10 +767,6 @@
]),
'title': 'Id',
}),
- 'index': dict({
- 'title': 'Index',
- 'type': 'integer',
- }),
'name': dict({
'anyOf': list([
dict({
@@ -785,11 +785,11 @@
}),
}),
'required': list([
+ 'type',
+ 'id',
'name',
'args',
- 'id',
'error',
- 'type',
]),
'title': 'InvalidToolCall',
'type': 'object',
@@ -1003,12 +1003,23 @@
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
+
+ .. note::
+ ``create_tool_call`` may also be used as a factory to create a
+ ``ToolCall``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
''',
'properties': dict({
'args': dict({
'title': 'Args',
'type': 'object',
}),
+ 'extras': dict({
+ 'title': 'Extras',
+ 'type': 'object',
+ }),
'id': dict({
'anyOf': list([
dict({
@@ -1035,10 +1046,10 @@
}),
}),
'required': list([
+ 'type',
+ 'id',
'name',
'args',
- 'id',
- 'type',
]),
'title': 'ToolCall',
'type': 'object',
@@ -1047,9 +1058,9 @@
'description': '''
A chunk of a tool call (e.g., as part of a stream).
- When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
+ When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
- values of `index` are equal and not None.
+ values of ``index`` are equal and not ``None``.
Example:
@@ -1075,6 +1086,10 @@
]),
'title': 'Args',
}),
+ 'extras': dict({
+ 'title': 'Extras',
+ 'type': 'object',
+ }),
'id': dict({
'anyOf': list([
dict({
@@ -1115,9 +1130,9 @@
}),
}),
'required': list([
+ 'id',
'name',
'args',
- 'id',
'index',
]),
'title': 'ToolCallChunk',
@@ -2168,7 +2183,7 @@
'description': '''
Allowance for errors made by LLM.
- Here we add an `error` key to surface errors made during generation
+ Here we add an ``error`` key to surface errors made during generation
(e.g., invalid JSON arguments.)
''',
'properties': dict({
@@ -2194,6 +2209,10 @@
]),
'title': 'Error',
}),
+ 'extras': dict({
+ 'title': 'Extras',
+ 'type': 'object',
+ }),
'id': dict({
'anyOf': list([
dict({
@@ -2205,10 +2224,6 @@
]),
'title': 'Id',
}),
- 'index': dict({
- 'title': 'Index',
- 'type': 'integer',
- }),
'name': dict({
'anyOf': list([
dict({
@@ -2227,11 +2242,11 @@
}),
}),
'required': list([
+ 'type',
+ 'id',
'name',
'args',
- 'id',
'error',
- 'type',
]),
'title': 'InvalidToolCall',
'type': 'object',
@@ -2445,12 +2460,23 @@
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
+
+ .. note::
+ ``create_tool_call`` may also be used as a factory to create a
+ ``ToolCall``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
''',
'properties': dict({
'args': dict({
'title': 'Args',
'type': 'object',
}),
+ 'extras': dict({
+ 'title': 'Extras',
+ 'type': 'object',
+ }),
'id': dict({
'anyOf': list([
dict({
@@ -2477,10 +2503,10 @@
}),
}),
'required': list([
+ 'type',
+ 'id',
'name',
'args',
- 'id',
- 'type',
]),
'title': 'ToolCall',
'type': 'object',
@@ -2489,9 +2515,9 @@
'description': '''
A chunk of a tool call (e.g., as part of a stream).
- When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
+ When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
- values of `index` are equal and not None.
+ values of ``index`` are equal and not ``None``.
Example:
@@ -2517,6 +2543,10 @@
]),
'title': 'Args',
}),
+ 'extras': dict({
+ 'title': 'Extras',
+ 'type': 'object',
+ }),
'id': dict({
'anyOf': list([
dict({
@@ -2557,9 +2587,9 @@
}),
}),
'required': list([
+ 'id',
'name',
'args',
- 'id',
'index',
]),
'title': 'ToolCallChunk',
diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr
index 3dcbb0c5150..7ae8ecdf154 100644
--- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr
+++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_graph.ambr
@@ -406,12 +406,25 @@
'data': dict({
'$defs': dict({
'AudioContentBlock': dict({
- 'description': 'Content block for audio data.',
+ 'description': '''
+ Audio data.
+
+ .. note::
+ ``create_audio_block`` may also be used as a factory to create an
+ ``AudioContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
+ ''',
'properties': dict({
'base64': dict({
'title': 'Base64',
'type': 'string',
}),
+ 'extras': dict({
+ 'title': 'Extras',
+ 'type': 'object',
+ }),
'file_id': dict({
'title': 'File Id',
'type': 'string',
@@ -608,6 +621,13 @@
``start/end`` indices refer to the **response text**,
not the source text. This means that the indices are relative to the model's
response, not the original document (as specified in the ``url``).
+
+ .. note::
+ ``create_citation`` may also be used as a factory to create a ``Citation``.
+ Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
''',
'properties': dict({
'cited_text': dict({
@@ -618,6 +638,10 @@
'title': 'End Index',
'type': 'integer',
}),
+ 'extras': dict({
+ 'title': 'Extras',
+ 'type': 'object',
+ }),
'id': dict({
'title': 'Id',
'type': 'string',
@@ -647,12 +671,16 @@
'type': 'object',
}),
'CodeInterpreterCall': dict({
- 'description': 'Content block for a built-in code interpreter tool call.',
+ 'description': 'Built-in code interpreter tool call.',
'properties': dict({
'code': dict({
'title': 'Code',
'type': 'string',
}),
+ 'extras': dict({
+ 'title': 'Extras',
+ 'type': 'object',
+ }),
'id': dict({
'title': 'Id',
'type': 'string',
@@ -679,12 +707,16 @@
}),
'CodeInterpreterOutput': dict({
'description': '''
- Content block for the output of a singular code interpreter tool call.
+ Output of a singular code interpreter tool call.
Full output of a code interpreter tool call is represented by
``CodeInterpreterResult`` which is a list of these blocks.
''',
'properties': dict({
+ 'extras': dict({
+ 'title': 'Extras',
+ 'type': 'object',
+ }),
'file_ids': dict({
'items': dict({
'type': 'string',
@@ -725,8 +757,12 @@
'type': 'object',
}),
'CodeInterpreterResult': dict({
- 'description': 'Content block for the result of a code interpreter tool call.',
+ 'description': 'Result of a code interpreter tool call.',
'properties': dict({
+ 'extras': dict({
+ 'title': 'Extras',
+ 'type': 'object',
+ }),
'id': dict({
'title': 'Id',
'type': 'string',
@@ -757,7 +793,7 @@
}),
'FileContentBlock': dict({
'description': '''
- Content block for file data.
+ File data that doesn't fit into other multimodal blocks.
This block is intended for files that are not images, audio, or plaintext. For
example, it can be used for PDFs, Word documents, etc.
@@ -765,12 +801,23 @@
If the file is an image, audio, or plaintext, you should use the corresponding
content block type (e.g., ``ImageContentBlock``, ``AudioContentBlock``,
``PlainTextContentBlock``).
+
+ .. note::
+ ``create_file_block`` may also be used as a factory to create a
+ ``FileContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
''',
'properties': dict({
'base64': dict({
'title': 'Base64',
'type': 'string',
}),
+ 'extras': dict({
+ 'title': 'Extras',
+ 'type': 'object',
+ }),
'file_id': dict({
'title': 'File Id',
'type': 'string',
@@ -1014,12 +1061,25 @@
'type': 'object',
}),
'ImageContentBlock': dict({
- 'description': 'Content block for image data.',
+ 'description': '''
+ Image data.
+
+ .. note::
+ ``create_image_block`` may also be used as a factory to create an
+ ``ImageContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
+ ''',
'properties': dict({
'base64': dict({
'title': 'Base64',
'type': 'string',
}),
+ 'extras': dict({
+ 'title': 'Extras',
+ 'type': 'object',
+ }),
'file_id': dict({
'title': 'File Id',
'type': 'string',
@@ -1093,7 +1153,7 @@
'description': '''
Allowance for errors made by LLM.
- Here we add an `error` key to surface errors made during generation
+ Here we add an ``error`` key to surface errors made during generation
(e.g., invalid JSON arguments.)
''',
'properties': dict({
@@ -1119,6 +1179,10 @@
]),
'title': 'Error',
}),
+ 'extras': dict({
+ 'title': 'Extras',
+ 'type': 'object',
+ }),
'id': dict({
'anyOf': list([
dict({
@@ -1130,10 +1194,6 @@
]),
'title': 'Id',
}),
- 'index': dict({
- 'title': 'Index',
- 'type': 'integer',
- }),
'name': dict({
'anyOf': list([
dict({
@@ -1152,11 +1212,11 @@
}),
}),
'required': list([
+ 'type',
+ 'id',
'name',
'args',
- 'id',
'error',
- 'type',
]),
'title': 'InvalidToolCall',
'type': 'object',
@@ -1187,7 +1247,7 @@
}),
'NonStandardContentBlock': dict({
'description': '''
- Content block provider-specific data.
+ Provider-specific data.
This block contains data for which there is not yet a standard type.
@@ -1195,6 +1255,13 @@
If a provider's non-standard output includes reasoning and tool calls, it should be
the adapter's job to parse that payload and emit the corresponding standard
ReasoningContentBlock and ToolCallContentBlocks.
+
+ .. note::
+ ``create_non_standard_block`` may also be used as a factory to create a
+ ``NonStandardContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
''',
'properties': dict({
'id': dict({
@@ -1254,11 +1321,18 @@
}),
'PlainTextContentBlock': dict({
'description': '''
- Content block for plaintext data (e.g., from a document).
+ Plaintext data (e.g., from a document).
.. note::
Title and context are optional fields that may be passed to the model. See
Anthropic `example `__.
+
+ .. note::
+ ``create_plaintext_block`` may also be used as a factory to create a
+ ``PlainTextContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
''',
'properties': dict({
'base64': dict({
@@ -1269,6 +1343,10 @@
'title': 'Context',
'type': 'string',
}),
+ 'extras': dict({
+ 'title': 'Extras',
+ 'type': 'object',
+ }),
'file_id': dict({
'title': 'File Id',
'type': 'string',
@@ -1312,8 +1390,21 @@
'type': 'object',
}),
'ReasoningContentBlock': dict({
- 'description': 'Content block for reasoning output.',
+ 'description': '''
+ Reasoning output from an LLM.
+
+ .. note::
+ ``create_reasoning_block`` may also be used as a factory to create a
+ ``ReasoningContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
+ ''',
'properties': dict({
+ 'extras': dict({
+ 'title': 'Extras',
+ 'type': 'object',
+ }),
'id': dict({
'title': 'Id',
'type': 'string',
@@ -1326,14 +1417,6 @@
'title': 'Reasoning',
'type': 'string',
}),
- 'signature': dict({
- 'title': 'Signature',
- 'type': 'string',
- }),
- 'thought_signature': dict({
- 'title': 'Thought Signature',
- 'type': 'string',
- }),
'type': dict({
'const': 'reasoning',
'title': 'Type',
@@ -1353,7 +1436,9 @@
Contains additional information returned by the provider, such as
response headers, service tiers, log probabilities, system fingerprints, etc.
- Extra keys are permitted from what is typed here.
+ Extra keys are permitted from what is typed here (via ``total=False``), allowing
+ for provider-specific metadata to be included without breaking the type
+ definition.
''',
'properties': dict({
'model_name': dict({
@@ -1440,10 +1525,17 @@
}),
'TextContentBlock': dict({
'description': '''
- Content block for text output.
+ Text output from an LLM.
This typically represents the main text content of a message, such as the response
from a language model or the text of a user message.
+
+ .. note::
+ ``create_text_block`` may also be used as a factory to create a
+ ``TextContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
''',
'properties': dict({
'annotations': dict({
@@ -1460,6 +1552,10 @@
'title': 'Annotations',
'type': 'array',
}),
+ 'extras': dict({
+ 'title': 'Extras',
+ 'type': 'object',
+ }),
'id': dict({
'title': 'Id',
'type': 'string',
@@ -1501,12 +1597,23 @@
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
+
+ .. note::
+ ``create_tool_call`` may also be used as a factory to create a
+ ``ToolCall``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
''',
'properties': dict({
'args': dict({
'title': 'Args',
'type': 'object',
}),
+ 'extras': dict({
+ 'title': 'Extras',
+ 'type': 'object',
+ }),
'id': dict({
'anyOf': list([
dict({
@@ -1533,10 +1640,10 @@
}),
}),
'required': list([
+ 'type',
+ 'id',
'name',
'args',
- 'id',
- 'type',
]),
'title': 'ToolCall',
'type': 'object',
@@ -1545,9 +1652,9 @@
'description': '''
A chunk of a tool call (e.g., as part of a stream).
- When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
+ When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
- values of `index` are equal and not None.
+ values of ``index`` are equal and not ``None``.
Example:
@@ -1573,6 +1680,10 @@
]),
'title': 'Args',
}),
+ 'extras': dict({
+ 'title': 'Extras',
+ 'type': 'object',
+ }),
'id': dict({
'anyOf': list([
dict({
@@ -1613,9 +1724,9 @@
}),
}),
'required': list([
+ 'id',
'name',
'args',
- 'id',
'index',
]),
'title': 'ToolCallChunk',
@@ -1761,12 +1872,25 @@
'type': 'object',
}),
'VideoContentBlock': dict({
- 'description': 'Content block for video data.',
+ 'description': '''
+ Video data.
+
+ .. note::
+ ``create_video_block`` may also be used as a factory to create a
+ ``VideoContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
+ ''',
'properties': dict({
'base64': dict({
'title': 'Base64',
'type': 'string',
}),
+ 'extras': dict({
+ 'title': 'Extras',
+ 'type': 'object',
+ }),
'file_id': dict({
'title': 'File Id',
'type': 'string',
@@ -1800,8 +1924,12 @@
'type': 'object',
}),
'WebSearchCall': dict({
- 'description': 'Content block for a built-in web search tool call.',
+ 'description': 'Built-in web search tool call.',
'properties': dict({
+ 'extras': dict({
+ 'title': 'Extras',
+ 'type': 'object',
+ }),
'id': dict({
'title': 'Id',
'type': 'string',
@@ -1827,8 +1955,12 @@
'type': 'object',
}),
'WebSearchResult': dict({
- 'description': 'Content block for the result of a built-in web search tool call.',
+ 'description': 'Result of a built-in web search tool call.',
'properties': dict({
+ 'extras': dict({
+ 'title': 'Extras',
+ 'type': 'object',
+ }),
'id': dict({
'title': 'Id',
'type': 'string',
diff --git a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr
index 3fab204ca04..7df5f44e70a 100644
--- a/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr
+++ b/libs/core/tests/unit_tests/runnables/__snapshots__/test_runnable.ambr
@@ -2674,7 +2674,7 @@
'description': '''
Allowance for errors made by LLM.
- Here we add an `error` key to surface errors made during generation
+ Here we add an ``error`` key to surface errors made during generation
(e.g., invalid JSON arguments.)
''',
'properties': dict({
@@ -2711,10 +2711,6 @@
]),
'title': 'Id',
}),
- 'index': dict({
- 'title': 'Index',
- 'type': 'integer',
- }),
'name': dict({
'anyOf': list([
dict({
@@ -2732,11 +2728,11 @@
}),
}),
'required': list([
+ 'type',
+ 'id',
'name',
'args',
- 'id',
'error',
- 'type',
]),
'title': 'InvalidToolCall',
'type': 'object',
@@ -2948,6 +2944,13 @@
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
+
+ .. note::
+ ``create_tool_call`` may also be used as a factory to create a
+ ``ToolCall``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
''',
'properties': dict({
'args': dict({
@@ -2979,10 +2982,10 @@
}),
}),
'required': list([
+ 'type',
+ 'id',
'name',
'args',
- 'id',
- 'type',
]),
'title': 'ToolCall',
'type': 'object',
@@ -2991,9 +2994,9 @@
'description': '''
A chunk of a tool call (e.g., as part of a stream).
- When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
+ When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
- values of `index` are equal and not None.
+ values of ``index`` are equal and not ``None``.
Example:
@@ -3058,9 +3061,9 @@
}),
}),
'required': list([
+ 'id',
'name',
'args',
- 'id',
'index',
]),
'title': 'ToolCallChunk',
@@ -4160,7 +4163,7 @@
'description': '''
Allowance for errors made by LLM.
- Here we add an `error` key to surface errors made during generation
+ Here we add an ``error`` key to surface errors made during generation
(e.g., invalid JSON arguments.)
''',
'properties': dict({
@@ -4197,10 +4200,6 @@
]),
'title': 'Id',
}),
- 'index': dict({
- 'title': 'Index',
- 'type': 'integer',
- }),
'name': dict({
'anyOf': list([
dict({
@@ -4218,11 +4217,11 @@
}),
}),
'required': list([
+ 'type',
+ 'id',
'name',
'args',
- 'id',
'error',
- 'type',
]),
'title': 'InvalidToolCall',
'type': 'object',
@@ -4453,6 +4452,13 @@
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
+
+ .. note::
+ ``create_tool_call`` may also be used as a factory to create a
+ ``ToolCall``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
''',
'properties': dict({
'args': dict({
@@ -4484,10 +4490,10 @@
}),
}),
'required': list([
+ 'type',
+ 'id',
'name',
'args',
- 'id',
- 'type',
]),
'title': 'ToolCall',
'type': 'object',
@@ -4496,9 +4502,9 @@
'description': '''
A chunk of a tool call (e.g., as part of a stream).
- When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
+ When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
- values of `index` are equal and not None.
+ values of ``index`` are equal and not ``None``.
Example:
@@ -4563,9 +4569,9 @@
}),
}),
'required': list([
+ 'id',
'name',
'args',
- 'id',
'index',
]),
'title': 'ToolCallChunk',
@@ -5677,7 +5683,7 @@
'description': '''
Allowance for errors made by LLM.
- Here we add an `error` key to surface errors made during generation
+ Here we add an ``error`` key to surface errors made during generation
(e.g., invalid JSON arguments.)
''',
'properties': dict({
@@ -5714,10 +5720,6 @@
]),
'title': 'Id',
}),
- 'index': dict({
- 'title': 'Index',
- 'type': 'integer',
- }),
'name': dict({
'anyOf': list([
dict({
@@ -5735,11 +5737,11 @@
}),
}),
'required': list([
+ 'type',
+ 'id',
'name',
'args',
- 'id',
'error',
- 'type',
]),
'title': 'InvalidToolCall',
'type': 'object',
@@ -5970,6 +5972,13 @@
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
+
+ .. note::
+ ``create_tool_call`` may also be used as a factory to create a
+ ``ToolCall``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
''',
'properties': dict({
'args': dict({
@@ -6001,10 +6010,10 @@
}),
}),
'required': list([
+ 'type',
+ 'id',
'name',
'args',
- 'id',
- 'type',
]),
'title': 'ToolCall',
'type': 'object',
@@ -6013,9 +6022,9 @@
'description': '''
A chunk of a tool call (e.g., as part of a stream).
- When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
+ When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
- values of `index` are equal and not None.
+ values of ``index`` are equal and not ``None``.
Example:
@@ -6080,9 +6089,9 @@
}),
}),
'required': list([
+ 'id',
'name',
'args',
- 'id',
'index',
]),
'title': 'ToolCallChunk',
@@ -7069,7 +7078,7 @@
'description': '''
Allowance for errors made by LLM.
- Here we add an `error` key to surface errors made during generation
+ Here we add an ``error`` key to surface errors made during generation
(e.g., invalid JSON arguments.)
''',
'properties': dict({
@@ -7106,10 +7115,6 @@
]),
'title': 'Id',
}),
- 'index': dict({
- 'title': 'Index',
- 'type': 'integer',
- }),
'name': dict({
'anyOf': list([
dict({
@@ -7127,11 +7132,11 @@
}),
}),
'required': list([
+ 'type',
+ 'id',
'name',
'args',
- 'id',
'error',
- 'type',
]),
'title': 'InvalidToolCall',
'type': 'object',
@@ -7343,6 +7348,13 @@
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
+
+ .. note::
+ ``create_tool_call`` may also be used as a factory to create a
+ ``ToolCall``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
''',
'properties': dict({
'args': dict({
@@ -7374,10 +7386,10 @@
}),
}),
'required': list([
+ 'type',
+ 'id',
'name',
'args',
- 'id',
- 'type',
]),
'title': 'ToolCall',
'type': 'object',
@@ -7386,9 +7398,9 @@
'description': '''
A chunk of a tool call (e.g., as part of a stream).
- When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
+ When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
- values of `index` are equal and not None.
+ values of ``index`` are equal and not ``None``.
Example:
@@ -7453,9 +7465,9 @@
}),
}),
'required': list([
+ 'id',
'name',
'args',
- 'id',
'index',
]),
'title': 'ToolCallChunk',
@@ -8597,7 +8609,7 @@
'description': '''
Allowance for errors made by LLM.
- Here we add an `error` key to surface errors made during generation
+ Here we add an ``error`` key to surface errors made during generation
(e.g., invalid JSON arguments.)
''',
'properties': dict({
@@ -8634,10 +8646,6 @@
]),
'title': 'Id',
}),
- 'index': dict({
- 'title': 'Index',
- 'type': 'integer',
- }),
'name': dict({
'anyOf': list([
dict({
@@ -8655,11 +8663,11 @@
}),
}),
'required': list([
+ 'type',
+ 'id',
'name',
'args',
- 'id',
'error',
- 'type',
]),
'title': 'InvalidToolCall',
'type': 'object',
@@ -8890,6 +8898,13 @@
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
+
+ .. note::
+ ``create_tool_call`` may also be used as a factory to create a
+ ``ToolCall``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
''',
'properties': dict({
'args': dict({
@@ -8921,10 +8936,10 @@
}),
}),
'required': list([
+ 'type',
+ 'id',
'name',
'args',
- 'id',
- 'type',
]),
'title': 'ToolCall',
'type': 'object',
@@ -8933,9 +8948,9 @@
'description': '''
A chunk of a tool call (e.g., as part of a stream).
- When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
+ When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
- values of `index` are equal and not None.
+ values of ``index`` are equal and not ``None``.
Example:
@@ -9000,9 +9015,9 @@
}),
}),
'required': list([
+ 'id',
'name',
'args',
- 'id',
'index',
]),
'title': 'ToolCallChunk',
@@ -9334,7 +9349,16 @@
]),
'definitions': dict({
'AudioContentBlock': dict({
- 'description': 'Content block for audio data.',
+ 'description': '''
+ Audio data.
+
+ .. note::
+ ``create_audio_block`` may also be used as a factory to create an
+ ``AudioContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
+ ''',
'properties': dict({
'base64': dict({
'title': 'Base64',
@@ -9533,6 +9557,13 @@
``start/end`` indices refer to the **response text**,
not the source text. This means that the indices are relative to the model's
response, not the original document (as specified in the ``url``).
+
+ .. note::
+ ``create_citation`` may also be used as a factory to create a ``Citation``.
+ Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
''',
'properties': dict({
'cited_text': dict({
@@ -9571,7 +9602,7 @@
'type': 'object',
}),
'CodeInterpreterCall': dict({
- 'description': 'Content block for a built-in code interpreter tool call.',
+ 'description': 'Built-in code interpreter tool call.',
'properties': dict({
'code': dict({
'title': 'Code',
@@ -9602,7 +9633,7 @@
}),
'CodeInterpreterOutput': dict({
'description': '''
- Content block for the output of a singular code interpreter tool call.
+ Output of a singular code interpreter tool call.
Full output of a code interpreter tool call is represented by
``CodeInterpreterResult`` which is a list of these blocks.
@@ -9647,7 +9678,7 @@
'type': 'object',
}),
'CodeInterpreterResult': dict({
- 'description': 'Content block for the result of a code interpreter tool call.',
+ 'description': 'Result of a code interpreter tool call.',
'properties': dict({
'id': dict({
'title': 'Id',
@@ -9678,7 +9709,7 @@
}),
'FileContentBlock': dict({
'description': '''
- Content block for file data.
+ File data that doesn't fit into other multimodal blocks.
This block is intended for files that are not images, audio, or plaintext. For
example, it can be used for PDFs, Word documents, etc.
@@ -9686,6 +9717,13 @@
If the file is an image, audio, or plaintext, you should use the corresponding
content block type (e.g., ``ImageContentBlock``, ``AudioContentBlock``,
``PlainTextContentBlock``).
+
+ .. note::
+ ``create_file_block`` may also be used as a factory to create a
+ ``FileContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
''',
'properties': dict({
'base64': dict({
@@ -9931,7 +9969,16 @@
'type': 'object',
}),
'ImageContentBlock': dict({
- 'description': 'Content block for image data.',
+ 'description': '''
+ Image data.
+
+ .. note::
+ ``create_image_block`` may also be used as a factory to create an
+ ``ImageContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
+ ''',
'properties': dict({
'base64': dict({
'title': 'Base64',
@@ -10009,7 +10056,7 @@
'description': '''
Allowance for errors made by LLM.
- Here we add an `error` key to surface errors made during generation
+ Here we add an ``error`` key to surface errors made during generation
(e.g., invalid JSON arguments.)
''',
'properties': dict({
@@ -10046,10 +10093,6 @@
]),
'title': 'Id',
}),
- 'index': dict({
- 'title': 'Index',
- 'type': 'integer',
- }),
'name': dict({
'anyOf': list([
dict({
@@ -10067,11 +10110,11 @@
}),
}),
'required': list([
+ 'type',
+ 'id',
'name',
'args',
- 'id',
'error',
- 'type',
]),
'title': 'InvalidToolCall',
'type': 'object',
@@ -10101,7 +10144,7 @@
}),
'NonStandardContentBlock': dict({
'description': '''
- Content block provider-specific data.
+ Provider-specific data.
This block contains data for which there is not yet a standard type.
@@ -10109,6 +10152,13 @@
If a provider's non-standard output includes reasoning and tool calls, it should be
the adapter's job to parse that payload and emit the corresponding standard
ReasoningContentBlock and ToolCallContentBlocks.
+
+ .. note::
+ ``create_non_standard_block`` may also be used as a factory to create a
+ ``NonStandardContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
''',
'properties': dict({
'id': dict({
@@ -10167,11 +10217,18 @@
}),
'PlainTextContentBlock': dict({
'description': '''
- Content block for plaintext data (e.g., from a document).
+ Plaintext data (e.g., from a document).
.. note::
Title and context are optional fields that may be passed to the model. See
Anthropic `example `__.
+
+ .. note::
+ ``create_plaintext_block`` may also be used as a factory to create a
+ ``PlainTextContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
''',
'properties': dict({
'base64': dict({
@@ -10223,7 +10280,16 @@
'type': 'object',
}),
'ReasoningContentBlock': dict({
- 'description': 'Content block for reasoning output.',
+ 'description': '''
+ Reasoning output from an LLM.
+
+ .. note::
+ ``create_reasoning_block`` may also be used as a factory to create a
+ ``ReasoningContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
+ ''',
'properties': dict({
'id': dict({
'title': 'Id',
@@ -10237,14 +10303,6 @@
'title': 'Reasoning',
'type': 'string',
}),
- 'signature': dict({
- 'title': 'Signature',
- 'type': 'string',
- }),
- 'thought_signature': dict({
- 'title': 'Thought Signature',
- 'type': 'string',
- }),
'type': dict({
'const': 'reasoning',
'title': 'Type',
@@ -10263,7 +10321,9 @@
Contains additional information returned by the provider, such as
response headers, service tiers, log probabilities, system fingerprints, etc.
- Extra keys are permitted from what is typed here.
+ Extra keys are permitted from what is typed here (via ``total=False``), allowing
+ for provider-specific metadata to be included without breaking the type
+ definition.
''',
'properties': dict({
'model_name': dict({
@@ -10349,10 +10409,17 @@
}),
'TextContentBlock': dict({
'description': '''
- Content block for text output.
+ Text output from an LLM.
This typically represents the main text content of a message, such as the response
from a language model or the text of a user message.
+
+ .. note::
+ ``create_text_block`` may also be used as a factory to create a
+ ``TextContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
''',
'properties': dict({
'annotations': dict({
@@ -10409,6 +10476,13 @@
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
+
+ .. note::
+ ``create_tool_call`` may also be used as a factory to create a
+ ``ToolCall``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
''',
'properties': dict({
'args': dict({
@@ -10440,10 +10514,10 @@
}),
}),
'required': list([
+ 'type',
+ 'id',
'name',
'args',
- 'id',
- 'type',
]),
'title': 'ToolCall',
'type': 'object',
@@ -10452,9 +10526,9 @@
'description': '''
A chunk of a tool call (e.g., as part of a stream).
- When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
+ When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
- values of `index` are equal and not None.
+ values of ``index`` are equal and not ``None``.
Example:
@@ -10519,9 +10593,9 @@
}),
}),
'required': list([
+ 'id',
'name',
'args',
- 'id',
'index',
]),
'title': 'ToolCallChunk',
@@ -10666,7 +10740,16 @@
'type': 'object',
}),
'VideoContentBlock': dict({
- 'description': 'Content block for video data.',
+ 'description': '''
+ Video data.
+
+ .. note::
+ ``create_video_block`` may also be used as a factory to create a
+ ``VideoContentBlock``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
+ ''',
'properties': dict({
'base64': dict({
'title': 'Base64',
@@ -10704,7 +10787,7 @@
'type': 'object',
}),
'WebSearchCall': dict({
- 'description': 'Content block for a built-in web search tool call.',
+ 'description': 'Built-in web search tool call.',
'properties': dict({
'id': dict({
'title': 'Id',
@@ -10730,7 +10813,7 @@
'type': 'object',
}),
'WebSearchResult': dict({
- 'description': 'Content block for the result of a built-in web search tool call.',
+ 'description': 'Result of a built-in web search tool call.',
'properties': dict({
'id': dict({
'title': 'Id',
@@ -12604,7 +12687,7 @@
'description': '''
Allowance for errors made by LLM.
- Here we add an `error` key to surface errors made during generation
+ Here we add an ``error`` key to surface errors made during generation
(e.g., invalid JSON arguments.)
''',
'properties': dict({
@@ -12641,10 +12724,6 @@
]),
'title': 'Id',
}),
- 'index': dict({
- 'title': 'Index',
- 'type': 'integer',
- }),
'name': dict({
'anyOf': list([
dict({
@@ -12662,11 +12741,11 @@
}),
}),
'required': list([
+ 'type',
+ 'id',
'name',
'args',
- 'id',
'error',
- 'type',
]),
'title': 'InvalidToolCall',
'type': 'object',
@@ -12908,6 +12987,13 @@
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
+
+ .. note::
+ ``create_tool_call`` may also be used as a factory to create a
+ ``ToolCall``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
''',
'properties': dict({
'args': dict({
@@ -12939,10 +13025,10 @@
}),
}),
'required': list([
+ 'type',
+ 'id',
'name',
'args',
- 'id',
- 'type',
]),
'title': 'ToolCall',
'type': 'object',
@@ -12951,9 +13037,9 @@
'description': '''
A chunk of a tool call (e.g., as part of a stream).
- When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
+ When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
- values of `index` are equal and not None.
+ values of ``index`` are equal and not ``None``.
Example:
@@ -13018,9 +13104,9 @@
}),
}),
'required': list([
+ 'id',
'name',
'args',
- 'id',
'index',
]),
'title': 'ToolCallChunk',
@@ -14082,7 +14168,7 @@
'description': '''
Allowance for errors made by LLM.
- Here we add an `error` key to surface errors made during generation
+ Here we add an ``error`` key to surface errors made during generation
(e.g., invalid JSON arguments.)
''',
'properties': dict({
@@ -14119,10 +14205,6 @@
]),
'title': 'Id',
}),
- 'index': dict({
- 'title': 'Index',
- 'type': 'integer',
- }),
'name': dict({
'anyOf': list([
dict({
@@ -14140,11 +14222,11 @@
}),
}),
'required': list([
+ 'type',
+ 'id',
'name',
'args',
- 'id',
'error',
- 'type',
]),
'title': 'InvalidToolCall',
'type': 'object',
@@ -14375,6 +14457,13 @@
This represents a request to call the tool named "foo" with arguments {"a": 1}
and an identifier of "123".
+
+ .. note::
+ ``create_tool_call`` may also be used as a factory to create a
+ ``ToolCall``. Benefits include:
+
+ * Automatic ID generation (when not provided)
+ * Required arguments strictly validated at creation time
''',
'properties': dict({
'args': dict({
@@ -14406,10 +14495,10 @@
}),
}),
'required': list([
+ 'type',
+ 'id',
'name',
'args',
- 'id',
- 'type',
]),
'title': 'ToolCall',
'type': 'object',
@@ -14418,9 +14507,9 @@
'description': '''
A chunk of a tool call (e.g., as part of a stream).
- When merging ToolCallChunks (e.g., via AIMessageChunk.__add__),
+ When merging ToolCallChunks (e.g., via ``AIMessageChunk.__add__``),
all string attributes are concatenated. Chunks are only merged if their
- values of `index` are equal and not None.
+ values of ``index`` are equal and not ``None``.
Example:
@@ -14485,9 +14574,9 @@
}),
}),
'required': list([
+ 'id',
'name',
'args',
- 'id',
'index',
]),
'title': 'ToolCallChunk',
diff --git a/libs/core/tests/unit_tests/runnables/test_graph.py b/libs/core/tests/unit_tests/runnables/test_graph.py
index 7944d0b1da4..fd9ff2f813e 100644
--- a/libs/core/tests/unit_tests/runnables/test_graph.py
+++ b/libs/core/tests/unit_tests/runnables/test_graph.py
@@ -14,9 +14,7 @@ from langchain_core.runnables import RunnableConfig
from langchain_core.runnables.base import Runnable
from langchain_core.runnables.graph import Edge, Graph, Node
from langchain_core.runnables.graph_mermaid import _escape_node_label
-from langchain_core.utils.pydantic import (
- PYDANTIC_VERSION,
-)
+from langchain_core.utils.pydantic import PYDANTIC_VERSION
from tests.unit_tests.pydantic_utils import _normalize_schema
diff --git a/libs/core/tests/unit_tests/runnables/test_runnable.py b/libs/core/tests/unit_tests/runnables/test_runnable.py
index 4b63fb50ae2..d9a9db349e8 100644
--- a/libs/core/tests/unit_tests/runnables/test_runnable.py
+++ b/libs/core/tests/unit_tests/runnables/test_runnable.py
@@ -6,13 +6,7 @@ import warnings
from collections.abc import AsyncIterator, Awaitable, Iterator, Sequence
from functools import partial
from operator import itemgetter
-from typing import (
- Any,
- Callable,
- Optional,
- Union,
- cast,
-)
+from typing import Any, Callable, Optional, Union, cast
from uuid import UUID
import pytest
@@ -37,11 +31,7 @@ from langchain_core.language_models import (
)
from langchain_core.load import dumpd, dumps
from langchain_core.load.load import loads
-from langchain_core.messages import (
- AIMessageChunk,
- HumanMessage,
- SystemMessage,
-)
+from langchain_core.messages import AIMessageChunk, HumanMessage, SystemMessage
from langchain_core.messages.base import BaseMessage
from langchain_core.output_parsers import (
BaseOutputParser,
@@ -90,9 +80,7 @@ from langchain_core.tracers import (
RunLogPatch,
)
from langchain_core.tracers.context import collect_runs
-from langchain_core.utils.pydantic import (
- PYDANTIC_VERSION,
-)
+from langchain_core.utils.pydantic import PYDANTIC_VERSION
from tests.unit_tests.pydantic_utils import _normalize_schema, _schema
from tests.unit_tests.stubs import AnyStr, _any_id_ai_message, _any_id_ai_message_chunk
@@ -243,7 +231,11 @@ def test_schemas(snapshot: SnapshotAssertion) -> None:
}
assert fake.get_config_jsonschema(include=["tags", "metadata", "run_name"]) == {
"properties": {
- "metadata": {"default": None, "title": "Metadata", "type": "object"},
+ "metadata": {
+ "default": None,
+ "title": "Metadata",
+ "type": "object",
+ },
"run_name": {"default": None, "title": "Run Name", "type": "string"},
"tags": {
"default": None,
diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py
index 3e5773d4123..77ddf96c974 100644
--- a/libs/core/tests/unit_tests/test_messages.py
+++ b/libs/core/tests/unit_tests/test_messages.py
@@ -223,6 +223,7 @@ def test_message_chunks_v1() -> None:
create_tool_call_chunk(name=None, args='ue}"', id=None, index=0)
],
)
+ result = one + two + three
expected = AIMessageChunkV1(
[],
tool_call_chunks=[
@@ -230,11 +231,12 @@ def test_message_chunks_v1() -> None:
name="tool1", args='{"arg1": "value}"', id="1", index=0
)
],
+ id=result.id, # Use the same ID as the result
)
- result = one + two + three
assert result == expected
- assert result.to_message() == AIMessageV1(
+ converted_message = result.to_message()
+ assert converted_message == AIMessageV1(
content=[
{
"name": "tool1",
@@ -242,29 +244,31 @@ def test_message_chunks_v1() -> None:
"id": "1",
"type": "tool_call",
}
- ]
+ ],
+ id=converted_message.id, # Use the same ID as the converted message
)
- assert (
- AIMessageChunkV1(
- [],
- tool_call_chunks=[
- create_tool_call_chunk(name="tool1", args="", id="1", index=0)
- ],
- )
- + AIMessageChunkV1(
- [],
- tool_call_chunks=[
- create_tool_call_chunk(name="tool1", args="a", id=None, index=1)
- ],
- )
- # Don't merge if `index` field does not match.
- ) == AIMessageChunkV1(
+ chunk1 = AIMessageChunkV1(
+ [],
+ tool_call_chunks=[
+ create_tool_call_chunk(name="tool1", args="", id="1", index=0)
+ ],
+ )
+ chunk2 = AIMessageChunkV1(
+ [],
+ tool_call_chunks=[
+ create_tool_call_chunk(name="tool1", args="a", id=None, index=1)
+ ],
+ )
+ # Don't merge if `index` field does not match.
+ merge_result = chunk1 + chunk2
+ assert merge_result == AIMessageChunkV1(
[],
tool_call_chunks=[
create_tool_call_chunk(name="tool1", args="", id="1", index=0),
create_tool_call_chunk(name="tool1", args="a", id=None, index=1),
],
+ id=merge_result.id, # Use the same ID as the merge result
)
ai_msg_chunk = AIMessageChunkV1([])
@@ -274,8 +278,14 @@ def test_message_chunks_v1() -> None:
create_tool_call_chunk(name="tool1", args="a", id=None, index=1)
],
)
- assert ai_msg_chunk + tool_calls_msg_chunk == tool_calls_msg_chunk
- assert tool_calls_msg_chunk + ai_msg_chunk == tool_calls_msg_chunk
+ # These assertions test that adding empty chunks preserves the non-empty chunk
+ result1 = ai_msg_chunk + tool_calls_msg_chunk
+ assert result1.tool_call_chunks == tool_calls_msg_chunk.tool_call_chunks
+ assert result1.content == tool_calls_msg_chunk.content
+
+ result2 = tool_calls_msg_chunk + ai_msg_chunk
+ assert result2.tool_call_chunks == tool_calls_msg_chunk.tool_call_chunks
+ assert result2.content == tool_calls_msg_chunk.content
ai_msg_chunk = AIMessageChunkV1(
[],
@@ -294,15 +304,26 @@ def test_message_chunks_v1() -> None:
[],
usage_metadata={"input_tokens": 4, "output_tokens": 5, "total_tokens": 9},
)
- assert left + right == AIMessageChunkV1(
+ usage_result = left + right
+ expected_usage = AIMessageChunkV1(
content=[],
usage_metadata={"input_tokens": 5, "output_tokens": 7, "total_tokens": 12},
+ id=usage_result.id, # Use the same ID as the result
)
- assert AIMessageChunkV1(content=[]) + left == left
- assert right + AIMessageChunkV1(content=[]) == right
+ assert usage_result == expected_usage
+
+ # Test adding empty chunks preserves the original
+ left_result = AIMessageChunkV1(content=[]) + left
+ assert left_result.usage_metadata == left.usage_metadata
+ assert left_result.content == left.content
+
+ right_result = right + AIMessageChunkV1(content=[])
+ assert right_result.usage_metadata == right.usage_metadata
+ assert right_result.content == right.content
# Test ID order of precedence
- null_id = AIMessageChunkV1(content=[], id=None)
+ # Note: AIMessageChunkV1 always generates an ID if none provided
+ auto_id = AIMessageChunkV1(content=[]) # Gets auto-generated lc_* ID
default_id = AIMessageChunkV1(
content=[], id="run-abc123"
) # LangChain-assigned run ID
@@ -310,14 +331,21 @@ def test_message_chunks_v1() -> None:
content=[], id="msg_def456"
) # provider-assigned ID
- assert (null_id + default_id).id == "run-abc123"
- assert (default_id + null_id).id == "run-abc123"
+ # Provider-assigned IDs (neither run-* nor lc_*) have the highest precedence
+ # and always win over LangChain-generated IDs
+ assert (auto_id + meaningful_id).id == "msg_def456" # provider-assigned wins
+ assert (meaningful_id + auto_id).id == "msg_def456" # provider-assigned wins
- assert (null_id + meaningful_id).id == "msg_def456"
- assert (meaningful_id + null_id).id == "msg_def456"
+ assert (
+ default_id + meaningful_id
+ ).id == "msg_def456" # meaningful_id is provider-assigned
+ assert (
+ meaningful_id + default_id
+ ).id == "msg_def456" # meaningful_id is provider-assigned
- assert (default_id + meaningful_id).id == "msg_def456"
- assert (meaningful_id + default_id).id == "msg_def456"
+ # Between auto-generated lc_* and run-* IDs, the lc_* ID wins (it is not a run-* ID)
+ assert (auto_id + default_id).id == auto_id.id
+ assert (default_id + auto_id).id == auto_id.id
def test_chat_message_chunks() -> None:
@@ -332,7 +360,7 @@ def test_chat_message_chunks() -> None:
):
ChatMessageChunk(role="User", content="I am") + ChatMessageChunk(
role="Assistant", content=" indeed."
- )
+ ) # type: ignore[reportUnusedExpression, unused-ignore]
assert ChatMessageChunk(role="User", content="I am") + AIMessageChunk(
content=" indeed."
@@ -441,7 +469,7 @@ def test_function_message_chunks() -> None:
):
FunctionMessageChunk(name="hello", content="I am") + FunctionMessageChunk(
name="bye", content=" indeed."
- )
+ ) # type: ignore[reportUnusedExpression, unused-ignore]
def test_ai_message_chunks() -> None:
@@ -457,7 +485,7 @@ def test_ai_message_chunks() -> None:
):
AIMessageChunk(example=True, content="I am") + AIMessageChunk(
example=False, content=" indeed."
- )
+ ) # type: ignore[reportUnusedExpression, unused-ignore]
class TestGetBufferString(unittest.TestCase):
diff --git a/libs/core/uv.lock b/libs/core/uv.lock
index 7d20a625e3d..fa75e31b4f6 100644
--- a/libs/core/uv.lock
+++ b/libs/core/uv.lock
@@ -1040,7 +1040,7 @@ requires-dist = [
{ name = "jsonpatch", specifier = ">=1.33,<2.0" },
{ name = "langsmith", specifier = ">=0.3.45" },
{ name = "packaging", specifier = ">=23.2" },
- { name = "pydantic", specifier = ">=2.7.4" },
+ { name = "pydantic", specifier = ">=2.11.7" },
{ name = "pyyaml", specifier = ">=5.3" },
{ name = "tenacity", specifier = ">=8.1.0,!=8.4.0,<10.0.0" },
{ name = "typing-extensions", specifier = ">=4.7" },
diff --git a/uv.lock b/uv.lock
index bb8f22f98e7..00e1cafc046 100644
--- a/uv.lock
+++ b/uv.lock
@@ -181,7 +181,7 @@ wheels = [
[[package]]
name = "anthropic"
-version = "0.57.1"
+version = "0.60.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@@ -192,9 +192,9 @@ dependencies = [
{ name = "sniffio" },
{ name = "typing-extensions" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/d7/75/6261a1a8d92aed47e27d2fcfb3a411af73b1435e6ae1186da02b760565d0/anthropic-0.57.1.tar.gz", hash = "sha256:7815dd92245a70d21f65f356f33fc80c5072eada87fb49437767ea2918b2c4b0", size = 423775, upload-time = "2025-07-03T16:57:35.932Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/4e/03/3334921dc54ed822b3dd993ae72d823a7402588521bbba3e024b3333a1fd/anthropic-0.60.0.tar.gz", hash = "sha256:a22ba187c6f4fd5afecb2fc913b960feccf72bc0d25c1b7ce0345e87caede577", size = 425983, upload-time = "2025-07-28T19:53:47.685Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/e5/cf/ca0ba77805aec6171629a8b665c7dc224dab374539c3d27005b5d8c100a0/anthropic-0.57.1-py3-none-any.whl", hash = "sha256:33afc1f395af207d07ff1bffc0a3d1caac53c371793792569c5d2f09283ea306", size = 292779, upload-time = "2025-07-03T16:57:34.636Z" },
+ { url = "https://files.pythonhosted.org/packages/da/bb/d84f287fb1c217b30c328af987cf8bbe3897edf0518dcc5fa39412f794ec/anthropic-0.60.0-py3-none-any.whl", hash = "sha256:65ad1f088a960217aaf82ba91ff743d6c89e9d811c6d64275b9a7c59ee9ac3c6", size = 293116, upload-time = "2025-07-28T19:53:45.944Z" },
]
[[package]]
@@ -2354,7 +2354,7 @@ typing = [
[[package]]
name = "langchain-anthropic"
-version = "0.3.17"
+version = "0.3.18"
source = { editable = "libs/partners/anthropic" }
dependencies = [
{ name = "anthropic" },
@@ -2364,7 +2364,7 @@ dependencies = [
[package.metadata]
requires-dist = [
- { name = "anthropic", specifier = ">=0.57.0,<1" },
+ { name = "anthropic", specifier = ">=0.60.0,<1" },
{ name = "langchain-core", editable = "libs/core" },
{ name = "pydantic", specifier = ">=2.7.4,<3.0.0" },
]