From 1c5d51965ea9c1b640dbd48be3a972f9ab6ce7c0 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Wed, 6 Aug 2025 17:41:33 -0400 Subject: [PATCH] docs --- .../langchain_core/messages/content_blocks.py | 121 ++++++++++-------- 1 file changed, 69 insertions(+), 52 deletions(-) diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index 57c72907ea9..727e2cd1085 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -8,51 +8,6 @@ This module provides a standardized data structure for representing inputs to an outputs from LLMs. The core abstraction is the **Content Block**, a ``TypedDict`` that can represent a piece of text, an image, a tool call, or other structured data. -Data **not yet mapped** to a standard block may be represented using the -``NonStandardContentBlock``, which allows for provider-specific data to be included -without losing the benefits of type checking and validation. - -Furthermore, provider-specific fields **within** a standard block are fully supported -by default. However, since current type checkers do not recognize this, we are temporarily -applying type ignore comments to suppress warnings. In the future, -`PEP 728 <https://peps.python.org/pep-0728/>`__ will add an extra param, ``extra_items=Any``. -When this is supported, we will apply it to block signatures to signify to type checkers -that additional provider-specific fields are allowed. - -**Example with PEP 728 provider-specific fields:** - -.. code-block:: python - - # Note `extra_items=Any` - class TextContentBlock(TypedDict, extra_items=Any): - type: Literal["text"] - id: NotRequired[str] - text: str - annotations: NotRequired[list[Annotation]] - index: NotRequired[int] - -.. 
code-block:: python - - from langchain_core.messages.content_blocks import TextContentBlock - - my_block: TextContentBlock = { - # Add required fields - "type": "text", - "text": "Hello, world!", - # Additional fields not specified in the TypedDict - # These are valid with PEP 728 and are typed as Any - "openai_metadata": {"model": "gpt-4", "temperature": 0.7}, - "anthropic_usage": {"input_tokens": 10, "output_tokens": 20}, - "custom_field": "any value", - } - - openai_data = my_block["openai_metadata"] # Type: Any - -.. note:: - PEP 728 is enabled with ``# type: ignore[call-arg]`` comments to suppress warnings - from type checkers that don't yet support it. The functionality works correctly - in Python 3.13+ and will be fully supported as the ecosystem catches up. - **Rationale** Different LLM providers use distinct and incompatible API schemas. This module @@ -64,6 +19,62 @@ sequence. An adapter for a specific provider is responsible for translating this standard list of blocks into the format required by its API. +**Extensibility** + +Data **not yet mapped** to a standard block may be represented using the +``NonStandardContentBlock``, which allows for provider-specific data to be included +without losing the benefits of type checking and validation. + +Furthermore, provider-specific fields **within** a standard block are fully supported +by default in the ``extras`` field of each block. This allows for additional metadata +to be included without breaking the standard structure. + +Following widespread adoption of `PEP 728 <https://peps.python.org/pep-0728/>`__, we will add +``extra_items=Any`` as a param to Content Blocks. This will signify to type checkers +that additional provider-specific fields are allowed outside of the ``extras`` field, +and that will become the new standard approach to adding provider-specific metadata. + +.. warning:: + Do not heavily rely on the ``extras`` field for provider-specific data! 
This field + is subject to deprecation in future releases as we move towards PEP 728. + +**Example with PEP 728 provider-specific fields:** + +.. code-block:: python + + # Content block definition + # NOTE: `extra_items=Any` + class TextContentBlock(TypedDict, extra_items=Any): + type: Literal["text"] + id: NotRequired[str] + text: str + annotations: NotRequired[list[Annotation]] + index: NotRequired[int] + +.. code-block:: python + + from langchain_core.messages.content_blocks import TextContentBlock + + # Create a text content block with provider-specific fields + my_block: TextContentBlock = { + # Add required fields + "type": "text", + "text": "Hello, world!", + # Additional fields not specified in the TypedDict + # These are valid with PEP 728 and are typed as Any + "openai_metadata": {"model": "gpt-4", "temperature": 0.7}, + "anthropic_usage": {"input_tokens": 10, "output_tokens": 20}, + "custom_field": "any value", + } + + # Reading a provider-specific field from an existing block + openai_data = my_block["openai_metadata"] # Type: Any + +.. note:: + PEP 728 is enabled with ``# type: ignore[call-arg]`` comments to suppress warnings + from type checkers that don't yet support it. The functionality works correctly + in Python 3.13+ and will be fully supported as the ecosystem catches up. 
+ **Key Block Types** The module defines several types of content blocks, including: @@ -100,6 +111,12 @@ The module defines several types of content blocks, including: mime_type="image/png", ), ] + +Factory functions like ``create_text_block`` and ``create_image_block`` are provided +and offer benefits such as: + +- Automatic ID generation (when not provided) +- No need to manually specify the ``type`` field """ # noqa: E501 import warnings @@ -237,7 +254,7 @@ class TextContentBlock(TypedDict): """Block text.""" annotations: NotRequired[list[Annotation]] - """Citations and other annotations.""" + """``Citation`` objects and other annotations.""" index: NotRequired[int] """Index of block in aggregate response. Used during streaming.""" @@ -900,29 +917,29 @@ def is_data_content_block(block: dict) -> bool: def is_tool_call_block(block: ContentBlock) -> TypeGuard[ToolCall]: - """Type guard to check if a content block is a tool call.""" + """Type guard to check if a content block is a ``ToolCall``.""" return block.get("type") == "tool_call" def is_tool_call_chunk(block: ContentBlock) -> TypeGuard[ToolCallChunk]: - """Type guard to check if a content block is a tool call chunk.""" + """Type guard to check if a content block is a ``ToolCallChunk``.""" return block.get("type") == "tool_call_chunk" def is_text_block(block: ContentBlock) -> TypeGuard[TextContentBlock]: - """Type guard to check if a content block is a text block.""" + """Type guard to check if a content block is a ``TextContentBlock``.""" return block.get("type") == "text" def is_reasoning_block(block: ContentBlock) -> TypeGuard[ReasoningContentBlock]: - """Type guard to check if a content block is a reasoning block.""" + """Type guard to check if a content block is a ``ReasoningContentBlock``.""" return block.get("type") == "reasoning" def is_invalid_tool_call_block( block: ContentBlock, ) -> TypeGuard[InvalidToolCall]: - """Type guard to check if a content block is an invalid tool call.""" + """Type guard to 
check if a content block is an ``InvalidToolCall``.""" return block.get("type") == "invalid_tool_call" @@ -1009,7 +1026,7 @@ def create_text_block( Args: text: The text content of the block. id: Content block identifier. Generated automatically if not provided. - annotations: Citations and other annotations for the text. + annotations: ``Citation`` objects and other annotations for the text. index: Index of block in aggregate response. Used during streaming. Returns: