diff --git a/libs/core/langchain_core/language_models/base.py b/libs/core/langchain_core/language_models/base.py index 1fef01a6859..8df8aeda0b8 100644 --- a/libs/core/langchain_core/language_models/base.py +++ b/libs/core/langchain_core/language_models/base.py @@ -2,8 +2,7 @@ from __future__ import annotations -import warnings -from abc import ABC, abstractmethod +from abc import ABC from collections.abc import Mapping, Sequence from functools import cache from typing import ( @@ -26,7 +25,6 @@ from langchain_core.messages import ( AnyMessage, BaseMessage, MessageLikeRepresentation, - get_buffer_string, ) from langchain_core.messages.v1 import AIMessage as AIMessageV1 from langchain_core.prompt_values import PromptValue @@ -166,7 +164,6 @@ class BaseLanguageModel( list[AnyMessage], ] - @abstractmethod def generate_prompt( self, prompts: list[PromptValue], @@ -201,7 +198,6 @@ class BaseLanguageModel( prompt and additional model provider-specific output. """ - @abstractmethod async def agenerate_prompt( self, prompts: list[PromptValue], @@ -245,7 +241,6 @@ class BaseLanguageModel( raise NotImplementedError @deprecated("0.1.7", alternative="invoke", removal="1.0") - @abstractmethod def predict( self, text: str, *, stop: Optional[Sequence[str]] = None, **kwargs: Any ) -> str: @@ -266,7 +261,6 @@ class BaseLanguageModel( """ @deprecated("0.1.7", alternative="invoke", removal="1.0") - @abstractmethod def predict_messages( self, messages: list[BaseMessage], @@ -291,7 +285,6 @@ class BaseLanguageModel( """ @deprecated("0.1.7", alternative="ainvoke", removal="1.0") - @abstractmethod async def apredict( self, text: str, *, stop: Optional[Sequence[str]] = None, **kwargs: Any ) -> str: @@ -312,7 +305,6 @@ class BaseLanguageModel( """ @deprecated("0.1.7", alternative="ainvoke", removal="1.0") - @abstractmethod async def apredict_messages( self, messages: list[BaseMessage], @@ -368,33 +360,6 @@ class BaseLanguageModel( """ return len(self.get_token_ids(text)) - def get_num_tokens_from_messages( - self, - messages: list[BaseMessage], - tools: Optional[Sequence] = None, - ) -> int: - """Get the number of tokens in the messages. - - Useful for checking if an input fits in a model's context window. - - **Note**: the base implementation of get_num_tokens_from_messages ignores - tool schemas. - - Args: - messages: The message inputs to tokenize. - tools: If provided, sequence of dict, BaseModel, function, or BaseTools - to be converted to tool schemas. - - Returns: - The sum of the number of tokens across the messages. - """ - if tools is not None: - warnings.warn( - "Counting tokens in tool schemas is not yet supported. Ignoring tools.", - stacklevel=2, - ) - return sum(self.get_num_tokens(get_buffer_string([m])) for m in messages) - @classmethod def _all_required_field_names(cls) -> set: """DEPRECATED: Kept for backwards compatibility. 
diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py index e96003d26fe..0c631e8011a 100644 --- a/libs/core/langchain_core/language_models/chat_models.py +++ b/libs/core/langchain_core/language_models/chat_models.py @@ -55,12 +55,11 @@ from langchain_core.messages import ( HumanMessage, convert_to_messages, convert_to_openai_image_block, + get_buffer_string, is_data_content_block, message_chunk_to_message, ) -from langchain_core.messages import content_blocks as types from langchain_core.messages.ai import _LC_ID_PREFIX -from langchain_core.messages.v1 import AIMessage as AIMessageV1 from langchain_core.outputs import ( ChatGeneration, ChatGenerationChunk, @@ -222,23 +221,6 @@ def _format_ls_structured_output(ls_structured_output_format: Optional[dict]) -> return ls_structured_output_format_dict -def _convert_to_v1(message: AIMessage) -> AIMessageV1: - """Best-effort conversion of a V0 AIMessage to V1.""" - if isinstance(message.content, str): - content: list[types.ContentBlock] = [] - if message.content: - content = [{"type": "text", "text": message.content}] - - for tool_call in message.tool_calls: - content.append(tool_call) - - return AIMessageV1( - content=content, - usage_metadata=message.usage_metadata, - response_metadata=message.response_metadata, - ) - - class BaseChatModel(BaseLanguageModel[BaseMessage], ABC): """Base class for chat models. @@ -1370,6 +1352,33 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC): starter_dict["_type"] = self._llm_type return starter_dict + def get_num_tokens_from_messages( + self, + messages: list[BaseMessage], + tools: Optional[Sequence] = None, + ) -> int: + """Get the number of tokens in the messages. + + Useful for checking if an input fits in a model's context window. + + **Note**: the base implementation of get_num_tokens_from_messages ignores + tool schemas. + + Args: + messages: The message inputs to tokenize. + tools: If provided, sequence of dict, BaseModel, function, or BaseTools + to be converted to tool schemas. + + Returns: + The sum of the number of tokens across the messages. + """ + if tools is not None: + warnings.warn( + "Counting tokens in tool schemas is not yet supported. 
Ignoring tools.", + stacklevel=2, + ) + return sum(self.get_num_tokens(get_buffer_string([m])) for m in messages) + def bind_tools( self, tools: Sequence[ diff --git a/libs/core/langchain_core/language_models/v1/chat_models.py b/libs/core/langchain_core/language_models/v1/chat_models.py index 437fb876496..bc39787c682 100644 --- a/libs/core/langchain_core/language_models/v1/chat_models.py +++ b/libs/core/langchain_core/language_models/v1/chat_models.py @@ -4,6 +4,7 @@ from __future__ import annotations import copy import typing +import warnings from abc import ABC, abstractmethod from collections.abc import AsyncIterator, Iterator, Sequence from operator import itemgetter @@ -38,11 +39,14 @@ from langchain_core.language_models.base import ( ) from langchain_core.messages import ( AIMessage, - BaseMessage, convert_to_openai_image_block, + get_buffer_string, is_data_content_block, ) -from langchain_core.messages.utils import convert_to_messages_v1 +from langchain_core.messages.utils import ( + _convert_from_v1_message, + convert_to_messages_v1, +) from langchain_core.messages.v1 import AIMessage as AIMessageV1 from langchain_core.messages.v1 import AIMessageChunk as AIMessageChunkV1 from langchain_core.messages.v1 import HumanMessage as HumanMessageV1 @@ -735,7 +739,7 @@ class BaseChatModelV1(BaseLanguageModel[AIMessageV1], ABC): *, tool_choice: Optional[Union[str]] = None, **kwargs: Any, - ) -> Runnable[LanguageModelInput, BaseMessage]: + ) -> Runnable[LanguageModelInput, AIMessageV1]: """Bind tools to the model. Args: @@ -899,6 +903,34 @@ class BaseChatModelV1(BaseLanguageModel[AIMessageV1], ABC): return RunnableMap(raw=llm) | parser_with_fallback return llm | output_parser + def get_num_tokens_from_messages( + self, + messages: list[MessageV1], + tools: Optional[Sequence] = None, + ) -> int: + """Get the number of tokens in the messages. + + Useful for checking if an input fits in a model's context window. + + **Note**: the base implementation of get_num_tokens_from_messages ignores + tool schemas. + + Args: + messages: The message inputs to tokenize. + tools: If provided, sequence of dict, BaseModel, function, or BaseTools + to be converted to tool schemas. + + Returns: + The sum of the number of tokens across the messages. + """ + messages_v0 = [_convert_from_v1_message(message) for message in messages] + if tools is not None: + warnings.warn( + "Counting tokens in tool schemas is not yet supported. Ignoring tools.", + stacklevel=2, + ) + return sum(self.get_num_tokens(get_buffer_string([m])) for m in messages_v0) + def _gen_info_and_msg_metadata( generation: Union[ChatGeneration, ChatGenerationChunk], diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index a2efd0fef9c..e459915c75d 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -706,6 +706,7 @@ ToolContentBlock = Union[ ContentBlock = Union[ TextContentBlock, ToolCall, + InvalidToolCall, ReasoningContentBlock, NonStandardContentBlock, DataContentBlock, diff --git a/libs/core/langchain_core/messages/utils.py b/libs/core/langchain_core/messages/utils.py index ed9e8d39745..7df908d3142 100644 --- a/libs/core/langchain_core/messages/utils.py +++ b/libs/core/langchain_core/messages/utils.py @@ -384,38 +384,37 @@ def _convert_from_v1_message(message: MessageV1) -> BaseMessage: Returns: BaseMessage: Converted message instance. 
""" - # type ignores here are because AIMessageV1.content is a list of dicts. - # AIMessageV0.content expects str or list[str | dict]. + content = cast("Union[str, list[str | dict]]", message.content) if isinstance(message, AIMessageV1): return AIMessage( - content=message.content, # type: ignore[arg-type] + content=content, id=message.id, name=message.name, tool_calls=message.tool_calls, - response_metadata=message.response_metadata, + response_metadata=cast("dict", message.response_metadata), ) if isinstance(message, AIMessageChunkV1): return AIMessageChunk( - content=message.content, # type: ignore[arg-type] + content=content, id=message.id, name=message.name, tool_call_chunks=message.tool_call_chunks, - response_metadata=message.response_metadata, + response_metadata=cast("dict", message.response_metadata), ) if isinstance(message, HumanMessageV1): return HumanMessage( - content=message.content, # type: ignore[arg-type] + content=content, id=message.id, name=message.name, ) if isinstance(message, SystemMessageV1): return SystemMessage( - content=message.content, # type: ignore[arg-type] + content=content, id=message.id, ) if isinstance(message, ToolMessageV1): return ToolMessage( - content=message.content, # type: ignore[arg-type] + content=content, id=message.id, ) message = f"Unsupported message type: {type(message)}" @@ -501,7 +500,10 @@ def _convert_to_message_v1(message: MessageLikeRepresentation) -> MessageV1: ValueError: if the message dict does not contain the required keys. """ if isinstance(message, MessageV1Types): - message_ = message + if isinstance(message, AIMessageChunkV1): + message_ = message.to_message() + else: + message_ = message elif isinstance(message, str): message_ = _create_message_from_message_type_v1("human", message) elif isinstance(message, Sequence) and len(message) == 2: diff --git a/libs/core/langchain_core/messages/v1.py b/libs/core/langchain_core/messages/v1.py index 9ff2eaed4ab..4726becc1e6 100644 --- a/libs/core/langchain_core/messages/v1.py +++ b/libs/core/langchain_core/messages/v1.py @@ -5,6 +5,8 @@ import uuid from dataclasses import dataclass, field from typing import Any, Literal, Optional, TypedDict, Union, cast, get_args +from pydantic import BaseModel + import langchain_core.messages.content_blocks as types from langchain_core.messages.ai import _LC_ID_PREFIX, UsageMetadata, add_usage from langchain_core.messages.base import merge_content @@ -32,20 +34,20 @@ def _ensure_id(id_val: Optional[str]) -> str: return id_val or str(uuid.uuid4()) -class Provider(TypedDict): - """Information about the provider that generated the message. +class ResponseMetadata(TypedDict, total=False): + """Metadata about the response from the AI provider. - Contains metadata about the AI provider and model used to generate content. + Contains additional information returned by the provider, such as + response headers, service tiers, log probabilities, system fingerprints, etc. - Attributes: - name: Name and version of the provider that created the content block. - model_name: Name of the model that generated the content block. + Extra keys are permitted from what is typed here. 
""" - name: str - """Name and version of the provider that created the content block.""" + model_provider: str + """Name and version of the provider that created the message (e.g., openai).""" + model_name: str - """Name of the model that generated the content block.""" + """Name of the model that generated the message.""" @dataclass @@ -91,21 +93,29 @@ class AIMessage: usage_metadata: Optional[UsageMetadata] = None """If provided, usage metadata for a message, such as token counts.""" - response_metadata: dict = field(default_factory=dict) + response_metadata: ResponseMetadata = field( + default_factory=lambda: ResponseMetadata() + ) """Metadata about the response. This field should include non-standard data returned by the provider, such as response headers, service tiers, or log probabilities. """ + parsed: Optional[Union[dict[str, Any], BaseModel]] = None + """Auto-parsed message contents, if applicable.""" + def __init__( self, content: Union[str, list[types.ContentBlock]], id: Optional[str] = None, name: Optional[str] = None, lc_version: str = "v1", - response_metadata: Optional[dict] = None, + response_metadata: Optional[ResponseMetadata] = None, usage_metadata: Optional[UsageMetadata] = None, + tool_calls: Optional[list[types.ToolCall]] = None, + invalid_tool_calls: Optional[list[types.InvalidToolCall]] = None, + parsed: Optional[Union[dict[str, Any], BaseModel]] = None, ): """Initialize an AI message. @@ -116,6 +126,11 @@ class AIMessage: lc_version: Encoding version for the message. response_metadata: Optional metadata about the response. usage_metadata: Optional metadata about token usage. + tool_calls: Optional list of tool calls made by the AI. Tool calls should + generally be included in message content. If passed on init, they will + be added to the content list. + invalid_tool_calls: Optional list of tool calls that failed validation. + parsed: Optional auto-parsed message contents, if applicable. """ if isinstance(content, str): self.content = [{"type": "text", "text": content}] @@ -126,13 +141,27 @@ class AIMessage: self.name = name self.lc_version = lc_version self.usage_metadata = usage_metadata + self.parsed = parsed if response_metadata is None: self.response_metadata = {} else: self.response_metadata = response_metadata - self._tool_calls: list[types.ToolCall] = [] - self._invalid_tool_calls: list[types.InvalidToolCall] = [] + # Add tool calls to content if provided on init + if tool_calls: + content_tool_calls = { + block["id"] + for block in self.content + if block["type"] == "tool_call" and "id" in block + } + for tool_call in tool_calls: + if "id" in tool_call and tool_call["id"] in content_tool_calls: + continue + self.content.append(tool_call) + self._tool_calls = [ + block for block in self.content if block["type"] == "tool_call" + ] + self.invalid_tool_calls = invalid_tool_calls or [] @property def text(self) -> Optional[str]: @@ -150,7 +179,7 @@ class AIMessage: tool_calls = [block for block in self.content if block["type"] == "tool_call"] if tool_calls: self._tool_calls = tool_calls - return self._tool_calls + return [block for block in self.content if block["type"] == "tool_call"] @tool_calls.setter def tool_calls(self, value: list[types.ToolCall]) -> None: @@ -202,13 +231,16 @@ class AIMessageChunk: These data represent incremental usage statistics, as opposed to a running total. """ - response_metadata: dict = field(init=False) + response_metadata: ResponseMetadata = field(init=False) """Metadata about the response chunk. 
This field should include non-standard data returned by the provider, such as response headers, service tiers, or log probabilities. """ + parsed: Optional[Union[dict[str, Any], BaseModel]] = None + """Auto-parsed message contents, if applicable.""" + tool_call_chunks: list[types.ToolCallChunk] = field(init=False) def __init__( @@ -217,9 +249,10 @@ class AIMessageChunk: id: Optional[str] = None, name: Optional[str] = None, lc_version: str = "v1", - response_metadata: Optional[dict] = None, + response_metadata: Optional[ResponseMetadata] = None, usage_metadata: Optional[UsageMetadata] = None, tool_call_chunks: Optional[list[types.ToolCallChunk]] = None, + parsed: Optional[Union[dict[str, Any], BaseModel]] = None, ): """Initialize an AI message. @@ -231,6 +264,7 @@ class AIMessageChunk: response_metadata: Optional metadata about the response. usage_metadata: Optional metadata about token usage. tool_call_chunks: Optional list of partial tool call data. + parsed: Optional auto-parsed message contents, if applicable. """ if isinstance(content, str): self.content = [{"type": "text", "text": content, "index": 0}] @@ -241,6 +275,7 @@ class AIMessageChunk: self.name = name self.lc_version = lc_version self.usage_metadata = usage_metadata + self.parsed = parsed if response_metadata is None: self.response_metadata = {} else: @@ -251,7 +286,7 @@ class AIMessageChunk: self.tool_call_chunks = tool_call_chunks self._tool_calls: list[types.ToolCall] = [] - self._invalid_tool_calls: list[types.InvalidToolCall] = [] + self.invalid_tool_calls: list[types.InvalidToolCall] = [] self._init_tool_calls() def _init_tool_calls(self) -> None: @@ -264,7 +299,7 @@ class AIMessageChunk: ValueError: If the tool call chunks are malformed. """ self._tool_calls = [] - self._invalid_tool_calls = [] + self.invalid_tool_calls = [] if not self.tool_call_chunks: if self._tool_calls: self.tool_call_chunks = [ @@ -276,14 +311,14 @@ class AIMessageChunk: ) for tc in self._tool_calls ] - if self._invalid_tool_calls: + if self.invalid_tool_calls: tool_call_chunks = self.tool_call_chunks tool_call_chunks.extend( [ create_tool_call_chunk( name=tc["name"], args=tc["args"], id=tc["id"], index=None ) - for tc in self._invalid_tool_calls + for tc in self.invalid_tool_calls ] ) self.tool_call_chunks = tool_call_chunks @@ -294,9 +329,9 @@ class AIMessageChunk: def add_chunk_to_invalid_tool_calls(chunk: ToolCallChunk) -> None: invalid_tool_calls.append( create_invalid_tool_call( - name=chunk["name"], - args=chunk["args"], - id=chunk["id"], + name=chunk.get("name", ""), + args=chunk.get("args", ""), + id=chunk.get("id", ""), error=None, ) ) @@ -307,9 +342,9 @@ class AIMessageChunk: if isinstance(args_, dict): tool_calls.append( create_tool_call( - name=chunk["name"] or "", + name=chunk.get("name", ""), args=args_, - id=chunk["id"], + id=chunk.get("id", ""), ) ) else: @@ -317,7 +352,7 @@ class AIMessageChunk: except Exception: add_chunk_to_invalid_tool_calls(chunk) self._tool_calls = tool_calls - self._invalid_tool_calls = invalid_tool_calls + self.invalid_tool_calls = invalid_tool_calls @property def text(self) -> Optional[str]: @@ -361,6 +396,20 @@ class AIMessageChunk: error_msg = "Can only add AIMessageChunk or sequence of AIMessageChunk." 
raise NotImplementedError(error_msg) + def to_message(self) -> "AIMessage": + """Convert this AIMessageChunk to an AIMessage.""" + return AIMessage( + content=self.content, + id=self.id, + name=self.name, + lc_version=self.lc_version, + response_metadata=self.response_metadata, + usage_metadata=self.usage_metadata, + tool_calls=self.tool_calls, + invalid_tool_calls=self.invalid_tool_calls, + parsed=self.parsed, + ) + def add_ai_message_chunks( left: AIMessageChunk, *others: AIMessageChunk @@ -373,7 +422,8 @@ def add_ai_message_chunks( *(cast("list[str | dict[Any, Any]]", o.content) for o in others), ) response_metadata = merge_dicts( - left.response_metadata, *(o.response_metadata for o in others) + cast("dict", left.response_metadata), + *(cast("dict", o.response_metadata) for o in others), ) # Merge tool call chunks @@ -400,6 +450,15 @@ def add_ai_message_chunks( else: usage_metadata = None + # Parsed + # 'parsed' always represents an aggregation not an incremental value, so the last + # non-null value is kept. + parsed = None + for m in reversed([left, *others]): + if m.parsed is not None: + parsed = m.parsed + break + chunk_id = None candidates = [left.id] + [o.id for o in others] # first pass: pick the first non-run-* id @@ -417,8 +476,9 @@ def add_ai_message_chunks( return left.__class__( content=cast("list[types.ContentBlock]", content), tool_call_chunks=tool_call_chunks, - response_metadata=response_metadata, + response_metadata=cast("ResponseMetadata", response_metadata), usage_metadata=usage_metadata, + parsed=parsed, id=chunk_id, ) @@ -455,19 +515,25 @@ class HumanMessage: """ def __init__( - self, content: Union[str, list[types.ContentBlock]], id: Optional[str] = None + self, + content: Union[str, list[types.ContentBlock]], + *, + id: Optional[str] = None, + name: Optional[str] = None, ): """Initialize a human message. Args: content: Message content as string or list of content blocks. id: Optional unique identifier for the message. + name: Optional human-readable name for the message. """ self.id = _ensure_id(id) if isinstance(content, str): self.content = [{"type": "text", "text": content}] else: self.content = content + self.name = name def text(self) -> str: """Extract all text content from the message. @@ -497,20 +563,47 @@ class SystemMessage: content: list[types.ContentBlock] type: Literal["system"] = "system" + name: Optional[str] = None + """An optional name for the message. + + This can be used to provide a human-readable name for the message. + + Usage of this field is optional, and whether it's used or not is up to the + model implementation. + """ + + custom_role: Optional[str] = None + """If provided, a custom role for the system message. + + Example: ``"developer"``. + + Integration packages may use this field to assign the system message role if it + contains a recognized value. + """ + def __init__( - self, content: Union[str, list[types.ContentBlock]], *, id: Optional[str] = None + self, + content: Union[str, list[types.ContentBlock]], + *, + id: Optional[str] = None, + custom_role: Optional[str] = None, + name: Optional[str] = None, ): """Initialize a system message. Args: - content: System instructions as string or list of content blocks. + content: Message content as string or list of content blocks. id: Optional unique identifier for the message. + custom_role: If provided, a custom role for the system message. + name: Optional human-readable name for the message. 
""" self.id = _ensure_id(id) if isinstance(content, str): self.content = [{"type": "text", "text": content}] else: self.content = content + self.custom_role = custom_role + self.name = name def text(self) -> str: """Extract all text content from the system message.""" @@ -537,11 +630,51 @@ class ToolMessage: id: str tool_call_id: str - content: list[dict[str, Any]] + content: list[types.ContentBlock] artifact: Optional[Any] = None # App-side payload not for the model + + name: Optional[str] = None + """An optional name for the message. + + This can be used to provide a human-readable name for the message. + + Usage of this field is optional, and whether it's used or not is up to the + model implementation. + """ + status: Literal["success", "error"] = "success" type: Literal["tool"] = "tool" + def __init__( + self, + content: Union[str, list[types.ContentBlock]], + tool_call_id: str, + *, + id: Optional[str] = None, + name: Optional[str] = None, + artifact: Optional[Any] = None, + status: Literal["success", "error"] = "success", + ): + """Initialize a human message. + + Args: + content: Message content as string or list of content blocks. + tool_call_id: ID of the tool call this message responds to. + id: Optional unique identifier for the message. + name: Optional human-readable name for the message. + artifact: Optional app-side payload not intended for the model. + status: Execution status ("success" or "error"). + """ + self.id = _ensure_id(id) + self.tool_call_id = tool_call_id + if isinstance(content, str): + self.content = [{"type": "text", "text": content}] + else: + self.content = content + self.name = name + self.artifact = artifact + self.status = status + @property def text(self) -> str: """Extract all text content from the tool message.""" diff --git a/libs/core/langchain_core/output_parsers/openai_tools.py b/libs/core/langchain_core/output_parsers/openai_tools.py index 63495bc2d84..e01f919606e 100644 --- a/libs/core/langchain_core/output_parsers/openai_tools.py +++ b/libs/core/langchain_core/output_parsers/openai_tools.py @@ -9,7 +9,7 @@ from typing import Annotated, Any, Optional from pydantic import SkipValidation, ValidationError from langchain_core.exceptions import OutputParserException -from langchain_core.messages import AIMessage, InvalidToolCall +from langchain_core.messages import AIMessage, InvalidToolCall, ToolCall from langchain_core.messages.tool import invalid_tool_call from langchain_core.messages.tool import tool_call as create_tool_call from langchain_core.output_parsers.transform import BaseCumulativeTransformOutputParser @@ -26,7 +26,7 @@ def parse_tool_call( partial: bool = False, strict: bool = False, return_id: bool = True, -) -> Optional[dict[str, Any]]: +) -> Optional[ToolCall]: """Parse a single tool call. 
Args: diff --git a/libs/core/langchain_core/prompt_values.py b/libs/core/langchain_core/prompt_values.py index 7652bd76e3c..68007a7c8ce 100644 --- a/libs/core/langchain_core/prompt_values.py +++ b/libs/core/langchain_core/prompt_values.py @@ -8,17 +8,65 @@ from __future__ import annotations from abc import ABC, abstractmethod from collections.abc import Sequence -from typing import Literal, cast +from typing import Literal, Union, cast -from typing_extensions import TypedDict +from typing_extensions import TypedDict, overload from langchain_core.load.serializable import Serializable from langchain_core.messages import ( + AIMessage, AnyMessage, BaseMessage, HumanMessage, + SystemMessage, + ToolMessage, get_buffer_string, ) +from langchain_core.messages import content_blocks as types +from langchain_core.messages.v1 import AIMessage as AIMessageV1 +from langchain_core.messages.v1 import HumanMessage as HumanMessageV1 +from langchain_core.messages.v1 import MessageV1, ResponseMetadata +from langchain_core.messages.v1 import SystemMessage as SystemMessageV1 +from langchain_core.messages.v1 import ToolMessage as ToolMessageV1 + + +def _convert_to_v1(message: BaseMessage) -> MessageV1: + """Best-effort conversion of a V0 AIMessage to V1.""" + if isinstance(message.content, str): + content: list[types.ContentBlock] = [] + if message.content: + content = [{"type": "text", "text": message.content}] + else: + content = [] + for block in message.content: + if isinstance(block, str): + content.append({"type": "text", "text": block}) + elif isinstance(block, dict): + content.append(cast("types.ContentBlock", block)) + else: + pass + + if isinstance(message, HumanMessage): + return HumanMessageV1(content=content) + if isinstance(message, AIMessage): + for tool_call in message.tool_calls: + content.append(tool_call) + return AIMessageV1( + content=content, + usage_metadata=message.usage_metadata, + response_metadata=cast("ResponseMetadata", message.response_metadata), + tool_calls=message.tool_calls, + ) + if isinstance(message, SystemMessage): + return SystemMessageV1(content=content) + if isinstance(message, ToolMessage): + return ToolMessageV1( + tool_call_id=message.tool_call_id, + content=content, + artifact=message.artifact, + ) + error_message = f"Unsupported message type: {type(message)}" + raise TypeError(error_message) class PromptValue(Serializable, ABC): @@ -46,8 +94,18 @@ class PromptValue(Serializable, ABC): def to_string(self) -> str: """Return prompt value as string.""" + @overload + def to_messages( + self, output_version: Literal["v0"] = "v0" + ) -> list[BaseMessage]: ... + + @overload + def to_messages(self, output_version: Literal["v1"]) -> list[MessageV1]: ... + @abstractmethod - def to_messages(self) -> list[BaseMessage]: + def to_messages( + self, output_version: Literal["v0", "v1"] = "v0" + ) -> Union[Sequence[BaseMessage], Sequence[MessageV1]]: """Return prompt as a list of Messages.""" @@ -71,8 +129,20 @@ class StringPromptValue(PromptValue): """Return prompt as string.""" return self.text - def to_messages(self) -> list[BaseMessage]: + @overload + def to_messages( + self, output_version: Literal["v0"] = "v0" + ) -> list[BaseMessage]: ... + + @overload + def to_messages(self, output_version: Literal["v1"]) -> list[MessageV1]: ... 
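# Illustrative sketch (example only; relies on the StringPromptValue implementation
# that follows): the overloads above let callers opt into v1 message classes per call.
#
#     value = StringPromptValue(text="Hello")
#     value.to_messages()                      # -> [HumanMessage(content="Hello")]
#     value.to_messages(output_version="v1")   # -> [HumanMessageV1(content="Hello")]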
+ + def to_messages( + self, output_version: Literal["v0", "v1"] = "v0" + ) -> Union[Sequence[BaseMessage], Sequence[MessageV1]]: """Return prompt as messages.""" + if output_version == "v1": + return [HumanMessageV1(content=self.text)] return [HumanMessage(content=self.text)] @@ -89,8 +159,24 @@ class ChatPromptValue(PromptValue): """Return prompt as string.""" return get_buffer_string(self.messages) - def to_messages(self) -> list[BaseMessage]: - """Return prompt as a list of messages.""" + @overload + def to_messages( + self, output_version: Literal["v0"] = "v0" + ) -> list[BaseMessage]: ... + + @overload + def to_messages(self, output_version: Literal["v1"]) -> list[MessageV1]: ... + + def to_messages( + self, output_version: Literal["v0", "v1"] = "v0" + ) -> Union[Sequence[BaseMessage], Sequence[MessageV1]]: + """Return prompt as a list of messages. + + Args: + output_version: The output version, either "v0" (default) or "v1". + """ + if output_version == "v1": + return [_convert_to_v1(m) for m in self.messages] return list(self.messages) @classmethod @@ -125,8 +211,26 @@ class ImagePromptValue(PromptValue): """Return prompt (image URL) as string.""" return self.image_url["url"] - def to_messages(self) -> list[BaseMessage]: + @overload + def to_messages( + self, output_version: Literal["v0"] = "v0" + ) -> list[BaseMessage]: ... + + @overload + def to_messages(self, output_version: Literal["v1"]) -> list[MessageV1]: ... + + def to_messages( + self, output_version: Literal["v0", "v1"] = "v0" + ) -> Union[Sequence[BaseMessage], Sequence[MessageV1]]: """Return prompt (image URL) as messages.""" + if output_version == "v1": + block: types.ImageContentBlock = { + "type": "image", + "url": self.image_url["url"], + } + if "detail" in self.image_url: + block["detail"] = self.image_url["detail"] + return [HumanMessageV1(content=[block])] return [HumanMessage(content=[cast("dict", self.image_url)])] diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index 66a4c5bc3b4..e54a785bf5d 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -67,6 +67,7 @@ langchain-text-splitters = { path = "../text-splitters" } strict = "True" strict_bytes = "True" enable_error_code = "deprecated" +disable_error_code = ["typeddict-unknown-key"] # TODO: activate for 'strict' checking disallow_any_generics = "False" diff --git a/libs/core/tests/unit_tests/test_messages.py b/libs/core/tests/unit_tests/test_messages.py index 75bbb804c2e..3e5773d4123 100644 --- a/libs/core/tests/unit_tests/test_messages.py +++ b/libs/core/tests/unit_tests/test_messages.py @@ -34,6 +34,7 @@ from langchain_core.messages.content_blocks import KNOWN_BLOCK_TYPES from langchain_core.messages.tool import invalid_tool_call as create_invalid_tool_call from langchain_core.messages.tool import tool_call as create_tool_call from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk +from langchain_core.messages.v1 import AIMessage as AIMessageV1 from langchain_core.messages.v1 import AIMessageChunk as AIMessageChunkV1 from langchain_core.utils._merge import merge_lists @@ -197,7 +198,7 @@ def test_message_chunks() -> None: assert (meaningful_id + default_id).id == "msg_def456" -def test_message_chunks_v2() -> None: +def test_message_chunks_v1() -> None: left = AIMessageChunkV1("foo ", id="abc") right = AIMessageChunkV1("bar") expected = AIMessageChunkV1("foo bar", id="abc") @@ -230,7 +231,19 @@ def test_message_chunks_v2() -> None: ) ], ) - assert one + two + three == expected + result = one + two 
+ three + assert result == expected + + assert result.to_message() == AIMessageV1( + content=[ + { + "name": "tool1", + "args": {"arg1": "value}"}, + "id": "1", + "type": "tool_call", + } + ] + ) assert ( AIMessageChunkV1( @@ -1326,6 +1339,7 @@ def test_known_block_types() -> None: "text", "text-plain", "tool_call", + "invalid_tool_call", "reasoning", "non_standard", "image", diff --git a/libs/partners/openai/langchain_openai/__init__.py b/libs/partners/openai/langchain_openai/__init__.py index a1756f0526d..e8bcdb920ac 100644 --- a/libs/partners/openai/langchain_openai/__init__.py +++ b/libs/partners/openai/langchain_openai/__init__.py @@ -1,10 +1,11 @@ -from langchain_openai.chat_models import AzureChatOpenAI, ChatOpenAI +from langchain_openai.chat_models import AzureChatOpenAI, ChatOpenAI, ChatOpenAIV1 from langchain_openai.embeddings import AzureOpenAIEmbeddings, OpenAIEmbeddings from langchain_openai.llms import AzureOpenAI, OpenAI __all__ = [ "OpenAI", "ChatOpenAI", + "ChatOpenAIV1", "OpenAIEmbeddings", "AzureOpenAI", "AzureChatOpenAI", diff --git a/libs/partners/openai/langchain_openai/chat_models/__init__.py b/libs/partners/openai/langchain_openai/chat_models/__init__.py index 574128d2704..8e2d4b53de7 100644 --- a/libs/partners/openai/langchain_openai/chat_models/__init__.py +++ b/libs/partners/openai/langchain_openai/chat_models/__init__.py @@ -1,4 +1,5 @@ from langchain_openai.chat_models.azure import AzureChatOpenAI from langchain_openai.chat_models.base import ChatOpenAI +from langchain_openai.chat_models.base_v1 import ChatOpenAI as ChatOpenAIV1 -__all__ = ["ChatOpenAI", "AzureChatOpenAI"] +__all__ = ["ChatOpenAI", "AzureChatOpenAI", "ChatOpenAIV1"] diff --git a/libs/partners/openai/langchain_openai/chat_models/_compat.py b/libs/partners/openai/langchain_openai/chat_models/_compat.py index 67bd333293f..ec0935e3686 100644 --- a/libs/partners/openai/langchain_openai/chat_models/_compat.py +++ b/libs/partners/openai/langchain_openai/chat_models/_compat.py @@ -66,11 +66,14 @@ For backwards compatibility, this module provides functions to convert between t formats. The functions are used internally by ChatOpenAI. 
""" # noqa: E501 +import copy import json from collections.abc import Iterable, Iterator -from typing import Any, Literal, Union, cast +from typing import Any, Literal, Optional, Union, cast from langchain_core.messages import AIMessage, AIMessageChunk, is_data_content_block +from langchain_core.messages import content_blocks as types +from langchain_core.messages.v1 import AIMessage as AIMessageV1 _FUNCTION_CALL_IDS_MAP_KEY = "__openai_function_call_ids__" @@ -289,25 +292,21 @@ def _convert_to_v1_from_chat_completions_chunk(chunk: AIMessageChunk) -> AIMessa return cast(AIMessageChunk, result) -def _convert_from_v1_to_chat_completions(message: AIMessage) -> AIMessage: +def _convert_from_v1_to_chat_completions(message: AIMessageV1) -> AIMessageV1: """Convert a v1 message to the Chat Completions format.""" - if isinstance(message.content, list): - new_content: list = [] - for block in message.content: - if isinstance(block, dict): - block_type = block.get("type") - if block_type == "text": - # Strip annotations - new_content.append({"type": "text", "text": block["text"]}) - elif block_type in ("reasoning", "tool_call"): - pass - else: - new_content.append(block) - else: - new_content.append(block) - return message.model_copy(update={"content": new_content}) + new_content: list[types.ContentBlock] = [] + for block in message.content: + if block["type"] == "text": + # Strip annotations + new_content.append({"type": "text", "text": block["text"]}) + elif block["type"] in ("reasoning", "tool_call"): + pass + else: + new_content.append(block) + new_message = copy.copy(message) + new_message.content = new_content - return message + return new_message # v1 / Responses @@ -319,17 +318,18 @@ def _convert_annotation_to_v1(annotation: dict[str, Any]) -> dict[str, Any]: for field in ("end_index", "start_index", "title"): if field in annotation: url_citation[field] = annotation[field] - url_citation["type"] = "url_citation" + url_citation["type"] = "citation" url_citation["url"] = annotation["url"] return url_citation elif annotation_type == "file_citation": - document_citation = {"type": "document_citation"} + document_citation = {"type": "citation"} if "filename" in annotation: document_citation["title"] = annotation["filename"] - for field in ("file_id", "index"): # OpenAI-specific - if field in annotation: - document_citation[field] = annotation[field] + if "file_id" in annotation: + document_citation["file_id"] = annotation["file_id"] + if "index" in annotation: + document_citation["file_index"] = annotation["index"] return document_citation # TODO: standardise container_file_citation? 
@@ -367,13 +367,15 @@ def _explode_reasoning(block: dict[str, Any]) -> Iterable[dict[str, Any]]: yield new_block -def _convert_to_v1_from_responses(message: AIMessage) -> AIMessage: +def _convert_to_v1_from_responses( + content: list[dict[str, Any]], + tool_calls: Optional[list[types.ToolCall]] = None, + invalid_tool_calls: Optional[list[types.InvalidToolCall]] = None, +) -> list[types.ContentBlock]: """Mutate a Responses message to v1 format.""" - if not isinstance(message.content, list): - return message def _iter_blocks() -> Iterable[dict[str, Any]]: - for block in message.content: + for block in content: if not isinstance(block, dict): continue block_type = block.get("type") @@ -409,13 +411,24 @@ def _convert_to_v1_from_responses(message: AIMessage) -> AIMessage: yield new_block elif block_type == "function_call": - new_block = {"type": "tool_call", "id": block.get("call_id", "")} - if "id" in block: - new_block["item_id"] = block["id"] - for extra_key in ("arguments", "name", "index"): - if extra_key in block: - new_block[extra_key] = block[extra_key] - yield new_block + new_block = None + call_id = block.get("call_id", "") + if call_id: + for tool_call in tool_calls or []: + if tool_call.get("id") == call_id: + new_block = tool_call.copy() + break + else: + for invalid_tool_call in invalid_tool_calls or []: + if invalid_tool_call.get("id") == call_id: + new_block = invalid_tool_call.copy() + break + if new_block: + if "id" in block: + new_block["item_id"] = block["id"] + if "index" in block: + new_block["index"] = block["index"] + yield new_block elif block_type == "web_search_call": web_search_call = {"type": "web_search_call", "id": block["id"]} @@ -485,28 +498,26 @@ def _convert_to_v1_from_responses(message: AIMessage) -> AIMessage: new_block["index"] = new_block["value"].pop("index") yield new_block - # Replace the list with the fully converted one - message.content = list(_iter_blocks()) - - return message + return list(_iter_blocks()) -def _convert_annotation_from_v1(annotation: dict[str, Any]) -> dict[str, Any]: - annotation_type = annotation.get("type") +def _convert_annotation_from_v1(annotation: types.Annotation) -> dict[str, Any]: + if annotation["type"] == "citation": + if "url" in annotation: + return {**annotation, "type": "url_citation"} - if annotation_type == "document_citation": new_ann: dict[str, Any] = {"type": "file_citation"} if "title" in annotation: new_ann["filename"] = annotation["title"] - - for fld in ("file_id", "index"): - if fld in annotation: - new_ann[fld] = annotation[fld] + if "file_id" in annotation: + new_ann["file_id"] = annotation["file_id"] + if "file_index" in annotation: + new_ann["index"] = annotation["file_index"] return new_ann - elif annotation_type == "non_standard_annotation": + elif annotation["type"] == "non_standard_annotation": return annotation["value"] else: @@ -528,7 +539,10 @@ def _implode_reasoning_blocks(blocks: list[dict[str, Any]]) -> Iterable[dict[str elif "reasoning" not in block and "summary" not in block: # {"type": "reasoning", "id": "rs_..."} oai_format = {**block, "summary": []} + # Update key order oai_format["type"] = oai_format.pop("type", "reasoning") + if "encrypted_content" in oai_format: + oai_format["encrypted_content"] = oai_format.pop("encrypted_content") yield oai_format i += 1 continue @@ -594,13 +608,11 @@ def _consolidate_calls( # If this really is the matching “result” – collapse if nxt.get("type") == result_name and nxt.get("id") == current.get("id"): if call_name == "web_search_call": - collapsed = { 
- "id": current["id"], - "status": current["status"], - "type": "web_search_call", - } + collapsed = {"id": current["id"]} if "action" in current: collapsed["action"] = current["action"] + collapsed["status"] = current["status"] + collapsed["type"] = "web_search_call" if call_name == "code_interpreter_call": collapsed = {"id": current["id"]} @@ -621,51 +633,50 @@ def _consolidate_calls( yield nxt -def _convert_from_v1_to_responses(message: AIMessage) -> AIMessage: - if not isinstance(message.content, list): - return message - +def _convert_from_v1_to_responses( + content: list[types.ContentBlock], tool_calls: list[types.ToolCall] +) -> list[dict[str, Any]]: new_content: list = [] - for block in message.content: - if isinstance(block, dict): - block_type = block.get("type") - if block_type == "text" and "annotations" in block: - # Need a copy because we’re changing the annotations list - new_block = dict(block) - new_block["annotations"] = [ - _convert_annotation_from_v1(a) for a in block["annotations"] + for block in content: + if block["type"] == "text" and "annotations" in block: + # Need a copy because we’re changing the annotations list + new_block = dict(block) + new_block["annotations"] = [ + _convert_annotation_from_v1(a) for a in block["annotations"] + ] + new_content.append(new_block) + elif block["type"] == "tool_call": + new_block = {"type": "function_call", "call_id": block["id"]} + if "item_id" in block: + new_block["id"] = block["item_id"] # type: ignore[typeddict-item] + if "name" in block and "arguments" in block: + new_block["name"] = block["name"] + new_block["arguments"] = block["arguments"] # type: ignore[typeddict-item] + else: + matching_tool_calls = [ + call for call in tool_calls if call["id"] == block["id"] ] - new_content.append(new_block) - elif block_type == "tool_call": - new_block = {"type": "function_call", "call_id": block["id"]} - if "item_id" in block: - new_block["id"] = block["item_id"] - if "name" in block and "arguments" in block: - new_block["name"] = block["name"] - new_block["arguments"] = block["arguments"] - else: - tool_call = next( - call for call in message.tool_calls if call["id"] == block["id"] - ) + if matching_tool_calls: + tool_call = matching_tool_calls[0] if "name" not in block: new_block["name"] = tool_call["name"] if "arguments" not in block: new_block["arguments"] = json.dumps(tool_call["args"]) - new_content.append(new_block) - elif ( - is_data_content_block(block) - and block["type"] == "image" - and "base64" in block - ): - new_block = {"type": "image_generation_call", "result": block["base64"]} - for extra_key in ("id", "status"): - if extra_key in block: - new_block[extra_key] = block[extra_key] - new_content.append(new_block) - elif block_type == "non_standard" and "value" in block: - new_content.append(block["value"]) - else: - new_content.append(block) + new_content.append(new_block) + elif ( + is_data_content_block(cast(dict, block)) + and block["type"] == "image" + and "base64" in block + and isinstance(block.get("id"), str) + and block["id"].startswith("ig_") + ): + new_block = {"type": "image_generation_call", "result": block["base64"]} + for extra_key in ("id", "status"): + if extra_key in block: + new_block[extra_key] = block[extra_key] # type: ignore[typeddict-item] + new_content.append(new_block) + elif block["type"] == "non_standard" and "value" in block: + new_content.append(block["value"]) else: new_content.append(block) @@ -679,4 +690,4 @@ def _convert_from_v1_to_responses(message: AIMessage) -> AIMessage: ) ) - 
return message.model_copy(update={"content": new_content}) + return new_content diff --git a/libs/partners/openai/langchain_openai/chat_models/base_v1.py b/libs/partners/openai/langchain_openai/chat_models/base_v1.py new file mode 100644 index 00000000000..89e9679aef3 --- /dev/null +++ b/libs/partners/openai/langchain_openai/chat_models/base_v1.py @@ -0,0 +1,3813 @@ +"""OpenAI chat wrapper.""" + +from __future__ import annotations + +import base64 +import json +import logging +import os +import re +import ssl +import sys +import warnings +from collections.abc import AsyncIterator, Iterator, Mapping, Sequence +from functools import partial +from io import BytesIO +from json import JSONDecodeError +from math import ceil +from operator import itemgetter +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Literal, + Optional, + TypedDict, + TypeVar, + Union, + cast, +) +from urllib.parse import urlparse + +import certifi +import openai +import tiktoken +from langchain_core.callbacks import ( + AsyncCallbackManagerForLLMRun, + CallbackManagerForLLMRun, +) +from langchain_core.language_models import LanguageModelInput +from langchain_core.language_models.chat_models import LangSmithParams +from langchain_core.language_models.v1.chat_models import ( + BaseChatModelV1, + agenerate_from_stream, + generate_from_stream, +) +from langchain_core.messages import ( + InvalidToolCall, + ToolCall, + convert_to_openai_data_block, + is_data_content_block, +) +from langchain_core.messages.ai import ( + InputTokenDetails, + OutputTokenDetails, + UsageMetadata, +) +from langchain_core.messages.tool import tool_call_chunk +from langchain_core.messages.v1 import AIMessage as AIMessageV1 +from langchain_core.messages.v1 import AIMessageChunk as AIMessageChunkV1 +from langchain_core.messages.v1 import HumanMessage as HumanMessageV1 +from langchain_core.messages.v1 import MessageV1, ResponseMetadata +from langchain_core.messages.v1 import SystemMessage as SystemMessageV1 +from langchain_core.messages.v1 import ToolMessage as ToolMessageV1 +from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser +from langchain_core.output_parsers.openai_tools import ( + JsonOutputKeyToolsParser, + PydanticToolsParser, + make_invalid_tool_call, + parse_tool_call, +) +from langchain_core.runnables import ( + Runnable, + RunnableLambda, + RunnableMap, + RunnablePassthrough, +) +from langchain_core.runnables.config import run_in_executor +from langchain_core.tools import BaseTool +from langchain_core.tools.base import _stringify +from langchain_core.utils import get_pydantic_field_names +from langchain_core.utils.function_calling import ( + convert_to_openai_function, + convert_to_openai_tool, +) +from langchain_core.utils.pydantic import ( + PydanticBaseModel, + TypeBaseModel, + is_basemodel_subclass, +) +from langchain_core.utils.utils import _build_model_kwargs, from_env, secret_from_env +from pydantic import BaseModel, ConfigDict, Field, SecretStr, model_validator +from pydantic.v1 import BaseModel as BaseModelV1 +from typing_extensions import Self + +from langchain_openai.chat_models._client_utils import ( + _get_default_async_httpx_client, + _get_default_httpx_client, +) +from langchain_openai.chat_models._compat import ( + _convert_from_v1_to_chat_completions, + _convert_from_v1_to_responses, + _convert_to_v1_from_responses, +) + +if TYPE_CHECKING: + from langchain_core.messages import content_blocks as types + from openai.types.responses import Response + +logger = logging.getLogger(__name__) + 
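# Minimal usage sketch (assumptions: OPENAI_API_KEY is set in the environment, and the
# ChatOpenAI subclass defined later in this module is the class exported as ChatOpenAIV1):
#
#     from langchain_openai import ChatOpenAIV1
#
#     llm = ChatOpenAIV1(model="gpt-4o-mini")
#     msg = llm.invoke("Say hello")   # returns an AIMessageV1
#     msg.text                        # concatenated text from the v1 content blocks
#     msg.content                     # list of v1 content blocks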
+# This SSL context is equivelent to the default `verify=True`. +# https://www.python-httpx.org/advanced/ssl/#configuring-client-instances +global_ssl_context = ssl.create_default_context(cafile=certifi.where()) + +WellKnownTools = ( + "file_search", + "web_search_preview", + "computer_use_preview", + "code_interpreter", + "mcp", + "image_generation", +) + + +def _convert_dict_to_message(_dict: Mapping[str, Any]) -> MessageV1: + """Convert a dictionary to a LangChain message. + + Args: + _dict: The dictionary. + + Returns: + The LangChain message. + """ + role = _dict.get("role") + name = _dict.get("name") + id_ = _dict.get("id") + if role == "user": + return HumanMessageV1(content=_dict.get("content", ""), id=id_, name=name) + elif role == "assistant": + # Fix for azure + # Also OpenAI returns None for tool invocations + content: list[types.ContentBlock] = [ + {"type": "text", "text": _dict.get("content", "") or ""} + ] + tool_calls = [] + invalid_tool_calls = [] + if raw_tool_calls := _dict.get("tool_calls"): + for raw_tool_call in raw_tool_calls: + try: + tool_call = parse_tool_call(raw_tool_call, return_id=True) + if tool_call: + tool_calls.append(tool_call) + except Exception as e: + invalid_tool_calls.append( + make_invalid_tool_call(raw_tool_call, str(e)) + ) + content.extend(tool_calls) + if audio := _dict.get("audio"): + # TODO: populate standard fields + content.append( + cast(types.AudioContentBlock, {"type": "audio", "audio": audio}) + ) + return AIMessageV1( + content=content, + name=name, + id=id_, + tool_calls=cast(list[ToolCall], tool_calls), + invalid_tool_calls=cast(list[InvalidToolCall], invalid_tool_calls), + ) + elif role in ("system", "developer"): + return SystemMessageV1( + content=_dict.get("content", ""), + name=name, + id=id_, + custom_role=role if role == "developer" else None, + ) + elif role == "tool": + return ToolMessageV1( + content=_dict.get("content", ""), + tool_call_id=cast(str, _dict.get("tool_call_id")), + name=name, + id=id_, + ) + else: + error_message = f"Unexpected role {role} in message." + raise ValueError(error_message) + + +def _format_message_content(content: Any, responses_api: bool = False) -> Any: + """Format message content.""" + if content and isinstance(content, list): + formatted_content = [] + for block in content: + # Remove unexpected block types + if ( + isinstance(block, dict) + and "type" in block + and block["type"] in ("tool_use", "thinking", "reasoning_content") + ): + continue + elif ( + isinstance(block, dict) + and is_data_content_block(block) + and not responses_api + ): + formatted_content.append(convert_to_openai_data_block(block)) + # Anthropic image blocks + elif ( + isinstance(block, dict) + and block.get("type") == "image" + and (source := block.get("source")) + and isinstance(source, dict) + ): + if source.get("type") == "base64" and ( + (media_type := source.get("media_type")) + and (data := source.get("data")) + ): + formatted_content.append( + { + "type": "image_url", + "image_url": {"url": f"data:{media_type};base64,{data}"}, + } + ) + elif source.get("type") == "url" and (url := source.get("url")): + formatted_content.append( + {"type": "image_url", "image_url": {"url": url}} + ) + else: + continue + else: + formatted_content.append(block) + else: + formatted_content = content + + return formatted_content + + +def _convert_message_to_dict(message: MessageV1, responses_api: bool = False) -> dict: + """Convert a LangChain message to a dictionary. + + Args: + message: The LangChain message. 
+ + Returns: + The dictionary. + """ + message_dict: dict[str, Any] = { + "content": _format_message_content(message.content, responses_api=responses_api) + } + if name := message.name: + message_dict["name"] = name + + # populate role and additional message data + if isinstance(message, HumanMessageV1): + message_dict["role"] = "user" + elif isinstance(message, AIMessageV1): + message_dict["role"] = "assistant" + if message.tool_calls or message.invalid_tool_calls: + message_dict["tool_calls"] = [ + _lc_tool_call_to_openai_tool_call(tc) for tc in message.tool_calls + ] + [ + _lc_invalid_tool_call_to_openai_tool_call(tc) + for tc in message.invalid_tool_calls + ] + else: + pass + # If tool calls present, content null value should be None not empty string. + if "tool_calls" in message_dict: + message_dict["content"] = message_dict["content"] or None + + audio: Optional[dict[str, Any]] = None + for block in message.content: + if ( + block.get("type") == "audio" + and (id_ := block.get("id")) + and not responses_api + ): + # openai doesn't support passing the data back - only the id + # https://platform.openai.com/docs/guides/audio/multi-turn-conversations + audio = {"id": id_} + if audio: + message_dict["audio"] = audio + elif isinstance(message, SystemMessageV1): + if message.custom_role == "developer": + message_dict["role"] = "developer" + else: + message_dict["role"] = "system" + elif isinstance(message, ToolMessageV1): + message_dict["role"] = "tool" + message_dict["tool_call_id"] = message.tool_call_id + + supported_props = {"content", "role", "tool_call_id"} + message_dict = {k: v for k, v in message_dict.items() if k in supported_props} + else: + raise TypeError(f"Got unknown type {message}") + return message_dict + + +def _convert_delta_to_message_chunk(_dict: Mapping[str, Any]) -> AIMessageChunkV1: + id_ = _dict.get("id") + content = cast(str, _dict.get("content") or "") + tool_call_chunks = [] + if raw_tool_calls := _dict.get("tool_calls"): + try: + tool_call_chunks = [ + tool_call_chunk( + name=rtc["function"].get("name"), + args=rtc["function"].get("arguments"), + id=rtc.get("id"), + index=rtc["index"], + ) + for rtc in raw_tool_calls + ] + except KeyError: + pass + + return AIMessageChunkV1(content=content, id=id_, tool_call_chunks=tool_call_chunks) + + +def _update_token_usage( + overall_token_usage: Union[int, dict], new_usage: Union[int, dict] +) -> Union[int, dict]: + # Token usage is either ints or dictionaries + # `reasoning_tokens` is nested inside `completion_tokens_details` + if isinstance(new_usage, int): + if not isinstance(overall_token_usage, int): + raise ValueError( + f"Got different types for token usage: " + f"{type(new_usage)} and {type(overall_token_usage)}" + ) + return new_usage + overall_token_usage + elif isinstance(new_usage, dict): + if not isinstance(overall_token_usage, dict): + raise ValueError( + f"Got different types for token usage: " + f"{type(new_usage)} and {type(overall_token_usage)}" + ) + return { + k: _update_token_usage(overall_token_usage.get(k, 0), v) + for k, v in new_usage.items() + } + else: + warnings.warn(f"Unexpected type for token usage: {type(new_usage)}") + return new_usage + + +def _handle_openai_bad_request(e: openai.BadRequestError) -> None: + if ( + "'response_format' of type 'json_schema' is not supported with this model" + ) in e.message: + message = ( + "This model does not support OpenAI's structured output feature, which " + "is the default method for `with_structured_output` as of " + "langchain-openai==0.3. 
To use `with_structured_output` with this model, " + 'specify `method="function_calling"`.' + ) + warnings.warn(message) + raise e + elif "Invalid schema for response_format" in e.message: + message = ( + "Invalid schema for OpenAI's structured output feature, which is the " + "default method for `with_structured_output` as of langchain-openai==0.3. " + 'Specify `method="function_calling"` instead or update your schema. ' + "See supported schemas: " + "https://platform.openai.com/docs/guides/structured-outputs#supported-schemas" # noqa: E501 + ) + warnings.warn(message) + raise e + else: + raise + + +class _FunctionCall(TypedDict): + name: str + + +_BM = TypeVar("_BM", bound=BaseModel) +_DictOrPydanticClass = Union[dict[str, Any], type[_BM], type] +_DictOrPydantic = Union[dict, _BM] + + +class _AllReturnType(TypedDict): + raw: AIMessageV1 + parsed: Optional[_DictOrPydantic] + parsing_error: Optional[BaseException] + + +class BaseChatOpenAI(BaseChatModelV1): + client: Any = Field(default=None, exclude=True) #: :meta private: + async_client: Any = Field(default=None, exclude=True) #: :meta private: + root_client: Any = Field(default=None, exclude=True) #: :meta private: + root_async_client: Any = Field(default=None, exclude=True) #: :meta private: + model_name: str = Field(default="gpt-3.5-turbo", alias="model") + """Model name to use.""" + temperature: Optional[float] = None + """What sampling temperature to use.""" + model_kwargs: dict[str, Any] = Field(default_factory=dict) + """Holds any model parameters valid for `create` call not explicitly specified.""" + openai_api_key: Optional[SecretStr] = Field( + alias="api_key", default_factory=secret_from_env("OPENAI_API_KEY", default=None) + ) + openai_api_base: Optional[str] = Field(default=None, alias="base_url") + """Base URL path for API requests, leave blank if not using a proxy or service + emulator.""" + openai_organization: Optional[str] = Field(default=None, alias="organization") + """Automatically inferred from env var `OPENAI_ORG_ID` if not provided.""" + # to support explicit proxy for OpenAI + openai_proxy: Optional[str] = Field( + default_factory=from_env("OPENAI_PROXY", default=None) + ) + request_timeout: Union[float, tuple[float, float], Any, None] = Field( + default=None, alias="timeout" + ) + """Timeout for requests to OpenAI completion API. Can be float, httpx.Timeout or + None.""" + stream_usage: bool = False + """Whether to include usage metadata in streaming output. If True, an additional + message chunk will be generated during the stream including usage metadata. + + .. versionadded:: 0.3.9 + """ + max_retries: Optional[int] = None + """Maximum number of retries to make when generating.""" + presence_penalty: Optional[float] = None + """Penalizes repeated tokens.""" + frequency_penalty: Optional[float] = None + """Penalizes repeated tokens according to frequency.""" + seed: Optional[int] = None + """Seed for generation""" + logprobs: Optional[bool] = None + """Whether to return logprobs.""" + top_logprobs: Optional[int] = None + """Number of most likely tokens to return at each token position, each with + an associated log probability. 
`logprobs` must be set to true + if this parameter is used.""" + logit_bias: Optional[dict[int, int]] = None + """Modify the likelihood of specified tokens appearing in the completion.""" + streaming: bool = False + """Whether to stream the results or not.""" + n: Optional[int] = None + """Number of chat completions to generate for each prompt.""" + top_p: Optional[float] = None + """Total probability mass of tokens to consider at each step.""" + max_tokens: Optional[int] = Field(default=None) + """Maximum number of tokens to generate.""" + reasoning_effort: Optional[str] = None + """Constrains effort on reasoning for reasoning models. For use with the Chat + Completions API. + + Reasoning models only, like OpenAI o1, o3, and o4-mini. + + Currently supported values are low, medium, and high. Reducing reasoning effort + can result in faster responses and fewer tokens used on reasoning in a response. + + .. versionadded:: 0.2.14 + """ + reasoning: Optional[dict[str, Any]] = None + """Reasoning parameters for reasoning models, i.e., OpenAI o-series models (o1, o3, + o4-mini, etc.). For use with the Responses API. + + Example: + + .. code-block:: python + + reasoning={ + "effort": "medium", # can be "low", "medium", or "high" + "summary": "auto", # can be "auto", "concise", or "detailed" + } + + .. versionadded:: 0.3.24 + """ + tiktoken_model_name: Optional[str] = None + """The model name to pass to tiktoken when using this class. + Tiktoken is used to count the number of tokens in documents to constrain + them to be under a certain limit. By default, when set to None, this will + be the same as the embedding model name. However, there are some cases + where you may want to use this Embedding class with a model name not + supported by tiktoken. This can include when using Azure embeddings or + when using one of the many model providers that expose an OpenAI-like + API but with different models. In those cases, in order to avoid erroring + when tiktoken is called, you can specify a model name to use here.""" + default_headers: Union[Mapping[str, str], None] = None + default_query: Union[Mapping[str, object], None] = None + # Configure a custom httpx client. See the + # [httpx documentation](https://www.python-httpx.org/api/#client) for more details. + http_client: Union[Any, None] = Field(default=None, exclude=True) + """Optional ``httpx.Client``. Only used for sync invocations. Must specify + ``http_async_client`` as well if you'd like a custom client for async + invocations. + """ + http_async_client: Union[Any, None] = Field(default=None, exclude=True) + """Optional httpx.AsyncClient. Only used for async invocations. Must specify + ``http_client`` as well if you'd like a custom client for sync invocations.""" + stop: Optional[Union[list[str], str]] = Field(default=None, alias="stop_sequences") + """Default stop sequences.""" + extra_body: Optional[Mapping[str, Any]] = None + """Optional additional JSON properties to include in the request parameters when + making requests to OpenAI compatible APIs, such as vLLM.""" + include_response_headers: bool = False + """Whether to include response headers in the output message response_metadata.""" + disabled_params: Optional[dict[str, Any]] = Field(default=None) + """Parameters of the OpenAI client or chat.completions endpoint that should be + disabled for the given model. 
+ + Should be specified as ``{"param": None | ['val1', 'val2']}`` where the key is the + parameter and the value is either None, meaning that parameter should never be + used, or it's a list of disabled values for the parameter. + + For example, older models may not support the 'parallel_tool_calls' parameter at + all, in which case ``disabled_params={"parallel_tool_calls": None}`` can be passed + in. + + If a parameter is disabled then it will not be used by default in any methods, e.g. + in :meth:`~langchain_openai.chat_models.base.ChatOpenAI.with_structured_output`. + However this does not prevent a user from directly passed in the parameter during + invocation. + """ + + include: Optional[list[str]] = None + """Additional fields to include in generations from Responses API. + + Supported values: + + - ``"file_search_call.results"`` + - ``"message.input_image.image_url"`` + - ``"computer_call_output.output.image_url"`` + - ``"reasoning.encrypted_content"`` + - ``"code_interpreter_call.outputs"`` + + .. versionadded:: 0.3.24 + """ + + service_tier: Optional[str] = None + """Latency tier for request. Options are ``'auto'``, ``'default'``, or ``'flex'``. + Relevant for users of OpenAI's scale tier service. + """ + + store: Optional[bool] = None + """If True, OpenAI may store response data for future use. Defaults to True + for the Responses API and False for the Chat Completions API. + + .. versionadded:: 0.3.24 + """ + + truncation: Optional[str] = None + """Truncation strategy (Responses API). Can be ``'auto'`` or ``'disabled'`` + (default). If ``'auto'``, model may drop input items from the middle of the + message sequence to fit the context window. + + .. versionadded:: 0.3.24 + """ + + use_previous_response_id: bool = False + """If True, always pass ``previous_response_id`` using the ID of the most recent + response. Responses API only. + + Input messages up to the most recent response will be dropped from request + payloads. + + For example, the following two are equivalent: + + .. code-block:: python + + llm = ChatOpenAI( + model="o4-mini", + use_previous_response_id=True, + ) + llm.invoke( + [ + HumanMessage("Hello"), + AIMessage("Hi there!", response_metadata={"id": "resp_123"}), + HumanMessage("How are you?"), + ] + ) + + .. code-block:: python + + llm = ChatOpenAI( + model="o4-mini", + use_responses_api=True, + ) + llm.invoke([HumanMessage("How are you?")], previous_response_id="resp_123") + + .. versionadded:: 0.3.26 + """ + + use_responses_api: Optional[bool] = None + """Whether to use the Responses API instead of the Chat API. + + If not specified then will be inferred based on invocation params. + + .. versionadded:: 0.3.9 + """ + + output_version: str = "v1" + """Version of AIMessage output format to use. + + This field is used to roll-out new output formats for chat model AIMessages + in a backwards-compatible way. + + Supported values: + + - ``"v0"``: AIMessage format as of langchain-openai 0.3.x. + - ``"responses/v1"``: Formats Responses API output + items into AIMessage content blocks. + - ``"v1"``: v1 of LangChain cross-provider standard. + + ``output_version="v1"`` is recommended. + + .. 
versionadded:: 0.3.25 + + """ + + model_config = ConfigDict(populate_by_name=True) + + @model_validator(mode="before") + @classmethod + def build_extra(cls, values: dict[str, Any]) -> Any: + """Build extra kwargs from additional params that were passed in.""" + all_required_field_names = get_pydantic_field_names(cls) + values = _build_model_kwargs(values, all_required_field_names) + return values + + @model_validator(mode="before") + @classmethod + def validate_temperature(cls, values: dict[str, Any]) -> Any: + """Currently o1 models only allow temperature=1.""" + model = values.get("model_name") or values.get("model") or "" + if model.startswith("o1") and "temperature" not in values: + values["temperature"] = 1 + return values + + @model_validator(mode="after") + def validate_environment(self) -> Self: + """Validate that api key and python package exists in environment.""" + if self.n is not None and self.n < 1: + raise ValueError("n must be at least 1.") + elif self.n is not None and self.n > 1 and self.streaming: + raise ValueError("n must be 1 when streaming.") + + # Check OPENAI_ORGANIZATION for backwards compatibility. + self.openai_organization = ( + self.openai_organization + or os.getenv("OPENAI_ORG_ID") + or os.getenv("OPENAI_ORGANIZATION") + ) + self.openai_api_base = self.openai_api_base or os.getenv("OPENAI_API_BASE") + client_params: dict = { + "api_key": ( + self.openai_api_key.get_secret_value() if self.openai_api_key else None + ), + "organization": self.openai_organization, + "base_url": self.openai_api_base, + "timeout": self.request_timeout, + "default_headers": self.default_headers, + "default_query": self.default_query, + } + if self.max_retries is not None: + client_params["max_retries"] = self.max_retries + + if self.openai_proxy and (self.http_client or self.http_async_client): + openai_proxy = self.openai_proxy + http_client = self.http_client + http_async_client = self.http_async_client + raise ValueError( + "Cannot specify 'openai_proxy' if one of " + "'http_client'/'http_async_client' is already specified. Received:\n" + f"{openai_proxy=}\n{http_client=}\n{http_async_client=}" + ) + if not self.client: + if self.openai_proxy and not self.http_client: + try: + import httpx + except ImportError as e: + raise ImportError( + "Could not import httpx python package. " + "Please install it with `pip install httpx`." + ) from e + self.http_client = httpx.Client( + proxy=self.openai_proxy, verify=global_ssl_context + ) + sync_specific = { + "http_client": self.http_client + or _get_default_httpx_client(self.openai_api_base, self.request_timeout) + } + self.root_client = openai.OpenAI(**client_params, **sync_specific) # type: ignore[arg-type] + self.client = self.root_client.chat.completions + if not self.async_client: + if self.openai_proxy and not self.http_async_client: + try: + import httpx + except ImportError as e: + raise ImportError( + "Could not import httpx python package. " + "Please install it with `pip install httpx`." 
+ ) from e + self.http_async_client = httpx.AsyncClient( + proxy=self.openai_proxy, verify=global_ssl_context + ) + async_specific = { + "http_client": self.http_async_client + or _get_default_async_httpx_client( + self.openai_api_base, self.request_timeout + ) + } + self.root_async_client = openai.AsyncOpenAI( + **client_params, + **async_specific, # type: ignore[arg-type] + ) + self.async_client = self.root_async_client.chat.completions + return self + + @property + def _default_params(self) -> dict[str, Any]: + """Get the default parameters for calling OpenAI API.""" + exclude_if_none = { + "presence_penalty": self.presence_penalty, + "frequency_penalty": self.frequency_penalty, + "seed": self.seed, + "top_p": self.top_p, + "logprobs": self.logprobs, + "top_logprobs": self.top_logprobs, + "logit_bias": self.logit_bias, + "stop": self.stop or None, # also exclude empty list for this + "max_tokens": self.max_tokens, + "extra_body": self.extra_body, + "n": self.n, + "temperature": self.temperature, + "reasoning_effort": self.reasoning_effort, + "reasoning": self.reasoning, + "include": self.include, + "service_tier": self.service_tier, + "truncation": self.truncation, + "store": self.store, + } + + params = { + "model": self.model_name, + "stream": self.streaming, + **{k: v for k, v in exclude_if_none.items() if v is not None}, + **self.model_kwargs, + } + + return params + + def _convert_chunk_to_message_chunk( + self, chunk: dict, base_generation_info: Optional[dict] + ) -> Optional[AIMessageChunkV1]: + if chunk.get("type") == "content.delta": # from beta.chat.completions.stream + return None + token_usage = chunk.get("usage") + choices = ( + chunk.get("choices", []) + # from beta.chat.completions.stream + or chunk.get("chunk", {}).get("choices", []) + ) + + usage_metadata: Optional[UsageMetadata] = ( + _create_usage_metadata(token_usage) if token_usage else None + ) + if len(choices) == 0: + # logprobs is implicitly None + return AIMessageChunkV1( + content=[], + usage_metadata=usage_metadata, + response_metadata=cast(ResponseMetadata, base_generation_info), + ) + + choice = choices[0] + if choice["delta"] is None: + return None + + message_chunk = _convert_delta_to_message_chunk(choice["delta"]) + generation_info = {**base_generation_info} if base_generation_info else {} + + if finish_reason := choice.get("finish_reason"): + generation_info["finish_reason"] = finish_reason + if model_name := chunk.get("model"): + generation_info["model_name"] = model_name + if system_fingerprint := chunk.get("system_fingerprint"): + generation_info["system_fingerprint"] = system_fingerprint + if service_tier := chunk.get("service_tier"): + generation_info["service_tier"] = service_tier + + logprobs = choice.get("logprobs") + if logprobs: + generation_info["logprobs"] = logprobs + + if usage_metadata: + message_chunk.usage_metadata = usage_metadata + + message_chunk.response_metadata = { + **message_chunk.response_metadata, + **generation_info, + } + return message_chunk + + def _stream_responses( + self, + messages: list[MessageV1], + stop: Optional[list[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[AIMessageChunkV1]: + kwargs["stream"] = True + payload = self._get_request_payload(messages, stop=stop, **kwargs) + if self.include_response_headers: + raw_context_manager = self.root_client.with_raw_response.responses.create( + **payload + ) + context_manager = raw_context_manager.parse() + headers = {"headers": dict(raw_context_manager.headers)} + 
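+        # Note (descriptive comment, not from the original diff): when header
+        # capture is enabled above, the captured `headers` dict is attached only
+        # to the first streamed chunk's response metadata, via
+        # `metadata = headers if is_first_chunk else {}` in the loop below.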
else: + context_manager = self.root_client.responses.create(**payload) + headers = {} + original_schema_obj = kwargs.get("response_format") + + with context_manager as response: + is_first_chunk = True + current_index = -1 + current_output_index = -1 + current_sub_index = -1 + for chunk in response: + metadata = headers if is_first_chunk else {} + ( + current_index, + current_output_index, + current_sub_index, + generation_chunk, + ) = _convert_responses_chunk_to_generation_chunk( + chunk, + current_index, + current_output_index, + current_sub_index, + schema=original_schema_obj, + metadata=metadata, + ) + if generation_chunk: + if run_manager: + run_manager.on_llm_new_token( + generation_chunk.text or "", chunk=generation_chunk + ) + is_first_chunk = False + yield generation_chunk + + async def _astream_responses( + self, + messages: list[MessageV1], + stop: Optional[list[str]] = None, + run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> AsyncIterator[AIMessageChunkV1]: + kwargs["stream"] = True + payload = self._get_request_payload(messages, stop=stop, **kwargs) + if self.include_response_headers: + raw_context_manager = ( + await self.root_async_client.with_raw_response.responses.create( + **payload + ) + ) + context_manager = raw_context_manager.parse() + headers = {"headers": dict(raw_context_manager.headers)} + else: + context_manager = await self.root_async_client.responses.create(**payload) + headers = {} + original_schema_obj = kwargs.get("response_format") + + async with context_manager as response: + is_first_chunk = True + current_index = -1 + current_output_index = -1 + current_sub_index = -1 + async for chunk in response: + metadata = headers if is_first_chunk else {} + ( + current_index, + current_output_index, + current_sub_index, + generation_chunk, + ) = _convert_responses_chunk_to_generation_chunk( + chunk, + current_index, + current_output_index, + current_sub_index, + schema=original_schema_obj, + metadata=metadata, + ) + if generation_chunk: + if run_manager: + await run_manager.on_llm_new_token( + generation_chunk.text or "", chunk=generation_chunk + ) + is_first_chunk = False + yield generation_chunk + + def _should_stream_usage( + self, stream_usage: Optional[bool] = None, **kwargs: Any + ) -> bool: + """Determine whether to include usage metadata in streaming output. + + For backwards compatibility, we check for `stream_options` passed + explicitly to kwargs or in the model_kwargs and override self.stream_usage. + """ + stream_usage_sources = [ # order of precedence + stream_usage, + kwargs.get("stream_options", {}).get("include_usage"), + self.model_kwargs.get("stream_options", {}).get("include_usage"), + self.stream_usage, + ] + for source in stream_usage_sources: + if isinstance(source, bool): + return source + return self.stream_usage + + def _stream( + self, + messages: list[MessageV1], + stop: Optional[list[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + *, + stream_usage: Optional[bool] = None, + **kwargs: Any, + ) -> Iterator[AIMessageChunkV1]: + kwargs["stream"] = True + stream_usage = self._should_stream_usage(stream_usage, **kwargs) + if stream_usage: + kwargs["stream_options"] = {"include_usage": stream_usage} + payload = self._get_request_payload(messages, stop=stop, **kwargs) + base_generation_info = {} + + if "response_format" in payload: + if self.include_response_headers: + warnings.warn( + "Cannot currently include response headers when response_format is " + "specified." 
+ ) + payload.pop("stream") + response_stream = self.root_client.beta.chat.completions.stream(**payload) + context_manager = response_stream + else: + if self.include_response_headers: + raw_response = self.client.with_raw_response.create(**payload) + response = raw_response.parse() + base_generation_info = {"headers": dict(raw_response.headers)} + else: + response = self.client.create(**payload) + context_manager = response + try: + with context_manager as response: + is_first_chunk = True + for chunk in response: + if not isinstance(chunk, dict): + chunk = chunk.model_dump() + message_chunk = self._convert_chunk_to_message_chunk( + chunk, base_generation_info if is_first_chunk else {} + ) + if message_chunk is None: + continue + logprobs = message_chunk.response_metadata.get("logprobs") + if run_manager: + run_manager.on_llm_new_token( + message_chunk.text or "", + chunk=message_chunk, + logprobs=logprobs, + ) + is_first_chunk = False + yield message_chunk + except openai.BadRequestError as e: + _handle_openai_bad_request(e) + if hasattr(response, "get_final_completion") and "response_format" in payload: + final_completion = response.get_final_completion() + message_chunk = self._get_message_chunk_from_completion(final_completion) + if run_manager: + run_manager.on_llm_new_token( + message_chunk.text or "", chunk=message_chunk + ) + yield message_chunk + + def _invoke( + self, + messages: list[MessageV1], + stop: Optional[list[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> AIMessageV1: + if self.streaming: + stream_iter = self._stream( + messages, stop=stop, run_manager=run_manager, **kwargs + ) + return generate_from_stream(stream_iter) + payload = self._get_request_payload(messages, stop=stop, **kwargs) + generation_info = None + if "response_format" in payload: + if self.include_response_headers: + warnings.warn( + "Cannot currently include response headers when response_format is " + "specified." 
+ ) + payload.pop("stream") + try: + response = self.root_client.beta.chat.completions.parse(**payload) + except openai.BadRequestError as e: + _handle_openai_bad_request(e) + elif self._use_responses_api(payload): + original_schema_obj = kwargs.get("response_format") + if original_schema_obj and _is_pydantic_class(original_schema_obj): + response = self.root_client.responses.parse(**payload) + else: + if self.include_response_headers: + raw_response = self.root_client.with_raw_response.responses.create( + **payload + ) + response = raw_response.parse() + generation_info = {"headers": dict(raw_response.headers)} + else: + response = self.root_client.responses.create(**payload) + return _construct_lc_result_from_responses_api( + response, schema=original_schema_obj, metadata=generation_info + ) + elif self.include_response_headers: + raw_response = self.client.with_raw_response.create(**payload) + response = raw_response.parse() + generation_info = {"headers": dict(raw_response.headers)} + else: + response = self.client.create(**payload) + return self._create_ai_message(response, generation_info) + + def _use_responses_api(self, payload: dict) -> bool: + if isinstance(self.use_responses_api, bool): + return self.use_responses_api + elif self.output_version == "responses/v1": + return True + elif self.include is not None: + return True + elif self.reasoning is not None: + return True + elif self.truncation is not None: + return True + elif self.use_previous_response_id: + return True + else: + return _use_responses_api(payload) + + def _get_request_payload( + self, + input_: LanguageModelInput, + *, + stop: Optional[list[str]] = None, + **kwargs: Any, + ) -> dict: + messages = self._convert_input(input_) + if stop is not None: + kwargs["stop"] = stop + + payload = {**self._default_params, **kwargs} + if self._use_responses_api(payload): + if self.use_previous_response_id: + last_messages, previous_response_id = _get_last_messages(messages) + payload_to_use = last_messages if previous_response_id else messages + if previous_response_id: + payload["previous_response_id"] = previous_response_id + payload = _construct_responses_api_payload(payload_to_use, payload) + else: + payload = _construct_responses_api_payload(messages, payload) + else: + payload["messages"] = [ + _convert_message_to_dict(_convert_from_v1_to_chat_completions(m)) + if isinstance(m, AIMessageV1) + else _convert_message_to_dict(m) + for m in messages + ] + return payload + + def _create_ai_message( + self, + response: Union[dict, openai.BaseModel], + generation_info: Optional[dict] = None, + ) -> AIMessageV1: + response_dict = ( + response if isinstance(response, dict) else response.model_dump() + ) + # Sometimes the AI Model calling will get error, we should raise it (this is + # typically followed by a null value for `choices`, which we raise for + # separately below). + if response_dict.get("error"): + raise ValueError(response_dict.get("error")) + + # Raise informative error messages for non-OpenAI chat completions APIs + # that return malformed responses. 
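+        # Note (descriptive comment, not from the original diff): some
+        # OpenAI-compatible servers may omit the `choices` key entirely or set it
+        # to null; the checks below surface that as a clearer KeyError/TypeError
+        # instead of an opaque failure further down.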
+ try: + choices = response_dict["choices"] + except KeyError as e: + raise KeyError( + f"Response missing `choices` key: {response_dict.keys()}" + ) from e + + if choices is None: + raise TypeError("Received response with null value for `choices`.") + + token_usage = response_dict.get("usage") + + for res in choices: + message = cast(AIMessageV1, _convert_dict_to_message(res["message"])) + if token_usage: + message.usage_metadata = _create_usage_metadata(token_usage) + generation_info = generation_info or {} + generation_info["finish_reason"] = ( + res.get("finish_reason") + if res.get("finish_reason") is not None + else generation_info.get("finish_reason") + ) + if "logprobs" in res: + generation_info["logprobs"] = res["logprobs"] + message.response_metadata = {**message.response_metadata, **generation_info} + llm_output = { + "model_name": response_dict.get("model", self.model_name), + "model_provider": "openai", + "system_fingerprint": response_dict.get("system_fingerprint", ""), + } + if "id" in response_dict: + llm_output["id"] = response_dict["id"] + if "service_tier" in response_dict: + llm_output["service_tier"] = response_dict["service_tier"] + + if isinstance(response, openai.BaseModel) and getattr( + response, "choices", None + ): + oai_message = response.choices[0].message # type: ignore[attr-defined] + if hasattr(oai_message, "parsed"): + message.parsed = oai_message.parsed + if refusal := getattr(oai_message, "refusal", None): + message.content.append( + {"type": "non_standard", "value": {"refusal": refusal}} + ) + + message.response_metadata = {**message.response_metadata, **llm_output} # type: ignore[typeddict-item] + return message + + async def _astream( + self, + messages: list[MessageV1], + stop: Optional[list[str]] = None, + run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + *, + stream_usage: Optional[bool] = None, + **kwargs: Any, + ) -> AsyncIterator[AIMessageChunkV1]: + kwargs["stream"] = True + stream_usage = self._should_stream_usage(stream_usage, **kwargs) + if stream_usage: + kwargs["stream_options"] = {"include_usage": stream_usage} + payload = self._get_request_payload(messages, stop=stop, **kwargs) + base_generation_info = {} + + if "response_format" in payload: + if self.include_response_headers: + warnings.warn( + "Cannot currently include response headers when response_format is " + "specified." 
+ ) + payload.pop("stream") + response_stream = self.root_async_client.beta.chat.completions.stream( + **payload + ) + context_manager = response_stream + else: + if self.include_response_headers: + raw_response = await self.async_client.with_raw_response.create( + **payload + ) + response = raw_response.parse() + base_generation_info = {"headers": dict(raw_response.headers)} + else: + response = await self.async_client.create(**payload) + context_manager = response + try: + async with context_manager as response: + is_first_chunk = True + async for chunk in response: + if not isinstance(chunk, dict): + chunk = chunk.model_dump() + message_chunk = self._convert_chunk_to_message_chunk( + chunk, base_generation_info if is_first_chunk else {} + ) + if message_chunk is None: + continue + logprobs = message_chunk.response_metadata.get("logprobs") + if run_manager: + await run_manager.on_llm_new_token( + message_chunk.text or "", + chunk=message_chunk, + logprobs=logprobs, + ) + is_first_chunk = False + yield message_chunk + except openai.BadRequestError as e: + _handle_openai_bad_request(e) + if hasattr(response, "get_final_completion") and "response_format" in payload: + final_completion = await response.get_final_completion() + message_chunk = self._get_message_chunk_from_completion(final_completion) + if run_manager: + await run_manager.on_llm_new_token( + message_chunk.text or "", chunk=message_chunk + ) + yield message_chunk + + async def _ainvoke( + self, + messages: list[MessageV1], + stop: Optional[list[str]] = None, + run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> AIMessageV1: + if self.streaming: + stream_iter = self._astream( + messages, stop=stop, run_manager=run_manager, **kwargs + ) + return await agenerate_from_stream(stream_iter) + payload = self._get_request_payload(messages, stop=stop, **kwargs) + generation_info = None + if "response_format" in payload: + if self.include_response_headers: + warnings.warn( + "Cannot currently include response headers when response_format is " + "specified." 
+ ) + payload.pop("stream") + try: + response = await self.root_async_client.beta.chat.completions.parse( + **payload + ) + except openai.BadRequestError as e: + _handle_openai_bad_request(e) + elif self._use_responses_api(payload): + original_schema_obj = kwargs.get("response_format") + if original_schema_obj and _is_pydantic_class(original_schema_obj): + response = await self.root_async_client.responses.parse(**payload) + else: + if self.include_response_headers: + raw_response = ( + await self.root_async_client.with_raw_response.responses.create( + **payload + ) + ) + response = raw_response.parse() + generation_info = {"headers": dict(raw_response.headers)} + else: + response = await self.root_async_client.responses.create(**payload) + return _construct_lc_result_from_responses_api( + response, schema=original_schema_obj, metadata=generation_info + ) + elif self.include_response_headers: + raw_response = await self.async_client.with_raw_response.create(**payload) + response = raw_response.parse() + generation_info = {"headers": dict(raw_response.headers)} + else: + response = await self.async_client.create(**payload) + return await run_in_executor( + None, self._create_ai_message, response, generation_info + ) + + @property + def _identifying_params(self) -> dict[str, Any]: + """Get the identifying parameters.""" + return {"model_name": self.model_name, **self._default_params} + + def _get_invocation_params( + self, stop: Optional[list[str]] = None, **kwargs: Any + ) -> dict[str, Any]: + """Get the parameters used to invoke the model.""" + params = { + "model": self.model_name, + **super()._get_invocation_params(stop=stop), + **self._default_params, + **kwargs, + } + # Redact headers from built-in remote MCP tool invocations + if (tools := params.get("tools")) and isinstance(tools, list): + params["tools"] = [ + ({**tool, "headers": "**REDACTED**"} if "headers" in tool else tool) + if isinstance(tool, dict) and tool.get("type") == "mcp" + else tool + for tool in tools + ] + + return params + + def _get_ls_params( + self, stop: Optional[list[str]] = None, **kwargs: Any + ) -> LangSmithParams: + """Get standard params for tracing.""" + params = self._get_invocation_params(stop=stop, **kwargs) + ls_params = LangSmithParams( + ls_provider="openai", + ls_model_name=self.model_name, + ls_model_type="chat", + ls_temperature=params.get("temperature", self.temperature), + ) + if ls_max_tokens := params.get("max_tokens", self.max_tokens) or params.get( + "max_completion_tokens", self.max_tokens + ): + ls_params["ls_max_tokens"] = ls_max_tokens + if ls_stop := stop or params.get("stop", None): + ls_params["ls_stop"] = ls_stop + return ls_params + + @property + def _llm_type(self) -> str: + """Return type of chat model.""" + return "openai-chat" + + def _get_encoding_model(self) -> tuple[str, tiktoken.Encoding]: + if self.tiktoken_model_name is not None: + model = self.tiktoken_model_name + else: + model = self.model_name + try: + encoding = tiktoken.encoding_for_model(model) + except KeyError: + encoder = "cl100k_base" + if self.model_name.startswith("gpt-4o") or self.model_name.startswith( + "gpt-4.1" + ): + encoder = "o200k_base" + encoding = tiktoken.get_encoding(encoder) + return model, encoding + + def get_token_ids(self, text: str) -> list[int]: + """Get the tokens present in the text with tiktoken package.""" + if self.custom_get_token_ids is not None: + return self.custom_get_token_ids(text) + # tiktoken NOT supported for Python 3.7 or below + if sys.version_info[1] <= 7: + return 
super().get_token_ids(text) + _, encoding_model = self._get_encoding_model() + return encoding_model.encode(text) + + def get_num_tokens_from_messages( + self, + messages: list[MessageV1], + tools: Optional[ + Sequence[Union[dict[str, Any], type, Callable, BaseTool]] + ] = None, + ) -> int: + """Calculate num tokens for ``gpt-3.5-turbo`` and ``gpt-4`` with ``tiktoken`` package. + + **Requirements**: You must have the ``pillow`` installed if you want to count + image tokens if you are specifying the image as a base64 string, and you must + have both ``pillow`` and ``httpx`` installed if you are specifying the image + as a URL. If these aren't installed image inputs will be ignored in token + counting. + + `OpenAI reference `__ + + Args: + messages: The message inputs to tokenize. + tools: If provided, sequence of dict, BaseModel, function, or BaseTools + to be converted to tool schemas. + """ # noqa: E501 + # TODO: Count bound tools as part of input. + if tools is not None: + warnings.warn( + "Counting tokens in tool schemas is not yet supported. Ignoring tools." + ) + model, encoding = self._get_encoding_model() + if model.startswith("gpt-3.5-turbo-0301"): + # every message follows {role/name}\n{content}\n + tokens_per_message = 4 + # if there's a name, the role is omitted + tokens_per_name = -1 + elif model.startswith("gpt-3.5-turbo") or model.startswith("gpt-4"): + tokens_per_message = 3 + tokens_per_name = 1 + else: + raise NotImplementedError( + f"get_num_tokens_from_messages() is not presently implemented " + f"for model {model}. See " + "https://platform.openai.com/docs/guides/text-generation/managing-tokens" # noqa: E501 + " for information on how messages are converted to tokens." + ) + num_tokens = 0 + messages_dict = [_convert_message_to_dict(m) for m in messages] + for message in messages_dict: + num_tokens += tokens_per_message + for key, value in message.items(): + # This is an inferred approximation. OpenAI does not document how to + # count tool message tokens. + if key == "tool_call_id": + num_tokens += 3 + continue + if isinstance(value, list): + # content or tool calls + for val in value: + if isinstance(val, str) or val["type"] == "text": + text = val["text"] if isinstance(val, dict) else val + num_tokens += len(encoding.encode(text)) + elif val["type"] == "image_url": + if val["image_url"].get("detail") == "low": + num_tokens += 85 + else: + image_size = _url_to_size(val["image_url"]["url"]) + if not image_size: + continue + num_tokens += _count_image_tokens(*image_size) + # Tool/function call token counting is not documented by OpenAI. + # This is an approximation. + elif val["type"] == "function": + num_tokens += len( + encoding.encode(val["function"]["arguments"]) + ) + num_tokens += len(encoding.encode(val["function"]["name"])) + elif val["type"] == "file": + warnings.warn( + "Token counts for file inputs are not supported. " + "Ignoring file inputs." 
+ ) + pass + else: + raise ValueError( + f"Unrecognized content block type\n\n{val}" + ) + elif not value: + continue + else: + # Cast str(value) in case the message value is not a string + # This occurs with function messages + num_tokens += len(encoding.encode(str(value))) + if key == "name": + num_tokens += tokens_per_name + # every reply is primed with assistant + num_tokens += 3 + return num_tokens + + def bind_tools( + self, + tools: Sequence[Union[dict[str, Any], type, Callable, BaseTool]], + *, + tool_choice: Optional[ + Union[dict, str, Literal["auto", "none", "required", "any"], bool] + ] = None, + strict: Optional[bool] = None, + parallel_tool_calls: Optional[bool] = None, + **kwargs: Any, + ) -> Runnable[LanguageModelInput, AIMessageV1]: + """Bind tool-like objects to this chat model. + + Assumes model is compatible with OpenAI tool-calling API. + + Args: + tools: A list of tool definitions to bind to this chat model. + Supports any tool definition handled by + :meth:`langchain_core.utils.function_calling.convert_to_openai_tool`. + tool_choice: Which tool to require the model to call. Options are: + + - str of the form ``"<>"``: calls <> tool. + - ``"auto"``: automatically selects a tool (including no tool). + - ``"none"``: does not call a tool. + - ``"any"`` or ``"required"`` or ``True``: force at least one tool to be called. + - dict of the form ``{"type": "function", "function": {"name": <>}}``: calls <> tool. + - ``False`` or ``None``: no effect, default OpenAI behavior. + strict: If True, model output is guaranteed to exactly match the JSON Schema + provided in the tool definition. If True, the input schema will be + validated according to + https://platform.openai.com/docs/guides/structured-outputs/supported-schemas. + If False, input schema will not be validated and model output will not + be validated. + If None, ``strict`` argument will not be passed to the model. + parallel_tool_calls: Set to ``False`` to disable parallel tool use. + Defaults to ``None`` (no specification, which allows parallel tool use). + kwargs: Any additional parameters are passed directly to + :meth:`~langchain_openai.chat_models.base.ChatOpenAI.bind`. + + .. versionchanged:: 0.1.21 + + Support for ``strict`` argument added. + + """ # noqa: E501 + + if parallel_tool_calls is not None: + kwargs["parallel_tool_calls"] = parallel_tool_calls + formatted_tools = [ + convert_to_openai_tool(tool, strict=strict) for tool in tools + ] + tool_names = [] + for tool in formatted_tools: + if "function" in tool: + tool_names.append(tool["function"]["name"]) + elif "name" in tool: + tool_names.append(tool["name"]) + else: + pass + if tool_choice: + if isinstance(tool_choice, str): + # tool_choice is a tool/function name + if tool_choice in tool_names: + tool_choice = { + "type": "function", + "function": {"name": tool_choice}, + } + elif tool_choice in WellKnownTools: + tool_choice = {"type": tool_choice} + # 'any' is not natively supported by OpenAI API. + # We support 'any' since other models use this instead of 'required'. + elif tool_choice == "any": + tool_choice = "required" + else: + pass + elif isinstance(tool_choice, bool): + tool_choice = "required" + elif isinstance(tool_choice, dict): + pass + else: + raise ValueError( + f"Unrecognized tool_choice type. Expected str, bool or dict. 
" + f"Received: {tool_choice}" + ) + kwargs["tool_choice"] = tool_choice + return super().bind(tools=formatted_tools, **kwargs) + + def with_structured_output( + self, + schema: Optional[_DictOrPydanticClass] = None, + *, + method: Literal[ + "function_calling", "json_mode", "json_schema" + ] = "function_calling", + include_raw: bool = False, + strict: Optional[bool] = None, + tools: Optional[list] = None, + **kwargs: Any, + ) -> Runnable[LanguageModelInput, _DictOrPydantic]: + """Model wrapper that returns outputs formatted to match the given schema. + + Args: + schema: + The output schema. Can be passed in as: + + - an OpenAI function/tool schema, + - a JSON Schema, + - a TypedDict class (support added in 0.1.20), + - or a Pydantic class. + + If ``schema`` is a Pydantic class then the model output will be a + Pydantic instance of that class, and the model-generated fields will be + validated by the Pydantic class. Otherwise the model output will be a + dict and will not be validated. See :meth:`langchain_core.utils.function_calling.convert_to_openai_tool` + for more on how to properly specify types and descriptions of + schema fields when specifying a Pydantic or TypedDict class. + + method: The method for steering model generation, one of: + + - "function_calling": + Uses OpenAI's tool-calling (formerly called function calling) + API: https://platform.openai.com/docs/guides/function-calling + - "json_schema": + Uses OpenAI's Structured Output API: https://platform.openai.com/docs/guides/structured-outputs + Supported for "gpt-4o-mini", "gpt-4o-2024-08-06", "o1", and later + models. + - "json_mode": + Uses OpenAI's JSON mode. Note that if using JSON mode then you + must include instructions for formatting the output into the + desired schema into the model call: + https://platform.openai.com/docs/guides/structured-outputs/json-mode + + Learn more about the differences between the methods and which models + support which methods here: + + - https://platform.openai.com/docs/guides/structured-outputs/structured-outputs-vs-json-mode + - https://platform.openai.com/docs/guides/structured-outputs/function-calling-vs-response-format + + include_raw: + If False then only the parsed structured output is returned. If + an error occurs during model output parsing it will be raised. If True + then both the raw model response (an AIMessage) and the parsed model + response will be returned. If an error occurs during output parsing it + will be caught and returned as well. The final output is always a dict + with keys "raw", "parsed", and "parsing_error". + strict: + + - True: + Model output is guaranteed to exactly match the schema. + The input schema will also be validated according to + https://platform.openai.com/docs/guides/structured-outputs/supported-schemas + - False: + Input schema will not be validated and model output will not be + validated. + - None: + ``strict`` argument will not be passed to the model. + + tools: + A list of tool-like objects to bind to the chat model. Requires that: + + - ``method`` is ``"json_schema"`` (default). + - ``strict=True`` + - ``include_raw=True`` + + If a model elects to call a + tool, the resulting ``AIMessage`` in ``"raw"`` will include tool calls. + + .. dropdown:: Example + + .. 
code-block:: python + + from langchain.chat_models import init_chat_model + from pydantic import BaseModel + + + class ResponseSchema(BaseModel): + response: str + + + def get_weather(location: str) -> str: + \"\"\"Get weather at a location.\"\"\" + pass + + llm = init_chat_model("openai:gpt-4o-mini") + + structured_llm = llm.with_structured_output( + ResponseSchema, + tools=[get_weather], + strict=True, + include_raw=True, + ) + + structured_llm.invoke("What's the weather in Boston?") + + .. code-block:: python + + { + "raw": AIMessage(content="", tool_calls=[...], ...), + "parsing_error": None, + "parsed": None, + } + + kwargs: Additional keyword args are passed through to the model. + + Returns: + A Runnable that takes same inputs as a :class:`from langchain_core.language_models.v1.chat_models import BaseChatModelV1`. + + | If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs an instance of ``schema`` (i.e., a Pydantic object). Otherwise, if ``include_raw`` is False then Runnable outputs a dict. + + | If ``include_raw`` is True, then Runnable outputs a dict with keys: + + - "raw": AIMessage + - "parsed": None if there was a parsing error, otherwise the type depends on the ``schema`` as described above. + - "parsing_error": Optional[BaseException] + + .. versionchanged:: 0.1.20 + + Added support for TypedDict class ``schema``. + + .. versionchanged:: 0.1.21 + + Support for ``strict`` argument added. + Support for ``method`` = "json_schema" added. + + .. versionchanged:: 0.3.12 + Support for ``tools`` added. + + .. versionchanged:: 0.3.21 + Pass ``kwargs`` through to the model. + """ # noqa: E501 + if strict is not None and method == "json_mode": + raise ValueError( + "Argument `strict` is not supported with `method`='json_mode'" + ) + is_pydantic_schema = _is_pydantic_class(schema) + + if method == "json_schema": + # Check for Pydantic BaseModel V1 + if ( + is_pydantic_schema and issubclass(schema, BaseModelV1) # type: ignore[arg-type] + ): + warnings.warn( + "Received a Pydantic BaseModel V1 schema. This is not supported by " + 'method="json_schema". Please use method="function_calling" ' + "or specify schema via JSON Schema or Pydantic V2 BaseModel. " + 'Overriding to method="function_calling".' + ) + method = "function_calling" + # Check for incompatible model + if self.model_name and ( + self.model_name.startswith("gpt-3") + or self.model_name.startswith("gpt-4-") + or self.model_name == "gpt-4" + ): + warnings.warn( + f"Cannot use method='json_schema' with model {self.model_name} " + f"since it doesn't support OpenAI's Structured Output API. You can " + f"see supported models here: " + f"https://platform.openai.com/docs/guides/structured-outputs#supported-models. " # noqa: E501 + "To fix this warning, set `method='function_calling'. " + "Overriding to method='function_calling'." + ) + method = "function_calling" + + if method == "function_calling": + if schema is None: + raise ValueError( + "schema must be specified when method is not 'json_mode'. " + "Received None." 
+ ) + tool_name = convert_to_openai_tool(schema)["function"]["name"] + bind_kwargs = self._filter_disabled_params( + **{ + **dict( + tool_choice=tool_name, + parallel_tool_calls=False, + strict=strict, + ls_structured_output_format={ + "kwargs": {"method": method, "strict": strict}, + "schema": schema, + }, + ), + **kwargs, + } + ) + + llm = self.bind_tools([schema], **bind_kwargs) + if is_pydantic_schema: + output_parser: Runnable = PydanticToolsParser( + tools=[schema], # type: ignore[list-item] + first_tool_only=True, # type: ignore[list-item] + ) + else: + output_parser = JsonOutputKeyToolsParser( + key_name=tool_name, first_tool_only=True + ) + elif method == "json_mode": + llm = self.bind( + **{ + **dict( + response_format={"type": "json_object"}, + ls_structured_output_format={ + "kwargs": {"method": method}, + "schema": schema, + }, + ), + **kwargs, + } + ) + output_parser = ( + PydanticOutputParser(pydantic_object=schema) # type: ignore[arg-type] + if is_pydantic_schema + else JsonOutputParser() + ) + elif method == "json_schema": + if schema is None: + raise ValueError( + "schema must be specified when method is not 'json_mode'. " + "Received None." + ) + response_format = _convert_to_openai_response_format(schema, strict=strict) + bind_kwargs = { + **dict( + response_format=response_format, + ls_structured_output_format={ + "kwargs": {"method": method, "strict": strict}, + "schema": convert_to_openai_tool(schema), + }, + **kwargs, + ) + } + if tools: + bind_kwargs["tools"] = [ + convert_to_openai_tool(t, strict=strict) for t in tools + ] + llm = self.bind(**bind_kwargs) + if is_pydantic_schema: + output_parser = RunnableLambda( + partial(_oai_structured_outputs_parser, schema=cast(type, schema)) + ).with_types(output_type=cast(type, schema)) + else: + output_parser = JsonOutputParser() + else: + raise ValueError( + f"Unrecognized method argument. Expected one of 'function_calling' or " + f"'json_mode'. Received: '{method}'" + ) + + if include_raw: + parser_assign = RunnablePassthrough.assign( + parsed=itemgetter("raw") | output_parser, parsing_error=lambda _: None + ) + parser_none = RunnablePassthrough.assign(parsed=lambda _: None) + parser_with_fallback = parser_assign.with_fallbacks( + [parser_none], exception_key="parsing_error" + ) + return RunnableMap(raw=llm) | parser_with_fallback + else: + return llm | output_parser + + def _filter_disabled_params(self, **kwargs: Any) -> dict[str, Any]: + if not self.disabled_params: + return kwargs + filtered = {} + for k, v in kwargs.items(): + # Skip param + if k in self.disabled_params and ( + self.disabled_params[k] is None or v in self.disabled_params[k] + ): + continue + # Keep param + else: + filtered[k] = v + return filtered + + def _get_message_chunk_from_completion( + self, completion: openai.BaseModel + ) -> AIMessageChunkV1: + """Get chunk from completion (e.g., from final completion of a stream).""" + ai_message = self._create_ai_message(completion) + return AIMessageChunkV1( + content="", + usage_metadata=ai_message.usage_metadata, + response_metadata=ai_message.response_metadata, + parsed=ai_message.parsed, + ) + + +class ChatOpenAI(BaseChatOpenAI): # type: ignore[override] + """OpenAI chat model integration. + + .. dropdown:: Setup + :open: + + Install ``langchain-openai`` and set environment variable ``OPENAI_API_KEY``. + + .. code-block:: bash + + pip install -U langchain-openai + export OPENAI_API_KEY="your-api-key" + + .. dropdown:: Key init args — completion params + + model: str + Name of OpenAI model to use. 
+ temperature: float + Sampling temperature. + max_tokens: Optional[int] + Max number of tokens to generate. + logprobs: Optional[bool] + Whether to return logprobs. + stream_options: Dict + Configure streaming outputs, like whether to return token usage when + streaming (``{"include_usage": True}``). + use_responses_api: Optional[bool] + Whether to use the responses API. + + See full list of supported init args and their descriptions in the params section. + + .. dropdown:: Key init args — client params + + timeout: Union[float, Tuple[float, float], Any, None] + Timeout for requests. + max_retries: Optional[int] + Max number of retries. + api_key: Optional[str] + OpenAI API key. If not passed in will be read from env var ``OPENAI_API_KEY``. + base_url: Optional[str] + Base URL for API requests. Only specify if using a proxy or service + emulator. + organization: Optional[str] + OpenAI organization ID. If not passed in will be read from env + var ``OPENAI_ORG_ID``. + + See full list of supported init args and their descriptions in the params section. + + .. dropdown:: Instantiate + + .. code-block:: python + + from langchain_openai import ChatOpenAI + + llm = ChatOpenAI( + model="gpt-4o", + temperature=0, + max_tokens=None, + timeout=None, + max_retries=2, + # api_key="...", + # base_url="...", + # organization="...", + # other params... + ) + + **NOTE**: Any param which is not explicitly supported will be passed directly to the + ``openai.OpenAI.chat.completions.create(...)`` API every time to the model is + invoked. For example: + + .. code-block:: python + + from langchain_openai import ChatOpenAI + import openai + + ChatOpenAI(..., frequency_penalty=0.2).invoke(...) + + # results in underlying API call of: + + openai.OpenAI(..).chat.completions.create(..., frequency_penalty=0.2) + + # which is also equivalent to: + + ChatOpenAI(...).invoke(..., frequency_penalty=0.2) + + .. dropdown:: Invoke + + .. code-block:: python + + messages = [ + ( + "system", + "You are a helpful translator. Translate the user sentence to French.", + ), + ("human", "I love programming."), + ] + llm.invoke(messages) + + .. code-block:: pycon + + AIMessage( + content="J'adore la programmation.", + response_metadata={ + "token_usage": { + "completion_tokens": 5, + "prompt_tokens": 31, + "total_tokens": 36, + }, + "model_name": "gpt-4o", + "system_fingerprint": "fp_43dfabdef1", + "finish_reason": "stop", + "logprobs": None, + }, + id="run-012cffe2-5d3d-424d-83b5-51c6d4a593d1-0", + usage_metadata={"input_tokens": 31, "output_tokens": 5, "total_tokens": 36}, + ) + + .. dropdown:: Stream + + .. code-block:: python + + for chunk in llm.stream(messages): + print(chunk.text(), end="") + + .. code-block:: python + + AIMessageChunk(content="", id="run-9e1517e3-12bf-48f2-bb1b-2e824f7cd7b0") + AIMessageChunk(content="J", id="run-9e1517e3-12bf-48f2-bb1b-2e824f7cd7b0") + AIMessageChunk( + content="'adore", id="run-9e1517e3-12bf-48f2-bb1b-2e824f7cd7b0" + ) + AIMessageChunk(content=" la", id="run-9e1517e3-12bf-48f2-bb1b-2e824f7cd7b0") + AIMessageChunk( + content=" programmation", id="run-9e1517e3-12bf-48f2-bb1b-2e824f7cd7b0" + ) + AIMessageChunk(content=".", id="run-9e1517e3-12bf-48f2-bb1b-2e824f7cd7b0") + AIMessageChunk( + content="", + response_metadata={"finish_reason": "stop"}, + id="run-9e1517e3-12bf-48f2-bb1b-2e824f7cd7b0", + ) + + .. code-block:: python + + stream = llm.stream(messages) + full = next(stream) + for chunk in stream: + full += chunk + full + + .. 
code-block:: python + + AIMessageChunk( + content="J'adore la programmation.", + response_metadata={"finish_reason": "stop"}, + id="run-bf917526-7f58-4683-84f7-36a6b671d140", + ) + + .. dropdown:: Async + + .. code-block:: python + + await llm.ainvoke(messages) + + # stream: + # async for chunk in (await llm.astream(messages)) + + # batch: + # await llm.abatch([messages]) + + .. code-block:: python + + AIMessage( + content="J'adore la programmation.", + response_metadata={ + "token_usage": { + "completion_tokens": 5, + "prompt_tokens": 31, + "total_tokens": 36, + }, + "model_name": "gpt-4o", + "system_fingerprint": "fp_43dfabdef1", + "finish_reason": "stop", + "logprobs": None, + }, + id="run-012cffe2-5d3d-424d-83b5-51c6d4a593d1-0", + usage_metadata={ + "input_tokens": 31, + "output_tokens": 5, + "total_tokens": 36, + }, + ) + + .. dropdown:: Tool calling + + .. code-block:: python + + from pydantic import BaseModel, Field + + + class GetWeather(BaseModel): + '''Get the current weather in a given location''' + + location: str = Field( + ..., description="The city and state, e.g. San Francisco, CA" + ) + + + class GetPopulation(BaseModel): + '''Get the current population in a given location''' + + location: str = Field( + ..., description="The city and state, e.g. San Francisco, CA" + ) + + + llm_with_tools = llm.bind_tools( + [GetWeather, GetPopulation] + # strict = True # enforce tool args schema is respected + ) + ai_msg = llm_with_tools.invoke( + "Which city is hotter today and which is bigger: LA or NY?" + ) + ai_msg.tool_calls + + .. code-block:: python + + [ + { + "name": "GetWeather", + "args": {"location": "Los Angeles, CA"}, + "id": "call_6XswGD5Pqk8Tt5atYr7tfenU", + }, + { + "name": "GetWeather", + "args": {"location": "New York, NY"}, + "id": "call_ZVL15vA8Y7kXqOy3dtmQgeCi", + }, + { + "name": "GetPopulation", + "args": {"location": "Los Angeles, CA"}, + "id": "call_49CFW8zqC9W7mh7hbMLSIrXw", + }, + { + "name": "GetPopulation", + "args": {"location": "New York, NY"}, + "id": "call_6ghfKxV264jEfe1mRIkS3PE7", + }, + ] + + Note that ``openai >= 1.32`` supports a ``parallel_tool_calls`` parameter + that defaults to ``True``. This parameter can be set to ``False`` to + disable parallel tool calls: + + .. code-block:: python + + ai_msg = llm_with_tools.invoke( + "What is the weather in LA and NY?", parallel_tool_calls=False + ) + ai_msg.tool_calls + + .. code-block:: python + + [ + { + "name": "GetWeather", + "args": {"location": "Los Angeles, CA"}, + "id": "call_4OoY0ZR99iEvC7fevsH8Uhtz", + } + ] + + Like other runtime parameters, ``parallel_tool_calls`` can be bound to a model + using ``llm.bind(parallel_tool_calls=False)`` or during instantiation by + setting ``model_kwargs``. + + See ``ChatOpenAI.bind_tools()`` method for more. + + .. dropdown:: Built-in tools + + .. versionadded:: 0.3.9 + + You can access `built-in tools `_ + supported by the OpenAI Responses API. See LangChain + `docs `_ for more + detail. + + .. note:: + ``langchain-openai >= 0.3.26`` allows users to opt-in to an updated + AIMessage format when using the Responses API. Setting + + .. code-block:: python + + llm = ChatOpenAI(model="...", output_version="responses/v1") + + will format output from reasoning summaries, built-in tool invocations, and + other response items into the message's ``content`` field, rather than + ``additional_kwargs``. We recommend this format for new applications. + + .. 
code-block:: python + + from langchain_openai import ChatOpenAI + + llm = ChatOpenAI(model="gpt-4.1-mini", output_version="responses/v1") + + tool = {"type": "web_search_preview"} + llm_with_tools = llm.bind_tools([tool]) + + response = llm_with_tools.invoke( + "What was a positive news story from today?" + ) + response.content + + .. code-block:: python + + [ + { + "type": "text", + "text": "Today, a heartwarming story emerged from ...", + "annotations": [ + { + "end_index": 778, + "start_index": 682, + "title": "Title of story", + "type": "url_citation", + "url": "", + } + ], + } + ] + + .. dropdown:: Managing conversation state + + .. versionadded:: 0.3.9 + + OpenAI's Responses API supports management of + `conversation state `_. + Passing in response IDs from previous messages will continue a conversational + thread. See LangChain + `docs `_ for more + detail. + + .. code-block:: python + + from langchain_openai import ChatOpenAI + + llm = ChatOpenAI(model="gpt-4.1-mini", use_responses_api=True) + response = llm.invoke("Hi, I'm Bob.") + response.text() + + .. code-block:: python + + "Hi Bob! How can I assist you today?" + + .. code-block:: python + + second_response = llm.invoke( + "What is my name?", + previous_response_id=response.response_metadata["id"], + ) + second_response.text() + + .. code-block:: python + + "Your name is Bob. How can I help you today, Bob?" + + .. versionadded:: 0.3.26 + + You can also initialize ChatOpenAI with :attr:`use_previous_response_id`. + Input messages up to the most recent response will then be dropped from request + payloads, and ``previous_response_id`` will be set using the ID of the most + recent response. + + .. code-block:: python + + llm = ChatOpenAI(model="gpt-4.1-mini", use_previous_response_id=True) + + .. dropdown:: Reasoning output + + OpenAI's Responses API supports `reasoning models `_ + that expose a summary of internal reasoning processes. + + .. note:: + ``langchain-openai >= 0.3.26`` allows users to opt-in to an updated + AIMessage format when using the Responses API. Setting + + .. code-block:: python + + llm = ChatOpenAI(model="...", output_version="responses/v1") + + will format output from reasoning summaries, built-in tool invocations, and + other response items into the message's ``content`` field, rather than + ``additional_kwargs``. We recommend this format for new applications. + + .. code-block:: python + + from langchain_openai import ChatOpenAI + + reasoning = { + "effort": "medium", # 'low', 'medium', or 'high' + "summary": "auto", # 'detailed', 'auto', or None + } + + llm = ChatOpenAI( + model="o4-mini", reasoning=reasoning, output_version="responses/v1" + ) + response = llm.invoke("What is 3^3?") + + # Response text + print(f"Output: {response.text()}") + + # Reasoning summaries + for block in response.content: + if block["type"] == "reasoning": + for summary in block["summary"]: + print(summary["text"]) + + .. code-block:: none + + Output: 3³ = 27 + Reasoning: The user wants to know... + + .. dropdown:: Structured output + + .. code-block:: python + + from typing import Optional + + from pydantic import BaseModel, Field + + + class Joke(BaseModel): + '''Joke to tell user.''' + + setup: str = Field(description="The setup of the joke") + punchline: str = Field(description="The punchline to the joke") + rating: Optional[int] = Field( + description="How funny the joke is, from 1 to 10" + ) + + + structured_llm = llm.with_structured_output(Joke) + structured_llm.invoke("Tell me a joke about cats") + + .. 
code-block:: python + + Joke( + setup="Why was the cat sitting on the computer?", + punchline="To keep an eye on the mouse!", + rating=None, + ) + + See ``ChatOpenAI.with_structured_output()`` for more. + + .. dropdown:: JSON mode + + .. code-block:: python + + json_llm = llm.bind(response_format={"type": "json_object"}) + ai_msg = json_llm.invoke( + "Return a JSON object with key 'random_ints' and a value of 10 random ints in [0-99]" + ) + ai_msg.content + + .. code-block:: python + + '\\n{\\n "random_ints": [23, 87, 45, 12, 78, 34, 56, 90, 11, 67]\\n}' + + .. dropdown:: Image input + + .. code-block:: python + + import base64 + import httpx + from langchain_core.messages import HumanMessage + + image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8") + message = HumanMessage( + content=[ + {"type": "text", "text": "describe the weather in this image"}, + { + "type": "image_url", + "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}, + }, + ] + ) + ai_msg = llm.invoke([message]) + ai_msg.content + + .. code-block:: python + + "The weather in the image appears to be clear and pleasant. The sky is mostly blue with scattered, light clouds, suggesting a sunny day with minimal cloud cover. There is no indication of rain or strong winds, and the overall scene looks bright and calm. The lush green grass and clear visibility further indicate good weather conditions." + + .. dropdown:: Token usage + + .. code-block:: python + + ai_msg = llm.invoke(messages) + ai_msg.usage_metadata + + .. code-block:: python + + {"input_tokens": 28, "output_tokens": 5, "total_tokens": 33} + + When streaming, set the ``stream_usage`` kwarg: + + .. code-block:: python + + stream = llm.stream(messages, stream_usage=True) + full = next(stream) + for chunk in stream: + full += chunk + full.usage_metadata + + .. code-block:: python + + {"input_tokens": 28, "output_tokens": 5, "total_tokens": 33} + + Alternatively, setting ``stream_usage`` when instantiating the model can be + useful when incorporating ``ChatOpenAI`` into LCEL chains-- or when using + methods like ``.with_structured_output``, which generate chains under the + hood. + + .. code-block:: python + + llm = ChatOpenAI(model="gpt-4o", stream_usage=True) + structured_llm = llm.with_structured_output(...) + + .. dropdown:: Logprobs + + .. code-block:: python + + logprobs_llm = llm.bind(logprobs=True) + ai_msg = logprobs_llm.invoke(messages) + ai_msg.response_metadata["logprobs"] + + .. code-block:: python + + { + "content": [ + { + "token": "J", + "bytes": [74], + "logprob": -4.9617593e-06, + "top_logprobs": [], + }, + { + "token": "'adore", + "bytes": [39, 97, 100, 111, 114, 101], + "logprob": -0.25202933, + "top_logprobs": [], + }, + { + "token": " la", + "bytes": [32, 108, 97], + "logprob": -0.20141791, + "top_logprobs": [], + }, + { + "token": " programmation", + "bytes": [ + 32, + 112, + 114, + 111, + 103, + 114, + 97, + 109, + 109, + 97, + 116, + 105, + 111, + 110, + ], + "logprob": -1.9361265e-07, + "top_logprobs": [], + }, + { + "token": ".", + "bytes": [46], + "logprob": -1.2233183e-05, + "top_logprobs": [], + }, + ] + } + + .. dropdown:: Response metadata + + .. code-block:: python + + ai_msg = llm.invoke(messages) + ai_msg.response_metadata + + .. 
code-block:: python + + { + "token_usage": { + "completion_tokens": 5, + "prompt_tokens": 28, + "total_tokens": 33, + }, + "model_name": "gpt-4o", + "system_fingerprint": "fp_319be4768e", + "finish_reason": "stop", + "logprobs": None, + } + + .. dropdown:: Flex processing + + OpenAI offers a variety of + `service tiers `_. + The "flex" tier offers cheaper pricing for requests, with the trade-off that + responses may take longer and resources might not always be available. + This approach is best suited for non-critical tasks, including model testing, + data enhancement, or jobs that can be run asynchronously. + + To use it, initialize the model with ``service_tier="flex"``: + + .. code-block:: python + + from langchain_openai import ChatOpenAI + + llm = ChatOpenAI(model="o4-mini", service_tier="flex") + + Note that this is a beta feature that is only available for a subset of models. + See OpenAI `docs `_ + for more detail. + + """ # noqa: E501 + + max_tokens: Optional[int] = Field(default=None, alias="max_completion_tokens") + """Maximum number of tokens to generate.""" + + @property + def lc_secrets(self) -> dict[str, str]: + return {"openai_api_key": "OPENAI_API_KEY"} + + @classmethod + def get_lc_namespace(cls) -> list[str]: + """Get the namespace of the langchain object.""" + return ["langchain", "chat_models", "openai"] + + @property + def lc_attributes(self) -> dict[str, Any]: + attributes: dict[str, Any] = {} + + if self.openai_organization: + attributes["openai_organization"] = self.openai_organization + + if self.openai_api_base: + attributes["openai_api_base"] = self.openai_api_base + + if self.openai_proxy: + attributes["openai_proxy"] = self.openai_proxy + + return attributes + + @classmethod + def is_lc_serializable(cls) -> bool: + """Return whether this model can be serialized by Langchain.""" + return True + + @property + def _default_params(self) -> dict[str, Any]: + """Get the default parameters for calling OpenAI API.""" + params = super()._default_params + if "max_tokens" in params: + params["max_completion_tokens"] = params.pop("max_tokens") + + return params + + def _get_request_payload( + self, + input_: LanguageModelInput, + *, + stop: Optional[list[str]] = None, + **kwargs: Any, + ) -> dict: + payload = super()._get_request_payload(input_, stop=stop, **kwargs) + # max_tokens was deprecated in favor of max_completion_tokens + # in September 2024 release + if "max_tokens" in payload: + payload["max_completion_tokens"] = payload.pop("max_tokens") + + # Mutate system message role to "developer" for o-series models + if self.model_name and re.match(r"^o\d", self.model_name): + for message in payload.get("messages", []): + if message["role"] == "system": + message["role"] = "developer" + return payload + + def _stream(self, *args: Any, **kwargs: Any) -> Iterator[AIMessageChunkV1]: + """Route to Chat Completions or Responses API.""" + if self._use_responses_api({**kwargs, **self.model_kwargs}): + return super()._stream_responses(*args, **kwargs) + else: + return super()._stream(*args, **kwargs) + + async def _astream( + self, *args: Any, **kwargs: Any + ) -> AsyncIterator[AIMessageChunkV1]: + """Route to Chat Completions or Responses API.""" + if self._use_responses_api({**kwargs, **self.model_kwargs}): + async for chunk in super()._astream_responses(*args, **kwargs): + yield chunk + else: + async for chunk in super()._astream(*args, **kwargs): + yield chunk + + def with_structured_output( + self, + schema: Optional[_DictOrPydanticClass] = None, + *, + method: 
Literal["function_calling", "json_mode", "json_schema"] = "json_schema", + include_raw: bool = False, + strict: Optional[bool] = None, + **kwargs: Any, + ) -> Runnable[LanguageModelInput, _DictOrPydantic]: + """Model wrapper that returns outputs formatted to match the given schema. + + Args: + schema: + The output schema. Can be passed in as: + + - a JSON Schema, + - a TypedDict class, + - or a Pydantic class, + - an OpenAI function/tool schema. + + If ``schema`` is a Pydantic class then the model output will be a + Pydantic instance of that class, and the model-generated fields will be + validated by the Pydantic class. Otherwise the model output will be a + dict and will not be validated. See :meth:`langchain_core.utils.function_calling.convert_to_openai_tool` + for more on how to properly specify types and descriptions of + schema fields when specifying a Pydantic or TypedDict class. + + method: The method for steering model generation, one of: + + - "json_schema": + Uses OpenAI's Structured Output API: + https://platform.openai.com/docs/guides/structured-outputs + Supported for "gpt-4o-mini", "gpt-4o-2024-08-06", "o1", and later + models. + - "function_calling": + Uses OpenAI's tool-calling (formerly called function calling) + API: https://platform.openai.com/docs/guides/function-calling + - "json_mode": + Uses OpenAI's JSON mode. Note that if using JSON mode then you + must include instructions for formatting the output into the + desired schema into the model call: + https://platform.openai.com/docs/guides/structured-outputs/json-mode + + Learn more about the differences between the methods and which models + support which methods here: + + - https://platform.openai.com/docs/guides/structured-outputs/structured-outputs-vs-json-mode + - https://platform.openai.com/docs/guides/structured-outputs/function-calling-vs-response-format + + include_raw: + If False then only the parsed structured output is returned. If + an error occurs during model output parsing it will be raised. If True + then both the raw model response (an AIMessage) and the parsed model + response will be returned. If an error occurs during output parsing it + will be caught and returned as well. The final output is always a dict + with keys "raw", "parsed", and "parsing_error". + strict: + + - True: + Model output is guaranteed to exactly match the schema. + The input schema will also be validated according to + https://platform.openai.com/docs/guides/structured-outputs/supported-schemas + - False: + Input schema will not be validated and model output will not be + validated. + - None: + ``strict`` argument will not be passed to the model. + + If schema is specified via TypedDict or JSON schema, ``strict`` is not + enabled by default. Pass ``strict=True`` to enable it. + + Note: ``strict`` can only be non-null if ``method`` is + ``"json_schema"`` or ``"function_calling"``. + tools: + A list of tool-like objects to bind to the chat model. Requires that: + + - ``method`` is ``"json_schema"`` (default). + - ``strict=True`` + - ``include_raw=True`` + + If a model elects to call a + tool, the resulting ``AIMessage`` in ``"raw"`` will include tool calls. + + .. dropdown:: Example + + .. 
code-block:: python + + from langchain.chat_models import init_chat_model + from pydantic import BaseModel + + + class ResponseSchema(BaseModel): + response: str + + + def get_weather(location: str) -> str: + \"\"\"Get weather at a location.\"\"\" + pass + + llm = init_chat_model("openai:gpt-4o-mini") + + structured_llm = llm.with_structured_output( + ResponseSchema, + tools=[get_weather], + strict=True, + include_raw=True, + ) + + structured_llm.invoke("What's the weather in Boston?") + + .. code-block:: python + + { + "raw": AIMessage(content="", tool_calls=[...], ...), + "parsing_error": None, + "parsed": None, + } + + kwargs: Additional keyword args are passed through to the model. + + Returns: + A Runnable that takes the same inputs as a :class:`langchain_core.language_models.v1.chat_models.BaseChatModelV1`. + + | If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs an instance of ``schema`` (i.e., a Pydantic object). Otherwise, if ``include_raw`` is False then Runnable outputs a dict. + + | If ``include_raw`` is True, then Runnable outputs a dict with keys: + + - "raw": AIMessage + - "parsed": None if there was a parsing error, otherwise the type depends on the ``schema`` as described above. + - "parsing_error": Optional[BaseException] + + .. versionchanged:: 0.1.20 + + Added support for TypedDict class ``schema``. + + .. versionchanged:: 0.1.21 + + Support for ``strict`` argument added. + Support for ``method="json_schema"`` added. + + .. versionchanged:: 0.3.0 + + ``method`` default changed from "function_calling" to "json_schema". + + .. versionchanged:: 0.3.12 + Support for ``tools`` added. + + .. versionchanged:: 0.3.21 + Pass ``kwargs`` through to the model. + + .. dropdown:: Example: schema=Pydantic class, method="json_schema", include_raw=False, strict=True + + Note, OpenAI has a number of restrictions on what types of schemas can be + provided if ``strict`` = True. When using Pydantic, our model cannot + specify any Field metadata (like min/max constraints) and fields cannot + have default values. + + See all constraints here: https://platform.openai.com/docs/guides/structured-outputs/supported-schemas + + .. code-block:: python + + from typing import Optional + + from langchain_openai import ChatOpenAI + from pydantic import BaseModel, Field + + + class AnswerWithJustification(BaseModel): + '''An answer to the user question along with justification for the answer.''' + + answer: str + justification: Optional[str] = Field( + default=..., description="A justification for the answer." + ) + + + llm = ChatOpenAI(model="gpt-4o", temperature=0) + structured_llm = llm.with_structured_output(AnswerWithJustification) + + structured_llm.invoke( + "What weighs more a pound of bricks or a pound of feathers" + ) + + # -> AnswerWithJustification( + # answer='They weigh the same', + # justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.' + # ) + + .. dropdown:: Example: schema=Pydantic class, method="function_calling", include_raw=False, strict=False + + .. code-block:: python + + from typing import Optional + + from langchain_openai import ChatOpenAI + from pydantic import BaseModel, Field + + + class AnswerWithJustification(BaseModel): + '''An answer to the user question along with justification for the answer.''' + + answer: str + justification: Optional[str] = Field( + default=..., description="A justification for the answer."
+ ) + + + llm = ChatOpenAI(model="gpt-4o", temperature=0) + structured_llm = llm.with_structured_output( + AnswerWithJustification, method="function_calling" + ) + + structured_llm.invoke( + "What weighs more a pound of bricks or a pound of feathers" + ) + + # -> AnswerWithJustification( + # answer='They weigh the same', + # justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.' + # ) + + .. dropdown:: Example: schema=Pydantic class, method="json_schema", include_raw=True + + .. code-block:: python + + from langchain_openai import ChatOpenAI + from pydantic import BaseModel + + + class AnswerWithJustification(BaseModel): + '''An answer to the user question along with justification for the answer.''' + + answer: str + justification: str + + + llm = ChatOpenAI(model="gpt-4o", temperature=0) + structured_llm = llm.with_structured_output( + AnswerWithJustification, include_raw=True + ) + + structured_llm.invoke( + "What weighs more a pound of bricks or a pound of feathers" + ) + # -> { + # 'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}), + # 'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'), + # 'parsing_error': None + # } + + .. dropdown:: Example: schema=TypedDict class, method="json_schema", include_raw=False, strict=False + + .. code-block:: python + + # IMPORTANT: If you are using Python <=3.8, you need to import Annotated + # from typing_extensions, not from typing. + from typing import Optional + from typing_extensions import Annotated, TypedDict + + from langchain_openai import ChatOpenAI + + + class AnswerWithJustification(TypedDict): + '''An answer to the user question along with justification for the answer.''' + + answer: str + justification: Annotated[ + Optional[str], None, "A justification for the answer." + ] + + + llm = ChatOpenAI(model="gpt-4o", temperature=0) + structured_llm = llm.with_structured_output(AnswerWithJustification) + + structured_llm.invoke( + "What weighs more a pound of bricks or a pound of feathers" + ) + # -> { + # 'answer': 'They weigh the same', + # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.' + # } + + .. dropdown:: Example: schema=OpenAI function schema, method="json_schema", include_raw=False + + ..
code-block:: python + + from langchain_openai import ChatOpenAI + + oai_schema = { + 'name': 'AnswerWithJustification', + 'description': 'An answer to the user question along with justification for the answer.', + 'parameters': { + 'type': 'object', + 'properties': { + 'answer': {'type': 'string'}, + 'justification': {'description': 'A justification for the answer.', 'type': 'string'} + }, + 'required': ['answer'] + } + } + + llm = ChatOpenAI(model="gpt-4o", temperature=0) + structured_llm = llm.with_structured_output(oai_schema) + + structured_llm.invoke( + "What weighs more a pound of bricks or a pound of feathers" + ) + # -> { + # 'answer': 'They weigh the same', + # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.' + # } + + .. dropdown:: Example: schema=Pydantic class, method="json_mode", include_raw=True + + .. code-block:: + + from langchain_openai import ChatOpenAI + from pydantic import BaseModel + + class AnswerWithJustification(BaseModel): + answer: str + justification: str + + llm = ChatOpenAI(model="gpt-4o", temperature=0) + structured_llm = llm.with_structured_output( + AnswerWithJustification, + method="json_mode", + include_raw=True + ) + + structured_llm.invoke( + "Answer the following question. " + "Make sure to return a JSON blob with keys 'answer' and 'justification'.\\n\\n" + "What's heavier a pound of bricks or a pound of feathers?" + ) + # -> { + # 'raw': AIMessage(content='{\\n "answer": "They are both the same weight.",\\n "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight." \\n}'), + # 'parsed': AnswerWithJustification(answer='They are both the same weight.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.'), + # 'parsing_error': None + # } + + .. dropdown:: Example: schema=None, method="json_mode", include_raw=True + + .. code-block:: + + structured_llm = llm.with_structured_output(method="json_mode", include_raw=True) + + structured_llm.invoke( + "Answer the following question. " + "Make sure to return a JSON blob with keys 'answer' and 'justification'.\\n\\n" + "What's heavier a pound of bricks or a pound of feathers?" + ) + # -> { + # 'raw': AIMessage(content='{\\n "answer": "They are both the same weight.",\\n "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight." \\n}'), + # 'parsed': { + # 'answer': 'They are both the same weight.', + # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.' 
+ # }, + # 'parsing_error': None + # } + """ # noqa: E501 + return super().with_structured_output( + schema, method=method, include_raw=include_raw, strict=strict, **kwargs + ) + + +def _is_pydantic_class(obj: Any) -> bool: + return isinstance(obj, type) and is_basemodel_subclass(obj) + + +def _lc_tool_call_to_openai_tool_call(tool_call: ToolCall) -> dict: + return { + "type": "function", + "id": tool_call["id"], + "function": { + "name": tool_call["name"], + "arguments": json.dumps(tool_call["args"], ensure_ascii=False), + }, + } + + +def _lc_invalid_tool_call_to_openai_tool_call( + invalid_tool_call: InvalidToolCall, +) -> dict: + return { + "type": "function", + "id": invalid_tool_call["id"], + "function": { + "name": invalid_tool_call["name"], + "arguments": invalid_tool_call["args"], + }, + } + + +def _url_to_size(image_source: str) -> Optional[tuple[int, int]]: + try: + from PIL import Image # type: ignore[import] + except ImportError: + logger.info( + "Unable to count image tokens. To count image tokens please install " + "`pip install -U pillow httpx`." + ) + return None + if _is_url(image_source): + try: + import httpx + except ImportError: + logger.info( + "Unable to count image tokens. To count image tokens please install " + "`pip install -U httpx`." + ) + return None + response = httpx.get(image_source) + response.raise_for_status() + width, height = Image.open(BytesIO(response.content)).size + return width, height + elif _is_b64(image_source): + _, encoded = image_source.split(",", 1) + data = base64.b64decode(encoded) + width, height = Image.open(BytesIO(data)).size + return width, height + else: + return None + + +def _count_image_tokens(width: int, height: int) -> int: + # Reference: https://platform.openai.com/docs/guides/vision/calculating-costs + width, height = _resize(width, height) + h = ceil(height / 512) + w = ceil(width / 512) + return (170 * h * w) + 85 + + +def _is_url(s: str) -> bool: + try: + result = urlparse(s) + return all([result.scheme, result.netloc]) + except Exception as e: + logger.debug(f"Unable to parse URL: {e}") + return False + + +def _is_b64(s: str) -> bool: + return s.startswith("data:image") + + +def _resize(width: int, height: int) -> tuple[int, int]: + # larger side must be <= 2048 + if width > 2048 or height > 2048: + if width > height: + height = (height * 2048) // width + width = 2048 + else: + width = (width * 2048) // height + height = 2048 + # smaller side must be <= 768 + if width > 768 and height > 768: + if width > height: + width = (width * 768) // height + height = 768 + else: + height = (width * 768) // height + width = 768 + return width, height + + +def _convert_to_openai_response_format( + schema: Union[dict[str, Any], type], *, strict: Optional[bool] = None +) -> Union[dict, TypeBaseModel]: + if isinstance(schema, type) and is_basemodel_subclass(schema): + return schema + + if ( + isinstance(schema, dict) + and "json_schema" in schema + and schema.get("type") == "json_schema" + ): + response_format = schema + elif isinstance(schema, dict) and "name" in schema and "schema" in schema: + response_format = {"type": "json_schema", "json_schema": schema} + else: + if strict is None: + if isinstance(schema, dict) and isinstance(schema.get("strict"), bool): + strict = schema["strict"] + else: + strict = False + function = convert_to_openai_function(schema, strict=strict) + function["schema"] = function.pop("parameters") + response_format = {"type": "json_schema", "json_schema": function} + + if ( + strict is not None + and strict is 
not response_format["json_schema"].get("strict") + and isinstance(schema, dict) + ): + msg = ( + f"Output schema already has 'strict' value set to " + f"{schema['json_schema']['strict']} but 'strict' also passed in to " + f"with_structured_output as {strict}. Please make sure that " + f"'strict' is only specified in one place." + ) + raise ValueError(msg) + return response_format + + +def _oai_structured_outputs_parser( + ai_msg: AIMessageV1, schema: type[_BM] +) -> Optional[PydanticBaseModel]: + if parsed := ai_msg.parsed: + if isinstance(parsed, dict): + return schema(**parsed) + else: + return parsed + elif any( + block["type"] == "non_standard" and block["value"].get("type") == "refusal" + for block in ai_msg.content + ): + refusal = next( + block["value"]["text"] + for block in ai_msg.content + if block["type"] == "non_standard" + and block["value"].get("type") == "refusal" + ) + raise OpenAIRefusalError(refusal) + elif ai_msg.tool_calls: + return None + else: + raise ValueError( + "Structured Output response does not have a 'parsed' field nor a 'refusal' " + f"field. Received message:\n\n{ai_msg}" + ) + + +class OpenAIRefusalError(Exception): + """Error raised when OpenAI Structured Outputs API returns a refusal. + + When using OpenAI's Structured Outputs API with user-generated input, the model + may occasionally refuse to fulfill the request for safety reasons. + + See here for more on refusals: + https://platform.openai.com/docs/guides/structured-outputs/refusals + + .. versionadded:: 0.1.21 + """ + + +def _create_usage_metadata(oai_token_usage: dict) -> UsageMetadata: + input_tokens = oai_token_usage.get("prompt_tokens") or 0 + output_tokens = oai_token_usage.get("completion_tokens") or 0 + total_tokens = oai_token_usage.get("total_tokens") or input_tokens + output_tokens + input_token_details: dict = { + "audio": (oai_token_usage.get("prompt_tokens_details") or {}).get( + "audio_tokens" + ), + "cache_read": (oai_token_usage.get("prompt_tokens_details") or {}).get( + "cached_tokens" + ), + } + output_token_details: dict = { + "audio": (oai_token_usage.get("completion_tokens_details") or {}).get( + "audio_tokens" + ), + "reasoning": (oai_token_usage.get("completion_tokens_details") or {}).get( + "reasoning_tokens" + ), + } + return UsageMetadata( + input_tokens=input_tokens, + output_tokens=output_tokens, + total_tokens=total_tokens, + input_token_details=InputTokenDetails( + **{k: v for k, v in input_token_details.items() if v is not None} + ), + output_token_details=OutputTokenDetails( + **{k: v for k, v in output_token_details.items() if v is not None} + ), + ) + + +def _create_usage_metadata_responses(oai_token_usage: dict) -> UsageMetadata: + input_tokens = oai_token_usage.get("input_tokens", 0) + output_tokens = oai_token_usage.get("output_tokens", 0) + total_tokens = oai_token_usage.get("total_tokens", input_tokens + output_tokens) + output_token_details: dict = { + "reasoning": (oai_token_usage.get("output_tokens_details") or {}).get( + "reasoning_tokens" + ) + } + input_token_details: dict = { + "cache_read": (oai_token_usage.get("input_tokens_details") or {}).get( + "cached_tokens" + ) + } + return UsageMetadata( + input_tokens=input_tokens, + output_tokens=output_tokens, + total_tokens=total_tokens, + input_token_details=InputTokenDetails( + **{k: v for k, v in input_token_details.items() if v is not None} + ), + output_token_details=OutputTokenDetails( + **{k: v for k, v in output_token_details.items() if v is not None} + ), + ) + + +def _is_builtin_tool(tool: dict) -> 
bool: + return "type" in tool and tool["type"] != "function" + + +def _use_responses_api(payload: dict) -> bool: + uses_builtin_tools = "tools" in payload and any( + _is_builtin_tool(tool) for tool in payload["tools"] + ) + responses_only_args = { + "include", + "previous_response_id", + "reasoning", + "text", + "truncation", + } + return bool(uses_builtin_tools or responses_only_args.intersection(payload)) + + +def _get_last_messages( + messages: Sequence[MessageV1], +) -> tuple[Sequence[MessageV1], Optional[str]]: + """ + Return + 1. Every message after the most-recent AIMessage that has a non-empty + ``id`` (may be an empty list), + 2. That id. + + If the most-recent AIMessage does not have an id (or there is no + AIMessage at all) the entire conversation is returned together with ``None``. + """ + for i in range(len(messages) - 1, -1, -1): + msg = messages[i] + if isinstance(msg, AIMessageV1): + response_id = msg.id + if response_id and response_id.startswith("resp_"): + return messages[i + 1 :], response_id + else: + return messages, None + + return messages, None + + +def _construct_responses_api_payload( + messages: Sequence[MessageV1], payload: dict +) -> dict: + # Rename legacy parameters + for legacy_token_param in ["max_tokens", "max_completion_tokens"]: + if legacy_token_param in payload: + payload["max_output_tokens"] = payload.pop(legacy_token_param) + if "reasoning_effort" in payload and "reasoning" not in payload: + payload["reasoning"] = {"effort": payload.pop("reasoning_effort")} + + payload["input"] = _construct_responses_api_input(messages) + if tools := payload.pop("tools", None): + new_tools: list = [] + for tool in tools: + # chat api: {"type": "function", "function": {"name": "...", "description": "...", "parameters": {...}, "strict": ...}} # noqa: E501 + # responses api: {"type": "function", "name": "...", "description": "...", "parameters": {...}, "strict": ...} # noqa: E501 + if tool["type"] == "function" and "function" in tool: + new_tools.append({"type": "function", **tool["function"]}) + else: + if tool["type"] == "image_generation": + # Handle partial images (not yet supported) + if "partial_images" in tool: + raise NotImplementedError( + "Partial image generation is not yet supported " + "via the LangChain ChatOpenAI client. Please " + "drop the 'partial_images' key from the image_generation " + "tool." + ) + elif payload.get("stream") and "partial_images" not in tool: + # OpenAI requires this parameter be set; we ignore it during + # streaming. + tool["partial_images"] = 1 + else: + pass + + new_tools.append(tool) + + payload["tools"] = new_tools + if tool_choice := payload.pop("tool_choice", None): + # chat api: {"type": "function", "function": {"name": "..."}} + # responses api: {"type": "function", "name": "..."} + if ( + isinstance(tool_choice, dict) + and tool_choice["type"] == "function" + and "function" in tool_choice + ): + payload["tool_choice"] = {"type": "function", **tool_choice["function"]} + else: + payload["tool_choice"] = tool_choice + + # Structured output + if schema := payload.pop("response_format", None): + if payload.get("text"): + text = payload["text"] + raise ValueError( + "Can specify at most one of 'response_format' or 'text', received both:" + f"\n{schema=}\n{text=}" + ) + + # For pydantic + non-streaming case, we use responses.parse. + # Otherwise, we use responses.create. 
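# Illustrative sketch, assuming a Pydantic schema such as
# ``class Foo(BaseModel): answer: str``. The streaming (or non-Pydantic)
# branch below is expected to leave the payload roughly as:
#
#     payload["text"] = {
#         "format": {
#             "type": "json_schema",
#             "name": "Foo",
#             "schema": Foo.model_json_schema(),  # plus strict-mode tweaks
#             "strict": True,
#         }
#     }
#
# while the non-streaming Pydantic branch instead defers parsing to the client
# by setting ``payload["text_format"] = Foo``.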
+ strict = payload.pop("strict", None) + if not payload.get("stream") and _is_pydantic_class(schema): + payload["text_format"] = schema + else: + if _is_pydantic_class(schema): + schema_dict = schema.model_json_schema() + strict = True + else: + schema_dict = schema + if schema_dict == {"type": "json_object"}: # JSON mode + payload["text"] = {"format": {"type": "json_object"}} + elif ( + ( + response_format := _convert_to_openai_response_format( + schema_dict, strict=strict + ) + ) + and (isinstance(response_format, dict)) + and (response_format["type"] == "json_schema") + ): + payload["text"] = { + "format": {"type": "json_schema", **response_format["json_schema"]} + } + else: + pass + return payload + + +def _make_computer_call_output_from_message(message: ToolMessageV1) -> Optional[dict]: + computer_call_output = None + for block in message.content: + if ( + block["type"] == "non_standard" + and block["value"].get("type") == "computer_call_output" + ): + computer_call_output = block["value"] + break + + return computer_call_output + + +def _pop_index_and_sub_index(block: dict) -> dict: + """When streaming, langchain-core uses the ``index`` key to aggregate + text blocks. OpenAI API does not support this key, so we need to remove it. + """ + new_block = {k: v for k, v in block.items() if k != "index"} + if "summary" in new_block and isinstance(new_block["summary"], list): + new_summary = [] + for sub_block in new_block["summary"]: + new_sub_block = {k: v for k, v in sub_block.items() if k != "index"} + new_summary.append(new_sub_block) + new_block["summary"] = new_summary + return new_block + + +def _construct_responses_api_input(messages: Sequence[MessageV1]) -> list: + """Construct the input for the OpenAI Responses API.""" + input_ = [] + for lc_msg in messages: + msg = _convert_message_to_dict(lc_msg, responses_api=True) + if isinstance(lc_msg, AIMessageV1): + msg["content"] = _convert_from_v1_to_responses( + msg["content"], lc_msg.tool_calls + ) + else: + # Get content from non-standard content blocks + for i, block in enumerate(msg["content"]): + if block.get("type") == "non_standard": + msg["content"][i] = block["value"] + + # "name" parameter unsupported + if "name" in msg: + msg.pop("name") + if msg["role"] == "tool": + tool_output = msg["content"] + computer_call_output = _make_computer_call_output_from_message( + cast(ToolMessageV1, lc_msg) + ) + if computer_call_output: + input_.append(computer_call_output) + else: + if not isinstance(tool_output, str): + tool_output = _stringify(tool_output) + function_call_output = { + "type": "function_call_output", + "output": tool_output, + "call_id": msg["tool_call_id"], + } + input_.append(function_call_output) + elif msg["role"] == "assistant": + if isinstance(msg.get("content"), list): + for block in msg["content"]: + if isinstance(block, dict) and (block_type := block.get("type")): + # Aggregate content blocks for a single message + if block_type in ("text", "output_text", "refusal"): + msg_id = block.get("id") + if block_type in ("text", "output_text"): + new_block = { + "type": "output_text", + "text": block["text"], + "annotations": block.get("annotations") or [], + } + elif block_type == "refusal": + new_block = { + "type": "refusal", + "refusal": block["refusal"], + } + for item in input_: + if (item_id := item.get("id")) and item_id == msg_id: + # If existing block with this ID, append to it + if "content" not in item: + item["content"] = [] + item["content"].append(new_block) + break + else: + # If no block with this ID, 
create a new one + input_.append( + { + "type": "message", + "content": [new_block], + "role": "assistant", + "id": msg_id, + } + ) + elif block_type in ( + "reasoning", + "web_search_call", + "file_search_call", + "function_call", + "computer_call", + "code_interpreter_call", + "mcp_call", + "mcp_list_tools", + "mcp_approval_request", + ): + input_.append(_pop_index_and_sub_index(block)) + elif block_type == "image_generation_call": + # A previous image generation call can be referenced by ID + input_.append( + {"type": "image_generation_call", "id": block["id"]} + ) + else: + pass + elif isinstance(msg.get("content"), str): + input_.append( + { + "type": "message", + "role": "assistant", + "content": [{"type": "output_text", "text": msg["content"]}], + } + ) + + # Add function calls from tool calls if not already present + if tool_calls := msg.pop("tool_calls", None): + content_call_ids = { + block["call_id"] + for block in input_ + if block.get("type") == "function_call" and "call_id" in block + } + for tool_call in tool_calls: + if tool_call["id"] not in content_call_ids: + function_call = { + "type": "function_call", + "name": tool_call["function"]["name"], + "arguments": tool_call["function"]["arguments"], + "call_id": tool_call["id"], + } + input_.append(function_call) + + elif msg["role"] in ("user", "system", "developer"): + if isinstance(msg["content"], list): + new_blocks = [] + non_message_item_types = ("mcp_approval_response",) + for block in msg["content"]: + # chat api: {"type": "text", "text": "..."} + # responses api: {"type": "input_text", "text": "..."} + if block["type"] == "text": + new_blocks.append({"type": "input_text", "text": block["text"]}) + # chat api: {"type": "image_url", "image_url": {"url": "...", "detail": "..."}} # noqa: E501 + # responses api: {"type": "image_url", "image_url": "...", "detail": "...", "file_id": "..."} # noqa: E501 + elif block["type"] == "image_url": + new_block = { + "type": "input_image", + "image_url": block["image_url"]["url"], + } + if block["image_url"].get("detail"): + new_block["detail"] = block["image_url"]["detail"] + new_blocks.append(new_block) + elif block["type"] == "file": + new_block = {"type": "input_file", **block["file"]} + new_blocks.append(new_block) + elif block["type"] in ("input_text", "input_image", "input_file"): + new_blocks.append(block) + elif block["type"] in non_message_item_types: + input_.append(block) + else: + pass + if len(new_blocks) == 1 and new_blocks[0]["type"] == "input_text": + msg["content"] = new_blocks[0]["text"] + else: + msg["content"] = new_blocks + if msg["content"]: + input_.append(msg) + else: + input_.append(msg) + else: + input_.append(msg) + + return input_ + + +def _construct_lc_result_from_responses_api( + response: Response, + schema: Optional[type[_BM]] = None, + metadata: Optional[dict] = None, +) -> AIMessageV1: + """Construct ChatResponse from OpenAI Response API response.""" + if response.error: + raise ValueError(response.error) + + response_metadata = { + k: v + for k, v in response.model_dump(exclude_none=True, mode="json").items() + if k + in ( + "created_at", + # backwards compatibility: keep response ID in response_metadata as well as + # top-level-id + "id", + "incomplete_details", + "metadata", + "object", + "status", + "user", + "model", + "service_tier", + ) + } + if metadata: + response_metadata.update(metadata) + # for compatibility with chat completion calls. 
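# Illustrative sketch of the resulting ``response_metadata`` (placeholder
# values), assuming a typical Responses API response:
#
#     {
#         "id": "resp_...",
#         "created_at": 1234567890,
#         "model": "gpt-4o-mini",
#         "object": "response",
#         "status": "completed",
#         "model_provider": "openai",   # added just below
#         "model_name": "gpt-4o-mini",  # mirrors "model", added just below
#     }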
+ response_metadata["model_provider"] = "openai" + response_metadata["model_name"] = response_metadata.get("model") + if response.usage: + usage_metadata = _create_usage_metadata_responses(response.usage.model_dump()) + else: + usage_metadata = None + + content_blocks: list = [] + tool_calls: list[ToolCall] = [] + invalid_tool_calls: list[InvalidToolCall] = [] + parsed = None + for output in response.output: + if output.type == "message": + for content in output.content: + if content.type == "output_text": + block = { + "type": "text", + "text": content.text, + "annotations": [ + annotation.model_dump() + for annotation in content.annotations + ], + "id": output.id, + } + content_blocks.append(block) + if hasattr(content, "parsed"): + parsed = content.parsed + if content.type == "refusal": + content_blocks.append( + {"type": "refusal", "refusal": content.refusal, "id": output.id} + ) + elif output.type == "function_call": + content_blocks.append(output.model_dump(exclude_none=True, mode="json")) + try: + args = json.loads(output.arguments, strict=False) + error = None + except JSONDecodeError as e: + args = output.arguments + error = str(e) + if error is None: + tool_call = { + "type": "tool_call", + "name": output.name, + "args": args, + "id": output.call_id, + } + tool_calls.append(cast(ToolCall, tool_call)) + else: + tool_call = { + "type": "invalid_tool_call", + "name": output.name, + "args": args, + "id": output.call_id, + "error": error, + } + invalid_tool_calls.append(cast(InvalidToolCall, tool_call)) + elif output.type in ( + "reasoning", + "web_search_call", + "file_search_call", + "computer_call", + "code_interpreter_call", + "mcp_call", + "mcp_list_tools", + "mcp_approval_request", + "image_generation_call", + ): + content_blocks.append(output.model_dump(exclude_none=True, mode="json")) + + # Workaround for parsing structured output in the streaming case. + # from openai import OpenAI + # from pydantic import BaseModel + + # class Foo(BaseModel): + # response: str + + # client = OpenAI() + + # client.responses.parse( + # model="gpt-4o-mini", + # input=[{"content": "how are ya", "role": "user"}], + # text_format=Foo, + # stream=True, # <-- errors + # ) + if ( + schema is not None + and not parsed + and response.output_text # tool calls can generate empty output text + and response.text + and (text_config := response.text.model_dump()) + and (format_ := text_config.get("format", {})) + and (format_.get("type") == "json_schema") + ): + try: + parsed_dict = json.loads(response.output_text) + if schema and _is_pydantic_class(schema): + parsed = schema(**parsed_dict) + else: + parsed = parsed_dict + except json.JSONDecodeError: + pass + + content_v1 = _convert_to_v1_from_responses(content_blocks) + message = AIMessageV1( + content=content_v1, + id=response.id, + usage_metadata=usage_metadata, + response_metadata=cast(ResponseMetadata, response_metadata), + tool_calls=tool_calls, + invalid_tool_calls=invalid_tool_calls, + parsed=parsed, + ) + if response.tools and any( + tool.type == "image_generation" for tool in response.tools + ): + # Get mime_time from tool definition and add to image generations + # if missing (primarily for tracing purposes). 
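# Illustrative sketch (placeholder values): with an image_generation tool
# declared as ``{"type": "image_generation", "output_format": "png"}``, an
# image block that lacks a MIME type is expected to be filled in as:
#
#     {"type": "image", "base64": "<...>", "mime_type": "image/png", ...}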
+ image_generation_call = next( + tool for tool in response.tools if tool.type == "image_generation" + ) + if image_generation_call.output_format: + mime_type = f"image/{image_generation_call.output_format}" + for content_block in message.content: + # OK to mutate output message + if ( + isinstance(content_block, dict) + and content_block.get("type") == "image" + and "base64" in content_block + and "mime_type" not in block + ): + block["mime_type"] = mime_type + + return message + + +def _convert_responses_chunk_to_generation_chunk( + chunk: Any, + current_index: int, # index in content + current_output_index: int, # index in Response output + current_sub_index: int, # index of content block in output item + schema: Optional[type[_BM]] = None, + metadata: Optional[dict] = None, +) -> tuple[int, int, int, Optional[AIMessageChunkV1]]: + def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None: + """Advance indexes tracked during streaming. + + Example: we stream a response item of the form: + + .. code-block:: python + + { + "type": "message", # output_index 0 + "role": "assistant", + "id": "msg_123", + "content": [ + {"type": "output_text", "text": "foo"}, # sub_index 0 + {"type": "output_text", "text": "bar"}, # sub_index 1 + ], + } + + This is a single item with a shared ``output_index`` and two sub-indexes, one + for each content block. + + This will be processed into an AIMessage with two text blocks: + + .. code-block:: python + + AIMessage( + [ + {"type": "text", "text": "foo", "id": "msg_123"}, # index 0 + {"type": "text", "text": "bar", "id": "msg_123"}, # index 1 + ] + ) + + This function just identifies updates in output or sub-indexes and increments + the current index accordingly. + """ + nonlocal current_index, current_output_index, current_sub_index + if sub_idx is None: + if current_output_index != output_idx: + current_index += 1 + else: + if (current_output_index != output_idx) or (current_sub_index != sub_idx): + current_index += 1 + current_sub_index = sub_idx + current_output_index = output_idx + + content = [] + tool_call_chunks: list = [] + parsed = None + if metadata: + response_metadata = cast(ResponseMetadata, metadata) + else: + response_metadata = {} + usage_metadata = None + id = None + if chunk.type == "response.output_text.delta": + _advance(chunk.output_index, chunk.content_index) + content.append({"type": "text", "text": chunk.delta, "index": current_index}) + elif chunk.type == "response.output_text.annotation.added": + _advance(chunk.output_index, chunk.content_index) + if isinstance(chunk.annotation, dict): + # Appears to be a breaking change in openai==1.82.0 + annotation = chunk.annotation + else: + annotation = chunk.annotation.model_dump(exclude_none=True, mode="json") + content.append( + { + "type": "text", + "text": "", + "annotations": [annotation], + "index": current_index, + } + ) + elif chunk.type == "response.output_text.done": + content.append( + {"type": "text", "text": "", "id": chunk.item_id, "index": current_index} + ) + elif chunk.type == "response.created": + id = chunk.response.id + response_metadata["id"] = chunk.response.id # Backwards compatibility + elif chunk.type == "response.completed": + msg = _construct_lc_result_from_responses_api(chunk.response, schema=schema) + if msg.parsed: + parsed = msg.parsed + usage_metadata = msg.usage_metadata + response_metadata = { + **response_metadata, + **{k: v for k, v in msg.response_metadata.items() if k != "id"}, # type: ignore[typeddict-item] + } + elif chunk.type == 
"response.output_item.added" and chunk.item.type == "message": + pass + elif ( + chunk.type == "response.output_item.added" + and chunk.item.type == "function_call" + ): + _advance(chunk.output_index) + tool_call_chunks.append( + { + "type": "tool_call_chunk", + "name": chunk.item.name, + "args": chunk.item.arguments, + "id": chunk.item.call_id, + "index": current_index, + } + ) + content.append( + { + "type": "function_call", + "name": chunk.item.name, + "arguments": chunk.item.arguments, + "call_id": chunk.item.call_id, + "id": chunk.item.id, + "index": current_index, + } + ) + elif chunk.type == "response.output_item.done" and chunk.item.type in ( + "web_search_call", + "file_search_call", + "computer_call", + "code_interpreter_call", + "mcp_call", + "mcp_list_tools", + "mcp_approval_request", + "image_generation_call", + ): + _advance(chunk.output_index) + tool_output = chunk.item.model_dump(exclude_none=True, mode="json") + tool_output["index"] = current_index + content.append(tool_output) + elif chunk.type == "response.function_call_arguments.delta": + _advance(chunk.output_index) + tool_call_chunks.append( + {"type": "tool_call_chunk", "args": chunk.delta, "index": current_index} + ) + content.append( + {"type": "function_call", "arguments": chunk.delta, "index": current_index} + ) + elif chunk.type == "response.refusal.done": + content.append({"type": "refusal", "refusal": chunk.refusal}) + elif chunk.type == "response.output_item.added" and chunk.item.type == "reasoning": + _advance(chunk.output_index) + current_sub_index = 0 + reasoning = chunk.item.model_dump(exclude_none=True, mode="json") + reasoning["index"] = current_index + content.append(reasoning) + elif chunk.type == "response.reasoning_summary_part.added": + block: dict = {"type": "reasoning", "reasoning": ""} + if chunk.summary_index > 0: + _advance(chunk.output_index, chunk.summary_index) + block["id"] = chunk.item_id + block["index"] = current_index + content.append(block) + elif chunk.type == "response.image_generation_call.partial_image": + # Partial images are not supported yet. 
+ pass + elif chunk.type == "response.reasoning_summary_text.delta": + _advance(chunk.output_index) + content.append( + { + "summary": [ + { + "index": chunk.summary_index, + "type": "summary_text", + "text": chunk.delta, + } + ], + "index": current_index, + "type": "reasoning", + } + ) + else: + return current_index, current_output_index, current_sub_index, None + + content_v1 = _convert_to_v1_from_responses(content) + for content_block in content_v1: + if ( + isinstance(content_block, dict) + and content_block.get("index", -1) > current_index + ): + # blocks were added for v1 + current_index = content_block["index"] + + message = AIMessageChunkV1( + content=content_v1, + tool_call_chunks=tool_call_chunks, + usage_metadata=usage_metadata, + response_metadata=response_metadata, + parsed=parsed, + id=id, + ) + + return (current_index, current_output_index, current_sub_index, message) diff --git a/libs/partners/openai/pyproject.toml b/libs/partners/openai/pyproject.toml index 5bffdabcf44..a54595796de 100644 --- a/libs/partners/openai/pyproject.toml +++ b/libs/partners/openai/pyproject.toml @@ -56,6 +56,8 @@ langchain-tests = { path = "../../standard-tests", editable = true } [tool.mypy] disallow_untyped_defs = "True" +disable_error_code = ["typeddict-unknown-key"] + [[tool.mypy.overrides]] module = "transformers" ignore_missing_imports = true diff --git a/libs/partners/openai/tests/cassettes/test_web_search.yaml.gz b/libs/partners/openai/tests/cassettes/test_web_search.yaml.gz index e99f1c2e13a..a202dfe9c61 100644 Binary files a/libs/partners/openai/tests/cassettes/test_web_search.yaml.gz and b/libs/partners/openai/tests/cassettes/test_web_search.yaml.gz differ diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index 527eece1241..ee3b47f7ed5 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -14,16 +14,24 @@ from langchain_core.messages import ( HumanMessage, MessageLikeRepresentation, ) +from langchain_core.messages.v1 import AIMessage as AIMessageV1 +from langchain_core.messages.v1 import AIMessageChunk as AIMessageChunkV1 +from langchain_core.messages.v1 import HumanMessage as HumanMessageV1 from pydantic import BaseModel from typing_extensions import TypedDict -from langchain_openai import ChatOpenAI +from langchain_openai import ChatOpenAI, ChatOpenAIV1 MODEL_NAME = "gpt-4o-mini" -def _check_response(response: Optional[BaseMessage]) -> None: - assert isinstance(response, AIMessage) +def _check_response(response: Optional[BaseMessage], output_version) -> None: + if output_version == "v1": + assert isinstance(response, AIMessageV1) or isinstance( + response, AIMessageChunkV1 + ) + else: + assert isinstance(response, AIMessage) assert isinstance(response.content, list) for block in response.content: assert isinstance(block, dict) @@ -41,7 +49,10 @@ def _check_response(response: Optional[BaseMessage]) -> None: for key in ["end_index", "start_index", "title", "type", "url"] ) - text_content = response.text() + if output_version == "v1": + text_content = response.text + else: + text_content = response.text() assert isinstance(text_content, str) assert text_content assert response.usage_metadata @@ -56,22 +67,34 @@ def _check_response(response: Optional[BaseMessage]) -> None: @pytest.mark.vcr @pytest.mark.parametrize("output_version", 
["responses/v1", "v1"]) def test_web_search(output_version: Literal["responses/v1", "v1"]) -> None: - llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version) + if output_version == "v1": + llm = ChatOpenAIV1(model=MODEL_NAME) + else: + llm = ChatOpenAI(model=MODEL_NAME, output_version=output_version) first_response = llm.invoke( "What was a positive news story from today?", tools=[{"type": "web_search_preview"}], ) - _check_response(first_response) + _check_response(first_response, output_version) # Test streaming - full: Optional[BaseMessageChunk] = None - for chunk in llm.stream( - "What was a positive news story from today?", - tools=[{"type": "web_search_preview"}], - ): - assert isinstance(chunk, AIMessageChunk) - full = chunk if full is None else full + chunk - _check_response(full) + if isinstance(llm, ChatOpenAIV1): + full: Optional[AIMessageChunkV1] = None + for chunk in llm.stream( + "What was a positive news story from today?", + tools=[{"type": "web_search_preview"}], + ): + assert isinstance(chunk, AIMessageChunkV1) + full = chunk if full is None else full + chunk + else: + full: Optional[BaseMessageChunk] = None + for chunk in llm.stream( + "What was a positive news story from today?", + tools=[{"type": "web_search_preview"}], + ): + assert isinstance(chunk, AIMessageChunk) + full = chunk if full is None else full + chunk + _check_response(full, output_version) # Use OpenAI's stateful API response = llm.invoke( @@ -79,38 +102,26 @@ def test_web_search(output_version: Literal["responses/v1", "v1"]) -> None: tools=[{"type": "web_search_preview"}], previous_response_id=first_response.response_metadata["id"], ) - _check_response(response) + _check_response(response, output_version) # Manually pass in chat history response = llm.invoke( [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What was a positive news story from today?", - } - ], - }, + {"role": "user", "content": "What was a positive news story from today?"}, first_response, - { - "role": "user", - "content": [{"type": "text", "text": "what about a negative one"}], - }, + {"role": "user", "content": "what about a negative one"}, ], tools=[{"type": "web_search_preview"}], ) - _check_response(response) + _check_response(response, output_version) # Bind tool response = llm.bind_tools([{"type": "web_search_preview"}]).invoke( "What was a positive news story from today?" 
) - _check_response(response) + _check_response(response, output_version) for msg in [first_response, full, response]: - assert isinstance(msg, AIMessage) block_types = [block["type"] for block in msg.content] # type: ignore[index] if output_version == "responses/v1": assert block_types == ["web_search_call", "text"] @@ -125,7 +136,7 @@ async def test_web_search_async() -> None: "What was a positive news story from today?", tools=[{"type": "web_search_preview"}], ) - _check_response(response) + _check_response(response, "v0") assert response.response_metadata["status"] # Test streaming @@ -137,7 +148,7 @@ async def test_web_search_async() -> None: assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk assert isinstance(full, AIMessageChunk) - _check_response(full) + _check_response(full, "v0") for msg in [response, full]: assert msg.additional_kwargs["tool_outputs"] @@ -148,8 +159,8 @@ async def test_web_search_async() -> None: @pytest.mark.default_cassette("test_function_calling.yaml.gz") @pytest.mark.vcr -@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"]) -def test_function_calling(output_version: Literal["v0", "responses/v1", "v1"]) -> None: +@pytest.mark.parametrize("output_version", ["v0", "responses/v1"]) +def test_function_calling(output_version: Literal["v0", "responses/v1"]) -> None: def multiply(x: int, y: int) -> int: """return x * y""" return x * y @@ -170,7 +181,33 @@ def test_function_calling(output_version: Literal["v0", "responses/v1", "v1"]) - assert set(full.tool_calls[0]["args"]) == {"x", "y"} response = bound_llm.invoke("What was a positive news story from today?") - _check_response(response) + _check_response(response, output_version) + + +@pytest.mark.default_cassette("test_function_calling.yaml.gz") +@pytest.mark.vcr +def test_function_calling_v1() -> None: + def multiply(x: int, y: int) -> int: + """return x * y""" + return x * y + + llm = ChatOpenAIV1(model=MODEL_NAME) + bound_llm = llm.bind_tools([multiply, {"type": "web_search_preview"}]) + ai_msg = bound_llm.invoke("whats 5 * 4") + assert len(ai_msg.tool_calls) == 1 + assert ai_msg.tool_calls[0]["name"] == "multiply" + assert set(ai_msg.tool_calls[0]["args"]) == {"x", "y"} + + full: Any = None + for chunk in bound_llm.stream("whats 5 * 4"): + assert isinstance(chunk, AIMessageChunkV1) + full = chunk if full is None else full + chunk + assert len(full.tool_calls) == 1 + assert full.tool_calls[0]["name"] == "multiply" + assert set(full.tool_calls[0]["args"]) == {"x", "y"} + + response = bound_llm.invoke("What was a positive news story from today?") + _check_response(response, "v1") class Foo(BaseModel): @@ -183,10 +220,8 @@ class FooDict(TypedDict): @pytest.mark.default_cassette("test_parsed_pydantic_schema.yaml.gz") @pytest.mark.vcr -@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"]) -def test_parsed_pydantic_schema( - output_version: Literal["v0", "responses/v1", "v1"], -) -> None: +@pytest.mark.parametrize("output_version", ["v0", "responses/v1"]) +def test_parsed_pydantic_schema(output_version: Literal["v0", "responses/v1"]) -> None: llm = ChatOpenAI( model=MODEL_NAME, use_responses_api=True, output_version=output_version ) @@ -206,6 +241,28 @@ def test_parsed_pydantic_schema( assert parsed.response +@pytest.mark.default_cassette("test_parsed_pydantic_schema.yaml.gz") +@pytest.mark.vcr +def test_parsed_pydantic_schema_v1() -> None: + llm = ChatOpenAIV1(model=MODEL_NAME, use_responses_api=True) + response = llm.invoke("how are ya", 
response_format=Foo)
+    parsed = Foo(**json.loads(response.text))
+    assert parsed == response.parsed
+    assert parsed.response
+
+    # Test stream
+    full: Optional[AIMessageChunkV1] = None
+    chunks = []
+    for chunk in llm.stream("how are ya", response_format=Foo):
+        assert isinstance(chunk, AIMessageChunkV1)
+        full = chunk if full is None else full + chunk
+        chunks.append(chunk)
+    assert isinstance(full, AIMessageChunkV1)
+    parsed = Foo(**json.loads(full.text))
+    assert parsed == full.parsed
+    assert parsed.response
+
+
 async def test_parsed_pydantic_schema_async() -> None:
     llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
     response = await llm.ainvoke("how are ya", response_format=Foo)
@@ -311,8 +368,8 @@ def test_function_calling_and_structured_output() -> None:
 
 @pytest.mark.default_cassette("test_reasoning.yaml.gz")
 @pytest.mark.vcr
-@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"])
-def test_reasoning(output_version: Literal["v0", "responses/v1", "v1"]) -> None:
+@pytest.mark.parametrize("output_version", ["v0", "responses/v1"])
+def test_reasoning(output_version: Literal["v0", "responses/v1"]) -> None:
     llm = ChatOpenAI(
         model="o4-mini", use_responses_api=True, output_version=output_version
     )
@@ -337,6 +394,26 @@ def test_reasoning(output_version: Literal["v0", "responses/v1", "v1"]) -> None:
     assert block_types == ["reasoning", "text"]
 
 
+@pytest.mark.default_cassette("test_reasoning.yaml.gz")
+@pytest.mark.vcr
+def test_reasoning_v1() -> None:
+    llm = ChatOpenAIV1(model="o4-mini", use_responses_api=True)
+    response = llm.invoke("Hello", reasoning={"effort": "low"})
+    assert isinstance(response, AIMessageV1)
+
+    # Test init params + streaming
+    llm = ChatOpenAIV1(model="o4-mini", reasoning={"effort": "low"})
+    full: Optional[AIMessageChunkV1] = None
+    for chunk in llm.stream("Hello"):
+        assert isinstance(chunk, AIMessageChunkV1)
+        full = chunk if full is None else full + chunk
+    assert isinstance(full, AIMessageChunkV1)
+
+    for msg in [response, full]:
+        block_types = [block["type"] for block in msg.content]
+        assert block_types == ["reasoning", "text"]
+
+
 def test_stateful_api() -> None:
     llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
     response = llm.invoke("how are you, my name is Bobo")
@@ -380,14 +457,14 @@ def test_file_search() -> None:
 
     input_message = {"role": "user", "content": "What is deep research by OpenAI?"}
     response = llm.invoke([input_message], tools=[tool])
-    _check_response(response)
+    _check_response(response, "v0")
 
     full: Optional[BaseMessageChunk] = None
     for chunk in llm.stream([input_message], tools=[tool]):
         assert isinstance(chunk, AIMessageChunk)
         full = chunk if full is None else full + chunk
     assert isinstance(full, AIMessageChunk)
-    _check_response(full)
+    _check_response(full, "v0")
 
     next_message = {"role": "user", "content": "Thank you."}
     _ = llm.invoke([input_message, full, next_message])
@@ -395,9 +472,9 @@ def test_file_search() -> None:
 
 @pytest.mark.default_cassette("test_stream_reasoning_summary.yaml.gz")
 @pytest.mark.vcr
-@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"])
+@pytest.mark.parametrize("output_version", ["v0", "responses/v1"])
 def test_stream_reasoning_summary(
-    output_version: Literal["v0", "responses/v1", "v1"],
+    output_version: Literal["v0", "responses/v1"],
 ) -> None:
     llm = ChatOpenAI(
         model="o4-mini",
@@ -424,7 +501,8 @@ def test_stream_reasoning_summary(
             assert isinstance(block["type"], str)
             assert isinstance(block["text"], str)
             assert block["text"]
-    elif output_version == "responses/v1":
+    else:
+        # output_version == "responses/v1"
         reasoning = next(
             block
             for block in response_1.content
@@ -438,18 +516,6 @@ def test_stream_reasoning_summary(
             assert isinstance(block["type"], str)
             assert isinstance(block["text"], str)
             assert block["text"]
-    else:
-        # v1
-        total_reasoning_blocks = 0
-        for block in response_1.content:
-            if block["type"] == "reasoning":
-                total_reasoning_blocks += 1
-                assert isinstance(block["id"], str) and block["id"].startswith("rs_")
-                assert isinstance(block["reasoning"], str)
-                assert isinstance(block["index"], int)
-        assert (
-            total_reasoning_blocks > 1
-        )  # This query typically generates multiple reasoning blocks
 
     # Check we can pass back summaries
     message_2 = {"role": "user", "content": "Thank you."}
     response_2 = llm.invoke([message_1, response_1, message_2])
     assert isinstance(response_2, AIMessage)
 
 
@@ -457,10 +523,45 @@
+@pytest.mark.default_cassette("test_stream_reasoning_summary.yaml.gz")
+@pytest.mark.vcr
+def test_stream_reasoning_summary_v1() -> None:
+    llm = ChatOpenAIV1(
+        model="o4-mini",
+        # Routes to Responses API if `reasoning` is set.
+        reasoning={"effort": "medium", "summary": "auto"},
+    )
+    message_1 = {
+        "role": "user",
+        "content": "What was the third tallest buliding in the year 2000?",
+    }
+    response_1: Optional[AIMessageChunkV1] = None
+    for chunk in llm.stream([message_1]):
+        assert isinstance(chunk, AIMessageChunkV1)
+        response_1 = chunk if response_1 is None else response_1 + chunk
+    assert isinstance(response_1, AIMessageChunkV1)
+
+    total_reasoning_blocks = 0
+    for block in response_1.content:
+        if block["type"] == "reasoning":
+            total_reasoning_blocks += 1
+            assert isinstance(block["id"], str) and block["id"].startswith("rs_")
+            assert isinstance(block["reasoning"], str)
+            assert isinstance(block["index"], int)
+    assert (
+        total_reasoning_blocks > 1
+    )  # This query typically generates multiple reasoning blocks
+
+    # Check we can pass back summaries
+    message_2 = {"role": "user", "content": "Thank you."}
+    response_2 = llm.invoke([message_1, response_1, message_2])
+    assert isinstance(response_2, AIMessageV1)
+
+
 @pytest.mark.default_cassette("test_code_interpreter.yaml.gz")
 @pytest.mark.vcr
-@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"])
-def test_code_interpreter(output_version: Literal["v0", "responses/v1", "v1"]) -> None:
+@pytest.mark.parametrize("output_version", ["v0", "responses/v1"])
+def test_code_interpreter(output_version: Literal["v0", "responses/v1"]) -> None:
     llm = ChatOpenAI(
         model="o4-mini", use_responses_api=True, output_version=output_version
     )
@@ -473,33 +574,20 @@ def test_code_interpreter(output_version: Literal["v0", "responses/v1", "v1"]) -> None:
     }
     response = llm_with_tools.invoke([input_message])
     assert isinstance(response, AIMessage)
-    _check_response(response)
+    _check_response(response, output_version)
     if output_version == "v0":
         tool_outputs = [
             item
             for item in response.additional_kwargs["tool_outputs"]
             if item["type"] == "code_interpreter_call"
         ]
-    elif output_version == "responses/v1":
-        tool_outputs = [
-            item
-            for item in response.content
-            if isinstance(item, dict) and item["type"] == "code_interpreter_call"
-        ]
     else:
-        # v1
+        # responses/v1
         tool_outputs = [
             item
             for item in response.content
             if isinstance(item, dict) and item["type"] == "code_interpreter_call"
         ]
-        code_interpreter_result = next(
-            item
-            for item in response.content
-            if isinstance(item, dict) and item["type"] == "code_interpreter_result"
-        )
-        assert tool_outputs
-        assert code_interpreter_result
     assert len(tool_outputs) == 1
 
     # Test streaming
@@ -520,25 +608,65 @@ def test_code_interpreter(output_version: Literal["v0", "responses/v1", "v1"]) -> None:
             for item in response.additional_kwargs["tool_outputs"]
             if item["type"] == "code_interpreter_call"
         ]
-    elif output_version == "responses/v1":
+    else:
+        # responses/v1
         tool_outputs = [
             item
             for item in response.content
             if isinstance(item, dict) and item["type"] == "code_interpreter_call"
         ]
-    else:
-        code_interpreter_call = next(
-            item
-            for item in response.content
-            if isinstance(item, dict) and item["type"] == "code_interpreter_call"
-        )
-        code_interpreter_result = next(
-            item
-            for item in response.content
-            if isinstance(item, dict) and item["type"] == "code_interpreter_result"
-        )
-        assert code_interpreter_call
-        assert code_interpreter_result
+    assert tool_outputs
+
+    # Test we can pass back in
+    next_message = {"role": "user", "content": "Please add more comments to the code."}
+    _ = llm_with_tools.invoke([input_message, full, next_message])
+
+
+@pytest.mark.default_cassette("test_code_interpreter.yaml.gz")
+@pytest.mark.vcr
+def test_code_interpreter_v1() -> None:
+    llm = ChatOpenAIV1(model="o4-mini", use_responses_api=True)
+    llm_with_tools = llm.bind_tools(
+        [{"type": "code_interpreter", "container": {"type": "auto"}}]
+    )
+    input_message = {
+        "role": "user",
+        "content": "Write and run code to answer the question: what is 3^3?",
+    }
+    response = llm_with_tools.invoke([input_message])
+    assert isinstance(response, AIMessageV1)
+    _check_response(response, "v1")
+
+    tool_outputs = [
+        item for item in response.content if item["type"] == "code_interpreter_call"
+    ]
+    code_interpreter_result = next(
+        item for item in response.content if item["type"] == "code_interpreter_result"
+    )
+    assert tool_outputs
+    assert code_interpreter_result
+    assert len(tool_outputs) == 1
+
+    # Test streaming
+    # Use same container
+    container_id = tool_outputs[0]["container_id"]
+    llm_with_tools = llm.bind_tools(
+        [{"type": "code_interpreter", "container": container_id}]
+    )
+
+    full: Optional[AIMessageChunkV1] = None
+    for chunk in llm_with_tools.stream([input_message]):
+        assert isinstance(chunk, AIMessageChunkV1)
+        full = chunk if full is None else full + chunk
+    assert isinstance(full, AIMessageChunkV1)
+    code_interpreter_call = next(
+        item for item in full.content if item["type"] == "code_interpreter_call"
+    )
+    code_interpreter_result = next(
+        item for item in full.content if item["type"] == "code_interpreter_result"
+    )
+    assert code_interpreter_call
+    assert code_interpreter_result
     assert tool_outputs
 
     # Test we can pass back in
@@ -634,9 +762,59 @@ def test_mcp_builtin_zdr() -> None:
     _ = llm_with_tools.invoke([input_message, full, approval_message])
 
 
+@pytest.mark.default_cassette("test_mcp_builtin_zdr.yaml.gz")
+@pytest.mark.vcr
+def test_mcp_builtin_zdr_v1() -> None:
+    llm = ChatOpenAIV1(
+        model="o4-mini", store=False, include=["reasoning.encrypted_content"]
+    )
+
+    llm_with_tools = llm.bind_tools(
+        [
+            {
+                "type": "mcp",
+                "server_label": "deepwiki",
+                "server_url": "https://mcp.deepwiki.com/mcp",
+                "require_approval": {"always": {"tool_names": ["read_wiki_structure"]}},
+            }
+        ]
+    )
+    input_message = {
+        "role": "user",
+        "content": (
+            "What transport protocols does the 2025-03-26 version of the MCP spec "
+            "support?"
+        ),
+    }
+    full: Optional[AIMessageChunkV1] = None
+    for chunk in llm_with_tools.stream([input_message]):
+        assert isinstance(chunk, AIMessageChunkV1)
+        full = chunk if full is None else full + chunk
+
+    assert isinstance(full, AIMessageChunkV1)
+    assert all(isinstance(block, dict) for block in full.content)
+
+    approval_message = HumanMessageV1(
+        [
+            {
+                "type": "non_standard",
+                "value": {
+                    "type": "mcp_approval_response",
+                    "approve": True,
+                    "approval_request_id": block["value"]["id"],  # type: ignore[index]
+                },
+            }
+            for block in full.content
+            if block["type"] == "non_standard"
+            and block["value"]["type"] == "mcp_approval_request"  # type: ignore[index]
+        ]
+    )
+    _ = llm_with_tools.invoke([input_message, full, approval_message])
+
+
 @pytest.mark.default_cassette("test_image_generation_streaming.yaml.gz")
 @pytest.mark.vcr
-@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"])
+@pytest.mark.parametrize("output_version", ["v0", "responses/v1"])
 def test_image_generation_streaming(output_version: str) -> None:
     """Test image generation streaming."""
     llm = ChatOpenAI(
@@ -710,9 +888,52 @@ def test_image_generation_streaming(output_version: str) -> None:
     assert set(standard_keys).issubset(tool_output.keys())
 
 
+@pytest.mark.default_cassette("test_image_generation_streaming.yaml.gz")
+@pytest.mark.vcr
+def test_image_generation_streaming_v1() -> None:
+    """Test image generation streaming."""
+    llm = ChatOpenAIV1(model="gpt-4.1", use_responses_api=True)
+    tool = {
+        "type": "image_generation",
+        "quality": "low",
+        "output_format": "jpeg",
+        "output_compression": 100,
+        "size": "1024x1024",
+    }
+
+    expected_keys = {
+        # Standard
+        "type",
+        "base64",
+        "mime_type",
+        "id",
+        "index",
+        # OpenAI-specific
+        "background",
+        "output_format",
+        "quality",
+        "revised_prompt",
+        "size",
+        "status",
+    }
+
+    full: Optional[AIMessageChunkV1] = None
+    for chunk in llm.stream("Draw a random short word in green font.", tools=[tool]):
+        assert isinstance(chunk, AIMessageChunkV1)
+        full = chunk if full is None else full + chunk
+    complete_ai_message = cast(AIMessageChunkV1, full)
+
+    tool_output = next(
+        block
+        for block in complete_ai_message.content
+        if isinstance(block, dict) and block["type"] == "image"
+    )
+    assert set(expected_keys).issubset(tool_output.keys())
+
+
 @pytest.mark.default_cassette("test_image_generation_multi_turn.yaml.gz")
 @pytest.mark.vcr
-@pytest.mark.parametrize("output_version", ["v0", "responses/v1", "v1"])
+@pytest.mark.parametrize("output_version", ["v0", "responses/v1"])
 def test_image_generation_multi_turn(output_version: str) -> None:
     """Test multi-turn editing of image generation by passing in history."""
     # Test multi-turn
@@ -735,7 +956,7 @@ def test_image_generation_multi_turn(output_version: str) -> None:
     ]
     ai_message = llm_with_tools.invoke(chat_history)
     assert isinstance(ai_message, AIMessage)
-    _check_response(ai_message)
+    _check_response(ai_message, output_version)
 
     expected_keys = {
         "id",
@@ -801,7 +1022,7 @@ def test_image_generation_multi_turn(output_version: str) -> None:
 
     ai_message2 = llm_with_tools.invoke(chat_history)
     assert isinstance(ai_message2, AIMessage)
-    _check_response(ai_message2)
+    _check_response(ai_message2, output_version)
 
     if output_version == "v0":
         tool_output = ai_message2.additional_kwargs["tool_outputs"][0]
@@ -821,3 +1042,76 @@ def test_image_generation_multi_turn(output_version: str) -> None:
         if isinstance(block, dict) and block["type"] == "image"
     )
     assert set(standard_keys).issubset(tool_output.keys())
+
+
+@pytest.mark.default_cassette("test_image_generation_multi_turn.yaml.gz")
+@pytest.mark.vcr
+def test_image_generation_multi_turn_v1() -> None:
+    """Test multi-turn editing of image generation by passing in history."""
+    # Test multi-turn
+    llm = ChatOpenAIV1(model="gpt-4.1", use_responses_api=True)
+    # Test invocation
+    tool = {
+        "type": "image_generation",
+        "quality": "low",
+        "output_format": "jpeg",
+        "output_compression": 100,
+        "size": "1024x1024",
+    }
+    llm_with_tools = llm.bind_tools([tool])
+
+    chat_history: list[MessageLikeRepresentation] = [
+        {"role": "user", "content": "Draw a random short word in green font."}
+    ]
+    ai_message = llm_with_tools.invoke(chat_history)
+    assert isinstance(ai_message, AIMessageV1)
+    _check_response(ai_message, "v1")
+
+    expected_keys = {
+        # Standard
+        "type",
+        "base64",
+        "mime_type",
+        "id",
+        # OpenAI-specific
+        "background",
+        "output_format",
+        "quality",
+        "revised_prompt",
+        "size",
+        "status",
+    }
+
+    standard_keys = {"type", "base64", "id", "status"}
+    tool_output = next(
+        block
+        for block in ai_message.content
+        if isinstance(block, dict) and block["type"] == "image"
+    )
+    assert set(standard_keys).issubset(tool_output.keys())
+
+    chat_history.extend(
+        [
+            # AI message with tool output
+            ai_message,
+            # New request
+            {
+                "role": "user",
+                "content": (
+                    "Now, change the font to blue. Keep the word and everything else "
+                    "the same."
+                ),
+            },
+        ]
+    )
+
+    ai_message2 = llm_with_tools.invoke(chat_history)
+    assert isinstance(ai_message2, AIMessageV1)
+    _check_response(ai_message2, "v1")
+
+    tool_output = next(
+        block
+        for block in ai_message2.content
+        if isinstance(block, dict) and block["type"] == "image"
+    )
+    assert set(expected_keys).issubset(tool_output.keys())