mirror of https://github.com/hwchase17/langchain.git
synced 2025-07-30 16:24:24 +00:00

revert changes to v0 BaseChatModel

This commit is contained in:
parent 61e329637b
commit abaf0c5828
@@ -108,12 +108,7 @@ from langchain_openai.chat_models._client_utils import (
 )
 from langchain_openai.chat_models._compat import (
     _convert_from_v03_ai_message,
-    _convert_from_v1_to_chat_completions,
-    _convert_from_v1_to_responses,
     _convert_to_v03_ai_message,
-    _convert_to_v1_from_chat_completions,
-    _convert_to_v1_from_chat_completions_chunk,
-    _convert_to_v1_from_responses,
 )

 if TYPE_CHECKING:
@@ -466,7 +461,7 @@ class BaseChatOpenAI(BaseChatModel):
     """Base URL path for API requests, leave blank if not using a proxy or service
     emulator."""
     openai_organization: Optional[str] = Field(default=None, alias="organization")
-    """Automatically inferred from env var `OPENAI_ORG_ID` if not provided."""
+    """Automatically inferred from env var ``OPENAI_ORG_ID`` if not provided."""
     # to support explicit proxy for OpenAI
     openai_proxy: Optional[str] = Field(
         default_factory=from_env("OPENAI_PROXY", default=None)
@@ -474,7 +469,7 @@ class BaseChatOpenAI(BaseChatModel):
     request_timeout: Union[float, tuple[float, float], Any, None] = Field(
         default=None, alias="timeout"
     )
-    """Timeout for requests to OpenAI completion API. Can be float, httpx.Timeout or
+    """Timeout for requests to OpenAI completion API. Can be float, ``httpx.Timeout`` or
     None."""
     stream_usage: bool = False
     """Whether to include usage metadata in streaming output. If True, an additional
@@ -531,6 +526,7 @@ class BaseChatOpenAI(BaseChatModel):
         }

     .. versionadded:: 0.3.24
+
     """
     tiktoken_model_name: Optional[str] = None
     """The model name to pass to tiktoken when using this class.
@@ -552,15 +548,32 @@ class BaseChatOpenAI(BaseChatModel):
     invocations.
     """
     http_async_client: Union[Any, None] = Field(default=None, exclude=True)
-    """Optional httpx.AsyncClient. Only used for async invocations. Must specify
+    """Optional ``httpx.AsyncClient``. Only used for async invocations. Must specify
     ``http_client`` as well if you'd like a custom client for sync invocations."""
     stop: Optional[Union[list[str], str]] = Field(default=None, alias="stop_sequences")
     """Default stop sequences."""
     extra_body: Optional[Mapping[str, Any]] = None
     """Optional additional JSON properties to include in the request parameters when
-    making requests to OpenAI compatible APIs, such as vLLM."""
+    making requests to OpenAI compatible APIs, such as vLLM, LM Studio, or other
+    providers.
+
+    This is the recommended way to pass custom parameters that are specific to your
+    OpenAI-compatible API provider but not part of the standard OpenAI API.
+
+    Examples:
+        - LM Studio TTL parameter: ``extra_body={"ttl": 300}``
+        - vLLM custom parameters: ``extra_body={"use_beam_search": True}``
+        - Any other provider-specific parameters
+
+    .. note::
+
+        Do NOT use ``model_kwargs`` for custom parameters that are not part of the
+        standard OpenAI API, as this will cause errors when making API calls. Use
+        ``extra_body`` instead.
+    """
+
     include_response_headers: bool = False
-    """Whether to include response headers in the output message response_metadata."""
+    """Whether to include response headers in the output message ``response_metadata``."""  # noqa: E501
     disabled_params: Optional[dict[str, Any]] = Field(default=None)
     """Parameters of the OpenAI client or chat.completions endpoint that should be
     disabled for the given model.
@@ -569,7 +582,7 @@ class BaseChatOpenAI(BaseChatModel):
     parameter and the value is either None, meaning that parameter should never be
     used, or it's a list of disabled values for the parameter.

-    For example, older models may not support the 'parallel_tool_calls' parameter at
+    For example, older models may not support the ``'parallel_tool_calls'`` parameter at
     all, in which case ``disabled_params={"parallel_tool_calls": None}`` can be passed
     in.

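For context, a minimal sketch of how the ``disabled_params`` field documented in this hunk is used (the model name is illustrative):

    from langchain_openai import ChatOpenAI

    # Never send the parallel_tool_calls parameter for this model.
    llm = ChatOpenAI(
        model="gpt-4o-mini",
        disabled_params={"parallel_tool_calls": None},
    )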
@@ -584,11 +597,11 @@ class BaseChatOpenAI(BaseChatModel):

     Supported values:

-    - ``"file_search_call.results"``
-    - ``"message.input_image.image_url"``
-    - ``"computer_call_output.output.image_url"``
-    - ``"reasoning.encrypted_content"``
-    - ``"code_interpreter_call.outputs"``
+    - ``'file_search_call.results'``
+    - ``'message.input_image.image_url'``
+    - ``'computer_call_output.output.image_url'``
+    - ``'reasoning.encrypted_content'``
+    - ``'code_interpreter_call.outputs'``

     .. versionadded:: 0.3.24
     """
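Assuming this hunk documents the ``include`` field (the supported values listed above are Responses API include strings), usage looks roughly like:

    from langchain_openai import ChatOpenAI

    # The listed values are passed straight through to the Responses API
    # so the corresponding extra output sections are returned.
    llm = ChatOpenAI(
        model="gpt-4o-mini",
        use_responses_api=True,
        include=["reasoning.encrypted_content"],
    )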
@@ -645,6 +658,7 @@ class BaseChatOpenAI(BaseChatModel):
             llm.invoke([HumanMessage("How are you?")], previous_response_id="resp_123")

     .. versionadded:: 0.3.26
+
     """

     use_responses_api: Optional[bool] = None
@@ -655,7 +669,7 @@ class BaseChatOpenAI(BaseChatModel):
     .. versionadded:: 0.3.9
     """

-    output_version: str = "v0"
+    output_version: Literal["v0", "responses/v1"] = "v0"
     """Version of AIMessage output format to use.

     This field is used to roll-out new output formats for chat model AIMessages
@@ -663,12 +677,12 @@ class BaseChatOpenAI(BaseChatModel):

     Supported values:

-    - ``"v0"``: AIMessage format as of langchain-openai 0.3.x.
-    - ``"responses/v1"``: Formats Responses API output
+    - ``'v0'``: AIMessage format as of langchain-openai 0.3.x.
+    - ``'responses/v1'``: Formats Responses API output
       items into AIMessage content blocks.
-    - ``"v1"``: v1 of LangChain cross-provider standard.

-    ``output_version="v1"`` is recommended.
+    Currently only impacts the Responses API. ``output_version='responses/v1'`` is
+    recommended.

     .. versionadded:: 0.3.25

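The practical effect of narrowing ``output_version`` to a ``Literal`` is that only the two documented values validate; a minimal sketch:

    from langchain_openai import ChatOpenAI

    # "v0" (default) and "responses/v1" are the only accepted values after
    # this revert; the cross-provider "v1" format is no longer an option.
    llm = ChatOpenAI(
        model="gpt-4o-mini",
        use_responses_api=True,
        output_version="responses/v1",
    )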
@@ -855,10 +869,6 @@ class BaseChatOpenAI(BaseChatModel):
                 message=default_chunk_class(content="", usage_metadata=usage_metadata),
                 generation_info=base_generation_info,
             )
-            if self.output_version == "v1":
-                generation_chunk.message = _convert_to_v1_from_chat_completions_chunk(
-                    cast(AIMessageChunk, generation_chunk.message)
-                )
             return generation_chunk

         choice = choices[0]
@@ -886,20 +896,6 @@ class BaseChatOpenAI(BaseChatModel):
             if usage_metadata and isinstance(message_chunk, AIMessageChunk):
                 message_chunk.usage_metadata = usage_metadata

-            if self.output_version == "v1":
-                message_chunk = cast(AIMessageChunk, message_chunk)
-                # Convert to v1 format
-                if isinstance(message_chunk.content, str):
-                    message_chunk = _convert_to_v1_from_chat_completions_chunk(
-                        message_chunk
-                    )
-                    if message_chunk.content:
-                        message_chunk.content[0]["index"] = 0  # type: ignore[index]
-                else:
-                    message_chunk = _convert_to_v1_from_chat_completions_chunk(
-                        message_chunk
-                    )
-
             generation_chunk = ChatGenerationChunk(
                 message=message_chunk, generation_info=generation_info or None
             )
@@ -1192,12 +1188,7 @@ class BaseChatOpenAI(BaseChatModel):
             else:
                 payload = _construct_responses_api_payload(messages, payload)
         else:
-            payload["messages"] = [
-                _convert_message_to_dict(_convert_from_v1_to_chat_completions(m))
-                if isinstance(m, AIMessage)
-                else _convert_message_to_dict(m)
-                for m in messages
-            ]
+            payload["messages"] = [_convert_message_to_dict(m) for m in messages]
         return payload

     def _create_chat_result(
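After this change every message goes through ``_convert_message_to_dict`` unconditionally, so the payload carries plain Chat Completions message dicts. A rough illustration of the resulting shape (values are illustrative, not the converter's exact output):

    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi! How can I help?"},
        ],
    }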
@@ -1263,11 +1254,6 @@ class BaseChatOpenAI(BaseChatModel):
         if hasattr(message, "refusal"):
             generations[0].message.additional_kwargs["refusal"] = message.refusal

-        if self.output_version == "v1":
-            _ = llm_output.pop("token_usage", None)
-            generations[0].message = _convert_to_v1_from_chat_completions(
-                cast(AIMessage, generations[0].message)
-            )
         return ChatResult(generations=generations, llm_output=llm_output)

     async def _astream(
@@ -1593,8 +1579,9 @@ class BaseChatOpenAI(BaseChatModel):

         Assumes model is compatible with OpenAI function-calling API.

-        NOTE: Using bind_tools is recommended instead, as the `functions` and
-        `function_call` request parameters are officially marked as deprecated by
+        .. note::
+            Using ``bind_tools()`` is recommended instead, as the ``functions`` and
+            ``function_call`` request parameters are officially marked as deprecated by
             OpenAI.

         Args:
@@ -1604,7 +1591,7 @@ class BaseChatOpenAI(BaseChatModel):
                 their schema dictionary representation.
             function_call: Which function to require the model to call.
                 Must be the name of the single provided function or
-                "auto" to automatically determine which function to call
+                ``'auto'`` to automatically determine which function to call
                 (if any).
             **kwargs: Any additional parameters to pass to the
                 :class:`~langchain.runnable.Runnable` constructor.
@@ -1655,16 +1642,15 @@ class BaseChatOpenAI(BaseChatModel):
                 :meth:`langchain_core.utils.function_calling.convert_to_openai_tool`.
             tool_choice: Which tool to require the model to call. Options are:

-                - str of the form ``"<<tool_name>>"``: calls <<tool_name>> tool.
-                - ``"auto"``: automatically selects a tool (including no tool).
-                - ``"none"``: does not call a tool.
-                - ``"any"`` or ``"required"`` or ``True``: force at least one tool to be called.
+                - str of the form ``'<<tool_name>>'``: calls <<tool_name>> tool.
+                - ``'auto'``: automatically selects a tool (including no tool).
+                - ``'none'``: does not call a tool.
+                - ``'any'`` or ``'required'`` or ``True``: force at least one tool to be called.
                 - dict of the form ``{"type": "function", "function": {"name": <<tool_name>>}}``: calls <<tool_name>> tool.
                 - ``False`` or ``None``: no effect, default OpenAI behavior.
             strict: If True, model output is guaranteed to exactly match the JSON Schema
-                provided in the tool definition. If True, the input schema will be
-                validated according to
-                https://platform.openai.com/docs/guides/structured-outputs/supported-schemas.
+                provided in the tool definition. The input schema will also be validated according to the
+                `supported schemas <https://platform.openai.com/docs/guides/structured-outputs/supported-schemas?api-mode=responses#supported-schemas>`__.
                 If False, input schema will not be validated and model output will not
                 be validated.
                 If None, ``strict`` argument will not be passed to the model.
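A short sketch of ``bind_tools`` with the ``tool_choice`` and ``strict`` options described above (the tool and model are illustrative):

    from langchain_core.tools import tool
    from langchain_openai import ChatOpenAI

    @tool
    def get_weather(city: str) -> str:
        """Look up the current weather for a city."""
        return f"Sunny in {city}"

    llm = ChatOpenAI(model="gpt-4o-mini")
    # "required" forces at least one tool call; strict=True validates the
    # tool's JSON Schema against OpenAI's supported-schema rules.
    llm_forced = llm.bind_tools([get_weather], tool_choice="required", strict=True)
    print(llm_forced.invoke("Weather in Paris?").tool_calls)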
@@ -1735,8 +1721,7 @@ class BaseChatOpenAI(BaseChatModel):
         """Model wrapper that returns outputs formatted to match the given schema.

         Args:
-            schema:
-                The output schema. Can be passed in as:
+            schema: The output schema. Can be passed in as:

                 - an OpenAI function/tool schema,
                 - a JSON Schema,
@@ -1752,24 +1737,20 @@ class BaseChatOpenAI(BaseChatModel):

             method: The method for steering model generation, one of:

-                - "function_calling":
+                - ``'function_calling'``:
                     Uses OpenAI's tool-calling (formerly called function calling)
-                    API: https://platform.openai.com/docs/guides/function-calling
-                - "json_schema":
-                    Uses OpenAI's Structured Output API: https://platform.openai.com/docs/guides/structured-outputs
-                    Supported for "gpt-4o-mini", "gpt-4o-2024-08-06", "o1", and later
+                    `API <https://platform.openai.com/docs/guides/function-calling>`__
+                - ``'json_schema'``:
+                    Uses OpenAI's Structured Output `API <https://platform.openai.com/docs/guides/structured-outputs>`__
+                    Supported for ``'gpt-4o-mini'``, ``'gpt-4o-2024-08-06'``, ``'o1'``, and later
                     models.
-                - "json_mode":
-                    Uses OpenAI's JSON mode. Note that if using JSON mode then you
-                    must include instructions for formatting the output into the
-                    desired schema into the model call:
-                    https://platform.openai.com/docs/guides/structured-outputs/json-mode
+                - ``'json_mode'``:
+                    Uses OpenAI's `JSON mode <https://platform.openai.com/docs/guides/structured-outputs/json-mode>`__.
+                    Note that if using JSON mode then you must include instructions for
+                    formatting the output into the desired schema into the model call

                 Learn more about the differences between the methods and which models
-                support which methods here:
-
-                - https://platform.openai.com/docs/guides/structured-outputs/structured-outputs-vs-json-mode
-                - https://platform.openai.com/docs/guides/structured-outputs/function-calling-vs-response-format
+                support which methods `here <https://platform.openai.com/docs/guides/structured-outputs/function-calling-vs-response-format>`__.

             include_raw:
                 If False then only the parsed structured output is returned. If
@@ -1777,13 +1758,12 @@ class BaseChatOpenAI(BaseChatModel):
                 then both the raw model response (a BaseMessage) and the parsed model
                 response will be returned. If an error occurs during output parsing it
                 will be caught and returned as well. The final output is always a dict
-                with keys "raw", "parsed", and "parsing_error".
+                with keys ``'raw'``, ``'parsed'``, and ``'parsing_error'``.
             strict:

                 - True:
                     Model output is guaranteed to exactly match the schema.
-                    The input schema will also be validated according to
-                    https://platform.openai.com/docs/guides/structured-outputs/supported-schemas
+                    The input schema will also be validated according to the `supported schemas <https://platform.openai.com/docs/guides/structured-outputs/supported-schemas?api-mode=responses#supported-schemas>`__.
                 - False:
                     Input schema will not be validated and model output will not be
                     validated.
@@ -1793,12 +1773,12 @@ class BaseChatOpenAI(BaseChatModel):
             tools:
                 A list of tool-like objects to bind to the chat model. Requires that:

-                - ``method`` is ``"json_schema"`` (default).
+                - ``method`` is ``'json_schema'`` (default).
                 - ``strict=True``
                 - ``include_raw=True``

                 If a model elects to call a
-                tool, the resulting ``AIMessage`` in ``"raw"`` will include tool calls.
+                tool, the resulting ``AIMessage`` in ``'raw'`` will include tool calls.

         .. dropdown:: Example

@@ -1840,13 +1820,14 @@ class BaseChatOpenAI(BaseChatModel):
         Returns:
             A Runnable that takes same inputs as a :class:`langchain_core.language_models.chat.BaseChatModel`.

-            | If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs an instance of ``schema`` (i.e., a Pydantic object). Otherwise, if ``include_raw`` is False then Runnable outputs a dict.
+            If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs
+            an instance of ``schema`` (i.e., a Pydantic object). Otherwise, if ``include_raw`` is False then Runnable outputs a dict.

-            | If ``include_raw`` is True, then Runnable outputs a dict with keys:
+            If ``include_raw`` is True, then Runnable outputs a dict with keys:

-            - "raw": BaseMessage
-            - "parsed": None if there was a parsing error, otherwise the type depends on the ``schema`` as described above.
-            - "parsing_error": Optional[BaseException]
+            - ``'raw'``: BaseMessage
+            - ``'parsed'``: None if there was a parsing error, otherwise the type depends on the ``schema`` as described above.
+            - ``'parsing_error'``: Optional[BaseException]

         .. versionchanged:: 0.1.20

@@ -1855,13 +1836,14 @@ class BaseChatOpenAI(BaseChatModel):
         .. versionchanged:: 0.1.21

             Support for ``strict`` argument added.
-            Support for ``method`` = "json_schema" added.
+            Support for ``method="json_schema"`` added.

         .. versionchanged:: 0.3.12
             Support for ``tools`` added.

         .. versionchanged:: 0.3.21
             Pass ``kwargs`` through to the model.
+
         """  # noqa: E501
         if strict is not None and method == "json_mode":
             raise ValueError(
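A sketch of ``with_structured_output`` using the documented ``method`` and ``include_raw`` options (the schema is illustrative):

    from pydantic import BaseModel

    from langchain_openai import ChatOpenAI

    class Joke(BaseModel):
        setup: str
        punchline: str

    llm = ChatOpenAI(model="gpt-4o-mini")
    structured = llm.with_structured_output(
        Joke, method="json_schema", include_raw=True
    )
    result = structured.invoke("Tell me a joke about cats")
    # result == {"raw": AIMessage(...), "parsed": Joke(...), "parsing_error": None}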
@@ -2097,24 +2079,25 @@ class ChatOpenAI(BaseChatOpenAI):  # type: ignore[override]
             # other params...
         )

-    **NOTE**: Any param which is not explicitly supported will be passed directly to the
-    ``openai.OpenAI.chat.completions.create(...)`` API every time to the model is
-    invoked. For example:
+    .. note::
+        Any param which is not explicitly supported will be passed directly to the
+        ``openai.OpenAI.chat.completions.create(...)`` API every time to the model is
+        invoked. For example:

-    .. code-block:: python
+        .. code-block:: python

-        from langchain_openai import ChatOpenAI
-        import openai
+            from langchain_openai import ChatOpenAI
+            import openai

-        ChatOpenAI(..., frequency_penalty=0.2).invoke(...)
+            ChatOpenAI(..., frequency_penalty=0.2).invoke(...)

-        # results in underlying API call of:
+            # results in underlying API call of:

-        openai.OpenAI(..).chat.completions.create(..., frequency_penalty=0.2)
+            openai.OpenAI(..).chat.completions.create(..., frequency_penalty=0.2)

-        # which is also equivalent to:
+            # which is also equivalent to:

-        ChatOpenAI(...).invoke(..., frequency_penalty=0.2)
+            ChatOpenAI(...).invoke(..., frequency_penalty=0.2)

     .. dropdown:: Invoke

@@ -2281,26 +2264,27 @@ class ChatOpenAI(BaseChatOpenAI):  # type: ignore[override]
                 },
             ]

-        Note that ``openai >= 1.32`` supports a ``parallel_tool_calls`` parameter
-        that defaults to ``True``. This parameter can be set to ``False`` to
-        disable parallel tool calls:
+        .. note::
+            ``openai >= 1.32`` supports a ``parallel_tool_calls`` parameter
+            that defaults to ``True``. This parameter can be set to ``False`` to
+            disable parallel tool calls:

-        .. code-block:: python
+            .. code-block:: python

-            ai_msg = llm_with_tools.invoke(
-                "What is the weather in LA and NY?", parallel_tool_calls=False
-            )
-            ai_msg.tool_calls
+                ai_msg = llm_with_tools.invoke(
+                    "What is the weather in LA and NY?", parallel_tool_calls=False
+                )
+                ai_msg.tool_calls

-        .. code-block:: python
+            .. code-block:: python

-            [
-                {
-                    "name": "GetWeather",
-                    "args": {"location": "Los Angeles, CA"},
-                    "id": "call_4OoY0ZR99iEvC7fevsH8Uhtz",
-                }
-            ]
+                [
+                    {
+                        "name": "GetWeather",
+                        "args": {"location": "Los Angeles, CA"},
+                        "id": "call_4OoY0ZR99iEvC7fevsH8Uhtz",
+                    }
+                ]

         Like other runtime parameters, ``parallel_tool_calls`` can be bound to a model
         using ``llm.bind(parallel_tool_calls=False)`` or during instantiation by
@@ -2314,7 +2298,7 @@ class ChatOpenAI(BaseChatOpenAI):  # type: ignore[override]

         You can access `built-in tools <https://platform.openai.com/docs/guides/tools?api-mode=responses>`_
         supported by the OpenAI Responses API. See LangChain
-        `docs <https://python.langchain.com/docs/integrations/chat/openai/>`_ for more
+        `docs <https://python.langchain.com/docs/integrations/chat/openai/>`__ for more
         detail.

         .. note::
@@ -2369,7 +2353,7 @@ class ChatOpenAI(BaseChatOpenAI):  # type: ignore[override]
         `conversation state <https://platform.openai.com/docs/guides/conversation-state?api-mode=responses>`_.
         Passing in response IDs from previous messages will continue a conversational
         thread. See LangChain
-        `docs <https://python.langchain.com/docs/integrations/chat/openai/>`_ for more
+        `conversation docs <https://python.langchain.com/docs/integrations/chat/openai/>`__ for more
         detail.

         .. code-block:: python
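A sketch of threading a conversation with ``previous_response_id``; the response id is read from ``response_metadata``, which the streaming code changed later in this diff also populates:

    from langchain_openai import ChatOpenAI

    llm = ChatOpenAI(model="gpt-4o-mini", use_responses_api=True)
    first = llm.invoke("Hi, I'm Bob.")
    # Continue the server-side conversation state from the first response.
    followup = llm.invoke(
        "What is my name?",
        previous_response_id=first.response_metadata["id"],
    )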
@@ -2658,9 +2642,95 @@ class ChatOpenAI(BaseChatOpenAI):  # type: ignore[override]
             llm = ChatOpenAI(model="o4-mini", service_tier="flex")

         Note that this is a beta feature that is only available for a subset of models.
-        See OpenAI `docs <https://platform.openai.com/docs/guides/flex-processing>`_
+        See OpenAI `flex processing docs <https://platform.openai.com/docs/guides/flex-processing>`__
         for more detail.

+    .. dropdown:: OpenAI-compatible APIs
+
+        ``ChatOpenAI`` can be used with OpenAI-compatible APIs like `LM Studio <https://lmstudio.ai/>`__,
+        `vLLM <https://github.com/vllm-project/vllm>`__,
+        `Ollama <https://ollama.com/>`__, and others.
+        To use custom parameters specific to these providers, use the ``extra_body`` parameter.
+
+        **LM Studio example** with TTL (auto-eviction):
+
+        .. code-block:: python
+
+            from langchain_openai import ChatOpenAI
+
+            llm = ChatOpenAI(
+                base_url="http://localhost:1234/v1",
+                api_key="lm-studio",  # Can be any string
+                model="mlx-community/QwQ-32B-4bit",
+                temperature=0,
+                extra_body={
+                    "ttl": 300
+                },  # Auto-evict model after 5 minutes of inactivity
+            )
+
+        **vLLM example** with custom parameters:
+
+        .. code-block:: python
+
+            llm = ChatOpenAI(
+                base_url="http://localhost:8000/v1",
+                api_key="EMPTY",
+                model="meta-llama/Llama-2-7b-chat-hf",
+                extra_body={"use_beam_search": True, "best_of": 4},
+            )
+
+    .. dropdown:: model_kwargs vs extra_body
+
+        Use the correct parameter for different types of API arguments:
+
+        **Use ``model_kwargs`` for:**
+
+        - Standard OpenAI API parameters not explicitly defined as class parameters
+        - Parameters that should be flattened into the top-level request payload
+        - Examples: ``max_completion_tokens``, ``stream_options``, ``modalities``, ``audio``
+
+        .. code-block:: python
+
+            # Standard OpenAI parameters
+            llm = ChatOpenAI(
+                model="gpt-4o",
+                model_kwargs={
+                    "stream_options": {"include_usage": True},
+                    "max_completion_tokens": 300,
+                    "modalities": ["text", "audio"],
+                    "audio": {"voice": "alloy", "format": "wav"},
+                },
+            )
+
+        **Use ``extra_body`` for:**
+
+        - Custom parameters specific to OpenAI-compatible providers (vLLM, LM Studio, etc.)
+        - Parameters that need to be nested under ``extra_body`` in the request
+        - Any non-standard OpenAI API parameters
+
+        .. code-block:: python
+
+            # Custom provider parameters
+            llm = ChatOpenAI(
+                base_url="http://localhost:8000/v1",
+                model="custom-model",
+                extra_body={
+                    "use_beam_search": True,  # vLLM parameter
+                    "best_of": 4,  # vLLM parameter
+                    "ttl": 300,  # LM Studio parameter
+                },
+            )
+
+        **Key Differences:**
+
+        - ``model_kwargs``: Parameters are **merged into top-level** request payload
+        - ``extra_body``: Parameters are **nested under ``extra_body``** key in request
+
+        .. important::
+
+            Always use ``extra_body`` for custom parameters, **not** ``model_kwargs``.
+            Using ``model_kwargs`` for non-OpenAI parameters will cause API errors.
+
     """  # noqa: E501

     max_tokens: Optional[int] = Field(default=None, alias="max_completion_tokens")
@@ -2692,7 +2762,7 @@ class ChatOpenAI(BaseChatOpenAI):  # type: ignore[override]

     @classmethod
     def is_lc_serializable(cls) -> bool:
-        """Return whether this model can be serialized by Langchain."""
+        """Return whether this model can be serialized by LangChain."""
         return True

     @property
@@ -2754,8 +2824,7 @@ class ChatOpenAI(BaseChatOpenAI):  # type: ignore[override]
         """Model wrapper that returns outputs formatted to match the given schema.

         Args:
-            schema:
-                The output schema. Can be passed in as:
+            schema: The output schema. Can be passed in as:

                 - a JSON Schema,
                 - a TypedDict class,
@@ -2771,25 +2840,20 @@ class ChatOpenAI(BaseChatOpenAI):  # type: ignore[override]

             method: The method for steering model generation, one of:

-                - "json_schema":
-                    Uses OpenAI's Structured Output API:
-                    https://platform.openai.com/docs/guides/structured-outputs
-                    Supported for "gpt-4o-mini", "gpt-4o-2024-08-06", "o1", and later
+                - ``'json_schema'``:
+                    Uses OpenAI's `Structured Output API <https://platform.openai.com/docs/guides/structured-outputs>`__.
+                    Supported for ``'gpt-4o-mini'``, ``'gpt-4o-2024-08-06'``, ``'o1'``, and later
                     models.
-                - "function_calling":
+                - ``'function_calling'``:
                     Uses OpenAI's tool-calling (formerly called function calling)
-                    API: https://platform.openai.com/docs/guides/function-calling
-                - "json_mode":
-                    Uses OpenAI's JSON mode. Note that if using JSON mode then you
-                    must include instructions for formatting the output into the
-                    desired schema into the model call:
-                    https://platform.openai.com/docs/guides/structured-outputs/json-mode
+                    `API <https://platform.openai.com/docs/guides/function-calling>`__
+                - ``'json_mode'``:
+                    Uses OpenAI's `JSON mode <https://platform.openai.com/docs/guides/structured-outputs/json-mode>`__.
+                    Note that if using JSON mode then you must include instructions for
+                    formatting the output into the desired schema into the model call

                 Learn more about the differences between the methods and which models
-                support which methods here:
-
-                - https://platform.openai.com/docs/guides/structured-outputs/structured-outputs-vs-json-mode
-                - https://platform.openai.com/docs/guides/structured-outputs/function-calling-vs-response-format
+                support which methods `here <https://platform.openai.com/docs/guides/structured-outputs/function-calling-vs-response-format>`__.

             include_raw:
                 If False then only the parsed structured output is returned. If
@@ -2797,13 +2861,12 @@ class ChatOpenAI(BaseChatOpenAI):  # type: ignore[override]
                 then both the raw model response (a BaseMessage) and the parsed model
                 response will be returned. If an error occurs during output parsing it
                 will be caught and returned as well. The final output is always a dict
-                with keys "raw", "parsed", and "parsing_error".
+                with keys ``'raw'``, ``'parsed'``, and ``'parsing_error'``.
             strict:

                 - True:
                     Model output is guaranteed to exactly match the schema.
-                    The input schema will also be validated according to
-                    https://platform.openai.com/docs/guides/structured-outputs/supported-schemas
+                    The input schema will also be validated according to the `supported schemas <https://platform.openai.com/docs/guides/structured-outputs/supported-schemas?api-mode=responses#supported-schemas>`__.
                 - False:
                     Input schema will not be validated and model output will not be
                     validated.
@@ -2813,17 +2876,17 @@ class ChatOpenAI(BaseChatOpenAI):  # type: ignore[override]
                 If schema is specified via TypedDict or JSON schema, ``strict`` is not
                 enabled by default. Pass ``strict=True`` to enable it.

-                Note: ``strict`` can only be non-null if ``method`` is
-                ``"json_schema"`` or ``"function_calling"``.
+                .. note::
+                    ``strict`` can only be non-null if ``method`` is ``'json_schema'`` or ``'function_calling'``.
             tools:
                 A list of tool-like objects to bind to the chat model. Requires that:

-                - ``method`` is ``"json_schema"`` (default).
+                - ``method`` is ``'json_schema'`` (default).
                 - ``strict=True``
                 - ``include_raw=True``

                 If a model elects to call a
-                tool, the resulting ``AIMessage`` in ``"raw"`` will include tool calls.
+                tool, the resulting ``AIMessage`` in ``'raw'`` will include tool calls.

         .. dropdown:: Example

@@ -2865,13 +2928,14 @@ class ChatOpenAI(BaseChatOpenAI):  # type: ignore[override]
         Returns:
             A Runnable that takes same inputs as a :class:`langchain_core.language_models.chat.BaseChatModel`.

-            | If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs an instance of ``schema`` (i.e., a Pydantic object). Otherwise, if ``include_raw`` is False then Runnable outputs a dict.
+            If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs
+            an instance of ``schema`` (i.e., a Pydantic object). Otherwise, if ``include_raw`` is False then Runnable outputs a dict.

-            | If ``include_raw`` is True, then Runnable outputs a dict with keys:
+            If ``include_raw`` is True, then Runnable outputs a dict with keys:

-            - "raw": BaseMessage
-            - "parsed": None if there was a parsing error, otherwise the type depends on the ``schema`` as described above.
-            - "parsing_error": Optional[BaseException]
+            - ``'raw'``: BaseMessage
+            - ``'parsed'``: None if there was a parsing error, otherwise the type depends on the ``schema`` as described above.
+            - ``'parsing_error'``: Optional[BaseException]

         .. versionchanged:: 0.1.20

@@ -2899,7 +2963,7 @@ class ChatOpenAI(BaseChatOpenAI):  # type: ignore[override]
                 specify any Field metadata (like min/max constraints) and fields cannot
                 have default values.

-                See all constraints here: https://platform.openai.com/docs/guides/structured-outputs/supported-schemas
+                See all constraints `here <https://platform.openai.com/docs/guides/structured-outputs/supported-schemas>`__.

                 .. code-block:: python

@@ -3101,6 +3165,7 @@ class ChatOpenAI(BaseChatOpenAI):  # type: ignore[override]
             #     },
             #     'parsing_error': None
             # }
+
        """  # noqa: E501
        return super().with_structured_output(
            schema, method=method, include_raw=include_raw, strict=strict, **kwargs
@@ -3117,7 +3182,7 @@ def _lc_tool_call_to_openai_tool_call(tool_call: ToolCall) -> dict:
         "id": tool_call["id"],
         "function": {
             "name": tool_call["name"],
-            "arguments": json.dumps(tool_call["args"]),
+            "arguments": json.dumps(tool_call["args"], ensure_ascii=False),
         },
     }

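The ``ensure_ascii=False`` change above keeps non-ASCII tool arguments readable instead of escaping them; this is standard library ``json`` behavior:

    import json

    args = {"city": "Zürich"}
    json.dumps(args)                      # '{"city": "Z\\u00fcrich"}' (escaped)
    json.dumps(args, ensure_ascii=False)  # '{"city": "Zürich"}' (kept verbatim)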
@@ -3512,7 +3577,6 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list:
     for lc_msg in messages:
         if isinstance(lc_msg, AIMessage):
             lc_msg = _convert_from_v03_ai_message(lc_msg)
-            lc_msg = _convert_from_v1_to_responses(lc_msg)
         msg = _convert_message_to_dict(lc_msg)
         # "name" parameter unsupported
         if "name" in msg:
@@ -3656,7 +3720,7 @@ def _construct_lc_result_from_responses_api(
     response: Response,
     schema: Optional[type[_BM]] = None,
     metadata: Optional[dict] = None,
-    output_version: str = "v0",
+    output_version: Literal["v0", "responses/v1"] = "v0",
 ) -> ChatResult:
     """Construct ChatResponse from OpenAI Response API response."""
     if response.error:
@@ -3795,27 +3859,6 @@ def _construct_lc_result_from_responses_api(
     )
     if output_version == "v0":
         message = _convert_to_v03_ai_message(message)
-    elif output_version == "v1":
-        message = _convert_to_v1_from_responses(message)
-        if response.tools and any(
-            tool.type == "image_generation" for tool in response.tools
-        ):
-            # Get mime_time from tool definition and add to image generations
-            # if missing (primarily for tracing purposes).
-            image_generation_call = next(
-                tool for tool in response.tools if tool.type == "image_generation"
-            )
-            if image_generation_call.output_format:
-                mime_type = f"image/{image_generation_call.output_format}"
-                for content_block in message.content:
-                    # OK to mutate output message
-                    if (
-                        isinstance(content_block, dict)
-                        and content_block.get("type") == "image"
-                        and "base64" in content_block
-                        and "mime_type" not in block
-                    ):
-                        block["mime_type"] = mime_type
     else:
         pass
     return ChatResult(generations=[ChatGeneration(message=message)])
@@ -3829,7 +3872,7 @@ def _convert_responses_chunk_to_generation_chunk(
     schema: Optional[type[_BM]] = None,
     metadata: Optional[dict] = None,
     has_reasoning: bool = False,
-    output_version: str = "v0",
+    output_version: Literal["v0", "responses/v1"] = "v0",
 ) -> tuple[int, int, int, Optional[ChatGenerationChunk]]:
     def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None:
         """Advance indexes tracked during streaming.
@@ -3864,6 +3907,7 @@ def _convert_responses_chunk_to_generation_chunk(

         This function just identifies updates in output or sub-indexes and increments
         the current index accordingly.
+
         """
         nonlocal current_index, current_output_index, current_sub_index
         if sub_idx is None:
@@ -3894,29 +3938,9 @@ def _convert_responses_chunk_to_generation_chunk(
             annotation = chunk.annotation
         else:
             annotation = chunk.annotation.model_dump(exclude_none=True, mode="json")
-        if output_version == "v1":
-            content.append(
-                {
-                    "type": "text",
-                    "text": "",
-                    "annotations": [annotation],
-                    "index": current_index,
-                }
-            )
-        else:
-            content.append({"annotations": [annotation], "index": current_index})
+        content.append({"annotations": [annotation], "index": current_index})
     elif chunk.type == "response.output_text.done":
-        if output_version == "v1":
-            content.append(
-                {
-                    "type": "text",
-                    "text": "",
-                    "id": chunk.item_id,
-                    "index": current_index,
-                }
-            )
-        else:
-            content.append({"id": chunk.item_id, "index": current_index})
+        content.append({"id": chunk.item_id, "index": current_index})
     elif chunk.type == "response.created":
         id = chunk.response.id
         response_metadata["id"] = chunk.response.id  # Backwards compatibility
@@ -3992,34 +4016,21 @@ def _convert_responses_chunk_to_generation_chunk(
         content.append({"type": "refusal", "refusal": chunk.refusal})
     elif chunk.type == "response.output_item.added" and chunk.item.type == "reasoning":
         _advance(chunk.output_index)
-        current_sub_index = 0
         reasoning = chunk.item.model_dump(exclude_none=True, mode="json")
         reasoning["index"] = current_index
         content.append(reasoning)
     elif chunk.type == "response.reasoning_summary_part.added":
-        if output_version in ("v0", "responses/v1"):
-            _advance(chunk.output_index)
-            content.append(
-                {
-                    # langchain-core uses the `index` key to aggregate text blocks.
-                    "summary": [
-                        {
-                            "index": chunk.summary_index,
-                            "type": "summary_text",
-                            "text": "",
-                        }
-                    ],
-                    "index": current_index,
-                    "type": "reasoning",
-                }
-            )
-        else:
-            block: dict = {"type": "reasoning", "reasoning": ""}
-            if chunk.summary_index > 0:
-                _advance(chunk.output_index, chunk.summary_index)
-            block["id"] = chunk.item_id
-            block["index"] = current_index
-            content.append(block)
+        _advance(chunk.output_index)
+        content.append(
+            {
+                # langchain-core uses the `index` key to aggregate text blocks.
+                "summary": [
+                    {"index": chunk.summary_index, "type": "summary_text", "text": ""}
+                ],
+                "index": current_index,
+                "type": "reasoning",
+            }
+        )
     elif chunk.type == "response.image_generation_call.partial_image":
         # Partial images are not supported yet.
         pass
@@ -4054,15 +4065,6 @@ def _convert_responses_chunk_to_generation_chunk(
             AIMessageChunk,
             _convert_to_v03_ai_message(message, has_reasoning=has_reasoning),
         )
-    elif output_version == "v1":
-        message = cast(AIMessageChunk, _convert_to_v1_from_responses(message))
-        for content_block in message.content:
-            if (
-                isinstance(content_block, dict)
-                and content_block.get("index", -1) > current_index
-            ):
-                # blocks were added for v1
-                current_index = content_block["index"]
     else:
         pass
     return (