diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 742a202f91a..1bc9b66d880 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -108,12 +108,7 @@ from langchain_openai.chat_models._client_utils import ( ) from langchain_openai.chat_models._compat import ( _convert_from_v03_ai_message, - _convert_from_v1_to_chat_completions, - _convert_from_v1_to_responses, _convert_to_v03_ai_message, - _convert_to_v1_from_chat_completions, - _convert_to_v1_from_chat_completions_chunk, - _convert_to_v1_from_responses, ) if TYPE_CHECKING: @@ -466,7 +461,7 @@ class BaseChatOpenAI(BaseChatModel): """Base URL path for API requests, leave blank if not using a proxy or service emulator.""" openai_organization: Optional[str] = Field(default=None, alias="organization") - """Automatically inferred from env var `OPENAI_ORG_ID` if not provided.""" + """Automatically inferred from env var ``OPENAI_ORG_ID`` if not provided.""" # to support explicit proxy for OpenAI openai_proxy: Optional[str] = Field( default_factory=from_env("OPENAI_PROXY", default=None) @@ -474,7 +469,7 @@ class BaseChatOpenAI(BaseChatModel): request_timeout: Union[float, tuple[float, float], Any, None] = Field( default=None, alias="timeout" ) - """Timeout for requests to OpenAI completion API. Can be float, httpx.Timeout or + """Timeout for requests to OpenAI completion API. Can be float, ``httpx.Timeout`` or None.""" stream_usage: bool = False """Whether to include usage metadata in streaming output. If True, an additional @@ -531,6 +526,7 @@ class BaseChatOpenAI(BaseChatModel): } .. versionadded:: 0.3.24 + """ tiktoken_model_name: Optional[str] = None """The model name to pass to tiktoken when using this class. @@ -552,15 +548,32 @@ class BaseChatOpenAI(BaseChatModel): invocations. """ http_async_client: Union[Any, None] = Field(default=None, exclude=True) - """Optional httpx.AsyncClient. Only used for async invocations. Must specify + """Optional ``httpx.AsyncClient``. Only used for async invocations. Must specify ``http_client`` as well if you'd like a custom client for sync invocations.""" stop: Optional[Union[list[str], str]] = Field(default=None, alias="stop_sequences") """Default stop sequences.""" extra_body: Optional[Mapping[str, Any]] = None """Optional additional JSON properties to include in the request parameters when - making requests to OpenAI compatible APIs, such as vLLM.""" + making requests to OpenAI compatible APIs, such as vLLM, LM Studio, or other + providers. + + This is the recommended way to pass custom parameters that are specific to your + OpenAI-compatible API provider but not part of the standard OpenAI API. + + Examples: + - LM Studio TTL parameter: ``extra_body={"ttl": 300}`` + - vLLM custom parameters: ``extra_body={"use_beam_search": True}`` + - Any other provider-specific parameters + + .. note:: + + Do NOT use ``model_kwargs`` for custom parameters that are not part of the + standard OpenAI API, as this will cause errors when making API calls. Use + ``extra_body`` instead. 
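For example, a minimal sketch of the difference described in the note above (the local ``base_url``, placeholder ``api_key``, model name, and the ``ttl`` value are illustrative assumptions, not required values):

.. code-block:: python

    from langchain_openai import ChatOpenAI

    # ``model_kwargs`` entries are merged into the top-level request payload,
    # while ``extra_body`` is forwarded to the OpenAI client and nested under
    # ``extra_body`` in the outgoing JSON body.
    llm = ChatOpenAI(
        base_url="http://localhost:1234/v1",  # assumed local OpenAI-compatible server
        api_key="not-needed",  # placeholder; many local servers ignore the key
        model="my-local-model",  # hypothetical model name
        model_kwargs={"stream_options": {"include_usage": True}},  # standard OpenAI param
        extra_body={"ttl": 300},  # provider-specific param (e.g. LM Studio auto-eviction)
    )

    # Roughly the underlying call:
    # client.chat.completions.create(
    #     model="my-local-model",
    #     messages=[...],
    #     stream_options={"include_usage": True},
    #     extra_body={"ttl": 300},
    # )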
+ """ + include_response_headers: bool = False - """Whether to include response headers in the output message response_metadata.""" + """Whether to include response headers in the output message ``response_metadata``.""" # noqa: E501 disabled_params: Optional[dict[str, Any]] = Field(default=None) """Parameters of the OpenAI client or chat.completions endpoint that should be disabled for the given model. @@ -569,7 +582,7 @@ class BaseChatOpenAI(BaseChatModel): parameter and the value is either None, meaning that parameter should never be used, or it's a list of disabled values for the parameter. - For example, older models may not support the 'parallel_tool_calls' parameter at + For example, older models may not support the ``'parallel_tool_calls'`` parameter at all, in which case ``disabled_params={"parallel_tool_calls": None}`` can be passed in. @@ -584,11 +597,11 @@ class BaseChatOpenAI(BaseChatModel): Supported values: - - ``"file_search_call.results"`` - - ``"message.input_image.image_url"`` - - ``"computer_call_output.output.image_url"`` - - ``"reasoning.encrypted_content"`` - - ``"code_interpreter_call.outputs"`` + - ``'file_search_call.results'`` + - ``'message.input_image.image_url'`` + - ``'computer_call_output.output.image_url'`` + - ``'reasoning.encrypted_content'`` + - ``'code_interpreter_call.outputs'`` .. versionadded:: 0.3.24 """ @@ -645,6 +658,7 @@ class BaseChatOpenAI(BaseChatModel): llm.invoke([HumanMessage("How are you?")], previous_response_id="resp_123") .. versionadded:: 0.3.26 + """ use_responses_api: Optional[bool] = None @@ -655,7 +669,7 @@ class BaseChatOpenAI(BaseChatModel): .. versionadded:: 0.3.9 """ - output_version: str = "v0" + output_version: Literal["v0", "responses/v1"] = "v0" """Version of AIMessage output format to use. This field is used to roll-out new output formats for chat model AIMessages @@ -663,12 +677,12 @@ class BaseChatOpenAI(BaseChatModel): Supported values: - - ``"v0"``: AIMessage format as of langchain-openai 0.3.x. - - ``"responses/v1"``: Formats Responses API output + - ``'v0'``: AIMessage format as of langchain-openai 0.3.x. + - ``'responses/v1'``: Formats Responses API output items into AIMessage content blocks. - - ``"v1"``: v1 of LangChain cross-provider standard. - ``output_version="v1"`` is recommended. + Currently only impacts the Responses API. ``output_version='responses/v1'`` is + recommended. .. 
versionadded:: 0.3.25 @@ -855,10 +869,6 @@ class BaseChatOpenAI(BaseChatModel): message=default_chunk_class(content="", usage_metadata=usage_metadata), generation_info=base_generation_info, ) - if self.output_version == "v1": - generation_chunk.message = _convert_to_v1_from_chat_completions_chunk( - cast(AIMessageChunk, generation_chunk.message) - ) return generation_chunk choice = choices[0] @@ -886,20 +896,6 @@ class BaseChatOpenAI(BaseChatModel): if usage_metadata and isinstance(message_chunk, AIMessageChunk): message_chunk.usage_metadata = usage_metadata - if self.output_version == "v1": - message_chunk = cast(AIMessageChunk, message_chunk) - # Convert to v1 format - if isinstance(message_chunk.content, str): - message_chunk = _convert_to_v1_from_chat_completions_chunk( - message_chunk - ) - if message_chunk.content: - message_chunk.content[0]["index"] = 0 # type: ignore[index] - else: - message_chunk = _convert_to_v1_from_chat_completions_chunk( - message_chunk - ) - generation_chunk = ChatGenerationChunk( message=message_chunk, generation_info=generation_info or None ) @@ -1192,12 +1188,7 @@ class BaseChatOpenAI(BaseChatModel): else: payload = _construct_responses_api_payload(messages, payload) else: - payload["messages"] = [ - _convert_message_to_dict(_convert_from_v1_to_chat_completions(m)) - if isinstance(m, AIMessage) - else _convert_message_to_dict(m) - for m in messages - ] + payload["messages"] = [_convert_message_to_dict(m) for m in messages] return payload def _create_chat_result( @@ -1263,11 +1254,6 @@ class BaseChatOpenAI(BaseChatModel): if hasattr(message, "refusal"): generations[0].message.additional_kwargs["refusal"] = message.refusal - if self.output_version == "v1": - _ = llm_output.pop("token_usage", None) - generations[0].message = _convert_to_v1_from_chat_completions( - cast(AIMessage, generations[0].message) - ) return ChatResult(generations=generations, llm_output=llm_output) async def _astream( @@ -1593,8 +1579,9 @@ class BaseChatOpenAI(BaseChatModel): Assumes model is compatible with OpenAI function-calling API. - NOTE: Using bind_tools is recommended instead, as the `functions` and - `function_call` request parameters are officially marked as deprecated by + .. note:: + Using ``bind_tools()`` is recommended instead, as the ``functions`` and + ``function_call`` request parameters are officially marked as deprecated by OpenAI. Args: @@ -1604,7 +1591,7 @@ class BaseChatOpenAI(BaseChatModel): their schema dictionary representation. function_call: Which function to require the model to call. Must be the name of the single provided function or - "auto" to automatically determine which function to call + ``'auto'`` to automatically determine which function to call (if any). **kwargs: Any additional parameters to pass to the :class:`~langchain.runnable.Runnable` constructor. @@ -1655,16 +1642,15 @@ class BaseChatOpenAI(BaseChatModel): :meth:`langchain_core.utils.function_calling.convert_to_openai_tool`. tool_choice: Which tool to require the model to call. Options are: - - str of the form ``"<>"``: calls <> tool. - - ``"auto"``: automatically selects a tool (including no tool). - - ``"none"``: does not call a tool. - - ``"any"`` or ``"required"`` or ``True``: force at least one tool to be called. + - str of the form ``'<>'``: calls <> tool. + - ``'auto'``: automatically selects a tool (including no tool). + - ``'none'``: does not call a tool. + - ``'any'`` or ``'required'`` or ``True``: force at least one tool to be called. 
- dict of the form ``{"type": "function", "function": {"name": <>}}``: calls <> tool. - ``False`` or ``None``: no effect, default OpenAI behavior. strict: If True, model output is guaranteed to exactly match the JSON Schema provided in the tool definition. If True, the input schema will be - validated according to - https://platform.openai.com/docs/guides/structured-outputs/supported-schemas. + provided in the tool definition. The input schema will also be validated according to the + `supported schemas <https://platform.openai.com/docs/guides/structured-outputs/supported-schemas>`__. If False, input schema will not be validated and model output will not be validated. If None, ``strict`` argument will not be passed to the model. @@ -1735,8 +1721,7 @@ class BaseChatOpenAI(BaseChatModel): """Model wrapper that returns outputs formatted to match the given schema. Args: - schema: - The output schema. Can be passed in as: + schema: The output schema. Can be passed in as: - an OpenAI function/tool schema, - a JSON Schema, @@ -1752,24 +1737,20 @@ class BaseChatOpenAI(BaseChatModel): method: The method for steering model generation, one of: - - "function_calling": + - ``'function_calling'``: Uses OpenAI's tool-calling (formerly called function calling) - API: https://platform.openai.com/docs/guides/function-calling - - "json_schema": - Uses OpenAI's Structured Output API: https://platform.openai.com/docs/guides/structured-outputs - Supported for "gpt-4o-mini", "gpt-4o-2024-08-06", "o1", and later + `API <https://platform.openai.com/docs/guides/function-calling>`__ + - ``'json_schema'``: + Uses OpenAI's Structured Output `API <https://platform.openai.com/docs/guides/structured-outputs>`__ + Supported for ``'gpt-4o-mini'``, ``'gpt-4o-2024-08-06'``, ``'o1'``, and later models. - - "json_mode": - Uses OpenAI's JSON mode. Note that if using JSON mode then you - must include instructions for formatting the output into the - desired schema into the model call: - https://platform.openai.com/docs/guides/structured-outputs/json-mode + - ``'json_mode'``: + Uses OpenAI's `JSON mode <https://platform.openai.com/docs/guides/structured-outputs/json-mode>`__. + Note that if using JSON mode then you must include instructions for + formatting the output into the desired schema into the model call. Learn more about the differences between the methods and which models - support which methods here: - - - https://platform.openai.com/docs/guides/structured-outputs/structured-outputs-vs-json-mode - - https://platform.openai.com/docs/guides/structured-outputs/function-calling-vs-response-format + support which methods `here <https://platform.openai.com/docs/guides/structured-outputs/structured-outputs-vs-json-mode>`__. include_raw: If False then only the parsed structured output is returned. If an error occurs during model output parsing it will be raised. If True then both the raw model response (a BaseMessage) and the parsed model response will be returned. If an error occurs during output parsing it will be caught and returned as well. The final output is always a dict - with keys "raw", "parsed", and "parsing_error". + with keys ``'raw'``, ``'parsed'``, and ``'parsing_error'``. strict: - True: Model output is guaranteed to exactly match the schema. - The input schema will also be validated according to - https://platform.openai.com/docs/guides/structured-outputs/supported-schemas + The input schema will also be validated according to the `supported schemas <https://platform.openai.com/docs/guides/structured-outputs/supported-schemas>`__. - False: Input schema will not be validated and model output will not be validated. If None, ``strict`` argument will not be passed to the model. @@ -1793,12 +1773,12 @@ class BaseChatOpenAI(BaseChatModel): tools: A list of tool-like objects to bind to the chat model. Requires that: - - ``method`` is ``"json_schema"`` (default). + - ``method`` is ``'json_schema'`` (default).
- ``strict=True`` - ``include_raw=True`` If a model elects to call a - tool, the resulting ``AIMessage`` in ``"raw"`` will include tool calls. + tool, the resulting ``AIMessage`` in ``'raw'`` will include tool calls. .. dropdown:: Example @@ -1840,13 +1820,14 @@ class BaseChatOpenAI(BaseChatModel): Returns: A Runnable that takes same inputs as a :class:`langchain_core.language_models.chat.BaseChatModel`. - | If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs an instance of ``schema`` (i.e., a Pydantic object). Otherwise, if ``include_raw`` is False then Runnable outputs a dict. + If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs + an instance of ``schema`` (i.e., a Pydantic object). Otherwise, if ``include_raw`` is False then Runnable outputs a dict. - | If ``include_raw`` is True, then Runnable outputs a dict with keys: + If ``include_raw`` is True, then Runnable outputs a dict with keys: - - "raw": BaseMessage - - "parsed": None if there was a parsing error, otherwise the type depends on the ``schema`` as described above. - - "parsing_error": Optional[BaseException] + - ``'raw'``: BaseMessage + - ``'parsed'``: None if there was a parsing error, otherwise the type depends on the ``schema`` as described above. + - ``'parsing_error'``: Optional[BaseException] .. versionchanged:: 0.1.20 @@ -1855,13 +1836,14 @@ class BaseChatOpenAI(BaseChatModel): .. versionchanged:: 0.1.21 Support for ``strict`` argument added. - Support for ``method`` = "json_schema" added. + Support for ``method="json_schema"`` added. .. versionchanged:: 0.3.12 Support for ``tools`` added. .. versionchanged:: 0.3.21 Pass ``kwargs`` through to the model. + """ # noqa: E501 if strict is not None and method == "json_mode": raise ValueError( @@ -2097,24 +2079,25 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override] # other params... ) - **NOTE**: Any param which is not explicitly supported will be passed directly to the - ``openai.OpenAI.chat.completions.create(...)`` API every time to the model is - invoked. For example: + .. note:: + Any param which is not explicitly supported will be passed directly to the + ``openai.OpenAI.chat.completions.create(...)`` API every time the model is + invoked. For example: - .. code-block:: python + .. code-block:: python - from langchain_openai import ChatOpenAI - import openai + from langchain_openai import ChatOpenAI + import openai - ChatOpenAI(..., frequency_penalty=0.2).invoke(...) + ChatOpenAI(..., frequency_penalty=0.2).invoke(...) - # results in underlying API call of: + # results in underlying API call of: - openai.OpenAI(..).chat.completions.create(..., frequency_penalty=0.2) + openai.OpenAI(..).chat.completions.create(..., frequency_penalty=0.2) - # which is also equivalent to: + # which is also equivalent to: - ChatOpenAI(...).invoke(..., frequency_penalty=0.2) + ChatOpenAI(...).invoke(..., frequency_penalty=0.2) .. dropdown:: Invoke @@ -2281,26 +2264,27 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override] }, ] - Note that ``openai >= 1.32`` supports a ``parallel_tool_calls`` parameter - that defaults to ``True``. This parameter can be set to ``False`` to - disable parallel tool calls: + .. note:: + ``openai >= 1.32`` supports a ``parallel_tool_calls`` parameter + that defaults to ``True``. This parameter can be set to ``False`` to + disable parallel tool calls: - .. code-block:: python + ..
code-block:: python - ai_msg = llm_with_tools.invoke( - "What is the weather in LA and NY?", parallel_tool_calls=False - ) - ai_msg.tool_calls + ai_msg = llm_with_tools.invoke( + "What is the weather in LA and NY?", parallel_tool_calls=False + ) + ai_msg.tool_calls - .. code-block:: python + .. code-block:: python - [ - { - "name": "GetWeather", - "args": {"location": "Los Angeles, CA"}, - "id": "call_4OoY0ZR99iEvC7fevsH8Uhtz", - } - ] + [ + { + "name": "GetWeather", + "args": {"location": "Los Angeles, CA"}, + "id": "call_4OoY0ZR99iEvC7fevsH8Uhtz", + } + ] Like other runtime parameters, ``parallel_tool_calls`` can be bound to a model using ``llm.bind(parallel_tool_calls=False)`` or during instantiation by @@ -2314,7 +2298,7 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override] You can access `built-in tools `_ supported by the OpenAI Responses API. See LangChain - `docs `_ for more + `docs `__ for more detail. .. note:: @@ -2369,7 +2353,7 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override] `conversation state `_. Passing in response IDs from previous messages will continue a conversational thread. See LangChain - `docs `_ for more + `conversation docs `__ for more detail. .. code-block:: python @@ -2658,9 +2642,95 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override] llm = ChatOpenAI(model="o4-mini", service_tier="flex") Note that this is a beta feature that is only available for a subset of models. - See OpenAI `docs `_ + See OpenAI `flex processing docs `__ for more detail. + .. dropdown:: OpenAI-compatible APIs + + ``ChatOpenAI`` can be used with OpenAI-compatible APIs like `LM Studio `__, + `vLLM `__, + `Ollama `__, and others. + To use custom parameters specific to these providers, use the ``extra_body`` parameter. + + **LM Studio example** with TTL (auto-eviction): + + .. code-block:: python + + from langchain_openai import ChatOpenAI + + llm = ChatOpenAI( + base_url="http://localhost:1234/v1", + api_key="lm-studio", # Can be any string + model="mlx-community/QwQ-32B-4bit", + temperature=0, + extra_body={ + "ttl": 300 + }, # Auto-evict model after 5 minutes of inactivity + ) + + **vLLM example** with custom parameters: + + .. code-block:: python + + llm = ChatOpenAI( + base_url="http://localhost:8000/v1", + api_key="EMPTY", + model="meta-llama/Llama-2-7b-chat-hf", + extra_body={"use_beam_search": True, "best_of": 4}, + ) + + .. dropdown:: model_kwargs vs extra_body + + Use the correct parameter for different types of API arguments: + + **Use ``model_kwargs`` for:** + + - Standard OpenAI API parameters not explicitly defined as class parameters + - Parameters that should be flattened into the top-level request payload + - Examples: ``max_completion_tokens``, ``stream_options``, ``modalities``, ``audio`` + + .. code-block:: python + + # Standard OpenAI parameters + llm = ChatOpenAI( + model="gpt-4o", + model_kwargs={ + "stream_options": {"include_usage": True}, + "max_completion_tokens": 300, + "modalities": ["text", "audio"], + "audio": {"voice": "alloy", "format": "wav"}, + }, + ) + + **Use ``extra_body`` for:** + + - Custom parameters specific to OpenAI-compatible providers (vLLM, LM Studio, etc.) + - Parameters that need to be nested under ``extra_body`` in the request + - Any non-standard OpenAI API parameters + + .. 
code-block:: python + # Custom provider parameters + llm = ChatOpenAI( + base_url="http://localhost:8000/v1", + model="custom-model", + extra_body={ + "use_beam_search": True, # vLLM parameter + "best_of": 4, # vLLM parameter + "ttl": 300, # LM Studio parameter + }, + ) + **Key Differences:** + - ``model_kwargs``: Parameters are **merged into top-level** request payload + - ``extra_body``: Parameters are **nested under ``extra_body``** key in request + .. important:: + Always use ``extra_body`` for custom parameters, **not** ``model_kwargs``. + Using ``model_kwargs`` for non-OpenAI parameters will cause API errors. + """ # noqa: E501 max_tokens: Optional[int] = Field(default=None, alias="max_completion_tokens") @@ -2692,7 +2762,7 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override] @classmethod def is_lc_serializable(cls) -> bool: - """Return whether this model can be serialized by Langchain.""" + """Return whether this model can be serialized by LangChain.""" return True @property @@ -2754,8 +2824,7 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override] """Model wrapper that returns outputs formatted to match the given schema. Args: - schema: - The output schema. Can be passed in as: + schema: The output schema. Can be passed in as: - a JSON Schema, - a TypedDict class, @@ -2771,25 +2840,20 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override] method: The method for steering model generation, one of: - - "json_schema": - Uses OpenAI's Structured Output API: - https://platform.openai.com/docs/guides/structured-outputs - Supported for "gpt-4o-mini", "gpt-4o-2024-08-06", "o1", and later + - ``'json_schema'``: + Uses OpenAI's `Structured Output API <https://platform.openai.com/docs/guides/structured-outputs>`__. + Supported for ``'gpt-4o-mini'``, ``'gpt-4o-2024-08-06'``, ``'o1'``, and later models. - - "function_calling": + - ``'function_calling'``: Uses OpenAI's tool-calling (formerly called function calling) - API: https://platform.openai.com/docs/guides/function-calling - - "json_mode": - Uses OpenAI's JSON mode. Note that if using JSON mode then you - must include instructions for formatting the output into the - desired schema into the model call: - https://platform.openai.com/docs/guides/structured-outputs/json-mode + `API <https://platform.openai.com/docs/guides/function-calling>`__ + - ``'json_mode'``: + Uses OpenAI's `JSON mode <https://platform.openai.com/docs/guides/structured-outputs/json-mode>`__. + Note that if using JSON mode then you must include instructions for + formatting the output into the desired schema into the model call. Learn more about the differences between the methods and which models - support which methods here: - - - https://platform.openai.com/docs/guides/structured-outputs/structured-outputs-vs-json-mode - - https://platform.openai.com/docs/guides/structured-outputs/function-calling-vs-response-format + support which methods `here <https://platform.openai.com/docs/guides/structured-outputs/structured-outputs-vs-json-mode>`__. include_raw: If False then only the parsed structured output is returned. If an error occurs during model output parsing it will be raised. If True then both the raw model response (a BaseMessage) and the parsed model response will be returned. If an error occurs during output parsing it will be caught and returned as well. The final output is always a dict - with keys "raw", "parsed", and "parsing_error". + with keys ``'raw'``, ``'parsed'``, and ``'parsing_error'``. strict: - True: Model output is guaranteed to exactly match the schema. - The input schema will also be validated according to - https://platform.openai.com/docs/guides/structured-outputs/supported-schemas + The input schema will also be validated according to the `supported schemas <https://platform.openai.com/docs/guides/structured-outputs/supported-schemas>`__.
- False: Input schema will not be validated and model output will not be validated. @@ -2813,17 +2876,17 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override] If schema is specified via TypedDict or JSON schema, ``strict`` is not enabled by default. Pass ``strict=True`` to enable it. - Note: ``strict`` can only be non-null if ``method`` is - ``"json_schema"`` or ``"function_calling"``. + .. note:: + ``strict`` can only be non-null if ``method`` is ``'json_schema'`` or ``'function_calling'``. tools: A list of tool-like objects to bind to the chat model. Requires that: - - ``method`` is ``"json_schema"`` (default). + - ``method`` is ``'json_schema'`` (default). - ``strict=True`` - ``include_raw=True`` If a model elects to call a - tool, the resulting ``AIMessage`` in ``"raw"`` will include tool calls. + tool, the resulting ``AIMessage`` in ``'raw'`` will include tool calls. .. dropdown:: Example @@ -2865,13 +2928,14 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override] Returns: A Runnable that takes same inputs as a :class:`langchain_core.language_models.chat.BaseChatModel`. - | If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs an instance of ``schema`` (i.e., a Pydantic object). Otherwise, if ``include_raw`` is False then Runnable outputs a dict. + If ``include_raw`` is False and ``schema`` is a Pydantic class, Runnable outputs + an instance of ``schema`` (i.e., a Pydantic object). Otherwise, if ``include_raw`` is False then Runnable outputs a dict. - | If ``include_raw`` is True, then Runnable outputs a dict with keys: + If ``include_raw`` is True, then Runnable outputs a dict with keys: - - "raw": BaseMessage - - "parsed": None if there was a parsing error, otherwise the type depends on the ``schema`` as described above. - - "parsing_error": Optional[BaseException] + - ``'raw'``: BaseMessage + - ``'parsed'``: None if there was a parsing error, otherwise the type depends on the ``schema`` as described above. + - ``'parsing_error'``: Optional[BaseException] .. versionchanged:: 0.1.20 @@ -2899,7 +2963,7 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override] specify any Field metadata (like min/max constraints) and fields cannot have default values. - See all constraints here: https://platform.openai.com/docs/guides/structured-outputs/supported-schemas + See all constraints `here <https://platform.openai.com/docs/guides/structured-outputs/supported-schemas>`__. ..
code-block:: python @@ -3101,6 +3165,7 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override] # }, # 'parsing_error': None # } + """ # noqa: E501 return super().with_structured_output( schema, method=method, include_raw=include_raw, strict=strict, **kwargs @@ -3117,7 +3182,7 @@ def _lc_tool_call_to_openai_tool_call(tool_call: ToolCall) -> dict: "id": tool_call["id"], "function": { "name": tool_call["name"], - "arguments": json.dumps(tool_call["args"]), + "arguments": json.dumps(tool_call["args"], ensure_ascii=False), }, } @@ -3512,7 +3577,6 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list: for lc_msg in messages: if isinstance(lc_msg, AIMessage): lc_msg = _convert_from_v03_ai_message(lc_msg) - lc_msg = _convert_from_v1_to_responses(lc_msg) msg = _convert_message_to_dict(lc_msg) # "name" parameter unsupported if "name" in msg: @@ -3656,7 +3720,7 @@ def _construct_lc_result_from_responses_api( response: Response, schema: Optional[type[_BM]] = None, metadata: Optional[dict] = None, - output_version: str = "v0", + output_version: Literal["v0", "responses/v1"] = "v0", ) -> ChatResult: """Construct ChatResponse from OpenAI Response API response.""" if response.error: @@ -3795,27 +3859,6 @@ def _construct_lc_result_from_responses_api( ) if output_version == "v0": message = _convert_to_v03_ai_message(message) - elif output_version == "v1": - message = _convert_to_v1_from_responses(message) - if response.tools and any( - tool.type == "image_generation" for tool in response.tools - ): - # Get mime_time from tool definition and add to image generations - # if missing (primarily for tracing purposes). - image_generation_call = next( - tool for tool in response.tools if tool.type == "image_generation" - ) - if image_generation_call.output_format: - mime_type = f"image/{image_generation_call.output_format}" - for content_block in message.content: - # OK to mutate output message - if ( - isinstance(content_block, dict) - and content_block.get("type") == "image" - and "base64" in content_block - and "mime_type" not in block - ): - block["mime_type"] = mime_type else: pass return ChatResult(generations=[ChatGeneration(message=message)]) @@ -3829,7 +3872,7 @@ def _convert_responses_chunk_to_generation_chunk( schema: Optional[type[_BM]] = None, metadata: Optional[dict] = None, has_reasoning: bool = False, - output_version: str = "v0", + output_version: Literal["v0", "responses/v1"] = "v0", ) -> tuple[int, int, int, Optional[ChatGenerationChunk]]: def _advance(output_idx: int, sub_idx: Optional[int] = None) -> None: """Advance indexes tracked during streaming. @@ -3864,6 +3907,7 @@ def _convert_responses_chunk_to_generation_chunk( This function just identifies updates in output or sub-indexes and increments the current index accordingly. 
+ """ nonlocal current_index, current_output_index, current_sub_index if sub_idx is None: @@ -3894,29 +3938,9 @@ def _convert_responses_chunk_to_generation_chunk( annotation = chunk.annotation else: annotation = chunk.annotation.model_dump(exclude_none=True, mode="json") - if output_version == "v1": - content.append( - { - "type": "text", - "text": "", - "annotations": [annotation], - "index": current_index, - } - ) - else: - content.append({"annotations": [annotation], "index": current_index}) + content.append({"annotations": [annotation], "index": current_index}) elif chunk.type == "response.output_text.done": - if output_version == "v1": - content.append( - { - "type": "text", - "text": "", - "id": chunk.item_id, - "index": current_index, - } - ) - else: - content.append({"id": chunk.item_id, "index": current_index}) + content.append({"id": chunk.item_id, "index": current_index}) elif chunk.type == "response.created": id = chunk.response.id response_metadata["id"] = chunk.response.id # Backwards compatibility @@ -3992,34 +4016,21 @@ def _convert_responses_chunk_to_generation_chunk( content.append({"type": "refusal", "refusal": chunk.refusal}) elif chunk.type == "response.output_item.added" and chunk.item.type == "reasoning": _advance(chunk.output_index) - current_sub_index = 0 reasoning = chunk.item.model_dump(exclude_none=True, mode="json") reasoning["index"] = current_index content.append(reasoning) elif chunk.type == "response.reasoning_summary_part.added": - if output_version in ("v0", "responses/v1"): - _advance(chunk.output_index) - content.append( - { - # langchain-core uses the `index` key to aggregate text blocks. - "summary": [ - { - "index": chunk.summary_index, - "type": "summary_text", - "text": "", - } - ], - "index": current_index, - "type": "reasoning", - } - ) - else: - block: dict = {"type": "reasoning", "reasoning": ""} - if chunk.summary_index > 0: - _advance(chunk.output_index, chunk.summary_index) - block["id"] = chunk.item_id - block["index"] = current_index - content.append(block) + _advance(chunk.output_index) + content.append( + { + # langchain-core uses the `index` key to aggregate text blocks. + "summary": [ + {"index": chunk.summary_index, "type": "summary_text", "text": ""} + ], + "index": current_index, + "type": "reasoning", + } + ) elif chunk.type == "response.image_generation_call.partial_image": # Partial images are not supported yet. pass @@ -4054,15 +4065,6 @@ def _convert_responses_chunk_to_generation_chunk( AIMessageChunk, _convert_to_v03_ai_message(message, has_reasoning=has_reasoning), ) - elif output_version == "v1": - message = cast(AIMessageChunk, _convert_to_v1_from_responses(message)) - for content_block in message.content: - if ( - isinstance(content_block, dict) - and content_block.get("index", -1) > current_index - ): - # blocks were added for v1 - current_index = content_block["index"] else: pass return (