From 54542b93857e150bc0fab6dd2fb0df7e5dea69f2 Mon Sep 17 00:00:00 2001
From: Copilot <198982749+Copilot@users.noreply.github.com>
Date: Thu, 24 Jul 2025 16:43:16 -0400
Subject: [PATCH] docs(openai): add comprehensive documentation and examples for `extra_body` + others (#32149)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR addresses the common issue where users struggle to pass custom parameters to OpenAI-compatible APIs like LM Studio, vLLM, and others. The problem occurs when users try to use `model_kwargs` for custom parameters, which causes API errors.

## Problem

Users attempting to pass custom parameters (like LM Studio's `ttl` parameter) were getting errors:

```python
# ❌ This approach fails
llm = ChatOpenAI(
    base_url="http://localhost:1234/v1",
    model="mlx-community/QwQ-32B-4bit",
    model_kwargs={"ttl": 5}  # Causes TypeError: unexpected keyword argument 'ttl'
)
```

## Solution

The `extra_body` parameter is the correct way to pass custom parameters to OpenAI-compatible APIs:

```python
# ✅ This approach works correctly
llm = ChatOpenAI(
    base_url="http://localhost:1234/v1",
    model="mlx-community/QwQ-32B-4bit",
    extra_body={"ttl": 5}  # Custom parameters go in extra_body
)
```

## Changes Made

1. **Enhanced Documentation**: Updated the `extra_body` parameter docstring with comprehensive examples for LM Studio, vLLM, and other providers
2. **Added Documentation Section**: Created a new "OpenAI-compatible APIs" section in the main class docstring with practical examples
3. **Unit Tests**: Added tests to verify `extra_body` functionality works correctly:
   - `test_extra_body_parameter()`: Verifies custom parameters are included in request payload
   - `test_extra_body_with_model_kwargs()`: Ensures `extra_body` and `model_kwargs` work together
4. **Clear Guidance**: Documented when to use `extra_body` vs `model_kwargs`

## Examples Added

**LM Studio with TTL (auto-eviction):**

```python
ChatOpenAI(
    base_url="http://localhost:1234/v1",
    api_key="lm-studio",
    model="mlx-community/QwQ-32B-4bit",
    extra_body={"ttl": 300}  # Auto-evict after 5 minutes
)
```

**vLLM with custom sampling:**

```python
ChatOpenAI(
    base_url="http://localhost:8000/v1",
    api_key="EMPTY",
    model="meta-llama/Llama-2-7b-chat-hf",
    extra_body={
        "use_beam_search": True,
        "best_of": 4
    }
)
```

## Why This Works

- `model_kwargs` parameters are passed directly to the OpenAI client's `create()` method, causing errors for non-standard parameters
- `extra_body` parameters are included in the HTTP request body, which is exactly what OpenAI-compatible APIs expect for custom parameters

Fixes #32115.
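## Verifying the Behavior

As a quick local sanity check (mirroring the new unit tests), the request payload that `ChatOpenAI` builds can be inspected without making any network call, using the private `_get_request_payload` helper exercised by the tests. A minimal sketch, assuming a local LM Studio-style server; the base URL and model name are illustrative placeholders:

```python
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(
    base_url="http://localhost:1234/v1",
    api_key="lm-studio",  # local OpenAI-compatible servers accept any string
    model="mlx-community/QwQ-32B-4bit",
    extra_body={"ttl": 300},
)

# Build the request body without sending a request; custom parameters
# should appear under the "extra_body" key rather than at the top level.
payload = llm._get_request_payload([HumanMessage(content="Hello")])
assert payload["extra_body"]["ttl"] == 300
```

Since `_get_request_payload` is a private helper, treat this as a debugging aid rather than a stable API.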
--------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: mdrxy <61371264+mdrxy@users.noreply.github.com> Co-authored-by: Mason Daugherty Co-authored-by: Mason Daugherty --- README.md | 7 +- libs/partners/anthropic/README.md | 6 +- libs/partners/exa/README.md | 50 +------ libs/partners/fireworks/README.md | 16 +- libs/partners/groq/README.md | 5 +- libs/partners/huggingface/README.md | 1 + libs/partners/mistralai/README.md | 2 +- libs/partners/nomic/README.md | 2 +- libs/partners/openai/README.md | 12 +- .../langchain_openai/chat_models/base.py | 141 +++++++++++++++--- .../tests/unit_tests/chat_models/test_base.py | 42 +++++- libs/partners/openai/uv.lock | 4 +- libs/partners/perplexity/README.md | 2 +- libs/partners/prompty/README.md | 2 + libs/standard-tests/README.md | 56 +++---- pyproject.toml | 2 +- uv.lock | 6 +- 17 files changed, 237 insertions(+), 119 deletions(-) diff --git a/README.md b/README.md index 7d729e7862f..02a42335e83 100644 --- a/README.md +++ b/README.md @@ -40,9 +40,10 @@ controllable agent workflows. ## Why use LangChain? LangChain helps developers build applications powered by LLMs through a standard -interface for models, embeddings, vector stores, and more. +interface for models, embeddings, vector stores, and more. Use LangChain for: + - **Real-time data augmentation**. Easily connect LLMs to diverse data sources and external / internal systems, drawing from LangChain’s vast library of integrations with model providers, tools, vector stores, retrievers, and more. @@ -52,9 +53,10 @@ frontier evolves, adapt quickly — LangChain’s abstractions keep you moving w losing momentum. ## LangChain’s ecosystem + While the LangChain framework can be used standalone, it also integrates seamlessly with any LangChain product, giving developers a full suite of tools when building LLM -applications. +applications. To improve your LLM application development, pair LangChain with: @@ -73,6 +75,7 @@ teams — and iterate quickly with visual prototyping in [LangGraph Studio](https://langchain-ai.github.io/langgraph/concepts/langgraph_studio/). ## Additional resources + - [Tutorials](https://python.langchain.com/docs/tutorials/): Simple walkthroughs with guided examples on getting started with LangChain. - [How-to Guides](https://python.langchain.com/docs/how_to/): Quick, actionable code diff --git a/libs/partners/anthropic/README.md b/libs/partners/anthropic/README.md index 404972e8bdc..91cfce96324 100644 --- a/libs/partners/anthropic/README.md +++ b/libs/partners/anthropic/README.md @@ -10,11 +10,11 @@ This package contains the LangChain integration for Anthropic's generative model Anthropic recommends using their chat models over text completions. -You can see their recommended models [here](https://docs.anthropic.com/claude/docs/models-overview#model-recommendations). +You can see their recommended models [in the Anthropic docs](https://docs.anthropic.com/claude/docs/models-overview#model-recommendations). To use, you should have an Anthropic API key configured. 
Initialize the model as: -``` +```python from langchain_anthropic import ChatAnthropic from langchain_core.messages import AIMessage, HumanMessage @@ -40,4 +40,4 @@ from langchain_anthropic import AnthropicLLM model = AnthropicLLM(model="claude-2.1", temperature=0, max_tokens=1024) response = model.invoke("The best restaurant in San Francisco is: ") -``` \ No newline at end of file +``` diff --git a/libs/partners/exa/README.md b/libs/partners/exa/README.md index 85dee325b75..2b3985f7f5e 100644 --- a/libs/partners/exa/README.md +++ b/libs/partners/exa/README.md @@ -72,33 +72,6 @@ search_results = search_tool._run( print("Search Results:", search_results) ``` -### Advanced Features - -You can use advanced features like text limits, summaries, and live crawling: - -```python -from langchain_exa import ExaSearchResults - -# Initialize the ExaSearchResults tool -search_tool = ExaSearchResults(exa_api_key="YOUR API KEY") - -# Perform a search query with advanced options -search_results = search_tool._run( - query="Latest AI research papers", - num_results=10, # Number of results (1-100) - type="auto", # Can be "neural", "keyword", or "auto" - livecrawl="always", # Can be "always", "fallback", or "never" - summary=True, # Get an AI-generated summary of each result - text_contents_options={"max_characters": 2000} # Limit text length -) - -# With custom summary prompt -search_results_with_custom_summary = search_tool._run( - query="Latest AI research papers", - summary={"query": "generate one liner"} # Custom summary prompt -) -``` - ## Exa Find Similar Results You can run the ExaFindSimilarResults module as follows @@ -120,20 +93,13 @@ similar_results = find_similar_tool._run( print("Similar Results:", similar_results) ``` -### Advanced Features +## Configuration Options -```python -from langchain_exa import ExaFindSimilarResults +All Exa tools support the following common parameters: -# Initialize the ExaFindSimilarResults tool -find_similar_tool = ExaFindSimilarResults(exa_api_key="YOUR API KEY") - -# Find similar results with advanced options -similar_results = find_similar_tool._run( - url="http://espn.com", - num_results=10, # Number of results (1-100) - livecrawl="fallback", # Can be "always", "fallback", or "never" - summary=True, # Get an AI-generated summary of each result - text_contents_options={"max_characters": 1500} # Limit text length -) -``` \ No newline at end of file +- `num_results` (1-100): Number of search results to return +- `type`: Search type - "neural", "keyword", or "auto" +- `livecrawl`: Live crawling mode - "always", "fallback", or "never" +- `summary`: Get AI-generated summaries (True/False or custom prompt dict) +- `text_contents_options`: Dict to limit text length (e.g. `{"max_characters": 2000}`) +- `highlights`: Include highlighted text snippets (True/False) diff --git a/libs/partners/fireworks/README.md b/libs/partners/fireworks/README.md index 373bbc6ef16..b127834a930 100644 --- a/libs/partners/fireworks/README.md +++ b/libs/partners/fireworks/README.md @@ -2,7 +2,6 @@ This is the partner package for tying Fireworks.ai and LangChain. Fireworks really strive to provide good support for LangChain use cases, so if you run into any issues please let us know. 
You can reach out to us [in our Discord channel](https://discord.com/channels/1137072072808472616/)
-
 ## Installation

 To use the `langchain-fireworks` package, follow these installation steps:
@@ -11,8 +10,6 @@ To use the `langchain-fireworks` package, follow these installation steps:
 pip install langchain-fireworks
 ```
-
-
 ## Basic usage

 ### Setting up
@@ -21,12 +18,15 @@ pip install langchain-fireworks
 Once you've signed in and obtained an API key, follow these steps to set the `FIREWORKS_API_KEY` environment variable:
 - **Linux/macOS:** Open your terminal and execute the following command:
+
 ```bash
 export FIREWORKS_API_KEY='your_api_key'
 ```
+
 **Note:** To make this environment variable persistent across terminal sessions, add the above line to your `~/.bashrc`, `~/.bash_profile`, or `~/.zshrc` file.

 - **Windows:** For Command Prompt, use:
+
 ```cmd
 set FIREWORKS_API_KEY=your_api_key
 ```
@@ -44,7 +44,6 @@ llm = Fireworks(
 )
 ```
-
 ### Calling the Model Directly

 You can call the model directly with string prompts to get completions.
@@ -66,15 +65,14 @@ output = llm.generate(
 print(output.generations)
 ```
-
-
-
-
 ## Advanced usage
+
 ### Tool use: LangChain Agent + Fireworks function calling model
-Please checkout how to teach Fireworks function calling model to use a calculator [here](https://github.com/fw-ai/cookbook/blob/main/learn/function-calling/notebooks_langchain/fireworks_langchain_tool_usage.ipynb).
+
+Please check out how to teach the Fireworks function-calling model to use a calculator [in this notebook](https://github.com/fw-ai/cookbook/blob/main/learn/function-calling/notebooks_langchain/fireworks_langchain_tool_usage.ipynb).

 Fireworks focus on delivering the best experience for fast model inference as well as tool use. You can check out [our blog](https://fireworks.ai/blog/firefunction-v1-gpt-4-level-function-calling) for more details on how it compares to GPT-4, the punchline is that it is on par with GPT-4 in terms of function calling use cases, but it is way faster and much cheaper.

 ### RAG: LangChain agent + Fireworks function calling model + MongoDB + Nomic AI embeddings
+
 Please check out the [cookbook here](https://github.com/fw-ai/cookbook/blob/main/integrations/MongoDB/project_rag_with_mongodb/mongodb_agent.ipynb) for an end to end flow
diff --git a/libs/partners/groq/README.md b/libs/partners/groq/README.md
index 3e7388d2835..9aed0c36122 100644
--- a/libs/partners/groq/README.md
+++ b/libs/partners/groq/README.md
@@ -12,8 +12,8 @@ Beyond the architecture, our software is designed to empower developers like you

 Want more Groq? Check out our [website](https://groq.com) for more resources and join our [Discord community](https://discord.gg/JvNsBDKeCG) to connect with our developers!
-
 ## Installation and Setup
+
 Install the integration package:

 ```bash
@@ -27,6 +27,7 @@ export GROQ_API_KEY=gsk_...
 ```

 ## Chat Model
+
 See a [usage example](https://python.langchain.com/docs/integrations/chat/groq).

 ## Development
@@ -67,4 +68,4 @@ Run additional tests and linters to ensure your code is up to standard.
```bash make lint spell_check check_imports -``` \ No newline at end of file +``` diff --git a/libs/partners/huggingface/README.md b/libs/partners/huggingface/README.md index 5890c284dda..c069d91f226 100644 --- a/libs/partners/huggingface/README.md +++ b/libs/partners/huggingface/README.md @@ -5,6 +5,7 @@ This package contains the LangChain integrations for huggingface related classes ## Installation and Setup - Install the LangChain partner package + ```bash pip install langchain-huggingface ``` diff --git a/libs/partners/mistralai/README.md b/libs/partners/mistralai/README.md index 752544e10ce..c28ed0e0bd4 100644 --- a/libs/partners/mistralai/README.md +++ b/libs/partners/mistralai/README.md @@ -54,4 +54,4 @@ With `MistralAIEmbeddings`, you can directly use the default model 'mistral-embe ### Documents -`res_document = embedding.embed_documents(["test1", "another test"])` \ No newline at end of file +`res_document = embedding.embed_documents(["test1", "another test"])` diff --git a/libs/partners/nomic/README.md b/libs/partners/nomic/README.md index c57ac50e13f..8ca4faf8a04 100644 --- a/libs/partners/nomic/README.md +++ b/libs/partners/nomic/README.md @@ -20,4 +20,4 @@ And you should configure credentials by setting the following environment variab from langchain_nomic import NomicEmbeddings embeddings = NomicEmbeddings() -embeddings.embed_query("What is the meaning of life?") \ No newline at end of file +embeddings.embed_query("What is the meaning of life?") diff --git a/libs/partners/openai/README.md b/libs/partners/openai/README.md index 70176f8a482..ad4beca013d 100644 --- a/libs/partners/openai/README.md +++ b/libs/partners/openai/README.md @@ -5,9 +5,11 @@ This package contains the LangChain integrations for OpenAI through their `opena ## Installation and Setup - Install the LangChain partner package + ```bash pip install langchain-openai ``` + - Get an OpenAI api key and set it as an environment variable (`OPENAI_API_KEY`) ## Chat model @@ -19,11 +21,12 @@ from langchain_openai import ChatOpenAI ``` If you are using a model hosted on `Azure`, you should use different wrapper for that: + ```python from langchain_openai import AzureChatOpenAI ``` -For a more detailed walkthrough of the `Azure` wrapper, see [here](http://python.langchain.com/docs/integrations/chat/azure_chat_openai) +For a more detailed walkthrough of the `Azure` wrapper, see [AzureChatOpenAI](http://python.langchain.com/docs/integrations/chat/azure_chat_openai) ## Text Embedding Model @@ -34,11 +37,12 @@ from langchain_openai import OpenAIEmbeddings ``` If you are using a model hosted on `Azure`, you should use different wrapper for that: + ```python from langchain_openai import AzureOpenAIEmbeddings ``` -For a more detailed walkthrough of the `Azure` wrapper, see [here](https://python.langchain.com/docs/integrations/text_embedding/azureopenai) +For a more detailed walkthrough of the `Azure` wrapper, see [AzureOpenAIEmbeddings](https://python.langchain.com/docs/integrations/text_embedding/azureopenai) ## LLM (Legacy) @@ -49,7 +53,9 @@ from langchain_openai import OpenAI ``` If you are using a model hosted on `Azure`, you should use different wrapper for that: + ```python from langchain_openai import AzureOpenAI ``` -For a more detailed walkthrough of the `Azure` wrapper, see [here](http://python.langchain.com/docs/integrations/llms/azure_openai) + +For a more detailed walkthrough of the `Azure` wrapper, see [Azure OpenAI](http://python.langchain.com/docs/integrations/llms/azure_openai) diff --git 
a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py
index 7a11985dc79..9cfd47b5334 100644
--- a/libs/partners/openai/langchain_openai/chat_models/base.py
+++ b/libs/partners/openai/langchain_openai/chat_models/base.py
@@ -553,7 +553,22 @@ class BaseChatOpenAI(BaseChatModel):
     """Default stop sequences."""
     extra_body: Optional[Mapping[str, Any]] = None
     """Optional additional JSON properties to include in the request parameters when
-    making requests to OpenAI compatible APIs, such as vLLM."""
+    making requests to OpenAI-compatible APIs, such as vLLM, LM Studio, or other
+    providers.
+
+    This is the recommended way to pass custom parameters that are specific to your
+    OpenAI-compatible API provider but not part of the standard OpenAI API.
+
+    Examples:
+    - LM Studio TTL parameter: ``extra_body={"ttl": 300}``
+    - vLLM custom parameters: ``extra_body={"use_beam_search": True}``
+    - Any other provider-specific parameters
+
+    .. note::
+        Do NOT use ``model_kwargs`` for custom parameters that are not part of the
+        standard OpenAI API, as this will cause errors when making API calls. Use
+        ``extra_body`` instead.
+    """
     include_response_headers: bool = False
     """Whether to include response headers in the output message response_metadata."""
     disabled_params: Optional[dict[str, Any]] = Field(default=None)
@@ -579,11 +594,11 @@ class BaseChatOpenAI(BaseChatModel):

         Supported values:

-        - ``"file_search_call.results"``
-        - ``"message.input_image.image_url"``
-        - ``"computer_call_output.output.image_url"``
-        - ``"reasoning.encrypted_content"``
-        - ``"code_interpreter_call.outputs"``
+        - ``'file_search_call.results'``
+        - ``'message.input_image.image_url'``
+        - ``'computer_call_output.output.image_url'``
+        - ``'reasoning.encrypted_content'``
+        - ``'code_interpreter_call.outputs'``

         .. versionadded:: 0.3.24
     """
@@ -658,8 +673,8 @@ class BaseChatOpenAI(BaseChatModel):

         Supported values:

-        - ``"v0"``: AIMessage format as of langchain-openai 0.3.x.
-        - ``"responses/v1"``: Formats Responses API output
+        - ``'v0'``: AIMessage format as of langchain-openai 0.3.x.
+        - ``'responses/v1'``: Formats Responses API output
           items into AIMessage content blocks.

         Currently only impacts the Responses API. ``output_version="responses/v1"`` is
@@ -1560,8 +1575,9 @@ class BaseChatOpenAI(BaseChatModel):

         Assumes model is compatible with OpenAI function-calling API.

-        NOTE: Using bind_tools is recommended instead, as the `functions` and
-        `function_call` request parameters are officially marked as deprecated by
+        .. note::
+            Using ``bind_tools()`` is recommended instead, as the ``functions`` and
+            ``function_call`` request parameters are officially marked as deprecated by
             OpenAI.

         Args:
@@ -1622,10 +1638,10 @@ class BaseChatOpenAI(BaseChatModel):
                 :meth:`langchain_core.utils.function_calling.convert_to_openai_tool`.
             tool_choice: Which tool to require the model to call. Options are:

-                - str of the form ``"<<tool_name>>"``: calls <<tool_name>> tool.
-                - ``"auto"``: automatically selects a tool (including no tool).
-                - ``"none"``: does not call a tool.
-                - ``"any"`` or ``"required"`` or ``True``: force at least one tool to be called.
+                - str of the form ``'<<tool_name>>'``: calls <<tool_name>> tool.
+                - ``'auto'``: automatically selects a tool (including no tool).
+                - ``'none'``: does not call a tool.
+                - ``'any'`` or ``'required'`` or ``True``: force at least one tool to be called.
                 - dict of the form ``{"type": "function", "function": {"name": <<tool_name>>}}``: calls <<tool_name>> tool.
- ``False`` or ``None``: no effect, default OpenAI behavior.

             strict: If True, model output is guaranteed to exactly match the JSON Schema
@@ -1760,12 +1776,12 @@ class BaseChatOpenAI(BaseChatModel):
             tools: A list of tool-like objects to bind to the chat model. Requires that:

-                - ``method`` is ``"json_schema"`` (default).
+                - ``method`` is ``'json_schema'`` (default).
                 - ``strict=True``
                 - ``include_raw=True``

                 If a model elects to call a
-                tool, the resulting ``AIMessage`` in ``"raw"`` will include tool calls.
+                tool, the resulting ``AIMessage`` in ``'raw'`` will include tool calls.

             .. dropdown:: Example
@@ -2628,6 +2644,91 @@ class ChatOpenAI(BaseChatOpenAI):  # type: ignore[override]

         See OpenAI `docs `_ for more detail.

+    .. dropdown:: OpenAI-compatible APIs
+
+        ``ChatOpenAI`` can be used with OpenAI-compatible APIs like LM Studio, vLLM,
+        Ollama, and others. To use custom parameters specific to these providers,
+        use the ``extra_body`` parameter.
+
+        **LM Studio example** with TTL (auto-eviction):
+
+        .. code-block:: python
+
+            from langchain_openai import ChatOpenAI
+
+            llm = ChatOpenAI(
+                base_url="http://localhost:1234/v1",
+                api_key="lm-studio",  # Can be any string
+                model="mlx-community/QwQ-32B-4bit",
+                temperature=0,
+                extra_body={
+                    "ttl": 300
+                },  # Auto-evict model after 5 minutes of inactivity
+            )
+
+        **vLLM example** with custom parameters:
+
+        .. code-block:: python
+
+            llm = ChatOpenAI(
+                base_url="http://localhost:8000/v1",
+                api_key="EMPTY",
+                model="meta-llama/Llama-2-7b-chat-hf",
+                extra_body={"use_beam_search": True, "best_of": 4},
+            )
+
+    .. dropdown:: model_kwargs vs extra_body
+
+        Use the correct parameter for different types of API arguments:
+
+        Use ``model_kwargs`` for:
+
+        - Standard OpenAI API parameters not explicitly defined as class parameters
+        - Parameters that should be flattened into the top-level request payload
+        - Examples: ``max_completion_tokens``, ``stream_options``, ``modalities``, ``audio``
+
+        .. code-block:: python
+
+            # Standard OpenAI parameters
+            llm = ChatOpenAI(
+                model="gpt-4o",
+                model_kwargs={
+                    "stream_options": {"include_usage": True},
+                    "max_completion_tokens": 300,
+                    "modalities": ["text", "audio"],
+                    "audio": {"voice": "alloy", "format": "wav"},
+                },
+            )
+
+        Use ``extra_body`` for:
+
+        - Custom parameters specific to OpenAI-compatible providers (vLLM, LM Studio, etc.)
+        - Parameters that need to be nested under ``extra_body`` in the request
+        - Any non-standard OpenAI API parameters
+
+        .. code-block:: python
+
+            # Custom provider parameters
+            llm = ChatOpenAI(
+                base_url="http://localhost:8000/v1",
+                model="custom-model",
+                extra_body={
+                    "use_beam_search": True,  # vLLM parameter
+                    "best_of": 4,  # vLLM parameter
+                    "ttl": 300,  # LM Studio parameter
+                },
+            )
+
+        **Key Differences:**
+
+        - ``model_kwargs``: Parameters are merged into the **top level** of the request payload
+        - ``extra_body``: Parameters are nested under the ``extra_body`` key in the request
+
+        .. important::
+
+            Always use ``extra_body`` for custom parameters, **not** ``model_kwargs``.
+            Using ``model_kwargs`` for non-OpenAI parameters will cause API errors.
+
     """  # noqa: E501

     max_tokens: Optional[int] = Field(default=None, alias="max_completion_tokens")
@@ -2780,17 +2881,17 @@ class ChatOpenAI(BaseChatOpenAI):  # type: ignore[override]

                 If schema is specified via TypedDict or JSON schema, ``strict`` is not
                 enabled by default. Pass ``strict=True`` to enable it.

-                Note: ``strict`` can only be non-null if ``method`` is
-                ``"json_schema"`` or ``"function_calling"``.
+                .. 
note:: + ``strict`` can only be non-null if ``method`` is ``'json_schema'`` or ``'function_calling'``. tools: A list of tool-like objects to bind to the chat model. Requires that: - - ``method`` is ``"json_schema"`` (default). + - ``method`` is ``'json_schema'`` (default). - ``strict=True`` - ``include_raw=True`` If a model elects to call a - tool, the resulting ``AIMessage`` in ``"raw"`` will include tool calls. + tool, the resulting ``AIMessage`` in ``'raw'`` will include tool calls. .. dropdown:: Example diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py index d387ee92170..991fd5474a7 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py @@ -44,7 +44,7 @@ from openai.types.responses.response_usage import ( InputTokensDetails, OutputTokensDetails, ) -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, SecretStr from typing_extensions import TypedDict from langchain_openai import ChatOpenAI @@ -2528,3 +2528,43 @@ def test_make_computer_call_output_from_message() -> None: } ], } + + +def test_extra_body_parameter() -> None: + """Test that extra_body parameter is properly included in request payload.""" + llm = ChatOpenAI( + model="gpt-4o-mini", + api_key=SecretStr( + "test-api-key" + ), # Set a fake API key to avoid validation error + extra_body={"ttl": 300, "custom_param": "test_value"}, + ) + + messages = [HumanMessage(content="Hello")] + payload = llm._get_request_payload(messages) + + # Verify extra_body is included in the payload + assert "extra_body" in payload + assert payload["extra_body"]["ttl"] == 300 + assert payload["extra_body"]["custom_param"] == "test_value" + + +def test_extra_body_with_model_kwargs() -> None: + """Test that extra_body and model_kwargs work together correctly.""" + llm = ChatOpenAI( + model="gpt-4o-mini", + api_key=SecretStr( + "test-api-key" + ), # Set a fake API key to avoid validation error + temperature=0.5, + extra_body={"ttl": 600}, + model_kwargs={"custom_non_openai_param": "test_value"}, + ) + + messages = [HumanMessage(content="Hello")] + payload = llm._get_request_payload(messages) + + # Verify both extra_body and model_kwargs are in payload + assert payload["extra_body"]["ttl"] == 600 + assert payload["custom_non_openai_param"] == "test_value" + assert payload["temperature"] == 0.5 diff --git a/libs/partners/openai/uv.lock b/libs/partners/openai/uv.lock index 3bcdd8f46a2..ee1a9e1c624 100644 --- a/libs/partners/openai/uv.lock +++ b/libs/partners/openai/uv.lock @@ -480,7 +480,7 @@ wheels = [ [[package]] name = "langchain-core" -version = "0.3.68" +version = "0.3.70" source = { editable = "../../core" } dependencies = [ { name = "jsonpatch" }, @@ -496,7 +496,7 @@ dependencies = [ requires-dist = [ { name = "jsonpatch", specifier = ">=1.33,<2.0" }, { name = "langsmith", specifier = ">=0.3.45" }, - { name = "packaging", specifier = ">=23.2,<25" }, + { name = "packaging", specifier = ">=23.2" }, { name = "pydantic", specifier = ">=2.7.4" }, { name = "pyyaml", specifier = ">=5.3" }, { name = "tenacity", specifier = ">=8.1.0,!=8.4.0,<10.0.0" }, diff --git a/libs/partners/perplexity/README.md b/libs/partners/perplexity/README.md index 5e9cc31d7a4..aa715fd7619 100644 --- a/libs/partners/perplexity/README.md +++ b/libs/partners/perplexity/README.md @@ -26,4 +26,4 @@ from langchain.chat_models import init_chat_model llm = 
init_chat_model("llama-3.1-sonar-small-128k-online", model_provider="perplexity") llm.invoke("Hello, world!") -``` \ No newline at end of file +``` diff --git a/libs/partners/prompty/README.md b/libs/partners/prompty/README.md index eb5bf8f2a1a..7f4493be65f 100644 --- a/libs/partners/prompty/README.md +++ b/libs/partners/prompty/README.md @@ -17,9 +17,11 @@ from langchain_prompty import create_chat_prompt prompt = create_chat_prompt('') ``` + Then you can use the prompt for next steps. Here is an example .prompty file: + ```prompty --- name: Basic Prompt diff --git a/libs/standard-tests/README.md b/libs/standard-tests/README.md index 029a9bafb76..77f6780062a 100644 --- a/libs/standard-tests/README.md +++ b/libs/standard-tests/README.md @@ -13,13 +13,13 @@ Not pinning your version will ensure you always have the latest tests, but it ma also break your CI if we introduce tests that your integration doesn't pass. Pip: - + ```bash pip install -U langchain-tests ``` Poetry: - + ```bash poetry add langchain-tests ``` @@ -32,44 +32,44 @@ To add standard tests to an integration package's e.g. ChatModel, you need to cr 2. An integration test class that inherits from ChatModelIntegrationTests `tests/unit_tests/test_standard.py`: - -```python -"""Standard LangChain interface tests""" -from typing import Type + ```python + """Standard LangChain interface tests""" -import pytest -from langchain_core.language_models import BaseChatModel -from langchain_tests.unit_tests import ChatModelUnitTests + from typing import Type -from langchain_parrot_chain import ChatParrotChain + import pytest + from langchain_core.language_models import BaseChatModel + from langchain_tests.unit_tests import ChatModelUnitTests + + from langchain_parrot_chain import ChatParrotChain -class TestParrotChainStandard(ChatModelUnitTests): - @pytest.fixture - def chat_model_class(self) -> Type[BaseChatModel]: - return ChatParrotChain -``` + class TestParrotChainStandard(ChatModelUnitTests): + @pytest.fixture + def chat_model_class(self) -> Type[BaseChatModel]: + return ChatParrotChain + ``` `tests/integration_tests/test_standard.py`: - -```python -"""Standard LangChain interface tests""" -from typing import Type + ```python + """Standard LangChain interface tests""" -import pytest -from langchain_core.language_models import BaseChatModel -from langchain_tests.integration_tests import ChatModelIntegrationTests + from typing import Type -from langchain_parrot_chain import ChatParrotChain + import pytest + from langchain_core.language_models import BaseChatModel + from langchain_tests.integration_tests import ChatModelIntegrationTests + + from langchain_parrot_chain import ChatParrotChain -class TestParrotChainStandard(ChatModelIntegrationTests): - @pytest.fixture - def chat_model_class(self) -> Type[BaseChatModel]: - return ChatParrotChain -``` + class TestParrotChainStandard(ChatModelIntegrationTests): + @pytest.fixture + def chat_model_class(self) -> Type[BaseChatModel]: + return ChatParrotChain + ``` ## Reference diff --git a/pyproject.toml b/pyproject.toml index ee8406c09f6..be2ae6a0241 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,4 +95,4 @@ pydocstyle = { convention = "google" } "F841", # allow assignments to variables that are never read -- it's example code ] -"!libs/langchain/langchain/model_laboratory.py" = ["D"] \ No newline at end of file +"!libs/langchain/langchain/model_laboratory.py" = ["D"] diff --git a/uv.lock b/uv.lock index adf48d5e66f..161fac01490 100644 --- a/uv.lock +++ b/uv.lock @@ -2162,7 +2162,7 @@ 
wheels = [ [[package]] name = "langchain" -version = "0.3.26" +version = "0.3.27" source = { editable = "libs/langchain" } dependencies = [ { name = "async-timeout", marker = "python_full_version < '3.11'" }, @@ -2332,7 +2332,7 @@ dependencies = [ [[package]] name = "langchain-chroma" -version = "0.2.4" +version = "0.2.5" source = { editable = "libs/partners/chroma" } dependencies = [ { name = "chromadb" }, @@ -2403,7 +2403,7 @@ wheels = [ [[package]] name = "langchain-core" -version = "0.3.70" +version = "0.3.72" source = { editable = "libs/core" } dependencies = [ { name = "jsonpatch" },