docs(xai): update for Grok 4 (#31953)

2026-06-09 10:17:00 +00:00 · 2025-07-10 11:06:37 -04:00
parent 060fc0e3c9
commit 6594eb8cc1
11 changed files with 168 additions and 61 deletions
--- a/libs/partners/xai/langchain_xai/chat_models.py
+++ b/libs/partners/xai/langchain_xai/chat_models.py
@@ -29,6 +29,9 @@ _DictOrPydantic = Union[dict, _BM]
 class ChatXAI(BaseChatOpenAI):  # type: ignore[override]
    r"""ChatXAI chat model.

+    Refer to `xAI's documentation <https://docs.x.ai/docs/api-reference#chat-completions>`__
+    for more nuanced details on the API's behavior and supported parameters.
+
    Setup:
        Install ``langchain-xai`` and set environment variable ``XAI_API_KEY``.

@@ -42,9 +45,12 @@ class ChatXAI(BaseChatOpenAI):  # type: ignore[override]
        model: str
            Name of model to use.
        temperature: float
-            Sampling temperature.
+            Sampling temperature between ``0`` and ``2``. Higher values mean more random completions,
+            while lower values (like ``0.2``) mean more focused and deterministic completions.
+            (Default: ``1``.)
        max_tokens: Optional[int]
-            Max number of tokens to generate.
+            Max number of tokens to generate. Refer to your `model's documentation <https://docs.x.ai/docs/models#model-pricing>`__
+            for the maximum number of tokens it can generate.
        logprobs: Optional[bool]
            Whether to return logprobs.

@@ -62,7 +68,7 @@ class ChatXAI(BaseChatOpenAI):  # type: ignore[override]
            from langchain_xai import ChatXAI

            llm = ChatXAI(
-                model="grok-beta",
+                model="grok-4",
                temperature=0,
                max_tokens=None,
                timeout=None,
@@ -89,7 +95,7 @@ class ChatXAI(BaseChatOpenAI):  # type: ignore[override]
                content="J'adore la programmation.",
                response_metadata={
                    'token_usage': {'completion_tokens': 9, 'prompt_tokens': 32, 'total_tokens': 41},
-                    'model_name': 'grok-beta',
+                    'model_name': 'grok-4',
                    'system_fingerprint': None,
                    'finish_reason': 'stop',
                    'logprobs': None
@@ -113,7 +119,7 @@ class ChatXAI(BaseChatOpenAI):  # type: ignore[override]
            content=' programm' id='run-1bc996b5-293f-4114-96a1-e0f755c05eb9'
            content='ation' id='run-1bc996b5-293f-4114-96a1-e0f755c05eb9'
            content='.' id='run-1bc996b5-293f-4114-96a1-e0f755c05eb9'
-            content='' response_metadata={'finish_reason': 'stop', 'model_name': 'grok-beta'} id='run-1bc996b5-293f-4114-96a1-e0f755c05eb9'
+            content='' response_metadata={'finish_reason': 'stop', 'model_name': 'grok-4'} id='run-1bc996b5-293f-4114-96a1-e0f755c05eb9'


    Async:
@@ -133,7 +139,7 @@ class ChatXAI(BaseChatOpenAI):  # type: ignore[override]
                content="J'adore la programmation.",
                response_metadata={
                    'token_usage': {'completion_tokens': 9, 'prompt_tokens': 32, 'total_tokens': 41},
-                    'model_name': 'grok-beta',
+                    'model_name': 'grok-4',
                    'system_fingerprint': None,
                    'finish_reason': 'stop',
                    'logprobs': None
@@ -141,12 +147,39 @@ class ChatXAI(BaseChatOpenAI):  # type: ignore[override]
                id='run-09371a11-7f72-4c53-8e7c-9de5c238b34c-0',
                usage_metadata={'input_tokens': 32, 'output_tokens': 9, 'total_tokens': 41})

-    Tool calling:
+    Reasoning:
+        `Certain xAI models <https://docs.x.ai/docs/models#model-pricing>`__ support reasoning,
+        which allows the model to provide reasoning content along with the response.
+
+        If provided, reasoning content is returned under the ``additional_kwargs`` field of the
+        AIMessage or AIMessageChunk.
+
+        If supported, reasoning effort can be specified in the model constructor's ``extra_body``
+        argument, which will control the amount of reasoning the model does. The value can be one of
+        ``'low'`` or ``'high'``.
+
+        .. code-block:: python
+
+            model = ChatXAI(
+                model="grok-3-mini",
+                extra_body={"reasoning_effort": "high"},
+            )
+
+        .. note::
+            As of 2025-07-10, ``reasoning_content`` is only returned in Grok 3 models, such as
+            `Grok 3 Mini <https://docs.x.ai/docs/models/grok-3-mini>`__.
+
+        .. note::
+            Note that in `Grok 4 <https://docs.x.ai/docs/models/grok-4-0709>`__, as of 2025-07-10,
+            reasoning is not exposed in ``reasoning_content`` (other than initial ``'Thinking...'`` text),
+            reasoning cannot be disabled, and the ``reasoning_effort`` cannot be specified.
+
+    Tool calling / function calling:
        .. code-block:: python

            from pydantic import BaseModel, Field

-            llm = ChatXAI(model="grok-beta")
+            llm = ChatXAI(model="grok-4")

            class GetWeather(BaseModel):
                '''Get the current weather in a given location'''
@@ -168,7 +201,6 @@ class ChatXAI(BaseChatOpenAI):  # type: ignore[override]
            )
            ai_msg.tool_calls

-
        .. code-block:: python

            [
@@ -186,6 +218,67 @@ class ChatXAI(BaseChatOpenAI):  # type: ignore[override]
                }
            ]

+        .. note::
+            With stream response, the tool / function call will be returned in whole in a
+            single chunk, instead of being streamed across chunks.
+
+        Tool choice can be controlled by setting the ``tool_choice`` parameter in the model
+        constructor's ``extra_body`` argument. For example, to disable tool / function calling:
+        .. code-block:: python
+
+            llm = ChatXAI(model="grok-4", extra_body={"tool_choice": "none"})
+
+        To require that the model always calls a tool / function, set ``tool_choice`` to ``'required'``:
+
+        .. code-block:: python
+
+            llm = ChatXAI(model="grok-4", extra_body={"tool_choice": "required"})
+
+        To specify a tool / function to call, set ``tool_choice`` to the name of the tool / function:
+
+        .. code-block:: python
+
+            from pydantic import BaseModel, Field
+
+            llm = ChatXAI(
+                model="grok-4",
+                extra_body={
+                    "tool_choice": {"type": "function", "function": {"name": "GetWeather"}}
+                },
+            )
+
+            class GetWeather(BaseModel):
+                \"\"\"Get the current weather in a given location\"\"\"
+
+                location: str = Field(..., description='The city and state, e.g. San Francisco, CA')
+
+
+            class GetPopulation(BaseModel):
+                \"\"\"Get the current population in a given location\"\"\"
+
+                location: str = Field(..., description='The city and state, e.g. San Francisco, CA')
+
+
+            llm_with_tools = llm.bind_tools([GetWeather, GetPopulation])
+            ai_msg = llm_with_tools.invoke(
+                "Which city is bigger: LA or NY?",
+            )
+            ai_msg.tool_calls
+
+        The resulting tool call would be:
+
+        .. code-block:: python
+
+            [{'name': 'GetWeather',
+            'args': {'location': 'Los Angeles, CA'},
+            'id': 'call_81668711',
+            'type': 'tool_call'}]
+
+    Parallel tool calling / parallel function calling:
+        By default, parallel tool / function calling is enabled, so you can process
+        multiple function calls in one request/response cycle. When two or more tool calls
+        are required, all of the tool call requests will be included in the response body.
+
    Structured output:
        .. code-block:: python

@@ -222,7 +315,7 @@ class ChatXAI(BaseChatOpenAI):  # type: ignore[override]
            from langchain_xai import ChatXAI

            llm = ChatXAI(
-                model="grok-3-latest",
+                model="grok-4",
                search_parameters={
                    "mode": "auto",
                    # Example optional parameters below:
@@ -234,6 +327,10 @@ class ChatXAI(BaseChatOpenAI):  # type: ignore[override]

            llm.invoke("Provide me a digest of world news in the last 24 hours.")

+        .. note::
+            `Citations <https://docs.x.ai/docs/guides/live-search#returning-citations>`__
+            are only available in `Grok 3 <https://docs.x.ai/docs/models/grok-3>`__.
+
    Token usage:
        .. code-block:: python

@@ -275,7 +372,7 @@ class ChatXAI(BaseChatOpenAI):  # type: ignore[override]
                    'prompt_tokens': 19,
                    'total_tokens': 23
                    },
-                'model_name': 'grok-beta',
+                'model_name': 'grok-4',
                'system_fingerprint': None,
                'finish_reason': 'stop',
                'logprobs': None
@@ -283,7 +380,7 @@ class ChatXAI(BaseChatOpenAI):  # type: ignore[override]

    """  # noqa: E501

-    model_name: str = Field(alias="model")
+    model_name: str = Field(default="grok-4", alias="model")
    """Model name to use."""
    xai_api_key: Optional[SecretStr] = Field(
        alias="api_key",