docs(xai): update for Grok 4 (#31953)

This commit is contained in:
Mason Daugherty
2025-07-10 11:06:37 -04:00
committed by GitHub
parent 060fc0e3c9
commit 6594eb8cc1
11 changed files with 168 additions and 61 deletions

View File

@@ -29,6 +29,9 @@ _DictOrPydantic = Union[dict, _BM]
class ChatXAI(BaseChatOpenAI): # type: ignore[override]
r"""ChatXAI chat model.
Refer to `xAI's documentation <https://docs.x.ai/docs/api-reference#chat-completions>`__
for more nuanced details on the API's behavior and supported parameters.
Setup:
Install ``langchain-xai`` and set environment variable ``XAI_API_KEY``.
@@ -42,9 +45,12 @@ class ChatXAI(BaseChatOpenAI): # type: ignore[override]
model: str
Name of model to use.
temperature: float
Sampling temperature.
Sampling temperature between ``0`` and ``2``. Higher values mean more random completions,
while lower values (like ``0.2``) mean more focused and deterministic completions.
(Default: ``1``.)
max_tokens: Optional[int]
Max number of tokens to generate.
Max number of tokens to generate. Refer to your `model's documentation <https://docs.x.ai/docs/models#model-pricing>`__
for the maximum number of tokens it can generate.
logprobs: Optional[bool]
Whether to return logprobs.
@@ -62,7 +68,7 @@ class ChatXAI(BaseChatOpenAI): # type: ignore[override]
from langchain_xai import ChatXAI
llm = ChatXAI(
model="grok-beta",
model="grok-4",
temperature=0,
max_tokens=None,
timeout=None,
@@ -89,7 +95,7 @@ class ChatXAI(BaseChatOpenAI): # type: ignore[override]
content="J'adore la programmation.",
response_metadata={
'token_usage': {'completion_tokens': 9, 'prompt_tokens': 32, 'total_tokens': 41},
'model_name': 'grok-beta',
'model_name': 'grok-4',
'system_fingerprint': None,
'finish_reason': 'stop',
'logprobs': None
@@ -113,7 +119,7 @@ class ChatXAI(BaseChatOpenAI): # type: ignore[override]
content=' programm' id='run-1bc996b5-293f-4114-96a1-e0f755c05eb9'
content='ation' id='run-1bc996b5-293f-4114-96a1-e0f755c05eb9'
content='.' id='run-1bc996b5-293f-4114-96a1-e0f755c05eb9'
content='' response_metadata={'finish_reason': 'stop', 'model_name': 'grok-beta'} id='run-1bc996b5-293f-4114-96a1-e0f755c05eb9'
content='' response_metadata={'finish_reason': 'stop', 'model_name': 'grok-4'} id='run-1bc996b5-293f-4114-96a1-e0f755c05eb9'
Async:
@@ -133,7 +139,7 @@ class ChatXAI(BaseChatOpenAI): # type: ignore[override]
content="J'adore la programmation.",
response_metadata={
'token_usage': {'completion_tokens': 9, 'prompt_tokens': 32, 'total_tokens': 41},
'model_name': 'grok-beta',
'model_name': 'grok-4',
'system_fingerprint': None,
'finish_reason': 'stop',
'logprobs': None
@@ -141,12 +147,39 @@ class ChatXAI(BaseChatOpenAI): # type: ignore[override]
id='run-09371a11-7f72-4c53-8e7c-9de5c238b34c-0',
usage_metadata={'input_tokens': 32, 'output_tokens': 9, 'total_tokens': 41})
Tool calling:
Reasoning:
`Certain xAI models <https://docs.x.ai/docs/models#model-pricing>`__ support reasoning,
which allows the model to provide reasoning content along with the response.
If provided, reasoning content is returned under the ``additional_kwargs`` field of the
AIMessage or AIMessageChunk.
If supported, reasoning effort can be specified in the model constructor's ``extra_body``
argument, which will control the amount of reasoning the model does. The value can be one of
``'low'`` or ``'high'``.
.. code-block:: python
model = ChatXAI(
model="grok-3-mini",
extra_body={"reasoning_effort": "high"},
)
.. note::
As of 2025-07-10, ``reasoning_content`` is only returned in Grok 3 models, such as
`Grok 3 Mini <https://docs.x.ai/docs/models/grok-3-mini>`__.
.. note::
Note that in `Grok 4 <https://docs.x.ai/docs/models/grok-4-0709>`__, as of 2025-07-10,
reasoning is not exposed in ``reasoning_content`` (other than initial ``'Thinking...'`` text),
reasoning cannot be disabled, and the ``reasoning_effort`` cannot be specified.
Tool calling / function calling:
.. code-block:: python
from pydantic import BaseModel, Field
llm = ChatXAI(model="grok-beta")
llm = ChatXAI(model="grok-4")
class GetWeather(BaseModel):
'''Get the current weather in a given location'''
@@ -168,7 +201,6 @@ class ChatXAI(BaseChatOpenAI): # type: ignore[override]
)
ai_msg.tool_calls
.. code-block:: python
[
@@ -186,6 +218,67 @@ class ChatXAI(BaseChatOpenAI): # type: ignore[override]
}
]
.. note::
With stream response, the tool / function call will be returned in whole in a
single chunk, instead of being streamed across chunks.
Tool choice can be controlled by setting the ``tool_choice`` parameter in the model
constructor's ``extra_body`` argument. For example, to disable tool / function calling:
.. code-block:: python
llm = ChatXAI(model="grok-4", extra_body={"tool_choice": "none"})
To require that the model always calls a tool / function, set ``tool_choice`` to ``'required'``:
.. code-block:: python
llm = ChatXAI(model="grok-4", extra_body={"tool_choice": "required"})
To specify a tool / function to call, set ``tool_choice`` to the name of the tool / function:
.. code-block:: python
from pydantic import BaseModel, Field
llm = ChatXAI(
model="grok-4",
extra_body={
"tool_choice": {"type": "function", "function": {"name": "GetWeather"}}
},
)
class GetWeather(BaseModel):
\"\"\"Get the current weather in a given location\"\"\"
location: str = Field(..., description='The city and state, e.g. San Francisco, CA')
class GetPopulation(BaseModel):
\"\"\"Get the current population in a given location\"\"\"
location: str = Field(..., description='The city and state, e.g. San Francisco, CA')
llm_with_tools = llm.bind_tools([GetWeather, GetPopulation])
ai_msg = llm_with_tools.invoke(
"Which city is bigger: LA or NY?",
)
ai_msg.tool_calls
The resulting tool call would be:
.. code-block:: python
[{'name': 'GetWeather',
'args': {'location': 'Los Angeles, CA'},
'id': 'call_81668711',
'type': 'tool_call'}]
Parallel tool calling / parallel function calling:
By default, parallel tool / function calling is enabled, so you can process
multiple function calls in one request/response cycle. When two or more tool calls
are required, all of the tool call requests will be included in the response body.
Structured output:
.. code-block:: python
@@ -222,7 +315,7 @@ class ChatXAI(BaseChatOpenAI): # type: ignore[override]
from langchain_xai import ChatXAI
llm = ChatXAI(
model="grok-3-latest",
model="grok-4",
search_parameters={
"mode": "auto",
# Example optional parameters below:
@@ -234,6 +327,10 @@ class ChatXAI(BaseChatOpenAI): # type: ignore[override]
llm.invoke("Provide me a digest of world news in the last 24 hours.")
.. note::
`Citations <https://docs.x.ai/docs/guides/live-search#returning-citations>`__
are only available in `Grok 3 <https://docs.x.ai/docs/models/grok-3>`__.
Token usage:
.. code-block:: python
@@ -275,7 +372,7 @@ class ChatXAI(BaseChatOpenAI): # type: ignore[override]
'prompt_tokens': 19,
'total_tokens': 23
},
'model_name': 'grok-beta',
'model_name': 'grok-4',
'system_fingerprint': None,
'finish_reason': 'stop',
'logprobs': None
@@ -283,7 +380,7 @@ class ChatXAI(BaseChatOpenAI): # type: ignore[override]
""" # noqa: E501
model_name: str = Field(alias="model")
model_name: str = Field(default="grok-4", alias="model")
"""Model name to use."""
xai_api_key: Optional[SecretStr] = Field(
alias="api_key",