From 23d369e2f46f54d22de9c736080ca7e6643e3bc5 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Thu, 21 May 2026 13:55:14 -0500 Subject: [PATCH] test(xai): tolerate extra block types in web search and xfail v1 streaming tool calls (#37612) Loosen the xAI integration tests to handle two recent provider behaviors: `web_search` responses may now include block types beyond the core trio (`server_tool_call`, `server_tool_result`, `text`), and streaming aggregation under `output_version="v1"` does not produce a `tool_call` content block (tool calls are only available on `.tool_calls`). --- .../integration_tests/test_chat_models.py | 20 ++++++++++++---- .../test_chat_models_standard.py | 23 +++++++++++++++++++ 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/libs/partners/xai/tests/integration_tests/test_chat_models.py b/libs/partners/xai/tests/integration_tests/test_chat_models.py index 10d7f53ef7a..4846eebecef 100644 --- a/libs/partners/xai/tests/integration_tests/test_chat_models.py +++ b/libs/partners/xai/tests/integration_tests/test_chat_models.py @@ -100,12 +100,23 @@ def test_reasoning(output_version: Literal["", "v1"]) -> None: def test_web_search() -> None: llm = ChatXAI(model=MODEL_NAME, temperature=0).bind_tools([{"type": "web_search"}]) + # xAI may emit additional block types (e.g. `citation`, `reasoning`) alongside + # the core set, so assert each required type is present individually rather + # than checking set equality. 
+ expected_types = ("server_tool_call", "server_tool_result", "text") + + def _assert_web_search_block(blocks: list) -> None: + server_tool_calls = [b for b in blocks if b["type"] == "server_tool_call"] + assert server_tool_calls, "expected at least one server_tool_call block" + assert server_tool_calls[0]["name"] == "web_search" + # Test invoke response = llm.invoke("Look up the current time in Boston, MA.") assert response.content content_types = {block["type"] for block in response.content_blocks} - assert content_types == {"server_tool_call", "server_tool_result", "text"} - assert response.content_blocks[0]["name"] == "web_search" # type: ignore[typeddict-item] + for expected in expected_types: + assert expected in content_types, f"missing {expected!r} in {content_types}" + _assert_web_search_block(response.content_blocks) # Test streaming full: AIMessageChunk | None = None @@ -114,5 +125,6 @@ def test_web_search() -> None: full = chunk if full is None else full + chunk assert isinstance(full, AIMessageChunk) content_types = {block["type"] for block in full.content_blocks} - assert content_types == {"server_tool_call", "server_tool_result", "text"} - assert full.content_blocks[0]["name"] == "web_search" # type: ignore[typeddict-item] + for expected in expected_types: + assert expected in content_types, f"missing {expected!r} in {content_types}" + _assert_web_search_block(full.content_blocks) diff --git a/libs/partners/xai/tests/integration_tests/test_chat_models_standard.py b/libs/partners/xai/tests/integration_tests/test_chat_models_standard.py index fd62bdc1eb4..93da2ea281b 100644 --- a/libs/partners/xai/tests/integration_tests/test_chat_models_standard.py +++ b/libs/partners/xai/tests/integration_tests/test_chat_models_standard.py @@ -38,6 +38,29 @@ class TestXAIStandard(ChatModelIntegrationTests): "rate_limiter": rate_limiter, } + @pytest.mark.parametrize( + "model", + [ + {}, + pytest.param( + {"output_version": "v1"}, + marks=pytest.mark.xfail( + 
strict=True, + reason=( + "xAI v1 streaming aggregate does not surface tool_call " + "content block; tool calls are only available via " + "`.tool_calls`." + ), + ), + ), + ], + indirect=True, + ) + @override + def test_tool_calling(self, model: BaseChatModel) -> None: + """Parametrize across default and `v1` output versions; `v1` is xfailed.""" + super().test_tool_calling(model) + @pytest.mark.xfail( reason="Default model does not support stop sequences, using grok-3 instead" )