From 23d369e2f46f54d22de9c736080ca7e6643e3bc5 Mon Sep 17 00:00:00 2001
From: Mason Daugherty <mason@langchain.dev>
Date: Thu, 21 May 2026 13:55:14 -0500
Subject: [PATCH] test(xai): tolerate extra block types in web search and xfail
 v1 streaming tool calls (#37612)

Loosen the xAI integration tests to handle two recent provider
behaviors: `web_search` responses may now include block types beyond the
core trio (`server_tool_call`, `server_tool_result`, `text`), and
streaming aggregation under `output_version="v1"` does not produce a
`tool_call` content block (tool calls are only available on
`.tool_calls`). The `v1` parametrization of `test_tool_calling` is
therefore marked as a strict xfail.
---
 .../integration_tests/test_chat_models.py     | 20 ++++++++++++----
 .../test_chat_models_standard.py              | 23 +++++++++++++++++++
 2 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/libs/partners/xai/tests/integration_tests/test_chat_models.py b/libs/partners/xai/tests/integration_tests/test_chat_models.py
index 10d7f53ef7a..4846eebecef 100644
--- a/libs/partners/xai/tests/integration_tests/test_chat_models.py
+++ b/libs/partners/xai/tests/integration_tests/test_chat_models.py
@@ -100,12 +100,23 @@ def test_reasoning(output_version: Literal["", "v1"]) -> None:
 def test_web_search() -> None:
     llm = ChatXAI(model=MODEL_NAME, temperature=0).bind_tools([{"type": "web_search"}])
 
+    # xAI may emit additional block types (e.g. `citation`, `reasoning`) alongside
+    # the core set, so assert each required type is present individually rather
+    # than checking set equality.
+    expected_types = ("server_tool_call", "server_tool_result", "text")
+
+    def _assert_web_search_block(blocks: list) -> None:
+        server_tool_calls = [b for b in blocks if b["type"] == "server_tool_call"]
+        assert server_tool_calls, "expected at least one server_tool_call block"
+        assert server_tool_calls[0]["name"] == "web_search"
+
     # Test invoke
     response = llm.invoke("Look up the current time in Boston, MA.")
     assert response.content
     content_types = {block["type"] for block in response.content_blocks}
-    assert content_types == {"server_tool_call", "server_tool_result", "text"}
-    assert response.content_blocks[0]["name"] == "web_search"  # type: ignore[typeddict-item]
+    for expected in expected_types:
+        assert expected in content_types, f"missing {expected!r} in {content_types}"
+    _assert_web_search_block(response.content_blocks)
 
     # Test streaming
     full: AIMessageChunk | None = None
@@ -114,5 +125,6 @@ def test_web_search() -> None:
         full = chunk if full is None else full + chunk
     assert isinstance(full, AIMessageChunk)
     content_types = {block["type"] for block in full.content_blocks}
-    assert content_types == {"server_tool_call", "server_tool_result", "text"}
-    assert full.content_blocks[0]["name"] == "web_search"  # type: ignore[typeddict-item]
+    for expected in expected_types:
+        assert expected in content_types, f"missing {expected!r} in {content_types}"
+    _assert_web_search_block(full.content_blocks)
diff --git a/libs/partners/xai/tests/integration_tests/test_chat_models_standard.py b/libs/partners/xai/tests/integration_tests/test_chat_models_standard.py
index fd62bdc1eb4..93da2ea281b 100644
--- a/libs/partners/xai/tests/integration_tests/test_chat_models_standard.py
+++ b/libs/partners/xai/tests/integration_tests/test_chat_models_standard.py
@@ -38,6 +38,29 @@ class TestXAIStandard(ChatModelIntegrationTests):
             "rate_limiter": rate_limiter,
         }
 
+    @pytest.mark.parametrize(
+        "model",
+        [
+            {},
+            pytest.param(
+                {"output_version": "v1"},
+                marks=pytest.mark.xfail(
+                    strict=True,
+                    reason=(
+                        "xAI v1 streaming aggregate does not surface tool_call "
+                        "content block; tool calls are only available via "
+                        "`.tool_calls`."
+                    ),
+                ),
+            ),
+        ],
+        indirect=True,
+    )
+    @override
+    def test_tool_calling(self, model: BaseChatModel) -> None:
+        """Parametrize across default and `v1` output versions; `v1` is xfailed."""
+        super().test_tool_calling(model)
+
     @pytest.mark.xfail(
         reason="Default model does not support stop sequences, using grok-3 instead"
     )