fix(openai): revert some changes (#32462)

Keep coverage on `output_version="v0"` (increasing coverage is being managed in the v0.4 branch).
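
For orientation, the change repeated throughout the hunks below is simply dropping the explicit `output_version` pin so the tests exercise the default ("v0") message format again. A minimal before/after sketch (MODEL_NAME and Foo are placeholders from the test module; the "no pin means v0" reading follows from the commit message, not from new behavior added here):

```python
from langchain_openai import ChatOpenAI

# Removed form: the tests pinned the newer Responses-style output format.
llm = ChatOpenAI(
    model=MODEL_NAME, use_responses_api=True, output_version="responses/v1"
)

# Restored form: no pin, so the default ("v0") format stays covered. Under "v0",
# parsed structured output and built-in tool results are read from
# additional_kwargs, which is what the assertions below check.
llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
response = llm.invoke("how are ya", response_format=Foo)
parsed = response.additional_kwargs["parsed"]
tool_outputs = response.additional_kwargs.get("tool_outputs", [])
```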
Author: ccurme
Date: 2025-08-08 09:51:18 -03:00 (committed by GitHub)
Commit: 02001212b0
Parent: 00244122bd


@@ -175,9 +175,7 @@ class FooDict(TypedDict):
 def test_parsed_pydantic_schema() -> None:
-    llm = ChatOpenAI(
-        model=MODEL_NAME, use_responses_api=True, output_version="responses/v1"
-    )
+    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
     response = llm.invoke("how are ya", response_format=Foo)
     parsed = Foo(**json.loads(response.text()))
     assert parsed == response.additional_kwargs["parsed"]
@@ -195,9 +193,7 @@ def test_parsed_pydantic_schema() -> None:
 async def test_parsed_pydantic_schema_async() -> None:
-    llm = ChatOpenAI(
-        model=MODEL_NAME, use_responses_api=True, output_version="responses/v1"
-    )
+    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
     response = await llm.ainvoke("how are ya", response_format=Foo)
     parsed = Foo(**json.loads(response.text()))
     assert parsed == response.additional_kwargs["parsed"]
@@ -217,9 +213,7 @@ async def test_parsed_pydantic_schema_async() -> None:
 @pytest.mark.flaky(retries=3, delay=1)
 @pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict])
 def test_parsed_dict_schema(schema: Any) -> None:
-    llm = ChatOpenAI(
-        model=MODEL_NAME, use_responses_api=True, output_version="responses/v1"
-    )
+    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
     response = llm.invoke("how are ya", response_format=schema)
     parsed = json.loads(response.text())
     assert parsed == response.additional_kwargs["parsed"]
@@ -237,9 +231,7 @@ def test_parsed_dict_schema(schema: Any) -> None:
 def test_parsed_strict() -> None:
-    llm = ChatOpenAI(
-        model=MODEL_NAME, use_responses_api=True, output_version="responses/v1"
-    )
+    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
     class InvalidJoke(TypedDict):
         setup: Annotated[str, ..., "The setup of the joke"]
@@ -266,9 +258,7 @@ def test_parsed_strict() -> None:
 @pytest.mark.flaky(retries=3, delay=1)
 @pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict])
 async def test_parsed_dict_schema_async(schema: Any) -> None:
-    llm = ChatOpenAI(
-        model=MODEL_NAME, use_responses_api=True, output_version="responses/v1"
-    )
+    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
     response = await llm.ainvoke("how are ya", response_format=schema)
     parsed = json.loads(response.text())
     assert parsed == response.additional_kwargs["parsed"]
@@ -290,9 +280,7 @@ def test_function_calling_and_structured_output() -> None:
         """return x * y"""
         return x * y
-    llm = ChatOpenAI(
-        model=MODEL_NAME, use_responses_api=True, output_version="responses/v1"
-    )
+    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
     bound_llm = llm.bind_tools([multiply], response_format=Foo, strict=True)
     # Test structured output
     response = llm.invoke("how are ya", response_format=Foo)
@@ -336,9 +324,7 @@ def test_reasoning(output_version: Literal["v0", "responses/v1"]) -> None:
 def test_stateful_api() -> None:
-    llm = ChatOpenAI(
-        model=MODEL_NAME, use_responses_api=True, output_version="responses/v1"
-    )
+    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
     response = llm.invoke("how are you, my name is Bobo")
     assert "id" in response.response_metadata
@@ -435,9 +421,7 @@ def test_stream_reasoning_summary(
 @pytest.mark.vcr
 def test_code_interpreter() -> None:
-    llm = ChatOpenAI(
-        model="o4-mini", use_responses_api=True, output_version="responses/v1"
-    )
+    llm = ChatOpenAI(model="o4-mini", use_responses_api=True)
     llm_with_tools = llm.bind_tools(
         [{"type": "code_interpreter", "container": {"type": "auto"}}]
     )
@@ -447,16 +431,13 @@ def test_code_interpreter() -> None:
     }
     response = llm_with_tools.invoke([input_message])
     _check_response(response)
-    tool_outputs = [
-        block
-        for block in response.content
-        if isinstance(block, dict) and block.get("type") == "code_interpreter_call"
-    ]
+    tool_outputs = response.additional_kwargs["tool_outputs"]
     assert tool_outputs
     assert any(output["type"] == "code_interpreter_call" for output in tool_outputs)
     # Test streaming
     # Use same container
+    tool_outputs = response.additional_kwargs["tool_outputs"]
     assert len(tool_outputs) == 1
     container_id = tool_outputs[0]["container_id"]
     llm_with_tools = llm.bind_tools(
@@ -468,11 +449,7 @@ def test_code_interpreter() -> None:
         assert isinstance(chunk, AIMessageChunk)
         full = chunk if full is None else full + chunk
     assert isinstance(full, AIMessageChunk)
-    tool_outputs = [
-        block
-        for block in full.content
-        if isinstance(block, dict) and block.get("type") == "code_interpreter_call"
-    ]
+    tool_outputs = full.additional_kwargs["tool_outputs"]
     assert tool_outputs
     assert any(output["type"] == "code_interpreter_call" for output in tool_outputs)
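
For reference, the two access patterns these code-interpreter hunks toggle between look roughly like this; a sketch reusing names from the test (`response` is the non-streaming AIMessage above, and the streaming case uses the aggregated `full` chunk the same way):

```python
# "responses/v1" (removed): built-in tool calls surface as typed blocks in message.content.
tool_outputs_v1 = [
    block
    for block in response.content
    if isinstance(block, dict) and block.get("type") == "code_interpreter_call"
]

# "v0" (restored by this commit): the same results are collected under additional_kwargs.
tool_outputs_v0 = response.additional_kwargs["tool_outputs"]
assert tool_outputs_v0
```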
@@ -483,9 +460,7 @@ def test_code_interpreter() -> None:
 @pytest.mark.vcr
 def test_mcp_builtin() -> None:
-    llm = ChatOpenAI(
-        model="o4-mini", use_responses_api=True, output_version="responses/v1"
-    )
+    llm = ChatOpenAI(model="o4-mini", use_responses_api=True)
     llm_with_tools = llm.bind_tools(
         [
@@ -514,8 +489,8 @@ def test_mcp_builtin() -> None:
                 "approve": True,
                 "approval_request_id": output["id"],
             }
-            for output in response.content
-            if isinstance(output, dict) and output.get("type") == "mcp_approval_request"
+            for output in response.additional_kwargs["tool_outputs"]
+            if output["type"] == "mcp_approval_request"
         ]
     )
     _ = llm_with_tools.invoke(
@@ -574,9 +549,7 @@ def test_mcp_builtin_zdr() -> None:
 @pytest.mark.vcr()
 def test_image_generation_streaming() -> None:
     """Test image generation streaming."""
-    llm = ChatOpenAI(
-        model="gpt-4.1", use_responses_api=True, output_version="responses/v1"
-    )
+    llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True)
     tool = {
         "type": "image_generation",
         # For testing purposes let's keep the quality low, so the test runs faster.
@@ -623,13 +596,7 @@ def test_image_generation_streaming() -> None:
     # At the moment, the streaming API does not pick up annotations fully.
     # So the following check is commented out.
     # _check_response(complete_ai_message)
-    tool_outputs = [
-        block
-        for block in complete_ai_message.content
-        if isinstance(block, dict) and block.get("type") == "image_generation_call"
-    ]
-    assert len(tool_outputs) == 1
-    tool_output = tool_outputs[0]
+    tool_output = complete_ai_message.additional_kwargs["tool_outputs"][0]
     assert set(tool_output.keys()).issubset(expected_keys)
@@ -637,9 +604,7 @@ def test_image_generation_streaming() -> None:
 def test_image_generation_multi_turn() -> None:
     """Test multi-turn editing of image generation by passing in history."""
     # Test multi-turn
-    llm = ChatOpenAI(
-        model="gpt-4.1", use_responses_api=True, output_version="responses/v1"
-    )
+    llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True)
     # Test invocation
     tool = {
         "type": "image_generation",
@@ -656,13 +621,7 @@ def test_image_generation_multi_turn() -> None:
     ]
     ai_message = llm_with_tools.invoke(chat_history)
     _check_response(ai_message)
-    tool_outputs = [
-        block
-        for block in ai_message.content
-        if isinstance(block, dict) and block.get("type") == "image_generation_call"
-    ]
-    assert len(tool_outputs) == 1
-    tool_output = tool_outputs[0]
+    tool_output = ai_message.additional_kwargs["tool_outputs"][0]
     # Example tool output for an image
     # {
@@ -711,13 +670,7 @@ def test_image_generation_multi_turn() -> None:
     ai_message2 = llm_with_tools.invoke(chat_history)
     _check_response(ai_message2)
-    tool_outputs2 = [
-        block
-        for block in ai_message2.content
-        if isinstance(block, dict) and block.get("type") == "image_generation_call"
-    ]
-    assert len(tool_outputs2) == 1
-    tool_output2 = tool_outputs2[0]
+    tool_output2 = ai_message2.additional_kwargs["tool_outputs"][0]
     assert set(tool_output2.keys()).issubset(expected_keys)