openai[patch]: reduce tested constraints on strict schema adherence for Responses API (#31290)

Scheduled testing started failing today because the Responses API
stopped raising `BadRequestError` for a schema that was previously
invalid when `strict=True`.

Although docs still say that [some type-specific keywords are not yet
supported](https://platform.openai.com/docs/guides/structured-outputs#some-type-specific-keywords-are-not-yet-supported)
(including `minimum` and `maximum` for numbers), the below appears to
run and correctly respect the constraints:
```python
import json
import openai

maximums = list(range(1, 11))
arg_values = []
for maximum in maximums:

    tool = {
        "type": "function",
        "name": "magic_function",
        "description": "Applies a magic function to an input.",
        "parameters": {
            "properties": {
                "input": {"maximum": maximum, "minimum": 0, "type": "integer"}
            },
            "required": ["input"],
            "type": "object",
            "additionalProperties": False
        },
        "strict": True
    }
    
    client = openai.OpenAI()
    
    response = client.responses.create(
        model="gpt-4.1",
        input=[{"role": "user", "content": "What is the value of magic_function(3)? Use the tool."}],
        tools=[tool],
    )
    function_call = next(item for item in response.output if item.type == "function_call")
    args = json.loads(function_call.arguments)
    arg_values.append(args["input"])


print(maximums)
print(arg_values)

# [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# [1, 2, 3, 3, 3, 3, 3, 3, 3, 3]
```
Until yesterday this raised BadRequestError.

The same is not true of Chat Completions, which appears to still raise
BadRequestError
```python
tool = {
    "type": "function",
    "function": {
        "name": "magic_function",
        "description": "Applies a magic function to an input.",
        "parameters": {
            "properties": {
                "input": {"maximum": 5, "minimum": 0, "type": "integer"}
            },
            "required": ["input"],
            "type": "object",
            "additionalProperties": False
        },
        "strict": True
    }
}

response = client.chat.completions.create(
    model="gpt-4.1",
    messages=[{"role": "user", "content": "What is the value of magic_function(3)? Use the tool."}],
    tools=[tool],
)
response  # raises BadRequestError
```

Here we update tests accordingly.
This commit is contained in:
ccurme 2025-05-20 10:50:31 -04:00 committed by GitHub
parent f29659728c
commit dcb5aba999
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -680,8 +680,7 @@ def test_image_token_counting_png() -> None:
     assert expected == actual


-@pytest.mark.parametrize("use_responses_api", [False, True])
-def test_tool_calling_strict(use_responses_api: bool) -> None:
+def test_tool_calling_strict() -> None:
     """Test tool calling with strict=True."""

     class magic_function(BaseModel):
@@ -689,9 +688,7 @@ def test_tool_calling_strict(use_responses_api: bool) -> None:

         input: int

-    model = ChatOpenAI(
-        model="gpt-4o", temperature=0, use_responses_api=use_responses_api
-    )
+    model = ChatOpenAI(model="gpt-4.1", temperature=0)
     model_with_tools = model.bind_tools([magic_function], strict=True)
     # invalid_magic_function adds metadata to schema that isn't supported by OpenAI.
@@ -720,6 +717,48 @@ def test_tool_calling_strict(use_responses_api: bool) -> None:
         next(model_with_invalid_tool_schema.stream(query))
def test_tool_calling_strict_responses() -> None:
    """Test tool calling with strict=True against the Responses API.

    As of 2025-05-20 the Responses API appears to have fewer constraints on
    tool schemas when strict=True than Chat Completions does: type-specific
    keywords such as ``minimum``/``maximum`` no longer raise BadRequestError.
    """

    class magic_function_notrequired_arg(BaseModel):
        """Applies a magic function to an input."""

        # Optional (not-required) argument: still rejected under strict mode.
        input: Optional[int] = Field(default=None)

    model = ChatOpenAI(model="gpt-4.1", temperature=0, use_responses_api=True)
    # invalid_magic_function adds metadata to schema that as of 2025-05-20 appears
    # supported by the Responses API, but not Chat Completions. We expect tool calls
    # from this schema to be valid.
    model_with_tools = model.bind_tools([invalid_magic_function], strict=True)
    # Having a not-required argument in the schema remains invalid.
    model_with_invalid_tool_schema = model.bind_tools(
        [magic_function_notrequired_arg], strict=True
    )

    # Test invoke: the previously-"invalid" schema should now produce a valid
    # tool call rather than raising.
    query = "What is the value of magic_function(3)? Use the tool."
    response = model_with_tools.invoke(query)
    _validate_tool_call_message(response)

    # Test invalid tool schema: not-required argument still raises on invoke.
    with pytest.raises(openai.BadRequestError):
        model_with_invalid_tool_schema.invoke(query)

    # Test stream: accumulate chunks and validate the aggregated message.
    full: Optional[BaseMessageChunk] = None
    for chunk in model_with_tools.stream(query):
        full = chunk if full is None else full + chunk  # type: ignore
    assert isinstance(full, AIMessage)
    _validate_tool_call_message(full)

    # Test invalid tool schema: also raises on stream.
    with pytest.raises(openai.BadRequestError):
        next(model_with_invalid_tool_schema.stream(query))
@pytest.mark.parametrize("use_responses_api", [False, True])
@pytest.mark.parametrize(
    ("model", "method"),