other tests and API changes

2026-02-21 14:43:07 +00:00 · 2025-08-01 19:35:20 -04:00
parent ccf3e25884
commit 6aa192cf48
3 changed files with 591 additions and 1 deletions
--- a/libs/partners/openai/langchain_openai/chat_models/base.py
+++ b/libs/partners/openai/langchain_openai/chat_models/base.py
@@ -553,6 +553,13 @@ class BaseChatOpenAI(BaseChatModel):

    .. versionadded:: 0.3.24

+    """
+    verbosity: Optional[str] = None
+    """Controls the verbosity level of responses for reasoning models. For use with the
+    Responses API.
+
+    Currently supported values are ``'low'``, ``'medium'``, and ``'high'``.
+    Controls how detailed the model's responses are.
    """
    tiktoken_model_name: Optional[str] = None
    """The model name to pass to tiktoken when using this class.
@@ -831,6 +838,7 @@ class BaseChatOpenAI(BaseChatModel):
            "temperature": self.temperature,
            "reasoning_effort": self.reasoning_effort,
            "reasoning": self.reasoning,
+            "verbosity": self.verbosity,
            "include": self.include,
            "service_tier": self.service_tier,
            "truncation": self.truncation,
@@ -1723,7 +1731,26 @@ class BaseChatOpenAI(BaseChatModel):
            elif isinstance(tool_choice, bool):
                tool_choice = "required"
            elif isinstance(tool_choice, dict):
-                pass
+                # Handle allowed_tools choice format
+                if tool_choice.get("type") == "allowed_tools":
+                    allowed_config = tool_choice.get("allowed_tools", {})
+                    mode = allowed_config.get("mode", "auto")
+                    allowed_tools = allowed_config.get("tools", [])
+
+                    if mode not in ["auto", "required"]:
+                        raise ValueError(
+                            f"allowed_tools mode must be 'auto' or 'required', "
+                            f"got: {mode}"
+                        )
+
+                    # Convert allowed_tools to the expected format
+                    tool_choice = {
+                        "type": "allowed_tools",
+                        "mode": mode,
+                        "tools": allowed_tools,
+                    }
+                else:
+                    pass
            else:
                raise ValueError(
                    f"Unrecognized tool_choice type. Expected str, bool or dict. "
@@ -3543,6 +3570,14 @@ def _construct_responses_api_payload(
                schema_dict = schema
            if schema_dict == {"type": "json_object"}:  # JSON mode
                payload["text"] = {"format": {"type": "json_object"}}
+            elif schema_dict.get("type") == "grammar":
+                if "grammar" not in schema_dict:
+                    raise ValueError("Grammar format requires 'grammar' field")
+                payload["text"] = {
+                    "format": {"type": "grammar", "grammar": schema_dict["grammar"]}
+                }
+            elif schema_dict.get("type") == "python":
+                payload["text"] = {"format": {"type": "python"}}
            elif (
                (
                    response_format := _convert_to_openai_response_format(
@@ -4038,6 +4073,27 @@ def _convert_responses_chunk_to_generation_chunk(
        content.append(
            {"type": "function_call", "arguments": chunk.delta, "index": current_index}
        )
+    elif chunk.type == "response.custom_tool_call_input.delta":
+        _advance(chunk.output_index)
+        tool_call_chunks.append(
+            {
+                "type": "tool_call_chunk",
+                "text_input": chunk.delta,
+                "index": current_index,
+            }
+        )
+        content.append(
+            {"type": "custom_tool_call", "input": chunk.delta, "index": current_index}
+        )
+    elif chunk.type == "response.custom_tool_call_input.done":
+        content.append(
+            {
+                "type": "custom_tool_call_done",
+                "input": chunk.input,
+                "item_id": chunk.item_id,
+                "index": current_index,
+            }
+        )
    elif chunk.type == "response.refusal.done":
        content.append({"type": "refusal", "refusal": chunk.refusal})
    elif chunk.type == "response.output_item.added" and chunk.item.type == "reasoning":
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_new_features_integration.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_new_features_integration.py
@@ -0,0 +1,273 @@
+"""Integration tests for new OpenAI API features."""
+
+import pytest
+from langchain_core.tools import tool
+
+from langchain_openai import ChatOpenAI
+
+
+class TestResponseFormatsIntegration:
+    """Integration tests for new response format types."""
+
+    @pytest.mark.scheduled
+    def test_grammar_response_format_integration(self):
+        """Test grammar response format with actual API."""
+        llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+
+        grammar_format = {
+            "type": "grammar",
+            "grammar": """
+            start: expr
+            expr: NUMBER ("+" | "-" | "*" | "/") NUMBER
+            NUMBER: /[0-9]+/
+            %import common.WS
+            %ignore WS
+            """,
+        }
+
+        try:
+            # This will test the actual API integration
+            bound_llm = llm.bind(response_format=grammar_format)
+
+            # Note: This may not work until OpenAI actually supports these formats
+            # For now, we test that the binding works without errors
+            assert bound_llm is not None
+
+        except Exception as e:
+            # If the API doesn't support these formats yet, we expect a specific error
+            # This test serves as documentation for future support
+            pytest.skip(f"Grammar response format not yet supported: {e}")
+
+    @pytest.mark.scheduled
+    def test_python_response_format_integration(self):
+        """Test python response format with actual API."""
+        llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+
+        python_format = {"type": "python"}
+
+        try:
+            bound_llm = llm.bind(response_format=python_format)
+            assert bound_llm is not None
+
+        except Exception as e:
+            pytest.skip(f"Python response format not yet supported: {e}")
+
+
+class TestAllowedToolsChoiceIntegration:
+    """Integration tests for allowed_tools tool choice."""
+
+    @pytest.mark.scheduled
+    def test_allowed_tools_integration(self):
+        """Test allowed_tools choice with actual API."""
+
+        @tool
+        def get_weather(location: str) -> str:
+            """Get weather for a location."""
+            return f"Weather in {location}: sunny"
+
+        @tool
+        def get_time() -> str:
+            """Get current time."""
+            return "12:00 PM"
+
+        llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+
+        allowed_tools_choice = {
+            "type": "allowed_tools",
+            "allowed_tools": {
+                "mode": "auto",
+                "tools": [
+                    {"type": "function", "function": {"name": "get_weather"}},
+                    {"type": "function", "function": {"name": "get_time"}},
+                ],
+            },
+        }
+
+        try:
+            bound_llm = llm.bind_tools(
+                [get_weather, get_time], tool_choice=allowed_tools_choice
+            )
+
+            # Test that it can be invoked without errors
+            response = bound_llm.invoke("What's the weather like in Paris?")
+            assert response is not None
+
+        except Exception as e:
+            pytest.skip(f"Allowed tools choice not yet supported: {e}")
+
+
+class TestVerbosityParameterIntegration:
+    """Integration tests for verbosity parameter."""
+
+    @pytest.mark.scheduled
+    def test_verbosity_integration(self):
+        """Test verbosity parameter with actual API."""
+        llm = ChatOpenAI(model="gpt-4o-mini", verbosity="low", temperature=0)
+
+        try:
+            # Test that verbosity parameter is accepted
+            response = llm.invoke("Tell me about artificial intelligence.")
+            assert response is not None
+
+        except Exception as e:
+            # If the parameter isn't supported yet, we expect a parameter error
+            if "verbosity" in str(e).lower():
+                pytest.skip(f"Verbosity parameter not yet supported: {e}")
+            else:
+                raise
+
+
+class TestCustomToolsIntegration:
+    """Integration tests for custom tools functionality."""
+
+    @pytest.mark.scheduled
+    def test_custom_tools_with_cfg_validation(self):
+        """Test custom tools with CFG validation."""
+        # Import from the CFG validation module
+        from langchain_openai.chat_models.cfg_grammar import (
+            validate_cfg_format,
+            validate_custom_tool_output,
+        )
+
+        # Test arithmetic expressions
+        grammar = """
+        start: expr
+        expr: term (("+" | "-") term)*
+        term: factor (("*" | "/") factor)*
+        factor: NUMBER | "(" expr ")"
+        NUMBER: /[0-9]+(\\.[0-9]+)?/
+        %import common.WS
+        %ignore WS
+        """
+
+        tool_format = {"type": "grammar", "grammar": grammar}
+        validator = validate_cfg_format(tool_format)
+
+        assert validator is not None
+
+        # Test valid expressions
+        valid_expressions = ["5 + 3", "10 * 2", "(1 + 2) * 3"]
+        for expr in valid_expressions:
+            assert validate_custom_tool_output(expr, validator) is True
+
+        # Test invalid expressions
+        invalid_expressions = ["hello", "5 + +", "invalid"]
+        for expr in invalid_expressions:
+            assert validate_custom_tool_output(expr, validator) is False
+
+
+class TestStreamingIntegration:
+    """Integration tests for streaming with new features."""
+
+    @pytest.mark.scheduled
+    def test_streaming_with_verbosity(self):
+        """Test streaming works with verbosity parameter."""
+        llm = ChatOpenAI(model="gpt-4o-mini", verbosity="medium", temperature=0)
+
+        try:
+            chunks = []
+            for chunk in llm.stream("Count from 1 to 3"):
+                chunks.append(chunk)
+
+            assert len(chunks) > 0
+
+        except Exception as e:
+            if "verbosity" in str(e).lower():
+                pytest.skip(f"Verbosity parameter not yet supported in streaming: {e}")
+            else:
+                raise
+
+    @pytest.mark.scheduled
+    def test_streaming_with_custom_tools(self):
+        """Test streaming works with custom tools."""
+
+        @tool(custom=True)
+        def execute_code(code: str) -> str:
+            """Execute Python code."""
+            return f"Executed: {code}"
+
+        llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+
+        try:
+            bound_llm = llm.bind_tools([execute_code])
+
+            chunks = []
+            for chunk in bound_llm.stream("Write a simple Python print statement"):
+                chunks.append(chunk)
+
+            assert len(chunks) > 0
+
+        except Exception as e:
+            # Custom tools may not be fully supported in streaming yet
+            pytest.skip(f"Custom tools streaming not yet supported: {e}")
+
+
+class TestMinimalReasoningEffortIntegration:
+    """Integration tests for minimal reasoning effort."""
+
+    @pytest.mark.scheduled
+    def test_minimal_reasoning_effort_integration(self):
+        """Test minimal reasoning effort with reasoning models."""
+        # This would typically be used with o1 models
+        try:
+            llm = ChatOpenAI(model="o1-mini", reasoning_effort="minimal", temperature=0)
+
+            response = llm.invoke("What is 2 + 2?")
+            assert response is not None
+
+        except Exception as e:
+            # O1 models may not be available in all test environments
+            if "model" in str(e).lower() and "o1" in str(e).lower():
+                pytest.skip(f"O1 model not available: {e}")
+            elif "reasoning_effort" in str(e).lower():
+                pytest.skip(f"Minimal reasoning effort not yet supported: {e}")
+            else:
+                raise
+
+
+class TestFullIntegration:
+    """Test combinations of new features together."""
+
+    @pytest.mark.scheduled
+    def test_multiple_new_features_together(self):
+        """Test using multiple new features in combination."""
+
+        @tool
+        def analyze_data(data: str) -> str:
+            """Analyze data and return insights."""
+            return f"Analysis of {data}: positive trend"
+
+        try:
+            llm = ChatOpenAI(
+                model="gpt-4o-mini",
+                verbosity="medium",
+                reasoning_effort="low",
+                temperature=0,
+            )
+
+            # Try with allowed tools and grammar response format
+            allowed_tools_choice = {
+                "type": "allowed_tools",
+                "allowed_tools": {
+                    "mode": "auto",
+                    "tools": [
+                        {"type": "function", "function": {"name": "analyze_data"}}
+                    ],
+                },
+            }
+
+            grammar_format = {
+                "type": "grammar",
+                "grammar": "start: result\nresult: /[a-zA-Z0-9 ]+/",
+            }
+
+            bound_llm = llm.bind_tools(
+                [analyze_data], tool_choice=allowed_tools_choice
+            ).bind(response_format=grammar_format)
+
+            # If this works, it means all features are compatible
+            response = bound_llm.invoke("Analyze this sales data")
+            assert response is not None
+
+        except Exception as e:
+            pytest.skip(f"Combined new features not yet fully supported: {e}")
--- a/libs/partners/openai/tests/unit_tests/chat_models/test_new_features.py
+++ b/libs/partners/openai/tests/unit_tests/chat_models/test_new_features.py
@@ -0,0 +1,261 @@
+"""Test new OpenAI API features."""
+
+import pytest
+from langchain_core.tools import tool
+
+from langchain_openai import ChatOpenAI
+
+
+class TestResponseFormats:
+    """Test new response format types."""
+
+    def test_grammar_response_format(self):
+        """Test grammar response format configuration."""
+        llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+
+        # Test grammar format in Responses API
+        grammar_format = {
+            "type": "grammar",
+            "grammar": """
+            start: expr
+            expr: NUMBER ("+" | "-") NUMBER
+            NUMBER: /[0-9]+/
+            %import common.WS
+            %ignore WS
+            """,
+        }
+
+        # This should not raise an error during bind
+        bound_llm = llm.bind(response_format=grammar_format)
+        assert bound_llm is not None
+
+    def test_python_response_format(self):
+        """Test python response format configuration."""
+        llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+
+        # Test python format in Responses API
+        python_format = {"type": "python"}
+
+        # This should not raise an error during bind
+        bound_llm = llm.bind(response_format=python_format)
+        assert bound_llm is not None
+
+    def test_grammar_format_validation(self):
+        """Test that grammar format requires grammar field."""
+        llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+
+        # Test missing grammar field
+        invalid_format = {"type": "grammar"}
+
+        bound_llm = llm.bind(response_format=invalid_format)
+
+        # The error should be raised when trying to create the payload
+        # not during bind, so we can't easily test this in unit tests
+        # without mocking the actual API call
+        assert bound_llm is not None
+
+
+class TestAllowedToolsChoice:
+    """Test allowed_tools tool choice functionality."""
+
+    def test_allowed_tools_auto_mode(self):
+        """Test allowed_tools with auto mode."""
+
+        @tool
+        def get_weather(location: str) -> str:
+            """Get weather for location."""
+            return f"Weather in {location}: sunny"
+
+        @tool
+        def get_time() -> str:
+            """Get current time."""
+            return "12:00 PM"
+
+        llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+
+        allowed_tools_choice = {
+            "type": "allowed_tools",
+            "allowed_tools": {
+                "mode": "auto",
+                "tools": [
+                    {"type": "function", "function": {"name": "get_weather"}},
+                    {"type": "function", "function": {"name": "get_time"}},
+                ],
+            },
+        }
+
+        bound_llm = llm.bind_tools(
+            [get_weather, get_time], tool_choice=allowed_tools_choice
+        )
+        assert bound_llm is not None
+
+    def test_allowed_tools_required_mode(self):
+        """Test allowed_tools with required mode."""
+
+        @tool
+        def calculate(expression: str) -> str:
+            """Calculate mathematical expression."""
+            return f"Result: {eval(expression)}"  # noqa: S307
+
+        llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+
+        allowed_tools_choice = {
+            "type": "allowed_tools",
+            "allowed_tools": {
+                "mode": "required",
+                "tools": [{"type": "function", "function": {"name": "calculate"}}],
+            },
+        }
+
+        bound_llm = llm.bind_tools([calculate], tool_choice=allowed_tools_choice)
+        assert bound_llm is not None
+
+    def test_allowed_tools_invalid_mode(self):
+        """Test that invalid allowed_tools mode raises error."""
+
+        @tool
+        def test_tool() -> str:
+            """Test tool."""
+            return "test"
+
+        llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+
+        invalid_choice = {
+            "type": "allowed_tools",
+            "allowed_tools": {"mode": "invalid_mode", "tools": []},
+        }
+
+        with pytest.raises(ValueError, match="allowed_tools mode must be"):
+            llm.bind_tools([test_tool], tool_choice=invalid_choice)
+
+
+class TestVerbosityParameter:
+    """Test verbosity parameter functionality."""
+
+    def test_verbosity_parameter_low(self):
+        """Test verbosity parameter with low value."""
+        llm = ChatOpenAI(model="gpt-4o-mini", verbosity="low")
+
+        assert llm.verbosity == "low"
+        assert "verbosity" in llm._default_params
+        assert llm._default_params["verbosity"] == "low"
+
+    def test_verbosity_parameter_medium(self):
+        """Test verbosity parameter with medium value."""
+        llm = ChatOpenAI(model="gpt-4o-mini", verbosity="medium")
+
+        assert llm.verbosity == "medium"
+        assert llm._default_params["verbosity"] == "medium"
+
+    def test_verbosity_parameter_high(self):
+        """Test verbosity parameter with high value."""
+        llm = ChatOpenAI(model="gpt-4o-mini", verbosity="high")
+
+        assert llm.verbosity == "high"
+        assert llm._default_params["verbosity"] == "high"
+
+    def test_verbosity_parameter_none(self):
+        """Test verbosity parameter with None (default)."""
+        llm = ChatOpenAI(model="gpt-4o-mini")
+
+        assert llm.verbosity is None
+        # When verbosity is None, it may not be included in _default_params
+        # due to the exclude_if_none filtering
+        verbosity_param = llm._default_params.get("verbosity")
+        assert verbosity_param is None
+
+
+class TestCustomToolStreamingSupport:
+    """Test that custom tool streaming events are handled."""
+
+    def test_custom_tool_streaming_event_types(self):
+        """Test that the new custom tool streaming event types are supported."""
+        # This test verifies that our code includes the necessary event handling
+        # The actual streaming event handling is tested in integration tests
+
+        # Import the base module to verify it loads without errors
+        import langchain_openai.chat_models.base as base_module
+
+        # Verify the module loaded successfully
+        assert base_module is not None
+
+        # Check that the module contains our custom tool streaming logic
+        # by looking for the event type strings in the source
+        import inspect
+
+        source = inspect.getsource(base_module)
+
+        # Verify our custom tool streaming events are handled
+        assert "response.custom_tool_call_input.delta" in source
+        assert "response.custom_tool_call_input.done" in source
+
+
+class TestMinimalReasoningEffort:
+    """Test that minimal reasoning effort is supported."""
+
+    def test_minimal_reasoning_effort(self):
+        """Test reasoning_effort parameter supports 'minimal'."""
+        llm = ChatOpenAI(model="gpt-4o-mini", reasoning_effort="minimal")
+
+        assert llm.reasoning_effort == "minimal"
+        assert llm._default_params["reasoning_effort"] == "minimal"
+
+    def test_all_reasoning_effort_values(self):
+        """Test all supported reasoning effort values."""
+        supported_values = ["minimal", "low", "medium", "high"]
+
+        for value in supported_values:
+            llm = ChatOpenAI(model="gpt-4o-mini", reasoning_effort=value)
+            assert llm.reasoning_effort == value
+            assert llm._default_params["reasoning_effort"] == value
+
+
+class TestBackwardCompatibility:
+    """Test that existing functionality still works."""
+
+    def test_existing_response_formats(self):
+        """Test that existing response formats still work."""
+        llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+
+        # JSON object format should still work
+        json_llm = llm.bind(response_format={"type": "json_object"})
+        assert json_llm is not None
+
+        # JSON schema format should still work
+        schema = {
+            "type": "json_schema",
+            "json_schema": {
+                "name": "test_schema",
+                "schema": {
+                    "type": "object",
+                    "properties": {"result": {"type": "string"}},
+                    "required": ["result"],
+                },
+            },
+        }
+
+        schema_llm = llm.bind(response_format=schema)
+        assert schema_llm is not None
+
+    def test_existing_tool_choice(self):
+        """Test that existing tool_choice functionality still works."""
+
+        @tool
+        def test_tool(x: int) -> int:
+            """Test tool."""
+            return x * 2
+
+        llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+
+        # String tool choice should still work
+        bound_llm = llm.bind_tools([test_tool], tool_choice="test_tool")
+        assert bound_llm is not None
+
+        # Auto/none/required should still work
+        for choice in ["auto", "none", "required"]:
+            bound_llm = llm.bind_tools([test_tool], tool_choice=choice)
+            assert bound_llm is not None
+
+        # Boolean tool choice should still work
+        bound_llm = llm.bind_tools([test_tool], tool_choice=True)
+        assert bound_llm is not None