mirror of
https://github.com/hwchase17/langchain.git
synced 2026-04-10 22:43:18 +00:00
feat(ollama): support response_format (#34612)
Fixes #34610 --- This PR resolves an issue where `ChatOllama` would raise an `unexpected keyword argument 'response_format'` error when used with `create_agent` or when passed an OpenAI-style `response_format`. When using `create_agent` (especially with models like `gpt-oss`), LangChain creates a `response_format` argument (e.g., `{"type": "json_schema", ...}`). `ChatOllama` previously passed this argument directly to the underlying Ollama client, which does not support `response_format` and instead expects a `format` parameter. ## The Fix I updated `_chat_params` in `libs/partners/ollama/langchain_ollama/chat_models.py` to: 1. Intercept the `response_format` argument. 2. Map it to the native Ollama `format` parameter: * `{"type": "json_schema", "json_schema": {"schema": ...}}` -> `format=schema` * `{"type": "json_object"}` -> `format="json"` 3. Remove `response_format` from the kwargs passed to the client. ## Validation * **Reproduction Script**: Verified the fix with a script covering `json_schema`, `json_object`, and explicit `format` priority scenarios. * **New Tests**: Added 3 new unit tests to `libs/partners/ollama/tests/unit_tests/test_chat_models.py` covering these scenarios. * **Regression**: Ran the full test suite (`make -C libs/partners/ollama test`), passing 29 tests (previously 26). * **Lint/Format**: Verified with `make lint_package` and `make format`. --------- Co-authored-by: Mohan Kumar Sagadevan <mohankumarsagadevan@Mohans-MacBook-Air.local> Co-authored-by: Mason Daugherty <mason@langchain.dev> Co-authored-by: Mason Daugherty <github@mdrxy.com>
This commit is contained in:
@@ -792,12 +792,17 @@ class ChatOllama(BaseChatModel):
|
||||
if v is not None
|
||||
}
|
||||
|
||||
format_param = self._resolve_format_param(
|
||||
kwargs.pop("format", self.format),
|
||||
kwargs.pop("response_format", None),
|
||||
)
|
||||
|
||||
params = {
|
||||
"messages": ollama_messages,
|
||||
"stream": kwargs.pop("stream", True),
|
||||
"model": kwargs.pop("model", self.model),
|
||||
"think": kwargs.pop("reasoning", self.reasoning),
|
||||
"format": kwargs.pop("format", self.format),
|
||||
"format": format_param,
|
||||
"logprobs": kwargs.pop("logprobs", self.logprobs),
|
||||
"top_logprobs": kwargs.pop("top_logprobs", self.top_logprobs),
|
||||
"options": options_dict,
|
||||
@@ -815,6 +820,107 @@ class ChatOllama(BaseChatModel):
|
||||
|
||||
return params
|
||||
|
||||
def _resolve_format_param(
|
||||
self,
|
||||
format_param: str | dict[str, Any] | None,
|
||||
response_format: Any | None,
|
||||
) -> str | dict[str, Any] | None:
|
||||
"""Resolve the format parameter.
|
||||
|
||||
Converts an OpenAI-style `response_format` dict to the `format`
|
||||
parameter expected by Ollama.
|
||||
|
||||
Args:
|
||||
format_param: The explicit `format` value (takes priority).
|
||||
response_format: An OpenAI-style `response_format` dict.
|
||||
|
||||
Returns:
|
||||
The resolved format value to pass to the Ollama client.
|
||||
"""
|
||||
if format_param is not None:
|
||||
if response_format is not None:
|
||||
warnings.warn(
|
||||
"Both 'format' and 'response_format' were provided. "
|
||||
"'response_format' will be ignored in favor of 'format'.",
|
||||
UserWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return format_param
|
||||
|
||||
if response_format is None:
|
||||
return None
|
||||
|
||||
return self._convert_response_format(response_format)
|
||||
|
||||
def _convert_response_format(
|
||||
self,
|
||||
response_format: Any,
|
||||
) -> str | dict[str, Any] | None:
|
||||
"""Convert an OpenAI-style `response_format` to an Ollama `format` value.
|
||||
|
||||
Args:
|
||||
response_format: The `response_format` value to convert.
|
||||
|
||||
Returns:
|
||||
The Ollama-compatible `format` value, or `None` if conversion fails.
|
||||
"""
|
||||
if not isinstance(response_format, dict):
|
||||
warnings.warn(
|
||||
f"Ignored invalid 'response_format' type: {type(response_format)}. "
|
||||
"Expected a dictionary.",
|
||||
UserWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return None
|
||||
|
||||
fmt_type = response_format.get("type")
|
||||
if fmt_type == "json_object":
|
||||
return "json"
|
||||
if fmt_type == "json_schema":
|
||||
return self._extract_json_schema(response_format)
|
||||
|
||||
warnings.warn(
|
||||
f"Ignored unrecognized 'response_format' type: {fmt_type}. "
|
||||
"Expected 'json_object' or 'json_schema'.",
|
||||
UserWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return None
|
||||
|
||||
def _extract_json_schema(
|
||||
self,
|
||||
response_format: dict[str, Any],
|
||||
) -> dict[str, Any] | None:
|
||||
"""Extract the raw JSON schema from an OpenAI ``json_schema`` envelope.
|
||||
|
||||
Args:
|
||||
response_format: A dict with ``type: "json_schema"``.
|
||||
|
||||
Returns:
|
||||
The raw JSON schema dict, or ``None`` if extraction fails.
|
||||
"""
|
||||
json_schema_block = response_format.get("json_schema")
|
||||
if not isinstance(json_schema_block, dict):
|
||||
warnings.warn(
|
||||
"response_format has type 'json_schema' but 'json_schema' "
|
||||
f"value is {type(json_schema_block)}, expected a dict "
|
||||
"containing a 'schema' key. "
|
||||
"The format parameter will not be set.",
|
||||
UserWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return None
|
||||
schema = json_schema_block.get("schema")
|
||||
if schema is None:
|
||||
warnings.warn(
|
||||
"response_format has type 'json_schema' but no 'schema' "
|
||||
"key was found in 'json_schema'. "
|
||||
"The format parameter will not be set.",
|
||||
UserWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
return schema
|
||||
|
||||
@model_validator(mode="after")
|
||||
def _set_clients(self) -> Self:
|
||||
"""Set clients to use for ollama."""
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Annotated
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
@@ -68,7 +69,7 @@ def test_structured_output(method: str) -> None:
|
||||
setup: str = Field(description="question to set up a joke")
|
||||
punchline: str = Field(description="answer to resolve the joke")
|
||||
|
||||
llm = ChatOllama(model=DEFAULT_MODEL_NAME, temperature=0)
|
||||
llm = ChatOllama(model=DEFAULT_MODEL_NAME, temperature=0.3)
|
||||
query = "Tell me a joke about cats."
|
||||
|
||||
# Pydantic
|
||||
@@ -112,6 +113,42 @@ def test_structured_output(method: str) -> None:
|
||||
assert set(chunk.keys()) == {"setup", "punchline"}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "response_format",
    [
        {"type": "json_object"},
        {
            "type": "json_schema",
            "json_schema": {
                "name": "joke",
                "schema": {
                    "type": "object",
                    "properties": {
                        "setup": {"type": "string"},
                        "punchline": {"type": "string"},
                    },
                    "required": ["setup", "punchline"],
                },
            },
        },
    ],
    ids=["json_object", "json_schema"],
)
def test_response_format(response_format: dict) -> None:
    """Verify OpenAI-style ``response_format`` is translated and honored."""
    model = ChatOllama(model=DEFAULT_MODEL_NAME, temperature=0)
    prompt = HumanMessage(
        "Tell me a joke about cats. Return JSON with setup/punchline."
    )

    result = model.invoke([prompt], response_format=response_format)

    assert isinstance(result, AIMessage)
    payload = json.loads(str(result.content))
    assert isinstance(payload, dict)
    if response_format["type"] == "json_schema":
        # Schema-constrained output must contain the required keys.
        assert "setup" in payload
        assert "punchline" in payload
|
||||
|
||||
|
||||
@pytest.mark.parametrize(("model"), [(DEFAULT_MODEL_NAME)])
|
||||
def test_structured_output_deeply_nested(model: str) -> None:
|
||||
"""Test to verify structured output with a nested objects."""
|
||||
|
||||
@@ -814,6 +814,230 @@ def test_chat_ollama_ignores_strict_arg() -> None:
|
||||
assert "strict" not in call_kwargs
|
||||
|
||||
|
||||
def test_chat_ollama_supports_response_format_json_schema() -> None:
    """Test that ChatOllama correctly maps json_schema response_format to format."""
    with patch("langchain_ollama.chat_models.Client") as client_cls:
        client = MagicMock()
        client_cls.return_value = client
        client.chat.return_value = [
            {
                "model": "gpt-oss:20b",
                "created_at": "2025-01-01T00:00:00.000000000Z",
                "done": True,
                "done_reason": "stop",
                "message": {"role": "assistant", "content": "{}"},
            }
        ]

        schema = {"type": "object", "properties": {"foo": {"type": "string"}}}
        llm = ChatOllama(model="gpt-oss:20b")
        llm.invoke(
            [HumanMessage("Hello")],
            response_format={
                "type": "json_schema",
                "json_schema": {"name": "test", "schema": schema, "strict": True},
            },
        )

        sent = client.chat.call_args[1]
        # response_format must never reach the client; only the raw schema.
        assert "response_format" not in sent
        assert sent.get("format") == schema
|
||||
|
||||
|
||||
def test_chat_ollama_supports_response_format_json_object() -> None:
    """Test ChatOllama maps json_object response_format to format='json'."""
    with patch("langchain_ollama.chat_models.Client") as client_cls:
        client = MagicMock()
        client_cls.return_value = client
        client.chat.return_value = [
            {
                "model": "gpt-oss:20b",
                "created_at": "2025-01-01T00:00:00.000000000Z",
                "done": True,
                "done_reason": "stop",
                "message": {"role": "assistant", "content": "{}"},
            }
        ]

        llm = ChatOllama(model="gpt-oss:20b")
        llm.invoke([HumanMessage("Hello")], response_format={"type": "json_object"})

        sent = client.chat.call_args[1]
        # json_object mode becomes Ollama's string format "json".
        assert "response_format" not in sent
        assert sent.get("format") == "json"
|
||||
|
||||
|
||||
def test_chat_ollama_prioritizes_explicit_format() -> None:
    """Test explicit 'format' arg takes precedence over 'response_format'."""
    with patch("langchain_ollama.chat_models.Client") as client_cls:
        client = MagicMock()
        client_cls.return_value = client
        client.chat.return_value = [
            {
                "model": "gpt-oss:20b",
                "created_at": "2025-01-01T00:00:00.000000000Z",
                "done": True,
                "done_reason": "stop",
                "message": {"role": "assistant", "content": "{}"},
            }
        ]

        llm = ChatOllama(model="gpt-oss:20b")

        # Supplying both must emit a warning that response_format is ignored.
        with pytest.warns(UserWarning, match="Both 'format' and 'response_format'"):
            llm.invoke(
                [HumanMessage("Hello")],
                format="some_custom_format",
                response_format={"type": "json_object"},
            )

        sent = client.chat.call_args[1]
        assert "response_format" not in sent
        # The explicit format wins over the translated response_format.
        assert sent.get("format") == "some_custom_format"
|
||||
|
||||
|
||||
def test_chat_ollama_warns_invalid_response_format_type() -> None:
    """Test ChatOllama warns on non-dict response_format."""
    with patch("langchain_ollama.chat_models.Client") as client_cls:
        client = MagicMock()
        client_cls.return_value = client
        client.chat.return_value = [
            {
                "model": "gpt-oss:20b",
                "created_at": "2025-01-01T00:00:00.000000000Z",
                "done": True,
                "done_reason": "stop",
                "message": {"role": "assistant", "content": "{}"},
            }
        ]

        llm = ChatOllama(model="gpt-oss:20b")

        # A list instead of a dict is an invalid response_format type.
        with pytest.warns(UserWarning, match="Ignored invalid 'response_format' type"):
            llm.invoke([HumanMessage("Hello")], response_format=["invalid_type"])

        sent = client.chat.call_args[1]
        assert "response_format" not in sent
        assert sent.get("format") is None
|
||||
|
||||
|
||||
def test_chat_ollama_warns_unrecognized_response_format_type() -> None:
    """Test ChatOllama warns on unrecognized response_format type (e.g. 'text')."""
    with patch("langchain_ollama.chat_models.Client") as client_cls:
        client = MagicMock()
        client_cls.return_value = client
        client.chat.return_value = [
            {
                "model": "gpt-oss:20b",
                "created_at": "2025-01-01T00:00:00.000000000Z",
                "done": True,
                "done_reason": "stop",
                "message": {"role": "assistant", "content": "{}"},
            }
        ]

        llm = ChatOllama(model="gpt-oss:20b")

        # "text" is neither json_object nor json_schema.
        with pytest.warns(UserWarning, match="Ignored unrecognized 'response_format'"):
            llm.invoke([HumanMessage("Hello")], response_format={"type": "text"})

        sent = client.chat.call_args[1]
        assert "response_format" not in sent
        assert sent.get("format") is None
|
||||
|
||||
|
||||
def test_chat_ollama_warns_json_schema_missing_schema_key() -> None:
    """Test ChatOllama warns when json_schema block has no 'schema' key."""
    with patch("langchain_ollama.chat_models.Client") as client_cls:
        client = MagicMock()
        client_cls.return_value = client
        client.chat.return_value = [
            {
                "model": "gpt-oss:20b",
                "created_at": "2025-01-01T00:00:00.000000000Z",
                "done": True,
                "done_reason": "stop",
                "message": {"role": "assistant", "content": "{}"},
            }
        ]

        llm = ChatOllama(model="gpt-oss:20b")

        # Envelope is present but lacks the 'schema' key.
        with pytest.warns(UserWarning, match="no 'schema' key was found"):
            llm.invoke(
                [HumanMessage("Hello")],
                response_format={
                    "type": "json_schema",
                    "json_schema": {"name": "test"},
                },
            )

        sent = client.chat.call_args[1]
        assert "response_format" not in sent
        assert sent.get("format") is None
|
||||
|
||||
|
||||
def test_chat_ollama_warns_json_schema_missing_json_schema_key() -> None:
    """Test ChatOllama warns when json_schema type has no 'json_schema' block."""
    with patch("langchain_ollama.chat_models.Client") as client_cls:
        client = MagicMock()
        client_cls.return_value = client
        client.chat.return_value = [
            {
                "model": "gpt-oss:20b",
                "created_at": "2025-01-01T00:00:00.000000000Z",
                "done": True,
                "done_reason": "stop",
                "message": {"role": "assistant", "content": "{}"},
            }
        ]

        llm = ChatOllama(model="gpt-oss:20b")

        # The 'json_schema' key is absent entirely.
        with pytest.warns(UserWarning, match="'json_schema' value is"):
            llm.invoke(
                [HumanMessage("Hello")], response_format={"type": "json_schema"}
            )

        sent = client.chat.call_args[1]
        assert "response_format" not in sent
        assert sent.get("format") is None
|
||||
|
||||
|
||||
def test_chat_ollama_warns_json_schema_block_not_dict() -> None:
    """Test ChatOllama warns when json_schema value is not a dict."""
    with patch("langchain_ollama.chat_models.Client") as client_cls:
        client = MagicMock()
        client_cls.return_value = client
        client.chat.return_value = [
            {
                "model": "gpt-oss:20b",
                "created_at": "2025-01-01T00:00:00.000000000Z",
                "done": True,
                "done_reason": "stop",
                "message": {"role": "assistant", "content": "{}"},
            }
        ]

        llm = ChatOllama(model="gpt-oss:20b")

        # 'json_schema' carries a string where a dict is required.
        with pytest.warns(UserWarning, match="'json_schema' value is"):
            llm.invoke(
                [HumanMessage("Hello")],
                response_format={"type": "json_schema", "json_schema": "not_a_dict"},
            )

        sent = client.chat.call_args[1]
        assert "response_format" not in sent
        assert sent.get("format") is None
|
||||
|
||||
|
||||
def test_reasoning_content_serialized_as_thinking() -> None:
|
||||
"""Test that `reasoning_content` in `AIMessage` is serialized as `'thinking'`.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user