feat: port various nit changes from wip-v0.4 (#32506)

Lots of work that wasn't directly related to core improvements, messages, or testing functionality.
Authored by Mason Daugherty on 2025-08-11 15:09:08 -04:00; committed by GitHub
parent 7db9e60601
commit ee4c2510eb
63 changed files with 2213 additions and 2862 deletions

View File

@@ -15,7 +15,7 @@ DEFAULT_MODEL_NAME = "llama3.1"
@pytest.mark.parametrize(("method"), [("function_calling"), ("json_schema")])
def test_structured_output(method: str) -> None:
"""Test to verify structured output via tool calling and ``format`` parameter."""
"""Test to verify structured output via tool calling and `format` parameter."""
class Joke(BaseModel):
"""Joke to tell user."""
@@ -27,40 +27,44 @@ def test_structured_output(method: str) -> None:
query = "Tell me a joke about cats."
# Pydantic
structured_llm = llm.with_structured_output(Joke, method=method) # type: ignore[arg-type]
result = structured_llm.invoke(query)
assert isinstance(result, Joke)
if method == "function_calling":
structured_llm = llm.with_structured_output(Joke, method="function_calling")
result = structured_llm.invoke(query)
assert isinstance(result, Joke)
for chunk in structured_llm.stream(query):
assert isinstance(chunk, Joke)
for chunk in structured_llm.stream(query):
assert isinstance(chunk, Joke)
# JSON Schema
structured_llm = llm.with_structured_output(Joke.model_json_schema(), method=method) # type: ignore[arg-type]
result = structured_llm.invoke(query)
assert isinstance(result, dict)
assert set(result.keys()) == {"setup", "punchline"}
if method == "json_schema":
structured_llm = llm.with_structured_output(
Joke.model_json_schema(), method="json_schema"
)
result = structured_llm.invoke(query)
assert isinstance(result, dict)
assert set(result.keys()) == {"setup", "punchline"}
for chunk in structured_llm.stream(query):
for chunk in structured_llm.stream(query):
assert isinstance(chunk, dict)
assert isinstance(chunk, dict)
assert isinstance(chunk, dict)
assert set(chunk.keys()) == {"setup", "punchline"}
assert set(chunk.keys()) == {"setup", "punchline"}
# Typed Dict
class JokeSchema(TypedDict):
"""Joke to tell user."""
# Typed Dict
class JokeSchema(TypedDict):
"""Joke to tell user."""
setup: Annotated[str, "question to set up a joke"]
punchline: Annotated[str, "answer to resolve the joke"]
setup: Annotated[str, "question to set up a joke"]
punchline: Annotated[str, "answer to resolve the joke"]
structured_llm = llm.with_structured_output(JokeSchema, method=method) # type: ignore[arg-type]
result = structured_llm.invoke(query)
assert isinstance(result, dict)
assert set(result.keys()) == {"setup", "punchline"}
structured_llm = llm.with_structured_output(JokeSchema, method="json_schema")
result = structured_llm.invoke(query)
assert isinstance(result, dict)
assert set(result.keys()) == {"setup", "punchline"}
for chunk in structured_llm.stream(query):
for chunk in structured_llm.stream(query):
assert isinstance(chunk, dict)
assert isinstance(chunk, dict)
assert isinstance(chunk, dict)
assert set(chunk.keys()) == {"setup", "punchline"}
assert set(chunk.keys()) == {"setup", "punchline"}
@pytest.mark.parametrize(("model"), [(DEFAULT_MODEL_NAME)])

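For quick reference, here is roughly how the three schema styles exercised by the test above are used outside the test harness. This is an illustrative sketch, not part of the diff; it assumes a locally pulled llama3.1 model.

from pydantic import BaseModel

from langchain_ollama import ChatOllama


class Joke(BaseModel):
    """Joke to tell user."""

    setup: str
    punchline: str


llm = ChatOllama(model="llama3.1")
query = "Tell me a joke about cats."

# Pydantic class -> parsed into a Joke instance
joke = llm.with_structured_output(Joke, method="json_schema").invoke(query)

# Raw JSON schema (or a TypedDict) -> returned as a dict with the schema's keys
joke_dict = llm.with_structured_output(
    Joke.model_json_schema(), method="json_schema"
).invoke(query)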
View File

@@ -1,29 +1,17 @@
"""Ollama specific chat model integration tests for reasoning models."""
"""Ollama integration tests for reasoning chat models."""
import pytest
from langchain_core.messages import (
AIMessageChunk,
BaseMessageChunk,
HumanMessage,
)
from pydantic import BaseModel, Field
from langchain_core.messages import AIMessageChunk, BaseMessageChunk, HumanMessage
from langchain_ollama import ChatOllama
SAMPLE = "What is 3^3?"
class MathAnswer(BaseModel):
"""A mathematical expression and its numerical answer."""
expression: str = Field(description="The mathematical expression to evaluate.")
answer: int = Field(description="The numerical answer to the expression.")
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
def test_stream_no_reasoning(model: str) -> None:
"""Test streaming with `reasoning=False`"""
llm = ChatOllama(model=model, num_ctx=2**12)
llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False)
messages = [
{
"role": "user",
@@ -39,16 +27,14 @@ def test_stream_no_reasoning(model: str) -> None:
result += chunk
assert isinstance(result, AIMessageChunk)
assert result.content
assert "reasoning_content" not in result.additional_kwargs
assert "<think>" not in result.content and "</think>" not in result.content
assert "<think>" not in result.additional_kwargs["reasoning_content"]
assert "</think>" not in result.additional_kwargs["reasoning_content"]
assert "reasoning_content" not in result.additional_kwargs
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
async def test_astream_no_reasoning(model: str) -> None:
"""Test async streaming with `reasoning=False`"""
llm = ChatOllama(model=model, num_ctx=2**12)
llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False)
messages = [
{
"role": "user",
@@ -64,10 +50,8 @@ async def test_astream_no_reasoning(model: str) -> None:
result += chunk
assert isinstance(result, AIMessageChunk)
assert result.content
assert "reasoning_content" not in result.additional_kwargs
assert "<think>" not in result.content and "</think>" not in result.content
assert "<think>" not in result.additional_kwargs["reasoning_content"]
assert "</think>" not in result.additional_kwargs["reasoning_content"]
assert "reasoning_content" not in result.additional_kwargs
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -89,10 +73,10 @@ def test_stream_reasoning_none(model: str) -> None:
result += chunk
assert isinstance(result, AIMessageChunk)
assert result.content
assert "reasoning_content" not in result.additional_kwargs
assert "<think>" in result.content and "</think>" in result.content
assert "<think>" not in result.additional_kwargs["reasoning_content"]
assert "</think>" not in result.additional_kwargs["reasoning_content"]
assert "reasoning_content" not in result.additional_kwargs
assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -114,10 +98,10 @@ async def test_astream_reasoning_none(model: str) -> None:
result += chunk
assert isinstance(result, AIMessageChunk)
assert result.content
assert "reasoning_content" not in result.additional_kwargs
assert "<think>" in result.content and "</think>" in result.content
assert "<think>" not in result.additional_kwargs["reasoning_content"]
assert "</think>" not in result.additional_kwargs["reasoning_content"]
assert "reasoning_content" not in result.additional_kwargs
assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
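Summarizing the contract these streaming assertions encode, as a usage sketch (behavior as described by the tests above; not part of the diff):

from langchain_ollama import ChatOllama

prompt = "What is 3^3?"

# reasoning=None (default): the model's own behavior applies; DeepSeek R1 emits
# <think>...</think> inline in `content`, and `reasoning_content` is absent.
default_llm = ChatOllama(model="deepseek-r1:1.5b", reasoning=None)

# reasoning=False: no think tags appear in `content`, and `reasoning_content`
# is absent from `additional_kwargs`.
disabled_llm = ChatOllama(model="deepseek-r1:1.5b", reasoning=False)

# reasoning=True: the thinking is separated out into
# `additional_kwargs["reasoning_content"]`, leaving `content` free of tags.
enabled_llm = ChatOllama(model="deepseek-r1:1.5b", reasoning=True)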
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -175,27 +159,23 @@ async def test_reasoning_astream(model: str) -> None:
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
def test_invoke_no_reasoning(model: str) -> None:
"""Test using invoke with `reasoning=False`"""
llm = ChatOllama(model=model, num_ctx=2**12)
llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False)
message = HumanMessage(content=SAMPLE)
result = llm.invoke([message])
assert result.content
assert "reasoning_content" not in result.additional_kwargs
assert "<think>" not in result.content and "</think>" not in result.content
assert "<think>" not in result.additional_kwargs["reasoning_content"]
assert "</think>" not in result.additional_kwargs["reasoning_content"]
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
async def test_ainvoke_no_reasoning(model: str) -> None:
"""Test using async invoke with `reasoning=False`"""
llm = ChatOllama(model=model, num_ctx=2**12)
llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False)
message = HumanMessage(content=SAMPLE)
result = await llm.ainvoke([message])
assert result.content
assert "reasoning_content" not in result.additional_kwargs
assert "<think>" not in result.content and "</think>" not in result.content
assert "<think>" not in result.additional_kwargs["reasoning_content"]
assert "</think>" not in result.additional_kwargs["reasoning_content"]
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -207,8 +187,8 @@ def test_invoke_reasoning_none(model: str) -> None:
assert result.content
assert "reasoning_content" not in result.additional_kwargs
assert "<think>" in result.content and "</think>" in result.content
assert "<think>" not in result.additional_kwargs["reasoning_content"]
assert "</think>" not in result.additional_kwargs["reasoning_content"]
assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -220,8 +200,8 @@ async def test_ainvoke_reasoning_none(model: str) -> None:
assert result.content
assert "reasoning_content" not in result.additional_kwargs
assert "<think>" in result.content and "</think>" in result.content
assert "<think>" not in result.additional_kwargs["reasoning_content"]
assert "</think>" not in result.additional_kwargs["reasoning_content"]
assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -250,3 +230,43 @@ async def test_reasoning_ainvoke(model: str) -> None:
assert "<think>" not in result.content and "</think>" not in result.content
assert "<think>" not in result.additional_kwargs["reasoning_content"]
assert "</think>" not in result.additional_kwargs["reasoning_content"]
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
def test_think_tag_stripping_necessity(model: str) -> None:
"""Test that demonstrates why ``_strip_think_tags`` is necessary.
DeepSeek R1 models include reasoning/thinking as their default behavior.
When ``reasoning=False`` is set, the user explicitly wants no reasoning content,
but Ollama cannot disable thinking at the API level for these models.
Therefore, post-processing is required to strip the ``<think>`` tags.
This test documents the specific behavior that necessitates the
``_strip_think_tags`` function in the chat_models.py implementation.
"""
# Test with reasoning=None (default behavior - should include think tags)
llm_default = ChatOllama(model=model, reasoning=None, num_ctx=2**12)
message = HumanMessage(content=SAMPLE)
result_default = llm_default.invoke([message])
# With reasoning=None, the model's default behavior includes <think> tags
# This demonstrates why we need the stripping logic
assert "<think>" in result_default.content
assert "</think>" in result_default.content
assert "reasoning_content" not in result_default.additional_kwargs
# Test with reasoning=False (explicit disable - should NOT include think tags)
llm_disabled = ChatOllama(model=model, reasoning=False, num_ctx=2**12)
result_disabled = llm_disabled.invoke([message])
# With reasoning=False, think tags should be stripped from content
# This verifies that _strip_think_tags is working correctly
assert "<think>" not in result_disabled.content
assert "</think>" not in result_disabled.content
assert "reasoning_content" not in result_disabled.additional_kwargs
# Verify the difference: same model, different reasoning settings
# Default includes tags, disabled strips them
assert result_default.content != result_disabled.content
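For context, a minimal sketch of the kind of post-processing the docstring above motivates. The actual `_strip_think_tags` helper in chat_models.py may be implemented differently:

import re

_THINK_BLOCK = re.compile(r"<think>.*?</think>\s*", flags=re.DOTALL)


def strip_think_tags_sketch(text: str) -> str:
    """Remove the <think>...</think> blocks a reasoning model emits by default."""
    return _THINK_BLOCK.sub("", text)


assert strip_think_tags_sketch("<think>3 * 3 * 3 = 27</think>The answer is 27.") == "The answer is 27."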

View File

@@ -40,7 +40,7 @@ class TestChatOllama(ChatModelIntegrationTests):
@property
def has_tool_choice(self) -> bool:
# TODO: update after Ollama implements
# https://github.com/ollama/ollama/blob/main/docs/openai.md
# https://github.com/ollama/ollama/blob/main/docs/openai.md#supported-request-fields
return False
@property
@@ -168,7 +168,7 @@ class TestChatOllama(ChatModelIntegrationTests):
with pytest.raises(ValidationError) as excinfo:
ChatOllama(model="any-model", validate_model_on_init=True)
assert "not found in Ollama" in str(excinfo.value)
assert "Failed to connect to Ollama" in str(excinfo.value)
@patch("langchain_ollama.chat_models.Client.list")
def test_init_response_error(self, mock_list: MagicMock) -> None:

View File

@@ -1,10 +1,12 @@
"""Test Ollama embeddings."""
import os
from langchain_tests.integration_tests import EmbeddingsIntegrationTests
from langchain_ollama.embeddings import OllamaEmbeddings
MODEL_NAME = "llama3.1"
MODEL_NAME = os.environ.get("OLLAMA_TEST_MODEL", "llama3.1")
class TestOllamaEmbeddings(EmbeddingsIntegrationTests):

View File

@@ -1,24 +1,27 @@
"""Test OllamaLLM llm."""
import os
import pytest
from langchain_core.outputs import GenerationChunk
from langchain_core.runnables import RunnableConfig
from langchain_ollama.llms import OllamaLLM
MODEL_NAME = "llama3.1"
MODEL_NAME = os.environ.get("OLLAMA_TEST_MODEL", "llama3.1")
REASONING_MODEL_NAME = os.environ.get("OLLAMA_REASONING_TEST_MODEL", "deepseek-r1:1.5b")
SAMPLE = "What is 3^3?"
def test_stream_text_tokens() -> None:
"""Test streaming raw string tokens from OllamaLLM."""
"""Test streaming raw string tokens from `OllamaLLM`."""
llm = OllamaLLM(model=MODEL_NAME)
for token in llm.stream("I'm Pickle Rick"):
assert isinstance(token, str)
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)])
def test__stream_no_reasoning(model: str) -> None:
"""Test low-level chunk streaming of a simple prompt with `reasoning=False`."""
llm = OllamaLLM(model=model, num_ctx=2**12)
@@ -39,7 +42,7 @@ def test__stream_no_reasoning(model: str) -> None:
assert "reasoning_content" not in result_chunk.generation_info # type: ignore[operator]
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)])
def test__stream_with_reasoning(model: str) -> None:
"""Test low-level chunk streaming with `reasoning=True`."""
llm = OllamaLLM(model=model, num_ctx=2**12, reasoning=True)
@@ -64,14 +67,14 @@ def test__stream_with_reasoning(model: str) -> None:
async def test_astream_text_tokens() -> None:
"""Test async streaming raw string tokens from OllamaLLM."""
"""Test async streaming raw string tokens from `OllamaLLM`."""
llm = OllamaLLM(model=MODEL_NAME)
async for token in llm.astream("I'm Pickle Rick"):
assert isinstance(token, str)
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)])
async def test__astream_no_reasoning(model: str) -> None:
"""Test low-level async chunk streaming with `reasoning=False`."""
llm = OllamaLLM(model=model, num_ctx=2**12)
@@ -89,7 +92,7 @@ async def test__astream_no_reasoning(model: str) -> None:
assert "reasoning_content" not in result_chunk.generation_info # type: ignore[operator]
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)])
async def test__astream_with_reasoning(model: str) -> None:
"""Test low-level async chunk streaming with `reasoning=True`."""
llm = OllamaLLM(model=model, num_ctx=2**12, reasoning=True)
@@ -109,7 +112,7 @@ async def test__astream_with_reasoning(model: str) -> None:
async def test_abatch() -> None:
"""Test batch sync token generation from OllamaLLM."""
"""Test batch sync token generation from `OllamaLLM`."""
llm = OllamaLLM(model=MODEL_NAME)
result = await llm.abatch(["I'm Pickle Rick", "I'm not Pickle Rick"])
@@ -129,7 +132,7 @@ async def test_abatch_tags() -> None:
def test_batch() -> None:
"""Test batch token generation from OllamaLLM."""
"""Test batch token generation from `OllamaLLM`."""
llm = OllamaLLM(model=MODEL_NAME)
result = llm.batch(["I'm Pickle Rick", "I'm not Pickle Rick"])

View File

@@ -1,4 +1,4 @@
"""Test chat model integration."""
"""Unit tests for ChatOllama."""
import json
import logging
@@ -33,6 +33,16 @@ class TestChatOllama(ChatModelUnitTests):
def test__parse_arguments_from_tool_call() -> None:
"""Test that string arguments are preserved as strings in tool call parsing.
This test verifies the fix for PR #30154 which addressed an issue where
string-typed tool arguments (like IDs or long strings) were being incorrectly
processed. The parser should preserve string values as strings rather than
attempting to parse them as JSON when they're already valid string arguments.
The test uses a long string ID to ensure string arguments maintain their
original type after parsing, which is critical for tools expecting string inputs.
"""
raw_response = '{"model":"sample-model","message":{"role":"assistant","content":"","tool_calls":[{"function":{"name":"get_profile_details","arguments":{"arg_1":"12345678901234567890123456"}}}]},"done":false}' # noqa: E501
raw_tool_calls = json.loads(raw_response)["message"]["tool_calls"]
response = _parse_arguments_from_tool_call(raw_tool_calls[0])
@@ -40,6 +50,41 @@ def test__parse_arguments_from_tool_call() -> None:
assert isinstance(response["arg_1"], str)
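A sketch of the behavior the docstring describes, for illustration only; the real `_parse_arguments_from_tool_call` may differ:

import json


def parse_arguments_sketch(raw_tool_call: dict) -> dict:
    """Hypothetical parser mirroring the documented contract."""
    parsed: dict = {}
    for key, value in raw_tool_call["function"]["arguments"].items():
        if isinstance(value, str):
            try:
                decoded = json.loads(value)
                # Only replace the string when it decodes to a structure;
                # scalar-looking strings (IDs, long text) must stay strings.
                parsed[key] = decoded if isinstance(decoded, (dict, list)) else value
            except json.JSONDecodeError:
                parsed[key] = value
        else:
            parsed[key] = value
    return parsed


assert parse_arguments_sketch(
    {
        "function": {
            "name": "get_profile_details",
            "arguments": {"arg_1": "12345678901234567890123456"},
        }
    }
)["arg_1"] == "12345678901234567890123456"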
def test__parse_arguments_from_tool_call_with_function_name_metadata() -> None:
"""Test that functionName metadata is filtered out from tool arguments.
Some models may include metadata like ``functionName`` in the arguments
that just echoes the function name. This should be filtered out for
no-argument tools to return an empty dictionary.
"""
# Test case where arguments contain functionName metadata
raw_tool_call_with_metadata = {
"function": {
"name": "magic_function_no_args",
"arguments": {"functionName": "magic_function_no_args"},
}
}
response = _parse_arguments_from_tool_call(raw_tool_call_with_metadata)
assert response == {}
# Test case where arguments contain both real args and metadata
raw_tool_call_mixed = {
"function": {
"name": "some_function",
"arguments": {"functionName": "some_function", "real_arg": "value"},
}
}
response_mixed = _parse_arguments_from_tool_call(raw_tool_call_mixed)
assert response_mixed == {"real_arg": "value"}
# Test case where functionName has different value (should be preserved)
raw_tool_call_different = {
"function": {"name": "function_a", "arguments": {"functionName": "function_b"}}
}
response_different = _parse_arguments_from_tool_call(raw_tool_call_different)
assert response_different == {"functionName": "function_b"}
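The filtering rule these three cases pin down can be summarized in a few lines; a hypothetical sketch, not the package's implementation:

def filter_function_name_metadata(name: str, arguments: dict) -> dict:
    """Drop a `functionName` entry only when it merely echoes the tool's own name."""
    if arguments.get("functionName") == name:
        return {k: v for k, v in arguments.items() if k != "functionName"}
    return arguments


assert filter_function_name_metadata(
    "magic_function_no_args", {"functionName": "magic_function_no_args"}
) == {}
assert filter_function_name_metadata(
    "some_function", {"functionName": "some_function", "real_arg": "value"}
) == {"real_arg": "value"}
assert filter_function_name_metadata(
    "function_a", {"functionName": "function_b"}
) == {"functionName": "function_b"}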
@contextmanager
def _mock_httpx_client_stream(
*args: Any, **kwargs: Any
@@ -54,6 +99,7 @@ def _mock_httpx_client_stream(
def test_arbitrary_roles_accepted_in_chatmessages(
monkeypatch: pytest.MonkeyPatch,
) -> None:
"""Test that `ChatOllama` accepts arbitrary roles in `ChatMessage`."""
monkeypatch.setattr(Client, "stream", _mock_httpx_client_stream)
llm = ChatOllama(
model=MODEL_NAME,
@@ -94,9 +140,6 @@ dummy_raw_tool_call = {
}
# --- Regression tests for tool-call argument parsing (see #30910) ---
@pytest.mark.parametrize(
"input_string, expected_output",
[
@@ -113,14 +156,14 @@ dummy_raw_tool_call = {
def test_parse_json_string_success_cases(
input_string: str, expected_output: Any
) -> None:
"""Tests that _parse_json_string correctly parses valid and fixable strings."""
"""Tests that `_parse_json_string` correctly parses valid and fixable strings."""
raw_tool_call = {"function": {"name": "test_func", "arguments": input_string}}
result = _parse_json_string(input_string, raw_tool_call=raw_tool_call, skip=False)
assert result == expected_output
def test_parse_json_string_failure_case_raises_exception() -> None:
"""Tests that _parse_json_string raises an exception for truly malformed strings."""
"""Tests that `_parse_json_string` raises an exception for malformed strings."""
malformed_string = "{'key': 'value',,}"
raw_tool_call = {"function": {"name": "test_func", "arguments": malformed_string}}
with pytest.raises(OutputParserException):
@@ -132,7 +175,7 @@ def test_parse_json_string_failure_case_raises_exception() -> None:
def test_parse_json_string_skip_returns_input_on_failure() -> None:
"""Tests that skip=True returns the original string on parse failure."""
"""Tests that `skip=True` returns the original string on parse failure."""
malformed_string = "{'not': valid,,,}"
raw_tool_call = {"function": {"name": "test_func", "arguments": malformed_string}}
result = _parse_json_string(

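The contract these tests exercise, condensed into an illustrative sketch; the real `_parse_json_string` lives in langchain_ollama.chat_models and may be implemented differently:

import ast
import json

from langchain_core.exceptions import OutputParserException


def parse_json_string_sketch(json_string: str, *, raw_tool_call: dict, skip: bool):
    """Parse valid JSON, salvage Python-style dict strings, and honor skip=True."""
    try:
        return json.loads(json_string)
    except json.JSONDecodeError:
        try:
            return ast.literal_eval(json_string)  # salvages single-quoted dicts
        except (ValueError, SyntaxError):
            if skip:
                return json_string  # skip=True: return the original string untouched
            msg = (
                "Could not parse tool arguments for "
                f"{raw_tool_call['function']['name']}"
            )
            raise OutputParserException(msg) from None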
View File

@@ -32,7 +32,7 @@ def test_validate_model_on_init(mock_validate_model: Any) -> None:
@patch("langchain_ollama.embeddings.Client")
def test_embed_documents_passes_options(mock_client_class: Any) -> None:
"""Test that embed_documents method passes options including num_gpu."""
"""Test that `embed_documents()` passes options, including `num_gpu`."""
# Create a mock client instance
mock_client = Mock()
mock_client_class.return_value = mock_client
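For reference, the usage pattern this unit test guards, sketched under the assumption (implied by the test name) that `num_gpu` is accepted as a constructor keyword and forwarded inside the request's `options` payload:

from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(model="llama3.1", num_gpu=1)
vectors = embeddings.embed_documents(["first document", "second document"])
assert len(vectors) == 2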