feat: port various nit changes from wip-v0.4 (#32506)

Lots of work that wasn't directly related to core improvements, messages, or testing functionality.
Authored by Mason Daugherty on 2025-08-11 15:09:08 -04:00; committed by GitHub
parent 7db9e60601
commit ee4c2510eb
63 changed files with 2213 additions and 2862 deletions

View File

@@ -15,7 +15,7 @@ DEFAULT_MODEL_NAME = "llama3.1"
@pytest.mark.parametrize(("method"), [("function_calling"), ("json_schema")])
def test_structured_output(method: str) -> None:
"""Test to verify structured output via tool calling and ``format`` parameter."""
"""Test to verify structured output via tool calling and `format` parameter."""
class Joke(BaseModel):
"""Joke to tell user."""
@@ -27,40 +27,44 @@ def test_structured_output(method: str) -> None:
query = "Tell me a joke about cats."
# Pydantic
structured_llm = llm.with_structured_output(Joke, method=method) # type: ignore[arg-type]
result = structured_llm.invoke(query)
assert isinstance(result, Joke)
if method == "function_calling":
structured_llm = llm.with_structured_output(Joke, method="function_calling")
result = structured_llm.invoke(query)
assert isinstance(result, Joke)
for chunk in structured_llm.stream(query):
assert isinstance(chunk, Joke)
for chunk in structured_llm.stream(query):
assert isinstance(chunk, Joke)
# JSON Schema
structured_llm = llm.with_structured_output(Joke.model_json_schema(), method=method) # type: ignore[arg-type]
result = structured_llm.invoke(query)
assert isinstance(result, dict)
assert set(result.keys()) == {"setup", "punchline"}
if method == "json_schema":
structured_llm = llm.with_structured_output(
Joke.model_json_schema(), method="json_schema"
)
result = structured_llm.invoke(query)
assert isinstance(result, dict)
assert set(result.keys()) == {"setup", "punchline"}
for chunk in structured_llm.stream(query):
for chunk in structured_llm.stream(query):
assert isinstance(chunk, dict)
assert isinstance(chunk, dict)
assert isinstance(chunk, dict)
assert set(chunk.keys()) == {"setup", "punchline"}
assert set(chunk.keys()) == {"setup", "punchline"}
# Typed Dict
class JokeSchema(TypedDict):
"""Joke to tell user."""
# Typed Dict
class JokeSchema(TypedDict):
"""Joke to tell user."""
setup: Annotated[str, "question to set up a joke"]
punchline: Annotated[str, "answer to resolve the joke"]
setup: Annotated[str, "question to set up a joke"]
punchline: Annotated[str, "answer to resolve the joke"]
structured_llm = llm.with_structured_output(JokeSchema, method=method) # type: ignore[arg-type]
result = structured_llm.invoke(query)
assert isinstance(result, dict)
assert set(result.keys()) == {"setup", "punchline"}
structured_llm = llm.with_structured_output(JokeSchema, method="json_schema")
result = structured_llm.invoke(query)
assert isinstance(result, dict)
assert set(result.keys()) == {"setup", "punchline"}
for chunk in structured_llm.stream(query):
for chunk in structured_llm.stream(query):
assert isinstance(chunk, dict)
assert isinstance(chunk, dict)
assert isinstance(chunk, dict)
assert set(chunk.keys()) == {"setup", "punchline"}
assert set(chunk.keys()) == {"setup", "punchline"}
@pytest.mark.parametrize(("model"), [(DEFAULT_MODEL_NAME)])

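For quick reference, here is roughly how the three schema styles exercised by the test above are used outside the test harness. This is an illustrative sketch, not part of the diff; it assumes a locally pulled llama3.1 model.

from pydantic import BaseModel

from langchain_ollama import ChatOllama


class Joke(BaseModel):
    """Joke to tell user."""

    setup: str
    punchline: str


llm = ChatOllama(model="llama3.1")
query = "Tell me a joke about cats."

# Pydantic class -> parsed into a Joke instance
joke = llm.with_structured_output(Joke, method="json_schema").invoke(query)

# Raw JSON schema (or a TypedDict) -> returned as a dict with the schema's keys
joke_dict = llm.with_structured_output(
    Joke.model_json_schema(), method="json_schema"
).invoke(query)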
View File

@@ -1,29 +1,17 @@
"""Ollama specific chat model integration tests for reasoning models."""
"""Ollama integration tests for reasoning chat models."""
import pytest
from langchain_core.messages import (
AIMessageChunk,
BaseMessageChunk,
HumanMessage,
)
from pydantic import BaseModel, Field
from langchain_core.messages import AIMessageChunk, BaseMessageChunk, HumanMessage
from langchain_ollama import ChatOllama
SAMPLE = "What is 3^3?"
class MathAnswer(BaseModel):
"""A mathematical expression and its numerical answer."""
expression: str = Field(description="The mathematical expression to evaluate.")
answer: int = Field(description="The numerical answer to the expression.")
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
def test_stream_no_reasoning(model: str) -> None:
"""Test streaming with `reasoning=False`"""
llm = ChatOllama(model=model, num_ctx=2**12)
llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False)
messages = [
{
"role": "user",
@@ -39,16 +27,14 @@ def test_stream_no_reasoning(model: str) -> None:
result += chunk
assert isinstance(result, AIMessageChunk)
assert result.content
assert "reasoning_content" not in result.additional_kwargs
assert "<think>" not in result.content and "</think>" not in result.content
assert "<think>" not in result.additional_kwargs["reasoning_content"]
assert "</think>" not in result.additional_kwargs["reasoning_content"]
assert "reasoning_content" not in result.additional_kwargs
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
async def test_astream_no_reasoning(model: str) -> None:
"""Test async streaming with `reasoning=False`"""
llm = ChatOllama(model=model, num_ctx=2**12)
llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False)
messages = [
{
"role": "user",
@@ -64,10 +50,8 @@ async def test_astream_no_reasoning(model: str) -> None:
result += chunk
assert isinstance(result, AIMessageChunk)
assert result.content
assert "reasoning_content" not in result.additional_kwargs
assert "<think>" not in result.content and "</think>" not in result.content
assert "<think>" not in result.additional_kwargs["reasoning_content"]
assert "</think>" not in result.additional_kwargs["reasoning_content"]
assert "reasoning_content" not in result.additional_kwargs
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -89,10 +73,10 @@ def test_stream_reasoning_none(model: str) -> None:
result += chunk
assert isinstance(result, AIMessageChunk)
assert result.content
assert "reasoning_content" not in result.additional_kwargs
assert "<think>" in result.content and "</think>" in result.content
assert "<think>" not in result.additional_kwargs["reasoning_content"]
assert "</think>" not in result.additional_kwargs["reasoning_content"]
assert "reasoning_content" not in result.additional_kwargs
assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -114,10 +98,10 @@ async def test_astream_reasoning_none(model: str) -> None:
result += chunk
assert isinstance(result, AIMessageChunk)
assert result.content
assert "reasoning_content" not in result.additional_kwargs
assert "<think>" in result.content and "</think>" in result.content
assert "<think>" not in result.additional_kwargs["reasoning_content"]
assert "</think>" not in result.additional_kwargs["reasoning_content"]
assert "reasoning_content" not in result.additional_kwargs
assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
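Summarizing the contract these streaming assertions encode, as a usage sketch (behavior as described by the tests above; not part of the diff):

from langchain_ollama import ChatOllama

prompt = "What is 3^3?"

# reasoning=None (default): the model's own behavior applies; DeepSeek R1 emits
# <think>...</think> inline in `content`, and `reasoning_content` is absent.
default_llm = ChatOllama(model="deepseek-r1:1.5b", reasoning=None)

# reasoning=False: no think tags appear in `content`, and `reasoning_content`
# is absent from `additional_kwargs`.
disabled_llm = ChatOllama(model="deepseek-r1:1.5b", reasoning=False)

# reasoning=True: the thinking is separated out into
# `additional_kwargs["reasoning_content"]`, leaving `content` free of tags.
enabled_llm = ChatOllama(model="deepseek-r1:1.5b", reasoning=True)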
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -175,27 +159,23 @@ async def test_reasoning_astream(model: str) -> None:
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
def test_invoke_no_reasoning(model: str) -> None:
"""Test using invoke with `reasoning=False`"""
llm = ChatOllama(model=model, num_ctx=2**12)
llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False)
message = HumanMessage(content=SAMPLE)
result = llm.invoke([message])
assert result.content
assert "reasoning_content" not in result.additional_kwargs
assert "<think>" not in result.content and "</think>" not in result.content
assert "<think>" not in result.additional_kwargs["reasoning_content"]
assert "</think>" not in result.additional_kwargs["reasoning_content"]
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
async def test_ainvoke_no_reasoning(model: str) -> None:
"""Test using async invoke with `reasoning=False`"""
llm = ChatOllama(model=model, num_ctx=2**12)
llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False)
message = HumanMessage(content=SAMPLE)
result = await llm.ainvoke([message])
assert result.content
assert "reasoning_content" not in result.additional_kwargs
assert "<think>" not in result.content and "</think>" not in result.content
assert "<think>" not in result.additional_kwargs["reasoning_content"]
assert "</think>" not in result.additional_kwargs["reasoning_content"]
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -207,8 +187,8 @@ def test_invoke_reasoning_none(model: str) -> None:
assert result.content
assert "reasoning_content" not in result.additional_kwargs
assert "<think>" in result.content and "</think>" in result.content
assert "<think>" not in result.additional_kwargs["reasoning_content"]
assert "</think>" not in result.additional_kwargs["reasoning_content"]
assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -220,8 +200,8 @@ async def test_ainvoke_reasoning_none(model: str) -> None:
assert result.content
assert "reasoning_content" not in result.additional_kwargs
assert "<think>" in result.content and "</think>" in result.content
assert "<think>" not in result.additional_kwargs["reasoning_content"]
assert "</think>" not in result.additional_kwargs["reasoning_content"]
assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -250,3 +230,43 @@ async def test_reasoning_ainvoke(model: str) -> None:
assert "<think>" not in result.content and "</think>" not in result.content
assert "<think>" not in result.additional_kwargs["reasoning_content"]
assert "</think>" not in result.additional_kwargs["reasoning_content"]
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
def test_think_tag_stripping_necessity(model: str) -> None:
"""Test that demonstrates why ``_strip_think_tags`` is necessary.
DeepSeek R1 models include reasoning/thinking as their default behavior.
When ``reasoning=False`` is set, the user explicitly wants no reasoning content,
but Ollama cannot disable thinking at the API level for these models.
Therefore, post-processing is required to strip the ``<think>`` tags.
This test documents the specific behavior that necessitates the
``_strip_think_tags`` function in the chat_models.py implementation.
"""
# Test with reasoning=None (default behavior - should include think tags)
llm_default = ChatOllama(model=model, reasoning=None, num_ctx=2**12)
message = HumanMessage(content=SAMPLE)
result_default = llm_default.invoke([message])
# With reasoning=None, the model's default behavior includes <think> tags
# This demonstrates why we need the stripping logic
assert "<think>" in result_default.content
assert "</think>" in result_default.content
assert "reasoning_content" not in result_default.additional_kwargs
# Test with reasoning=False (explicit disable - should NOT include think tags)
llm_disabled = ChatOllama(model=model, reasoning=False, num_ctx=2**12)
result_disabled = llm_disabled.invoke([message])
# With reasoning=False, think tags should be stripped from content
# This verifies that _strip_think_tags is working correctly
assert "<think>" not in result_disabled.content
assert "</think>" not in result_disabled.content
assert "reasoning_content" not in result_disabled.additional_kwargs
# Verify the difference: same model, different reasoning settings
# Default includes tags, disabled strips them
assert result_default.content != result_disabled.content
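For context, a minimal sketch of the kind of post-processing the docstring above motivates. The actual `_strip_think_tags` helper in chat_models.py may be implemented differently:

import re

_THINK_BLOCK = re.compile(r"<think>.*?</think>\s*", flags=re.DOTALL)


def strip_think_tags_sketch(text: str) -> str:
    """Remove the <think>...</think> blocks a reasoning model emits by default."""
    return _THINK_BLOCK.sub("", text)


assert strip_think_tags_sketch("<think>3 * 3 * 3 = 27</think>The answer is 27.") == "The answer is 27."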

View File

@@ -40,7 +40,7 @@ class TestChatOllama(ChatModelIntegrationTests):
@property
def has_tool_choice(self) -> bool:
# TODO: update after Ollama implements
# https://github.com/ollama/ollama/blob/main/docs/openai.md
# https://github.com/ollama/ollama/blob/main/docs/openai.md#supported-request-fields
return False
@property
@@ -168,7 +168,7 @@ class TestChatOllama(ChatModelIntegrationTests):
with pytest.raises(ValidationError) as excinfo:
ChatOllama(model="any-model", validate_model_on_init=True)
assert "not found in Ollama" in str(excinfo.value)
assert "Failed to connect to Ollama" in str(excinfo.value)
@patch("langchain_ollama.chat_models.Client.list")
def test_init_response_error(self, mock_list: MagicMock) -> None:

View File

@@ -1,10 +1,12 @@
"""Test Ollama embeddings."""
import os
from langchain_tests.integration_tests import EmbeddingsIntegrationTests
from langchain_ollama.embeddings import OllamaEmbeddings
MODEL_NAME = "llama3.1"
MODEL_NAME = os.environ.get("OLLAMA_TEST_MODEL", "llama3.1")
class TestOllamaEmbeddings(EmbeddingsIntegrationTests):

View File

@@ -1,24 +1,27 @@
"""Test OllamaLLM llm."""
import os
import pytest
from langchain_core.outputs import GenerationChunk
from langchain_core.runnables import RunnableConfig
from langchain_ollama.llms import OllamaLLM
MODEL_NAME = "llama3.1"
MODEL_NAME = os.environ.get("OLLAMA_TEST_MODEL", "llama3.1")
REASONING_MODEL_NAME = os.environ.get("OLLAMA_REASONING_TEST_MODEL", "deepseek-r1:1.5b")
SAMPLE = "What is 3^3?"
def test_stream_text_tokens() -> None:
"""Test streaming raw string tokens from OllamaLLM."""
"""Test streaming raw string tokens from `OllamaLLM`."""
llm = OllamaLLM(model=MODEL_NAME)
for token in llm.stream("I'm Pickle Rick"):
assert isinstance(token, str)
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)])
def test__stream_no_reasoning(model: str) -> None:
"""Test low-level chunk streaming of a simple prompt with `reasoning=False`."""
llm = OllamaLLM(model=model, num_ctx=2**12)
@@ -39,7 +42,7 @@ def test__stream_no_reasoning(model: str) -> None:
assert "reasoning_content" not in result_chunk.generation_info # type: ignore[operator]
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)])
def test__stream_with_reasoning(model: str) -> None:
"""Test low-level chunk streaming with `reasoning=True`."""
llm = OllamaLLM(model=model, num_ctx=2**12, reasoning=True)
@@ -64,14 +67,14 @@ def test__stream_with_reasoning(model: str) -> None:
async def test_astream_text_tokens() -> None:
"""Test async streaming raw string tokens from OllamaLLM."""
"""Test async streaming raw string tokens from `OllamaLLM`."""
llm = OllamaLLM(model=MODEL_NAME)
async for token in llm.astream("I'm Pickle Rick"):
assert isinstance(token, str)
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)])
async def test__astream_no_reasoning(model: str) -> None:
"""Test low-level async chunk streaming with `reasoning=False`."""
llm = OllamaLLM(model=model, num_ctx=2**12)
@@ -89,7 +92,7 @@ async def test__astream_no_reasoning(model: str) -> None:
assert "reasoning_content" not in result_chunk.generation_info # type: ignore[operator]
@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)])
async def test__astream_with_reasoning(model: str) -> None:
"""Test low-level async chunk streaming with `reasoning=True`."""
llm = OllamaLLM(model=model, num_ctx=2**12, reasoning=True)
@@ -109,7 +112,7 @@ async def test__astream_with_reasoning(model: str) -> None:
async def test_abatch() -> None:
"""Test batch sync token generation from OllamaLLM."""
"""Test batch sync token generation from `OllamaLLM`."""
llm = OllamaLLM(model=MODEL_NAME)
result = await llm.abatch(["I'm Pickle Rick", "I'm not Pickle Rick"])
@@ -129,7 +132,7 @@ async def test_abatch_tags() -> None:
def test_batch() -> None:
"""Test batch token generation from OllamaLLM."""
"""Test batch token generation from `OllamaLLM`."""
llm = OllamaLLM(model=MODEL_NAME)
result = llm.batch(["I'm Pickle Rick", "I'm not Pickle Rick"])

View File

@@ -1,4 +1,4 @@
"""Test chat model integration."""
"""Unit tests for ChatOllama."""
import json
import logging
@@ -33,6 +33,16 @@ class TestChatOllama(ChatModelUnitTests):
def test__parse_arguments_from_tool_call() -> None:
"""Test that string arguments are preserved as strings in tool call parsing.
This test verifies the fix for PR #30154 which addressed an issue where
string-typed tool arguments (like IDs or long strings) were being incorrectly
processed. The parser should preserve string values as strings rather than
attempting to parse them as JSON when they're already valid string arguments.
The test uses a long string ID to ensure string arguments maintain their
original type after parsing, which is critical for tools expecting string inputs.
"""
raw_response = '{"model":"sample-model","message":{"role":"assistant","content":"","tool_calls":[{"function":{"name":"get_profile_details","arguments":{"arg_1":"12345678901234567890123456"}}}]},"done":false}' # noqa: E501
raw_tool_calls = json.loads(raw_response)["message"]["tool_calls"]
response = _parse_arguments_from_tool_call(raw_tool_calls[0])
@@ -40,6 +50,41 @@ def test__parse_arguments_from_tool_call() -> None:
assert isinstance(response["arg_1"], str)
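A sketch of the behavior the docstring describes, for illustration only; the real `_parse_arguments_from_tool_call` may differ:

import json


def parse_arguments_sketch(raw_tool_call: dict) -> dict:
    """Hypothetical parser mirroring the documented contract."""
    parsed: dict = {}
    for key, value in raw_tool_call["function"]["arguments"].items():
        if isinstance(value, str):
            try:
                decoded = json.loads(value)
                # Only replace the string when it decodes to a structure;
                # scalar-looking strings (IDs, long text) must stay strings.
                parsed[key] = decoded if isinstance(decoded, (dict, list)) else value
            except json.JSONDecodeError:
                parsed[key] = value
        else:
            parsed[key] = value
    return parsed


assert parse_arguments_sketch(
    {
        "function": {
            "name": "get_profile_details",
            "arguments": {"arg_1": "12345678901234567890123456"},
        }
    }
)["arg_1"] == "12345678901234567890123456"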
def test__parse_arguments_from_tool_call_with_function_name_metadata() -> None:
"""Test that functionName metadata is filtered out from tool arguments.
Some models may include metadata like ``functionName`` in the arguments
that just echoes the function name. This should be filtered out for
no-argument tools to return an empty dictionary.
"""
# Test case where arguments contain functionName metadata
raw_tool_call_with_metadata = {
"function": {
"name": "magic_function_no_args",
"arguments": {"functionName": "magic_function_no_args"},
}
}
response = _parse_arguments_from_tool_call(raw_tool_call_with_metadata)
assert response == {}
# Test case where arguments contain both real args and metadata
raw_tool_call_mixed = {
"function": {
"name": "some_function",
"arguments": {"functionName": "some_function", "real_arg": "value"},
}
}
response_mixed = _parse_arguments_from_tool_call(raw_tool_call_mixed)
assert response_mixed == {"real_arg": "value"}
# Test case where functionName has different value (should be preserved)
raw_tool_call_different = {
"function": {"name": "function_a", "arguments": {"functionName": "function_b"}}
}
response_different = _parse_arguments_from_tool_call(raw_tool_call_different)
assert response_different == {"functionName": "function_b"}
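The filtering rule these three cases pin down can be summarized in a few lines; a hypothetical sketch, not the package's implementation:

def filter_function_name_metadata(name: str, arguments: dict) -> dict:
    """Drop a `functionName` entry only when it merely echoes the tool's own name."""
    if arguments.get("functionName") == name:
        return {k: v for k, v in arguments.items() if k != "functionName"}
    return arguments


assert filter_function_name_metadata(
    "magic_function_no_args", {"functionName": "magic_function_no_args"}
) == {}
assert filter_function_name_metadata(
    "some_function", {"functionName": "some_function", "real_arg": "value"}
) == {"real_arg": "value"}
assert filter_function_name_metadata(
    "function_a", {"functionName": "function_b"}
) == {"functionName": "function_b"}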
@contextmanager
def _mock_httpx_client_stream(
*args: Any, **kwargs: Any
@@ -54,6 +99,7 @@ def _mock_httpx_client_stream(
def test_arbitrary_roles_accepted_in_chatmessages(
monkeypatch: pytest.MonkeyPatch,
) -> None:
"""Test that `ChatOllama` accepts arbitrary roles in `ChatMessage`."""
monkeypatch.setattr(Client, "stream", _mock_httpx_client_stream)
llm = ChatOllama(
model=MODEL_NAME,
@@ -94,9 +140,6 @@ dummy_raw_tool_call = {
}
# --- Regression tests for tool-call argument parsing (see #30910) ---
@pytest.mark.parametrize(
"input_string, expected_output",
[
@@ -113,14 +156,14 @@ dummy_raw_tool_call = {
def test_parse_json_string_success_cases(
input_string: str, expected_output: Any
) -> None:
"""Tests that _parse_json_string correctly parses valid and fixable strings."""
"""Tests that `_parse_json_string` correctly parses valid and fixable strings."""
raw_tool_call = {"function": {"name": "test_func", "arguments": input_string}}
result = _parse_json_string(input_string, raw_tool_call=raw_tool_call, skip=False)
assert result == expected_output
def test_parse_json_string_failure_case_raises_exception() -> None:
"""Tests that _parse_json_string raises an exception for truly malformed strings."""
"""Tests that `_parse_json_string` raises an exception for malformed strings."""
malformed_string = "{'key': 'value',,}"
raw_tool_call = {"function": {"name": "test_func", "arguments": malformed_string}}
with pytest.raises(OutputParserException):
@@ -132,7 +175,7 @@ def test_parse_json_string_failure_case_raises_exception() -> None:
def test_parse_json_string_skip_returns_input_on_failure() -> None:
"""Tests that skip=True returns the original string on parse failure."""
"""Tests that `skip=True` returns the original string on parse failure."""
malformed_string = "{'not': valid,,,}"
raw_tool_call = {"function": {"name": "test_func", "arguments": malformed_string}}
result = _parse_json_string(

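The contract these tests exercise, condensed into an illustrative sketch; the real `_parse_json_string` lives in langchain_ollama.chat_models and may be implemented differently:

import ast
import json

from langchain_core.exceptions import OutputParserException


def parse_json_string_sketch(json_string: str, *, raw_tool_call: dict, skip: bool):
    """Parse valid JSON, salvage Python-style dict strings, and honor skip=True."""
    try:
        return json.loads(json_string)
    except json.JSONDecodeError:
        try:
            return ast.literal_eval(json_string)  # salvages single-quoted dicts
        except (ValueError, SyntaxError):
            if skip:
                return json_string  # skip=True: return the original string untouched
            msg = (
                "Could not parse tool arguments for "
                f"{raw_tool_call['function']['name']}"
            )
            raise OutputParserException(msg) from None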
View File

@@ -32,7 +32,7 @@ def test_validate_model_on_init(mock_validate_model: Any) -> None:
@patch("langchain_ollama.embeddings.Client")
def test_embed_documents_passes_options(mock_client_class: Any) -> None:
"""Test that embed_documents method passes options including num_gpu."""
"""Test that `embed_documents()` passes options, including `num_gpu`."""
# Create a mock client instance
mock_client = Mock()
mock_client_class.return_value = mock_client
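For reference, the usage pattern this unit test guards, sketched under the assumption (implied by the test name) that `num_gpu` is accepted as a constructor keyword and forwarded inside the request's `options` payload:

from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(model="llama3.1", num_gpu=1)
vectors = embeddings.embed_documents(["first document", "second document"])
assert len(vectors) == 2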