Files
langchain/libs/model-profiles/tests/unit_tests/test_cli.py
Mason Daugherty 70192690b1 fix(model-profiles): sort generated profiles by model ID for stable diffs (#35344)
- Sort model profiles alphabetically by model ID (the top-level
`_PROFILES` dictionary keys, e.g. `claude-3-5-haiku-20241022`,
`gpt-4o-mini`) before writing `_profiles.py`, so that regenerating
profiles only shows actual data changes in diffs — not random reordering
from the models.dev API response order
- Regenerate all 10 partner profile files with the new sorted ordering
2026-02-19 23:11:22 -05:00

303 lines
9.7 KiB
Python

"""Tests for CLI functionality."""
import importlib.util
from pathlib import Path
from unittest.mock import Mock, patch
import pytest
from langchain_model_profiles.cli import _model_data_to_profile, refresh
@pytest.fixture
def mock_models_dev_response() -> dict:
    """Return a fake models.dev API payload covering two providers.

    Mirrors the real API shape: top-level provider id -> provider record,
    each record holding a ``models`` mapping keyed by model id.
    """
    # Both Anthropic entries share every field except id/name, so build
    # them from a single template.
    anthropic_models = {
        model_id: {
            "id": model_id,
            "name": display_name,
            "tool_call": True,
            "limit": {"context": 200000, "output": 4096},
            "modalities": {"input": ["text", "image"], "output": ["text"]},
        }
        for model_id, display_name in (
            ("claude-3-opus", "Claude 3 Opus"),
            ("claude-3-sonnet", "Claude 3 Sonnet"),
        )
    }
    openai_models = {
        "gpt-4": {
            "id": "gpt-4",
            "name": "GPT-4",
            "tool_call": True,
            "limit": {"context": 8192, "output": 4096},
            "modalities": {"input": ["text"], "output": ["text"]},
        }
    }
    return {
        "anthropic": {
            "id": "anthropic",
            "name": "Anthropic",
            "models": anthropic_models,
        },
        "openai": {
            "id": "openai",
            "name": "OpenAI",
            "models": openai_models,
        },
    }
def test_refresh_generates_profiles_file(
    tmp_path: Path, mock_models_dev_response: dict
) -> None:
    """Refresh should emit _profiles.py merging API data with augmentations."""
    target_dir = tmp_path / "data"
    target_dir.mkdir()

    # Augmentations whose override keys must surface in the generated file.
    (target_dir / "profile_augmentations.toml").write_text("""
provider = "anthropic"
[overrides]
image_url_inputs = true
pdf_inputs = true
""")

    # Stub out the network call to models.dev.
    fake_response = Mock()
    fake_response.json.return_value = mock_models_dev_response
    fake_response.raise_for_status = Mock()

    with (
        patch("langchain_model_profiles.cli.httpx.get", return_value=fake_response),
        patch("builtins.input", return_value="y"),
    ):
        refresh("anthropic", target_dir)

    generated = target_dir / "_profiles.py"
    assert generated.exists()

    content = generated.read_text()
    # Generated header, top-level mapping, and both API models are present.
    for expected in (
        "DO NOT EDIT THIS FILE MANUALLY",
        "PROFILES:",
        "claude-3-opus",
        "claude-3-sonnet",
        # Augmentation overrides were merged in.
        "image_url_inputs",
        "pdf_inputs",
    ):
        assert expected in content
def test_refresh_raises_error_for_missing_provider(
    tmp_path: Path, mock_models_dev_response: dict
) -> None:
    """An unknown provider id must abort with exit code 1 and write nothing."""
    target_dir = tmp_path / "data"
    target_dir.mkdir()

    # Stub out the network call to models.dev.
    fake_response = Mock()
    fake_response.json.return_value = mock_models_dev_response
    fake_response.raise_for_status = Mock()

    with (
        patch("langchain_model_profiles.cli.httpx.get", return_value=fake_response),
        patch("builtins.input", return_value="y"),
        pytest.raises(SystemExit) as exc_info,
    ):
        refresh("nonexistent-provider", target_dir)
    assert exc_info.value.code == 1

    # A failed refresh must not leave a partial output file behind.
    assert not (target_dir / "_profiles.py").exists()
def test_refresh_works_without_augmentations(
    tmp_path: Path, mock_models_dev_response: dict
) -> None:
    """Refresh must succeed when no profile_augmentations.toml exists."""
    target_dir = tmp_path / "data"
    target_dir.mkdir()

    # Stub out the network call to models.dev; note no augmentations
    # file is written into target_dir for this case.
    fake_response = Mock()
    fake_response.json.return_value = mock_models_dev_response
    fake_response.raise_for_status = Mock()

    with (
        patch("langchain_model_profiles.cli.httpx.get", return_value=fake_response),
        patch("builtins.input", return_value="y"),
    ):
        refresh("anthropic", target_dir)

    # Output file exists and is non-empty.
    generated = target_dir / "_profiles.py"
    assert generated.exists()
    assert generated.stat().st_size > 0
def test_refresh_aborts_when_user_declines_external_directory(
    tmp_path: Path, mock_models_dev_response: dict
) -> None:
    """Declining the external-directory prompt must abort without writing."""
    target_dir = tmp_path / "data"
    target_dir.mkdir()

    # Stub out the network call to models.dev.
    fake_response = Mock()
    fake_response.json.return_value = mock_models_dev_response
    fake_response.raise_for_status = Mock()

    with (
        patch("langchain_model_profiles.cli.httpx.get", return_value=fake_response),
        # Simulate the user answering "no" at the confirmation prompt.
        patch("builtins.input", return_value="n"),
        pytest.raises(SystemExit) as exc_info,
    ):
        refresh("anthropic", target_dir)
    assert exc_info.value.code == 1

    # Nothing should have been written after the user declined.
    assert not (target_dir / "_profiles.py").exists()
def test_refresh_includes_models_defined_only_in_augmentations(
    tmp_path: Path, mock_models_dev_response: dict
) -> None:
    """A model present only in augmentations must still appear in the output."""
    target_dir = tmp_path / "data"
    target_dir.mkdir()

    # "custom-offline-model" does not exist in the mocked API payload,
    # only in this augmentations file.
    (target_dir / "profile_augmentations.toml").write_text("""
provider = "anthropic"
[overrides."custom-offline-model"]
structured_output = true
pdf_inputs = true
max_input_tokens = 123
""")

    fake_response = Mock()
    fake_response.json.return_value = mock_models_dev_response
    fake_response.raise_for_status = Mock()

    with (
        patch("langchain_model_profiles.cli.httpx.get", return_value=fake_response),
        patch("builtins.input", return_value="y"),
    ):
        refresh("anthropic", target_dir)

    generated = target_dir / "_profiles.py"
    assert generated.exists()

    # Import the generated module and inspect _PROFILES directly.
    spec = importlib.util.spec_from_file_location(
        "generated_profiles_aug_only", generated
    )
    assert spec
    assert spec.loader
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)  # type: ignore[union-attr]

    profiles = module._PROFILES  # type: ignore[attr-defined]
    assert "custom-offline-model" in profiles
    entry = profiles["custom-offline-model"]  # type: ignore[index]
    assert entry["structured_output"] is True
    assert entry["max_input_tokens"] == 123
def test_refresh_generates_sorted_profiles(
    tmp_path: Path, mock_models_dev_response: dict
) -> None:
    """Keys of the generated _PROFILES dict must be alphabetically sorted."""
    target_dir = tmp_path / "data"
    target_dir.mkdir()

    def make_entry(model_id: str, display_name: str) -> dict:
        # All three injected models are identical apart from id/name.
        return {
            "id": model_id,
            "name": display_name,
            "tool_call": True,
            "limit": {"context": 100000, "output": 2048},
            "modalities": {"input": ["text"], "output": ["text"]},
        }

    # Inject models in deliberately unsorted order (z, a, m) so the API
    # response itself is NOT already alphabetical.
    mock_models_dev_response["anthropic"]["models"] = {
        "z-model": make_entry("z-model", "Z Model"),
        "a-model": make_entry("a-model", "A Model"),
        "m-model": make_entry("m-model", "M Model"),
    }

    fake_response = Mock()
    fake_response.json.return_value = mock_models_dev_response
    fake_response.raise_for_status = Mock()

    with (
        patch("langchain_model_profiles.cli.httpx.get", return_value=fake_response),
        patch("builtins.input", return_value="y"),
    ):
        refresh("anthropic", target_dir)

    # Import the generated module and check the key ordering.
    spec = importlib.util.spec_from_file_location(
        "generated_profiles_sorted", target_dir / "_profiles.py"
    )
    assert spec
    assert spec.loader
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)  # type: ignore[union-attr]

    model_ids = list(module._PROFILES.keys())  # type: ignore[attr-defined]
    assert model_ids == sorted(model_ids), f"Profile keys are not sorted: {model_ids}"
def test_model_data_to_profile_text_modalities() -> None:
    """Text input/output modalities must map onto text_inputs/text_outputs."""
    # (model data, expected text_inputs, expected text_outputs)
    cases = [
        # Text in both directions (plus an extra image input).
        (
            {
                "modalities": {"input": ["text", "image"], "output": ["text"]},
                "limit": {"context": 128000, "output": 4096},
            },
            True,
            True,
        ),
        # Audio-only input (Whisper-like): no text in.
        (
            {
                "modalities": {"input": ["audio"], "output": ["text"]},
                "limit": {"context": 0, "output": 0},
            },
            False,
            True,
        ),
        # Image generator: no text out.
        (
            {
                "modalities": {"input": ["text"], "output": ["image"]},
                "limit": {},
            },
            True,
            False,
        ),
    ]
    for model_data, wants_text_in, wants_text_out in cases:
        profile = _model_data_to_profile(model_data)
        assert profile["text_inputs"] is wants_text_in
        assert profile["text_outputs"] is wants_text_out