mirror of
https://github.com/hwchase17/langchain.git
synced 2026-04-11 15:03:36 +00:00
PR #35788 added 7 new fields to the `langchain-profiles` CLI output (`name`, `status`, `release_date`, `last_updated`, `open_weights`, `attachment`, `temperature`) but didn't update `ModelProfile` in `langchain-core`. Partner packages like `langchain-aws` that set `extra="forbid"` on their Pydantic models hit `extra_forbidden` validation errors when Pydantic encountered undeclared TypedDict keys at construction time. This adds the missing fields, makes `ModelProfile` forward-compatible, provides a base-class hook so partners can stop duplicating model-profile validator boilerplate, migrates all in-repo partners to the new hook, and adds runtime + CI-time warnings for schema drift. ## Changes ### `langchain-core` - Add `__pydantic_config__ = ConfigDict(extra="allow")` to `ModelProfile` so unknown profile keys pass Pydantic validation even on models with `extra="forbid"` — forward-compatibility for when the CLI schema evolves ahead of core - Declare the 7 missing fields on `ModelProfile`: `name`, `status`, `release_date`, `last_updated`, `open_weights` (metadata) and `attachment`, `temperature` (capabilities) - Add `_warn_unknown_profile_keys()` in `model_profile.py` — emits a `UserWarning` when a profile dict contains keys not in `ModelProfile`, suggesting a core upgrade. Wrapped in a bare `except` so introspection failures never crash model construction - Add `BaseChatModel._resolve_model_profile()` hook that returns `None` by default. Partners can override this single method instead of redefining the full `_set_model_profile` validator — the base validator calls it automatically - Add `BaseChatModel._check_profile_keys` as a separate `model_validator` that calls `_warn_unknown_profile_keys`. 
Uses a distinct method name so partner overrides of `_set_model_profile` don't inadvertently suppress the check ### `langchain-profiles` CLI - Add `_warn_undeclared_profile_keys()` to the CLI (`cli.py`), called after merging augmentations in `refresh()` — warns at profile-generation time (not just runtime) when emitted keys aren't declared in `ModelProfile`. Gracefully skips if `langchain-core` isn't installed - Add guard test `test_model_data_to_profile_keys_subset_of_model_profile` in model-profiles — feeds a fully-populated model dict to `_model_data_to_profile()` and asserts every emitted key exists in `ModelProfile.__annotations__`. CI fails before any release if someone adds a CLI field without updating the TypedDict ### Partner packages - Migrate all 10 in-repo partners to the `_resolve_model_profile()` hook, replacing duplicated `@model_validator` / `_set_model_profile` overrides: anthropic, deepseek, fireworks, groq, huggingface, mistralai, openai (base + azure), openrouter, perplexity, xai - Anthropic retains custom logic (context-1m beta → `max_input_tokens` override); all others reduce to a one-liner - Add `pr_lint.yml` scope for the new `model-profiles` package
475 lines
16 KiB
Python
475 lines
16 KiB
Python
"""Tests for CLI functionality."""
|
|
|
|
import importlib.util
|
|
import warnings
|
|
from pathlib import Path
|
|
from typing import Any, get_type_hints
|
|
from unittest.mock import Mock, patch
|
|
|
|
import pytest
|
|
from langchain_core.language_models.model_profile import ModelProfile
|
|
|
|
from langchain_model_profiles.cli import (
|
|
_model_data_to_profile,
|
|
_warn_undeclared_profile_keys,
|
|
refresh,
|
|
)
|
|
|
|
|
|
@pytest.fixture
def mock_models_dev_response() -> dict:
    """Create a mock response from models.dev API."""

    def _model(model_id: str, name: str, context: int, inputs: list) -> dict:
        # All fixture models share the same capability flags; only the
        # identity, context window, and input modalities vary.
        return {
            "id": model_id,
            "name": name,
            "tool_call": True,
            "limit": {"context": context, "output": 4096},
            "modalities": {"input": inputs, "output": ["text"]},
        }

    return {
        "anthropic": {
            "id": "anthropic",
            "name": "Anthropic",
            "models": {
                "claude-3-opus": _model(
                    "claude-3-opus", "Claude 3 Opus", 200000, ["text", "image"]
                ),
                "claude-3-sonnet": _model(
                    "claude-3-sonnet", "Claude 3 Sonnet", 200000, ["text", "image"]
                ),
            },
        },
        "openai": {
            "id": "openai",
            "name": "OpenAI",
            "models": {
                "gpt-4": _model("gpt-4", "GPT-4", 8192, ["text"]),
            },
        },
    }
|
|
|
|
|
|
def test_refresh_generates_profiles_file(
    tmp_path: Path, mock_models_dev_response: dict
) -> None:
    """Test that refresh command generates _profiles.py with merged data."""
    data_dir = tmp_path / "data"
    data_dir.mkdir()

    # Provider-level augmentations that refresh() must merge into every model.
    aug_file = data_dir / "profile_augmentations.toml"
    aug_file.write_text("""
provider = "anthropic"

[overrides]
image_url_inputs = true
pdf_inputs = true
""")

    # Stub the models.dev HTTP call and auto-confirm the interactive prompt.
    mock_response = Mock(raise_for_status=Mock())
    mock_response.json.return_value = mock_models_dev_response

    with (
        patch("langchain_model_profiles.cli.httpx.get", return_value=mock_response),
        patch("builtins.input", return_value="y"),
    ):
        refresh("anthropic", data_dir)

    profiles_file = data_dir / "_profiles.py"
    assert profiles_file.exists()

    # The generated header, both provider models, and the merged
    # augmentation keys must all appear in the emitted file.
    profiles_content = profiles_file.read_text()
    for expected in (
        "DO NOT EDIT THIS FILE MANUALLY",
        "PROFILES:",
        "claude-3-opus",
        "claude-3-sonnet",
        "image_url_inputs",
        "pdf_inputs",
    ):
        assert expected in profiles_content
|
|
|
|
|
|
def test_refresh_raises_error_for_missing_provider(
    tmp_path: Path, mock_models_dev_response: dict
) -> None:
    """Test that refresh exits with error for non-existent provider."""
    data_dir = tmp_path / "data"
    data_dir.mkdir()

    # Stub the models.dev HTTP call; the provider lookup fails afterwards.
    mock_response = Mock(raise_for_status=Mock())
    mock_response.json.return_value = mock_models_dev_response

    with (
        patch("langchain_model_profiles.cli.httpx.get", return_value=mock_response),
        patch("builtins.input", return_value="y"),
    ):
        with pytest.raises(SystemExit) as exc_info:
            refresh("nonexistent-provider", data_dir)

    assert exc_info.value.code == 1

    # A failed refresh must not leave a partial output file behind.
    assert not (data_dir / "_profiles.py").exists()
|
|
|
|
|
|
def test_refresh_works_without_augmentations(
    tmp_path: Path, mock_models_dev_response: dict
) -> None:
    """Test that refresh works even without augmentations file."""
    data_dir = tmp_path / "data"
    data_dir.mkdir()
    # Note: no profile_augmentations.toml is written here on purpose.

    mock_response = Mock(raise_for_status=Mock())
    mock_response.json.return_value = mock_models_dev_response

    with (
        patch("langchain_model_profiles.cli.httpx.get", return_value=mock_response),
        patch("builtins.input", return_value="y"),
    ):
        refresh("anthropic", data_dir)

    # A non-empty _profiles.py must still be produced.
    profiles_file = data_dir / "_profiles.py"
    assert profiles_file.exists()
    assert profiles_file.stat().st_size > 0
|
|
|
|
|
|
def test_refresh_aborts_when_user_declines_external_directory(
    tmp_path: Path, mock_models_dev_response: dict
) -> None:
    """Test that refresh aborts when user declines writing to external directory."""
    data_dir = tmp_path / "data"
    data_dir.mkdir()

    mock_response = Mock(raise_for_status=Mock())
    mock_response.json.return_value = mock_models_dev_response

    with (
        patch("langchain_model_profiles.cli.httpx.get", return_value=mock_response),
        patch("builtins.input", return_value="n"),  # User declines
    ):
        with pytest.raises(SystemExit) as exc_info:
            refresh("anthropic", data_dir)

    assert exc_info.value.code == 1

    # Declining the prompt must abort before anything is written.
    assert not (data_dir / "_profiles.py").exists()
|
|
|
|
|
|
def test_refresh_includes_models_defined_only_in_augmentations(
    tmp_path: Path, mock_models_dev_response: dict
) -> None:
    """Ensure models that only exist in augmentations are emitted."""
    data_dir = tmp_path / "data"
    data_dir.mkdir()

    # "custom-offline-model" appears only in the augmentations file,
    # never in the models.dev payload.
    aug_file = data_dir / "profile_augmentations.toml"
    aug_file.write_text("""
provider = "anthropic"

[overrides."custom-offline-model"]
structured_output = true
pdf_inputs = true
max_input_tokens = 123
""")

    mock_response = Mock(raise_for_status=Mock())
    mock_response.json.return_value = mock_models_dev_response

    with (
        patch("langchain_model_profiles.cli.httpx.get", return_value=mock_response),
        patch("builtins.input", return_value="y"),
    ):
        refresh("anthropic", data_dir)

    profiles_file = data_dir / "_profiles.py"
    assert profiles_file.exists()

    # Import the generated module to inspect the emitted _PROFILES mapping.
    spec = importlib.util.spec_from_file_location(
        "generated_profiles_aug_only", profiles_file
    )
    assert spec
    assert spec.loader
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)  # type: ignore[union-attr]

    generated = module._PROFILES  # type: ignore[attr-defined]
    assert "custom-offline-model" in generated
    offline_profile = generated["custom-offline-model"]  # type: ignore[index]
    assert offline_profile["structured_output"] is True
    assert offline_profile["max_input_tokens"] == 123
|
|
|
|
|
|
def test_refresh_generates_sorted_profiles(
    tmp_path: Path, mock_models_dev_response: dict
) -> None:
    """Test that profiles are sorted alphabetically by model ID."""
    data_dir = tmp_path / "data"
    data_dir.mkdir()

    # Inject models in reverse-alphabetical order so the API response
    # is NOT already sorted. (Dict comprehension preserves z, a, m order.)
    mock_models_dev_response["anthropic"]["models"] = {
        model_id: {
            "id": model_id,
            "name": f"{model_id[0].upper()} Model",
            "tool_call": True,
            "limit": {"context": 100000, "output": 2048},
            "modalities": {"input": ["text"], "output": ["text"]},
        }
        for model_id in ("z-model", "a-model", "m-model")
    }

    mock_response = Mock(raise_for_status=Mock())
    mock_response.json.return_value = mock_models_dev_response

    with (
        patch("langchain_model_profiles.cli.httpx.get", return_value=mock_response),
        patch("builtins.input", return_value="y"),
    ):
        refresh("anthropic", data_dir)

    # Load the generated module and check key ordering.
    profiles_file = data_dir / "_profiles.py"
    spec = importlib.util.spec_from_file_location(
        "generated_profiles_sorted", profiles_file
    )
    assert spec
    assert spec.loader
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)  # type: ignore[union-attr]

    model_ids = list(module._PROFILES.keys())  # type: ignore[attr-defined]
    assert model_ids == sorted(model_ids), f"Profile keys are not sorted: {model_ids}"
|
|
|
|
|
|
def test_model_data_to_profile_captures_all_models_dev_fields() -> None:
    """Test that all models.dev fields are captured in the profile."""
    model_data = {
        "id": "claude-opus-4-6",
        "name": "Claude Opus 4.6",
        "status": "deprecated",
        "release_date": "2025-06-01",
        "last_updated": "2025-07-01",
        "open_weights": False,
        "reasoning": True,
        "tool_call": True,
        "tool_choice": True,
        "structured_output": True,
        "attachment": True,
        "temperature": True,
        "limit": {"context": 200000, "output": 64000},
        "modalities": {
            "input": ["text", "image", "pdf"],
            "output": ["text"],
        },
    }
    profile = _model_data_to_profile(model_data)

    # Metadata and limits: exact value checks.
    expected_values = {
        "name": "Claude Opus 4.6",
        "status": "deprecated",
        "release_date": "2025-06-01",
        "last_updated": "2025-07-01",
        "max_input_tokens": 200000,
        "max_output_tokens": 64000,
    }
    for key, value in expected_values.items():
        assert profile[key] == value

    # Boolean metadata, capabilities, and modalities: identity checks so a
    # truthy non-bool (e.g. 1) would still fail.
    assert profile["open_weights"] is False
    true_flags = (
        "reasoning_output",
        "tool_calling",
        "tool_choice",
        "structured_output",
        "attachment",
        "text_inputs",
        "image_inputs",
        "pdf_inputs",
        "text_outputs",
    )
    for key in true_flags:
        assert profile[key] is True
|
|
|
|
|
|
def test_model_data_to_profile_omits_absent_fields() -> None:
    """Test that fields not present in source data are omitted (not None)."""
    minimal = {
        "modalities": {"input": ["text"], "output": ["text"]},
        "limit": {"context": 8192, "output": 4096},
    }
    profile = _model_data_to_profile(minimal)

    # Keys absent from the source data must be absent from the profile too,
    # never present with a None placeholder.
    for absent_key in ("status", "family", "knowledge_cutoff", "cost_input", "interleaved"):
        assert absent_key not in profile
    assert None not in profile.values()
|
|
|
|
|
|
def test_model_data_to_profile_text_modalities() -> None:
    """Test that text input/output modalities are correctly mapped."""
    # (model_data, expected text_inputs, expected text_outputs)
    cases = [
        # Model with text in both input and output
        (
            {
                "modalities": {"input": ["text", "image"], "output": ["text"]},
                "limit": {"context": 128000, "output": 4096},
            },
            True,
            True,
        ),
        # Model without text input (e.g., Whisper-like audio model)
        (
            {
                "modalities": {"input": ["audio"], "output": ["text"]},
                "limit": {"context": 0, "output": 0},
            },
            False,
            True,
        ),
        # Model without text output (e.g., image generator)
        (
            {
                "modalities": {"input": ["text"], "output": ["image"]},
                "limit": {},
            },
            True,
            False,
        ),
    ]

    for model_data, expect_text_in, expect_text_out in cases:
        profile = _model_data_to_profile(model_data)
        assert profile["text_inputs"] is expect_text_in
        assert profile["text_outputs"] is expect_text_out
|
|
|
|
|
|
def test_model_data_to_profile_keys_subset_of_model_profile() -> None:
    """All CLI-emitted profile keys must be declared in `ModelProfile`.

    Guard test: feeds a fully-populated model dict to ``_model_data_to_profile``
    so every key the CLI can emit is present, then asserts each one is declared
    on the ``ModelProfile`` TypedDict. Fails CI before release when a CLI field
    is added without updating ``langchain-core``.
    """
    # Build a model_data dict with every possible field populated so
    # _model_data_to_profile includes all keys it can emit.
    model_data = {
        "id": "test-model",
        "name": "Test Model",
        "status": "active",
        "release_date": "2025-01-01",
        "last_updated": "2025-01-01",
        "open_weights": True,
        "reasoning": True,
        "tool_call": True,
        "tool_choice": True,
        "structured_output": True,
        "attachment": True,
        "temperature": True,
        "image_url_inputs": True,
        "image_tool_message": True,
        "pdf_tool_message": True,
        "pdf_inputs": True,
        "limit": {"context": 100000, "output": 4096},
        "modalities": {
            "input": ["text", "image", "audio", "video", "pdf"],
            "output": ["text", "image", "audio", "video"],
        },
    }

    profile = _model_data_to_profile(model_data)
    # get_type_hints resolves the TypedDict's (possibly forward-referenced)
    # annotations, unlike raw __annotations__ access.
    declared_fields = set(get_type_hints(ModelProfile).keys())
    emitted_fields = set(profile.keys())
    extra = emitted_fields - declared_fields

    # Only the first fragment interpolates; the rest are plain literals
    # (previously flagged by ruff F541 as needless f-strings).
    assert not extra, (
        f"CLI emits profile keys not declared in ModelProfile: {sorted(extra)}. "
        "Add these fields to langchain_core.language_models.model_profile."
        "ModelProfile and release langchain-core before refreshing partner "
        "profiles."
    )
|
|
|
|
|
|
class TestWarnUndeclaredProfileKeys:
    """Tests for _warn_undeclared_profile_keys."""

    def test_warns_on_undeclared_keys(self) -> None:
        """Extra keys across profiles trigger a single warning."""
        profiles: dict[str, dict[str, Any]] = {
            "model-a": {"max_input_tokens": 100, "future_key": True},
            "model-b": {"another_key": "val"},
        }
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            _warn_undeclared_profile_keys(profiles)

        # Keys from every profile are aggregated into one combined warning.
        assert len(caught) == 1
        message = str(caught[0].message)
        assert "another_key" in message
        assert "future_key" in message

    def test_silent_on_declared_keys_only(self) -> None:
        """No warning when all keys are declared in ModelProfile."""
        profiles: dict[str, dict[str, Any]] = {
            "model-a": {"max_input_tokens": 100, "tool_calling": True},
        }
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            _warn_undeclared_profile_keys(profiles)

        assert not caught

    def test_silent_when_langchain_core_not_installed(self) -> None:
        """Gracefully skips when langchain-core is not importable."""
        import sys

        profiles: dict[str, dict[str, Any]] = {
            "model-a": {"unknown": True},
        }
        # Mapping the module to None makes any import of it raise ImportError.
        core_missing = patch.dict(
            sys.modules,
            {"langchain_core.language_models.model_profile": None},
        )
        with core_missing, warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            _warn_undeclared_profile_keys(profiles)

        # Only the undeclared-key warning matters; other warnings may occur.
        undeclared = [x for x in caught if "not declared" in str(x.message)]
        assert not undeclared

    def test_survives_get_type_hints_failure(self) -> None:
        """Gracefully handles TypeError from get_type_hints."""
        profiles: dict[str, dict[str, Any]] = {
            "model-a": {"unknown": True},
        }
        broken_hints = patch(
            "langchain_model_profiles.cli.get_type_hints",
            side_effect=TypeError("broken"),
        )
        # Must not raise even when introspection itself blows up.
        with broken_hints:
            _warn_undeclared_profile_keys(profiles)