From f7e87f7ab8a4586a69eea8e59f5df4b05953746b Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Mon, 22 Jun 2026 22:15:29 -0400 Subject: [PATCH] feat(model-profiles): plain-English summary for profile refresh PRs (#38218) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Automated model-profile refresh PRs (e.g. #38210) ship a static template body, so a reviewer has to open *Files changed* and read large blocks of generated data to learn what actually moved. Because the underlying profile data is fully structured, we can describe the changes deterministically — no LLM, no hallucination risk. This adds a `langchain-profiles summarize` subcommand that compares the working-tree `_profiles.py` files against a git ref and renders a skimmable Markdown summary: models added (with a short capability descriptor), models removed, and per-field capability changes (context/output tokens, modalities, tool calling, reasoning, etc.), grouped by provider and capped so huge refreshes stay readable. Profiles are read with `ast.literal_eval` rather than imported, so the generated data file is never executed. Example output for a refresh that adds a model and bumps an output limit: ``` ## Summary of changes **1 added · 0 removed · 1 changed** across 1 provider(s). ### openai **➕ 1 added** - `gpt-6-preview` — 1,000,000 ctx, 128,000 out, text+image+audio in, reasoning, tools **✏️ 1 changed** - `gpt-3.5-turbo`: max output tokens 4,096 → 16,384 ``` Made by [Open SWE](https://openswe.vercel.app/agents/9bcbf182-effc-ba9b-0df3-afac620ad152) --------- Co-authored-by: open-swe[bot] --- .github/workflows/_refresh_model_profiles.yml | 28 +- .../langchain_model_profiles/_summary.py | 475 ++++++++++++++++++ .../langchain_model_profiles/cli.py | 43 ++ .../tests/unit_tests/test_summary.py | 470 +++++++++++++++++ libs/model-profiles/uv.lock | 4 +- 5 files changed, 1017 insertions(+), 3 deletions(-) create mode 100644 libs/model-profiles/langchain_model_profiles/_summary.py create mode 100644 libs/model-profiles/tests/unit_tests/test_summary.py diff --git a/.github/workflows/_refresh_model_profiles.yml b/.github/workflows/_refresh_model_profiles.yml index cc5abbe9494..33bae64d8ab 100644 --- a/.github/workflows/_refresh_model_profiles.yml +++ b/.github/workflows/_refresh_model_profiles.yml @@ -167,6 +167,32 @@ jobs: exit 1 fi + - name: "📝 Build PR body with change summary" + id: pr-body + env: + PROVIDERS_JSON: ${{ inputs.providers }} + PR_BODY: ${{ inputs.pr-body }} + run: | + # The refresh step modified the working tree without committing, so + # comparing against HEAD yields exactly the refresh's changes. + cli_dir="${GITHUB_WORKSPACE}/${{ steps.cli.outputs.dir }}" + body_file="${RUNNER_TEMP}/pr_body.md" + printf '%s\n\n' "${PR_BODY}" > "${body_file}" + # `summarize` builds the whole summary in memory and prints it once, + # so a failure exits non-zero before any stdout reaches the append — + # the body keeps only the static note, never a half-written summary. + if ! uv run --frozen --project "${cli_dir}" \ + langchain-profiles summarize \ + --providers "${PROVIDERS_JSON}" \ + --base-ref HEAD \ + --repo-root "${GITHUB_WORKSPACE}" >> "${body_file}"; then + echo "::warning::Could not generate change summary; see job log." + # Surface the degradation in the PR body too: the warning above only + # lands in the Actions log, which a PR reviewer won't see. + printf '\n> [!NOTE]\n> Automated change summary unavailable — see the workflow run log.\n' >> "${body_file}" + fi + echo "path=${body_file}" >> "$GITHUB_OUTPUT" + - name: "🔑 Generate GitHub App token" id: app-token uses: actions/create-github-app-token@bcd2ba49218906704ab6c1aa796996da409d3eb1 # v3 @@ -182,7 +208,7 @@ jobs: branch: ${{ inputs.pr-branch }} commit-message: ${{ inputs.pr-title }} title: ${{ inputs.pr-title }} - body: ${{ inputs.pr-body }} + body-path: ${{ steps.pr-body.outputs.path }} labels: ${{ inputs.pr-labels }} add-paths: ${{ inputs.add-paths }} diff --git a/libs/model-profiles/langchain_model_profiles/_summary.py b/libs/model-profiles/langchain_model_profiles/_summary.py new file mode 100644 index 00000000000..391a1f384ed --- /dev/null +++ b/libs/model-profiles/langchain_model_profiles/_summary.py @@ -0,0 +1,475 @@ +"""Generate a plain-English summary of model profile changes. + +The `refresh_model_profiles` workflow opens an automated PR whenever the data +behind `_profiles.py` files changes. Those diffs are large blocks of generated +data, so a reviewer otherwise has to open *Files changed* and eyeball raw values +to learn what actually moved. This module turns the structured before/after data +into a skimmable Markdown summary (new models, removed models, and per-field +capability/metadata changes) for the PR body. The summary is generated +deterministically from the data, so there is no risk of an LLM misdescribing it. +""" + +from __future__ import annotations + +import ast +import subprocess +from dataclasses import dataclass, field +from pathlib import Path +from typing import TYPE_CHECKING, Any, NamedTuple, TypedDict + +if TYPE_CHECKING: + from collections.abc import Mapping + + from langchain_core.language_models.model_profile import ( + ModelProfile, + ModelProfileRegistry, + ) + +# Maximum number of bullet rows rendered per section before truncating. +_MAX_ROWS = 25 + +# Human-readable labels for profile fields. +_FIELD_LABELS: dict[str, str] = { + "name": "display name", + "status": "status", + "release_date": "release date", + "last_updated": "last updated", + "open_weights": "open weights", + "max_input_tokens": "max input tokens", + "max_output_tokens": "max output tokens", + "text_inputs": "text input", + "image_inputs": "image input", + "audio_inputs": "audio input", + "pdf_inputs": "PDF input", + "video_inputs": "video input", + "text_outputs": "text output", + "image_outputs": "image output", + "audio_outputs": "audio output", + "video_outputs": "video output", + "reasoning_output": "reasoning", + "tool_calling": "tool calling", + "tool_choice": "tool choice", + "tool_call_streaming": "tool call streaming", + "structured_output": "structured output", + "attachment": "attachments", + "temperature": "temperature control", + "image_url_inputs": "image URL input", + "image_tool_message": "image tool messages", + "pdf_tool_message": "PDF tool messages", +} + +# Token fields rendered with thousands separators. +_TOKEN_FIELDS = frozenset({"max_input_tokens", "max_output_tokens"}) + + +class ProfileParseError(ValueError): + """A `_profiles.py` source exists but its `_PROFILES` data is unparseable. + + Distinguished from a genuinely absent file (which yields an empty mapping) + so that corrupt working-tree or committed data surfaces as an error rather + than being silently diffed as a mass addition or removal of models. + """ + + +class FieldChange(NamedTuple): + """Old and new values for a single changed profile field. + + Named rather than a bare `tuple` so the old → new ordering the renderer + relies on is part of the type instead of a positional convention. Values are + heterogeneous profile data (bool, int, str, or unset), so `Any` is the + honest element type here. + """ + + old: Any + """Value before the refresh, or `None` if absent/unset.""" + + new: Any + """Value after the refresh, or `None` if absent/unset.""" + + +class ProviderEntry(TypedDict): + """One provider's identity and the data dir holding its `_profiles.py`.""" + + provider: str + """Provider identifier (e.g. `'openai'`).""" + + data_dir: str + """Path to the provider's data directory, relative to the repo root.""" + + +@dataclass +class ProfileDiff: + """Structured difference between two sets of model profiles.""" + + added: list[str] = field(default_factory=list) + """Model IDs present after the refresh but not before, sorted.""" + + removed: list[str] = field(default_factory=list) + """Model IDs present before the refresh but not after, sorted.""" + + changed: dict[str, dict[str, FieldChange]] = field(default_factory=dict) + """Per-model field changes, keyed by model ID then field name.""" + + added_profiles: ModelProfileRegistry = field(default_factory=dict) + """Full profiles for each added model, keyed by model ID.""" + + @property + def is_empty(self) -> bool: + """Whether there are no model additions, removals, or field changes.""" + return not (self.added or self.removed or self.changed) + + +def extract_profiles(source: str) -> ModelProfileRegistry: + """Extract the `_PROFILES` mapping from `_profiles.py` source. + + Uses `ast.literal_eval` rather than importing/executing the module so the + generated data file is never run as code. + + Args: + source: Contents of a `_profiles.py` module. An empty string (e.g. a + file absent at a git ref) yields an empty mapping. + + Returns: + The `_PROFILES` mapping, or an empty dict when the source contains no + `_PROFILES` assignment. + + Raises: + ProfileParseError: If the source is present but cannot be parsed, or its + `_PROFILES` value is not a dict literal. Surfacing this rather than + returning `{}` prevents a corrupt file from being misreported as + every model added or removed. + """ + try: + tree = ast.parse(source) + except SyntaxError as e: + msg = f"Could not parse profile source as Python: {e}" + raise ProfileParseError(msg) from e + + for node in tree.body: + if isinstance(node, ast.AnnAssign): + targets: list[ast.expr] = [node.target] + elif isinstance(node, ast.Assign): + targets = list(node.targets) + else: + continue + is_profiles = any( + isinstance(t, ast.Name) and t.id == "_PROFILES" for t in targets + ) + if is_profiles and node.value is not None: + try: + value = ast.literal_eval(node.value) + except (ValueError, SyntaxError) as e: + msg = f"`_PROFILES` is not a literal expression: {e}" + raise ProfileParseError(msg) from e + if not isinstance(value, dict): + msg = f"`_PROFILES` is not a dict (got {type(value).__name__})" + raise ProfileParseError(msg) + return value + return {} + + +def diff_profiles(old: ModelProfileRegistry, new: ModelProfileRegistry) -> ProfileDiff: + """Compute the difference between two `_PROFILES` mappings. + + Args: + old: Profiles before the refresh. + new: Profiles after the refresh. + + Returns: + A `ProfileDiff` describing added, removed, and changed models. + """ + added = sorted(set(new) - set(old)) + removed = sorted(set(old) - set(new)) + + changed: dict[str, dict[str, FieldChange]] = {} + for model_id in sorted(set(old) & set(new)): + # View profiles as plain mappings so we can iterate dynamic keys (the + # `ModelProfile` TypedDict only permits literal-key access). + old_profile: Mapping[str, Any] = old[model_id] + new_profile: Mapping[str, Any] = new[model_id] + fields: dict[str, FieldChange] = {} + for key in sorted(set(old_profile) | set(new_profile)): + old_val = old_profile.get(key) + new_val = new_profile.get(key) + if old_val != new_val: + fields[key] = FieldChange(old_val, new_val) + if fields: + changed[model_id] = fields + + added_profiles = {model_id: new[model_id] for model_id in added} + return ProfileDiff( + added=added, + removed=removed, + changed=changed, + added_profiles=added_profiles, + ) + + +def _format_value(field_name: str, value: Any) -> str: # noqa: ANN401 + """Render a single field value for display.""" + if value is None: + return "unset" + if isinstance(value, bool): + return "yes" if value else "no" + if isinstance(value, int) and field_name in _TOKEN_FIELDS: + return f"{value:,}" + return f"`{value}`" if isinstance(value, str) else str(value) + + +def _describe_field_change( + field_name: str, + old_val: Any, # noqa: ANN401 + new_val: Any, # noqa: ANN401 +) -> str: + """Produce a plain-English phrase for one field change.""" + label = _FIELD_LABELS.get(field_name, field_name) + if isinstance(old_val, bool) or isinstance(new_val, bool): + if new_val and not old_val: + return f"added {label}" + if old_val and not new_val: + return f"removed {label}" + old_str = _format_value(field_name, old_val) + new_str = _format_value(field_name, new_val) + return f"{label} {old_str} → {new_str}" + + +def _describe_new_model(profile: ModelProfile) -> str: + """Produce a short descriptor for a newly added model.""" + parts: list[str] = [] + context = profile.get("max_input_tokens") + if context: + parts.append(f"{context:,} ctx") + output = profile.get("max_output_tokens") + if output: + parts.append(f"{output:,} out") + modalities = [ + name + for key, name in ( + ("image_inputs", "image"), + ("audio_inputs", "audio"), + ("video_inputs", "video"), + ("pdf_inputs", "pdf"), + ) + if profile.get(key) + ] + if modalities: + parts.append("text+" + "+".join(modalities) + " in") + if profile.get("reasoning_output"): + parts.append("reasoning") + if profile.get("tool_calling"): + parts.append("tools") + return ", ".join(parts) + + +def _truncate(rows: list[str]) -> list[str]: + """Cap a list of bullet rows, appending an ellipsis row when truncated.""" + if len(rows) <= _MAX_ROWS: + return rows + hidden = len(rows) - _MAX_ROWS + return [*rows[:_MAX_ROWS], f"- …and {hidden} more"] + + +def render_provider_section(provider: str, diff: ProfileDiff) -> str | None: + """Render the Markdown section for a single provider, or None if unchanged. + + Args: + provider: Provider identifier (e.g. `'openai'`). + diff: The computed `ProfileDiff` for the provider. + + Returns: + Markdown for the provider's changes, or `None` when there are none. + """ + if diff.is_empty: + return None + + lines = [f"### {provider}"] + + if diff.added: + lines.append(f"\n**➕ {len(diff.added)} added**") # noqa: RUF001 + rows = [] + for model_id in diff.added: + descriptor = _describe_new_model(diff.added_profiles[model_id]) + suffix = f" — {descriptor}" if descriptor else "" + rows.append(f"- `{model_id}`{suffix}") + lines.extend(_truncate(rows)) + + if diff.removed: + lines.append(f"\n**➖ {len(diff.removed)} removed**") # noqa: RUF001 + lines.extend(_truncate([f"- `{m}`" for m in diff.removed])) + + if diff.changed: + lines.append(f"\n**✏️ {len(diff.changed)} changed**") + rows = [] + for model_id, fields in diff.changed.items(): + phrases = [ + _describe_field_change(name, change.old, change.new) + for name, change in fields.items() + ] + rows.append(f"- `{model_id}`: " + "; ".join(phrases)) + lines.extend(_truncate(rows)) + + return "\n".join(lines) + + +def build_summary(provider_diffs: dict[str, ProfileDiff]) -> str: + """Assemble the full Markdown summary across all providers. + + Args: + provider_diffs: Mapping of provider name to its `ProfileDiff`. + + Returns: + Markdown summary. When nothing changed, a short note is returned. + """ + sections = [ + section + for provider in sorted(provider_diffs) + if (section := render_provider_section(provider, provider_diffs[provider])) + ] + if not sections: + return "No model profile data changed." + + total_added = sum(len(d.added) for d in provider_diffs.values()) + total_removed = sum(len(d.removed) for d in provider_diffs.values()) + total_changed = sum(len(d.changed) for d in provider_diffs.values()) + headline = ( + f"**{total_added} added · {total_removed} removed · " + f"{total_changed} changed** across {len(sections)} provider(s)." + ) + + return "\n\n".join(["## Summary of changes", headline, *sections]) + + +def _verify_ref(repo_root: Path, ref: str) -> None: + """Confirm `ref` resolves to a commit in `repo_root`. + + Validating once up front lets `_git_show` treat a non-zero exit + unambiguously as "path absent at this ref", rather than conflating a typo'd + ref, an unfetched ref, or a non-repository root with a genuinely new file — + which would otherwise render every existing model as newly added. + + Raises: + RuntimeError: If git is unavailable, `repo_root` is not a repository, or + `ref` cannot be resolved. + """ + try: + result = subprocess.run( # noqa: S603 + [ # noqa: S607 + "git", + "-C", + str(repo_root), + "rev-parse", + "--verify", + "--quiet", + f"{ref}^{{commit}}", + ], + capture_output=True, + text=True, + check=False, + ) + except OSError as e: + msg = f"Could not run git (is it installed and on PATH?): {e}" + raise RuntimeError(msg) from e + if result.returncode != 0: + msg = ( + f"Could not resolve base ref {ref!r} in {repo_root}; " + "is it a valid git ref in this repository?" + ) + raise RuntimeError(msg) + + +def _git_show(repo_root: Path, ref: str, rel_path: str) -> str | None: + """Return file contents at `ref`, or None if the file does not exist there. + + Assumes `ref` has already been validated by `_verify_ref`, so a non-zero + exit here means the path is absent at `ref` rather than a bad ref. + """ + try: + result = subprocess.run( # noqa: S603 + ["git", "-C", str(repo_root), "show", f"{ref}:{rel_path}"], # noqa: S607 + capture_output=True, + text=True, + check=False, + ) + except OSError: + return None + return result.stdout if result.returncode == 0 else None + + +def summarize( + providers: list[ProviderEntry], + *, + base_ref: str = "HEAD", + repo_root: Path | None = None, +) -> str: + """Build a Markdown summary of profile changes vs `base_ref`. + + Args: + providers: List of `{'provider': ..., 'data_dir': ...}` entries, + matching the workflow input. `data_dir` is relative to the repo + root and contains `_profiles.py`. + base_ref: Git ref to compare the working tree against. + repo_root: Repository root. Defaults to the current directory. + + Returns: + Markdown summary suitable for a PR body. + + Raises: + RuntimeError: If `base_ref` cannot be resolved, or a profiles file + exists but cannot be read or parsed. + ValueError: If a `providers` entry is missing a required key. + TypeError: If a `providers` entry's `provider`/`data_dir` is not a + string. + """ + root = (repo_root or Path.cwd()).resolve() + _verify_ref(root, base_ref) + provider_diffs: dict[str, ProfileDiff] = {} + + for entry in providers: + # View the entry as an untrusted mapping: at the CLI boundary it is + # arbitrary parsed JSON, not a guaranteed `ProviderEntry`. + entry_map: Mapping[str, Any] = entry + try: + provider = entry_map["provider"] + data_dir = entry_map["data_dir"] + except (KeyError, TypeError) as e: + msg = ( + f"Invalid provider entry {entry!r}: expected 'provider' and " + f"'data_dir' keys ({e})" + ) + raise ValueError(msg) from e + if not isinstance(provider, str) or not isinstance(data_dir, str): + msg = ( + f"Invalid provider entry {entry!r}: 'provider' and 'data_dir' " + "must be strings" + ) + raise TypeError(msg) + rel_path = f"{data_dir.rstrip('/')}/_profiles.py" + + old_source = _git_show(root, base_ref, rel_path) or "" + new_path = root / rel_path + if new_path.exists(): + try: + new_source = new_path.read_text(encoding="utf-8") + except (OSError, UnicodeDecodeError) as e: + msg = f"Could not read {new_path}: {e}" + raise RuntimeError(msg) from e + else: + new_source = "" + + # A corrupt-but-readable file must surface as an error: extracting `{}` + # from it would otherwise be diffed as every model added (old side) or + # removed (new side), yielding a confident but wrong summary. + try: + old_profiles = extract_profiles(old_source) + except ProfileParseError as e: + msg = f"Profile data for {provider!r} at {base_ref!r} is unparseable: {e}" + raise RuntimeError(msg) from e + try: + new_profiles = extract_profiles(new_source) + except ProfileParseError as e: + msg = f"Profile data for {provider!r} at {new_path} is unparseable: {e}" + raise RuntimeError(msg) from e + + provider_diffs[provider] = diff_profiles(old_profiles, new_profiles) + + return build_summary(provider_diffs) diff --git a/libs/model-profiles/langchain_model_profiles/cli.py b/libs/model-profiles/langchain_model_profiles/cli.py index b7b2c56ea76..456d70c55e7 100644 --- a/libs/model-profiles/langchain_model_profiles/cli.py +++ b/libs/model-profiles/langchain_model_profiles/cli.py @@ -399,10 +399,53 @@ def main() -> None: help="Data directory containing profile_augmentations.toml", ) + # summarize command + summarize_parser = subparsers.add_parser( + "summarize", + help="Summarize profile changes vs a git ref as Markdown (for PR bodies)", + ) + summarize_parser.add_argument( + "--providers", + required=True, + help=( + "JSON array of objects with 'provider' and 'data_dir' keys " + "(data_dir relative to the repo root)." + ), + ) + summarize_parser.add_argument( + "--base-ref", + default="HEAD", + help="Git ref to compare the working tree against (default: HEAD).", + ) + summarize_parser.add_argument( + "--repo-root", + type=Path, + default=None, + help="Repository root (default: current directory).", + ) + args = parser.parse_args() if args.command == "refresh": refresh(args.provider, args.data_dir) + elif args.command == "summarize": + from langchain_model_profiles._summary import summarize + + try: + providers = json.loads(args.providers) + except json.JSONDecodeError as e: + parser.error(f"--providers is not valid JSON: {e}") + + if not isinstance(providers, list): + parser.error("--providers must be a JSON array") + + try: + output = summarize( + providers, base_ref=args.base_ref, repo_root=args.repo_root + ) + except (RuntimeError, ValueError, TypeError) as e: + parser.error(str(e)) + print(output) if __name__ == "__main__": diff --git a/libs/model-profiles/tests/unit_tests/test_summary.py b/libs/model-profiles/tests/unit_tests/test_summary.py new file mode 100644 index 00000000000..f0615aad28b --- /dev/null +++ b/libs/model-profiles/tests/unit_tests/test_summary.py @@ -0,0 +1,470 @@ +"""Tests for the profile change summary generator.""" + +import json +import subprocess +import sys +from pathlib import Path +from typing import TYPE_CHECKING + +import pytest + +from langchain_model_profiles import cli +from langchain_model_profiles._summary import ( + _MAX_ROWS, + FieldChange, + ProfileDiff, + ProfileParseError, + _describe_new_model, + _format_value, + _truncate, + build_summary, + diff_profiles, + extract_profiles, + render_provider_section, + summarize, +) + +if TYPE_CHECKING: + from langchain_core.language_models.model_profile import ( + ModelProfile, + ModelProfileRegistry, + ) + +_OLD_SOURCE = '''"""Auto-generated.""" + +from typing import Any + +_PROFILES: dict[str, dict[str, Any]] = { + "gpt-4": { + "name": "GPT-4", + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "image_inputs": False, + "tool_calling": True, + }, + "old-model": { + "name": "Old", + "max_input_tokens": 1000, + }, +} +''' + +_NEW_SOURCE = '''"""Auto-generated.""" + +from typing import Any + +_PROFILES: dict[str, dict[str, Any]] = { + "gpt-4": { + "name": "GPT-4", + "max_input_tokens": 8192, + "max_output_tokens": 16384, + "image_inputs": True, + "tool_calling": True, + }, + "gpt-5": { + "name": "GPT-5", + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "image_inputs": True, + "reasoning_output": True, + "tool_calling": True, + }, +} +''' + + +def _git(repo: Path, *args: str) -> None: + """Run a git command inside `repo` (test helper).""" + subprocess.run( # noqa: S603 + ["git", "-C", str(repo), *args], # noqa: S607 + check=True, + capture_output=True, + text=True, + ) + + +def _init_repo(repo: Path) -> None: + """Initialize a git repo with a deterministic identity.""" + _git(repo, "init", "-q") + _git(repo, "config", "user.email", "t@example.com") + _git(repo, "config", "user.name", "Test") + + +def _write_profiles(path: Path, source: str) -> None: + """Write a `_profiles.py` file, creating parent directories.""" + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(source) + + +def test_extract_profiles() -> None: + """`_PROFILES` literal is extracted via ast without executing the module.""" + profiles = extract_profiles(_OLD_SOURCE) + assert set(profiles) == {"gpt-4", "old-model"} + assert profiles["gpt-4"]["max_input_tokens"] == 8192 + + +def test_extract_profiles_handles_missing_or_invalid() -> None: + """Absent `_PROFILES` yields `{}`; present-but-unparseable sources raise.""" + # No `_PROFILES` assignment, and an empty file, are both legitimately empty. + assert extract_profiles("x = 1") == {} + assert extract_profiles("") == {} + # A syntactically broken file is corrupt, not empty. + with pytest.raises(ProfileParseError): + extract_profiles("def (:") + # A non-literal or non-dict `_PROFILES` is corrupt too. + with pytest.raises(ProfileParseError): + extract_profiles("_PROFILES = some_function()") + with pytest.raises(ProfileParseError): + extract_profiles("_PROFILES = [1, 2, 3]") + + +def test_diff_profiles() -> None: + """Diff reports added, removed, and per-field changes.""" + diff = diff_profiles(extract_profiles(_OLD_SOURCE), extract_profiles(_NEW_SOURCE)) + assert diff.added == ["gpt-5"] + assert diff.removed == ["old-model"] + assert set(diff.changed) == {"gpt-4"} + assert diff.changed["gpt-4"]["max_output_tokens"] == (4096, 16384) + assert diff.changed["gpt-4"]["image_inputs"] == (False, True) + assert diff.added_profiles["gpt-5"]["max_input_tokens"] == 400000 + + +def test_diff_profiles_no_changes() -> None: + """Identical inputs produce an empty diff.""" + profiles = extract_profiles(_OLD_SOURCE) + diff = diff_profiles(profiles, profiles) + assert diff.is_empty + + +def test_render_provider_section_content() -> None: + """Rendered section describes additions, removals, and field changes.""" + diff = diff_profiles(extract_profiles(_OLD_SOURCE), extract_profiles(_NEW_SOURCE)) + section = render_provider_section("openai", diff) + assert section is not None + assert "### openai" in section + assert "1 added" in section + assert "`gpt-5`" in section + assert "400,000 ctx" in section + assert "reasoning" in section + assert "1 removed" in section + assert "`old-model`" in section + assert "1 changed" in section + assert "max output tokens 4,096 → 16,384" in section + assert "added image input" in section + + +def test_render_provider_section_empty() -> None: + """An empty diff renders no section.""" + assert render_provider_section("openai", ProfileDiff()) is None + + +def test_build_summary_headline() -> None: + """The summary leads with a header and an aggregate headline.""" + diff = diff_profiles(extract_profiles(_OLD_SOURCE), extract_profiles(_NEW_SOURCE)) + summary = build_summary({"openai": diff}) + assert summary.startswith("## Summary of changes") + assert "1 added" in summary + assert "1 removed" in summary + assert "1 changed" in summary + + +def test_build_summary_no_changes() -> None: + """An all-empty diff produces a short no-change note.""" + assert build_summary({"openai": ProfileDiff()}) == "No model profile data changed." + + +def test_truncation() -> None: + """Long lists are truncated with a trailing count of hidden rows.""" + new: ModelProfileRegistry = {f"model-{i}": {"name": f"m{i}"} for i in range(40)} + diff = diff_profiles({}, new) + section = render_provider_section("openai", diff) + assert section is not None + assert "…and 15 more" in section + + +def test_summarize_against_git(tmp_path: Path) -> None: + """Summarize compares the working tree against a committed baseline.""" + repo = tmp_path + _init_repo(repo) + + data_dir = "libs/partners/openai/data" + profiles_path = repo / data_dir / "_profiles.py" + _write_profiles(profiles_path, _OLD_SOURCE) + _git(repo, "add", "-A") + _git(repo, "commit", "-q", "-m", "init") + + # Simulate a refresh by overwriting the working-tree file. + profiles_path.write_text(_NEW_SOURCE) + + summary = summarize([{"provider": "openai", "data_dir": data_dir}], repo_root=repo) + assert "## Summary of changes" in summary + assert "`gpt-5`" in summary + assert "`old-model`" in summary + # The changed-field path is exercised end-to-end, not just at the unit layer. + assert "max output tokens 4,096 → 16,384" in summary + + +def test_summarize_new_provider_file(tmp_path: Path) -> None: + """A brand-new profiles file is treated as all-added.""" + repo = tmp_path + _init_repo(repo) + (repo / "README.md").write_text("x") + _git(repo, "add", "-A") + _git(repo, "commit", "-q", "-m", "init") + + data_dir = "libs/partners/new/data" + _write_profiles(repo / data_dir / "_profiles.py", _NEW_SOURCE) + + summary = summarize([{"provider": "new", "data_dir": data_dir}], repo_root=repo) + assert "2 added" in summary + + +def test_field_change_is_tuple() -> None: + """`FieldChange` unpacks and compares like a plain (old, new) tuple.""" + change = FieldChange(1, 2) + # Access the named fields before the tuple comparison: `== (1, 2)` would + # otherwise narrow `change` to a plain `tuple` for the rest of the scope. + assert change.old == 1 + assert change.new == 2 + assert change == (1, 2) + + +def test_format_value_variants() -> None: + """Each `_format_value` branch renders the expected string.""" + assert _format_value("x", None) == "unset" + assert _format_value("tool_calling", True) == "yes" # noqa: FBT003 + assert _format_value("tool_calling", False) == "no" # noqa: FBT003 + assert _format_value("max_input_tokens", 200000) == "200,000" + # Plain int outside the token fields is rendered without separators. + assert _format_value("foo", 42) == "42" + # Floats fall through to str(). + assert _format_value("temperature", 1.5) == "1.5" + assert _format_value("name", "GPT") == "`GPT`" + + +def test_render_non_bool_field_change() -> None: + """Non-boolean field changes render an `old → new` phrase.""" + old: ModelProfileRegistry = {"m": {"status": "active", "name": "M"}} + new: ModelProfileRegistry = {"m": {"status": "deprecated", "name": "M2"}} + section = render_provider_section("openai", diff_profiles(old, new)) + assert section is not None + assert "status `active` → `deprecated`" in section + assert "display name `M` → `M2`" in section + + +def test_render_removed_bool_field_change() -> None: + """A boolean field flipped off renders a `removed