feat(model-profiles): plain-English summary for profile refresh PRs (#38218)

Automated model-profile refresh PRs (e.g. #38210) ship a static template body, so a reviewer has to open *Files changed* and read large blocks of generated data to learn what actually moved. Because the underlying profile data is fully structured, we can describe the changes deterministically — no LLM, no hallucination risk.

This adds a `langchain-profiles summarize` subcommand that compares the working-tree `_profiles.py` files against a git ref and renders a skimmable Markdown summary: models added (with a short capability descriptor), models removed, and per-field capability changes (context/output tokens, modalities, tool calling, reasoning, etc.), grouped by provider and capped so huge refreshes stay readable. Profiles are read with `ast.literal_eval` rather than imported, so the generated data file is never executed.

Example output for a refresh that adds a model and bumps an output limit:

```
## Summary of changes

**1 added · 0 removed · 1 changed** across 1 provider(s).

### openai

**➕ 1 added**
- `gpt-6-preview` — 1,000,000 ctx, 128,000 out, text+image+audio in, reasoning, tools

**✏️ 1 changed**
- `gpt-3.5-turbo`: max output tokens 4,096 → 16,384
```

Made by [Open SWE](https://openswe.vercel.app/agents/9bcbf182-effc-ba9b-0df3-afac620ad152)

---------

Co-authored-by: open-swe[bot] <open-swe@users.noreply.github.com>
2026-07-01 14:47:02 +00:00 · 2026-06-22 22:15:29 -04:00
parent c669da7662
commit f7e87f7ab8
5 changed files with 1017 additions and 3 deletions
--- a/.github/workflows/_refresh_model_profiles.yml
+++ b/.github/workflows/_refresh_model_profiles.yml
@@ -167,6 +167,32 @@ jobs:
            exit 1
          fi

+      - name: "📝 Build PR body with change summary"
+        id: pr-body
+        env:
+          PROVIDERS_JSON: ${{ inputs.providers }}
+          PR_BODY: ${{ inputs.pr-body }}
+        run: |
+          # The refresh step modified the working tree without committing, so
+          # comparing against HEAD yields exactly the refresh's changes.
+          cli_dir="${GITHUB_WORKSPACE}/${{ steps.cli.outputs.dir }}"
+          body_file="${RUNNER_TEMP}/pr_body.md"
+          printf '%s\n\n' "${PR_BODY}" > "${body_file}"
+          # `summarize` builds the whole summary in memory and prints it once,
+          # so a failure exits non-zero before any stdout reaches the append —
+          # the body keeps only the static note, never a half-written summary.
+          if ! uv run --frozen --project "${cli_dir}" \
+            langchain-profiles summarize \
+            --providers "${PROVIDERS_JSON}" \
+            --base-ref HEAD \
+            --repo-root "${GITHUB_WORKSPACE}" >> "${body_file}"; then
+            echo "::warning::Could not generate change summary; see job log."
+            # Surface the degradation in the PR body too: the warning above only
+            # lands in the Actions log, which a PR reviewer won't see.
+            printf '\n> [!NOTE]\n> Automated change summary unavailable — see the workflow run log.\n' >> "${body_file}"
+          fi
+          echo "path=${body_file}" >> "$GITHUB_OUTPUT"
+
      - name: "🔑 Generate GitHub App token"
        id: app-token
        uses: actions/create-github-app-token@bcd2ba49218906704ab6c1aa796996da409d3eb1 # v3
@@ -182,7 +208,7 @@ jobs:
          branch: ${{ inputs.pr-branch }}
          commit-message: ${{ inputs.pr-title }}
          title: ${{ inputs.pr-title }}
-          body: ${{ inputs.pr-body }}
+          body-path: ${{ steps.pr-body.outputs.path }}
          labels: ${{ inputs.pr-labels }}
          add-paths: ${{ inputs.add-paths }}

--- a/libs/model-profiles/langchain_model_profiles/_summary.py
+++ b/libs/model-profiles/langchain_model_profiles/_summary.py
@@ -0,0 +1,475 @@
+"""Generate a plain-English summary of model profile changes.
+
+The `refresh_model_profiles` workflow opens an automated PR whenever the data
+behind `_profiles.py` files changes. Those diffs are large blocks of generated
+data, so a reviewer otherwise has to open *Files changed* and eyeball raw values
+to learn what actually moved. This module turns the structured before/after data
+into a skimmable Markdown summary (new models, removed models, and per-field
+capability/metadata changes) for the PR body. The summary is generated
+deterministically from the data, so there is no risk of an LLM misdescribing it.
+"""
+
+from __future__ import annotations
+
+import ast
+import subprocess
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, NamedTuple, TypedDict
+
+if TYPE_CHECKING:
+    from collections.abc import Mapping
+
+    from langchain_core.language_models.model_profile import (
+        ModelProfile,
+        ModelProfileRegistry,
+    )
+
+# Maximum number of bullet rows rendered per section before truncating.
+_MAX_ROWS = 25
+
+# Human-readable labels for profile fields.
+_FIELD_LABELS: dict[str, str] = {
+    "name": "display name",
+    "status": "status",
+    "release_date": "release date",
+    "last_updated": "last updated",
+    "open_weights": "open weights",
+    "max_input_tokens": "max input tokens",
+    "max_output_tokens": "max output tokens",
+    "text_inputs": "text input",
+    "image_inputs": "image input",
+    "audio_inputs": "audio input",
+    "pdf_inputs": "PDF input",
+    "video_inputs": "video input",
+    "text_outputs": "text output",
+    "image_outputs": "image output",
+    "audio_outputs": "audio output",
+    "video_outputs": "video output",
+    "reasoning_output": "reasoning",
+    "tool_calling": "tool calling",
+    "tool_choice": "tool choice",
+    "tool_call_streaming": "tool call streaming",
+    "structured_output": "structured output",
+    "attachment": "attachments",
+    "temperature": "temperature control",
+    "image_url_inputs": "image URL input",
+    "image_tool_message": "image tool messages",
+    "pdf_tool_message": "PDF tool messages",
+}
+
+# Token fields rendered with thousands separators.
+_TOKEN_FIELDS = frozenset({"max_input_tokens", "max_output_tokens"})
+
+
+class ProfileParseError(ValueError):
+    """A `_profiles.py` source exists but its `_PROFILES` data is unparseable.
+
+    Distinguished from a genuinely absent file (which yields an empty mapping)
+    so that corrupt working-tree or committed data surfaces as an error rather
+    than being silently diffed as a mass addition or removal of models.
+
+    Subclasses `ValueError`, so a caller's `except ValueError` also catches it.
+    `summarize` converts it to a `RuntimeError` that names the provider.
+    """
+
+
+class FieldChange(NamedTuple):
+    """Old and new values for a single changed profile field.
+
+    Named rather than a bare `tuple` so the old → new ordering the renderer
+    relies on is part of the type instead of a positional convention. Values are
+    heterogeneous profile data (bool, int, str, or unset), so `Any` is the
+    honest element type here.
+
+    `diff_profiles` reads both sides with `.get(key)`, so `None` stands for a
+    key that is missing as well as one explicitly set to `None`.
+    """
+
+    old: Any
+    """Value before the refresh, or `None` if absent/unset."""
+
+    new: Any
+    """Value after the refresh, or `None` if absent/unset."""
+
+
+class ProviderEntry(TypedDict):
+    """One provider's identity and the data dir holding its `_profiles.py`.
+
+    `summarize` appends `/_profiles.py` to `data_dir` and looks that path up
+    both in the working tree and at the base ref.
+    """
+
+    provider: str
+    """Provider identifier (e.g. `'openai'`)."""
+
+    data_dir: str
+    """Path to the provider's data directory, relative to the repo root."""
+
+
+@dataclass
+class ProfileDiff:
+    """Before/after comparison of one provider's model profiles.
+
+    Holds the model IDs that appeared, the ones that disappeared, the per-field
+    changes for models present on both sides, and the full profiles of the new
+    models so the renderer can describe them.
+    """
+
+    added: list[str] = field(default_factory=list)
+    """Model IDs present after the refresh but not before, sorted."""
+
+    removed: list[str] = field(default_factory=list)
+    """Model IDs present before the refresh but not after, sorted."""
+
+    changed: dict[str, dict[str, FieldChange]] = field(default_factory=dict)
+    """Per-model field changes, keyed by model ID then field name."""
+
+    added_profiles: ModelProfileRegistry = field(default_factory=dict)
+    """Full profiles for each added model, keyed by model ID."""
+
+    @property
+    def is_empty(self) -> bool:
+        """True when nothing was added, removed, or changed.
+
+        `added_profiles` is deliberately not consulted: it only mirrors `added`.
+        """
+        return not any((self.added, self.removed, self.changed))
+
+
+def extract_profiles(source: str) -> ModelProfileRegistry:
+    """Extract the `_PROFILES` mapping from `_profiles.py` source.
+
+    Uses `ast.literal_eval` rather than importing/executing the module so the
+    generated data file is never run as code. Only top-level assignments are
+    inspected, and the first one that targets `_PROFILES` wins.
+
+    Args:
+        source: Contents of a `_profiles.py` module. An empty string (e.g. a
+            file absent at a git ref) yields an empty mapping.
+
+    Returns:
+        The `_PROFILES` mapping, or an empty dict when the source contains no
+        `_PROFILES` assignment.
+
+    Raises:
+        ProfileParseError: If the source is present but cannot be parsed, its
+            `_PROFILES` value is not a dict literal, or one of its entries is
+            not itself a dict. Surfacing this rather than returning `{}` (or
+            failing later with an unrelated error) prevents a corrupt file from
+            being misreported as every model added or removed.
+    """
+    try:
+        tree = ast.parse(source)
+    except (SyntaxError, ValueError, RecursionError) as e:
+        # Older interpreters report embedded null bytes as `ValueError`, and
+        # pathologically nested source can exhaust the recursion limit.
+        msg = f"Could not parse profile source as Python: {e}"
+        raise ProfileParseError(msg) from e
+
+    for node in tree.body:
+        if isinstance(node, ast.AnnAssign):
+            targets: list[ast.expr] = [node.target]
+        elif isinstance(node, ast.Assign):
+            targets = list(node.targets)
+        else:
+            continue
+        is_profiles = any(
+            isinstance(t, ast.Name) and t.id == "_PROFILES" for t in targets
+        )
+        # A bare annotation (`_PROFILES: dict`) carries no value to evaluate.
+        if not is_profiles or node.value is None:
+            continue
+        try:
+            value = ast.literal_eval(node.value)
+        except (ValueError, TypeError, SyntaxError, RecursionError) as e:
+            # `TypeError` covers literals that parse but cannot be built, such
+            # as a dict with an unhashable key.
+            msg = f"`_PROFILES` is not a literal expression: {e}"
+            raise ProfileParseError(msg) from e
+        if not isinstance(value, dict):
+            msg = f"`_PROFILES` is not a dict (got {type(value).__name__})"
+            raise ProfileParseError(msg)
+        # Each entry must be a mapping of fields; anything else would only
+        # fail later, inside the diff, with an unrelated error.
+        for model_id, profile in value.items():
+            if not isinstance(profile, dict):
+                msg = (
+                    f"`_PROFILES[{model_id!r}]` is not a dict "
+                    f"(got {type(profile).__name__})"
+                )
+                raise ProfileParseError(msg)
+        return value
+    return {}
+
+
+def diff_profiles(old: ModelProfileRegistry, new: ModelProfileRegistry) -> ProfileDiff:
+    """Compare two `_PROFILES` mappings model by model.
+
+    Args:
+        old: Profiles before the refresh.
+        new: Profiles after the refresh.
+
+    Returns:
+        A `ProfileDiff` listing added and removed model IDs (sorted), the
+        per-field changes for models present on both sides, and the full
+        profiles of the added models.
+    """
+    old_ids = set(old)
+    new_ids = set(new)
+    added = sorted(new_ids - old_ids)
+    removed = sorted(old_ids - new_ids)
+
+    changed: dict[str, dict[str, FieldChange]] = {}
+    for model_id in sorted(old_ids & new_ids):
+        # Treat profiles as plain mappings: the `ModelProfile` TypedDict only
+        # allows literal-key access, and the keys here are dynamic.
+        before: Mapping[str, Any] = old[model_id]
+        after: Mapping[str, Any] = new[model_id]
+        # A key missing on one side reads as `None` via `.get`.
+        delta = {
+            key: FieldChange(before.get(key), after.get(key))
+            for key in sorted(before.keys() | after.keys())
+            if before.get(key) != after.get(key)
+        }
+        if delta:
+            changed[model_id] = delta
+
+    return ProfileDiff(
+        added=added,
+        removed=removed,
+        changed=changed,
+        added_profiles={model_id: new[model_id] for model_id in added},
+    )
+
+
+def _format_value(field_name: str, value: Any) -> str:  # noqa: ANN401
+    """Turn one profile field value into its display string.
+
+    `None` becomes `unset`, booleans become `yes`/`no`, strings are wrapped in
+    backticks, integer token limits get thousands separators, and anything else
+    falls back to `str()`.
+    """
+    if value is None:
+        return "unset"
+    # `bool` must be tested before `int`: it is an `int` subclass.
+    if isinstance(value, bool):
+        return "yes" if value else "no"
+    if isinstance(value, str):
+        return f"`{value}`"
+    if isinstance(value, int) and field_name in _TOKEN_FIELDS:
+        return f"{value:,}"
+    return str(value)
+
+
+def _describe_field_change(
+    field_name: str,
+    old_val: Any,  # noqa: ANN401
+    new_val: Any,  # noqa: ANN401
+) -> str:
+    """Describe a single field change in plain English.
+
+    When either side is a boolean, a switch from falsy to truthy reads as
+    "added <label>" and the reverse as "removed <label>". Every other change
+    is rendered as "<label> <old> → <new>".
+    """
+    label = _FIELD_LABELS.get(field_name, field_name)
+    involves_flag = isinstance(old_val, bool) or isinstance(new_val, bool)
+    if involves_flag:
+        was_on = bool(old_val)
+        is_on = bool(new_val)
+        if is_on and not was_on:
+            return f"added {label}"
+        if was_on and not is_on:
+            return f"removed {label}"
+    # Reached for non-flag fields and for flag changes that keep the same
+    # truthiness (e.g. unset → no).
+    before = _format_value(field_name, old_val)
+    after = _format_value(field_name, new_val)
+    return f"{label} {before} → {after}"
+
+
+def _describe_new_model(profile: ModelProfile) -> str:
+    """Produce a short descriptor for a newly added model.
+
+    The descriptor lists, in order and only when present: context window,
+    output limit, input modalities, reasoning, and tool calling.
+
+    Input modalities are shown only when the model accepts something beyond
+    text. Text is included in that list unless the profile explicitly sets
+    `text_inputs` to `False`, so an audio-only model is not described as
+    accepting text.
+
+    Args:
+        profile: The added model's profile.
+
+    Returns:
+        A comma-separated descriptor, or an empty string when the profile has
+        none of the described fields.
+    """
+    parts: list[str] = []
+    context = profile.get("max_input_tokens")
+    if context:
+        parts.append(f"{context:,} ctx")
+    output = profile.get("max_output_tokens")
+    if output:
+        parts.append(f"{output:,} out")
+    modalities = [
+        name
+        for key, name in (
+            ("image_inputs", "image"),
+            ("audio_inputs", "audio"),
+            ("video_inputs", "video"),
+            ("pdf_inputs", "pdf"),
+        )
+        if profile.get(key)
+    ]
+    if modalities:
+        # A missing or `None` `text_inputs` still counts as text-capable; only
+        # an explicit `False` drops the `text` prefix.
+        if profile.get("text_inputs") is not False:
+            modalities.insert(0, "text")
+        parts.append("+".join(modalities) + " in")
+    if profile.get("reasoning_output"):
+        parts.append("reasoning")
+    if profile.get("tool_calling"):
+        parts.append("tools")
+    return ", ".join(parts)
+
+
+def _truncate(rows: list[str]) -> list[str]:
+    """Limit bullet rows to `_MAX_ROWS`, noting how many were dropped.
+
+    A list within the limit is returned as-is (the same object). A longer one
+    yields a new list with the first `_MAX_ROWS` rows plus a closing
+    "…and N more" row.
+    """
+    overflow = len(rows) - _MAX_ROWS
+    if overflow <= 0:
+        return rows
+    kept = rows[:_MAX_ROWS]
+    kept.append(f"- …and {overflow} more")
+    return kept
+
+
+def render_provider_section(provider: str, diff: ProfileDiff) -> str | None:
+    """Build one provider's Markdown section from its diff.
+
+    The section opens with a `### <provider>` heading, followed by up to three
+    groups (added, removed, changed). Each group has a bold count line and a
+    bullet list capped by `_truncate`.
+
+    Args:
+        provider: Provider identifier (e.g. `'openai'`).
+        diff: The computed `ProfileDiff` for the provider.
+
+    Returns:
+        Markdown for the provider's changes, or `None` when there are none.
+    """
+    if diff.is_empty:
+        return None
+
+    out = [f"### {provider}"]
+
+    if diff.added:
+        out.append(f"\n**➕ {len(diff.added)} added**")  # noqa: RUF001
+        added_rows = []
+        for model_id in diff.added:
+            bullet = f"- `{model_id}`"
+            descriptor = _describe_new_model(diff.added_profiles[model_id])
+            # Omit the dash entirely when there is nothing to say.
+            if descriptor:
+                bullet += f" — {descriptor}"
+            added_rows.append(bullet)
+        out.extend(_truncate(added_rows))
+
+    if diff.removed:
+        out.append(f"\n**➖ {len(diff.removed)} removed**")  # noqa: RUF001
+        removed_rows = [f"- `{model_id}`" for model_id in diff.removed]
+        out.extend(_truncate(removed_rows))
+
+    if diff.changed:
+        out.append(f"\n**✏️ {len(diff.changed)} changed**")
+        changed_rows = []
+        for model_id, fields in diff.changed.items():
+            described = "; ".join(
+                _describe_field_change(name, change.old, change.new)
+                for name, change in fields.items()
+            )
+            changed_rows.append(f"- `{model_id}`: " + described)
+        out.extend(_truncate(changed_rows))
+
+    return "\n".join(out)
+
+
+def build_summary(provider_diffs: dict[str, ProfileDiff]) -> str:
+    """Combine every provider's section into one Markdown summary.
+
+    Providers are rendered in sorted name order; those with no changes are left
+    out. The headline totals are summed over all diffs passed in.
+
+    Args:
+        provider_diffs: Mapping of provider name to its `ProfileDiff`.
+
+    Returns:
+        Markdown summary. When nothing changed, a short note is returned.
+    """
+    sections = []
+    for provider in sorted(provider_diffs):
+        rendered = render_provider_section(provider, provider_diffs[provider])
+        if rendered:
+            sections.append(rendered)
+    if not sections:
+        return "No model profile data changed."
+
+    total_added = total_removed = total_changed = 0
+    for diff in provider_diffs.values():
+        total_added += len(diff.added)
+        total_removed += len(diff.removed)
+        total_changed += len(diff.changed)
+    headline = (
+        f"**{total_added} added · {total_removed} removed · "
+        f"{total_changed} changed** across {len(sections)} provider(s)."
+    )
+
+    blocks = ["## Summary of changes", headline]
+    blocks.extend(sections)
+    return "\n\n".join(blocks)
+
+
+def _verify_ref(repo_root: Path, ref: str) -> None:
+    """Check that `ref` names a commit in the repository at `repo_root`.
+
+    Running this once before any `_git_show` call means a later non-zero exit
+    can only mean "path absent at this ref". Without it, a typo'd ref, an
+    unfetched ref, or a non-repository root would look like a brand-new file and
+    every existing model would be reported as newly added.
+
+    Raises:
+        RuntimeError: If git is unavailable, `repo_root` is not a repository, or
+            `ref` cannot be resolved.
+    """
+    # `^{commit}` makes rev-parse peel the ref and fail unless it is a commit.
+    command = [
+        "git",
+        "-C",
+        str(repo_root),
+        "rev-parse",
+        "--verify",
+        "--quiet",
+        f"{ref}^{{commit}}",
+    ]
+    try:
+        result = subprocess.run(  # noqa: S603
+            command,
+            capture_output=True,
+            text=True,
+            check=False,
+        )
+    except OSError as e:
+        msg = f"Could not run git (is it installed and on PATH?): {e}"
+        raise RuntimeError(msg) from e
+    if result.returncode == 0:
+        return
+    msg = (
+        f"Could not resolve base ref {ref!r} in {repo_root}; "
+        "is it a valid git ref in this repository?"
+    )
+    raise RuntimeError(msg)
+
+
+def _git_show(repo_root: Path, ref: str, rel_path: str) -> str | None:
+    """Return file contents at `ref`, or None if the file does not exist there.
+
+    Assumes `ref` has already been validated by `_verify_ref`, so a non-zero
+    exit here means the path is absent at `ref` rather than a bad ref.
+
+    The blob is decoded as UTF-8 regardless of locale, matching how the
+    working-tree copy is read, so both sides of the diff are decoded alike.
+
+    Args:
+        repo_root: Repository root passed to `git -C`.
+        ref: Git ref to read from.
+        rel_path: Path of the file relative to `repo_root`.
+
+    Returns:
+        The file's text at `ref`, or `None` when git exits non-zero.
+
+    Raises:
+        RuntimeError: If git cannot be executed or the blob is not valid
+            UTF-8. Returning `None` in those cases would present an existing
+            file as absent and report every model as newly added.
+    """
+    try:
+        result = subprocess.run(  # noqa: S603
+            ["git", "-C", str(repo_root), "show", f"{ref}:{rel_path}"],  # noqa: S607
+            capture_output=True,
+            text=True,
+            encoding="utf-8",
+            check=False,
+        )
+    except OSError as e:
+        msg = f"Could not run git (is it installed and on PATH?): {e}"
+        raise RuntimeError(msg) from e
+    except UnicodeDecodeError as e:
+        msg = f"Could not decode {rel_path} at {ref!r} as UTF-8: {e}"
+        raise RuntimeError(msg) from e
+    return result.stdout if result.returncode == 0 else None
+
+
+def summarize(
+    providers: list[ProviderEntry],
+    *,
+    base_ref: str = "HEAD",
+    repo_root: Path | None = None,
+) -> str:
+    """Build a Markdown summary of profile changes vs `base_ref`.
+
+    For each provider, the `_profiles.py` committed at `base_ref` is compared
+    with the copy in the working tree. A file missing on either side counts as
+    an empty set of profiles.
+
+    Args:
+        providers: List of `{'provider': ..., 'data_dir': ...}` entries,
+            matching the workflow input. `data_dir` is relative to the repo
+            root and contains `_profiles.py`.
+        base_ref: Git ref to compare the working tree against.
+        repo_root: Repository root. Defaults to the current directory.
+
+    Returns:
+        Markdown summary suitable for a PR body.
+
+    Raises:
+        RuntimeError: If `base_ref` cannot be resolved, or a profiles file
+            exists but cannot be read or parsed.
+        ValueError: If a `providers` entry is missing a required key, or its
+            `data_dir` is empty or absolute (which would resolve outside the
+            repo root).
+        TypeError: If a `providers` entry's `provider`/`data_dir` is not a
+            string.
+    """
+    root = (repo_root or Path.cwd()).resolve()
+    _verify_ref(root, base_ref)
+    provider_diffs: dict[str, ProfileDiff] = {}
+
+    for entry in providers:
+        # View the entry as an untrusted mapping: at the CLI boundary it is
+        # arbitrary parsed JSON, not a guaranteed `ProviderEntry`.
+        entry_map: Mapping[str, Any] = entry
+        try:
+            provider = entry_map["provider"]
+            data_dir = entry_map["data_dir"]
+        except (KeyError, TypeError) as e:
+            msg = (
+                f"Invalid provider entry {entry!r}: expected 'provider' and "
+                f"'data_dir' keys ({e})"
+            )
+            raise ValueError(msg) from e
+        if not isinstance(provider, str) or not isinstance(data_dir, str):
+            msg = (
+                f"Invalid provider entry {entry!r}: 'provider' and 'data_dir' "
+                "must be strings"
+            )
+            raise TypeError(msg)
+        # `root / rel_path` discards `root` when `rel_path` is absolute, and an
+        # empty `data_dir` yields `/_profiles.py`; either would read a file
+        # outside the repository and diff it against nothing.
+        if not data_dir.strip("/") or Path(data_dir).is_absolute():
+            msg = (
+                f"Invalid provider entry {entry!r}: 'data_dir' must be a "
+                "non-empty path relative to the repo root"
+            )
+            raise ValueError(msg)
+        rel_path = f"{data_dir.rstrip('/')}/_profiles.py"
+
+        old_source = _git_show(root, base_ref, rel_path) or ""
+        new_path = root / rel_path
+        # Read first and handle absence afterwards, so the file cannot vanish
+        # between an existence check and the read.
+        try:
+            new_source = new_path.read_text(encoding="utf-8")
+        except (FileNotFoundError, NotADirectoryError):
+            new_source = ""
+        except (OSError, UnicodeDecodeError) as e:
+            msg = f"Could not read {new_path}: {e}"
+            raise RuntimeError(msg) from e
+
+        # A corrupt-but-readable file must surface as an error: extracting `{}`
+        # from it would otherwise be diffed as every model added (old side) or
+        # removed (new side), yielding a confident but wrong summary.
+        try:
+            old_profiles = extract_profiles(old_source)
+        except ProfileParseError as e:
+            msg = f"Profile data for {provider!r} at {base_ref!r} is unparseable: {e}"
+            raise RuntimeError(msg) from e
+        try:
+            new_profiles = extract_profiles(new_source)
+        except ProfileParseError as e:
+            msg = f"Profile data for {provider!r} at {new_path} is unparseable: {e}"
+            raise RuntimeError(msg) from e
+
+        provider_diffs[provider] = diff_profiles(old_profiles, new_profiles)
+
+    return build_summary(provider_diffs)
--- a/libs/model-profiles/langchain_model_profiles/cli.py
+++ b/libs/model-profiles/langchain_model_profiles/cli.py
@@ -399,10 +399,53 @@ def main() -> None:
        help="Data directory containing profile_augmentations.toml",
    )

+    # summarize command
+    summarize_parser = subparsers.add_parser(
+        "summarize",
+        help="Summarize profile changes vs a git ref as Markdown (for PR bodies)",
+    )
+    summarize_parser.add_argument(
+        "--providers",
+        required=True,
+        help=(
+            "JSON array of objects with 'provider' and 'data_dir' keys "
+            "(data_dir relative to the repo root)."
+        ),
+    )
+    summarize_parser.add_argument(
+        "--base-ref",
+        default="HEAD",
+        help="Git ref to compare the working tree against (default: HEAD).",
+    )
+    summarize_parser.add_argument(
+        "--repo-root",
+        type=Path,
+        default=None,
+        help="Repository root (default: current directory).",
+    )
+
    args = parser.parse_args()

    if args.command == "refresh":
        refresh(args.provider, args.data_dir)
+    elif args.command == "summarize":
+        from langchain_model_profiles._summary import summarize
+
+        try:
+            providers = json.loads(args.providers)
+        except json.JSONDecodeError as e:
+            parser.error(f"--providers is not valid JSON: {e}")
+
+        if not isinstance(providers, list):
+            parser.error("--providers must be a JSON array")
+
+        try:
+            output = summarize(
+                providers, base_ref=args.base_ref, repo_root=args.repo_root
+            )
+        except (RuntimeError, ValueError, TypeError) as e:
+            parser.error(str(e))
+        print(output)


 if __name__ == "__main__":
--- a/libs/model-profiles/tests/unit_tests/test_summary.py
+++ b/libs/model-profiles/tests/unit_tests/test_summary.py
@@ -0,0 +1,470 @@
+"""Tests for the profile change summary generator."""
+
+import json
+import subprocess
+import sys
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import pytest
+
+from langchain_model_profiles import cli
+from langchain_model_profiles._summary import (
+    _MAX_ROWS,
+    FieldChange,
+    ProfileDiff,
+    ProfileParseError,
+    _describe_new_model,
+    _format_value,
+    _truncate,
+    build_summary,
+    diff_profiles,
+    extract_profiles,
+    render_provider_section,
+    summarize,
+)
+
+if TYPE_CHECKING:
+    from langchain_core.language_models.model_profile import (
+        ModelProfile,
+        ModelProfileRegistry,
+    )
+
+_OLD_SOURCE = '''"""Auto-generated."""
+
+from typing import Any
+
+_PROFILES: dict[str, dict[str, Any]] = {
+    "gpt-4": {
+        "name": "GPT-4",
+        "max_input_tokens": 8192,
+        "max_output_tokens": 4096,
+        "image_inputs": False,
+        "tool_calling": True,
+    },
+    "old-model": {
+        "name": "Old",
+        "max_input_tokens": 1000,
+    },
+}
+'''
+
+_NEW_SOURCE = '''"""Auto-generated."""
+
+from typing import Any
+
+_PROFILES: dict[str, dict[str, Any]] = {
+    "gpt-4": {
+        "name": "GPT-4",
+        "max_input_tokens": 8192,
+        "max_output_tokens": 16384,
+        "image_inputs": True,
+        "tool_calling": True,
+    },
+    "gpt-5": {
+        "name": "GPT-5",
+        "max_input_tokens": 400000,
+        "max_output_tokens": 128000,
+        "image_inputs": True,
+        "reasoning_output": True,
+        "tool_calling": True,
+    },
+}
+'''
+
+
+def _git(repo: Path, *args: str) -> None:
+    """Run a git command inside `repo` (test helper)."""
+    subprocess.run(  # noqa: S603
+        ["git", "-C", str(repo), *args],  # noqa: S607
+        check=True,
+        capture_output=True,
+        text=True,
+    )
+
+
+def _init_repo(repo: Path) -> None:
+    """Initialize a git repo with a deterministic identity.
+
+    Commit signing is switched off for the repo so that `git commit` in these
+    tests does not depend on a developer's global `commit.gpgsign` setting.
+    """
+    _git(repo, "init", "-q")
+    _git(repo, "config", "user.email", "t@example.com")
+    _git(repo, "config", "user.name", "Test")
+    _git(repo, "config", "commit.gpgsign", "false")
+
+
+def _write_profiles(path: Path, source: str) -> None:
+    """Write a `_profiles.py` file, creating parent directories.
+
+    The file is written as UTF-8, the encoding `summarize` reads it with, so
+    fixtures round-trip on any locale.
+    """
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(source, encoding="utf-8")
+
+
+def test_extract_profiles() -> None:
+    """`_PROFILES` literal is extracted via ast without executing the module."""
+    profiles = extract_profiles(_OLD_SOURCE)
+    assert set(profiles) == {"gpt-4", "old-model"}
+    assert profiles["gpt-4"]["max_input_tokens"] == 8192
+
+
+def test_extract_profiles_handles_missing_or_invalid() -> None:
+    """Absent `_PROFILES` yields `{}`; present-but-unparseable sources raise."""
+    # No `_PROFILES` assignment, and an empty file, are both legitimately empty.
+    assert extract_profiles("x = 1") == {}
+    assert extract_profiles("") == {}
+    # A syntactically broken file is corrupt, not empty.
+    with pytest.raises(ProfileParseError):
+        extract_profiles("def (:")
+    # A non-literal or non-dict `_PROFILES` is corrupt too.
+    with pytest.raises(ProfileParseError):
+        extract_profiles("_PROFILES = some_function()")
+    with pytest.raises(ProfileParseError):
+        extract_profiles("_PROFILES = [1, 2, 3]")
+
+
+def test_diff_profiles() -> None:
+    """Diff reports added, removed, and per-field changes."""
+    diff = diff_profiles(extract_profiles(_OLD_SOURCE), extract_profiles(_NEW_SOURCE))
+    assert diff.added == ["gpt-5"]
+    assert diff.removed == ["old-model"]
+    assert set(diff.changed) == {"gpt-4"}
+    assert diff.changed["gpt-4"]["max_output_tokens"] == (4096, 16384)
+    assert diff.changed["gpt-4"]["image_inputs"] == (False, True)
+    assert diff.added_profiles["gpt-5"]["max_input_tokens"] == 400000
+
+
+def test_diff_profiles_no_changes() -> None:
+    """Identical inputs produce an empty diff."""
+    profiles = extract_profiles(_OLD_SOURCE)
+    diff = diff_profiles(profiles, profiles)
+    assert diff.is_empty
+
+
+def test_render_provider_section_content() -> None:
+    """Rendered section describes additions, removals, and field changes."""
+    diff = diff_profiles(extract_profiles(_OLD_SOURCE), extract_profiles(_NEW_SOURCE))
+    section = render_provider_section("openai", diff)
+    assert section is not None
+    assert "### openai" in section
+    assert "1 added" in section
+    assert "`gpt-5`" in section
+    assert "400,000 ctx" in section
+    assert "reasoning" in section
+    assert "1 removed" in section
+    assert "`old-model`" in section
+    assert "1 changed" in section
+    assert "max output tokens 4,096 → 16,384" in section
+    assert "added image input" in section
+
+
+def test_render_provider_section_empty() -> None:
+    """An empty diff renders no section."""
+    assert render_provider_section("openai", ProfileDiff()) is None
+
+
+def test_build_summary_headline() -> None:
+    """The summary leads with a header and an aggregate headline."""
+    diff = diff_profiles(extract_profiles(_OLD_SOURCE), extract_profiles(_NEW_SOURCE))
+    summary = build_summary({"openai": diff})
+    assert summary.startswith("## Summary of changes")
+    assert "1 added" in summary
+    assert "1 removed" in summary
+    assert "1 changed" in summary
+
+
+def test_build_summary_no_changes() -> None:
+    """An all-empty diff produces a short no-change note."""
+    assert build_summary({"openai": ProfileDiff()}) == "No model profile data changed."
+
+
+def test_truncation() -> None:
+    """Long lists are truncated with a trailing count of hidden rows."""
+    new: ModelProfileRegistry = {f"model-{i}": {"name": f"m{i}"} for i in range(40)}
+    diff = diff_profiles({}, new)
+    section = render_provider_section("openai", diff)
+    assert section is not None
+    assert "…and 15 more" in section
+
+
+def test_summarize_against_git(tmp_path: Path) -> None:
+    """Summarize compares the working tree against a committed baseline."""
+    repo = tmp_path
+    _init_repo(repo)
+
+    data_dir = "libs/partners/openai/data"
+    profiles_path = repo / data_dir / "_profiles.py"
+    _write_profiles(profiles_path, _OLD_SOURCE)
+    _git(repo, "add", "-A")
+    _git(repo, "commit", "-q", "-m", "init")
+
+    # Simulate a refresh by overwriting the working-tree file.
+    profiles_path.write_text(_NEW_SOURCE)
+
+    summary = summarize([{"provider": "openai", "data_dir": data_dir}], repo_root=repo)
+    assert "## Summary of changes" in summary
+    assert "`gpt-5`" in summary
+    assert "`old-model`" in summary
+    # The changed-field path is exercised end-to-end, not just at the unit layer.
+    assert "max output tokens 4,096 → 16,384" in summary
+
+
+def test_summarize_new_provider_file(tmp_path: Path) -> None:
+    """A brand-new profiles file is treated as all-added."""
+    repo = tmp_path
+    _init_repo(repo)
+    (repo / "README.md").write_text("x")
+    _git(repo, "add", "-A")
+    _git(repo, "commit", "-q", "-m", "init")
+
+    data_dir = "libs/partners/new/data"
+    _write_profiles(repo / data_dir / "_profiles.py", _NEW_SOURCE)
+
+    summary = summarize([{"provider": "new", "data_dir": data_dir}], repo_root=repo)
+    assert "2 added" in summary
+
+
+def test_field_change_is_tuple() -> None:
+    """`FieldChange` unpacks and compares like a plain (old, new) tuple."""
+    change = FieldChange(1, 2)
+    # Access the named fields before the tuple comparison: `== (1, 2)` would
+    # otherwise narrow `change` to a plain `tuple` for the rest of the scope.
+    assert change.old == 1
+    assert change.new == 2
+    assert change == (1, 2)
+
+
+def test_format_value_variants() -> None:
+    """Each `_format_value` branch renders the expected string."""
+    assert _format_value("x", None) == "unset"
+    assert _format_value("tool_calling", True) == "yes"  # noqa: FBT003
+    assert _format_value("tool_calling", False) == "no"  # noqa: FBT003
+    assert _format_value("max_input_tokens", 200000) == "200,000"
+    # Plain int outside the token fields is rendered without separators.
+    assert _format_value("foo", 42) == "42"
+    # Floats fall through to str().
+    assert _format_value("temperature", 1.5) == "1.5"
+    assert _format_value("name", "GPT") == "`GPT`"
+
+
+def test_render_non_bool_field_change() -> None:
+    """A changed non-boolean field is rendered as an `old → new` phrase."""
+    before: ModelProfileRegistry = {"m": {"status": "active", "name": "M"}}
+    after: ModelProfileRegistry = {"m": {"status": "deprecated", "name": "M2"}}
+    rendered = render_provider_section("openai", diff_profiles(before, after))
+    assert rendered is not None
+    assert "status `active` → `deprecated`" in rendered
+    assert "display name `M` → `M2`" in rendered
+
+
+def test_render_removed_bool_field_change() -> None:
+    """Flipping a boolean field to `False` renders a `removed <label>` phrase."""
+    before: ModelProfileRegistry = {"m": {"image_inputs": True}}
+    after: ModelProfileRegistry = {"m": {"image_inputs": False}}
+    rendered = render_provider_section("openai", diff_profiles(before, after))
+    assert rendered is not None
+    assert "removed image input" in rendered
+
+
+def test_describe_new_model_modalities() -> None:
+    """The descriptor for a new model covers context, output, inputs, and tools."""
+    full_profile: ModelProfile = {
+        "max_input_tokens": 200000,
+        "max_output_tokens": 64000,
+        "image_inputs": True,
+        "audio_inputs": True,
+        "video_inputs": True,
+        "pdf_inputs": True,
+        "tool_calling": True,
+    }
+    text = _describe_new_model(full_profile)
+    assert "200,000 ctx" in text
+    assert "64,000 out" in text
+    assert "text+image+audio+video+pdf in" in text
+    assert "tools" in text
+
+
+def test_describe_new_model_empty() -> None:
+    """A profile carrying only a `name` yields an empty-string descriptor."""
+    assert _describe_new_model({"name": "x"}) == ""
+
+
+def test_render_added_model_without_descriptor() -> None:
+    """An added model whose descriptor is empty gets no ` — ` suffix."""
+    output = render_provider_section("p", diff_profiles({}, {"bare": {"name": "B"}}))
+    assert output is not None
+    assert "- `bare`" in output
+    assert "- `bare` —" not in output
+
+
+def test_truncate_boundary() -> None:
+    """`_truncate` leaves `_MAX_ROWS` rows alone and caps a list one row longer."""
+    at_limit = [f"- r{i}" for i in range(_MAX_ROWS)]
+    assert _truncate(at_limit) == at_limit
+
+    past_limit = [f"- r{i}" for i in range(_MAX_ROWS + 1)]
+    capped = _truncate(past_limit)
+    assert len(capped) == _MAX_ROWS + 1
+    assert capped[-1] == "- …and 1 more"
+
+
+def test_build_summary_multi_provider_sorted() -> None:
+    """Provider sections appear in sorted order whatever the input order."""
+    first = diff_profiles({}, {"a": {"name": "A"}})
+    last = diff_profiles({}, {"z": {"name": "Z"}})
+    text = build_summary({"zzz": last, "aaa": first})
+    assert text.index("### aaa") < text.index("### zzz")
+
+
+def test_summarize_removed_when_file_deleted(tmp_path: Path) -> None:
+    """A committed profiles file deleted from the working tree is all-removed."""
+    repo = tmp_path
+    _init_repo(repo)
+    data_dir = "libs/partners/openai/data"
+    profiles_path = repo / data_dir / "_profiles.py"
+    _write_profiles(profiles_path, _OLD_SOURCE)
+    _git(repo, "add", "-A")
+    _git(repo, "commit", "-q", "-m", "init")
+
+    profiles_path.unlink()  # the file now exists only in the committed history
+
+    summary = summarize([{"provider": "openai", "data_dir": data_dir}], repo_root=repo)
+    assert "2 removed" in summary
+
+
+def test_summarize_bad_base_ref(tmp_path: Path) -> None:
+    """A base ref git cannot resolve raises instead of yielding an all-added diff."""
+    repo = tmp_path
+    _init_repo(repo)
+    (repo / "README.md").write_text("x")  # gives the initial commit some content
+    _git(repo, "add", "-A")
+    _git(repo, "commit", "-q", "-m", "init")
+
+    data_dir = "libs/partners/openai/data"
+    _write_profiles(repo / data_dir / "_profiles.py", _NEW_SOURCE)
+
+    with pytest.raises(RuntimeError, match="Could not resolve base ref"):
+        summarize(
+            [{"provider": "openai", "data_dir": data_dir}],
+            base_ref="no-such-ref",  # not created anywhere in this test
+            repo_root=repo,
+        )
+
+
+def test_summarize_malformed_entry(tmp_path: Path) -> None:
+    """A provider entry without `data_dir` raises `ValueError` naming the entry."""
+    repo = tmp_path
+    _init_repo(repo)
+    (repo / "README.md").write_text("x")  # gives the initial commit some content
+    _git(repo, "add", "-A")
+    _git(repo, "commit", "-q", "-m", "init")
+
+    with pytest.raises(ValueError, match="Invalid provider entry"):
+        summarize([{"provider": "openai"}], repo_root=repo)  # type: ignore[typeddict-item]
+
+
+def test_summarize_non_string_entry(tmp_path: Path) -> None:
+    """Non-string `provider`/`data_dir` raises `TypeError`, not `AttributeError`.
+
+    Without validation a non-string would reach `data_dir.rstrip(...)` and raise
+    an `AttributeError`, which the CLI's `except (RuntimeError, ValueError,
+    TypeError)` does not catch, so users would see a raw traceback.
+    """
+    repo = tmp_path
+    _init_repo(repo)
+    (repo / "README.md").write_text("x")  # gives the initial commit some content
+    _git(repo, "add", "-A")
+    _git(repo, "commit", "-q", "-m", "init")
+
+    with pytest.raises(TypeError, match="must be strings"):
+        summarize(
+            [{"provider": 5, "data_dir": 7}],  # type: ignore[typeddict-item]
+            repo_root=repo,
+        )
+
+
+def test_summarize_corrupt_working_tree_file(tmp_path: Path) -> None:
+    """An unparseable working-tree file raises instead of a mass-removal diff.
+
+    Counterpart of the `_verify_ref` guard on the base-ref side: a corrupt new
+    file must surface as an error, not be diffed as every model removed.
+    """
+    repo = tmp_path
+    _init_repo(repo)
+    data_dir = "libs/partners/openai/data"
+    profiles_path = repo / data_dir / "_profiles.py"
+    _write_profiles(profiles_path, _OLD_SOURCE)
+    _git(repo, "add", "-A")
+    _git(repo, "commit", "-q", "-m", "init")
+
+    # Overwrite with an unterminated dict literal, as a truncated refresh might.
+    profiles_path.write_text("_PROFILES = {")
+
+    with pytest.raises(RuntimeError, match="unparseable"):
+        summarize([{"provider": "openai", "data_dir": data_dir}], repo_root=repo)
+
+
+def test_summarize_corrupt_base_ref_file(tmp_path: Path) -> None:
+    """An unparseable committed file raises instead of yielding an all-added diff."""
+    repo = tmp_path
+    _init_repo(repo)
+    data_dir = "libs/partners/openai/data"
+    profiles_path = repo / data_dir / "_profiles.py"
+    _write_profiles(profiles_path, "_PROFILES = {")  # committed copy is broken
+    _git(repo, "add", "-A")
+    _git(repo, "commit", "-q", "-m", "init")
+
+    profiles_path.write_text(_NEW_SOURCE)  # working-tree copy is valid
+
+    with pytest.raises(RuntimeError, match="unparseable"):
+        summarize([{"provider": "openai", "data_dir": data_dir}], repo_root=repo)
+
+
+def test_cli_summarize_success(
+    tmp_path: Path,
+    capsys: pytest.CaptureFixture[str],
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """`summarize` with a valid `--providers` array prints Markdown to stdout."""
+    repo = tmp_path
+    _init_repo(repo)
+    data_dir = "libs/partners/openai/data"
+    profiles_path = repo / data_dir / "_profiles.py"
+    _write_profiles(profiles_path, _OLD_SOURCE)
+    _git(repo, "add", "-A")
+    _git(repo, "commit", "-q", "-m", "init")
+    profiles_path.write_text(_NEW_SOURCE)  # uncommitted change to be summarized
+
+    providers = json.dumps([{"provider": "openai", "data_dir": data_dir}])
+    monkeypatch.setattr(
+        sys,
+        "argv",
+        [
+            "langchain-profiles",
+            "summarize",
+            "--providers",
+            providers,
+            "--repo-root",
+            str(repo),
+        ],
+    )
+
+    cli.main()  # presumably parses the patched `sys.argv` — confirm in cli
+
+    out = capsys.readouterr().out
+    assert "## Summary of changes" in out
+    assert "`gpt-5`" in out
+    assert "max output tokens 4,096 → 16,384" in out
+
+
+def test_cli_summarize_invalid_json(monkeypatch: pytest.MonkeyPatch) -> None:
+    """The CLI exits via `SystemExit` when `--providers` is not valid JSON."""
+    argv = ["langchain-profiles", "summarize", "--providers", "not json"]
+    monkeypatch.setattr(sys, "argv", argv)
+    # An unparseable value must abort the CLI.
+    with pytest.raises(SystemExit):
+        cli.main()
+
+
+def test_cli_summarize_non_list(monkeypatch: pytest.MonkeyPatch) -> None:
+    """The CLI exits via `SystemExit` when `--providers` is JSON but not an array."""
+    argv = ["langchain-profiles", "summarize", "--providers", '{"a": 1}']
+    monkeypatch.setattr(sys, "argv", argv)
+    # A JSON object instead of an array must abort the CLI.
+    with pytest.raises(SystemExit):
+        cli.main()
--- a/libs/model-profiles/uv.lock
+++ b/libs/model-profiles/uv.lock
@@ -504,7 +504,7 @@ wheels = [

 [[package]]
 name = "langchain"
-version = "1.3.10"
+version = "1.3.11"
 source = { editable = "../langchain_v1" }
 dependencies = [
    { name = "langchain-core" },
@@ -703,7 +703,7 @@ typing = [

 [[package]]
 name = "langchain-openai"
-version = "1.3.2"
+version = "1.3.3"
 source = { editable = "../partners/openai" }
 dependencies = [
    { name = "langchain-core" },