mirror of
https://github.com/hwchase17/langchain.git
synced 2026-07-01 14:47:02 +00:00
feat(model-profiles): plain-English summary for profile refresh PRs (#38218)
Automated model-profile refresh PRs (e.g. #38210) ship a static template body, so a reviewer has to open *Files changed* and read large blocks of generated data to learn what actually moved. Because the underlying profile data is fully structured, we can describe the changes deterministically — no LLM, no hallucination risk. This adds a `langchain-profiles summarize` subcommand that compares the working-tree `_profiles.py` files against a git ref and renders a skimmable Markdown summary: models added (with a short capability descriptor), models removed, and per-field capability changes (context/output tokens, modalities, tool calling, reasoning, etc.), grouped by provider and capped so huge refreshes stay readable. Profiles are read with `ast.literal_eval` rather than imported, so the generated data file is never executed. Example output for a refresh that adds a model and bumps an output limit: ``` ## Summary of changes **1 added · 0 removed · 1 changed** across 1 provider(s). ### openai **➕ 1 added** - `gpt-6-preview` — 1,000,000 ctx, 128,000 out, text+image+audio in, reasoning, tools **✏️ 1 changed** - `gpt-3.5-turbo`: max output tokens 4,096 → 16,384 ``` Made by [Open SWE](https://openswe.vercel.app/agents/9bcbf182-effc-ba9b-0df3-afac620ad152) --------- Co-authored-by: open-swe[bot] <open-swe@users.noreply.github.com>
This commit is contained in:
28
.github/workflows/_refresh_model_profiles.yml
vendored
28
.github/workflows/_refresh_model_profiles.yml
vendored
@@ -167,6 +167,32 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: "📝 Build PR body with change summary"
|
||||
id: pr-body
|
||||
env:
|
||||
PROVIDERS_JSON: ${{ inputs.providers }}
|
||||
PR_BODY: ${{ inputs.pr-body }}
|
||||
run: |
|
||||
# The refresh step modified the working tree without committing, so
|
||||
# comparing against HEAD yields exactly the refresh's changes.
|
||||
cli_dir="${GITHUB_WORKSPACE}/${{ steps.cli.outputs.dir }}"
|
||||
body_file="${RUNNER_TEMP}/pr_body.md"
|
||||
printf '%s\n\n' "${PR_BODY}" > "${body_file}"
|
||||
# `summarize` builds the whole summary in memory and prints it once,
|
||||
# so a failure exits non-zero before any stdout reaches the append —
|
||||
# the body keeps only the static note, never a half-written summary.
|
||||
if ! uv run --frozen --project "${cli_dir}" \
|
||||
langchain-profiles summarize \
|
||||
--providers "${PROVIDERS_JSON}" \
|
||||
--base-ref HEAD \
|
||||
--repo-root "${GITHUB_WORKSPACE}" >> "${body_file}"; then
|
||||
echo "::warning::Could not generate change summary; see job log."
|
||||
# Surface the degradation in the PR body too: the warning above only
|
||||
# lands in the Actions log, which a PR reviewer won't see.
|
||||
printf '\n> [!NOTE]\n> Automated change summary unavailable — see the workflow run log.\n' >> "${body_file}"
|
||||
fi
|
||||
echo "path=${body_file}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: "🔑 Generate GitHub App token"
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@bcd2ba49218906704ab6c1aa796996da409d3eb1 # v3
|
||||
@@ -182,7 +208,7 @@ jobs:
|
||||
branch: ${{ inputs.pr-branch }}
|
||||
commit-message: ${{ inputs.pr-title }}
|
||||
title: ${{ inputs.pr-title }}
|
||||
body: ${{ inputs.pr-body }}
|
||||
body-path: ${{ steps.pr-body.outputs.path }}
|
||||
labels: ${{ inputs.pr-labels }}
|
||||
add-paths: ${{ inputs.add-paths }}
|
||||
|
||||
|
||||
475
libs/model-profiles/langchain_model_profiles/_summary.py
Normal file
475
libs/model-profiles/langchain_model_profiles/_summary.py
Normal file
@@ -0,0 +1,475 @@
|
||||
"""Generate a plain-English summary of model profile changes.
|
||||
|
||||
The `refresh_model_profiles` workflow opens an automated PR whenever the data
|
||||
behind `_profiles.py` files changes. Those diffs are large blocks of generated
|
||||
data, so a reviewer otherwise has to open *Files changed* and eyeball raw values
|
||||
to learn what actually moved. This module turns the structured before/after data
|
||||
into a skimmable Markdown summary (new models, removed models, and per-field
|
||||
capability/metadata changes) for the PR body. The summary is generated
|
||||
deterministically from the data, so there is no risk of an LLM misdescribing it.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import ast
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any, NamedTuple, TypedDict
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Mapping
|
||||
|
||||
from langchain_core.language_models.model_profile import (
|
||||
ModelProfile,
|
||||
ModelProfileRegistry,
|
||||
)
|
||||
|
||||
# Maximum number of bullet rows rendered per section before truncating.
# `_truncate` collapses anything past this cap into one "…and N more" row.
_MAX_ROWS = 25

# Human-readable labels for profile fields, keyed by the raw profile key.
# `_describe_field_change` falls back to the raw key for fields not listed here.
_FIELD_LABELS: dict[str, str] = {
    "name": "display name",
    "status": "status",
    "release_date": "release date",
    "last_updated": "last updated",
    "open_weights": "open weights",
    "max_input_tokens": "max input tokens",
    "max_output_tokens": "max output tokens",
    "text_inputs": "text input",
    "image_inputs": "image input",
    "audio_inputs": "audio input",
    "pdf_inputs": "PDF input",
    "video_inputs": "video input",
    "text_outputs": "text output",
    "image_outputs": "image output",
    "audio_outputs": "audio output",
    "video_outputs": "video output",
    "reasoning_output": "reasoning",
    "tool_calling": "tool calling",
    "tool_choice": "tool choice",
    "tool_call_streaming": "tool call streaming",
    "structured_output": "structured output",
    "attachment": "attachments",
    "temperature": "temperature control",
    "image_url_inputs": "image URL input",
    "image_tool_message": "image tool messages",
    "pdf_tool_message": "PDF tool messages",
}

# Token fields rendered with thousands separators (see `_format_value`).
_TOKEN_FIELDS = frozenset({"max_input_tokens", "max_output_tokens"})
|
||||
|
||||
|
||||
class ProfileParseError(ValueError):
    """Raised when `_profiles.py` source is present but `_PROFILES` is unreadable.

    A genuinely absent file is not an error: it yields an empty mapping. This
    exception exists so that corrupt data, whether in the working tree or at
    the committed ref, fails loudly instead of being silently diffed as a mass
    addition or removal of models.
    """
|
||||
|
||||
|
||||
class FieldChange(NamedTuple):
    """Before/after pair for one profile field that changed.

    A named tuple is used instead of a bare `tuple` so the old → new ordering
    the renderer depends on is encoded in the type rather than being a
    positional convention. Profile values are heterogeneous (bool, int, str,
    or unset), which is why both elements are typed as `Any`.
    """

    old: Any
    """Value prior to the refresh; `None` when the field was absent or unset."""

    new: Any
    """Value following the refresh; `None` when the field is absent or unset."""
|
||||
|
||||
|
||||
class ProviderEntry(TypedDict):
    """A provider name paired with the data directory holding its `_profiles.py`."""

    provider: str
    """Identifier of the provider (for example `'openai'`)."""

    data_dir: str
    """Provider data directory, given as a path relative to the repo root."""
|
||||
|
||||
|
||||
@dataclass
class ProfileDiff:
    """What changed between two sets of model profiles, in structured form."""

    added: list[str] = field(default_factory=list)
    """Sorted IDs of models that exist after the refresh but not before."""

    removed: list[str] = field(default_factory=list)
    """Sorted IDs of models that existed before the refresh but not after."""

    changed: dict[str, dict[str, FieldChange]] = field(default_factory=dict)
    """Field-level changes, keyed first by model ID and then by field name."""

    added_profiles: ModelProfileRegistry = field(default_factory=dict)
    """Complete profile of every added model, keyed by model ID."""

    @property
    def is_empty(self) -> bool:
        """Whether the diff holds no additions, removals, or field changes."""
        return not any((self.added, self.removed, self.changed))
|
||||
|
||||
|
||||
def extract_profiles(source: str) -> ModelProfileRegistry:
    """Extract the `_PROFILES` mapping from `_profiles.py` source.

    Uses `ast.literal_eval` rather than importing/executing the module so the
    generated data file is never run as code. The first top-level assignment
    (plain or annotated) to `_PROFILES` that carries a value is used.

    Args:
        source: Contents of a `_profiles.py` module. An empty string (e.g. a
            file absent at a git ref) yields an empty mapping.

    Returns:
        The `_PROFILES` mapping, or an empty dict when the source contains no
        `_PROFILES` assignment.

    Raises:
        ProfileParseError: If the source is present but cannot be parsed, or its
            `_PROFILES` value is not a valid dict literal. Surfacing this rather
            than returning `{}` prevents a corrupt file from being misreported
            as every model added or removed.
    """
    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError) as e:
        # `ValueError` covers sources with embedded null bytes on interpreters
        # that do not report them as `SyntaxError`.
        msg = f"Could not parse profile source as Python: {e}"
        raise ProfileParseError(msg) from e

    for node in tree.body:
        if isinstance(node, ast.AnnAssign):
            targets: list[ast.expr] = [node.target]
        elif isinstance(node, ast.Assign):
            targets = list(node.targets)
        else:
            continue
        is_profiles = any(
            isinstance(t, ast.Name) and t.id == "_PROFILES" for t in targets
        )
        # A bare annotation (`_PROFILES: dict`) has no value and is skipped.
        if is_profiles and node.value is not None:
            try:
                value = ast.literal_eval(node.value)
            except (ValueError, TypeError, SyntaxError) as e:
                # `TypeError` arises for structurally invalid literals such as
                # a dict with an unhashable key; treat it as corrupt data too.
                msg = f"`_PROFILES` is not a literal expression: {e}"
                raise ProfileParseError(msg) from e
            if not isinstance(value, dict):
                msg = f"`_PROFILES` is not a dict (got {type(value).__name__})"
                raise ProfileParseError(msg)
            return value
    return {}
|
||||
|
||||
|
||||
def diff_profiles(old: ModelProfileRegistry, new: ModelProfileRegistry) -> ProfileDiff:
    """Work out how two `_PROFILES` mappings differ.

    A field missing on one side is compared as `None`, so "absent" and
    "explicitly `None`" are treated as the same state.

    Args:
        old: Profiles as they were before the refresh.
        new: Profiles as they are after the refresh.

    Returns:
        A `ProfileDiff` listing added, removed, and changed models.
    """
    old_ids = set(old)
    new_ids = set(new)

    changed: dict[str, dict[str, FieldChange]] = {}
    for model_id in sorted(old_ids & new_ids):
        # Treat the profiles as plain mappings: the `ModelProfile` TypedDict
        # only allows literal keys, but here the keys are dynamic.
        before: Mapping[str, Any] = old[model_id]
        after: Mapping[str, Any] = new[model_id]
        deltas = {
            key: FieldChange(before.get(key), after.get(key))
            for key in sorted(set(before) | set(after))
            if before.get(key) != after.get(key)
        }
        if deltas:
            changed[model_id] = deltas

    added = sorted(new_ids - old_ids)
    return ProfileDiff(
        added=added,
        removed=sorted(old_ids - new_ids),
        changed=changed,
        added_profiles={model_id: new[model_id] for model_id in added},
    )
|
||||
|
||||
|
||||
def _format_value(field_name: str, value: Any) -> str:  # noqa: ANN401
    """Turn one field value into its display string.

    `None` becomes `unset`, booleans become `yes`/`no`, strings are wrapped in
    backticks, integers in token fields get thousands separators, and anything
    else is passed through `str()`.
    """
    if value is None:
        return "unset"
    # `bool` is checked ahead of `int` because `bool` is a subclass of `int`.
    if isinstance(value, bool):
        return "yes" if value else "no"
    if isinstance(value, str):
        return f"`{value}`"
    if isinstance(value, int) and field_name in _TOKEN_FIELDS:
        return f"{value:,}"
    return str(value)
|
||||
|
||||
|
||||
def _describe_field_change(
    field_name: str,
    old_val: Any,  # noqa: ANN401
    new_val: Any,  # noqa: ANN401
) -> str:
    """Describe a single field change as a short plain-English phrase.

    When either side is a boolean, a falsy → truthy flip reads as
    `added <label>` and a truthy → falsy flip as `removed <label>`. Every
    other change is rendered as `<label> <old> → <new>`.
    """
    label = _FIELD_LABELS.get(field_name, field_name)

    involves_flag = isinstance(old_val, bool) or isinstance(new_val, bool)
    if involves_flag:
        if new_val and not old_val:
            return f"added {label}"
        if old_val and not new_val:
            return f"removed {label}"
        # Both sides falsy (e.g. `False` vs unset): fall through to old → new.

    before = _format_value(field_name, old_val)
    after = _format_value(field_name, new_val)
    return f"{label} {before} → {after}"
|
||||
|
||||
|
||||
def _describe_new_model(profile: ModelProfile) -> str:
    """Build the short capability descriptor shown next to an added model.

    Lists, in order: context size, output size, non-text input modalities
    (prefixed with `text+`), reasoning, and tool calling. Fields that are
    missing or falsy are left out; an empty string is returned when nothing
    applies.
    """
    parts: list[str] = []

    for key, unit in (("max_input_tokens", "ctx"), ("max_output_tokens", "out")):
        limit = profile.get(key)
        if limit:
            parts.append(f"{limit:,} {unit}")

    input_kinds = (
        ("image_inputs", "image"),
        ("audio_inputs", "audio"),
        ("video_inputs", "video"),
        ("pdf_inputs", "pdf"),
    )
    extra_inputs = [label for key, label in input_kinds if profile.get(key)]
    if extra_inputs:
        parts.append("+".join(["text", *extra_inputs]) + " in")

    for key, tag in (("reasoning_output", "reasoning"), ("tool_calling", "tools")):
        if profile.get(key):
            parts.append(tag)

    return ", ".join(parts)
|
||||
|
||||
|
||||
def _truncate(rows: list[str]) -> list[str]:
    """Limit bullet rows to `_MAX_ROWS`, adding a "…and N more" row if cut.

    Lists at or under the cap are returned unchanged (same object).
    """
    overflow = len(rows) - _MAX_ROWS
    if overflow <= 0:
        return rows
    return rows[:_MAX_ROWS] + [f"- …and {overflow} more"]
|
||||
|
||||
|
||||
def render_provider_section(provider: str, diff: ProfileDiff) -> str | None:
    """Produce one provider's Markdown section, or `None` if nothing changed.

    Args:
        provider: Provider identifier (e.g. `'openai'`).
        diff: The `ProfileDiff` computed for that provider.

    Returns:
        Markdown describing the provider's added, removed, and changed models
        (each list capped by `_truncate`), or `None` for an empty diff.
    """
    if diff.is_empty:
        return None

    out = [f"### {provider}"]

    if diff.added:
        out.append(f"\n**➕ {len(diff.added)} added**")  # noqa: RUF001
        added_rows = []
        for model_id in diff.added:
            summary = _describe_new_model(diff.added_profiles[model_id])
            row = f"- `{model_id}`"
            if summary:
                row += f" — {summary}"
            added_rows.append(row)
        out.extend(_truncate(added_rows))

    if diff.removed:
        out.append(f"\n**➖ {len(diff.removed)} removed**")  # noqa: RUF001
        removed_rows = [f"- `{model_id}`" for model_id in diff.removed]
        out.extend(_truncate(removed_rows))

    if diff.changed:
        out.append(f"\n**✏️ {len(diff.changed)} changed**")
        changed_rows = [
            f"- `{model_id}`: "
            + "; ".join(
                _describe_field_change(name, change.old, change.new)
                for name, change in fields.items()
            )
            for model_id, fields in diff.changed.items()
        ]
        out.extend(_truncate(changed_rows))

    return "\n".join(out)
|
||||
|
||||
|
||||
def build_summary(provider_diffs: dict[str, ProfileDiff]) -> str:
    """Combine every provider's section into the full Markdown summary.

    Args:
        provider_diffs: Each provider name mapped to its `ProfileDiff`.

    Returns:
        A header, an aggregate headline, and one section per changed provider
        (in sorted provider order). If no provider has changes, a short
        no-change note is returned instead.
    """
    sections = []
    for provider in sorted(provider_diffs):
        rendered = render_provider_section(provider, provider_diffs[provider])
        if rendered:
            sections.append(rendered)

    if not sections:
        return "No model profile data changed."

    diffs = list(provider_diffs.values())
    added_count = sum(len(d.added) for d in diffs)
    removed_count = sum(len(d.removed) for d in diffs)
    changed_count = sum(len(d.changed) for d in diffs)
    headline = (
        f"**{added_count} added · {removed_count} removed · "
        f"{changed_count} changed** across {len(sections)} provider(s)."
    )

    return "\n\n".join(("## Summary of changes", headline, *sections))
|
||||
|
||||
|
||||
def _verify_ref(repo_root: Path, ref: str) -> None:
    """Check that `ref` names a commit inside `repo_root`.

    Doing this once up front means a later non-zero exit from `_git_show` can
    be read unambiguously as "path absent at this ref". Without it, a typo'd
    ref, an unfetched ref, or a non-repository root would look the same as a
    genuinely new file, and every existing model would render as newly added.

    Raises:
        RuntimeError: If git cannot be run, `repo_root` is not a repository, or
            `ref` does not resolve.
    """
    command = [
        "git",  # noqa: S607
        "-C",
        str(repo_root),
        "rev-parse",
        "--verify",
        "--quiet",
        f"{ref}^{{commit}}",
    ]
    try:
        completed = subprocess.run(  # noqa: S603
            command,
            capture_output=True,
            text=True,
            check=False,
        )
    except OSError as e:
        msg = f"Could not run git (is it installed and on PATH?): {e}"
        raise RuntimeError(msg) from e

    if completed.returncode == 0:
        return
    msg = (
        f"Could not resolve base ref {ref!r} in {repo_root}; "
        "is it a valid git ref in this repository?"
    )
    raise RuntimeError(msg)
|
||||
|
||||
|
||||
def _git_show(repo_root: Path, ref: str, rel_path: str) -> str | None:
    """Return file contents at `ref`, or None if the file does not exist there.

    Assumes `ref` has already been validated by `_verify_ref`, so a non-zero
    exit here means the path is absent at `ref` rather than a bad ref.

    Output is decoded explicitly as UTF-8 so the committed side is read with
    the same encoding as the working-tree file, independent of the locale.

    Args:
        repo_root: Repository root passed to `git -C`.
        ref: Git ref to read from.
        rel_path: File path relative to the repository root.

    Returns:
        The file contents, or `None` when git exits non-zero or cannot be run.

    Raises:
        RuntimeError: If git's output is not valid UTF-8.
    """
    try:
        result = subprocess.run(  # noqa: S603
            ["git", "-C", str(repo_root), "show", f"{ref}:{rel_path}"],  # noqa: S607
            capture_output=True,
            text=True,
            encoding="utf-8",
            check=False,
        )
    except OSError:
        return None
    except UnicodeDecodeError as e:
        # Undecodable data is not the same as "absent": returning None here
        # would make every current model look newly added.
        msg = f"Could not decode {rel_path} at {ref!r} as UTF-8: {e}"
        raise RuntimeError(msg) from e
    return result.stdout if result.returncode == 0 else None
|
||||
|
||||
|
||||
def summarize(
    providers: list[ProviderEntry],
    *,
    base_ref: str = "HEAD",
    repo_root: Path | None = None,
) -> str:
    """Render a Markdown summary of profile changes relative to `base_ref`.

    For each provider, the `_profiles.py` stored at `base_ref` is compared with
    the one in the working tree. A file missing on either side is treated as
    an empty set of profiles.

    Args:
        providers: List of `{'provider': ..., 'data_dir': ...}` entries,
            matching the workflow input. `data_dir` is relative to the repo
            root and contains `_profiles.py`.
        base_ref: Git ref the working tree is compared against.
        repo_root: Repository root. Defaults to the current directory.

    Returns:
        Markdown summary suitable for a PR body.

    Raises:
        RuntimeError: If `base_ref` cannot be resolved, or a profiles file
            exists but cannot be read or parsed.
        ValueError: If a `providers` entry is missing a required key.
        TypeError: If a `providers` entry's `provider`/`data_dir` is not a
            string.
    """
    root = (repo_root or Path.cwd()).resolve()
    _verify_ref(root, base_ref)

    provider_diffs: dict[str, ProfileDiff] = {}
    for entry in providers:
        # At the CLI boundary an entry is arbitrary parsed JSON, so handle it
        # as an untrusted mapping rather than a guaranteed `ProviderEntry`.
        entry_map: Mapping[str, Any] = entry
        try:
            provider, data_dir = entry_map["provider"], entry_map["data_dir"]
        except (KeyError, TypeError) as e:
            msg = (
                f"Invalid provider entry {entry!r}: expected 'provider' and "
                f"'data_dir' keys ({e})"
            )
            raise ValueError(msg) from e
        if not (isinstance(provider, str) and isinstance(data_dir, str)):
            msg = (
                f"Invalid provider entry {entry!r}: 'provider' and 'data_dir' "
                "must be strings"
            )
            raise TypeError(msg)

        rel_path = data_dir.rstrip("/") + "/_profiles.py"
        new_path = root / rel_path

        # Committed side: `None` (path absent at the ref) collapses to "".
        old_source = _git_show(root, base_ref, rel_path) or ""

        # Working-tree side: a missing file is likewise an empty source.
        new_source = ""
        if new_path.exists():
            try:
                new_source = new_path.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError) as e:
                msg = f"Could not read {new_path}: {e}"
                raise RuntimeError(msg) from e

        # Corrupt-but-readable data has to raise. Extracting `{}` from it
        # would be diffed as every model added (old side) or removed (new
        # side), which is a confident but wrong summary.
        try:
            old_profiles = extract_profiles(old_source)
        except ProfileParseError as e:
            msg = f"Profile data for {provider!r} at {base_ref!r} is unparseable: {e}"
            raise RuntimeError(msg) from e
        try:
            new_profiles = extract_profiles(new_source)
        except ProfileParseError as e:
            msg = f"Profile data for {provider!r} at {new_path} is unparseable: {e}"
            raise RuntimeError(msg) from e

        provider_diffs[provider] = diff_profiles(old_profiles, new_profiles)

    return build_summary(provider_diffs)
|
||||
@@ -399,10 +399,53 @@ def main() -> None:
|
||||
help="Data directory containing profile_augmentations.toml",
|
||||
)
|
||||
|
||||
# summarize command
|
||||
summarize_parser = subparsers.add_parser(
|
||||
"summarize",
|
||||
help="Summarize profile changes vs a git ref as Markdown (for PR bodies)",
|
||||
)
|
||||
summarize_parser.add_argument(
|
||||
"--providers",
|
||||
required=True,
|
||||
help=(
|
||||
"JSON array of objects with 'provider' and 'data_dir' keys "
|
||||
"(data_dir relative to the repo root)."
|
||||
),
|
||||
)
|
||||
summarize_parser.add_argument(
|
||||
"--base-ref",
|
||||
default="HEAD",
|
||||
help="Git ref to compare the working tree against (default: HEAD).",
|
||||
)
|
||||
summarize_parser.add_argument(
|
||||
"--repo-root",
|
||||
type=Path,
|
||||
default=None,
|
||||
help="Repository root (default: current directory).",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.command == "refresh":
|
||||
refresh(args.provider, args.data_dir)
|
||||
elif args.command == "summarize":
|
||||
from langchain_model_profiles._summary import summarize
|
||||
|
||||
try:
|
||||
providers = json.loads(args.providers)
|
||||
except json.JSONDecodeError as e:
|
||||
parser.error(f"--providers is not valid JSON: {e}")
|
||||
|
||||
if not isinstance(providers, list):
|
||||
parser.error("--providers must be a JSON array")
|
||||
|
||||
try:
|
||||
output = summarize(
|
||||
providers, base_ref=args.base_ref, repo_root=args.repo_root
|
||||
)
|
||||
except (RuntimeError, ValueError, TypeError) as e:
|
||||
parser.error(str(e))
|
||||
print(output)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
470
libs/model-profiles/tests/unit_tests/test_summary.py
Normal file
470
libs/model-profiles/tests/unit_tests/test_summary.py
Normal file
@@ -0,0 +1,470 @@
|
||||
"""Tests for the profile change summary generator."""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain_model_profiles import cli
|
||||
from langchain_model_profiles._summary import (
|
||||
_MAX_ROWS,
|
||||
FieldChange,
|
||||
ProfileDiff,
|
||||
ProfileParseError,
|
||||
_describe_new_model,
|
||||
_format_value,
|
||||
_truncate,
|
||||
build_summary,
|
||||
diff_profiles,
|
||||
extract_profiles,
|
||||
render_provider_section,
|
||||
summarize,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain_core.language_models.model_profile import (
|
||||
ModelProfile,
|
||||
ModelProfileRegistry,
|
||||
)
|
||||
|
||||
# Baseline `_profiles.py` fixture: two models, `gpt-4` and `old-model`.
_OLD_SOURCE = '''"""Auto-generated."""

from typing import Any

_PROFILES: dict[str, dict[str, Any]] = {
    "gpt-4": {
        "name": "GPT-4",
        "max_input_tokens": 8192,
        "max_output_tokens": 4096,
        "image_inputs": False,
        "tool_calling": True,
    },
    "old-model": {
        "name": "Old",
        "max_input_tokens": 1000,
    },
}
'''

# Refreshed fixture: `gpt-4` gains image input and a larger output limit,
# `old-model` is dropped, and `gpt-5` is new.
_NEW_SOURCE = '''"""Auto-generated."""

from typing import Any

_PROFILES: dict[str, dict[str, Any]] = {
    "gpt-4": {
        "name": "GPT-4",
        "max_input_tokens": 8192,
        "max_output_tokens": 16384,
        "image_inputs": True,
        "tool_calling": True,
    },
    "gpt-5": {
        "name": "GPT-5",
        "max_input_tokens": 400000,
        "max_output_tokens": 128000,
        "image_inputs": True,
        "reasoning_output": True,
        "tool_calling": True,
    },
}
'''
|
||||
|
||||
|
||||
def _git(repo: Path, *args: str) -> None:
    """Test helper: run `git -C <repo> <args...>`, raising if it fails."""
    command = ["git", "-C", str(repo), *args]  # noqa: S607
    subprocess.run(  # noqa: S603
        command,
        capture_output=True,
        text=True,
        check=True,
    )
|
||||
|
||||
|
||||
def _init_repo(repo: Path) -> None:
    """Create a git repo at `repo` and give it a fixed committer identity."""
    _git(repo, "init", "-q")
    # A fixed identity lets `git commit` work without any global git config.
    for key, value in (("user.email", "t@example.com"), ("user.name", "Test")):
        _git(repo, "config", key, value)
|
||||
|
||||
|
||||
def _write_profiles(path: Path, source: str) -> None:
    """Write a `_profiles.py` file, creating parent directories.

    The file is written as UTF-8 explicitly, matching how `summarize` reads
    the working-tree file, so the fixture does not depend on the locale.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(source, encoding="utf-8")
|
||||
|
||||
|
||||
def test_extract_profiles() -> None:
    """The `_PROFILES` literal is read from source text via `ast`."""
    extracted = extract_profiles(_OLD_SOURCE)
    model_ids = set(extracted)
    assert model_ids == {"gpt-4", "old-model"}
    assert extracted["gpt-4"]["max_input_tokens"] == 8192
|
||||
|
||||
|
||||
def test_extract_profiles_handles_missing_or_invalid() -> None:
    """Sources without `_PROFILES` give `{}`; corrupt sources raise."""
    # A file with no `_PROFILES` assignment and an empty file are both
    # legitimately empty.
    for empty_source in ("x = 1", ""):
        assert extract_profiles(empty_source) == {}

    corrupt_sources = (
        # Syntactically broken: corrupt, not empty.
        "def (:",
        # `_PROFILES` present but not a literal.
        "_PROFILES = some_function()",
        # `_PROFILES` is a literal but not a dict.
        "_PROFILES = [1, 2, 3]",
    )
    for corrupt in corrupt_sources:
        with pytest.raises(ProfileParseError):
            extract_profiles(corrupt)
|
||||
|
||||
|
||||
def test_diff_profiles() -> None:
    """The diff lists added models, removed models, and per-field changes."""
    before = extract_profiles(_OLD_SOURCE)
    after = extract_profiles(_NEW_SOURCE)
    diff = diff_profiles(before, after)

    assert diff.added == ["gpt-5"]
    assert diff.removed == ["old-model"]
    assert set(diff.changed) == {"gpt-4"}

    gpt4_changes = diff.changed["gpt-4"]
    assert gpt4_changes["max_output_tokens"] == (4096, 16384)
    assert gpt4_changes["image_inputs"] == (False, True)

    assert diff.added_profiles["gpt-5"]["max_input_tokens"] == 400000
|
||||
|
||||
|
||||
def test_diff_profiles_no_changes() -> None:
    """Diffing a registry against itself gives an empty diff."""
    same = extract_profiles(_OLD_SOURCE)
    assert diff_profiles(same, same).is_empty
|
||||
|
||||
|
||||
def test_render_provider_section_content() -> None:
    """The section text covers additions, removals, and field changes."""
    diff = diff_profiles(extract_profiles(_OLD_SOURCE), extract_profiles(_NEW_SOURCE))
    section = render_provider_section("openai", diff)
    assert section is not None

    expected_fragments = (
        "### openai",
        "1 added",
        "`gpt-5`",
        "400,000 ctx",
        "reasoning",
        "1 removed",
        "`old-model`",
        "1 changed",
        "max output tokens 4,096 → 16,384",
        "added image input",
    )
    for fragment in expected_fragments:
        assert fragment in section
|
||||
|
||||
|
||||
def test_render_provider_section_empty() -> None:
    """No section is rendered for a diff with no changes."""
    rendered = render_provider_section("openai", ProfileDiff())
    assert rendered is None
|
||||
|
||||
|
||||
def test_build_summary_headline() -> None:
    """The summary opens with its header and contains the aggregate counts."""
    before = extract_profiles(_OLD_SOURCE)
    after = extract_profiles(_NEW_SOURCE)
    summary = build_summary({"openai": diff_profiles(before, after)})

    assert summary.startswith("## Summary of changes")
    for count in ("1 added", "1 removed", "1 changed"):
        assert count in summary
|
||||
|
||||
|
||||
def test_build_summary_no_changes() -> None:
    """When every diff is empty, only the short no-change note is returned."""
    summary = build_summary({"openai": ProfileDiff()})
    assert summary == "No model profile data changed."
|
||||
|
||||
|
||||
def test_truncation() -> None:
    """A list of 40 added models is capped, with the hidden count appended."""
    many_models: ModelProfileRegistry = {
        f"model-{i}": {"name": f"m{i}"} for i in range(40)
    }
    section = render_provider_section("openai", diff_profiles({}, many_models))
    assert section is not None
    assert "…and 15 more" in section
|
||||
|
||||
|
||||
def test_summarize_against_git(tmp_path: Path) -> None:
    """`summarize` diffs the working tree against the committed baseline."""
    _init_repo(tmp_path)

    data_dir = "libs/partners/openai/data"
    target = tmp_path / data_dir / "_profiles.py"
    _write_profiles(target, _OLD_SOURCE)
    _git(tmp_path, "add", "-A")
    _git(tmp_path, "commit", "-q", "-m", "init")

    # Overwrite the working-tree file without committing, as a refresh would.
    target.write_text(_NEW_SOURCE)

    summary = summarize(
        [{"provider": "openai", "data_dir": data_dir}], repo_root=tmp_path
    )
    expected_fragments = (
        "## Summary of changes",
        "`gpt-5`",
        "`old-model`",
        # Covers the changed-field path end-to-end, beyond the unit layer.
        "max output tokens 4,096 → 16,384",
    )
    for fragment in expected_fragments:
        assert fragment in summary
|
||||
|
||||
|
||||
def test_summarize_new_provider_file(tmp_path: Path) -> None:
    """A profiles file absent at the base ref reports every model as added."""
    _init_repo(tmp_path)
    readme = tmp_path / "README.md"
    readme.write_text("x")
    _git(tmp_path, "add", "-A")
    _git(tmp_path, "commit", "-q", "-m", "init")

    # The profiles file exists only in the working tree, never in a commit.
    data_dir = "libs/partners/new/data"
    _write_profiles(tmp_path / data_dir / "_profiles.py", _NEW_SOURCE)

    summary = summarize(
        [{"provider": "new", "data_dir": data_dir}], repo_root=tmp_path
    )
    assert "2 added" in summary
|
||||
|
||||
|
||||
def test_field_change_is_tuple() -> None:
    """A `FieldChange` has named fields and still equals an (old, new) tuple."""
    pair = FieldChange(1, 2)
    # The named fields are read first on purpose: the `== (1, 2)` comparison
    # would otherwise narrow `pair` to a plain `tuple` for the rest of the
    # scope.
    assert pair.old == 1
    assert pair.new == 2
    assert pair == (1, 2)
|
||||
|
||||
|
||||
def test_format_value_variants() -> None:
    """Every branch of `_format_value` produces the expected string."""
    cases = (
        ("x", None, "unset"),
        ("tool_calling", True, "yes"),
        ("tool_calling", False, "no"),
        ("max_input_tokens", 200000, "200,000"),
        # An int outside the token fields gets no thousands separators.
        ("foo", 42, "42"),
        # Floats are rendered with plain `str()`.
        ("temperature", 1.5, "1.5"),
        ("name", "GPT", "`GPT`"),
    )
    for field_name, value, expected in cases:
        assert _format_value(field_name, value) == expected
|
||||
|
||||
|
||||
def test_render_non_bool_field_change() -> None:
    """Changes to non-boolean fields are phrased as `old → new`."""
    before: ModelProfileRegistry = {"m": {"status": "active", "name": "M"}}
    after: ModelProfileRegistry = {"m": {"status": "deprecated", "name": "M2"}}

    rendered = render_provider_section("openai", diff_profiles(before, after))

    assert rendered is not None
    expected_phrases = (
        "status `active` → `deprecated`",
        "display name `M` → `M2`",
    )
    for phrase in expected_phrases:
        assert phrase in rendered
|
||||
|
||||
|
||||
def test_render_removed_bool_field_change() -> None:
    """Turning a boolean capability off is phrased as `removed <label>`."""
    before: ModelProfileRegistry = {"m": {"image_inputs": True}}
    after: ModelProfileRegistry = {"m": {"image_inputs": False}}

    changes = diff_profiles(before, after)
    rendered = render_provider_section("openai", changes)

    assert rendered is not None
    assert "removed image input" in rendered
|
||||
|
||||
|
||||
def test_describe_new_model_modalities() -> None:
    """The descriptor of a new model covers context, output, inputs, and tools."""
    full_profile: ModelProfile = {
        "max_input_tokens": 200000,
        "max_output_tokens": 64000,
        "image_inputs": True,
        "audio_inputs": True,
        "video_inputs": True,
        "pdf_inputs": True,
        "tool_calling": True,
    }

    text = _describe_new_model(full_profile)

    expected_fragments = (
        "200,000 ctx",
        "64,000 out",
        "text+image+audio+video+pdf in",
        "tools",
    )
    for fragment in expected_fragments:
        assert fragment in text
|
||||
|
||||
|
||||
def test_describe_new_model_empty() -> None:
    """With no notable fields the descriptor is the empty string."""
    text = _describe_new_model({"name": "x"})
    assert text == ""
|
||||
|
||||
|
||||
def test_render_added_model_without_descriptor() -> None:
    """An added model lacking a descriptor is listed without a ` — ` suffix."""
    only_added = diff_profiles({}, {"bare": {"name": "B"}})

    rendered = render_provider_section("p", only_added)

    assert rendered is not None
    # The bullet itself must be present, but without the em-dash separator.
    assert "- `bare`" in rendered
    assert "- `bare` —" not in rendered
|
||||
|
||||
|
||||
def test_truncate_boundary() -> None:
    """`_truncate` leaves `_MAX_ROWS` rows alone and caps a list one longer."""
    at_limit = [f"- r{i}" for i in range(_MAX_ROWS)]
    assert _truncate(at_limit) == at_limit

    # One row past the limit: same length out, last row replaced by a tally.
    one_over = [f"- r{i}" for i in range(_MAX_ROWS + 1)]
    capped = _truncate(one_over)
    assert len(capped) == _MAX_ROWS + 1
    assert capped[-1] == "- …and 1 more"
|
||||
|
||||
|
||||
def test_build_summary_multi_provider_sorted() -> None:
    """Provider sections appear alphabetically even when supplied out of order."""
    # Insert "zzz" before "aaa" on purpose so input order differs from sorted.
    diffs_by_provider = {
        "zzz": diff_profiles({}, {"z": {"name": "Z"}}),
        "aaa": diff_profiles({}, {"a": {"name": "A"}}),
    }

    rendered = build_summary(diffs_by_provider)

    assert rendered.index("### aaa") < rendered.index("### zzz")
|
||||
|
||||
|
||||
def test_summarize_removed_when_file_deleted(tmp_path: Path) -> None:
    """Removing the profiles file after committing it reports all models removed."""
    _init_repo(tmp_path)
    rel_dir = "libs/partners/openai/data"
    target = tmp_path / rel_dir / "_profiles.py"

    # Commit the old profiles so the committed tree has them.
    _write_profiles(target, _OLD_SOURCE)
    _git(tmp_path, "add", "-A")
    _git(tmp_path, "commit", "-q", "-m", "init")

    # Then drop the file from the working tree only.
    target.unlink()

    rendered = summarize(
        [{"provider": "openai", "data_dir": rel_dir}],
        repo_root=tmp_path,
    )
    assert "2 removed" in rendered
|
||||
|
||||
|
||||
def test_summarize_bad_base_ref(tmp_path: Path) -> None:
    """An unknown base ref raises instead of producing an all-added diff."""
    _init_repo(tmp_path)
    readme = tmp_path / "README.md"
    readme.write_text("x")
    _git(tmp_path, "add", "-A")
    _git(tmp_path, "commit", "-q", "-m", "init")

    rel_dir = "libs/partners/openai/data"
    _write_profiles(tmp_path / rel_dir / "_profiles.py", _NEW_SOURCE)

    with pytest.raises(RuntimeError, match="Could not resolve base ref"):
        summarize(
            [{"provider": "openai", "data_dir": rel_dir}],
            base_ref="no-such-ref",
            repo_root=tmp_path,
        )
|
||||
|
||||
|
||||
def test_summarize_malformed_entry(tmp_path: Path) -> None:
    """Omitting a required key from a provider entry raises a clear error."""
    _init_repo(tmp_path)
    readme = tmp_path / "README.md"
    readme.write_text("x")
    _git(tmp_path, "add", "-A")
    _git(tmp_path, "commit", "-q", "-m", "init")

    # The entry deliberately has no `data_dir`.
    with pytest.raises(ValueError, match="Invalid provider entry"):
        summarize(
            [{"provider": "openai"}],  # type: ignore[typeddict-item]
            repo_root=tmp_path,
        )
|
||||
|
||||
|
||||
def test_summarize_non_string_entry(tmp_path: Path) -> None:
    """Non-string `provider`/`data_dir` values raise `TypeError`.

    Without that check a non-string would reach `data_dir.rstrip(...)` and
    raise `AttributeError`, which is not among the `(RuntimeError, ValueError,
    TypeError)` the CLI catches, so the user would see a raw traceback rather
    than a clean error.
    """
    _init_repo(tmp_path)
    readme = tmp_path / "README.md"
    readme.write_text("x")
    _git(tmp_path, "add", "-A")
    _git(tmp_path, "commit", "-q", "-m", "init")

    with pytest.raises(TypeError, match="must be strings"):
        summarize(
            [{"provider": 5, "data_dir": 7}],  # type: ignore[typeddict-item]
            repo_root=tmp_path,
        )
|
||||
|
||||
|
||||
def test_summarize_corrupt_working_tree_file(tmp_path: Path) -> None:
    """An existing but unparseable working-tree file raises an error.

    This is the working-tree counterpart of the `_verify_ref` guard on the
    base-ref side: a broken new file has to surface as a failure instead of
    being diffed as though every model had been removed.
    """
    _init_repo(tmp_path)
    rel_dir = "libs/partners/openai/data"
    target = tmp_path / rel_dir / "_profiles.py"
    _write_profiles(target, _OLD_SOURCE)
    _git(tmp_path, "add", "-A")
    _git(tmp_path, "commit", "-q", "-m", "init")

    # Mimic a refresh that left the file truncated / syntactically invalid.
    target.write_text("_PROFILES = {")

    with pytest.raises(RuntimeError, match="unparseable"):
        summarize(
            [{"provider": "openai", "data_dir": rel_dir}],
            repo_root=tmp_path,
        )
|
||||
|
||||
|
||||
def test_summarize_corrupt_base_ref_file(tmp_path: Path) -> None:
    """A committed file that cannot be parsed raises, not an all-added diff."""
    _init_repo(tmp_path)
    rel_dir = "libs/partners/openai/data"
    target = tmp_path / rel_dir / "_profiles.py"

    # Commit a syntactically broken profiles file.
    _write_profiles(target, "_PROFILES = {")
    _git(tmp_path, "add", "-A")
    _git(tmp_path, "commit", "-q", "-m", "init")

    # The working tree is then repaired; only the committed copy is bad.
    target.write_text(_NEW_SOURCE)

    with pytest.raises(RuntimeError, match="unparseable"):
        summarize(
            [{"provider": "openai", "data_dir": rel_dir}],
            repo_root=tmp_path,
        )
|
||||
|
||||
|
||||
def test_cli_summarize_success(
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Running `summarize` with a valid `--providers` array prints the summary."""
    _init_repo(tmp_path)
    rel_dir = "libs/partners/openai/data"
    target = tmp_path / rel_dir / "_profiles.py"

    # Commit the old profiles, then leave the new ones uncommitted.
    _write_profiles(target, _OLD_SOURCE)
    _git(tmp_path, "add", "-A")
    _git(tmp_path, "commit", "-q", "-m", "init")
    target.write_text(_NEW_SOURCE)

    argv = [
        "langchain-profiles",
        "summarize",
        "--providers",
        json.dumps([{"provider": "openai", "data_dir": rel_dir}]),
        "--repo-root",
        str(tmp_path),
    ]
    monkeypatch.setattr(sys, "argv", argv)

    cli.main()

    stdout = capsys.readouterr().out
    expected_fragments = (
        "## Summary of changes",
        "`gpt-5`",
        "max output tokens 4,096 → 16,384",
    )
    for fragment in expected_fragments:
        assert fragment in stdout
|
||||
|
||||
|
||||
def test_cli_summarize_invalid_json(monkeypatch: pytest.MonkeyPatch) -> None:
    """A `--providers` argument that is not valid JSON makes the CLI exit."""
    argv = ["langchain-profiles", "summarize", "--providers", "not json"]
    monkeypatch.setattr(sys, "argv", argv)

    with pytest.raises(SystemExit):
        cli.main()
|
||||
|
||||
|
||||
def test_cli_summarize_non_list(monkeypatch: pytest.MonkeyPatch) -> None:
    """A `--providers` argument that is JSON but not an array makes the CLI exit."""
    argv = ["langchain-profiles", "summarize", "--providers", '{"a": 1}']
    monkeypatch.setattr(sys, "argv", argv)

    with pytest.raises(SystemExit):
        cli.main()
|
||||
4
libs/model-profiles/uv.lock
generated
4
libs/model-profiles/uv.lock
generated
@@ -504,7 +504,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "langchain"
|
||||
version = "1.3.10"
|
||||
version = "1.3.11"
|
||||
source = { editable = "../langchain_v1" }
|
||||
dependencies = [
|
||||
{ name = "langchain-core" },
|
||||
@@ -703,7 +703,7 @@ typing = [
|
||||
|
||||
[[package]]
|
||||
name = "langchain-openai"
|
||||
version = "1.3.2"
|
||||
version = "1.3.3"
|
||||
source = { editable = "../partners/openai" }
|
||||
dependencies = [
|
||||
{ name = "langchain-core" },
|
||||
|
||||
Reference in New Issue
Block a user