Files
langchain/.github/workflows/_refresh_model_profiles.yml
Mason Daugherty f7e87f7ab8 feat(model-profiles): plain-English summary for profile refresh PRs (#38218)
Automated model-profile refresh PRs (e.g. #38210) ship a static template
body, so a reviewer has to open *Files changed* and read large blocks of
generated data to learn what actually moved. Because the underlying
profile data is fully structured, we can describe the changes
deterministically — no LLM, no hallucination risk.

This adds a `langchain-profiles summarize` subcommand that compares the
working-tree `_profiles.py` files against a git ref and renders a
skimmable Markdown summary: models added (with a short capability
descriptor), models removed, and per-field capability changes
(context/output tokens, modalities, tool calling, reasoning, etc.),
grouped by provider and capped so huge refreshes stay readable. Profiles
are read with `ast.literal_eval` rather than imported, so the generated
data file is never executed.

Example output for a refresh that adds a model and bumps an output
limit:

```
## Summary of changes

**1 added · 0 removed · 1 changed** across 1 provider(s).

### openai

** 1 added**
- `gpt-6-preview` — 1,000,000 ctx, 128,000 out, text+image+audio in, reasoning, tools

**✏️ 1 changed**
- `gpt-3.5-turbo`: max output tokens 4,096 → 16,384
```

Made by [Open
SWE](https://openswe.vercel.app/agents/9bcbf182-effc-ba9b-0df3-afac620ad152)

---------

Co-authored-by: open-swe[bot] <open-swe@users.noreply.github.com>
2026-06-22 22:15:29 -04:00

229 lines
8.9 KiB
YAML

# Reusable workflow: refreshes model profile data for any repo that uses the
# `langchain-profiles` CLI. Creates (or updates) a pull request with the
# resulting changes.
#
# Callers MUST set `permissions: { contents: write, pull-requests: write }` —
# reusable workflows cannot escalate the caller's token permissions.
#
# ── Example: external repo (langchain-google) ──────────────────────────
#
# jobs:
# refresh-profiles:
# uses: langchain-ai/langchain/.github/workflows/_refresh_model_profiles.yml@master
# with:
# providers: >-
# [
# {"provider":"google", "data_dir":"libs/genai/langchain_google_genai/data"},
# ]
# secrets:
# MODEL_PROFILE_BOT_CLIENT_ID: ${{ secrets.MODEL_PROFILE_BOT_CLIENT_ID }}
# MODEL_PROFILE_BOT_PRIVATE_KEY: ${{ secrets.MODEL_PROFILE_BOT_PRIVATE_KEY }}
name: "Refresh Model Profiles (reusable)"
on:
workflow_call:
inputs:
providers:
description: >-
JSON array of objects, each with `provider` (models.dev provider ID)
and `data_dir` (path relative to repo root where `_profiles.py` and
`profile_augmentations.toml` live).
required: true
type: string
cli-path:
description: >-
Path (relative to workspace) to an existing `libs/model-profiles`
checkout. When set the workflow skips cloning the langchain repo and
uses this directory for the CLI instead. Useful when the caller IS
the langchain monorepo.
required: false
type: string
default: ""
cli-ref:
description: >-
Git ref of langchain-ai/langchain to checkout for the CLI.
Ignored when `cli-path` is set.
required: false
type: string
default: master
add-paths:
description: "Glob for files to stage in the PR commit."
required: false
type: string
default: "**/_profiles.py"
pr-branch:
description: "Branch name for the auto-created PR."
required: false
type: string
default: bot/refresh-model-profiles
pr-title:
description: "PR / commit title."
required: false
type: string
default: "chore(model-profiles): refresh model profile data"
pr-body:
description: "PR body."
required: false
type: string
default: |
Automated refresh of model profile data via `langchain-profiles refresh`.
🤖 Generated by the `refresh_model_profiles` workflow.
pr-labels:
description: "Comma-separated labels to apply to the PR."
required: false
type: string
default: bot
secrets:
MODEL_PROFILE_BOT_CLIENT_ID:
required: true
MODEL_PROFILE_BOT_PRIVATE_KEY:
required: true
permissions:
contents: write
pull-requests: write
jobs:
refresh-profiles:
name: refresh model profiles
runs-on: ubuntu-latest
steps:
- name: "📋 Checkout"
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
- name: "📋 Checkout langchain-profiles CLI"
if: inputs.cli-path == ''
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
with:
repository: langchain-ai/langchain
ref: ${{ inputs.cli-ref }}
sparse-checkout: libs/model-profiles
path: _langchain-cli
- name: "🔧 Resolve CLI directory"
id: cli
env:
CLI_PATH: ${{ inputs.cli-path }}
run: |
if [ -n "${CLI_PATH}" ]; then
resolved="${GITHUB_WORKSPACE}/${CLI_PATH}"
if [ ! -d "${resolved}" ]; then
echo "::error::cli-path '${CLI_PATH}' does not exist at ${resolved}"
exit 1
fi
echo "dir=${CLI_PATH}" >> "$GITHUB_OUTPUT"
else
echo "dir=_langchain-cli/libs/model-profiles" >> "$GITHUB_OUTPUT"
fi
- name: "🐍 Set up Python + uv"
uses: astral-sh/setup-uv@0ca8f610542aa7f4acaf39e65cf4eb3c35091883 # v7
with:
version: "0.5.25"
python-version: "3.12"
enable-cache: true
cache-dependency-glob: "**/model-profiles/uv.lock"
- name: "📦 Install langchain-profiles CLI"
working-directory: ${{ steps.cli.outputs.dir }}
run: uv sync --frozen --no-group test --no-group dev --no-group lint
- name: "✅ Validate providers input"
env:
PROVIDERS_JSON: ${{ inputs.providers }}
run: |
echo "${PROVIDERS_JSON}" | jq -e 'type == "array" and length > 0' > /dev/null || {
echo "::error::providers input must be a non-empty JSON array"
exit 1
}
echo "${PROVIDERS_JSON}" | jq -e 'all(has("provider") and has("data_dir"))' > /dev/null || {
echo "::error::every entry in providers must have 'provider' and 'data_dir' keys"
exit 1
}
- name: "🔄 Refresh profiles"
env:
PROVIDERS_JSON: ${{ inputs.providers }}
run: |
cli_dir="${GITHUB_WORKSPACE}/${{ steps.cli.outputs.dir }}"
failed=""
mapfile -t rows < <(echo "${PROVIDERS_JSON}" | jq -c '.[]')
for row in "${rows[@]}"; do
provider=$(echo "${row}" | jq -r '.provider')
data_dir=$(echo "${row}" | jq -r '.data_dir')
echo "--- Refreshing ${provider} -> ${data_dir} ---"
if ! echo y | uv run --frozen --project "${cli_dir}" \
langchain-profiles refresh \
--provider "${provider}" \
--data-dir "${GITHUB_WORKSPACE}/${data_dir}"; then
echo "::error::Failed to refresh provider: ${provider}"
failed="${failed} ${provider}"
fi
done
if [ -n "${failed}" ]; then
echo "::error::The following providers failed:${failed}"
exit 1
fi
- name: "📝 Build PR body with change summary"
id: pr-body
env:
PROVIDERS_JSON: ${{ inputs.providers }}
PR_BODY: ${{ inputs.pr-body }}
run: |
# The refresh step modified the working tree without committing, so
# comparing against HEAD yields exactly the refresh's changes.
cli_dir="${GITHUB_WORKSPACE}/${{ steps.cli.outputs.dir }}"
body_file="${RUNNER_TEMP}/pr_body.md"
printf '%s\n\n' "${PR_BODY}" > "${body_file}"
# `summarize` builds the whole summary in memory and prints it once,
# so a failure exits non-zero before any stdout reaches the append —
# the body keeps only the static note, never a half-written summary.
if ! uv run --frozen --project "${cli_dir}" \
langchain-profiles summarize \
--providers "${PROVIDERS_JSON}" \
--base-ref HEAD \
--repo-root "${GITHUB_WORKSPACE}" >> "${body_file}"; then
echo "::warning::Could not generate change summary; see job log."
# Surface the degradation in the PR body too: the warning above only
# lands in the Actions log, which a PR reviewer won't see.
printf '\n> [!NOTE]\n> Automated change summary unavailable — see the workflow run log.\n' >> "${body_file}"
fi
echo "path=${body_file}" >> "$GITHUB_OUTPUT"
- name: "🔑 Generate GitHub App token"
id: app-token
uses: actions/create-github-app-token@bcd2ba49218906704ab6c1aa796996da409d3eb1 # v3
with:
client-id: ${{ secrets.MODEL_PROFILE_BOT_CLIENT_ID }}
private-key: ${{ secrets.MODEL_PROFILE_BOT_PRIVATE_KEY }}
- name: "🔀 Create pull request"
id: create-pr
uses: peter-evans/create-pull-request@5f6978faf089d4d20b00c7766989d076bb2fc7f1 # v8
with:
token: ${{ steps.app-token.outputs.token }}
branch: ${{ inputs.pr-branch }}
commit-message: ${{ inputs.pr-title }}
title: ${{ inputs.pr-title }}
body-path: ${{ steps.pr-body.outputs.path }}
labels: ${{ inputs.pr-labels }}
add-paths: ${{ inputs.add-paths }}
- name: "📝 Summary"
if: always()
env:
PR_OP: ${{ steps.create-pr.outputs.pull-request-operation }}
PR_URL: ${{ steps.create-pr.outputs.pull-request-url }}
JOB_STATUS: ${{ job.status }}
run: |
if [ "${PR_OP}" = "created" ] || [ "${PR_OP}" = "updated" ]; then
echo "### ✅ PR ${PR_OP}: ${PR_URL}" >> "$GITHUB_STEP_SUMMARY"
elif [ -z "${PR_OP}" ] && [ "${JOB_STATUS}" = "success" ]; then
echo "### ⏭️ Skipped: profiles already up to date" >> "$GITHUB_STEP_SUMMARY"
elif [ "${JOB_STATUS}" = "failure" ]; then
echo "### ❌ Job failed — check step logs for details" >> "$GITHUB_STEP_SUMMARY"
fi