langchain/.github/workflows/_refresh_model_profiles.yml

# Reusable workflow: refreshes model profile data for any repo that uses the
# `langchain-profiles` CLI. Creates (or updates) a pull request with the
# resulting changes.
#
# Callers MUST set `permissions: { contents: write, pull-requests: write }` —
# reusable workflows cannot escalate the caller's token permissions.
#
# ── Example: external repo (langchain-google) ──────────────────────────
#
#   jobs:
#     refresh-profiles:
#       uses: langchain-ai/langchain/.github/workflows/_refresh_model_profiles.yml@master
#       with:
#         providers: >-
#           [
#             {"provider":"google", "data_dir":"libs/genai/langchain_google_genai/data"}
#           ]
#       secrets:
#         MODEL_PROFILE_BOT_CLIENT_ID:      ${{ secrets.MODEL_PROFILE_BOT_CLIENT_ID }}
#         MODEL_PROFILE_BOT_PRIVATE_KEY: ${{ secrets.MODEL_PROFILE_BOT_PRIVATE_KEY }}

name: "Refresh Model Profiles (reusable)"

# Only trigger is `workflow_call`: this file runs when another workflow
# references it with `uses:`. The inputs and secrets below are its interface.
on:
  workflow_call:
    inputs:
      # The only required input: which providers to refresh and where each
      # provider's data files live.
      providers:
        description: >-
          JSON array of objects, each with `provider` (models.dev provider ID)
          and `data_dir` (path relative to repo root where `_profiles.py` and
          `profile_augmentations.toml` live).
        required: true
        type: string
      # Optional: where the CLI comes from. `cli-path` and `cli-ref` are
      # alternatives; the CLI checkout step is skipped when `cli-path` is set.
      cli-path:
        description: >-
          Path (relative to workspace) to an existing `libs/model-profiles`
          checkout.  When set the workflow skips cloning the langchain repo and
          uses this directory for the CLI instead.  Useful when the caller IS
          the langchain monorepo.
        required: false
        type: string
        default: ""
      cli-ref:
        description: >-
          Git ref of langchain-ai/langchain to checkout for the CLI.
          Ignored when `cli-path` is set.
        required: false
        type: string
        default: master
      # Optional: pull-request customization. Every input below has a default
      # and is forwarded to the create-pull-request step.
      add-paths:
        description: "Glob for files to stage in the PR commit."
        required: false
        type: string
        default: "**/_profiles.py"
      pr-branch:
        description: "Branch name for the auto-created PR."
        required: false
        type: string
        default: bot/refresh-model-profiles
      pr-title:
        description: "PR / commit title."
        required: false
        type: string
        default: "chore(model-profiles): refresh model profile data"
      pr-body:
        description: "PR body."
        required: false
        type: string
        default: |
          Automated refresh of model profile data via `langchain-profiles refresh`.

          🤖 Generated by the `refresh_model_profiles` workflow.
      pr-labels:
        description: "Comma-separated labels to apply to the PR."
        required: false
        type: string
        default: bot
    # Both secrets are required; they are consumed by the GitHub App token
    # step, whose token is then used to create the pull request.
    secrets:
      MODEL_PROFILE_BOT_CLIENT_ID:
        required: true
      MODEL_PROFILE_BOT_PRIVATE_KEY:
        required: true

# Token scopes declared for this workflow. Per the header note, the caller
# must grant the same scopes: a reusable workflow cannot escalate them.
permissions:
  contents: write
  pull-requests: write

jobs:
  # Single job: check out, install the CLI, refresh each provider, then
  # create or update the pull request.
  refresh-profiles:
    name: refresh model profiles
    runs-on: ubuntu-latest
    steps:
      # Default checkout (no `repository`/`ref`/`path` overrides) into the
      # workspace root; later steps modify and stage files in this tree.
      - name: "📋 Checkout"
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

      # Fetch only `libs/model-profiles` from langchain-ai/langchain into
      # `_langchain-cli/`. Skipped when the caller supplies `cli-path`.
      - name: "📋 Checkout langchain-profiles CLI"
        if: inputs.cli-path == ''
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
        with:
          repository: langchain-ai/langchain
          ref: ${{ inputs.cli-ref }}
          sparse-checkout: libs/model-profiles
          path: _langchain-cli
          # This checkout is read-only tooling: no later step runs git against
          # it, so do not leave the job token configured in its git config.
          persist-credentials: false

      # Publishes `steps.cli.outputs.dir`: the workspace-relative directory
      # that holds the langchain-profiles CLI project.
      - name: "🔧 Resolve CLI directory"
        id: cli
        env:
          CLI_PATH: ${{ inputs.cli-path }}
        run: |
          # No caller-supplied path: use the sparse checkout location.
          if [ -z "${CLI_PATH}" ]; then
            echo "dir=_langchain-cli/libs/model-profiles" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          # Caller-supplied path: it must already exist inside the workspace.
          abs_dir="${GITHUB_WORKSPACE}/${CLI_PATH}"
          if [ ! -d "${abs_dir}" ]; then
            echo "::error::cli-path '${CLI_PATH}' does not exist at ${abs_dir}"
            exit 1
          fi
          echo "dir=${CLI_PATH}" >> "$GITHUB_OUTPUT"

      # Installs uv and Python at the pinned versions below. Caching is enabled
      # and keyed on any `model-profiles/uv.lock` matched by the glob.
      - name: "🐍 Set up Python + uv"
        uses: astral-sh/setup-uv@0ca8f610542aa7f4acaf39e65cf4eb3c35091883 # v7
        with:
          version: "0.5.25"
          python-version: "3.12"
          enable-cache: true
          cache-dependency-glob: "**/model-profiles/uv.lock"

      # Runs in the directory chosen by the `cli` step. `--frozen` syncs from
      # the existing lockfile without updating it; the test, dev and lint
      # dependency groups are excluded.
      - name: "📦 Install langchain-profiles CLI"
        working-directory: ${{ steps.cli.outputs.dir }}
        run: uv sync --frozen --no-group test --no-group dev --no-group lint

      # Rejects malformed `providers` input with a readable annotation before
      # the refresh loop consumes it.
      - name: "✅ Validate providers input"
        env:
          PROVIDERS_JSON: ${{ inputs.providers }}
        run: |
          # 1. The input must parse as JSON and be a non-empty array.
          if ! jq -e 'type == "array" and length > 0' <<< "${PROVIDERS_JSON}" > /dev/null; then
            echo "::error::providers input must be a non-empty JSON array"
            exit 1
          fi

          # 2. Every element must be an object whose `provider` and `data_dir`
          #    are non-empty strings. Checking key presence alone would accept
          #    null or empty values, which the refresh loop would then turn
          #    into the literal path ".../null" or the workspace root.
          if ! jq -e '
            all(
              type == "object"
              and (.provider | type == "string" and length > 0)
              and (.data_dir | type == "string" and length > 0)
            )
          ' <<< "${PROVIDERS_JSON}" > /dev/null; then
            echo "::error::every entry in providers must be an object with non-empty string 'provider' and 'data_dir' keys"
            exit 1
          fi

      # Runs `langchain-profiles refresh` once per entry in `providers`.
      # Every provider is attempted; the step fails at the end if any failed.
      - name: "🔄 Refresh profiles"
        env:
          PROVIDERS_JSON: ${{ inputs.providers }}
          # Passed through the environment instead of being templated into the
          # script, so a path containing quotes, `$` or spaces is treated as
          # data and cannot change the shell code that runs.
          CLI_DIR: ${{ steps.cli.outputs.dir }}
        run: |
          cli_dir="${GITHUB_WORKSPACE}/${CLI_DIR}"
          failed=""
          # One compact JSON object per array element.
          mapfile -t rows < <(echo "${PROVIDERS_JSON}" | jq -c '.[]')
          for row in "${rows[@]}"; do
            provider=$(echo "${row}" | jq -r '.provider')
            data_dir=$(echo "${row}" | jq -r '.data_dir')
            echo "--- Refreshing ${provider} -> ${data_dir} ---"
            # `echo y` feeds a "y" on stdin, presumably to answer the CLI's
            # confirmation prompt non-interactively.
            if ! echo y | uv run --frozen --project "${cli_dir}" \
              langchain-profiles refresh \
              --provider "${provider}" \
              --data-dir "${GITHUB_WORKSPACE}/${data_dir}"; then
              # Record the failure and keep going so one bad provider does not
              # hide the results for the others.
              echo "::error::Failed to refresh provider: ${provider}"
              failed="${failed} ${provider}"
            fi
          done
          if [ -n "${failed}" ]; then
            echo "::error::The following providers failed:${failed}"
            exit 1
          fi

      # Writes the PR body to a temp file (static note + generated change
      # summary) and publishes its location as `steps.pr-body.outputs.path`.
      - name: "📝 Build PR body with change summary"
        id: pr-body
        env:
          PROVIDERS_JSON: ${{ inputs.providers }}
          PR_BODY: ${{ inputs.pr-body }}
          # Passed through the environment instead of being templated into the
          # script, so a path containing quotes, `$` or spaces is treated as
          # data and cannot change the shell code that runs.
          CLI_DIR: ${{ steps.cli.outputs.dir }}
        run: |
          # The refresh step modified the working tree without committing, so
          # comparing against HEAD yields exactly the refresh's changes.
          cli_dir="${GITHUB_WORKSPACE}/${CLI_DIR}"
          body_file="${RUNNER_TEMP}/pr_body.md"
          printf '%s\n\n' "${PR_BODY}" > "${body_file}"
          # `summarize` builds the whole summary in memory and prints it once,
          # so a failure exits non-zero before any stdout reaches the append —
          # the body keeps only the static note, never a half-written summary.
          if ! uv run --frozen --project "${cli_dir}" \
            langchain-profiles summarize \
            --providers "${PROVIDERS_JSON}" \
            --base-ref HEAD \
            --repo-root "${GITHUB_WORKSPACE}" >> "${body_file}"; then
            echo "::warning::Could not generate change summary; see job log."
            # Surface the degradation in the PR body too: the warning above only
            # lands in the Actions log, which a PR reviewer won't see.
            printf '\n> [!NOTE]\n> Automated change summary unavailable — see the workflow run log.\n' >> "${body_file}"
          fi
          echo "path=${body_file}" >> "$GITHUB_OUTPUT"

      # Generates a token for the GitHub App identified by the two required
      # secrets. The create-pull-request step consumes it via
      # `steps.app-token.outputs.token`.
      - name: "🔑 Generate GitHub App token"
        id: app-token
        uses: actions/create-github-app-token@bcd2ba49218906704ab6c1aa796996da409d3eb1 # v3
        with:
          client-id: ${{ secrets.MODEL_PROFILE_BOT_CLIENT_ID }}
          private-key: ${{ secrets.MODEL_PROFILE_BOT_PRIVATE_KEY }}

      # Commits the refreshed files and opens (or updates) the pull request.
      # Its outputs are read by the summary step via `steps.create-pr`.
      - name: "🔀 Create pull request"
        id: create-pr
        uses: peter-evans/create-pull-request@5f6978faf089d4d20b00c7766989d076bb2fc7f1 # v8
        with:
          # Authenticate as the GitHub App rather than with the job token.
          token: ${{ steps.app-token.outputs.token }}
          branch: ${{ inputs.pr-branch }}
          # The same caller-supplied title is used for the commit and the PR.
          commit-message: ${{ inputs.pr-title }}
          title: ${{ inputs.pr-title }}
          # Body file produced by the `pr-body` step.
          body-path: ${{ steps.pr-body.outputs.path }}
          labels: ${{ inputs.pr-labels }}
          # Restricts what gets staged; the default glob is `**/_profiles.py`.
          add-paths: ${{ inputs.add-paths }}

      # Always runs (even after a failure) and writes a one-line outcome to the
      # job summary page.
      - name: "📝 Summary"
        if: always()
        env:
          PR_OP: ${{ steps.create-pr.outputs.pull-request-operation }}
          PR_URL: ${{ steps.create-pr.outputs.pull-request-url }}
          JOB_STATUS: ${{ job.status }}
        run: |
          if [ "${PR_OP}" = "created" ] || [ "${PR_OP}" = "updated" ]; then
            echo "### ✅ PR ${PR_OP}: ${PR_URL}" >> "$GITHUB_STEP_SUMMARY"
          elif [ "${JOB_STATUS}" = "failure" ]; then
            echo "### ❌ Job failed — check step logs for details" >> "$GITHUB_STEP_SUMMARY"
          elif [ "${JOB_STATUS}" = "success" ] && { [ -z "${PR_OP}" ] || [ "${PR_OP}" = "none" ]; }; then
            # An empty output and an explicit `none` both mean the action had
            # nothing to do. NOTE(review): recent create-pull-request releases
            # appear to report `none` rather than leaving the output empty —
            # confirm against the pinned version.
            echo "### ⏭️ Skipped: profiles already up to date" >> "$GITHUB_STEP_SUMMARY"
          else
            # Any remaining combination (e.g. a cancelled run, or an operation
            # value not handled above) still gets a line so the summary is
            # never silently empty.
            echo "### ℹ️ Finished with status '${JOB_STATUS}' (PR operation: ${PR_OP:-n/a})" >> "$GITHUB_STEP_SUMMARY"
          fi