diff --git a/.github/pr-file-labeler.yml b/.github/pr-file-labeler.yml deleted file mode 100644 index 4e2de5926c9..00000000000 --- a/.github/pr-file-labeler.yml +++ /dev/null @@ -1,128 +0,0 @@ -# Label PRs (config) -# Automatically applies labels based on changed files and branch patterns - -# Core packages -core: - - changed-files: - - any-glob-to-any-file: - - "libs/core/**/*" - -langchain-classic: - - changed-files: - - any-glob-to-any-file: - - "libs/langchain/**/*" - -langchain: - - changed-files: - - any-glob-to-any-file: - - "libs/langchain_v1/**/*" - -standard-tests: - - changed-files: - - any-glob-to-any-file: - - "libs/standard-tests/**/*" - -model-profiles: - - changed-files: - - any-glob-to-any-file: - - "libs/model-profiles/**/*" - -text-splitters: - - changed-files: - - any-glob-to-any-file: - - "libs/text-splitters/**/*" - -# Partner integrations -integration: - - changed-files: - - any-glob-to-any-file: - - "libs/partners/**/*" - -anthropic: - - changed-files: - - any-glob-to-any-file: - - "libs/partners/anthropic/**/*" - -chroma: - - changed-files: - - any-glob-to-any-file: - - "libs/partners/chroma/**/*" - -deepseek: - - changed-files: - - any-glob-to-any-file: - - "libs/partners/deepseek/**/*" - -exa: - - changed-files: - - any-glob-to-any-file: - - "libs/partners/exa/**/*" - -fireworks: - - changed-files: - - any-glob-to-any-file: - - "libs/partners/fireworks/**/*" - -groq: - - changed-files: - - any-glob-to-any-file: - - "libs/partners/groq/**/*" - -huggingface: - - changed-files: - - any-glob-to-any-file: - - "libs/partners/huggingface/**/*" - -mistralai: - - changed-files: - - any-glob-to-any-file: - - "libs/partners/mistralai/**/*" - -nomic: - - changed-files: - - any-glob-to-any-file: - - "libs/partners/nomic/**/*" - -ollama: - - changed-files: - - any-glob-to-any-file: - - "libs/partners/ollama/**/*" - -openai: - - changed-files: - - any-glob-to-any-file: - - "libs/partners/openai/**/*" - -openrouter: - - changed-files: - - 
any-glob-to-any-file: - - "libs/partners/openrouter/**/*" - -perplexity: - - changed-files: - - any-glob-to-any-file: - - "libs/partners/perplexity/**/*" - -qdrant: - - changed-files: - - any-glob-to-any-file: - - "libs/partners/qdrant/**/*" - -xai: - - changed-files: - - any-glob-to-any-file: - - "libs/partners/xai/**/*" - -github_actions: - - changed-files: - - any-glob-to-any-file: - - ".github/workflows/**/*" - - ".github/actions/**/*" - -dependencies: - - changed-files: - - any-glob-to-any-file: - - "**/pyproject.toml" - - "uv.lock" - - "**/requirements*.txt" - - "**/poetry.lock" diff --git a/.github/scripts/pr-labeler-config.json b/.github/scripts/pr-labeler-config.json new file mode 100644 index 00000000000..de514db239d --- /dev/null +++ b/.github/scripts/pr-labeler-config.json @@ -0,0 +1,84 @@ +{ + "trustedThreshold": 5, + "labelColor": "b76e79", + "sizeThresholds": [ + { "label": "size: XS", "max": 50 }, + { "label": "size: S", "max": 200 }, + { "label": "size: M", "max": 500 }, + { "label": "size: L", "max": 1000 }, + { "label": "size: XL" } + ], + "excludedFiles": ["uv.lock"], + "excludedPaths": ["docs/"], + "typeToLabel": { + "feat": "feature", + "fix": "fix", + "docs": "documentation", + "style": "linting", + "refactor": "refactor", + "perf": "performance", + "test": "tests", + "build": "infra", + "ci": "infra", + "chore": "infra", + "revert": "revert", + "release": "release", + "hotfix": "hotfix", + "breaking": "breaking" + }, + "scopeToLabel": { + "core": "core", + "langchain": "langchain", + "langchain-classic": "langchain-classic", + "model-profiles": "model-profiles", + "standard-tests": "standard-tests", + "text-splitters": "text-splitters", + "anthropic": "anthropic", + "chroma": "chroma", + "deepseek": "deepseek", + "exa": "exa", + "fireworks": "fireworks", + "groq": "groq", + "huggingface": "huggingface", + "mistralai": "mistralai", + "nomic": "nomic", + "ollama": "ollama", + "openai": "openai", + "openrouter": "openrouter", + "perplexity": 
"perplexity", + "qdrant": "qdrant", + "xai": "xai", + "deps": "dependencies", + "docs": "documentation", + "infra": "infra" + }, + "fileRules": [ + { "label": "core", "prefix": "libs/core/" }, + { "label": "langchain-classic", "prefix": "libs/langchain/" }, + { "label": "langchain", "prefix": "libs/langchain_v1/" }, + { "label": "standard-tests", "prefix": "libs/standard-tests/" }, + { "label": "model-profiles", "prefix": "libs/model-profiles/" }, + { "label": "text-splitters", "prefix": "libs/text-splitters/" }, + { "label": "integration", "prefix": "libs/partners/" }, + { "label": "anthropic", "prefix": "libs/partners/anthropic/" }, + { "label": "chroma", "prefix": "libs/partners/chroma/" }, + { "label": "deepseek", "prefix": "libs/partners/deepseek/" }, + { "label": "exa", "prefix": "libs/partners/exa/" }, + { "label": "fireworks", "prefix": "libs/partners/fireworks/" }, + { "label": "groq", "prefix": "libs/partners/groq/" }, + { "label": "huggingface", "prefix": "libs/partners/huggingface/" }, + { "label": "mistralai", "prefix": "libs/partners/mistralai/" }, + { "label": "nomic", "prefix": "libs/partners/nomic/" }, + { "label": "ollama", "prefix": "libs/partners/ollama/" }, + { "label": "openai", "prefix": "libs/partners/openai/" }, + { "label": "openrouter", "prefix": "libs/partners/openrouter/" }, + { "label": "perplexity", "prefix": "libs/partners/perplexity/" }, + { "label": "qdrant", "prefix": "libs/partners/qdrant/" }, + { "label": "xai", "prefix": "libs/partners/xai/" }, + { "label": "github_actions", "prefix": ".github/workflows/" }, + { "label": "github_actions", "prefix": ".github/actions/" }, + { "label": "dependencies", "suffix": "pyproject.toml" }, + { "label": "dependencies", "exact": "uv.lock" }, + { "label": "dependencies", "pattern": "(?:^|/)requirements[^/]*\\.txt$" } + ] +} diff --git a/.github/scripts/pr-labeler.js b/.github/scripts/pr-labeler.js new file mode 100644 index 00000000000..bc0639bffb7 --- /dev/null +++ 
b/.github/scripts/pr-labeler.js
@@ -0,0 +1,354 @@
+// Shared helpers for pr_labeler.yml and tag-external-issues.yml.
+//
+// Usage from actions/github-script (requires actions/checkout first):
+//   const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo, core);
+//
+// `core` is optional. When omitted, the helpers try to load @actions/core and
+// fall back to plain workflow-command logging if it cannot be resolved.
+
+const fs = require('fs');
+const path = require('path');
+
+/**
+ * Read and validate pr-labeler-config.json (located next to this file).
+ *
+ * @returns {object} Parsed config containing every required top-level key.
+ * @throws {Error} If the file cannot be read, is not valid JSON, is not a
+ *   JSON object, or is missing a required key.
+ */
+function loadConfig() {
+  const configPath = path.join(__dirname, 'pr-labeler-config.json');
+  let raw;
+  try {
+    raw = fs.readFileSync(configPath, 'utf8');
+  } catch (e) {
+    throw new Error(`Failed to read ${configPath}: ${e.message}`);
+  }
+  let config;
+  try {
+    config = JSON.parse(raw);
+  } catch (e) {
+    throw new Error(`Failed to parse pr-labeler-config.json: ${e.message}`);
+  }
+  // `k in config` below throws a bare TypeError for null/primitives, so
+  // reject non-object documents with a readable message first.
+  if (config === null || typeof config !== 'object' || Array.isArray(config)) {
+    throw new Error('pr-labeler-config.json must contain a JSON object');
+  }
+  const required = [
+    'labelColor', 'sizeThresholds', 'fileRules',
+    'typeToLabel', 'scopeToLabel', 'trustedThreshold',
+    'excludedFiles', 'excludedPaths',
+  ];
+  const missing = required.filter(k => !(k in config));
+  if (missing.length > 0) {
+    throw new Error(`pr-labeler-config.json missing required keys: ${missing.join(', ')}`);
+  }
+  return config;
+}
+
+/**
+ * Pick the logger used for info/warning output.
+ *
+ * This file is loaded with Node's own module resolution, so a bare
+ * require('@actions/core') only succeeds when that package is resolvable
+ * from this directory. Prefer an injected instance, then try the
+ * package, then fall back to emitting workflow commands on stdout.
+ *
+ * @param {object} [core] Optional @actions/core-compatible object.
+ * @returns {{info: Function, warning: Function}}
+ */
+function resolveCore(core) {
+  if (core) return core;
+  try {
+    return require('@actions/core');
+  } catch (e) {
+    if (e.code !== 'MODULE_NOT_FOUND') throw e;
+    return {
+      info: msg => console.log(msg),
+      warning: msg => console.log(`::warning::${msg}`),
+    };
+  }
+}
+
+/**
+ * Look up `key` among the map's own properties only.
+ *
+ * Keys come from PR titles (untrusted), so inherited names such as
+ * "constructor" or "__proto__" must not resolve to Object.prototype members.
+ *
+ * @param {object} map Plain lookup object from the config.
+ * @param {string} key Candidate key.
+ * @returns {*} The mapped value, or null when absent or falsy.
+ */
+function ownValue(map, key) {
+  return Object.prototype.hasOwnProperty.call(map, key) ? map[key] || null : null;
+}
+
+/**
+ * Build the helper set bound to one repository.
+ *
+ * @param {object} github Octokit client as provided by actions/github-script.
+ * @param {string} owner Repository owner.
+ * @param {string} repo Repository name.
+ * @param {object} config Result of loadConfig().
+ * @param {object} [core] Optional @actions/core-compatible logger.
+ * @returns {object} Helper functions plus derived label lists.
+ */
+function init(github, owner, repo, config, core) {
+  const {
+    trustedThreshold,
+    labelColor,
+    sizeThresholds,
+    scopeToLabel,
+    typeToLabel,
+    fileRules: fileRulesDef,
+    excludedFiles,
+    excludedPaths,
+  } = config;
+
+  const log = resolveCore(core);
+  const sizeLabels = sizeThresholds.map(t => t.label);
+  const allTypeLabels = [...new Set(Object.values(typeToLabel))];
+  const tierLabels = ['new-contributor', 'trusted-contributor'];
+
+  // ── Label management ──────────────────────────────────────────────
+
+  // Create the label if it does not exist yet; tolerate a concurrent creator.
+  async function ensureLabel(name, color = labelColor) {
+    try {
+      await github.rest.issues.getLabel({ owner, repo, name });
+    } catch (e) {
+      if (e.status !== 404) throw e;
+      try {
+        await github.rest.issues.createLabel({ owner, repo, name, color });
+      } catch (createErr) {
+        // 422 = label created by a concurrent run between our get and create
+        if (createErr.status !== 422) throw createErr;
+        log.info(`Label "${name}" creation returned 422 (likely already exists)`);
+      }
+    }
+  }
+
+  // ── Size calculation ──────────────────────────────────────────────
+
+  // Return the label of the first threshold whose `max` exceeds totalChanged.
+  function getSizeLabel(totalChanged) {
+    for (const t of sizeThresholds) {
+      if (t.max != null && totalChanged < t.max) return t.label;
+    }
+    // Last entry has no max — it's the catch-all
+    return sizeThresholds[sizeThresholds.length - 1].label;
+  }
+
+  // Sum additions + deletions over files, skipping excluded basenames and
+  // excluded path prefixes, and map the total to a size label.
+  function computeSize(files) {
+    const excluded = new Set(excludedFiles);
+    const totalChanged = files.reduce((sum, f) => {
+      const p = f.filename ?? '';
+      const base = p.split('/').pop();
+      if (excluded.has(base)) return sum;
+      for (const prefix of excludedPaths) {
+        if (p.startsWith(prefix)) return sum;
+      }
+      return sum + (f.additions ?? 0) + (f.deletions ?? 0);
+    }, 0);
+    return { totalChanged, sizeLabel: getSizeLabel(totalChanged) };
+  }
+
+  // ── File-based labels ─────────────────────────────────────────────
+
+  // Compile config fileRules into { label, test(path) } pairs. Throws when a
+  // rule has none of prefix / suffix / exact / pattern.
+  function buildFileRules() {
+    return fileRulesDef.map((rule, i) => {
+      let test;
+      if (rule.prefix) test = p => p.startsWith(rule.prefix);
+      else if (rule.suffix) test = p => p.endsWith(rule.suffix);
+      else if (rule.exact) test = p => p === rule.exact;
+      else if (rule.pattern) {
+        const re = new RegExp(rule.pattern);
+        test = p => re.test(p);
+      } else {
+        throw new Error(
+          `fileRules[${i}] (label: "${rule.label}") has no recognized matcher ` +
+          `(expected one of: prefix, suffix, exact, pattern)`
+        );
+      }
+      return { label: rule.label, test };
+    });
+  }
+
+  // Collect the labels of every rule matched by at least one file. Pass
+  // prebuilt rules to avoid recompiling them per call.
+  function matchFileLabels(files, fileRules) {
+    const rules = fileRules || buildFileRules();
+    const labels = new Set();
+    for (const rule of rules) {
+      if (files.some(f => rule.test(f.filename ?? ''))) {
+        labels.add(rule.label);
+      }
+    }
+    return labels;
+  }
+
+  // ── Title-based labels ────────────────────────────────────────────
+
+  // Parse a Conventional Commits title ("type(scope1, scope2)!: ...") into
+  // type / scope / breaking labels. Unknown types and scopes add no label.
+  function matchTitleLabels(title) {
+    const labels = new Set();
+    const m = (title ?? '').match(/^(\w+)(?:\(([^)]+)\))?(!)?:/);
+    if (!m) return { labels, type: null, typeLabel: null, scopes: [], breaking: false };
+
+    const type = m[1].toLowerCase();
+    const scopeStr = m[2] ?? '';
+    const breaking = !!m[3];
+
+    const typeLabel = ownValue(typeToLabel, type);
+    if (typeLabel) labels.add(typeLabel);
+    if (breaking) labels.add('breaking');
+
+    const scopes = scopeStr.split(',').map(s => s.trim()).filter(Boolean);
+    for (const scope of scopes) {
+      const sl = ownValue(scopeToLabel, scope);
+      if (sl) labels.add(sl);
+    }
+
+    return { labels, type, typeLabel, scopes, breaking };
+  }
+
+  // ── Org membership ────────────────────────────────────────────────
+
+  // Classify the author against the langchain-ai org. Bots count as
+  // internal; a 404 or a non-active membership state means external.
+  async function checkMembership(author, userType) {
+    if (userType === 'Bot') {
+      console.log(`${author} is a Bot — treating as internal`);
+      return { isExternal: false };
+    }
+
+    try {
+      const membership = await github.rest.orgs.getMembershipForUser({
+        org: 'langchain-ai',
+        username: author,
+      });
+      const isExternal = membership.data.state !== 'active';
+      console.log(
+        isExternal
+          ? `${author} has pending membership — treating as external`
+          : `${author} is an active member of langchain-ai`,
+      );
+      return { isExternal };
+    } catch (e) {
+      if (e.status === 404) {
+        console.log(`${author} is not a member of langchain-ai`);
+        return { isExternal: true };
+      }
+      // Non-404 errors (rate limit, auth failure, server error) must not
+      // silently default to external — rethrow to fail the step.
+      throw new Error(
+        `Membership check failed for ${author} (${e.status}): ${e.message}`,
+      );
+    }
+  }
+
+  // ── Contributor analysis ──────────────────────────────────────────
+
+  // Membership plus merged-PR count for an author, memoized in the
+  // caller-supplied Map. mergedCount stays null for internal authors.
+  async function getContributorInfo(contributorCache, author, userType) {
+    if (contributorCache.has(author)) return contributorCache.get(author);
+
+    const { isExternal } = await checkMembership(author, userType);
+
+    let mergedCount = null;
+    if (isExternal) {
+      try {
+        const result = await github.rest.search.issuesAndPullRequests({
+          q: `repo:${owner}/${repo} is:pr is:merged author:"${author}"`,
+          per_page: 1,
+        });
+        mergedCount = result?.data?.total_count ?? null;
+      } catch (e) {
+        if (e?.status !== 422) throw e;
+        log.warning(`Search failed for ${author}; skipping tier.`);
+      }
+    }
+
+    const info = { isExternal, mergedCount };
+    contributorCache.set(author, info);
+    return info;
+  }
+
+  // ── Tier label resolution ───────────────────────────────────────────
+
+  // Add 'trusted-contributor' or 'new-contributor' to the issue/PR based on
+  // the author's merged-PR count. Returns the label applied, null when none
+  // applies, or undefined when the count could not be determined.
+  async function applyTierLabel(issueNumber, author, { skipNewContributor = false } = {}) {
+    let mergedCount;
+    try {
+      const result = await github.rest.search.issuesAndPullRequests({
+        q: `repo:${owner}/${repo} is:pr is:merged author:"${author}"`,
+        per_page: 1,
+      });
+      mergedCount = result?.data?.total_count;
+    } catch (error) {
+      if (error?.status !== 422) throw error;
+      log.warning(`Search failed for ${author}; skipping tier label.`);
+      return;
+    }
+
+    if (mergedCount == null) {
+      log.warning(`Search response missing total_count for ${author}; skipping tier label.`);
+      return;
+    }
+
+    let tierLabel = null;
+    if (mergedCount >= trustedThreshold) tierLabel = 'trusted-contributor';
+    else if (mergedCount === 0 && !skipNewContributor) tierLabel = 'new-contributor';
+
+    if (tierLabel) {
+      await ensureLabel(tierLabel);
+      await github.rest.issues.addLabels({
+        owner, repo, issue_number: issueNumber, labels: [tierLabel],
+      });
+      console.log(`Applied '${tierLabel}' to #${issueNumber} (${mergedCount} merged PRs)`);
+    } else {
+      console.log(`No tier label for ${author} (${mergedCount} merged PRs)`);
+    }
+
+    return tierLabel;
+  }
+
+  return {
+    ensureLabel,
+    getSizeLabel,
+    computeSize,
+    buildFileRules,
+    matchFileLabels,
+    matchTitleLabels,
+    allTypeLabels,
+    checkMembership,
+    getContributorInfo,
+    applyTierLabel,
+    sizeLabels,
+    tierLabels,
+    trustedThreshold,
+    labelColor,
+  };
+}
+
+/**
+ * Convenience wrapper: load the config and bind helpers in one call.
+ *
+ * @returns {{config: object, h: object}}
+ */
+function loadAndInit(github, owner, repo, core) {
+  const config = loadConfig();
+  return { config, h: init(github, owner, repo, config, core) };
+}
+
+module.exports = { loadConfig, init, loadAndInit };
diff --git a/.github/workflows/pr_labeler.yml b/.github/workflows/pr_labeler.yml
new file mode 100644
index 00000000000..544943da3b2
--- /dev/null
+++ b/.github/workflows/pr_labeler.yml
@@ -0,0 +1,212 @@
+# Unified PR labeler — applies size, file-based, title-based, and
+# contributor classification labels in a single sequential workflow.
+#
+# Consolidates pr_labeler_file.yml, pr_labeler_title.yml,
+# pr_size_labeler.yml, and PR-handling from tag-external-contributions.yml
+# into one workflow to eliminate race conditions from concurrent label
+# mutations. tag-external-issues.yml remains active for issue-only
+# labeling. Backfill lives in pr_labeler_backfill.yml.
+#
+# Config and shared logic live in .github/scripts/pr-labeler-config.json
+# and .github/scripts/pr-labeler.js — update those when adding partners.
+#
+# Setup Requirements:
+# 1. Create a GitHub App with permissions:
+#    - Repository: Pull requests (write)
+#    - Repository: Issues (write)
+#    - Organization: Members (read)
+# 2. Install the app on your organization and this repository
+# 3. Add these repository secrets:
+#    - ORG_MEMBERSHIP_APP_ID: Your app's ID
+#    - ORG_MEMBERSHIP_APP_PRIVATE_KEY: Your app's private key
+#
+# The GitHub App token is required to check private organization membership
+# and to propagate label events to downstream workflows.
+ +name: "PR Labeler" + +on: + # Safe since we're not checking out or running the PR's code. + # Never check out the PR's head in a pull_request_target job. + pull_request_target: + types: [opened, synchronize, reopened, edited] + +permissions: + contents: read + +concurrency: + # Separate opened events so external/tier labels are never lost to cancellation + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}-${{ github.event.action == 'opened' && 'opened' || 'update' }} + cancel-in-progress: ${{ github.event.action != 'opened' }} + +jobs: + label: + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + issues: write + + steps: + # Checks out the BASE branch (safe for pull_request_target — never + # the PR head). Needed to load .github/scripts/pr-labeler*. + - uses: actions/checkout@v6 + + - name: Generate GitHub App token + if: github.event.action == 'opened' + id: app-token + uses: actions/create-github-app-token@v3 + with: + app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }} + private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }} + + - name: Verify App token + if: github.event.action == 'opened' + run: | + if [ -z "${{ steps.app-token.outputs.token }}" ]; then + echo "::error::GitHub App token generation failed — cannot classify contributor" + exit 1 + fi + + - name: Check org membership + if: github.event.action == 'opened' + id: check-membership + uses: actions/github-script@v8 + with: + github-token: ${{ steps.app-token.outputs.token }} + script: | + const { owner, repo } = context.repo; + const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo); + + const author = context.payload.sender.login; + const { isExternal } = await h.checkMembership( + author, context.payload.sender.type, + ); + core.setOutput('is-external', isExternal ? 
'true' : 'false'); + + - name: Apply PR labels + uses: actions/github-script@v8 + env: + IS_EXTERNAL: ${{ steps.check-membership.outputs.is-external }} + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const { owner, repo } = context.repo; + const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo); + + const pr = context.payload.pull_request; + if (!pr) return; + const prNumber = pr.number; + const action = context.payload.action; + + const toAdd = new Set(); + const toRemove = new Set(); + + const currentLabels = (await github.paginate( + github.rest.issues.listLabelsOnIssue, + { owner, repo, issue_number: prNumber, per_page: 100 }, + )).map(l => l.name ?? ''); + + // ── Size + file labels (skip on 'edited' — files unchanged) ── + if (action !== 'edited') { + for (const sl of h.sizeLabels) await h.ensureLabel(sl); + + const files = await github.paginate(github.rest.pulls.listFiles, { + owner, repo, pull_number: prNumber, per_page: 100, + }); + + const { totalChanged, sizeLabel } = h.computeSize(files); + toAdd.add(sizeLabel); + for (const sl of h.sizeLabels) { + if (currentLabels.includes(sl) && sl !== sizeLabel) toRemove.add(sl); + } + console.log(`Size: ${totalChanged} changed lines → ${sizeLabel}`); + + for (const label of h.matchFileLabels(files)) { + toAdd.add(label); + } + } + + // ── Title-based labels ── + const { labels: titleLabels, typeLabel } = h.matchTitleLabels(pr.title || ''); + for (const label of titleLabels) toAdd.add(label); + + // Remove stale type labels only when a type was detected + if (typeLabel) { + for (const tl of h.allTypeLabels) { + if (currentLabels.includes(tl) && !titleLabels.has(tl)) toRemove.add(tl); + } + } + + // ── Internal label (only on open, non-external contributors) ── + // IS_EXTERNAL is empty string on non-opened events (step didn't + // run), so this guard is only true for opened + internal. 
+ if (action === 'opened' && process.env.IS_EXTERNAL === 'false') { + toAdd.add('internal'); + } + + // ── Apply changes ── + // Ensure all labels we're about to add exist (addLabels returns + // 422 if any label in the batch is missing, which would prevent + // ALL labels from being applied). + for (const name of toAdd) { + await h.ensureLabel(name); + } + + for (const name of toRemove) { + if (toAdd.has(name)) continue; + try { + await github.rest.issues.removeLabel({ + owner, repo, issue_number: prNumber, name, + }); + } catch (e) { + if (e.status !== 404) throw e; + } + } + + const addList = [...toAdd]; + if (addList.length > 0) { + await github.rest.issues.addLabels({ + owner, repo, issue_number: prNumber, labels: addList, + }); + } + + const removed = [...toRemove].filter(r => !toAdd.has(r)); + console.log(`PR #${prNumber}: +[${addList.join(', ')}] -[${removed.join(', ')}]`); + + # Apply tier label BEFORE the external label so that + # "trusted-contributor" is already present when the "external" labeled + # event fires and triggers require_issue_link.yml. + - name: Apply contributor tier label + if: github.event.action == 'opened' && steps.check-membership.outputs.is-external == 'true' + uses: actions/github-script@v8 + with: + github-token: ${{ steps.app-token.outputs.token }} + script: | + const { owner, repo } = context.repo; + const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo); + + const pr = context.payload.pull_request; + await h.applyTierLabel(pr.number, pr.user.login); + + - name: Add external label + if: github.event.action == 'opened' && steps.check-membership.outputs.is-external == 'true' + uses: actions/github-script@v8 + with: + # Use App token so the "labeled" event propagates to downstream + # workflows (e.g. require_issue_link.yml). Events created by the + # default GITHUB_TOKEN do not trigger additional workflow runs. 
+          github-token: ${{ steps.app-token.outputs.token }}
+          script: |
+            const { owner, repo } = context.repo;
+            const prNumber = context.payload.pull_request.number;
+
+            const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo);
+
+            await h.ensureLabel('external');
+            await github.rest.issues.addLabels({
+              owner, repo,
+              issue_number: prNumber,
+              labels: ['external'],
+            });
+            console.log(`Added 'external' label to PR #${prNumber}`);
diff --git a/.github/workflows/pr_labeler_backfill.yml b/.github/workflows/pr_labeler_backfill.yml
new file mode 100644
index 00000000000..fe69500d525
--- /dev/null
+++ b/.github/workflows/pr_labeler_backfill.yml
@@ -0,0 +1,135 @@
+# Backfill PR labels on all open PRs.
+#
+# Manual-only workflow that applies the same labels as pr_labeler.yml
+# (size, file, title, contributor classification) to existing open PRs.
+# Reuses shared logic from .github/scripts/pr-labeler.js.
+
+name: "PR Labeler Backfill"
+
+on:
+  workflow_dispatch:
+    inputs:
+      max_items:
+        description: "Maximum number of open PRs to process"
+        default: "100"
+        type: string
+
+permissions:
+  contents: read
+
+jobs:
+  backfill:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+      issues: write
+
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Generate GitHub App token
+        id: app-token
+        uses: actions/create-github-app-token@v3
+        with:
+          app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }}
+          private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }}
+
+      - name: Backfill labels on open PRs
+        uses: actions/github-script@v8
+        env:
+          # Pass the input through the environment rather than interpolating it
+          # into the script source, so its content is never parsed as code.
+          MAX_ITEMS: ${{ inputs.max_items }}
+        with:
+          github-token: ${{ steps.app-token.outputs.token }}
+          script: |
+            const { owner, repo } = context.repo;
+            const rawMax = (process.env.MAX_ITEMS ?? '').trim();
+            // Digits only: parseInt alone would accept values like "12abc".
+            const maxItems = /^\d+$/.test(rawMax) ? parseInt(rawMax, 10) : NaN;
+            if (isNaN(maxItems) || maxItems <= 0) {
+              core.setFailed(`Invalid max_items: "${rawMax}" — must be a positive integer`);
+              return;
+            }
+
+            const { h } =
require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo); + + for (const name of [...h.sizeLabels, ...h.tierLabels]) { + await h.ensureLabel(name); + } + + const contributorCache = new Map(); + const fileRules = h.buildFileRules(); + + const prs = await github.paginate(github.rest.pulls.list, { + owner, repo, state: 'open', per_page: 100, + }); + + let processed = 0; + let failures = 0; + for (const pr of prs) { + if (processed >= maxItems) break; + try { + const author = pr.user.login; + const info = await h.getContributorInfo(contributorCache, author, pr.user.type); + const labels = new Set(); + + labels.add(info.isExternal ? 'external' : 'internal'); + if (info.isExternal && info.mergedCount != null && info.mergedCount >= h.trustedThreshold) { + labels.add('trusted-contributor'); + } else if (info.isExternal && info.mergedCount === 0) { + labels.add('new-contributor'); + } + + // Size + file labels + const files = await github.paginate(github.rest.pulls.listFiles, { + owner, repo, pull_number: pr.number, per_page: 100, + }); + const { sizeLabel } = h.computeSize(files); + labels.add(sizeLabel); + + for (const label of h.matchFileLabels(files, fileRules)) { + labels.add(label); + } + + // Title labels + const { labels: titleLabels } = h.matchTitleLabels(pr.title ?? ''); + for (const tl of titleLabels) labels.add(tl); + + // Ensure all labels exist before batch add + for (const name of labels) { + await h.ensureLabel(name); + } + + // Remove stale managed labels + const currentLabels = (await github.paginate( + github.rest.issues.listLabelsOnIssue, + { owner, repo, issue_number: pr.number, per_page: 100 }, + )).map(l => l.name ?? 
''); + + const managed = [...h.sizeLabels, ...h.tierLabels, ...h.allTypeLabels]; + for (const name of currentLabels) { + if (managed.includes(name) && !labels.has(name)) { + try { + await github.rest.issues.removeLabel({ + owner, repo, issue_number: pr.number, name, + }); + } catch (e) { + if (e.status !== 404) throw e; + } + } + } + + await github.rest.issues.addLabels({ + owner, repo, issue_number: pr.number, labels: [...labels], + }); + console.log(`PR #${pr.number} (${author}): ${[...labels].join(', ')}`); + processed++; + } catch (e) { + failures++; + core.warning(`Failed to process PR #${pr.number}: ${e.message}`); + } + } + + console.log(`\nBackfill complete. Processed ${processed} PRs, ${failures} failures. ${contributorCache.size} unique authors.`); diff --git a/.github/workflows/pr_labeler_file.yml b/.github/workflows/pr_labeler_file.yml deleted file mode 100644 index a23350cb5ca..00000000000 --- a/.github/workflows/pr_labeler_file.yml +++ /dev/null @@ -1,31 +0,0 @@ -# Label PRs based on changed files. -# -# See `.github/pr-file-labeler.yml` to see rules for each label/directory. - -name: "🏷️ Pull Request Labeler" - -on: - # Safe since we're not checking out or running the PR's code - # Never check out the PR's head in a pull_request_target job - pull_request_target: - types: [opened, synchronize, reopened] - -permissions: - contents: read - -jobs: - labeler: - name: "label" - permissions: - contents: read - pull-requests: write - issues: write - runs-on: ubuntu-latest - - steps: - - name: Label Pull Request - uses: actions/labeler@v6 - with: - repo-token: "${{ secrets.GITHUB_TOKEN }}" - configuration-path: .github/pr-file-labeler.yml - sync-labels: false diff --git a/.github/workflows/pr_labeler_title.yml b/.github/workflows/pr_labeler_title.yml deleted file mode 100644 index e2448b0663e..00000000000 --- a/.github/workflows/pr_labeler_title.yml +++ /dev/null @@ -1,47 +0,0 @@ -# Label PRs based on their titles. 
-# -# Uses conventional commit types from PR titles to apply labels. -# Note: Scope-based labeling (e.g., integration labels) is handled by pr_labeler_file.yml - -name: "🏷️ PR Title Labeler" - -on: - # Safe since we're not checking out or running the PR's code - # Never check out the PR's head in a pull_request_target job - pull_request_target: - types: [opened, edited] - -permissions: - contents: read - -jobs: - pr-title-labeler: - name: "label" - permissions: - contents: read - pull-requests: write - issues: write - runs-on: ubuntu-latest - - steps: - - name: Label PR based on title - uses: bcoe/conventional-release-labels@b503ca473654e07521c051628c5f1f969e7436da # v1 - with: - token: ${{ secrets.GITHUB_TOKEN }} - type_labels: >- - { - "feat": "feature", - "fix": "fix", - "docs": "documentation", - "style": "linting", - "refactor": "refactor", - "perf": "performance", - "test": "tests", - "build": "infra", - "ci": "infra", - "chore": "infra", - "revert": "revert", - "release": "release", - "breaking": "breaking" - } - ignored_types: '[]' diff --git a/.github/workflows/pr_lint.yml b/.github/workflows/pr_lint.yml index 107a3ec811f..310474c4174 100644 --- a/.github/workflows/pr_lint.yml +++ b/.github/workflows/pr_lint.yml @@ -66,6 +66,15 @@ jobs: name: "validate format" runs-on: ubuntu-latest steps: + - name: "🚫 Reject empty scope" + env: + PR_TITLE: ${{ github.event.pull_request.title }} + run: | + if [[ "$PR_TITLE" =~ ^[a-z]+\(\)[!]?: ]]; then + echo "::error::PR title has empty scope parentheses: '$PR_TITLE'" + echo "Either remove the parentheses or provide a scope (e.g., 'fix(core): ...')." 
+ exit 1 + fi - name: "✅ Validate Conventional Commits Format" uses: amannn/action-semantic-pull-request@48f256284bd46cdaab1048c3721360e808335d50 # v6 env: diff --git a/.github/workflows/pr_size_labeler.yml b/.github/workflows/pr_size_labeler.yml deleted file mode 100644 index 9dc4b6a4adc..00000000000 --- a/.github/workflows/pr_size_labeler.yml +++ /dev/null @@ -1,174 +0,0 @@ -# Label PRs by size (changed lines, excluding lockfiles and docs). -# -# Size thresholds: -# XS: < 50, S: < 200, M: < 500, L: < 1000, XL: >= 1000 - -name: "📏 PR Size Labeler" - -on: - pull_request_target: - types: [opened, synchronize, reopened] - workflow_dispatch: - inputs: - max_items: - description: "Maximum number of open PRs to process" - default: "100" - type: string - -permissions: - contents: read - -jobs: - size-label: - if: github.event_name != 'workflow_dispatch' - runs-on: ubuntu-latest - permissions: - pull-requests: write - issues: write - - steps: - - name: Apply PR size label - uses: actions/github-script@v8 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - const { owner, repo } = context.repo; - const pullRequest = context.payload.pull_request; - if (!pullRequest) return; - - const sizeLabels = ['size: XS', 'size: S', 'size: M', 'size: L', 'size: XL']; - const labelColor = 'b76e79'; - - // Ensure labels exist - for (const name of sizeLabels) { - try { - await github.rest.issues.getLabel({ owner, repo, name }); - } catch (error) { - if (error?.status !== 404) throw error; - await github.rest.issues.createLabel({ - owner, repo, name, color: labelColor, - }); - } - } - - const files = await github.paginate(github.rest.pulls.listFiles, { - owner, repo, pull_number: pullRequest.number, per_page: 100, - }); - - const excludedFiles = new Set(['poetry.lock', 'uv.lock']); - const totalChangedLines = files.reduce((total, file) => { - const path = file.filename ?? 
''; - if (path.startsWith('docs/') || excludedFiles.has(path)) return total; - return total + (file.additions ?? 0) + (file.deletions ?? 0); - }, 0); - - let targetSizeLabel = 'size: XL'; - if (totalChangedLines < 50) targetSizeLabel = 'size: XS'; - else if (totalChangedLines < 200) targetSizeLabel = 'size: S'; - else if (totalChangedLines < 500) targetSizeLabel = 'size: M'; - else if (totalChangedLines < 1000) targetSizeLabel = 'size: L'; - - // Remove stale size labels - const currentLabels = await github.paginate( - github.rest.issues.listLabelsOnIssue, - { owner, repo, issue_number: pullRequest.number, per_page: 100 }, - ); - for (const label of currentLabels) { - const name = label.name ?? ''; - if (sizeLabels.includes(name) && name !== targetSizeLabel) { - await github.rest.issues.removeLabel({ - owner, repo, issue_number: pullRequest.number, name, - }); - } - } - - await github.rest.issues.addLabels({ - owner, repo, issue_number: pullRequest.number, labels: [targetSizeLabel], - }); - - console.log(`PR #${pullRequest.number}: ${totalChangedLines} changed lines → ${targetSizeLabel}`); - - backfill: - if: github.event_name == 'workflow_dispatch' - runs-on: ubuntu-latest - permissions: - pull-requests: write - issues: write - - steps: - - name: Backfill size labels on open PRs - uses: actions/github-script@v8 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - const { owner, repo } = context.repo; - const maxItems = parseInt('${{ inputs.max_items }}') || 100; - - const sizeLabels = ['size: XS', 'size: S', 'size: M', 'size: L', 'size: XL']; - const labelColor = 'b76e79'; - - // Ensure labels exist - for (const name of sizeLabels) { - try { - await github.rest.issues.getLabel({ owner, repo, name }); - } catch (error) { - if (error?.status !== 404) throw error; - await github.rest.issues.createLabel({ - owner, repo, name, color: labelColor, - }); - console.log(`Created label: ${name}`); - } - } - - function getSizeLabel(totalChangedLines) { - if 
(totalChangedLines < 50) return 'size: XS'; - if (totalChangedLines < 200) return 'size: S'; - if (totalChangedLines < 500) return 'size: M'; - if (totalChangedLines < 1000) return 'size: L'; - return 'size: XL'; - } - - const prs = await github.paginate(github.rest.pulls.list, { - owner, repo, state: 'open', per_page: 100, - }); - - let processed = 0; - for (const pr of prs) { - if (processed >= maxItems) break; - - const files = await github.paginate(github.rest.pulls.listFiles, { - owner, repo, pull_number: pr.number, per_page: 100, - }); - - const excludedFiles = new Set(['poetry.lock', 'uv.lock']); - const totalChangedLines = files.reduce((total, file) => { - const path = file.filename ?? ''; - if (path.startsWith('docs/') || excludedFiles.has(path)) return total; - return total + (file.additions ?? 0) + (file.deletions ?? 0); - }, 0); - - const targetSizeLabel = getSizeLabel(totalChangedLines); - - // Remove stale size labels - const currentLabels = await github.paginate( - github.rest.issues.listLabelsOnIssue, - { owner, repo, issue_number: pr.number, per_page: 100 }, - ); - for (const label of currentLabels) { - const name = label.name ?? ''; - if (sizeLabels.includes(name) && name !== targetSizeLabel) { - await github.rest.issues.removeLabel({ - owner, repo, issue_number: pr.number, name, - }); - } - } - - await github.rest.issues.addLabels({ - owner, repo, issue_number: pr.number, labels: [targetSizeLabel], - }); - - console.log(`PR #${pr.number}: ${totalChangedLines} changed lines → ${targetSizeLabel}`); - processed++; - } - - console.log(`\nBackfill complete. 
Processed ${processed} PRs.`); diff --git a/.github/workflows/require_issue_link.yml b/.github/workflows/require_issue_link.yml index 59ac380d699..e9a88f6f605 100644 --- a/.github/workflows/require_issue_link.yml +++ b/.github/workflows/require_issue_link.yml @@ -2,7 +2,7 @@ # GitHub auto-close keywords (Fixes #NNN, Closes #NNN, Resolves #NNN), # AND require that the PR author is assigned to the linked issue. # -# - Reacts to the "external" label applied by tag-external-contributions.yml, +# - Reacts to the "external" label applied by pr_labeler.yml, # avoiding a duplicate org membership check. # - Also re-checks on PR edits/reopens for PRs that already have the label. # - Bypasses the check for PRs with the "trusted-contributor" label, and @@ -11,14 +11,14 @@ # - Adds a "missing-issue-link" label on failure; removes it on pass. # - Automatically reopens PRs that were closed by this workflow once the # check passes (e.g. author edits the body to add a valid issue link). -# - Posts a comment explaining the requirement on failure. +# - Posts (or updates) a comment explaining the requirement on failure. # - Cancels all other in-progress/queued CI runs for the PR on closure. # - Deduplicates comments via an HTML marker so re-runs don't spam. # -# Dependency: tag-external-contributions.yml must run first to apply the -# "external" label on new PRs. Both workflows trigger on pull_request_target -# opened events; this workflow additionally listens for the "labeled" event -# to chain off the external classification. +# Dependency: pr_labeler.yml must run first to apply the "external" label +# on new PRs. Both workflows trigger on pull_request_target opened events; +# this workflow additionally listens for the "labeled" event to chain off +# the external classification. 
name: Require Issue Link @@ -82,7 +82,14 @@ jobs: // Check whether the PR author is assigned to at least one linked issue const prAuthor = context.payload.pull_request.user.login; - const issueNumbers = [...new Set(matches.map(m => parseInt(m[1], 10)))]; + const MAX_ISSUES = 5; + const allIssueNumbers = [...new Set(matches.map(m => parseInt(m[1], 10)))]; + const issueNumbers = allIssueNumbers.slice(0, MAX_ISSUES); + if (allIssueNumbers.length > MAX_ISSUES) { + core.warning( + `PR references ${allIssueNumbers.length} issues — only checking the first ${MAX_ISSUES}`, + ); + } let assignedToAny = false; for (const num of issueNumbers) { @@ -99,7 +106,16 @@ jobs: console.log(`PR author "${prAuthor}" is NOT assigned to #${num} (assignees: ${assignees.join(', ') || 'none'})`); } } catch (error) { - console.log(`Could not fetch issue #${num}: ${error.message}`); + if (error.status === 404) { + console.log(`Issue #${num} not found — skipping`); + } else { + // Non-404 errors (rate limit, server error) must not be + // silently skipped — they could cause false enforcement + // (closing a legitimate PR whose assignment can't be verified). 
+ throw new Error( + `Cannot verify assignee for issue #${num} (${error.status}): ${error.message}`, + ); + } } } @@ -112,8 +128,24 @@ jobs: script: | const { owner, repo } = context.repo; const prNumber = context.payload.pull_request.number; + const labelName = 'missing-issue-link'; + + // Ensure the label exists (no checkout/shared helper available) + try { + await github.rest.issues.getLabel({ owner, repo, name: labelName }); + } catch (e) { + if (e.status !== 404) throw e; + try { + await github.rest.issues.createLabel({ + owner, repo, name: labelName, color: 'b76e79', + }); + } catch (createErr) { + if (createErr.status !== 422) throw createErr; + } + } + await github.rest.issues.addLabels({ - owner, repo, issue_number: prNumber, labels: ['missing-issue-link'], + owner, repo, issue_number: prNumber, labels: [labelName], }); - name: Remove missing-issue-link label and reopen PR @@ -131,7 +163,9 @@ jobs: if (error.status !== 404) throw error; } - // Reopen PR only if it was previously closed by this workflow + // Reopen if this workflow previously closed the PR. We check the + // event payload labels (not live labels) because we already removed + // missing-issue-link above; the payload still reflects pre-step state. const labels = context.payload.pull_request.labels.map(l => l.name); if (context.payload.pull_request.state === 'closed' && labels.includes('missing-issue-link')) { await github.rest.pulls.update({ diff --git a/.github/workflows/tag-external-contributions.yml b/.github/workflows/tag-external-contributions.yml deleted file mode 100644 index 59818b3082a..00000000000 --- a/.github/workflows/tag-external-contributions.yml +++ /dev/null @@ -1,421 +0,0 @@ -# Automatically tag issues and pull requests as "external" or "internal" -# based on whether the author is a member of the langchain-ai -# GitHub organization, and apply contributor tier labels to external -# contributors based on their merged PR history. -# -# Setup Requirements: -# 1. 
Create a GitHub App with permissions: -# - Repository: Issues (write), Pull requests (write) -# - Organization: Members (read) -# 2. Install the app on your organization and this repository -# 3. Add these repository secrets: -# - ORG_MEMBERSHIP_APP_ID: Your app's ID -# - ORG_MEMBERSHIP_APP_PRIVATE_KEY: Your app's private key -# -# The GitHub App token is required to check private organization membership. -# Without it, the workflow will fail. -# -# Contributor tier thresholds: -# - trusted-contributor: >= 5 merged PRs - -name: Tag External Contributions - -on: - issues: - types: [opened] - pull_request_target: - types: [opened] - workflow_dispatch: - inputs: - backfill_type: - description: "Backfill type (for initial run)" - default: "both" - type: choice - options: - - prs - - issues - - both - max_items: - description: "Maximum number of items to process" - default: "100" - type: string - -permissions: - contents: read - -jobs: - tag-external: - if: github.event_name != 'workflow_dispatch' - runs-on: ubuntu-latest - permissions: - issues: write - pull-requests: write - - steps: - - name: Generate GitHub App token - id: app-token - uses: actions/create-github-app-token@v2 - with: - app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }} - private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }} - - - name: Check if contributor is external - id: check-membership - uses: actions/github-script@v8 - with: - github-token: ${{ steps.app-token.outputs.token }} - script: | - const { owner, repo } = context.repo; - const author = context.payload.sender.login; - - // GitHub App bots (e.g. model-profile-bot) are not org members - // but should be treated as internal — skip the membership check. 
- const senderType = context.payload.sender.type; - if (senderType === 'Bot') { - console.log(`Sender ${author} is a Bot — treating as internal`); - core.setOutput('is-external', 'false'); - return; - } - - try { - // Check if the author is a member of the langchain-ai organization - // This requires org:read permissions to see private memberships - const membership = await github.rest.orgs.getMembershipForUser({ - org: 'langchain-ai', - username: author - }); - - // Check if membership is active (not just pending invitation) - if (membership.data.state === 'active') { - console.log(`User ${author} is an active member of langchain-ai organization`); - core.setOutput('is-external', 'false'); - } else { - console.log(`User ${author} has pending membership in langchain-ai organization`); - core.setOutput('is-external', 'true'); - } - } catch (error) { - if (error.status === 404) { - console.log(`User ${author} is not a member of langchain-ai organization`); - core.setOutput('is-external', 'true'); - } else { - console.error('Error checking membership:', error); - console.log('Status:', error.status); - console.log('Message:', error.message); - // If we can't determine membership due to API error, assume external for safety - core.setOutput('is-external', 'true'); - } - } - - # Apply tier label BEFORE the external/internal labels so that - # "trusted-contributor" is already present when the "external" labeled - # event fires and triggers require_issue_link.yml. - - name: Apply contributor tier label - if: steps.check-membership.outputs.is-external == 'true' - uses: actions/github-script@v8 - with: - # Use App token so the "labeled" event propagates to downstream - # workflows (e.g. require_issue_link.yml bypass-trusted-contributor). - github-token: ${{ steps.app-token.outputs.token }} - script: | - const { owner, repo } = context.repo; - const isPR = context.eventName === 'pull_request_target'; - const item = isPR - ? 
context.payload.pull_request - : context.payload.issue; - const author = item.user.login; - const issueNumber = item.number; - - const TRUSTED_THRESHOLD = 5; - - const mergedQuery = `repo:${owner}/${repo} is:pr is:merged author:"${author}"`; - let mergedCount = 0; - try { - const result = await github.rest.search.issuesAndPullRequests({ - q: mergedQuery, - per_page: 1, - }); - mergedCount = result?.data?.total_count ?? 0; - } catch (error) { - if (error?.status !== 422) throw error; - core.warning(`Search failed for ${author}; skipping tier label.`); - return; - } - - const label = mergedCount >= TRUSTED_THRESHOLD ? 'trusted-contributor' : null; - - if (label) { - await github.rest.issues.addLabels({ - owner, - repo, - issue_number: issueNumber, - labels: [label], - }); - console.log(`Applied '${label}' to #${issueNumber} (${mergedCount} merged PRs)`); - } else { - console.log(`No tier label for ${author} (${mergedCount} merged PRs)`); - } - - - name: Add external label to issue - if: steps.check-membership.outputs.is-external == 'true' && github.event_name == 'issues' - uses: actions/github-script@v8 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - const { owner, repo } = context.repo; - const issue_number = context.payload.issue.number; - - await github.rest.issues.addLabels({ - owner, - repo, - issue_number, - labels: ['external'] - }); - - console.log(`Added 'external' label to issue #${issue_number}`); - - - name: Add external label to pull request - if: steps.check-membership.outputs.is-external == 'true' && github.event_name == 'pull_request_target' - uses: actions/github-script@v8 - with: - # Use App token so the "labeled" event propagates to downstream - # workflows (e.g. require_issue_link.yml). Events created by the - # default GITHUB_TOKEN do not trigger additional workflow runs. 
- github-token: ${{ steps.app-token.outputs.token }} - script: | - const { owner, repo } = context.repo; - const pull_number = context.payload.pull_request.number; - - await github.rest.issues.addLabels({ - owner, - repo, - issue_number: pull_number, - labels: ['external'] - }); - - console.log(`Added 'external' label to pull request #${pull_number}`); - - - name: Add internal label to issue - if: steps.check-membership.outputs.is-external == 'false' && github.event_name == 'issues' - uses: actions/github-script@v8 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - const { owner, repo } = context.repo; - const issue_number = context.payload.issue.number; - - await github.rest.issues.addLabels({ - owner, - repo, - issue_number, - labels: ['internal'] - }); - - console.log(`Added 'internal' label to issue #${issue_number}`); - - - name: Add internal label to pull request - if: steps.check-membership.outputs.is-external == 'false' && github.event_name == 'pull_request_target' - uses: actions/github-script@v8 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - const { owner, repo } = context.repo; - const pull_number = context.payload.pull_request.number; - - await github.rest.issues.addLabels({ - owner, - repo, - issue_number: pull_number, - labels: ['internal'] - }); - - console.log(`Added 'internal' label to pull request #${pull_number}`); - - backfill: - if: github.event_name == 'workflow_dispatch' - runs-on: ubuntu-latest - permissions: - issues: write - pull-requests: write - - steps: - - name: Generate GitHub App token - id: app-token - uses: actions/create-github-app-token@v2 - with: - app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }} - private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }} - - - name: Backfill labels - uses: actions/github-script@v8 - with: - github-token: ${{ steps.app-token.outputs.token }} - script: | - const { owner, repo } = context.repo; - const maxItems = parseInt('${{ inputs.max_items }}') || 100; - const 
backfillType = '${{ inputs.backfill_type }}'; - - const TRUSTED_THRESHOLD = 5; - const LABEL_COLOR = 'b76e79'; - - const sizeLabels = ['size: XS', 'size: S', 'size: M', 'size: L', 'size: XL']; - const tierLabels = ['trusted-contributor']; - - // Ensure tier and size labels exist - for (const name of [...tierLabels, ...sizeLabels]) { - try { - await github.rest.issues.getLabel({ owner, repo, name }); - } catch (error) { - if (error?.status !== 404) throw error; - await github.rest.issues.createLabel({ - owner, repo, name, color: LABEL_COLOR, - }); - console.log(`Created label: ${name}`); - } - } - - // Cache: author -> { isExternal, mergedCount } - const contributorCache = new Map(); - - async function getContributorInfo(author, userType) { - if (contributorCache.has(author)) return contributorCache.get(author); - - // Bots are always internal - if (userType === 'Bot') { - const info = { isExternal: false, mergedCount: 0 }; - contributorCache.set(author, info); - return info; - } - - let isExternal = true; - try { - const membership = await github.rest.orgs.getMembershipForUser({ - org: 'langchain-ai', - username: author, - }); - isExternal = membership.data.state !== 'active'; - } catch (error) { - if (error.status !== 404) { - core.warning(`Membership check failed for ${author}: ${error.message}`); - } - } - - let mergedCount = 0; - if (isExternal) { - try { - const result = await github.rest.search.issuesAndPullRequests({ - q: `repo:${owner}/${repo} is:pr is:merged author:"${author}"`, - per_page: 1, - }); - mergedCount = result?.data?.total_count ?? 0; - } catch (error) { - if (error?.status !== 422) throw error; - core.warning(`Search failed for ${author}; skipping tier.`); - } - } - - const info = { isExternal, mergedCount }; - contributorCache.set(author, info); - return info; - } - - function getTierLabel(mergedCount) { - return mergedCount >= TRUSTED_THRESHOLD ? 
'trusted-contributor' : null; - } - - function getSizeLabel(totalChangedLines) { - if (totalChangedLines < 50) return 'size: XS'; - if (totalChangedLines < 200) return 'size: S'; - if (totalChangedLines < 500) return 'size: M'; - if (totalChangedLines < 1000) return 'size: L'; - return 'size: XL'; - } - - async function removeStaleLabels(issueNumber, labelsToKeep, labelSets) { - const currentLabels = await github.paginate( - github.rest.issues.listLabelsOnIssue, - { owner, repo, issue_number: issueNumber, per_page: 100 }, - ); - for (const label of currentLabels) { - const name = label.name ?? ''; - const inManagedSet = labelSets.some((s) => s.includes(name)); - if (inManagedSet && !labelsToKeep.includes(name)) { - await github.rest.issues.removeLabel({ - owner, repo, issue_number: issueNumber, name, - }); - } - } - } - - let processed = 0; - - // Backfill PRs - if (backfillType === 'prs' || backfillType === 'both') { - const prs = await github.paginate(github.rest.pulls.list, { - owner, repo, state: 'open', per_page: 100, - }); - - for (const pr of prs) { - if (processed >= maxItems) break; - const author = pr.user.login; - const info = await getContributorInfo(author, pr.user.type); - - const labels = []; - labels.push(info.isExternal ? 'external' : 'internal'); - - if (info.isExternal) { - const tier = getTierLabel(info.mergedCount); - if (tier) labels.push(tier); - } - - // Compute size label - const files = await github.paginate(github.rest.pulls.listFiles, { - owner, repo, pull_number: pr.number, per_page: 100, - }); - const excludedFiles = new Set(['poetry.lock', 'uv.lock']); - const totalChangedLines = files.reduce((total, file) => { - const path = file.filename ?? ''; - if (path.startsWith('docs/') || excludedFiles.has(path)) return total; - return total + (file.additions ?? 0) + (file.deletions ?? 
0); - }, 0); - labels.push(getSizeLabel(totalChangedLines)); - - await removeStaleLabels(pr.number, labels, [sizeLabels, tierLabels]); - await github.rest.issues.addLabels({ - owner, repo, issue_number: pr.number, labels, - }); - console.log(`PR #${pr.number} (${author}): ${labels.join(', ')}`); - processed++; - } - } - - // Backfill issues - if (backfillType === 'issues' || backfillType === 'both') { - const issues = await github.paginate(github.rest.issues.listForRepo, { - owner, repo, state: 'open', per_page: 100, - }); - - for (const issue of issues) { - if (processed >= maxItems) break; - if (issue.pull_request) continue; - - const author = issue.user.login; - const info = await getContributorInfo(author, issue.user.type); - - const labels = []; - labels.push(info.isExternal ? 'external' : 'internal'); - - if (info.isExternal) { - const tier = getTierLabel(info.mergedCount); - if (tier) labels.push(tier); - } - - await removeStaleLabels(issue.number, labels, [tierLabels]); - await github.rest.issues.addLabels({ - owner, repo, issue_number: issue.number, labels, - }); - console.log(`Issue #${issue.number} (${author}): ${labels.join(', ')}`); - processed++; - } - } - - console.log(`\nBackfill complete. Processed ${processed} items. Cache hits: ${contributorCache.size} unique authors.`); diff --git a/.github/workflows/tag-external-issues.yml b/.github/workflows/tag-external-issues.yml new file mode 100644 index 00000000000..7c3e615d3a2 --- /dev/null +++ b/.github/workflows/tag-external-issues.yml @@ -0,0 +1,205 @@ +# Automatically tag issues as "external" or "internal" based on whether +# the author is a member of the langchain-ai GitHub organization, and +# apply contributor tier labels to external contributors based on their +# merged PR history. +# +# NOTE: PR labeling (including external/internal, tier, size, file, and +# title labels) is handled by pr_labeler.yml. This workflow handles +# issues only. 
+# +# Config (trustedThreshold, labelColor) is read from +# .github/scripts/pr-labeler-config.json to stay in sync with +# pr_labeler.yml. +# +# Setup Requirements: +# 1. Create a GitHub App with permissions: +# - Repository: Issues (write) +# - Organization: Members (read) +# 2. Install the app on your organization and this repository +# 3. Add these repository secrets: +# - ORG_MEMBERSHIP_APP_ID: Your app's ID +# - ORG_MEMBERSHIP_APP_PRIVATE_KEY: Your app's private key +# +# The GitHub App token is required to check private organization membership. +# Without it, the workflow will fail. + +name: Tag External Issues + +on: + issues: + types: [opened] + workflow_dispatch: + inputs: + max_items: + description: "Maximum number of open issues to process" + default: "100" + type: string + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.event.issue.number || github.run_id }} + cancel-in-progress: true + +jobs: + tag-external: + if: github.event_name != 'workflow_dispatch' + runs-on: ubuntu-latest + permissions: + contents: read + issues: write + + steps: + - uses: actions/checkout@v6 + + - name: Generate GitHub App token + id: app-token + uses: actions/create-github-app-token@v3 + with: + app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }} + private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }} + + - name: Check if contributor is external + if: steps.app-token.outcome == 'success' + id: check-membership + uses: actions/github-script@v8 + with: + github-token: ${{ steps.app-token.outputs.token }} + script: | + const { owner, repo } = context.repo; + const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo); + + const author = context.payload.sender.login; + const { isExternal } = await h.checkMembership( + author, context.payload.sender.type, + ); + core.setOutput('is-external', isExternal ? 
'true' : 'false'); + + - name: Apply contributor tier label + if: steps.check-membership.outputs.is-external == 'true' + uses: actions/github-script@v8 + with: + # GITHUB_TOKEN is fine here — no downstream workflow chains + # off tier labels on issues (unlike PRs where App token is + # needed for require_issue_link.yml). + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const { owner, repo } = context.repo; + const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo); + + const issue = context.payload.issue; + // new-contributor is only meaningful on PRs, not issues + await h.applyTierLabel(issue.number, issue.user.login, { skipNewContributor: true }); + + - name: Add external/internal label + if: steps.check-membership.outputs.is-external != '' + uses: actions/github-script@v8 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const { owner, repo } = context.repo; + const issue_number = context.payload.issue.number; + + const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo); + + const label = '${{ steps.check-membership.outputs.is-external }}' === 'true' + ? 
'external' : 'internal'; + await h.ensureLabel(label); + await github.rest.issues.addLabels({ + owner, repo, issue_number, labels: [label], + }); + console.log(`Added '${label}' label to issue #${issue_number}`); + + backfill: + if: github.event_name == 'workflow_dispatch' + runs-on: ubuntu-latest + permissions: + contents: read + issues: write + + steps: + - uses: actions/checkout@v6 + + - name: Generate GitHub App token + id: app-token + uses: actions/create-github-app-token@v3 + with: + app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }} + private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }} + + - name: Backfill labels on open issues + uses: actions/github-script@v8 + with: + github-token: ${{ steps.app-token.outputs.token }} + script: | + const { owner, repo } = context.repo; + const rawMax = String(context.payload.inputs?.max_items ?? '100').trim(); // read from the event payload, not template-interpolated into the script (avoids script injection) + const maxItems = parseInt(rawMax, 10); + if (!/^[0-9]+$/.test(rawMax) || maxItems <= 0) { + core.setFailed(`Invalid max_items: "${rawMax}" — must be a positive integer`); + return; + } + + const { h } = require('./.github/scripts/pr-labeler.js').loadAndInit(github, owner, repo); + + const tierLabels = ['trusted-contributor']; + for (const name of tierLabels) { + await h.ensureLabel(name); + } + + const contributorCache = new Map(); + + const issues = await github.paginate(github.rest.issues.listForRepo, { + owner, repo, state: 'open', per_page: 100, + }); + + let processed = 0; + let failures = 0; + for (const issue of issues) { + if (processed >= maxItems) break; + if (issue.pull_request) continue; + + try { + const author = issue.user.login; + const info = await h.getContributorInfo(contributorCache, author, issue.user.type); + + const labels = [info.isExternal ? 
'external' : 'internal']; + if (info.isExternal && info.mergedCount != null && info.mergedCount >= h.trustedThreshold) { + labels.push('trusted-contributor'); + } + + // Ensure all labels exist before batch add + for (const name of labels) { + await h.ensureLabel(name); + } + + // Remove stale tier labels + const currentLabels = (await github.paginate( + github.rest.issues.listLabelsOnIssue, + { owner, repo, issue_number: issue.number, per_page: 100 }, + )).map(l => l.name ?? ''); + for (const name of currentLabels) { + if (tierLabels.includes(name) && !labels.includes(name)) { + try { + await github.rest.issues.removeLabel({ + owner, repo, issue_number: issue.number, name, + }); + } catch (e) { + if (e.status !== 404) throw e; + } + } + } + + await github.rest.issues.addLabels({ + owner, repo, issue_number: issue.number, labels, + }); + console.log(`Issue #${issue.number} (${author}): ${labels.join(', ')}`); + processed++; + } catch (e) { + failures++; + core.warning(`Failed to process issue #${issue.number}: ${e.message}`); + } + } + + console.log(`\nBackfill complete. Processed ${processed} issues, ${failures} failures. 
${contributorCache.size} unique authors.`); diff --git a/AGENTS.md b/AGENTS.md index 240de05e8ec..9f12b01018b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -229,10 +229,10 @@ Releases are triggered manually via `.github/workflows/_release.yml` with `worki **Auto-labeling:** -- `.github/workflows/pr_labeler_file.yml` -- `.github/workflows/pr_labeler_title.yml` -- `.github/workflows/auto-label-by-package.yml` -- `.github/workflows/tag-external-contributions.yml` +- `.github/workflows/pr_labeler.yml` – Unified PR labeler (size, file, title, external/internal, contributor tier) +- `.github/workflows/pr_labeler_backfill.yml` – Manual backfill of PR labels on open PRs +- `.github/workflows/auto-label-by-package.yml` – Issue labeling by package +- `.github/workflows/tag-external-issues.yml` – Issue external/internal classification ### Adding a new partner to CI @@ -240,7 +240,7 @@ When adding a new partner package, update these files: - `.github/ISSUE_TEMPLATE/*.yml` – Add to package dropdown - `.github/dependabot.yml` – Add dependency update entry -- `.github/pr-file-labeler.yml` – Add file-to-label mapping +- `.github/scripts/pr-labeler-config.json` – Add file rule and scope-to-label mapping - `.github/workflows/_release.yml` – Add API key secrets if needed - `.github/workflows/auto-label-by-package.yml` – Add package label - `.github/workflows/check_diffs.yml` – Add to change detection diff --git a/CLAUDE.md b/CLAUDE.md index 240de05e8ec..9f12b01018b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -229,10 +229,10 @@ Releases are triggered manually via `.github/workflows/_release.yml` with `worki **Auto-labeling:** -- `.github/workflows/pr_labeler_file.yml` -- `.github/workflows/pr_labeler_title.yml` -- `.github/workflows/auto-label-by-package.yml` -- `.github/workflows/tag-external-contributions.yml` +- `.github/workflows/pr_labeler.yml` – Unified PR labeler (size, file, title, external/internal, contributor tier) +- `.github/workflows/pr_labeler_backfill.yml` – Manual backfill 
of PR labels on open PRs +- `.github/workflows/auto-label-by-package.yml` – Issue labeling by package +- `.github/workflows/tag-external-issues.yml` – Issue external/internal classification ### Adding a new partner to CI @@ -240,7 +240,7 @@ When adding a new partner package, update these files: - `.github/ISSUE_TEMPLATE/*.yml` – Add to package dropdown - `.github/dependabot.yml` – Add dependency update entry -- `.github/pr-file-labeler.yml` – Add file-to-label mapping +- `.github/scripts/pr-labeler-config.json` – Add file rule and scope-to-label mapping - `.github/workflows/_release.yml` – Add API key secrets if needed - `.github/workflows/auto-label-by-package.yml` – Add package label - `.github/workflows/check_diffs.yml` – Add to change detection