From 360e0165abfb8d2abf1a3b8e3c13d739c5d13f2c Mon Sep 17 00:00:00 2001
From: Mason Daugherty
Date: Mon, 9 Mar 2026 10:31:04 -0400
Subject: [PATCH] ci: add contributor tier labels, PR size labels, and backfill job (#35687)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extend the existing `tag-external-contributions.yml` workflow with tiered contributor labels (`trusted-contributor` at ≥4 merged PRs, `experienced-contributor` at ≥10) for both issues and PRs, and add a new `pr_size_labeler.yml` workflow. The tier step piggybacks on the existing org membership check — no additional API call for that — and the backfill job reuses the same membership + search logic with a per-author cache to avoid redundant calls.

## Changes

- Add a consolidated `Apply contributor tier label` step to the `tag-external` job that handles both `pull_request_target` and `issues` events, querying the search API for merged PR count and applying the appropriate tier label
- Add `workflow_dispatch` trigger with `backfill_type` (prs/issues/both) and `max_items` inputs, gated to a separate `backfill` job that iterates open PRs and issues, applies `external`/`internal` + tier + size labels, and uses a `contributorCache` Map to deduplicate org membership and search API calls per author
- Add `pr_size_labeler.yml` — standalone workflow on `pull_request_target` (opened/synchronize/reopened) that computes changed lines excluding `docs/`, `poetry.lock`, and `uv.lock`, then applies `size: XS`/`S`/`M`/`L`/`XL` labels (auto-created on first run with color `b76e79`), removing stale size labels before applying the new one

## Security notes

Both workflows use `pull_request_target` but neither checks out PR code — all operations are GitHub API calls via `actions/github-script@v8`. The `${{ inputs.max_items }}` interpolation is a `workflow_dispatch` input restricted to users with write access (equivalent or greater privilege than the workflow token).
`${{ inputs.backfill_type }}` is a `choice` type with server-side enforcement. Author values in search queries come from GitHub API responses with restricted character sets. No high-confidence vulnerabilities identified.
---
 .github/workflows/pr_size_labeler.yml         |  82 ++++++
 .../workflows/tag-external-contributions.yml  | 258 +++++++++++++++++-
 2 files changed, 339 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/pr_size_labeler.yml

diff --git a/.github/workflows/pr_size_labeler.yml b/.github/workflows/pr_size_labeler.yml
new file mode 100644
index 00000000000..c76314f67b5
--- /dev/null
+++ b/.github/workflows/pr_size_labeler.yml
@@ -0,0 +1,82 @@
+# Label PRs by size (changed lines, excluding the top-level docs/ tree and poetry.lock / uv.lock in any directory).
+#
+# Size thresholds (additions + deletions; upper bounds are exclusive):
+#   XS: < 50, S: < 200, M: < 500, L: < 1000, XL: >= 1000
+
+name: "📏 PR Size Labeler"
+
+on:
+  pull_request_target:
+    types: [opened, synchronize, reopened]
+
+permissions:
+  contents: read
+
+jobs:
+  size-label:
+    runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write
+      issues: write
+
+    steps:
+      - name: Apply PR size label
+        uses: actions/github-script@v8
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const { owner, repo } = context.repo;
+            const pullRequest = context.payload.pull_request;
+            if (!pullRequest) return;
+
+            const sizeLabels = ['size: XS', 'size: S', 'size: M', 'size: L', 'size: XL'];
+            const labelColor = 'b76e79';
+
+            // Ensure every size label exists: a 404 from getLabel means it has to be created first
+            for (const name of sizeLabels) {
+              try {
+                await github.rest.issues.getLabel({ owner, repo, name });
+              } catch (error) {
+                if (error?.status !== 404) throw error;
+                await github.rest.issues.createLabel({
+                  owner, repo, name, color: labelColor,
+                });
+              }
+            }
+
+            const files = await github.paginate(github.rest.pulls.listFiles, {
+              owner, repo, pull_number: pullRequest.number, per_page: 100,
+            });
+            // Lockfiles are matched by basename so that nested ones (not only repo-root ones) are skipped
+            const excludedFiles = new Set(['poetry.lock', 'uv.lock']);
+            const totalChangedLines = files.reduce((total, file) => {
+              const path = file.filename ?? '';
+              if (path.startsWith('docs/') || excludedFiles.has(path.split('/').pop())) return total;
+              return total + (file.additions ?? 0) + (file.deletions ?? 0);
+            }, 0);
+
+            let targetSizeLabel = 'size: XL';
+            if (totalChangedLines < 50) targetSizeLabel = 'size: XS';
+            else if (totalChangedLines < 200) targetSizeLabel = 'size: S';
+            else if (totalChangedLines < 500) targetSizeLabel = 'size: M';
+            else if (totalChangedLines < 1000) targetSizeLabel = 'size: L';
+
+            // Remove stale size labels so that only the target size label remains after addLabels
+            const currentLabels = await github.paginate(
+              github.rest.issues.listLabelsOnIssue,
+              { owner, repo, issue_number: pullRequest.number, per_page: 100 },
+            );
+            for (const label of currentLabels) {
+              const name = label.name ?? '';
+              if (sizeLabels.includes(name) && name !== targetSizeLabel) {
+                await github.rest.issues.removeLabel({
+                  owner, repo, issue_number: pullRequest.number, name,
+                });
+              }
+            }
+
+            await github.rest.issues.addLabels({
+              owner, repo, issue_number: pullRequest.number, labels: [targetSizeLabel],
+            });
+
+            console.log(`PR #${pullRequest.number}: ${totalChangedLines} changed lines → ${targetSizeLabel}`);
diff --git a/.github/workflows/tag-external-contributions.yml b/.github/workflows/tag-external-contributions.yml
index 59f227bbf1a..8d6852cae67 100644
--- a/.github/workflows/tag-external-contributions.yml
+++ b/.github/workflows/tag-external-contributions.yml
@@ -1,6 +1,7 @@
 # Automatically tag issues and pull requests as "external" or "internal"
 # based on whether the author is a member of the langchain-ai
-# GitHub organization.
+# GitHub organization, and apply contributor tier labels to external
+# contributors based on their merged PR history.
 #
 # Setup Requirements:
 # 1. Create a GitHub App with permissions:
@@ -13,6 +14,10 @@
 #
 # The GitHub App token is required to check private organization membership.
 # Without it, the workflow will fail.
+#
+# Contributor tier thresholds (merged PRs in this repo; only the highest matching tier is applied):
+#   - trusted-contributor: >= 4 merged PRs
+#   - experienced-contributor: >= 10 merged PRs
 
 name: Tag External Contributions
 
@@ -21,12 +26,27 @@ on:
     types: [opened]
   pull_request_target:
     types: [opened]
+  workflow_dispatch:
+    inputs:
+      backfill_type:
+        description: "Backfill type (for initial run)"
+        default: "both"
+        type: choice
+        options:
+          - prs
+          - issues
+          - both
+      max_items:
+        description: "Maximum number of items to process"
+        default: "100"
+        type: string
 
 permissions:
   contents: read
 
 jobs:
   tag-external:
+    if: github.event_name != 'workflow_dispatch'
     runs-on: ubuntu-latest
     permissions:
       issues: write
@@ -149,3 +169,239 @@ jobs:
             });
 
             console.log(`Added 'internal' label to pull request #${pull_number}`);
+
+      - name: Apply contributor tier label
+        if: steps.check-membership.outputs.is-external == 'true'
+        uses: actions/github-script@v8
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const { owner, repo } = context.repo;
+            const isPR = context.eventName === 'pull_request_target';
+            const item = isPR
+              ? context.payload.pull_request
+              : context.payload.issue;
+            const author = item.user.login;
+            const issueNumber = item.number;
+            // Minimum merged-PR counts per tier (the backfill job below uses the same values)
+            const TRUSTED_THRESHOLD = 4;
+            const EXPERIENCED_THRESHOLD = 10;
+            // Only total_count is read from the search response, so one result per page is enough
+            const mergedQuery = `repo:${owner}/${repo} is:pr is:merged author:${author}`;
+            let mergedCount = 0;
+            try {
+              const result = await github.rest.search.issuesAndPullRequests({
+                q: mergedQuery,
+                per_page: 1,
+              });
+              mergedCount = result?.data?.total_count ?? 0;
+            } catch (error) {
+              if (error?.status !== 422) throw error;
+              core.warning(`Search failed for ${author}; skipping tier label.`);
+              return;
+            }
+
+            let label = null;
+            if (mergedCount >= EXPERIENCED_THRESHOLD) {
+              label = 'experienced-contributor';
+            } else if (mergedCount >= TRUSTED_THRESHOLD) {
+              label = 'trusted-contributor';
+            }
+
+            if (label) {
+              await github.rest.issues.addLabels({
+                owner,
+                repo,
+                issue_number: issueNumber,
+                labels: [label],
+              });
+              console.log(`Applied '${label}' to #${issueNumber} (${mergedCount} merged PRs)`);
+            } else {
+              console.log(`No tier label for ${author} (${mergedCount} merged PRs)`);
+            }
+
+  backfill:
+    if: github.event_name == 'workflow_dispatch'
+    runs-on: ubuntu-latest
+    permissions:
+      issues: write
+      pull-requests: write
+
+    steps:
+      - name: Generate GitHub App token
+        id: app-token
+        uses: actions/create-github-app-token@v2
+        with:
+          app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }}
+          private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }}
+
+      - name: Backfill labels
+        uses: actions/github-script@v8
+        with:
+          github-token: ${{ steps.app-token.outputs.token }}
+          script: |
+            const { owner, repo } = context.repo;
+            const maxItems = Math.max(Number.parseInt('${{ inputs.max_items }}', 10), 0) || 100; // NaN, 0 and negatives fall back to 100
+            const backfillType = '${{ inputs.backfill_type }}';
+
+            const TRUSTED_THRESHOLD = 4;
+            const EXPERIENCED_THRESHOLD = 10;
+            const LABEL_COLOR = 'b76e79';
+
+            const sizeLabels = ['size: XS', 'size: S', 'size: M', 'size: L', 'size: XL'];
+            const tierLabels = ['trusted-contributor', 'experienced-contributor'];
+
+            // Ensure tier and size labels exist (a 404 from getLabel means the label has to be created)
+            for (const name of [...tierLabels, ...sizeLabels]) {
+              try {
+                await github.rest.issues.getLabel({ owner, repo, name });
+              } catch (error) {
+                if (error?.status !== 404) throw error;
+                await github.rest.issues.createLabel({
+                  owner, repo, name, color: LABEL_COLOR,
+                });
+                console.log(`Created label: ${name}`);
+              }
+            }
+
+            // Cache: author login -> { isExternal, mergedCount }; each author costs at most one membership and one search call
+            const contributorCache = new Map();
+            // Resolve org membership and, for external authors only, the merged-PR count of one login
+            async function getContributorInfo(author) {
+              if (contributorCache.has(author)) return contributorCache.get(author);
+
+              let isExternal = true;
+              try {
+                const membership = await github.rest.orgs.getMembershipForUser({
+                  org: 'langchain-ai',
+                  username: author,
+                });
+                isExternal = membership.data.state !== 'active';
+              } catch (error) { // 404 is treated as "not a member"; other failures are only warned about, so isExternal stays true
+                if (error?.status !== 404) {
+                  core.warning(`Membership check failed for ${author}: ${error?.message}`);
+                }
+              }
+
+              let mergedCount = 0;
+              if (isExternal) {
+                try {
+                  const result = await github.rest.search.issuesAndPullRequests({
+                    q: `repo:${owner}/${repo} is:pr is:merged author:${author}`,
+                    per_page: 1,
+                  });
+                  mergedCount = result?.data?.total_count ?? 0;
+                } catch (error) {
+                  if (error?.status !== 422) throw error;
+                  core.warning(`Search failed for ${author}; skipping tier.`);
+                }
+              }
+
+              const info = { isExternal, mergedCount };
+              contributorCache.set(author, info);
+              return info;
+            }
+            // Map a merged-PR count to its tier label, or null below the lowest threshold
+            function getTierLabel(mergedCount) {
+              if (mergedCount >= EXPERIENCED_THRESHOLD) return 'experienced-contributor';
+              if (mergedCount >= TRUSTED_THRESHOLD) return 'trusted-contributor';
+              return null;
+            }
+            // Map a changed-line count to its size label (upper bounds are exclusive)
+            function getSizeLabel(totalChangedLines) {
+              if (totalChangedLines < 50) return 'size: XS';
+              if (totalChangedLines < 200) return 'size: S';
+              if (totalChangedLines < 500) return 'size: M';
+              if (totalChangedLines < 1000) return 'size: L';
+              return 'size: XL';
+            }
+            // Remove labels that belong to one of labelSets but are not listed in labelsToKeep
+            async function removeStaleLabels(issueNumber, labelsToKeep, labelSets) {
+              const currentLabels = await github.paginate(
+                github.rest.issues.listLabelsOnIssue,
+                { owner, repo, issue_number: issueNumber, per_page: 100 },
+              );
+              for (const label of currentLabels) {
+                const name = label.name ?? '';
+                const inManagedSet = labelSets.some((s) => s.includes(name));
+                if (inManagedSet && !labelsToKeep.includes(name)) {
+                  await github.rest.issues.removeLabel({
+                    owner, repo, issue_number: issueNumber, name,
+                  });
+                }
+              }
+            }
+
+            let processed = 0;
+
+            // Backfill PRs (maxItems caps PRs and issues combined, because `processed` is shared)
+            if (backfillType === 'prs' || backfillType === 'both') {
+              const prs = await github.paginate(github.rest.pulls.list, {
+                owner, repo, state: 'open', per_page: 100,
+              });
+
+              for (const pr of prs) {
+                if (processed >= maxItems) break;
+                const author = pr.user.login;
+                const info = await getContributorInfo(author);
+
+                const labels = [];
+                labels.push(info.isExternal ? 'external' : 'internal');
+
+                if (info.isExternal) {
+                  const tier = getTierLabel(info.mergedCount);
+                  if (tier) labels.push(tier);
+                }
+
+                // Compute size label (top-level docs/ and lockfiles in any directory do not count)
+                const files = await github.paginate(github.rest.pulls.listFiles, {
+                  owner, repo, pull_number: pr.number, per_page: 100,
+                });
+                const excludedFiles = new Set(['poetry.lock', 'uv.lock']);
+                const totalChangedLines = files.reduce((total, file) => {
+                  const path = file.filename ?? '';
+                  if (path.startsWith('docs/') || excludedFiles.has(path.split('/').pop())) return total;
+                  return total + (file.additions ?? 0) + (file.deletions ?? 0);
+                }, 0);
+                labels.push(getSizeLabel(totalChangedLines));
+
+                await removeStaleLabels(pr.number, labels, [sizeLabels, tierLabels]);
+                await github.rest.issues.addLabels({
+                  owner, repo, issue_number: pr.number, labels,
+                });
+                console.log(`PR #${pr.number} (${author}): ${labels.join(', ')}`);
+                processed++;
+              }
+            }
+
+            // Backfill issues (no size label; only tier labels are treated as stale here)
+            if (backfillType === 'issues' || backfillType === 'both') {
+              const issues = await github.paginate(github.rest.issues.listForRepo, {
+                owner, repo, state: 'open', per_page: 100,
+              });
+
+              for (const issue of issues) {
+                if (processed >= maxItems) break;
+                if (issue.pull_request) continue; // entries carrying a pull_request field are PRs, not issues
+
+                const author = issue.user.login;
+                const info = await getContributorInfo(author);
+
+                const labels = [];
+                labels.push(info.isExternal ? 'external' : 'internal');
+
+                if (info.isExternal) {
+                  const tier = getTierLabel(info.mergedCount);
+                  if (tier) labels.push(tier);
+                }
+
+                await removeStaleLabels(issue.number, labels, [tierLabels]);
+                await github.rest.issues.addLabels({
+                  owner, repo, issue_number: issue.number, labels,
+                });
+                console.log(`Issue #${issue.number} (${author}): ${labels.join(', ')}`);
+                processed++;
+              }
+            }
+
+            console.log(`\nBackfill complete. Processed ${processed} items from ${contributorCache.size} unique authors.`);