ci: add contributor tier labels, PR size labels, and backfill job (#35687)

Extend the existing `tag-external-contributions.yml` workflow with
tiered contributor labels (`trusted-contributor` at ≥4 merged PRs,
`experienced-contributor` at ≥10) for both issues and PRs, and add a new
`pr_size_labeler.yml` workflow. The tier step piggybacks on the existing
org membership check — no additional API call for that — and the
backfill job reuses the same membership + search logic with a per-author
cache to avoid redundant calls.

## Changes

- Add a consolidated `Apply contributor tier label` step to the
`tag-external` job that handles both `pull_request_target` and `issues`
events, querying the search API for merged PR count and applying the
appropriate tier label
- Add `workflow_dispatch` trigger with `backfill_type` (prs/issues/both)
and `max_items` inputs, gated to a separate `backfill` job that iterates
open PRs and issues, applies `external`/`internal` + tier + size labels,
and uses a `contributorCache` Map to deduplicate org membership and
search API calls per author
- Add `pr_size_labeler.yml` — standalone workflow on
`pull_request_target` (opened/synchronize/reopened) that computes
changed lines excluding `docs/`, `poetry.lock`, and `uv.lock`, then
applies `size: XS`/`S`/`M`/`L`/`XL` labels (auto-created on first run
with color `b76e79`), removing stale size labels before applying the new
one

## Security notes

Both workflows use `pull_request_target` but neither checks out PR code
— all operations are GitHub API calls via `actions/github-script@v8`.
The `${{ inputs.max_items }}` interpolation is a `workflow_dispatch`
input restricted to users with write access (equivalent or greater
privilege than the workflow token). `${{ inputs.backfill_type }}` is a
`choice` type with server-side enforcement. Author values in search
queries come from GitHub API responses with restricted character sets.
No high-confidence vulnerabilities identified.
This commit is contained in:
Mason Daugherty
2026-03-09 10:31:04 -04:00
committed by GitHub
parent 527fc02980
commit 360e0165ab
2 changed files with 339 additions and 1 deletions

82
.github/workflows/pr_size_labeler.yml vendored Normal file
View File

@@ -0,0 +1,82 @@
# Label PRs by size (changed lines, excluding lockfiles and docs).
#
# Size thresholds:
# XS: < 50, S: < 200, M: < 500, L: < 1000, XL: >= 1000
# Workflow that keeps exactly one "size: *" label on each pull request.
name: "📏 PR Size Labeler"
on:
# No step in this workflow checks out PR code; the single step below only
# talks to the GitHub API through actions/github-script.
pull_request_target:
# Recompute the size when a PR is opened, gets new commits, or is reopened.
types: [opened, synchronize, reopened]
# Workflow-wide default: read-only access to repository contents.
permissions:
contents: read
jobs:
size-label:
runs-on: ubuntu-latest
# Write scopes requested for this job; the script creates, adds and removes labels.
permissions:
pull-requests: write
issues: write
steps:
- name: Apply PR size label
uses: actions/github-script@v8
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const { owner, repo } = context.repo;
const pullRequest = context.payload.pull_request;
if (!pullRequest) return;
const sizeLabels = ['size: XS', 'size: S', 'size: M', 'size: L', 'size: XL'];
const labelColor = 'b76e79';
// Ensure labels exist
for (const name of sizeLabels) {
try {
await github.rest.issues.getLabel({ owner, repo, name });
} catch (error) {
if (error?.status !== 404) throw error;
await github.rest.issues.createLabel({
owner, repo, name, color: labelColor,
});
}
}
const files = await github.paginate(github.rest.pulls.listFiles, {
owner, repo, pull_number: pullRequest.number, per_page: 100,
});
const excludedFiles = new Set(['poetry.lock', 'uv.lock']);
const totalChangedLines = files.reduce((total, file) => {
const path = file.filename ?? '';
if (path.startsWith('docs/') || excludedFiles.has(path)) return total;
return total + (file.additions ?? 0) + (file.deletions ?? 0);
}, 0);
let targetSizeLabel = 'size: XL';
if (totalChangedLines < 50) targetSizeLabel = 'size: XS';
else if (totalChangedLines < 200) targetSizeLabel = 'size: S';
else if (totalChangedLines < 500) targetSizeLabel = 'size: M';
else if (totalChangedLines < 1000) targetSizeLabel = 'size: L';
// Remove stale size labels
const currentLabels = await github.paginate(
github.rest.issues.listLabelsOnIssue,
{ owner, repo, issue_number: pullRequest.number, per_page: 100 },
);
for (const label of currentLabels) {
const name = label.name ?? '';
if (sizeLabels.includes(name) && name !== targetSizeLabel) {
await github.rest.issues.removeLabel({
owner, repo, issue_number: pullRequest.number, name,
});
}
}
await github.rest.issues.addLabels({
owner, repo, issue_number: pullRequest.number, labels: [targetSizeLabel],
});
console.log(`PR #${pullRequest.number}: ${totalChangedLines} changed lines → ${targetSizeLabel}`);

View File

@@ -1,6 +1,7 @@
# Automatically tag issues and pull requests as "external" or "internal"
# based on whether the author is a member of the langchain-ai
# GitHub organization.
# GitHub organization, and apply contributor tier labels to external
# contributors based on their merged PR history.
#
# Setup Requirements:
# 1. Create a GitHub App with permissions:
@@ -13,6 +14,10 @@
#
# The GitHub App token is required to check private organization membership.
# Without it, the workflow will fail.
#
# Contributor tier thresholds:
# - trusted-contributor: >= 4 merged PRs
# - experienced-contributor: >= 10 merged PRs
name: Tag External Contributions
@@ -21,12 +26,27 @@ on:
types: [opened]
pull_request_target:
types: [opened]
# Manual trigger; it drives the separate backfill job over open PRs and issues.
workflow_dispatch:
inputs:
# Selects which item kinds the backfill script walks.
backfill_type:
description: "Backfill type (for initial run)"
default: "both"
type: choice
options:
- prs
- issues
- both
# Cap on processed items. Declared as a string; the backfill script parses it
# and falls back to 100 when the value is not usable.
max_items:
description: "Maximum number of items to process"
default: "100"
type: string
permissions:
contents: read
jobs:
tag-external:
if: github.event_name != 'workflow_dispatch'
runs-on: ubuntu-latest
permissions:
issues: write
@@ -149,3 +169,239 @@ jobs:
});
console.log(`Added 'internal' label to pull request #${pull_number}`);
# Only runs when the check-membership step reported the author as external.
- name: Apply contributor tier label
if: steps.check-membership.outputs.is-external == 'true'
uses: actions/github-script@v8
with:
# This step authenticates with the default workflow token.
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const { owner, repo } = context.repo;
const isPR = context.eventName === 'pull_request_target';
const item = isPR
? context.payload.pull_request
: context.payload.issue;
const author = item.user.login;
const issueNumber = item.number;
const TRUSTED_THRESHOLD = 4;
const EXPERIENCED_THRESHOLD = 10;
const mergedQuery = `repo:${owner}/${repo} is:pr is:merged author:${author}`;
let mergedCount = 0;
try {
const result = await github.rest.search.issuesAndPullRequests({
q: mergedQuery,
per_page: 1,
});
mergedCount = result?.data?.total_count ?? 0;
} catch (error) {
if (error?.status !== 422) throw error;
core.warning(`Search failed for ${author}; skipping tier label.`);
return;
}
let label = null;
if (mergedCount >= EXPERIENCED_THRESHOLD) {
label = 'experienced-contributor';
} else if (mergedCount >= TRUSTED_THRESHOLD) {
label = 'trusted-contributor';
}
if (label) {
await github.rest.issues.addLabels({
owner,
repo,
issue_number: issueNumber,
labels: [label],
});
console.log(`Applied '${label}' to #${issueNumber} (${mergedCount} merged PRs)`);
} else {
console.log(`No tier label for ${author} (${mergedCount} merged PRs)`);
}
# Manual backfill job: runs only for workflow_dispatch, the inverse of the
# tag-external job's guard.
backfill:
if: github.event_name == 'workflow_dispatch'
runs-on: ubuntu-latest
permissions:
issues: write
pull-requests: write
steps:
# The script checks org membership, which (per the setup notes at the top of
# this file) requires a GitHub App token.
- name: Generate GitHub App token
id: app-token
uses: actions/create-github-app-token@v2
with:
app-id: ${{ secrets.ORG_MEMBERSHIP_APP_ID }}
private-key: ${{ secrets.ORG_MEMBERSHIP_APP_PRIVATE_KEY }}
- name: Backfill labels
uses: actions/github-script@v8
with:
# Every API call in the script below is made with the App token generated above.
github-token: ${{ steps.app-token.outputs.token }}
script: |
// Repository owner and name for this run; reused by the API calls below.
const { owner, repo } = context.repo;
/**
 * Parse the `max_items` dispatch input into a positive item limit.
 *
 * The value is always read as base 10, and anything that is not a positive
 * integer (empty, non-numeric, zero, negative) yields the fallback. Without
 * the positivity check a negative input would make the backfill loops stop
 * before processing anything.
 *
 * @param {string} rawValue - Raw input text.
 * @param {number} [fallback=100] - Limit used when the input is unusable.
 * @returns {number} A positive integer limit.
 */
function parseMaxItems(rawValue, fallback = 100) {
  const parsed = Number.parseInt(rawValue, 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

// Both dispatch inputs are substituted into these string literals by the
// workflow expression engine before the script runs.
const maxItems = parseMaxItems('${{ inputs.max_items }}');
const backfillType = '${{ inputs.backfill_type }}';

// Merged-PR counts at which an external contributor earns each tier.
const TRUSTED_THRESHOLD = 4;
const EXPERIENCED_THRESHOLD = 10;

// Color given to any managed label this script has to create.
const LABEL_COLOR = 'b76e79';

// Label families managed by the backfill; stale members are removed per item.
const sizeLabels = ['size: XS', 'size: S', 'size: M', 'size: L', 'size: XL'];
const tierLabels = ['trusted-contributor', 'experienced-contributor'];
// Ensure tier and size labels exist.

/**
 * Report whether a label is already defined in the repository.
 * A 404 from the lookup means "missing"; any other failure is rethrown.
 */
async function labelExists(name) {
  try {
    await github.rest.issues.getLabel({ owner, repo, name });
    return true;
  } catch (error) {
    if (error?.status !== 404) throw error;
    return false;
  }
}

// Tier labels first, then size labels; create only the ones that are missing.
for (const name of tierLabels.concat(sizeLabels)) {
  if (await labelExists(name)) continue;
  await github.rest.issues.createLabel({
    owner, repo, name, color: LABEL_COLOR,
  });
  console.log(`Created label: ${name}`);
}
// Per-run memo keyed by author login; each value is { isExternal, mergedCount }.
const contributorCache = new Map();

/**
 * Look up, at most once per author, whether the author is external to the
 * langchain-ai org and how many merged PRs they have in this repository.
 *
 * @param {string} author - Login of the PR or issue author.
 * @returns {Promise<{isExternal: boolean, mergedCount: number}>} The cached or
 *   freshly computed record. `mergedCount` stays 0 for org members and when
 *   the search is rejected with a 422.
 * @throws Rethrows any search failure whose status is not 422.
 */
async function getContributorInfo(author) {
  if (contributorCache.has(author)) {
    return contributorCache.get(author);
  }

  // Authors stay external unless an active membership is confirmed.
  let isExternal = true;
  try {
    const response = await github.rest.orgs.getMembershipForUser({
      org: 'langchain-ai',
      username: author,
    });
    isExternal = response.data.state !== 'active';
  } catch (error) {
    // A 404 is handled silently; other failures are logged, and the author
    // is still treated as external.
    if (error.status !== 404) {
      core.warning(`Membership check failed for ${author}: ${error.message}`);
    }
  }

  // Merged-PR history only matters for external authors, so the search call
  // is skipped for members.
  let mergedCount = 0;
  if (isExternal) {
    const query = `repo:${owner}/${repo} is:pr is:merged author:${author}`;
    try {
      const found = await github.rest.search.issuesAndPullRequests({
        q: query,
        per_page: 1,
      });
      mergedCount = found?.data?.total_count ?? 0;
    } catch (error) {
      if (error?.status !== 422) throw error;
      core.warning(`Search failed for ${author}; skipping tier.`);
    }
  }

  const record = { isExternal, mergedCount };
  contributorCache.set(author, record);
  return record;
}
/**
 * Map a merged-PR count to a contributor tier label.
 *
 * @param {number} mergedCount - Number of merged PRs by the author.
 * @returns {string|null} The highest tier whose threshold is met, or null
 *   when the count is below every threshold.
 */
function getTierLabel(mergedCount) {
  // Highest tier first so the first match is the best one.
  const tiers = [
    [EXPERIENCED_THRESHOLD, 'experienced-contributor'],
    [TRUSTED_THRESHOLD, 'trusted-contributor'],
  ];
  const match = tiers.find(([minimum]) => mergedCount >= minimum);
  return match ? match[1] : null;
}
/**
 * Map a changed-line count to its `size: *` label.
 *
 * @param {number} totalChangedLines - Additions plus deletions being counted.
 * @returns {string} One of `size: XS`, `size: S`, `size: M`, `size: L`, `size: XL`.
 */
function getSizeLabel(totalChangedLines) {
  // Ascending exclusive upper bounds; the first bound above the count wins.
  const buckets = [
    [50, 'size: XS'],
    [200, 'size: S'],
    [500, 'size: M'],
    [1000, 'size: L'],
  ];
  for (const [upperBound, label] of buckets) {
    if (totalChangedLines < upperBound) return label;
  }
  return 'size: XL';
}
/**
 * Remove managed labels from an issue or PR that are no longer wanted.
 *
 * @param {number} issueNumber - Issue or PR number to clean up.
 * @param {string[]} labelsToKeep - Labels that must stay on the item.
 * @param {string[][]} labelSets - Managed label families; only labels found
 *   in one of these are ever removed.
 * @returns {Promise<void>}
 */
async function removeStaleLabels(issueNumber, labelsToKeep, labelSets) {
  const attached = await github.paginate(
    github.rest.issues.listLabelsOnIssue,
    { owner, repo, issue_number: issueNumber, per_page: 100 },
  );

  // Stale = belongs to a managed family but is not in the keep list.
  const staleNames = attached
    .map((label) => label.name ?? '')
    .filter((name) => labelSets.some((family) => family.includes(name)))
    .filter((name) => !labelsToKeep.includes(name));

  // Removals are issued one at a time, in the order the labels were listed.
  for (const name of staleNames) {
    await github.rest.issues.removeLabel({
      owner, repo, issue_number: issueNumber, name,
    });
  }
}
// Count of items labelled so far; the item limit is shared by the PR pass and
// the issue pass.
let processed = 0;
// Backfill PRs
if (backfillType === 'prs' || backfillType === 'both') {
  // Lockfiles are matched by basename. `file.filename` is a repo-relative
  // path, so comparing the whole path against these names would only exclude
  // a lockfile at the repository root and would count nested ones (for
  // example `libs/core/uv.lock`) toward the PR size.
  const excludedFiles = new Set(['poetry.lock', 'uv.lock']);

  const prs = await github.paginate(github.rest.pulls.list, {
    owner, repo, state: 'open', per_page: 100,
  });
  for (const pr of prs) {
    if (processed >= maxItems) break;
    const author = pr.user.login;
    const info = await getContributorInfo(author);

    // Membership label first, then the tier (external authors only).
    const labels = [];
    labels.push(info.isExternal ? 'external' : 'internal');
    if (info.isExternal) {
      const tier = getTierLabel(info.mergedCount);
      if (tier) labels.push(tier);
    }

    // Compute size label from additions + deletions of the counted files.
    const files = await github.paginate(github.rest.pulls.listFiles, {
      owner, repo, pull_number: pr.number, per_page: 100,
    });
    const totalChangedLines = files.reduce((total, file) => {
      const path = file.filename ?? '';
      const basename = path.slice(path.lastIndexOf('/') + 1);
      if (path.startsWith('docs/') || excludedFiles.has(basename)) return total;
      return total + (file.additions ?? 0) + (file.deletions ?? 0);
    }, 0);
    labels.push(getSizeLabel(totalChangedLines));

    // Drop outdated size/tier labels before applying the current set.
    await removeStaleLabels(pr.number, labels, [sizeLabels, tierLabels]);
    await github.rest.issues.addLabels({
      owner, repo, issue_number: pr.number, labels,
    });
    console.log(`PR #${pr.number} (${author}): ${labels.join(', ')}`);
    processed++;
  }
}
// Backfill issues
if (['issues', 'both'].includes(backfillType)) {
  const issues = await github.paginate(github.rest.issues.listForRepo, {
    owner, repo, state: 'open', per_page: 100,
  });
  for (const issue of issues) {
    // Stop once the item limit is reached.
    if (processed >= maxItems) break;
    // Entries carrying a `pull_request` field are skipped and not counted.
    if (issue.pull_request) continue;

    const author = issue.user.login;
    const { isExternal, mergedCount } = await getContributorInfo(author);

    // Membership label always; tier label only for external authors who qualify.
    const tier = isExternal ? getTierLabel(mergedCount) : null;
    const labels = [isExternal ? 'external' : 'internal'];
    if (tier) labels.push(tier);

    // Issues carry no size labels, so only the tier family is cleaned up.
    await removeStaleLabels(issue.number, labels, [tierLabels]);
    await github.rest.issues.addLabels({
      owner, repo, issue_number: issue.number, labels,
    });
    console.log(`Issue #${issue.number} (${author}): ${labels.join(', ')}`);
    processed++;
  }
}
// contributorCache.size is the number of distinct authors looked up, not a
// count of cache hits, so the summary reports it as such.
console.log(`\nBackfill complete. Processed ${processed} items. Looked up ${contributorCache.size} unique authors.`);