diff --git a/.github/workflows/check-repo-size.yml b/.github/workflows/check-repo-size.yml new file mode 100644 index 000000000..9668ad232 --- /dev/null +++ b/.github/workflows/check-repo-size.yml @@ -0,0 +1,53 @@ +name: Check repo size + +on: + pull_request: + types: [opened, synchronize, reopened] + +defaults: + run: + shell: bash + +permissions: + contents: read + pull-requests: write + +jobs: + check-repo-size: + name: Check repo size + runs-on: ubuntu-slim + # PRs from forks (and Dependabot, which behaves like a fork) get a + # read-only GITHUB_TOKEN that can't post comments, so the job would only + # ever fail. Skip them. + if: >- + github.event.pull_request.head.repo.full_name == github.repository && + github.triggering_actor != 'dependabot[bot]' + timeout-minutes: 10 + + steps: + - name: Checkout repository + uses: actions/checkout@v6 + with: + # Need full history so we have both the PR merge commit (HEAD) and + # the base ref locally for `git archive` to work against either. + fetch-depth: 0 + + - name: Set up Node.js + uses: actions/setup-node@v6 + with: + node-version: 24 + cache: 'npm' + + - name: Install pr-checks dependencies + working-directory: pr-checks + run: npm ci + + - name: Check repo size + working-directory: pr-checks + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + BASE_REF: ${{ github.event.pull_request.base.ref }} + PR_NUMBER: ${{ github.event.pull_request.number }} + GITHUB_REPOSITORY: ${{ github.repository }} + RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + run: npx tsx check-repo-size.ts diff --git a/package-lock.json b/package-lock.json index 130ed3da1..ff0319736 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10408,6 +10408,8 @@ }, "devDependencies": { "@types/node": "^20.19.39", + "@types/sinon": "^21.0.1", + "sinon": "^21.1.2", "tsx": "^4.21.0" } } diff --git a/pr-checks/check-repo-size.test.ts b/pr-checks/check-repo-size.test.ts new file mode 100644 index 
000000000..dcdb821b7 --- /dev/null +++ b/pr-checks/check-repo-size.test.ts @@ -0,0 +1,334 @@ +import * as assert from "node:assert/strict"; +import { execFileSync } from "node:child_process"; +import { randomBytes } from "node:crypto"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { afterEach, beforeEach, describe, it } from "node:test"; + +import { getOctokit } from "@actions/github"; +import * as sinon from "sinon"; + +import { + COMMENT_MARKER, + buildCommentBody, + formatBytes, + formatPercent, + isDeltaSignificant, + measureArchiveSize, + upsertSizeComment, +} from "./check-repo-size"; + +describe("formatBytes", async () => { + const cases: Array<[number, boolean, string]> = [ + // Unsigned: bytes / KiB / MiB boundaries. + [0, false, "0 B"], + [1, false, "1 B"], + [1023, false, "1023 B"], + [1024, false, "1.00 KiB"], + [2048, false, "2.00 KiB"], + [1024 * 1024 - 1, false, "1024.00 KiB"], + [1024 * 1024, false, "1.00 MiB"], + [2.5 * 1024 * 1024, false, "2.50 MiB"], + // Negative values always use a leading minus. + [-512, false, "-512 B"], + [-2048, false, "-2.00 KiB"], + [-2 * 1024 * 1024, false, "-2.00 MiB"], + // signed=true prepends a + to non-negative values. 
+ [0, true, "+0 B"], + [512, true, "+512 B"], + [2048, true, "+2.00 KiB"], + [-512, true, "-512 B"], + ]; + for (const [bytes, signed, expected] of cases) { + await it(`formats ${bytes} (signed=${signed}) as ${expected}`, () => { + assert.equal(formatBytes(bytes, signed), expected); + }); + } +}); + +describe("formatPercent", async () => { + await it("formats positive fractions with a leading +", () => { + assert.equal(formatPercent(0.1), "+10.00%"); + assert.equal(formatPercent(0.0123), "+1.23%"); + }); + + await it("formats negative fractions with a leading -", () => { + assert.equal(formatPercent(-0.1), "-10.00%"); + }); + + await it("formats zero without a sign", () => { + assert.equal(formatPercent(0), "0.00%"); + }); +}); + +describe("isDeltaSignificant", async () => { + const cases: Array<[number, number, number, boolean]> = [ + // At and above threshold (both signs). + [100, 1000, 0.1, true], + [101, 1000, 0.1, true], + [-100, 1000, 0.1, true], + // Below threshold (both signs, plus exact zero). 
+ [99, 1000, 0.1, false], + [-99, 1000, 0.1, false], + [0, 1000, 0.1, false], + ]; + for (const [delta, base, fraction, expected] of cases) { + await it(`returns ${expected} for delta=${delta}, base=${base}, fraction=${fraction}`, () => { + assert.equal(isDeltaSignificant(delta, base, fraction), expected); + }); + } +}); + +describe("buildCommentBody", async () => { + await it("includes the marker, the base/PR/delta rows, and the run URL", () => { + const body = buildCommentBody({ + baseRef: "main", + baseSize: 2_000_000, + prSize: 2_300_000, + runUrl: "https://example.test/run", + }); + + assert.match(body, new RegExp(`^${escapeRegExp(COMMENT_MARKER)}`)); + assert.match(body, /Base \(`main`\) \| 1\.91 MiB \(2000000 bytes\)/); + assert.match(body, /This PR \| 2\.19 MiB \(2300000 bytes\)/); + assert.match( + body, + /\*\*Delta\*\* \| \*\*\+292\.97 KiB \(\+300000 bytes, \+15\.00%\)\*\*/, + ); + assert.match(body, /\[workflow run\]\(https:\/\/example\.test\/run\)/); + }); + + await it("formats negative deltas with a leading minus and omits the run URL when missing", () => { + const body = buildCommentBody({ + baseRef: "main", + baseSize: 2_000_000, + prSize: 1_800_000, + }); + assert.match( + body, + /\*\*Delta\*\* \| \*\*-195\.31 KiB \(-200000 bytes, -10\.00%\)\*\*/, + ); + assert.doesNotMatch(body, /workflow run/); + }); +}); + +let repoDir: string; + +beforeEach(() => { + repoDir = fs.mkdtempSync(path.join(os.tmpdir(), "check-repo-size-test-")); + execFileSync("git", ["init", "--initial-branch=main", "-q"], { + cwd: repoDir, + }); + execFileSync("git", ["config", "user.email", "test@example.test"], { + cwd: repoDir, + }); + execFileSync("git", ["config", "user.name", "Test"], { cwd: repoDir }); + execFileSync("git", ["config", "commit.gpgsign", "false"], { cwd: repoDir }); +}); + +afterEach(() => { + fs.rmSync(repoDir, { recursive: true, force: true }); +}); + +function commit(name: string, content: string, message: string) { + fs.writeFileSync(path.join(repoDir, 
name), content); + execFileSync("git", ["add", name], { cwd: repoDir }); + execFileSync("git", ["commit", "-q", "-m", message], { cwd: repoDir }); +} + +describe("measureArchiveSize", async () => { + await it("returns a positive byte count for a non-empty repo", async () => { + commit("a.txt", "hello world\n", "first"); + const size = await measureArchiveSize("HEAD", repoDir); + assert.ok(size > 0, `expected size > 0, got ${size}`); + }); + + await it("returns the same size on repeated runs (deterministic)", async () => { + commit("a.txt", "hello world\n", "first"); + const a = await measureArchiveSize("HEAD", repoDir); + const b = await measureArchiveSize("HEAD", repoDir); + assert.equal(a, b); + }); + + await it("returns a larger size when more content is added", async () => { + commit("a.txt", "hello world\n", "first"); + const small = await measureArchiveSize("HEAD", repoDir); + + // Use random bytes so the new content is incompressible and the archive + // is guaranteed to grow even after gzip. + commit("b.bin", randomBytes(8192).toString("base64"), "second"); + const big = await measureArchiveSize("HEAD", repoDir); + assert.ok( + big > small, + `expected ${big} > ${small} after adding more content`, + ); + }); + + await it("ignores untracked files (e.g. 
node_modules)", async () => { + commit("a.txt", "hello\n", "first"); + commit(".gitignore", "node_modules/\n", "ignore node_modules"); + const sizeBefore = await measureArchiveSize("HEAD", repoDir); + + fs.mkdirSync(path.join(repoDir, "node_modules")); + fs.writeFileSync( + path.join(repoDir, "node_modules", "huge.bin"), + "x".repeat(1_000_000), + ); + + const sizeAfter = await measureArchiveSize("HEAD", repoDir); + assert.equal( + sizeAfter, + sizeBefore, + "untracked node_modules should not affect the archive size", + ); + }); + + await it("rejects when the ref does not exist", async () => { + commit("a.txt", "hello\n", "first"); + await assert.rejects( + () => measureArchiveSize("does-not-exist", repoDir), + /git archive does-not-exist exited with code/, + ); + }); +}); + +describe("upsertSizeComment", async () => { + const owner = "test-owner"; + const repo = "test-repo"; + const prNumber = 42; + + let octokit: ReturnType; + + beforeEach(() => { + octokit = getOctokit("test-token"); + }); + + afterEach(() => { + sinon.restore(); + }); + + function stubExistingComments(comments: Array<{ id: number; body: string }>) { + // upsertSizeComment calls `octokit.paginate(octokit.rest.issues.listComments, ...)`, + // so stubbing `paginate` directly mocks the listing without depending on how + // paginate walks Octokit's response (link headers etc.). 
+ return sinon.stub(octokit, "paginate").resolves(comments); + } + + await it("creates a new comment when none exists and the delta is significant", async () => { + stubExistingComments([]); + const createStub = sinon + .stub(octokit.rest.issues, "createComment") + .resolves({ data: { id: 999 } } as never); + + const result = await upsertSizeComment({ + octokit, + owner, + repo, + prNumber, + body: `${COMMENT_MARKER}\nhello`, + delta: 200, + baseSize: 1000, + }); + + assert.deepEqual(result, { action: "created", commentId: 999 }); + sinon.assert.calledOnce(createStub); + const createArgs = createStub.firstCall.args[0]!; + assert.equal(createArgs.owner, owner); + assert.equal(createArgs.repo, repo); + assert.equal(createArgs.issue_number, prNumber); + assert.ok(createArgs.body.includes(COMMENT_MARKER)); + }); + + await it("creates a new comment for a significant size decrease", async () => { + // Shrinkage matters too: it might indicate accidentally deleted tracked + // files. The full pipeline (not just isDeltaSignificant) needs to post on + // negative deltas. 
+ stubExistingComments([]); + const createStub = sinon + .stub(octokit.rest.issues, "createComment") + .resolves({ data: { id: 999 } } as never); + + const result = await upsertSizeComment({ + octokit, + owner, + repo, + prNumber, + body: `${COMMENT_MARKER}\nhello`, + delta: -200, + baseSize: 1000, + }); + + assert.deepEqual(result, { action: "created", commentId: 999 }); + sinon.assert.calledOnce(createStub); + }); + + await it("skips when no existing comment and delta is below threshold", async () => { + stubExistingComments([]); + const createStub = sinon.stub(octokit.rest.issues, "createComment"); + const updateStub = sinon.stub(octokit.rest.issues, "updateComment"); + + const result = await upsertSizeComment({ + octokit, + owner, + repo, + prNumber, + body: `${COMMENT_MARKER}\nhello`, + delta: 50, + baseSize: 1000, + }); + + assert.equal(result.action, "skipped"); + sinon.assert.notCalled(createStub); + sinon.assert.notCalled(updateStub); + }); + + await it("updates the existing comment when the delta is significant", async () => { + stubExistingComments([{ id: 7, body: `${COMMENT_MARKER}\nold body` }]); + const updateStub = sinon + .stub(octokit.rest.issues, "updateComment") + .resolves({ data: { id: 7 } } as never); + + const result = await upsertSizeComment({ + octokit, + owner, + repo, + prNumber, + body: `${COMMENT_MARKER}\nnew body`, + delta: 200, + baseSize: 1000, + }); + + assert.deepEqual(result, { action: "updated", commentId: 7 }); + sinon.assert.calledOnce(updateStub); + const updateArgs = updateStub.firstCall.args[0]!; + assert.equal(updateArgs.comment_id, 7); + assert.ok(updateArgs.body.includes("new body")); + }); + + await it("updates an existing comment even when the delta is below threshold", async () => { + // This keeps the comment in sync after a PR that initially had a big diff + // gets reduced below the threshold by a follow-up commit. 
+ stubExistingComments([{ id: 7, body: `${COMMENT_MARKER}\nold body` }]); + const updateStub = sinon + .stub(octokit.rest.issues, "updateComment") + .resolves({ data: { id: 7 } } as never); + + const result = await upsertSizeComment({ + octokit, + owner, + repo, + prNumber, + body: `${COMMENT_MARKER}\nnew body`, + delta: 1, + baseSize: 1000, + }); + + assert.deepEqual(result, { action: "updated", commentId: 7 }); + sinon.assert.calledOnce(updateStub); + }); +}); + +function escapeRegExp(s: string): string { + return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} diff --git a/pr-checks/check-repo-size.ts b/pr-checks/check-repo-size.ts new file mode 100644 index 000000000..485dfdf94 --- /dev/null +++ b/pr-checks/check-repo-size.ts @@ -0,0 +1,327 @@ +#!/usr/bin/env npx tsx + +/* +Computes the difference in the `.tar.gz`'d checkout size of the repo between the PR head and the PR +base, and posts/updates a sticky comment on the PR when the change is significant in either +direction. This size is relevant because it corresponds to the duration of the "Download action +repository" step that happens at the start of every job that uses this Action. + +Designed to be invoked from the `Check repo size` workflow on PR events, but also runnable locally +(with --dry-run) for testing. +*/ + +import { spawn } from "node:child_process"; +import * as path from "node:path"; +import { parseArgs } from "node:util"; + +import { getOctokit } from "@actions/github"; + +/** Hidden marker used to find the existing sticky comment on a PR. */ +export const COMMENT_MARKER = ""; + +export const DEFAULT_BASE_REF = "main"; +export const DEFAULT_REPOSITORY = "github/codeql-action"; + +/** + * Fraction of the base archive size at which a delta is considered + * significant enough to warrant a new sticky comment. We always update an + * existing comment regardless, so the comment stays in sync as the diff + * evolves. 
/**
 * Fraction of the base archive size at which a delta is considered
 * significant enough to warrant a new sticky comment. An existing comment is
 * always updated regardless, so it stays in sync as the diff evolves.
 */
export const SIGNIFICANT_DELTA_FRACTION = 0.1;

/** Type of the client object returned by `getOctokit`. */
export type Octokit = ReturnType<typeof getOctokit>;

/**
 * Stream `git archive --format=tar.gz <ref>` and count the compressed bytes.
 *
 * `git archive` only includes tracked files, so directories like
 * `node_modules` and `build` that aren't downloaded when starting up a CodeQL
 * job are ignored.
 *
 * @param ref Git ref (branch, tag, commit) to archive.
 * @param cwd Directory inside the repository in which to run git.
 * @returns The number of bytes git wrote to stdout.
 * @throws Error if git cannot be spawned, exits with a non-zero code, or is
 *   terminated by a signal. The message includes git's stderr output.
 */
export async function measureArchiveSize(
  ref: string,
  cwd: string,
): Promise<number> {
  const git = spawn("git", ["archive", "--format=tar.gz", ref], { cwd });

  // Decode stderr as a stream so a multi-byte character that straddles two
  // chunks is not corrupted by decoding each chunk on its own.
  git.stderr.setEncoding("utf8");
  let stderr = "";
  git.stderr.on("data", (chunk: string) => {
    stderr += chunk;
  });

  // The archive itself is never buffered; only its length is accumulated.
  let size = 0;
  git.stdout.on("data", (chunk: Buffer) => {
    size += chunk.length;
  });

  const { code, signal } = await new Promise<{
    code: number | null;
    signal: string | null;
  }>((resolve, reject) => {
    git.on("error", reject);
    git.on("close", (exitCode, exitSignal) => {
      resolve({ code: exitCode, signal: exitSignal });
    });
  });

  if (code !== 0) {
    // `code` is null when the process was killed by a signal; say so instead
    // of reporting "exited with code null".
    const outcome =
      code === null
        ? `was terminated by signal ${signal}`
        : `exited with code ${code}`;
    throw new Error(`git archive ${ref} ${outcome}: ${stderr.trim()}`);
  }
  return size;
}

/**
 * Format a byte count into a human-readable string with binary units
 * (B, KiB, MiB). Negative values always carry a leading `-`. If `signed` is
 * true, a leading `+` is prepended for non-negative values so gains and
 * losses are visually distinct.
 *
 * The unit is chosen before rounding, so a value just under 1 MiB is printed
 * as `1024.00 KiB`.
 */
export function formatBytes(bytes: number, signed = false): string {
  const sign = bytes < 0 ? "-" : signed ? "+" : "";
  const abs = Math.abs(bytes);
  if (abs < 1024) return `${sign}${abs} B`;
  if (abs < 1024 * 1024) return `${sign}${(abs / 1024).toFixed(2)} KiB`;
  return `${sign}${(abs / 1024 / 1024).toFixed(2)} MiB`;
}

/**
 * Format a fraction as a percentage with 2 decimal places. Positive values
 * get a leading `+`, negative values keep their `-`, and zero is unsigned.
 */
export function formatPercent(fraction: number): string {
  const pct = fraction * 100;
  const sign = pct > 0 ? "+" : "";
  return `${sign}${pct.toFixed(2)}%`;
}

/** Inputs needed to render the sticky comment. */
export interface CommentBodyOptions {
  /** Name of the base ref, shown in the table's "Base" row. */
  baseRef: string;
  /** Compressed archive size of the base ref, in bytes. */
  baseSize: number;
  /** Compressed archive size of the PR, in bytes. */
  prSize: number;
  /** Optional URL of the workflow run, included in the comment footer. */
  runUrl?: string;
}

/**
 * Render the Markdown body of the sticky comment: the marker line, a table
 * with the base size, PR size and delta, and an explanatory footer.
 *
 * @param opts Sizes and labels to render.
 * @returns The comment body, starting with `COMMENT_MARKER`.
 */
export function buildCommentBody(opts: CommentBodyOptions): string {
  const { baseRef, baseSize, prSize, runUrl } = opts;
  const delta = prSize - baseSize;
  const signedDelta = delta >= 0 ? `+${delta}` : `${delta}`;
  // A relative change against an empty base is undefined; dividing would
  // render "NaN%" or "+Infinity%" in the comment.
  const relativeDelta = baseSize > 0 ? formatPercent(delta / baseSize) : "n/a";
  const runUrlLine = runUrl
    ? ` See the [workflow run](${runUrl}) for details.`
    : "";

  return [
    COMMENT_MARKER,
    "### Repository checkout size",
    "",
    "| | Compressed archive size |",
    "|---|---|",
    `| Base (\`${baseRef}\`) | ${formatBytes(baseSize)} (${baseSize} bytes) |`,
    `| This PR | ${formatBytes(prSize)} (${prSize} bytes) |`,
    `| **Delta** | **${formatBytes(delta, true)} (${signedDelta} bytes, ${relativeDelta})** |`,
    "",
    "Sizes are measured by streaming `git archive --format=tar.gz <ref>`, " +
      "which includes all tracked files and excludes `node_modules` and " +
      "other untracked or git-ignored files. The compressed checkout is " +
      "downloaded by every consumer of this Action, so changes here directly " +
      `affect Action download time.${runUrlLine}`,
  ].join("\n");
}

/**
 * Returns true when the absolute delta is at least `fraction` of the base
 * size. Both increases and decreases are considered significant, so wins are
 * reported as well as losses.
 */
export function isDeltaSignificant(
  delta: number,
  baseSize: number,
  fraction: number,
): boolean {
  return Math.abs(delta) >= baseSize * fraction;
}

/** Inputs for creating or updating the sticky comment. */
export interface UpsertOptions {
  octokit: Octokit;
  owner: string;
  repo: string;
  prNumber: number;
  /** Full comment body to post. */
  body: string;
  /** PR size minus base size, in bytes. */
  delta: number;
  /** Base archive size in bytes, used for the significance threshold. */
  baseSize: number;
}

/** Outcome of an upsert: which action was taken, and on which comment. */
export type UpsertResult =
  | { action: "updated"; commentId: number }
  | { action: "created"; commentId: number }
  | { action: "skipped"; reason: string };
/**
 * Find an existing sticky comment on the PR by its marker. If one exists, it
 * is always updated (so it stays in sync). Otherwise a new comment is created
 * only when the delta is currently significant.
 *
 * The marker must be non-empty: `"...".includes("")` is always true, so an
 * empty `COMMENT_MARKER` would make the first comment on the PR look like the
 * sticky comment.
 *
 * @param opts Client, PR coordinates, comment body, and the sizes used for
 *   the significance check.
 * @returns Which action was taken, with the comment id or the skip reason.
 */
export async function upsertSizeComment(
  opts: UpsertOptions,
): Promise<UpsertResult> {
  const { octokit, owner, repo, prNumber, body, delta, baseSize } = opts;

  const comments = await octokit.paginate(octokit.rest.issues.listComments, {
    owner,
    repo,
    issue_number: prNumber,
    per_page: 100,
  });
  const existing = comments.find((c) =>
    (c.body ?? "").includes(COMMENT_MARKER),
  );

  if (existing) {
    await octokit.rest.issues.updateComment({
      owner,
      repo,
      comment_id: existing.id,
      body,
    });
    return { action: "updated", commentId: existing.id };
  }

  if (isDeltaSignificant(delta, baseSize, SIGNIFICANT_DELTA_FRACTION)) {
    const { data } = await octokit.rest.issues.createComment({
      owner,
      repo,
      issue_number: prNumber,
      body,
    });
    return { action: "created", commentId: data.id };
  }

  return {
    action: "skipped",
    reason:
      `delta ${delta} bytes is below ` +
      `${(SIGNIFICANT_DELTA_FRACTION * 100).toFixed(2)}% of base size ` +
      `${baseSize} bytes`,
  };
}

interface MainArgs {
  /** Base ref of the PR. Defaults to `main`, and is prefixed with `origin/` when passed to git. */
  baseRef: string;
  /** Numeric PR number used to find / create / update the sticky comment. */
  prNumber: number;
  /** `owner/repo` slug, defaulting to `github/codeql-action`, split before being passed to Octokit. */
  ownerRepo: string;
  /** Optional URL of the workflow run, surfaced in the comment footer. */
  runUrl?: string;
  /** When true, log the would-be comment instead of calling GitHub. */
  dryRun: boolean;
  /** GitHub token used to authenticate Octokit. Required unless `dryRun` is true. */
  token?: string;
}

/**
 * Read the script's configuration from the command line (`--dry-run`) and
 * from the environment (`BASE_REF`, `PR_NUMBER`, `GITHUB_REPOSITORY`,
 * `RUN_URL`, `GITHUB_TOKEN`).
 *
 * @returns The parsed arguments.
 * @throws Error if `PR_NUMBER` is missing or is not a positive integer, or
 *   if an unknown command-line option is passed (strict parsing).
 */
export function readArgs(): MainArgs {
  const { values } = parseArgs({
    options: {
      "dry-run": { type: "boolean", default: false },
    },
    strict: true,
  });

  const baseRef = process.env.BASE_REF ?? DEFAULT_BASE_REF;
  const prNumberStr = process.env.PR_NUMBER;
  const repo = process.env.GITHUB_REPOSITORY ?? DEFAULT_REPOSITORY;

  if (!prNumberStr) throw new Error("Missing PR_NUMBER env var");

  // `Number.parseInt` stops at the first non-digit, so "42abc" and "4.7"
  // would pass as 42 and 4, and "-5" as -5. Require the whole string to be
  // decimal digits instead.
  const trimmed = prNumberStr.trim();
  const prNumber = /^[0-9]+$/.test(trimmed) ? Number(trimmed) : Number.NaN;
  if (!Number.isSafeInteger(prNumber) || prNumber <= 0) {
    throw new Error(`Invalid PR_NUMBER value: ${prNumberStr}`);
  }

  return {
    baseRef,
    prNumber,
    ownerRepo: repo,
    runUrl: process.env.RUN_URL,
    dryRun: values["dry-run"] ?? false,
    token: process.env.GITHUB_TOKEN,
  };
}

/**
 * Measure the base and PR archive sizes, build the comment body, and either
 * print it (`--dry-run`) or create/update the sticky comment on the PR.
 *
 * @returns The process exit code (0 on success).
 * @throws Error if arguments are invalid, git fails, or the token is missing
 *   outside dry-run mode.
 */
async function main(): Promise<number> {
  const args = readArgs();

  // The script lives in the `pr-checks` directory of the repository, so the
  // repo root is always the parent directory.
  const repoRoot = path.resolve(__dirname, "..");

  console.log(`Measuring base archive size for origin/${args.baseRef}...`);
  const baseSize = await measureArchiveSize(`origin/${args.baseRef}`, repoRoot);
  console.log(`  ${baseSize} bytes`);

  console.log("Measuring PR archive size for HEAD...");
  const prSize = await measureArchiveSize("HEAD", repoRoot);
  console.log(`  ${prSize} bytes`);

  const delta = prSize - baseSize;
  console.log(`Delta: ${delta} bytes`);

  const body = buildCommentBody({
    baseRef: args.baseRef,
    baseSize,
    prSize,
    runUrl: args.runUrl,
  });

  if (args.dryRun) {
    const significant = isDeltaSignificant(
      delta,
      baseSize,
      SIGNIFICANT_DELTA_FRACTION,
    );
    console.log(
      `--dry-run: significant=${significant} (threshold ${(
        SIGNIFICANT_DELTA_FRACTION * 100
      ).toFixed(2)}%); would post:\n${body}`,
    );
    return 0;
  }

  if (!args.token) {
    throw new Error(
      "GITHUB_TOKEN env var is required when not running with --dry-run",
    );
  }

  // Require exactly `owner/repo`; destructuring alone would silently accept
  // and truncate a value such as `a/b/c`.
  const slugParts = args.ownerRepo.split("/");
  const [owner, repo] = slugParts;
  if (slugParts.length !== 2 || !owner || !repo) {
    throw new Error(`Invalid GITHUB_REPOSITORY value: ${args.ownerRepo}`);
  }

  const result = await upsertSizeComment({
    octokit: getOctokit(args.token),
    owner,
    repo,
    prNumber: args.prNumber,
    body,
    delta,
    baseSize,
  });

  switch (result.action) {
    case "updated":
      console.log(`Updated existing comment ${result.commentId}.`);
      break;
    case "created":
      console.log(`Created new comment ${result.commentId}.`);
      break;
    case "skipped":
      console.log(`Skipped commenting: ${result.reason}.`);
      break;
  }
  return 0;
}

// Run only when executed as a script, not when imported (e.g. by the tests).
if (require.main === module) {
  void (async () => {
    try {
      process.exit(await main());
    } catch (err) {
      console.error(err instanceof Error ? err.message : String(err));
      process.exit(1);
    }
  })();
}

/*
 * The same physical line of the collapsed patch continues with the
 * pr-checks/package.json hunk, reproduced here so it is not lost:
 *
 * diff --git a/pr-checks/package.json b/pr-checks/package.json
 * index 2741560f6..ff4b45238 100644
 * --- a/pr-checks/package.json
 * +++ b/pr-checks/package.json
 * @@ -11,6 +11,8 @@
 *    },
 *    "devDependencies": {
 *      "@types/node": "^20.19.39",
 * +    "@types/sinon": "^21.0.1",
 * +    "sinon": "^21.1.2",
 *      "tsx": "^4.21.0"
 *    }
 *  }
 */