feat: always include files from diff in overlay changed files

This commit is contained in:
Sam Robson
2026-03-06 16:03:16 +00:00
parent 3d564d9359
commit 521c3536d3
15 changed files with 1486 additions and 640 deletions
+2 -9
View File
@@ -28,7 +28,6 @@ import {
DependencyCacheUploadStatusReport,
uploadDependencyCaches,
} from "./dependency-caching";
import { getDiffInformedAnalysisBranches } from "./diff-informed-analysis-utils";
import { EnvVar } from "./environment";
import { initFeatures } from "./feature-flags";
import { KnownLanguage } from "./languages";
@@ -305,14 +304,8 @@ async function run(startedAt: Date) {
logger,
);
const branches = await getDiffInformedAnalysisBranches(
codeql,
features,
logger,
);
const diffRangePackDir = branches
? await setupDiffInformedQueryRun(branches, logger)
: undefined;
// Set up diff-informed analysis if needed (based on whether the init step created the diff ranges file)
const diffRangePackDir = await setupDiffInformedQueryRun(logger);
await warnIfGoInstalledAfterInit(config, logger);
await runAutobuildIfLegacyGoWorkflow(config, logger);
+10 -16
View File
@@ -5,11 +5,7 @@ import { performance } from "perf_hooks";
import * as io from "@actions/io";
import * as yaml from "js-yaml";
import {
getTemporaryDirectory,
getRequiredInput,
PullRequestBranches,
} from "./actions-util";
import { getTemporaryDirectory, getRequiredInput } from "./actions-util";
import * as analyses from "./analyses";
import { setupCppAutobuild } from "./autobuild";
import { type CodeQL } from "./codeql";
@@ -21,8 +17,7 @@ import {
import { addDiagnostic, makeDiagnostic } from "./diagnostics";
import {
DiffThunkRange,
writeDiffRangesJsonFile,
getPullRequestEditedDiffRanges,
readDiffRangesJsonFile,
} from "./diff-informed-analysis-utils";
import { EnvVar } from "./environment";
import { FeatureEnablement, Feature } from "./feature-flags";
@@ -237,16 +232,19 @@ async function finalizeDatabaseCreation(
* the diff range information, or `undefined` if the feature is disabled.
*/
export async function setupDiffInformedQueryRun(
branches: PullRequestBranches,
logger: Logger,
): Promise<string | undefined> {
return await withGroupAsync(
"Generating diff range extension pack",
async () => {
logger.info(
`Calculating diff ranges for ${branches.base}...${branches.head}`,
);
const diffRanges = await getPullRequestEditedDiffRanges(branches, logger);
const diffRanges = readDiffRangesJsonFile(logger);
if (diffRanges === undefined) {
logger.info(
"No precomputed diff ranges found; skipping diff-informed analysis stage.",
);
return undefined;
}
const checkoutPath = getRequiredInput("checkout_path");
const packDir = writeDiffRangeDataExtensionPack(
logger,
@@ -368,10 +366,6 @@ dataExtensions:
`Wrote pr-diff-range extension pack to ${extensionFilePath}:\n${extensionContents}`,
);
// Write the diff ranges to a JSON file, for action-side alert filtering by the
// upload-lib module.
writeDiffRangesJsonFile(logger, ranges);
return diffRangeDir;
}
+8 -1
View File
@@ -105,7 +105,14 @@ export function readDiffRangesJsonFile(
logger.debug(
`Read pr-diff-range JSON file from ${jsonFilePath}:\n${jsonContents}`,
);
return JSON.parse(jsonContents) as DiffThunkRange[];
try {
return JSON.parse(jsonContents) as DiffThunkRange[];
} catch (e) {
logger.warning(
`Failed to parse diff ranges JSON file at ${jsonFilePath}: ${e}`,
);
return undefined;
}
}
/**
+43 -1
View File
@@ -37,6 +37,11 @@ import {
makeDiagnostic,
makeTelemetryDiagnostic,
} from "./diagnostics";
import {
getDiffInformedAnalysisBranches,
getPullRequestEditedDiffRanges,
writeDiffRangesJsonFile,
} from "./diff-informed-analysis-utils";
import { EnvVar } from "./environment";
import { Feature, FeatureEnablement, initFeatures } from "./feature-flags";
import {
@@ -54,7 +59,7 @@ import {
runDatabaseInitCluster,
} from "./init";
import { JavaEnvVars, KnownLanguage } from "./languages";
import { getActionsLogger, Logger } from "./logging";
import { getActionsLogger, Logger, withGroupAsync } from "./logging";
import {
downloadOverlayBaseDatabaseFromCache,
OverlayBaseDatabaseDownloadStats,
@@ -413,6 +418,7 @@ async function run(startedAt: Date) {
}
await checkInstallPython311(config.languages, codeql);
await computeAndPersistDiffRanges(codeql, features, logger);
} catch (unwrappedError) {
const error = wrapError(unwrappedError);
core.setFailed(error.message);
@@ -833,6 +839,42 @@ async function loadRepositoryProperties(
}
}
/**
 * Best-effort computation of the pull request's edited diff ranges during
 * init, persisted through `writeDiffRangesJsonFile` so that later steps can
 * reuse them.
 *
 * Nothing is written when `getDiffInformedAnalysisBranches` yields no
 * branches or when `getPullRequestEditedDiffRanges` returns `undefined`.
 * Any error raised along the way is reported as a warning and swallowed, so
 * this function never fails the calling action.
 *
 * @param codeql CodeQL instance forwarded to the branch lookup.
 * @param features Feature flags forwarded to the branch lookup.
 * @param logger Logger used for progress and warning messages.
 */
async function computeAndPersistDiffRanges(
  codeql: CodeQL,
  features: FeatureEnablement,
  logger: Logger,
): Promise<void> {
  // The unit of work that runs inside the log group.
  const persistRanges = async (): Promise<void> => {
    const prBranches = await getDiffInformedAnalysisBranches(
      codeql,
      features,
      logger,
    );
    const diffRanges = prBranches
      ? await getPullRequestEditedDiffRanges(prBranches, logger)
      : undefined;
    if (diffRanges === undefined) {
      return;
    }

    writeDiffRangesJsonFile(logger, diffRanges);

    // Several ranges may belong to the same file; report distinct files too.
    const touchedFileCount = new Set(diffRanges.map((range) => range.path))
      .size;
    logger.info(
      `Persisted ${diffRanges.length} diff range(s) across ${touchedFileCount} file(s).`,
    );
  };

  try {
    await withGroupAsync("Compute PR diff ranges", persistRanges);
  } catch (err) {
    logger.warning(
      `Failed to compute and persist PR diff ranges: ${getErrorMessage(err)}`,
    );
  }
}
async function recordZstdAvailability(
config: configUtils.Config,
zstdAvailability: ZstdAvailability,
+238 -6
View File
@@ -34,12 +34,14 @@ test.serial(
"writeOverlayChangesFile generates correct changes file",
async (t) => {
await withTmpDir(async (tmpDir) => {
const dbLocation = path.join(tmpDir, "db");
await fs.promises.mkdir(dbLocation, { recursive: true });
const sourceRoot = path.join(tmpDir, "src");
await fs.promises.mkdir(sourceRoot, { recursive: true });
const tempDir = path.join(tmpDir, "temp");
await fs.promises.mkdir(tempDir, { recursive: true });
const [dbLocation, sourceRoot, tempDir] = ["db", "src", "temp"].map((d) =>
path.join(tmpDir, d),
);
await Promise.all(
[dbLocation, sourceRoot, tempDir].map((d) =>
fs.promises.mkdir(d, { recursive: true }),
),
);
const logger = getRunnerLogger(true);
const config = createTestConfig({ dbLocation });
@@ -73,6 +75,9 @@ test.serial(
const getTempDirStub = sinon
.stub(actionsUtil, "getTemporaryDirectory")
.returns(tempDir);
const getGitRootStub = sinon
.stub(gitUtils, "getGitRoot")
.resolves(sourceRoot);
const changesFilePath = await writeOverlayChangesFile(
config,
sourceRoot,
@@ -80,6 +85,7 @@ test.serial(
);
getFileOidsStubForOverlay.restore();
getTempDirStub.restore();
getGitRootStub.restore();
const fileContent = await fs.promises.readFile(changesFilePath, "utf-8");
const parsedContent = JSON.parse(fileContent) as { changes: string[] };
@@ -93,6 +99,232 @@ test.serial(
},
);
// Verifies that files listed in pr-diff-range.json are merged into the overlay
// changes file alongside the files detected by OID comparison, without
// duplicates.
test.serial(
  "writeOverlayChangesFile merges additional diff files into overlay changes",
  async (t) => {
    await withTmpDir(async (tmpDir) => {
      const [dbLocation, sourceRoot, tempDir] = ["db", "src", "temp"].map((d) =>
        path.join(tmpDir, d),
      );
      await Promise.all(
        [dbLocation, sourceRoot, tempDir].map((d) =>
          fs.promises.mkdir(d, { recursive: true }),
        ),
      );

      const logger = getRunnerLogger(true);
      const config = createTestConfig({ dbLocation });

      // Mock the getFileOidsUnderPath function to return base OIDs.
      // "reverted.js" has the same OID in both base and current, simulating
      // a revert PR where the file content matches the overlay-base.
      const baseOids = {
        "unchanged.js": "aaa111",
        "modified.js": "bbb222",
        "reverted.js": "eee555",
      };
      const getFileOidsStubForBase = sinon
        .stub(gitUtils, "getFileOidsUnderPath")
        .resolves(baseOids);
      try {
        // Write the base database OIDs file
        await writeBaseDatabaseOidsFile(config, sourceRoot);
      } finally {
        // Restore even on failure so the stub cannot leak into later tests.
        getFileOidsStubForBase.restore();
      }

      // Mock the getFileOidsUnderPath function to return overlay OIDs.
      // "reverted.js" has the same OID as the base -- OID comparison alone
      // would NOT include it, only the diff ranges cause it to appear.
      const currentOids = {
        "unchanged.js": "aaa111",
        "modified.js": "ddd444", // Changed OID
        "reverted.js": "eee555", // Same OID as base -- not detected by OID comparison
      };
      const getFileOidsStubForOverlay = sinon
        .stub(gitUtils, "getFileOidsUnderPath")
        .resolves(currentOids);
      const getTempDirStub = sinon
        .stub(actionsUtil, "getTemporaryDirectory")
        .returns(tempDir);
      const getGitRootStub = sinon
        .stub(gitUtils, "getGitRoot")
        .resolves(sourceRoot);

      try {
        // Write a pr-diff-range.json file with diff ranges including
        // "reverted.js" (unchanged OIDs), "modified.js" (already in OID
        // changes) and "diff-only.js" (absent from both OID maps).
        await fs.promises.writeFile(
          path.join(tempDir, "pr-diff-range.json"),
          JSON.stringify([
            { path: "reverted.js", startLine: 1, endLine: 10 },
            { path: "modified.js", startLine: 1, endLine: 5 },
            { path: "diff-only.js", startLine: 1, endLine: 3 },
          ]),
        );

        const changesFilePath = await writeOverlayChangesFile(
          config,
          sourceRoot,
          logger,
        );

        const fileContent = await fs.promises.readFile(
          changesFilePath,
          "utf-8",
        );
        const parsedContent = JSON.parse(fileContent) as { changes: string[] };

        t.deepEqual(
          // Sort a copy so the parsed payload itself is left untouched.
          [...parsedContent.changes].sort(),
          ["diff-only.js", "modified.js", "reverted.js"],
          "Should include OID-changed files, diff-only files, and deduplicate overlapping files",
        );
      } finally {
        // Always undo the stubs, including when the call or assertion throws.
        getFileOidsStubForOverlay.restore();
        getTempDirStub.restore();
        getGitRootStub.restore();
      }
    });
  },
);
// Verifies that, when no pr-diff-range.json file exists, the overlay changes
// file contains only the files detected by OID comparison.
test.serial(
  "writeOverlayChangesFile works without additional diff files",
  async (t) => {
    await withTmpDir(async (tmpDir) => {
      const [dbLocation, sourceRoot, tempDir] = ["db", "src", "temp"].map((d) =>
        path.join(tmpDir, d),
      );
      await Promise.all(
        [dbLocation, sourceRoot, tempDir].map((d) =>
          fs.promises.mkdir(d, { recursive: true }),
        ),
      );

      const logger = getRunnerLogger(true);
      const config = createTestConfig({ dbLocation });

      // Mock the getFileOidsUnderPath function to return base OIDs
      const baseOids = {
        "unchanged.js": "aaa111",
        "modified.js": "bbb222",
      };
      const getFileOidsStubForBase = sinon
        .stub(gitUtils, "getFileOidsUnderPath")
        .resolves(baseOids);
      try {
        await writeBaseDatabaseOidsFile(config, sourceRoot);
      } finally {
        // Restore even on failure so the stub cannot leak into later tests.
        getFileOidsStubForBase.restore();
      }

      // Only "modified.js" differs from the base OIDs.
      const currentOids = {
        "unchanged.js": "aaa111",
        "modified.js": "ddd444",
      };
      const getFileOidsStubForOverlay = sinon
        .stub(gitUtils, "getFileOidsUnderPath")
        .resolves(currentOids);
      const getTempDirStub = sinon
        .stub(actionsUtil, "getTemporaryDirectory")
        .returns(tempDir);
      const getGitRootStub = sinon
        .stub(gitUtils, "getGitRoot")
        .resolves(sourceRoot);

      try {
        // No pr-diff-range.json file exists - should work the same as before
        const changesFilePath = await writeOverlayChangesFile(
          config,
          sourceRoot,
          logger,
        );

        const fileContent = await fs.promises.readFile(
          changesFilePath,
          "utf-8",
        );
        const parsedContent = JSON.parse(fileContent) as { changes: string[] };

        t.deepEqual(
          // Sort a copy so the parsed payload itself is left untouched.
          [...parsedContent.changes].sort(),
          ["modified.js"],
          "Should only include OID-changed files when no additional files provided",
        );
      } finally {
        // Always undo the stubs, including when the call or assertion throws.
        getFileOidsStubForOverlay.restore();
        getTempDirStub.restore();
        getGitRootStub.restore();
      }
    });
  },
);
// Verifies that repo-root-relative diff range paths are rewritten relative to
// sourceRoot, and that paths outside sourceRoot are dropped, when sourceRoot
// is a subdirectory of the git root.
test.serial(
  "writeOverlayChangesFile converts diff range paths to sourceRoot-relative when sourceRoot is a subdirectory",
  async (t) => {
    await withTmpDir(async (tmpDir) => {
      // Simulate: repo root = tmpDir, sourceRoot = tmpDir/src
      const repoRoot = tmpDir;
      const sourceRoot = path.join(tmpDir, "src");
      const [dbLocation, tempDir] = ["db", "temp"].map((d) =>
        path.join(tmpDir, d),
      );
      await Promise.all(
        [dbLocation, sourceRoot, tempDir].map((d) =>
          fs.promises.mkdir(d, { recursive: true }),
        ),
      );

      const logger = getRunnerLogger(true);
      const config = createTestConfig({ dbLocation });

      // Base OIDs (sourceRoot-relative paths)
      const baseOids = {
        "app.js": "aaa111",
        "lib/util.js": "bbb222",
      };
      const getFileOidsStubForBase = sinon
        .stub(gitUtils, "getFileOidsUnderPath")
        .resolves(baseOids);
      try {
        await writeBaseDatabaseOidsFile(config, sourceRoot);
      } finally {
        // Restore even on failure so the stub cannot leak into later tests.
        getFileOidsStubForBase.restore();
      }

      // Current OIDs — same as base (no OID changes)
      const currentOids = {
        "app.js": "aaa111",
        "lib/util.js": "bbb222",
      };
      const getFileOidsStubForOverlay = sinon
        .stub(gitUtils, "getFileOidsUnderPath")
        .resolves(currentOids);
      const getTempDirStub = sinon
        .stub(actionsUtil, "getTemporaryDirectory")
        .returns(tempDir);
      // getGitRoot returns the repo root (parent of sourceRoot)
      const getGitRootStub = sinon
        .stub(gitUtils, "getGitRoot")
        .resolves(repoRoot);

      try {
        // Diff ranges use repo-root-relative paths (as returned by the GitHub compare API)
        await fs.promises.writeFile(
          path.join(tempDir, "pr-diff-range.json"),
          JSON.stringify([
            { path: "src/app.js", startLine: 1, endLine: 10 },
            { path: "src/lib/util.js", startLine: 5, endLine: 8 },
            { path: "other/outside.js", startLine: 1, endLine: 3 }, // not under sourceRoot
          ]),
        );

        const changesFilePath = await writeOverlayChangesFile(
          config,
          sourceRoot,
          logger,
        );

        const fileContent = await fs.promises.readFile(
          changesFilePath,
          "utf-8",
        );
        const parsedContent = JSON.parse(fileContent) as { changes: string[] };

        t.deepEqual(
          // Sort a copy so the parsed payload itself is left untouched.
          [...parsedContent.changes].sort(),
          ["app.js", "lib/util.js"],
          "Should convert repo-root-relative paths to sourceRoot-relative and filter out files outside sourceRoot",
        );
      } finally {
        // Always undo the stubs, including when the call or assertion throws.
        getFileOidsStubForOverlay.restore();
        getTempDirStub.restore();
        getGitRootStub.restore();
      }
    });
  },
);
interface DownloadOverlayBaseDatabaseTestCase {
overlayDatabaseMode: OverlayDatabaseMode;
useOverlayDatabaseCaching: boolean;
+68 -3
View File
@@ -13,7 +13,7 @@ import { getAutomationID } from "../api-client";
import { createCacheKeyHash } from "../caching-utils";
import { type CodeQL } from "../codeql";
import { type Config } from "../config-utils";
import { getCommitOid, getFileOidsUnderPath } from "../git-utils";
import { getCommitOid, getFileOidsUnderPath, getGitRoot } from "../git-utils";
import { Logger, withGroupAsync } from "../logging";
import {
CleanupLevel,
@@ -130,11 +130,17 @@ export async function writeOverlayChangesFile(
): Promise<string> {
const baseFileOids = await readBaseDatabaseOidsFile(config, logger);
const overlayFileOids = await getFileOidsUnderPath(sourceRoot);
const changedFiles = computeChangedFiles(baseFileOids, overlayFileOids);
const oidChangedFiles = computeChangedFiles(baseFileOids, overlayFileOids);
logger.info(
`Found ${changedFiles.length} changed file(s) under ${sourceRoot}.`,
`Found ${oidChangedFiles.length} changed file(s) under ${sourceRoot} from OID comparison.`,
);
// Merge in any file paths from precomputed PR diff ranges to ensure the
// overlay always includes all files from the PR diff, even in edge cases
// like revert PRs where OID comparison shows no change.
const diffRangeFiles = await getDiffRangeFilePaths(sourceRoot, logger);
const changedFiles = [...new Set([...oidChangedFiles, ...diffRangeFiles])];
const changedFilesJson = JSON.stringify({ changes: changedFiles });
const overlayChangesFile = path.join(
getTemporaryDirectory(),
@@ -165,6 +171,65 @@ function computeChangedFiles(
return changes;
}
/**
 * Collects the distinct file paths named in the precomputed PR diff ranges
 * file (`pr-diff-range.json` in the temporary directory) and expresses them
 * relative to `sourceRoot`.
 *
 * This is best-effort: an empty array is returned when the file does not
 * exist, cannot be read or parsed, or does not hold a JSON array. Entries
 * that lack a string `path` are ignored.
 *
 * @param sourceRoot Directory the returned paths are made relative to.
 * @param logger Logger used for debug and warning messages.
 * @returns Deduplicated, `/`-separated paths under `sourceRoot`. If the git
 *   root cannot be determined, the paths are returned exactly as stored.
 */
async function getDiffRangeFilePaths(
  sourceRoot: string,
  logger: Logger,
): Promise<string[]> {
  const jsonFilePath = path.join(getTemporaryDirectory(), "pr-diff-range.json");
  if (!fs.existsSync(jsonFilePath)) {
    return [];
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(fs.readFileSync(jsonFilePath, "utf8"));
  } catch (e) {
    logger.warning(
      `Failed to parse diff ranges JSON file at ${jsonFilePath}: ${e}`,
    );
    return [];
  }

  // Syntactically valid JSON can still have the wrong shape (e.g. `null` or
  // an object). Bail out here instead of throwing a TypeError further down.
  if (!Array.isArray(parsed)) {
    logger.warning(
      `Ignoring diff ranges JSON file at ${jsonFilePath}: expected an array of diff ranges.`,
    );
    return [];
  }
  logger.debug(
    `Read ${parsed.length} diff range(s) from ${jsonFilePath} for overlay changes.`,
  );

  // Keep only well-formed entries, then deduplicate: one file usually
  // contributes several ranges.
  const repoRelativePaths = [
    ...new Set(
      parsed
        .map((range: unknown) => (range as { path?: unknown } | null)?.path)
        .filter((p): p is string => typeof p === "string"),
    ),
  ];

  // Diff-range paths are relative to the repo root (from the GitHub compare
  // API), but overlay changed files must be relative to sourceRoot (to match
  // getFileOidsUnderPath output). Convert and filter accordingly.
  const repoRoot = await getGitRoot(sourceRoot);
  if (repoRoot === undefined) {
    logger.warning(
      "Cannot determine git root; returning diff range paths as-is.",
    );
    return repoRelativePaths;
  }

  // e.g. if repoRoot=/workspace and sourceRoot=/workspace/src, prefix="src".
  // Normalize platform separators to "/" to match the diff-range paths.
  const sourceRootRelPrefix = path
    .relative(repoRoot, sourceRoot)
    .split(path.sep)
    .join("/");

  // If sourceRoot IS the repo root, prefix is "" and all paths pass through.
  if (sourceRootRelPrefix === "") {
    return repoRelativePaths;
  }

  const prefixWithSlash = `${sourceRootRelPrefix}/`;
  const result: string[] = [];
  for (const p of repoRelativePaths) {
    if (p.startsWith(prefixWithSlash)) {
      result.push(p.slice(prefixWithSlash.length));
    } else {
      logger.debug(
        `Skipping diff range path "${p}" (not under source root "${sourceRootRelPrefix}").`,
      );
    }
  }
  return result;
}
// Constants for database caching
const CACHE_VERSION = 1;
const CACHE_PREFIX = "codeql-overlay-base-database";