Files
codeql-action/src/analyze.ts
T
2026-04-10 14:55:12 +01:00

720 lines
24 KiB
TypeScript

import * as fs from "fs";
import * as path from "path";
import { performance } from "perf_hooks";
import * as io from "@actions/io";
import * as yaml from "js-yaml";
import { getTemporaryDirectory, getRequiredInput } from "./actions-util";
import * as analyses from "./analyses";
import { setupCppAutobuild } from "./autobuild";
import { type CodeQL } from "./codeql";
import * as configUtils from "./config-utils";
import {
getCsharpTempDependencyDir,
getJavaTempDependencyDir,
} from "./dependency-caching";
import { addDiagnostic, makeDiagnostic } from "./diagnostics";
import {
DiffThunkRange,
readDiffRangesJsonFile,
} from "./diff-informed-analysis-utils";
import { EnvVar } from "./environment";
import { FeatureEnablement, Feature } from "./feature-flags";
import { KnownLanguage, Language } from "./languages";
import { Logger, withGroupAsync } from "./logging";
import { OverlayDatabaseMode } from "./overlay/overlay-database-mode";
import type * as sarif from "./sarif";
import { DatabaseCreationTimings, EventReport } from "./status-report";
import { endTracingForCluster } from "./tracer-config";
import * as util from "./util";
import { BuildMode } from "./util";
export class CodeQLAnalysisError extends Error {
constructor(
public queriesStatusReport: QueriesStatusReport,
public message: string,
public error: Error,
) {
super(message);
this.name = "CodeQLAnalysisError";
}
}
type KnownLanguageKey = keyof typeof KnownLanguage;
type RunQueriesDurationStatusReport = {
/**
* Time taken in ms to run queries for the language (or undefined if this language was not analyzed).
*
* The "builtin" designation is now outdated with the move to CLI config parsing: this is the time
* taken to run _all_ the queries.
*/
[L in KnownLanguageKey as `analyze_builtin_queries_${L}_duration_ms`]?: number;
};
type InterpretResultsDurationStatusReport = {
/** Time taken in ms to interpret results for the language (or undefined if this language was not analyzed). */
[L in KnownLanguageKey as `interpret_results_${L}_duration_ms`]?: number;
};
export interface QueriesStatusReport
extends RunQueriesDurationStatusReport,
InterpretResultsDurationStatusReport {
/**
* Whether the analysis is diff-informed (in the sense that the action generates a diff-range data
* extension for the analysis, regardless of whether the data extension is actually used by queries).
*/
analysis_is_diff_informed?: boolean;
/**
* Whether the analysis runs in overlay mode (i.e., uses an overlay-base database).
* This is true if the AugmentationProperties.overlayDatabaseMode === Overlay.
*/
analysis_is_overlay?: boolean;
/**
* Whether the analysis builds an overlay-base database.
* This is true if the AugmentationProperties.overlayDatabaseMode === OverlayBase.
*/
analysis_builds_overlay_base_database?: boolean;
/** Name of language that errored during analysis (or undefined if no language failed). */
analyze_failure_language?: string;
/** Reports on discrete events associated with this status report. */
event_reports?: EventReport[];
}
async function setupPythonExtractor(logger: Logger) {
const codeqlPython = process.env["CODEQL_PYTHON"];
if (codeqlPython === undefined || codeqlPython.length === 0) {
// If CODEQL_PYTHON is not set, no dependencies were installed, so we don't need to do anything
return;
}
logger.warning(
"The CODEQL_PYTHON environment variable is no longer supported. Please remove it from your workflow. This environment variable was originally used to specify a Python executable that included the dependencies of your Python code, however Python analysis no longer uses these dependencies." +
"\nIf you used CODEQL_PYTHON to force the version of Python to analyze as, please use CODEQL_EXTRACTOR_PYTHON_ANALYSIS_VERSION instead, such as 'CODEQL_EXTRACTOR_PYTHON_ANALYSIS_VERSION=2.7' or 'CODEQL_EXTRACTOR_PYTHON_ANALYSIS_VERSION=3.11'.",
);
return;
}
export async function runExtraction(
codeql: CodeQL,
features: FeatureEnablement,
config: configUtils.Config,
logger: Logger,
) {
for (const language of config.languages) {
if (dbIsFinalized(config, language, logger)) {
logger.debug(
`Database for ${language} has already been finalized, skipping extraction.`,
);
continue;
}
if (await shouldExtractLanguage(codeql, config, language)) {
logger.startGroup(`Extracting ${language}`);
if (language === KnownLanguage.python) {
await setupPythonExtractor(logger);
}
if (config.buildMode) {
if (
language === KnownLanguage.cpp &&
config.buildMode === BuildMode.Autobuild
) {
await setupCppAutobuild(codeql, logger);
}
// The Java and C# `build-mode: none` extractors place dependencies in the
// database scratch directory by default. For dependency caching purposes, we want
// a stable path that caches can be restored into and that we can cache at the
// end of the workflow (i.e. that does not get removed when the scratch directory is).
if (
language === KnownLanguage.java &&
config.buildMode === BuildMode.None
) {
process.env["CODEQL_EXTRACTOR_JAVA_OPTION_BUILDLESS_DEPENDENCY_DIR"] =
getJavaTempDependencyDir();
}
if (
language === KnownLanguage.csharp &&
config.buildMode === BuildMode.None &&
(await features.getValue(Feature.CsharpCacheBuildModeNone))
) {
process.env[
"CODEQL_EXTRACTOR_CSHARP_OPTION_BUILDLESS_DEPENDENCY_DIR"
] = getCsharpTempDependencyDir();
}
await codeql.extractUsingBuildMode(config, language);
} else {
await codeql.extractScannedLanguage(config, language);
}
logger.endGroup();
}
}
}
async function shouldExtractLanguage(
codeql: CodeQL,
config: configUtils.Config,
language: Language,
): Promise<boolean> {
return (
config.buildMode === BuildMode.None ||
(config.buildMode === BuildMode.Autobuild &&
process.env[EnvVar.AUTOBUILD_DID_COMPLETE_SUCCESSFULLY] !== "true") ||
(!config.buildMode && (await codeql.isScannedLanguage(language)))
);
}
export function dbIsFinalized(
config: configUtils.Config,
language: Language,
logger: Logger,
) {
const dbPath = util.getCodeQLDatabasePath(config, language);
try {
const dbInfo = yaml.load(
fs.readFileSync(path.resolve(dbPath, "codeql-database.yml"), "utf8"),
) as { inProgress?: boolean };
return !("inProgress" in dbInfo);
} catch {
logger.warning(
`Could not check whether database for ${language} was finalized. Assuming it is not.`,
);
return false;
}
}
async function finalizeDatabaseCreation(
codeql: CodeQL,
features: FeatureEnablement,
config: configUtils.Config,
threadsFlag: string,
memoryFlag: string,
logger: Logger,
): Promise<DatabaseCreationTimings> {
const extractionStart = performance.now();
await runExtraction(codeql, features, config, logger);
const extractionTime = performance.now() - extractionStart;
const trapImportStart = performance.now();
for (const language of config.languages) {
if (dbIsFinalized(config, language, logger)) {
logger.info(
`There is already a finalized database for ${language} at the location where the CodeQL Action places databases, so we did not create one.`,
);
} else {
logger.startGroup(`Finalizing ${language}`);
await codeql.finalizeDatabase(
util.getCodeQLDatabasePath(config, language),
threadsFlag,
memoryFlag,
config.debugMode,
);
logger.endGroup();
}
}
const trapImportTime = performance.now() - trapImportStart;
return {
scanned_language_extraction_duration_ms: Math.round(extractionTime),
trap_import_duration_ms: Math.round(trapImportTime),
};
}
/**
* Set up the diff-informed analysis feature.
*
* @returns Absolute path to the directory containing the extension pack for
* the diff range information, or `undefined` if the feature is disabled.
*/
export async function setupDiffInformedQueryRun(
logger: Logger,
): Promise<string | undefined> {
return await withGroupAsync(
"Generating diff range extension pack",
async () => {
const diffRanges = readDiffRangesJsonFile(logger);
if (diffRanges === undefined) {
logger.info(
"No precomputed diff ranges found; skipping diff-informed analysis stage.",
);
return undefined;
}
const checkoutPath = getRequiredInput("checkout_path");
const packDir = writeDiffRangeDataExtensionPack(
logger,
diffRanges,
checkoutPath,
);
if (packDir === undefined) {
logger.warning(
"Cannot create diff range extension pack for diff-informed queries; " +
"reverting to performing full analysis.",
);
} else {
logger.info(
`Successfully created diff range extension pack at ${packDir}.`,
);
}
return packDir;
},
);
}
export function diffRangeExtensionPackContents(
ranges: DiffThunkRange[],
checkoutPath: string,
): string {
const header = `
extensions:
- addsTo:
pack: codeql/util
extensible: restrictAlertsTo
checkPresence: false
data:
`;
let data = ranges
.map((range) => {
// Diff-informed queries expect the file path to be absolute. CodeQL always
// uses forward slashes as the path separator, so on Windows we need to
// replace any backslashes with forward slashes.
const filename = path
.join(checkoutPath, range.path)
.replaceAll(path.sep, "/");
// Using yaml.dump() with `forceQuotes: true` ensures that all special
// characters are escaped, and that the path is always rendered as a
// quoted string on a single line.
return (
` - [${yaml.dump(filename, { forceQuotes: true }).trim()}, ` +
`${range.startLine}, ${range.endLine}]\n`
);
})
.join("");
if (!data) {
// Ensure that the data extension is not empty, so that a pull request with
// no edited lines would exclude (instead of accepting) all alerts.
data = ' - ["", 0, 0]\n';
}
return header + data;
}
/**
* Create an extension pack in the temporary directory that contains the file
* line ranges that were added or modified in the pull request.
*
* @param logger
* @param ranges The file line ranges, as returned by
* `getPullRequestEditedDiffRanges`.
* @param checkoutPath The path at which the repository was checked out.
* @returns The absolute path of the directory containing the extension pack, or
* `undefined` if no extension pack was created.
*/
function writeDiffRangeDataExtensionPack(
logger: Logger,
ranges: DiffThunkRange[] | undefined,
checkoutPath: string,
): string | undefined {
if (ranges === undefined) {
return undefined;
}
if (ranges.length === 0) {
// An empty diff range means that there are no added or modified lines in
// the pull request. But the `restrictAlertsTo` extensible predicate
// interprets an empty data extension differently, as an indication that
// all alerts should be included. So we need to specifically set the diff
// range to a non-empty list that cannot match any alert location.
ranges = [{ path: "", startLine: 0, endLine: 0 }];
}
const diffRangeDir = path.join(getTemporaryDirectory(), "pr-diff-range");
// We expect the Actions temporary directory to already exist, so are mainly
// using `recursive: true` to avoid errors if the directory already exists,
// for example if the analyze Action is run multiple times in the same job.
// This is not really something that is supported, but we make use of it in
// tests.
fs.mkdirSync(diffRangeDir, { recursive: true });
fs.writeFileSync(
path.join(diffRangeDir, "qlpack.yml"),
`
name: codeql-action/pr-diff-range
version: 0.0.0
library: true
extensionTargets:
codeql/util: '*'
dataExtensions:
- pr-diff-range.yml
`,
);
const extensionContents = diffRangeExtensionPackContents(
ranges,
checkoutPath,
);
const extensionFilePath = path.join(diffRangeDir, "pr-diff-range.yml");
fs.writeFileSync(extensionFilePath, extensionContents);
logger.debug(
`Wrote pr-diff-range extension pack to ${extensionFilePath}:\n${extensionContents}`,
);
return diffRangeDir;
}
// A set of default query suite names that are understood by the CLI.
export const defaultSuites: Set<string> = new Set([
"security-experimental",
"security-extended",
"security-and-quality",
"code-quality",
"code-scanning",
]);
/**
* If `maybeSuite` is the name of a default query suite, it is resolved into the corresponding
* query suite name for the given `language`. Otherwise, `maybeSuite` is returned as is.
*
* @param language The language for which to resolve the default query suite name.
* @param maybeSuite The string that potentially contains the name of a default query suite.
* @returns Returns the resolved query suite name, or the unmodified input.
*/
export function resolveQuerySuiteAlias(
language: Language,
maybeSuite: string,
): string {
if (defaultSuites.has(maybeSuite)) {
return `${language}-${maybeSuite}.qls`;
}
return maybeSuite;
}
/**
* Adds the appropriate file extension for the given analysis configuration to the given base filename.
*/
export function addSarifExtension(
analysis: analyses.AnalysisConfig,
base: string,
): string {
return `${base}${analysis.sarifExtension}`;
}
// Runs queries and creates sarif files in the given folder
export async function runQueries(
sarifFolder: string,
memoryFlag: string,
threadsFlag: string,
diffRangePackDir: string | undefined,
automationDetailsId: string | undefined,
codeql: CodeQL,
config: configUtils.Config,
logger: Logger,
features: FeatureEnablement,
): Promise<QueriesStatusReport> {
const statusReport: QueriesStatusReport = {};
const queryFlags = [memoryFlag, threadsFlag];
const incrementalMode: string[] = [];
// Preserve cached intermediate results for overlay-base databases.
if (config.overlayDatabaseMode !== OverlayDatabaseMode.OverlayBase) {
queryFlags.push("--expect-discarded-cache");
}
statusReport.analysis_is_diff_informed = diffRangePackDir !== undefined;
if (diffRangePackDir) {
queryFlags.push(`--additional-packs=${diffRangePackDir}`);
queryFlags.push("--extension-packs=codeql-action/pr-diff-range");
incrementalMode.push("diff-informed");
}
statusReport.analysis_is_overlay =
config.overlayDatabaseMode === OverlayDatabaseMode.Overlay;
statusReport.analysis_builds_overlay_base_database =
config.overlayDatabaseMode === OverlayDatabaseMode.OverlayBase;
if (config.overlayDatabaseMode === OverlayDatabaseMode.Overlay) {
incrementalMode.push("overlay");
}
const sarifRunPropertyFlag =
incrementalMode.length > 0
? `--sarif-run-property=incrementalMode=${incrementalMode.join(",")}`
: undefined;
const dbAnalysisConfig = configUtils.getPrimaryAnalysisConfig(config);
for (const language of config.languages) {
try {
// This should be empty to run only the query suite that was generated when
// the database was initialised.
const queries: string[] = [];
// If multiple analysis kinds are enabled, the database is initialised for Code Scanning.
// To avoid duplicate work, we want to run queries for all analyses at the same time.
// To do this, we invoke `run-queries` once with the generated query suite that was created
// when the database was initialised + the queries for other analysis kinds.
if (config.analysisKinds.length > 1) {
queries.push(util.getGeneratedSuitePath(config, language));
if (configUtils.isCodeQualityEnabled(config)) {
for (const qualityQuery of analyses.codeQualityQueries) {
queries.push(resolveQuerySuiteAlias(language, qualityQuery));
}
}
}
// The work needed to generate the query suites
// is done in the CLI. We just need to make a single
// call to run all the queries for each language and
// another to interpret the results.
logger.startGroup(`Running queries for ${language}`);
const startTimeRunQueries = new Date().getTime();
const databasePath = util.getCodeQLDatabasePath(config, language);
await codeql.databaseRunQueries(databasePath, queryFlags, queries);
logger.debug(`Finished running queries for ${language}.`);
// TODO should not be using `builtin` here. We should be using `all` instead.
// The status report does not support `all` yet.
statusReport[`analyze_builtin_queries_${language}_duration_ms`] =
new Date().getTime() - startTimeRunQueries;
// There is always at least one analysis kind enabled. Running `interpret-results`
// produces the SARIF file for the analysis kind that the database was initialised with.
const startTimeInterpretResults = new Date();
const { summary: analysisSummary, sarifFile } =
await runInterpretResultsFor(
dbAnalysisConfig,
language,
undefined,
config.debugMode,
);
// This case is only needed if Code Quality is not the sole analysis kind.
// In this case, we will have run queries for all analysis kinds. The previous call to
// `interpret-results` will have produced a SARIF file for Code Scanning and we now
// need to produce an additional SARIF file for Code Quality.
let qualityAnalysisSummary: string | undefined;
if (
config.analysisKinds.length > 1 &&
configUtils.isCodeQualityEnabled(config)
) {
const qualityResult = await runInterpretResultsFor(
analyses.CodeQuality,
language,
analyses.codeQualityQueries.map((i) =>
resolveQuerySuiteAlias(language, i),
),
config.debugMode,
);
qualityAnalysisSummary = qualityResult.summary;
}
const endTimeInterpretResults = new Date();
statusReport[`interpret_results_${language}_duration_ms`] =
endTimeInterpretResults.getTime() - startTimeInterpretResults.getTime();
logger.endGroup();
if (analysisSummary.trim()) {
logger.info(analysisSummary);
}
if (qualityAnalysisSummary?.trim()) {
logger.info(qualityAnalysisSummary);
}
if (!config.enableFileCoverageInformation) {
logger.info(
"To speed up pull request analysis, file coverage information is only enabled when analyzing " +
"the default branch and protected branches.",
);
}
if (await features.getValue(Feature.QaTelemetryEnabled)) {
// Note: QA adds the `code-quality` query suite to the `queries` input,
// so this is fine since there is no `.quality.sarif`.
const perQueryAlertCounts = getPerQueryAlertCounts(sarifFile);
const perQueryAlertCountEventReport: EventReport = {
event: "codeql database interpret-results",
started_at: startTimeInterpretResults.toISOString(),
completed_at: endTimeInterpretResults.toISOString(),
exit_status: "success",
language,
properties: {
alertCounts: perQueryAlertCounts,
},
};
if (statusReport["event_reports"] === undefined) {
statusReport["event_reports"] = [];
}
statusReport["event_reports"].push(perQueryAlertCountEventReport);
}
} catch (e) {
statusReport.analyze_failure_language = language;
throw new CodeQLAnalysisError(
statusReport,
`Error running analysis for ${language}: ${util.getErrorMessage(e)}`,
util.wrapError(e),
);
}
}
return statusReport;
async function runInterpretResultsFor(
analysis: analyses.AnalysisConfig,
language: Language,
queries: string[] | undefined,
enableDebugLogging: boolean,
): Promise<{ summary: string; sarifFile: string }> {
logger.info(`Interpreting ${analysis.name} results for ${language}`);
// Apply the analysis configuration's `fixCategory` function to adjust the category if needed.
// This is a no-op for Code Scanning.
const category = analysis.fixCategory(logger, automationDetailsId);
const sarifFile = path.join(
sarifFolder,
addSarifExtension(analysis, language),
);
const summary = await runInterpretResults(
language,
queries,
sarifFile,
enableDebugLogging,
category,
);
return { summary, sarifFile };
}
async function runInterpretResults(
language: Language,
queries: string[] | undefined,
sarifFile: string,
enableDebugLogging: boolean,
category: string | undefined,
): Promise<string> {
const databasePath = util.getCodeQLDatabasePath(config, language);
return await codeql.databaseInterpretResults(
databasePath,
queries,
sarifFile,
threadsFlag,
enableDebugLogging ? "-vv" : "-v",
sarifRunPropertyFlag,
category,
config,
features,
);
}
/** Get an object with all queries and their counts parsed from a SARIF file path. */
function getPerQueryAlertCounts(sarifPath: string): Record<string, number> {
const sarifObject = JSON.parse(
fs.readFileSync(sarifPath, "utf8"),
) as sarif.Log;
// We do not need to compute fingerprints because we are not sending data based off of locations.
// Generate the query: alert count object
const perQueryAlertCounts: Record<string, number> = {};
// All rules (queries), from all results, from all runs
for (const sarifRun of sarifObject.runs) {
if (sarifRun.results) {
for (const result of sarifRun.results) {
const query = result.rule?.id || result.ruleId;
if (query) {
perQueryAlertCounts[query] = (perQueryAlertCounts[query] || 0) + 1;
}
}
}
}
return perQueryAlertCounts;
}
}
export async function runFinalize(
features: FeatureEnablement,
outputDir: string,
threadsFlag: string,
memoryFlag: string,
codeql: CodeQL,
config: configUtils.Config,
logger: Logger,
): Promise<DatabaseCreationTimings> {
try {
await fs.promises.rm(outputDir, { force: true, recursive: true });
} catch (error: any) {
if (error?.code !== "ENOENT") {
throw error;
}
}
await fs.promises.mkdir(outputDir, { recursive: true });
const timings = await finalizeDatabaseCreation(
codeql,
features,
config,
threadsFlag,
memoryFlag,
logger,
);
// If we didn't already end tracing in the autobuild Action, end it now.
if (process.env[EnvVar.AUTOBUILD_DID_COMPLETE_SUCCESSFULLY] !== "true") {
await endTracingForCluster(codeql, config, logger);
}
return timings;
}
export async function warnIfGoInstalledAfterInit(
config: configUtils.Config,
logger: Logger,
) {
// Check that `which go` still points at the same path it did when the `init` Action ran to ensure that no steps
// in-between performed any setup. We encourage users to perform all setup tasks before initializing CodeQL so that
// the setup tasks do not interfere with our analysis.
// Furthermore, if we installed a wrapper script in the `init` Action, we need to ensure that there isn't a step
// in the workflow after the `init` step which installs a different version of Go and takes precedence in the PATH,
// thus potentially circumventing our workaround that allows tracing to work.
const goInitPath = process.env[EnvVar.GO_BINARY_LOCATION];
if (
process.env[EnvVar.DID_AUTOBUILD_GOLANG] !== "true" &&
goInitPath !== undefined
) {
const goBinaryPath = await io.which("go", true);
if (goInitPath !== goBinaryPath) {
logger.warning(
`Expected \`which go\` to return ${goInitPath}, but got ${goBinaryPath}: please ensure that the correct version of Go is installed before the \`codeql-action/init\` Action is used.`,
);
addDiagnostic(
config,
KnownLanguage.go,
makeDiagnostic(
"go/workflow/go-installed-after-codeql-init",
"Go was installed after the `codeql-action/init` Action was run",
{
markdownMessage:
"To avoid interfering with the CodeQL analysis, perform all installation steps before calling the `github/codeql-action/init` Action.",
visibility: {
statusPage: true,
telemetry: true,
cliSummaryTable: true,
},
severity: "warning",
},
),
);
}
}
}