import * as fs from "fs";
import * as path from "path";
import { performance } from "perf_hooks";

import * as io from "@actions/io";
import * as yaml from "js-yaml";

import { getTemporaryDirectory, getRequiredInput } from "./actions-util";
import * as analyses from "./analyses";
import { setupCppAutobuild } from "./autobuild";
import { type CodeQL } from "./codeql";
import * as configUtils from "./config-utils";
import {
  getCsharpTempDependencyDir,
  getJavaTempDependencyDir,
} from "./dependency-caching";
import { addDiagnostic, makeDiagnostic } from "./diagnostics";
import {
  DiffThunkRange,
  readDiffRangesJsonFile,
} from "./diff-informed-analysis-utils";
import { EnvVar } from "./environment";
import { FeatureEnablement, Feature } from "./feature-flags";
import { KnownLanguage, Language } from "./languages";
import { Logger, withGroupAsync } from "./logging";
import { OverlayDatabaseMode } from "./overlay/overlay-database-mode";
import type * as sarif from "./sarif";
import { DatabaseCreationTimings, EventReport } from "./status-report";
import { endTracingForCluster } from "./tracer-config";
import * as util from "./util";
import { BuildMode } from "./util";

/**
 * Error raised by `runQueries` when analysing a language fails.
 *
 * Carries the status report accumulated up to the point of failure together
 * with the underlying error, so that callers can still report on the partial
 * run.
 */
export class CodeQLAnalysisError extends Error {
  constructor(
    public queriesStatusReport: QueriesStatusReport,
    public message: string,
    public error: Error,
  ) {
    super(message);
    this.name = "CodeQLAnalysisError";
  }
}

type KnownLanguageKey = keyof typeof KnownLanguage;

type RunQueriesDurationStatusReport = {
  /**
   * Time taken in ms to run queries for the language (or undefined if this language was not analyzed).
   *
   * The "builtin" designation is now outdated with the move to CLI config parsing: this is the time
   * taken to run _all_ the queries.
   */
  [L in KnownLanguageKey as `analyze_builtin_queries_${L}_duration_ms`]?: number;
};

type InterpretResultsDurationStatusReport = {
  /** Time taken in ms to interpret results for the language (or undefined if this language was not analyzed). */
  [L in KnownLanguageKey as `interpret_results_${L}_duration_ms`]?: number;
};

export interface QueriesStatusReport
  extends RunQueriesDurationStatusReport,
    InterpretResultsDurationStatusReport {
  /**
   * Whether the analysis is diff-informed (in the sense that the action generates a diff-range data
   * extension for the analysis, regardless of whether the data extension is actually used by queries).
   */
  analysis_is_diff_informed?: boolean;
  /**
   * Whether the analysis runs in overlay mode (i.e., uses an overlay-base database).
   * This is true if the AugmentationProperties.overlayDatabaseMode === Overlay.
   */
  analysis_is_overlay?: boolean;
  /**
   * Whether the analysis builds an overlay-base database.
   * This is true if the AugmentationProperties.overlayDatabaseMode === OverlayBase.
   */
  analysis_builds_overlay_base_database?: boolean;
  /** Name of language that errored during analysis (or undefined if no language failed). */
  analyze_failure_language?: string;
  /** Reports on discrete events associated with this status report. */
  event_reports?: EventReport[];
}

/**
 * Warns when the no-longer-supported `CODEQL_PYTHON` environment variable is
 * set to a non-empty value. Does nothing otherwise.
 */
async function setupPythonExtractor(logger: Logger) {
  const codeqlPython = process.env["CODEQL_PYTHON"];
  if (codeqlPython === undefined || codeqlPython.length === 0) {
    // If CODEQL_PYTHON is not set, no dependencies were installed, so we don't need to do anything
    return;
  }

  logger.warning(
    "The CODEQL_PYTHON environment variable is no longer supported. Please remove it from your workflow. This environment variable was originally used to specify a Python executable that included the dependencies of your Python code, however Python analysis no longer uses these dependencies." +
      "\nIf you used CODEQL_PYTHON to force the version of Python to analyze as, please use CODEQL_EXTRACTOR_PYTHON_ANALYSIS_VERSION instead, such as 'CODEQL_EXTRACTOR_PYTHON_ANALYSIS_VERSION=2.7' or 'CODEQL_EXTRACTOR_PYTHON_ANALYSIS_VERSION=3.11'.",
  );
}

/**
 * Runs extraction for every configured language whose database has not been
 * finalized yet and for which `shouldExtractLanguage` returns true.
 *
 * When a build mode is configured, extraction goes through
 * `codeql.extractUsingBuildMode`; otherwise the language is extracted as a
 * scanned language.
 */
export async function runExtraction(
  codeql: CodeQL,
  features: FeatureEnablement,
  config: configUtils.Config,
  logger: Logger,
) {
  for (const language of config.languages) {
    if (dbIsFinalized(config, language, logger)) {
      logger.debug(
        `Database for ${language} has already been finalized, skipping extraction.`,
      );
      continue;
    }

    if (await shouldExtractLanguage(codeql, config, language)) {
      logger.startGroup(`Extracting ${language}`);
      // Close the log group even if extraction throws, so that the failure is
      // not reported inside a dangling group.
      try {
        if (language === KnownLanguage.python) {
          await setupPythonExtractor(logger);
        }
        if (config.buildMode) {
          if (
            language === KnownLanguage.cpp &&
            config.buildMode === BuildMode.Autobuild
          ) {
            await setupCppAutobuild(codeql, logger);
          }

          // The Java and C# `build-mode: none` extractors place dependencies in the
          // database scratch directory by default. For dependency caching purposes, we want
          // a stable path that caches can be restored into and that we can cache at the
          // end of the workflow (i.e. that does not get removed when the scratch directory is).
          if (
            language === KnownLanguage.java &&
            config.buildMode === BuildMode.None
          ) {
            process.env["CODEQL_EXTRACTOR_JAVA_OPTION_BUILDLESS_DEPENDENCY_DIR"] =
              getJavaTempDependencyDir();
          }
          if (
            language === KnownLanguage.csharp &&
            config.buildMode === BuildMode.None &&
            (await features.getValue(Feature.CsharpCacheBuildModeNone))
          ) {
            process.env[
              "CODEQL_EXTRACTOR_CSHARP_OPTION_BUILDLESS_DEPENDENCY_DIR"
            ] = getCsharpTempDependencyDir();
          }

          await codeql.extractUsingBuildMode(config, language);
        } else {
          await codeql.extractScannedLanguage(config, language);
        }
      } finally {
        logger.endGroup();
      }
    }
  }
}

/**
 * Decides whether extraction should be run for `language` at this point.
 *
 * Returns true when the build mode is `none`, when the build mode is
 * `autobuild` and the autobuild step has not recorded a successful completion
 * in the environment, or when no build mode is set and the CLI reports the
 * language as a scanned language.
 */
async function shouldExtractLanguage(
  codeql: CodeQL,
  config: configUtils.Config,
  language: Language,
): Promise<boolean> {
  return (
    config.buildMode === BuildMode.None ||
    (config.buildMode === BuildMode.Autobuild &&
      process.env[EnvVar.AUTOBUILD_DID_COMPLETE_SUCCESSFULLY] !== "true") ||
    (!config.buildMode && (await codeql.isScannedLanguage(language)))
  );
}

/**
 * Checks whether the database for `language` has been finalized, judged by
 * the absence of an `inProgress` key in its `codeql-database.yml`.
 *
 * @returns `true` if the key is absent; `false` if it is present or if the
 * file cannot be read or parsed (in which case a warning is logged).
 */
export function dbIsFinalized(
  config: configUtils.Config,
  language: Language,
  logger: Logger,
) {
  const dbPath = util.getCodeQLDatabasePath(config, language);
  try {
    const dbInfo = yaml.load(
      fs.readFileSync(path.resolve(dbPath, "codeql-database.yml"), "utf8"),
    ) as { inProgress?: boolean };
    // If the file does not parse to an object, `in` throws and we fall
    // through to the conservative "not finalized" answer below.
    return !("inProgress" in dbInfo);
  } catch {
    logger.warning(
      `Could not check whether database for ${language} was finalized. Assuming it is not.`,
    );
    return false;
  }
}

/**
 * Runs extraction and then finalizes every language database that is not
 * already finalized.
 *
 * @returns The time spent in extraction and in finalization, each rounded to
 * whole milliseconds.
 */
async function finalizeDatabaseCreation(
  codeql: CodeQL,
  features: FeatureEnablement,
  config: configUtils.Config,
  threadsFlag: string,
  memoryFlag: string,
  logger: Logger,
): Promise<DatabaseCreationTimings> {
  const extractionStart = performance.now();
  await runExtraction(codeql, features, config, logger);
  const extractionTime = performance.now() - extractionStart;

  const trapImportStart = performance.now();
  for (const language of config.languages) {
    if (dbIsFinalized(config, language, logger)) {
      logger.info(
        `There is already a finalized database for ${language} at the location where the CodeQL Action places databases, so we did not create one.`,
      );
    } else {
      logger.startGroup(`Finalizing ${language}`);
      // Close the log group even if finalization throws.
      try {
        await codeql.finalizeDatabase(
          util.getCodeQLDatabasePath(config, language),
          threadsFlag,
          memoryFlag,
          config.debugMode,
        );
      } finally {
        logger.endGroup();
      }
    }
  }
  const trapImportTime = performance.now() - trapImportStart;

  return {
    scanned_language_extraction_duration_ms: Math.round(extractionTime),
    trap_import_duration_ms: Math.round(trapImportTime),
  };
}

/**
 * Set up the diff-informed analysis feature.
 *
 * @returns Absolute path to the directory containing the extension pack for
 * the diff range information, or `undefined` if the feature is disabled.
 */
export async function setupDiffInformedQueryRun(
  logger: Logger,
): Promise<string | undefined> {
  return await withGroupAsync(
    "Generating diff range extension pack",
    async () => {
      const diffRanges = readDiffRangesJsonFile(logger);
      if (diffRanges === undefined) {
        logger.info(
          "No precomputed diff ranges found; skipping diff-informed analysis stage.",
        );
        return undefined;
      }

      const checkoutPath = getRequiredInput("checkout_path");
      const packDir = writeDiffRangeDataExtensionPack(
        logger,
        diffRanges,
        checkoutPath,
      );
      if (packDir === undefined) {
        logger.warning(
          "Cannot create diff range extension pack for diff-informed queries; " +
            "reverting to performing full analysis.",
        );
      } else {
        logger.info(
          `Successfully created diff range extension pack at ${packDir}.`,
        );
      }
      return packDir;
    },
  );
}

/**
 * Renders the YAML data extension that restricts alerts to the given ranges.
 *
 * @param ranges The file line ranges to emit, one data row per range.
 * @param checkoutPath Directory that each range's relative path is joined to.
 * @returns The YAML text of the data extension. If `ranges` is empty, a single
 * placeholder row is emitted so the extension is never empty.
 */
export function diffRangeExtensionPackContents(
  ranges: DiffThunkRange[],
  checkoutPath: string,
): string {
  // YAML indentation is significant: the data rows appended below must be
  // nested under the `data:` key of this header.
  const header = `
extensions:
  - addsTo:
      pack: codeql/util
      extensible: restrictAlertsTo
      checkPresence: false
    data:
`;

  let data = ranges
    .map((range) => {
      // Diff-informed queries expect the file path to be absolute. CodeQL always
      // uses forward slashes as the path separator, so on Windows we need to
      // replace any backslashes with forward slashes.
      const filename = path
        .join(checkoutPath, range.path)
        .replaceAll(path.sep, "/");

      // Using yaml.dump() with `forceQuotes: true` ensures that all special
      // characters are escaped, and that the path is always rendered as a
      // quoted string on a single line.
      return (
        `      - [${yaml.dump(filename, { forceQuotes: true }).trim()}, ` +
        `${range.startLine}, ${range.endLine}]\n`
      );
    })
    .join("");
  if (!data) {
    // Ensure that the data extension is not empty, so that a pull request with
    // no edited lines would exclude (instead of accepting) all alerts.
    data = '      - ["", 0, 0]\n';
  }

  return header + data;
}

/**
 * Create an extension pack in the temporary directory that contains the file
 * line ranges that were added or modified in the pull request.
 *
 * @param logger
 * @param ranges The file line ranges, as returned by
 * `getPullRequestEditedDiffRanges`.
 * @param checkoutPath The path at which the repository was checked out.
 * @returns The absolute path of the directory containing the extension pack, or
 * `undefined` if no extension pack was created.
 */
function writeDiffRangeDataExtensionPack(
  logger: Logger,
  ranges: DiffThunkRange[] | undefined,
  checkoutPath: string,
): string | undefined {
  if (ranges === undefined) {
    return undefined;
  }

  if (ranges.length === 0) {
    // An empty diff range means that there are no added or modified lines in
    // the pull request. But the `restrictAlertsTo` extensible predicate
    // interprets an empty data extension differently, as an indication that
    // all alerts should be included. So we need to specifically set the diff
    // range to a non-empty list that cannot match any alert location.
    ranges = [{ path: "", startLine: 0, endLine: 0 }];
  }

  const diffRangeDir = path.join(getTemporaryDirectory(), "pr-diff-range");

  // We expect the Actions temporary directory to already exist, so are mainly
  // using `recursive: true` to avoid errors if the directory already exists,
  // for example if the analyze Action is run multiple times in the same job.
  // This is not really something that is supported, but we make use of it in
  // tests.
  fs.mkdirSync(diffRangeDir, { recursive: true });
  fs.writeFileSync(
    path.join(diffRangeDir, "qlpack.yml"),
    `
name: codeql-action/pr-diff-range
version: 0.0.0
library: true
extensionTargets:
  codeql/util: '*'
dataExtensions:
  - pr-diff-range.yml
`,
  );

  const extensionContents = diffRangeExtensionPackContents(
    ranges,
    checkoutPath,
  );
  const extensionFilePath = path.join(diffRangeDir, "pr-diff-range.yml");
  fs.writeFileSync(extensionFilePath, extensionContents);
  logger.debug(
    `Wrote pr-diff-range extension pack to ${extensionFilePath}:\n${extensionContents}`,
  );

  return diffRangeDir;
}

// A set of default query suite names that are understood by the CLI.
export const defaultSuites: Set<string> = new Set([
  "security-experimental",
  "security-extended",
  "security-and-quality",
  "code-quality",
  "code-scanning",
]);

/**
 * If `maybeSuite` is the name of a default query suite, it is resolved into the corresponding
 * query suite name for the given `language`. Otherwise, `maybeSuite` is returned as is.
 *
 * @param language The language for which to resolve the default query suite name.
 * @param maybeSuite The string that potentially contains the name of a default query suite.
 * @returns Returns the resolved query suite name, or the unmodified input.
 */
export function resolveQuerySuiteAlias(
  language: Language,
  maybeSuite: string,
): string {
  if (defaultSuites.has(maybeSuite)) {
    return `${language}-${maybeSuite}.qls`;
  }

  return maybeSuite;
}

/**
 * Adds the appropriate file extension for the given analysis configuration to the given base filename.
 */
export function addSarifExtension(
  analysis: analyses.AnalysisConfig,
  base: string,
): string {
  return `${base}${analysis.sarifExtension}`;
}

/**
 * Runs queries and creates sarif files in the given folder.
 *
 * For each configured language this runs the queries once, interprets the
 * results for the primary analysis kind and, when several analysis kinds are
 * enabled and Code Quality is one of them, interprets the results a second
 * time for Code Quality.
 *
 * @returns A status report with per-language timings and analysis-mode flags.
 * @throws CodeQLAnalysisError if any step fails for a language; the error
 * carries the status report accumulated so far.
 */
export async function runQueries(
  sarifFolder: string,
  memoryFlag: string,
  threadsFlag: string,
  diffRangePackDir: string | undefined,
  automationDetailsId: string | undefined,
  codeql: CodeQL,
  config: configUtils.Config,
  logger: Logger,
  features: FeatureEnablement,
): Promise<QueriesStatusReport> {
  const statusReport: QueriesStatusReport = {};
  const queryFlags = [memoryFlag, threadsFlag];
  const incrementalMode: string[] = [];

  // Preserve cached intermediate results for overlay-base databases.
  if (config.overlayDatabaseMode !== OverlayDatabaseMode.OverlayBase) {
    queryFlags.push("--expect-discarded-cache");
  }

  statusReport.analysis_is_diff_informed = diffRangePackDir !== undefined;
  if (diffRangePackDir) {
    queryFlags.push(`--additional-packs=${diffRangePackDir}`);
    queryFlags.push("--extension-packs=codeql-action/pr-diff-range");
    incrementalMode.push("diff-informed");
  }

  statusReport.analysis_is_overlay =
    config.overlayDatabaseMode === OverlayDatabaseMode.Overlay;
  statusReport.analysis_builds_overlay_base_database =
    config.overlayDatabaseMode === OverlayDatabaseMode.OverlayBase;
  if (config.overlayDatabaseMode === OverlayDatabaseMode.Overlay) {
    incrementalMode.push("overlay");
  }

  const sarifRunPropertyFlag =
    incrementalMode.length > 0
      ? `--sarif-run-property=incrementalMode=${incrementalMode.join(",")}`
      : undefined;

  const dbAnalysisConfig = configUtils.getPrimaryAnalysisConfig(config);

  for (const language of config.languages) {
    // Tracks whether the log group opened below still has to be closed if a
    // step throws part-way through.
    let logGroupOpen = false;
    try {
      // This should be empty to run only the query suite that was generated when
      // the database was initialised.
      const queries: string[] = [];

      // If multiple analysis kinds are enabled, the database is initialised for Code Scanning.
      // To avoid duplicate work, we want to run queries for all analyses at the same time.
      // To do this, we invoke `run-queries` once with the generated query suite that was created
      // when the database was initialised + the queries for other analysis kinds.
      if (config.analysisKinds.length > 1) {
        queries.push(util.getGeneratedSuitePath(config, language));

        if (configUtils.isCodeQualityEnabled(config)) {
          for (const qualityQuery of analyses.codeQualityQueries) {
            queries.push(resolveQuerySuiteAlias(language, qualityQuery));
          }
        }
      }

      // The work needed to generate the query suites
      // is done in the CLI. We just need to make a single
      // call to run all the queries for each language and
      // another to interpret the results.
      logger.startGroup(`Running queries for ${language}`);
      logGroupOpen = true;
      const startTimeRunQueries = new Date().getTime();
      const databasePath = util.getCodeQLDatabasePath(config, language);
      await codeql.databaseRunQueries(databasePath, queryFlags, queries);
      logger.debug(`Finished running queries for ${language}.`);
      // TODO should not be using `builtin` here. We should be using `all` instead.
      // The status report does not support `all` yet.
      statusReport[`analyze_builtin_queries_${language}_duration_ms`] =
        new Date().getTime() - startTimeRunQueries;

      // There is always at least one analysis kind enabled. Running `interpret-results`
      // produces the SARIF file for the analysis kind that the database was initialised with.
      const startTimeInterpretResults = new Date();
      const { summary: analysisSummary, sarifFile } =
        await runInterpretResultsFor(
          dbAnalysisConfig,
          language,
          undefined,
          config.debugMode,
        );

      // This case is only needed if Code Quality is not the sole analysis kind.
      // In this case, we will have run queries for all analysis kinds. The previous call to
      // `interpret-results` will have produced a SARIF file for Code Scanning and we now
      // need to produce an additional SARIF file for Code Quality.
      let qualityAnalysisSummary: string | undefined;
      if (
        config.analysisKinds.length > 1 &&
        configUtils.isCodeQualityEnabled(config)
      ) {
        const qualityResult = await runInterpretResultsFor(
          analyses.CodeQuality,
          language,
          analyses.codeQualityQueries.map((i) =>
            resolveQuerySuiteAlias(language, i),
          ),
          config.debugMode,
        );
        qualityAnalysisSummary = qualityResult.summary;
      }
      const endTimeInterpretResults = new Date();
      statusReport[`interpret_results_${language}_duration_ms`] =
        endTimeInterpretResults.getTime() - startTimeInterpretResults.getTime();
      logger.endGroup();
      logGroupOpen = false;

      if (analysisSummary.trim()) {
        logger.info(analysisSummary);
      }
      if (qualityAnalysisSummary?.trim()) {
        logger.info(qualityAnalysisSummary);
      }

      if (!config.enableFileCoverageInformation) {
        logger.info(
          "To speed up pull request analysis, file coverage information is only enabled when analyzing " +
            "the default branch and protected branches.",
        );
      }

      if (await features.getValue(Feature.QaTelemetryEnabled)) {
        // Note: QA adds the `code-quality` query suite to the `queries` input,
        // so this is fine since there is no `.quality.sarif`.
        const perQueryAlertCounts = getPerQueryAlertCounts(sarifFile);

        const perQueryAlertCountEventReport: EventReport = {
          event: "codeql database interpret-results",
          started_at: startTimeInterpretResults.toISOString(),
          completed_at: endTimeInterpretResults.toISOString(),
          exit_status: "success",
          language,
          properties: {
            alertCounts: perQueryAlertCounts,
          },
        };

        if (statusReport["event_reports"] === undefined) {
          statusReport["event_reports"] = [];
        }
        statusReport["event_reports"].push(perQueryAlertCountEventReport);
      }
    } catch (e) {
      if (logGroupOpen) {
        logger.endGroup();
      }
      statusReport.analyze_failure_language = language;
      throw new CodeQLAnalysisError(
        statusReport,
        `Error running analysis for ${language}: ${util.getErrorMessage(e)}`,
        util.wrapError(e),
      );
    }
  }

  return statusReport;

  /**
   * Interprets the results for one analysis kind and language, writing the
   * SARIF file into `sarifFolder` under a name derived from the language and
   * the analysis kind's SARIF extension.
   */
  async function runInterpretResultsFor(
    analysis: analyses.AnalysisConfig,
    language: Language,
    queries: string[] | undefined,
    enableDebugLogging: boolean,
  ): Promise<{ summary: string; sarifFile: string }> {
    logger.info(`Interpreting ${analysis.name} results for ${language}`);

    // Apply the analysis configuration's `fixCategory` function to adjust the category if needed.
    // This is a no-op for Code Scanning.
    const category = analysis.fixCategory(logger, automationDetailsId);

    const sarifFile = path.join(
      sarifFolder,
      addSarifExtension(analysis, language),
    );

    const summary = await runInterpretResults(
      language,
      queries,
      sarifFile,
      enableDebugLogging,
      category,
    );

    return { summary, sarifFile };
  }

  /**
   * Invokes `codeql.databaseInterpretResults` for the language's database and
   * returns the summary it produces.
   */
  async function runInterpretResults(
    language: Language,
    queries: string[] | undefined,
    sarifFile: string,
    enableDebugLogging: boolean,
    category: string | undefined,
  ): Promise<string> {
    const databasePath = util.getCodeQLDatabasePath(config, language);
    return await codeql.databaseInterpretResults(
      databasePath,
      queries,
      sarifFile,
      threadsFlag,
      enableDebugLogging ? "-vv" : "-v",
      sarifRunPropertyFlag,
      category,
      config,
      features,
    );
  }

  /** Get an object with all queries and their counts parsed from a SARIF file path. */
  function getPerQueryAlertCounts(sarifPath: string): Record<string, number> {
    const sarifObject = JSON.parse(
      fs.readFileSync(sarifPath, "utf8"),
    ) as sarif.Log;
    // We do not need to compute fingerprints because we are not sending data based off of locations.

    // Generate the query: alert count object
    const perQueryAlertCounts: Record<string, number> = {};

    // All rules (queries), from all results, from all runs
    for (const sarifRun of sarifObject.runs) {
      if (sarifRun.results) {
        for (const result of sarifRun.results) {
          const query = result.rule?.id || result.ruleId;
          if (query) {
            perQueryAlertCounts[query] = (perQueryAlertCounts[query] || 0) + 1;
          }
        }
      }
    }
    return perQueryAlertCounts;
  }
}

/**
 * Recreates `outputDir` as an empty directory, completes database creation,
 * and ends tracing unless the autobuild step has already recorded a
 * successful completion in the environment.
 *
 * @returns The extraction and finalization timings.
 */
export async function runFinalize(
  features: FeatureEnablement,
  outputDir: string,
  threadsFlag: string,
  memoryFlag: string,
  codeql: CodeQL,
  config: configUtils.Config,
  logger: Logger,
): Promise<DatabaseCreationTimings> {
  try {
    await fs.promises.rm(outputDir, { force: true, recursive: true });
  } catch (error: unknown) {
    // A missing directory is fine; anything else is re-thrown unchanged.
    const code = (error as { code?: unknown } | null | undefined)?.code;
    if (code !== "ENOENT") {
      throw error;
    }
  }
  await fs.promises.mkdir(outputDir, { recursive: true });

  const timings = await finalizeDatabaseCreation(
    codeql,
    features,
    config,
    threadsFlag,
    memoryFlag,
    logger,
  );

  // If we didn't already end tracing in the autobuild Action, end it now.
  if (process.env[EnvVar.AUTOBUILD_DID_COMPLETE_SUCCESSFULLY] !== "true") {
    await endTracingForCluster(codeql, config, logger);
  }
  return timings;
}

/**
 * Warns, and records a diagnostic, when the `go` binary found on the PATH
 * differs from the location recorded in the environment by the `init` Action.
 *
 * The check is skipped when Go was autobuilt or when no location was recorded.
 */
export async function warnIfGoInstalledAfterInit(
  config: configUtils.Config,
  logger: Logger,
) {
  // Check that `which go` still points at the same path it did when the `init` Action ran to ensure that no steps
  // in-between performed any setup. We encourage users to perform all setup tasks before initializing CodeQL so that
  // the setup tasks do not interfere with our analysis.
  // Furthermore, if we installed a wrapper script in the `init` Action, we need to ensure that there isn't a step
  // in the workflow after the `init` step which installs a different version of Go and takes precedence in the PATH,
  // thus potentially circumventing our workaround that allows tracing to work.
  const goInitPath = process.env[EnvVar.GO_BINARY_LOCATION];

  if (
    process.env[EnvVar.DID_AUTOBUILD_GOLANG] !== "true" &&
    goInitPath !== undefined
  ) {
    const goBinaryPath = await io.which("go", true);

    if (goInitPath !== goBinaryPath) {
      logger.warning(
        `Expected \`which go\` to return ${goInitPath}, but got ${goBinaryPath}: please ensure that the correct version of Go is installed before the \`codeql-action/init\` Action is used.`,
      );

      addDiagnostic(
        config,
        KnownLanguage.go,
        makeDiagnostic(
          "go/workflow/go-installed-after-codeql-init",
          "Go was installed after the `codeql-action/init` Action was run",
          {
            markdownMessage:
              "To avoid interfering with the CodeQL analysis, perform all installation steps before calling the `github/codeql-action/init` Action.",
            visibility: {
              statusPage: true,
              telemetry: true,
              cliSummaryTable: true,
            },
            severity: "warning",
          },
        ),
      );
    }
  }
}