// codeql-action/src/artifact-scanner.ts
import * as fs from "fs";
import * as os from "os";
import * as path from "path";
import * as exec from "@actions/exec";
import { Logger } from "./logging";
import { getErrorMessage } from "./util";
/**
* GitHub token patterns to scan for.
* These patterns match various GitHub token formats.
*/
const GITHUB_TOKEN_PATTERNS = [
{
name: "Personal Access Token",
pattern: /\bghp_[a-zA-Z0-9]{36}\b/g,
},
{
name: "OAuth Access Token",
pattern: /\bgho_[a-zA-Z0-9]{36}\b/g,
},
{
name: "User-to-Server Token",
pattern: /\bghu_[a-zA-Z0-9]{36}\b/g,
},
{
name: "Server-to-Server Token",
pattern: /\bghs_[a-zA-Z0-9]{36}\b/g,
},
{
name: "Refresh Token",
pattern: /\bghr_[a-zA-Z0-9]{36}\b/g,
},
{
name: "App Installation Access Token",
pattern: /\bghs_[a-zA-Z0-9]{255}\b/g,
},
];
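// Illustrative only: the Personal Access Token pattern above matches strings of
// the form "ghp_" followed by exactly 36 alphanumeric characters, e.g.
// GITHUB_TOKEN_PATTERNS[0].pattern.test("ghp_" + "a".repeat(36)) === true.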
interface TokenFinding {
tokenType: string;
filePath: string;
}
interface ScanResult {
scannedFiles: number;
findings: TokenFinding[];
}
/**
* Scans a file for GitHub tokens.
*
* @param filePath Path to the file to scan
* @param relativePath Relative path for display purposes
* @param logger Logger instance
* @returns Array of token findings in the file
*/
function scanFileForTokens(
filePath: string,
relativePath: string,
logger: Logger,
): TokenFinding[] {
const findings: TokenFinding[] = [];
try {
const content = fs.readFileSync(filePath, "utf8");
for (const { name, pattern } of GITHUB_TOKEN_PATTERNS) {
const matches = content.match(pattern);
if (matches) {
for (let i = 0; i < matches.length; i++) {
findings.push({ tokenType: name, filePath: relativePath });
}
logger.debug(`Found ${matches.length} ${name}(s) in ${relativePath}`);
}
}
return findings;
} catch (e) {
// If we can't read the file as text, it's likely binary or inaccessible
logger.debug(
`Could not scan file ${filePath} for tokens: ${getErrorMessage(e)}`,
);
return [];
}
}
/**
* Recursively extracts and scans archive files (.zip, .tar.gz, .tgz, .tar.zst, .zst, .gz).
*
* @param archivePath Path to the archive file
* @param relativeArchivePath Relative path of the archive for display
* @param extractDir Directory to extract to
* @param logger Logger instance
* @param depth Current recursion depth (to prevent infinite loops)
* @returns Scan results
*/
async function scanArchiveFile(
archivePath: string,
relativeArchivePath: string,
extractDir: string,
logger: Logger,
depth: number = 0,
): Promise<ScanResult> {
const MAX_DEPTH = 10; // Prevent infinite recursion
if (depth > MAX_DEPTH) {
throw new Error(
`Maximum archive extraction depth (${MAX_DEPTH}) reached for ${archivePath}`,
);
}
const result: ScanResult = {
scannedFiles: 0,
findings: [],
};
try {
const tempExtractDir = fs.mkdtempSync(
path.join(extractDir, `extract-${depth}-`),
);
// Determine archive type and extract accordingly
const fileName = path.basename(archivePath).toLowerCase();
if (fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz")) {
// Extract tar.gz files
logger.debug(`Extracting tar.gz file: ${archivePath}`);
await exec.exec("tar", ["-xzf", archivePath, "-C", tempExtractDir], {
silent: true,
});
} else if (fileName.endsWith(".tar.zst")) {
// Extract tar.zst files
logger.debug(`Extracting tar.zst file: ${archivePath}`);
await exec.exec(
"tar",
["--zstd", "-xf", archivePath, "-C", tempExtractDir],
{
silent: true,
},
);
} else if (fileName.endsWith(".zst")) {
// Extract .zst files (single file compression)
logger.debug(`Extracting zst file: ${archivePath}`);
const outputFile = path.join(
tempExtractDir,
path.basename(archivePath, ".zst"),
);
await exec.exec("zstd", ["-d", archivePath, "-o", outputFile], {
silent: true,
});
} else if (fileName.endsWith(".gz")) {
// Extract .gz files (single file compression)
logger.debug(`Extracting gz file: ${archivePath}`);
const outputFile = path.join(
tempExtractDir,
path.basename(archivePath, ".gz"),
);
await exec.exec("gunzip", ["-c", archivePath], {
outStream: fs.createWriteStream(outputFile),
silent: true,
});
} else if (fileName.endsWith(".zip")) {
// Extract zip files
logger.debug(`Extracting zip file: ${archivePath}`);
await exec.exec(
"unzip",
["-q", "-o", archivePath, "-d", tempExtractDir],
{
silent: true,
},
);
}
// Scan the extracted contents
const scanResult = await scanDirectory(
tempExtractDir,
relativeArchivePath,
logger,
depth + 1,
);
result.scannedFiles += scanResult.scannedFiles;
result.findings.push(...scanResult.findings);
// Clean up extracted files
fs.rmSync(tempExtractDir, { recursive: true, force: true });
} catch (e) {
logger.debug(
`Could not extract or scan archive file ${archivePath}: ${getErrorMessage(e)}`,
);
}
return result;
}
/**
* Scans a single file, including recursive archive extraction if applicable.
*
* @param fullPath Full path to the file
* @param relativePath Relative path for display
* @param extractDir Directory to use for extraction (for archive files)
* @param logger Logger instance
* @param depth Current recursion depth
* @returns Scan results
*/
async function scanFile(
fullPath: string,
relativePath: string,
extractDir: string,
logger: Logger,
depth: number = 0,
): Promise<ScanResult> {
const result: ScanResult = {
scannedFiles: 1,
findings: [],
};
// Check if it's an archive file and recursively scan it
const fileName = path.basename(fullPath).toLowerCase();
const isArchive =
fileName.endsWith(".zip") ||
fileName.endsWith(".tar.gz") ||
fileName.endsWith(".tgz") ||
fileName.endsWith(".tar.zst") ||
fileName.endsWith(".zst") ||
fileName.endsWith(".gz");
if (isArchive) {
const archiveResult = await scanArchiveFile(
fullPath,
relativePath,
extractDir,
logger,
depth,
);
result.scannedFiles += archiveResult.scannedFiles;
result.findings.push(...archiveResult.findings);
}
// Also scan the file itself for tokens; files that cannot be read as text are skipped by scanFileForTokens
const fileFindings = scanFileForTokens(fullPath, relativePath, logger);
result.findings.push(...fileFindings);
return result;
}
/**
* Recursively scans a directory for GitHub tokens.
*
* @param dirPath Directory path to scan
* @param baseRelativePath Base relative path for computing display paths
* @param logger Logger instance
* @param depth Current recursion depth
* @returns Scan results
*/
async function scanDirectory(
dirPath: string,
baseRelativePath: string,
logger: Logger,
depth: number = 0,
): Promise<ScanResult> {
const result: ScanResult = {
scannedFiles: 0,
findings: [],
};
const entries = fs.readdirSync(dirPath, { withFileTypes: true });
for (const entry of entries) {
const fullPath = path.join(dirPath, entry.name);
const relativePath = path.join(baseRelativePath, entry.name);
if (entry.isDirectory()) {
const subResult = await scanDirectory(
fullPath,
relativePath,
logger,
depth,
);
result.scannedFiles += subResult.scannedFiles;
result.findings.push(...subResult.findings);
} else if (entry.isFile()) {
const fileResult = await scanFile(
fullPath,
relativePath,
path.dirname(fullPath),
logger,
depth,
);
result.scannedFiles += fileResult.scannedFiles;
result.findings.push(...fileResult.findings);
}
}
return result;
}
/**
* Scans a list of files and directories for GitHub tokens.
* Recursively extracts and scans archive files (.zip, .tar.gz, .tgz, .tar.zst, .zst, .gz).
*
* @param filesToScan List of file paths to scan
* @param logger Logger instance
* @throws If any potential GitHub tokens are found in the scanned artifacts
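*
* @example
* // Illustrative usage only; the artifact path below is hypothetical.
* // The call throws if a token-like string is found in any scanned file.
* await scanArtifactsForTokens(["debug-artifacts/my-debug-db.zip"], logger);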
*/
export async function scanArtifactsForTokens(
filesToScan: string[],
logger: Logger,
): Promise<void> {
logger.info(
"Starting best-effort check for potential GitHub tokens in debug artifacts (for testing purposes only)...",
);
const result: ScanResult = {
scannedFiles: 0,
findings: [],
};
// Create a temporary directory for extraction
const tempScanDir = fs.mkdtempSync(path.join(os.tmpdir(), "artifact-scan-"));
try {
for (const filePath of filesToScan) {
const stats = fs.statSync(filePath);
const fileName = path.basename(filePath);
if (stats.isDirectory()) {
const dirResult = await scanDirectory(filePath, fileName, logger);
result.scannedFiles += dirResult.scannedFiles;
result.findings.push(...dirResult.findings);
} else if (stats.isFile()) {
const fileResult = await scanFile(
filePath,
fileName,
tempScanDir,
logger,
);
result.scannedFiles += fileResult.scannedFiles;
result.findings.push(...fileResult.findings);
}
}
// Compute statistics from findings
const tokenTypesCounts = new Map<string, number>();
const filesWithTokens = new Set<string>();
for (const finding of result.findings) {
tokenTypesCounts.set(
finding.tokenType,
(tokenTypesCounts.get(finding.tokenType) || 0) + 1,
);
filesWithTokens.add(finding.filePath);
}
const tokenTypesSummary = Array.from(tokenTypesCounts.entries())
.map(([type, count]) => `${count} ${type}${count > 1 ? "s" : ""}`)
.join(", ");
const baseSummary = `scanned ${result.scannedFiles} files, found ${result.findings.length} potential token(s) in ${filesWithTokens.size} file(s)`;
const summaryWithTypes = tokenTypesSummary
? `${baseSummary} (${tokenTypesSummary})`
: baseSummary;
logger.info(`Artifact check complete: ${summaryWithTypes}`);
if (result.findings.length > 0) {
const fileList = Array.from(filesWithTokens).join(", ");
throw new Error(
`Found ${result.findings.length} potential GitHub token(s) (${tokenTypesSummary}) in debug artifacts at: ${fileList}. This is a best-effort check for testing purposes only.`,
);
}
} finally {
// Clean up temporary directory
try {
fs.rmSync(tempScanDir, { recursive: true, force: true });
} catch (e) {
logger.debug(
`Could not clean up temporary scan directory: ${getErrorMessage(e)}`,
);
}
}
}