import * as fs from "fs";
import * as os from "os";
import * as path from "path";

import * as exec from "@actions/exec";

import { Logger } from "./logging";
import { getErrorMessage } from "./util";

/**
 * GitHub token patterns to scan for.
 * These patterns match various GitHub token formats.
 */
const GITHUB_TOKEN_PATTERNS = [
  {
    name: "Personal Access Token",
    pattern: /\bghp_[a-zA-Z0-9]{36}\b/g,
  },
  {
    name: "OAuth Access Token",
    pattern: /\bgho_[a-zA-Z0-9]{36}\b/g,
  },
  {
    name: "User-to-Server Token",
    pattern: /\bghu_[a-zA-Z0-9]{36}\b/g,
  },
  {
    name: "Server-to-Server Token",
    pattern: /\bghs_[a-zA-Z0-9]{36}\b/g,
  },
  {
    name: "Refresh Token",
    pattern: /\bghr_[a-zA-Z0-9]{36}\b/g,
  },
  {
    name: "App Installation Access Token",
    pattern: /\bghs_[a-zA-Z0-9]{255}\b/g,
  },
];

interface TokenFinding {
  tokenType: string;
  filePath: string;
}

interface ScanResult {
  scannedFiles: number;
  findings: TokenFinding[];
}

/**
 * Scans a file for GitHub tokens.
 *
 * @param filePath Path to the file to scan
 * @param relativePath Relative path for display purposes
 * @param logger Logger instance
 * @returns Array of token findings in the file
 */
function scanFileForTokens(
  filePath: string,
  relativePath: string,
  logger: Logger,
): TokenFinding[] {
  const findings: TokenFinding[] = [];
  try {
    const content = fs.readFileSync(filePath, "utf8");
    for (const { name, pattern } of GITHUB_TOKEN_PATTERNS) {
      const matches = content.match(pattern);
      if (matches) {
        for (let i = 0; i < matches.length; i++) {
          findings.push({ tokenType: name, filePath: relativePath });
        }
        logger.debug(`Found ${matches.length} ${name}(s) in ${relativePath}`);
      }
    }
    return findings;
  } catch (e) {
    // If we can't read the file as text, it's likely binary or inaccessible
    logger.debug(
      `Could not scan file ${filePath} for tokens: ${getErrorMessage(e)}`,
    );
    return [];
  }
}
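
// Illustrative sketch (not part of the scanner): the patterns above can be
// exercised directly. The string below is a synthetic placeholder assembled
// at runtime, not a real credential.
//
//   const sample = `token=${"ghp_" + "x".repeat(36)}`;
//   const { pattern } = GITHUB_TOKEN_PATTERNS[0];
//   console.log(sample.match(pattern)?.length); // 1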

/**
 * Recursively extracts and scans archive files (.zip, .gz, .tgz, .tar.gz,
 * .zst, .tar.zst).
 *
 * @param archivePath Path to the archive file
 * @param relativeArchivePath Relative path of the archive for display
 * @param extractDir Directory to extract to
 * @param logger Logger instance
 * @param depth Current recursion depth (to prevent infinite loops)
 * @returns Scan results
 */
async function scanArchiveFile(
  archivePath: string,
  relativeArchivePath: string,
  extractDir: string,
  logger: Logger,
  depth: number = 0,
): Promise<ScanResult> {
  const MAX_DEPTH = 10; // Prevent infinite recursion
  if (depth > MAX_DEPTH) {
    throw new Error(
      `Maximum archive extraction depth (${MAX_DEPTH}) reached for ${archivePath}`,
    );
  }

  const result: ScanResult = {
    scannedFiles: 0,
    findings: [],
  };

  try {
    const tempExtractDir = fs.mkdtempSync(
      path.join(extractDir, `extract-${depth}-`),
    );

    // Determine archive type and extract accordingly
    const fileName = path.basename(archivePath).toLowerCase();

    if (fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz")) {
      // Extract tar.gz files
      logger.debug(`Extracting tar.gz file: ${archivePath}`);
      await exec.exec("tar", ["-xzf", archivePath, "-C", tempExtractDir], {
        silent: true,
      });
    } else if (fileName.endsWith(".tar.zst")) {
      // Extract tar.zst files
      logger.debug(`Extracting tar.zst file: ${archivePath}`);
      await exec.exec(
        "tar",
        ["--zstd", "-xf", archivePath, "-C", tempExtractDir],
        {
          silent: true,
        },
      );
    } else if (fileName.endsWith(".zst")) {
      // Extract .zst files (single file compression)
      logger.debug(`Extracting zst file: ${archivePath}`);
      const outputFile = path.join(
        tempExtractDir,
        path.basename(archivePath, ".zst"),
      );
      await exec.exec("zstd", ["-d", archivePath, "-o", outputFile], {
        silent: true,
      });
    } else if (fileName.endsWith(".gz")) {
      // Extract .gz files (single file compression). In @actions/exec,
      // `silent: true` also suppresses writes to `outStream`, so collect
      // stdout through a listener and write the decompressed file ourselves.
      // This buffers the file in memory, which is acceptable for debug
      // artifacts.
      logger.debug(`Extracting gz file: ${archivePath}`);
      const outputFile = path.join(
        tempExtractDir,
        path.basename(archivePath, ".gz"),
      );
      const chunks: Buffer[] = [];
      await exec.exec("gunzip", ["-c", archivePath], {
        listeners: {
          stdout: (data: Buffer) => {
            chunks.push(data);
          },
        },
        silent: true,
      });
      fs.writeFileSync(outputFile, Buffer.concat(chunks));
    } else if (fileName.endsWith(".zip")) {
      // Extract zip files
      logger.debug(`Extracting zip file: ${archivePath}`);
      await exec.exec(
        "unzip",
        ["-q", "-o", archivePath, "-d", tempExtractDir],
        {
          silent: true,
        },
      );
    }

    // Scan the extracted contents
    const scanResult = await scanDirectory(
      tempExtractDir,
      relativeArchivePath,
      logger,
      depth + 1,
    );
    result.scannedFiles += scanResult.scannedFiles;
    result.findings.push(...scanResult.findings);

    // Clean up extracted files
    fs.rmSync(tempExtractDir, { recursive: true, force: true });
  } catch (e) {
    logger.debug(
      `Could not extract or scan archive file ${archivePath}: ${getErrorMessage(e)}`,
    );
  }

  return result;
}
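
// Illustrative sketch of the recursion above, assuming the extraction tools
// (`tar`, `zstd`, `gunzip`, `unzip`) are on PATH, as they are on GitHub-hosted
// runners. A hypothetical `logs.tar.gz` containing `inner.zip` is handled
// like this:
//
//   scanArchiveFile(`${tmpDir}/logs.tar.gz`, "logs.tar.gz", tmpDir, logger, 0)
//     -> tar extracts into `${tmpDir}/extract-0-XXXXXX`
//     -> scanDirectory(...) encounters inner.zip
//     -> scanFile(...) calls scanArchiveFile(..., depth = 1) on inner.zip
//     -> beyond MAX_DEPTH nested archives, extraction stops with an error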

/**
 * Scans a single file, including recursive archive extraction if applicable.
 *
 * @param fullPath Full path to the file
 * @param relativePath Relative path for display
 * @param extractDir Directory to use for extraction (for archive files)
 * @param logger Logger instance
 * @param depth Current recursion depth
 * @returns Scan results
 */
async function scanFile(
  fullPath: string,
  relativePath: string,
  extractDir: string,
  logger: Logger,
  depth: number = 0,
): Promise<ScanResult> {
  const result: ScanResult = {
    scannedFiles: 1,
    findings: [],
  };

  // Check if it's an archive file and recursively scan it
  const fileName = path.basename(fullPath).toLowerCase();
  const isArchive =
    fileName.endsWith(".zip") ||
    fileName.endsWith(".tar.gz") ||
    fileName.endsWith(".tgz") ||
    fileName.endsWith(".tar.zst") ||
    fileName.endsWith(".zst") ||
    fileName.endsWith(".gz");

  if (isArchive) {
    const archiveResult = await scanArchiveFile(
      fullPath,
      relativePath,
      extractDir,
      logger,
      depth,
    );
    result.scannedFiles += archiveResult.scannedFiles;
    result.findings.push(...archiveResult.findings);
  }

  // Also scan the file itself for tokens. Pure binary archive formats rarely
  // contain matches when decoded as text, and unreadable files are skipped
  // inside scanFileForTokens.
  const fileFindings = scanFileForTokens(fullPath, relativePath, logger);
  result.findings.push(...fileFindings);

  return result;
}

/**
 * Recursively scans a directory for GitHub tokens.
 *
 * @param dirPath Directory path to scan
 * @param baseRelativePath Base relative path for computing display paths
 * @param logger Logger instance
 * @param depth Current recursion depth
 * @returns Scan results
 */
async function scanDirectory(
  dirPath: string,
  baseRelativePath: string,
  logger: Logger,
  depth: number = 0,
): Promise<ScanResult> {
  const result: ScanResult = {
    scannedFiles: 0,
    findings: [],
  };

  const entries = fs.readdirSync(dirPath, { withFileTypes: true });

  for (const entry of entries) {
    const fullPath = path.join(dirPath, entry.name);
    const relativePath = path.join(baseRelativePath, entry.name);

    if (entry.isDirectory()) {
      const subResult = await scanDirectory(
        fullPath,
        relativePath,
        logger,
        depth,
      );
      result.scannedFiles += subResult.scannedFiles;
      result.findings.push(...subResult.findings);
    } else if (entry.isFile()) {
      const fileResult = await scanFile(
        fullPath,
        relativePath,
        path.dirname(fullPath),
        logger,
        depth,
      );
      result.scannedFiles += fileResult.scannedFiles;
      result.findings.push(...fileResult.findings);
    }
  }

  return result;
}
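
// Illustrative sketch (hypothetical paths): an already-unpacked artifact
// directory can be scanned directly with scanDirectory.
//
//   const res = await scanDirectory("/tmp/artifact", "artifact", logger);
//   logger.info(`${res.scannedFiles} file(s), ${res.findings.length} finding(s)`);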

/**
 * Scans a list of files and directories for GitHub tokens.
 * Recursively extracts and scans archive files (.zip, .gz, .tgz, .tar.gz,
 * .zst, .tar.zst).
 *
 * @param filesToScan List of file paths to scan
 * @param logger Logger instance
 * @throws Error if any potential tokens are found
 */
export async function scanArtifactsForTokens(
  filesToScan: string[],
  logger: Logger,
): Promise<void> {
  logger.info(
    "Starting best-effort check for potential GitHub tokens in debug artifacts (for testing purposes only)...",
  );

  const result: ScanResult = {
    scannedFiles: 0,
    findings: [],
  };

  // Create a temporary directory for extraction
  const tempScanDir = fs.mkdtempSync(path.join(os.tmpdir(), "artifact-scan-"));

  try {
    for (const filePath of filesToScan) {
      const stats = fs.statSync(filePath);
      const fileName = path.basename(filePath);

      if (stats.isDirectory()) {
        const dirResult = await scanDirectory(filePath, fileName, logger);
        result.scannedFiles += dirResult.scannedFiles;
        result.findings.push(...dirResult.findings);
      } else if (stats.isFile()) {
        const fileResult = await scanFile(
          filePath,
          fileName,
          tempScanDir,
          logger,
        );
        result.scannedFiles += fileResult.scannedFiles;
        result.findings.push(...fileResult.findings);
      }
    }

    // Compute statistics from findings
    const tokenTypesCounts = new Map<string, number>();
    const filesWithTokens = new Set<string>();
    for (const finding of result.findings) {
      tokenTypesCounts.set(
        finding.tokenType,
        (tokenTypesCounts.get(finding.tokenType) || 0) + 1,
      );
      filesWithTokens.add(finding.filePath);
    }

    const tokenTypesSummary = Array.from(tokenTypesCounts.entries())
      .map(([type, count]) => `${count} ${type}${count > 1 ? "s" : ""}`)
      .join(", ");

    const baseSummary = `scanned ${result.scannedFiles} files, found ${result.findings.length} potential token(s) in ${filesWithTokens.size} file(s)`;
    const summaryWithTypes = tokenTypesSummary
      ? `${baseSummary} (${tokenTypesSummary})`
      : baseSummary;
    logger.info(`Artifact check complete: ${summaryWithTypes}`);

    if (result.findings.length > 0) {
      const fileList = Array.from(filesWithTokens).join(", ");
      throw new Error(
        `Found ${result.findings.length} potential GitHub token(s) (${tokenTypesSummary}) in debug artifacts at: ${fileList}. This is a best-effort check for testing purposes only.`,
      );
    }
  } finally {
    // Clean up temporary directory
    try {
      fs.rmSync(tempScanDir, { recursive: true, force: true });
    } catch (e) {
      logger.debug(
        `Could not clean up temporary scan directory: ${getErrorMessage(e)}`,
      );
    }
  }
}
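
// Example usage (illustrative sketch; how the Logger is obtained depends on
// this codebase -- the `getActionsLogger` helper below is an assumption):
//
//   import { getActionsLogger } from "./logging";
//
//   const logger = getActionsLogger();
//   // Throws if any potential token is found in the listed artifacts.
//   await scanArtifactsForTokens(
//     ["debug-artifacts/db.zip", "debug-artifacts/logs"],
//     logger,
//   );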