From f2ccf3b4f18ef30c80f012c6e52391d611bae66f Mon Sep 17 00:00:00 2001 From: Henry Mercer Date: Wed, 17 Dec 2025 10:25:47 +0000 Subject: [PATCH] Ensure .gz files are extracted too --- lib/analyze-action-post.js | 42 +++++++++++++------- lib/init-action-post.js | 42 +++++++++++++------- lib/upload-sarif-action-post.js | 42 +++++++++++++------- src/artifact-scanner.ts | 68 ++++++++++++++++++++++----------- 4 files changed, 133 insertions(+), 61 deletions(-) diff --git a/lib/analyze-action-post.js b/lib/analyze-action-post.js index a0cd21ec2..2dcdb8e9f 100644 --- a/lib/analyze-action-post.js +++ b/lib/analyze-action-post.js @@ -87355,7 +87355,7 @@ var require_graceful_fs = __commonJS({ polyfills(fs9); fs9.gracefulify = patch; fs9.createReadStream = createReadStream; - fs9.createWriteStream = createWriteStream2; + fs9.createWriteStream = createWriteStream3; var fs$readFile = fs9.readFile; fs9.readFile = readFile; function readFile(path7, options, cb) { @@ -87567,7 +87567,7 @@ var require_graceful_fs = __commonJS({ function createReadStream(path7, options) { return new fs9.ReadStream(path7, options); } - function createWriteStream2(path7, options) { + function createWriteStream3(path7, options) { return new fs9.WriteStream(path7, options); } var fs$open = fs9.open; @@ -120961,11 +120961,11 @@ function scanFileForTokens(filePath, relativePath, logger) { return []; } } -async function scanZipFile(zipPath, relativeZipPath, extractDir, logger, depth = 0) { +async function scanArchiveFile(archivePath, relativeArchivePath, extractDir, logger, depth = 0) { const MAX_DEPTH = 10; if (depth > MAX_DEPTH) { throw new Error( - `Maximum zip extraction depth (${MAX_DEPTH}) reached for ${zipPath}` + `Maximum archive extraction depth (${MAX_DEPTH}) reached for ${archivePath}` ); } const result = { @@ -120973,14 +120973,29 @@ async function scanZipFile(zipPath, relativeZipPath, extractDir, logger, depth = findings: [] }; try { - logger.debug(`Extracting zip file: ${zipPath}`); const 
tempExtractDir = fs5.mkdtempSync( path5.join(extractDir, `extract-${depth}-`) ); - await exec.exec("unzip", ["-q", "-o", zipPath, "-d", tempExtractDir]); + const fileName = path5.basename(archivePath).toLowerCase(); + if (fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz")) { + logger.debug(`Extracting tar.gz file: ${archivePath}`); + await exec.exec("tar", ["-xzf", archivePath, "-C", tempExtractDir]); + } else if (fileName.endsWith(".gz")) { + logger.debug(`Extracting gz file: ${archivePath}`); + const outputFile = path5.join( + tempExtractDir, + path5.basename(archivePath, ".gz") + ); + await exec.exec("gunzip", ["-c", archivePath], { + outStream: fs5.createWriteStream(outputFile) + }); + } else if (fileName.endsWith(".zip")) { + logger.debug(`Extracting zip file: ${archivePath}`); + await exec.exec("unzip", ["-q", "-o", archivePath, "-d", tempExtractDir]); + } const scanResult = await scanDirectory( tempExtractDir, - relativeZipPath, + relativeArchivePath, logger, depth + 1 ); @@ -120989,7 +121004,7 @@ async function scanZipFile(zipPath, relativeZipPath, extractDir, logger, depth = fs5.rmSync(tempExtractDir, { recursive: true, force: true }); } catch (e) { logger.debug( - `Could not extract or scan zip file ${zipPath}: ${getErrorMessage(e)}` + `Could not extract or scan archive file ${archivePath}: ${getErrorMessage(e)}` ); } return result; @@ -120999,17 +121014,18 @@ async function scanFile(fullPath, relativePath, extractDir, logger, depth = 0) { scannedFiles: 1, findings: [] }; - const ext = path5.extname(fullPath).toLowerCase(); - if (ext === ".zip") { - const zipResult = await scanZipFile( + const fileName = path5.basename(fullPath).toLowerCase(); + const isArchive = fileName.endsWith(".zip") || fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz") || fileName.endsWith(".gz"); + if (isArchive) { + const archiveResult = await scanArchiveFile( fullPath, relativePath, extractDir, logger, depth ); - result.scannedFiles += zipResult.scannedFiles; - 
result.findings.push(...zipResult.findings); + result.scannedFiles += archiveResult.scannedFiles; + result.findings.push(...archiveResult.findings); } const fileFindings = scanFileForTokens(fullPath, relativePath, logger); result.findings.push(...fileFindings); diff --git a/lib/init-action-post.js b/lib/init-action-post.js index 8d040c72a..918cfa20b 100644 --- a/lib/init-action-post.js +++ b/lib/init-action-post.js @@ -87355,7 +87355,7 @@ var require_graceful_fs = __commonJS({ polyfills(fs19); fs19.gracefulify = patch; fs19.createReadStream = createReadStream2; - fs19.createWriteStream = createWriteStream2; + fs19.createWriteStream = createWriteStream3; var fs$readFile = fs19.readFile; fs19.readFile = readFile; function readFile(path16, options, cb) { @@ -87567,7 +87567,7 @@ var require_graceful_fs = __commonJS({ function createReadStream2(path16, options) { return new fs19.ReadStream(path16, options); } - function createWriteStream2(path16, options) { + function createWriteStream3(path16, options) { return new fs19.WriteStream(path16, options); } var fs$open = fs19.open; @@ -125603,11 +125603,11 @@ function scanFileForTokens(filePath, relativePath, logger) { return []; } } -async function scanZipFile(zipPath, relativeZipPath, extractDir, logger, depth = 0) { +async function scanArchiveFile(archivePath, relativeArchivePath, extractDir, logger, depth = 0) { const MAX_DEPTH = 10; if (depth > MAX_DEPTH) { throw new Error( - `Maximum zip extraction depth (${MAX_DEPTH}) reached for ${zipPath}` + `Maximum archive extraction depth (${MAX_DEPTH}) reached for ${archivePath}` ); } const result = { @@ -125615,14 +125615,29 @@ async function scanZipFile(zipPath, relativeZipPath, extractDir, logger, depth = findings: [] }; try { - logger.debug(`Extracting zip file: ${zipPath}`); const tempExtractDir = fs12.mkdtempSync( path11.join(extractDir, `extract-${depth}-`) ); - await exec.exec("unzip", ["-q", "-o", zipPath, "-d", tempExtractDir]); + const fileName = 
path11.basename(archivePath).toLowerCase(); + if (fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz")) { + logger.debug(`Extracting tar.gz file: ${archivePath}`); + await exec.exec("tar", ["-xzf", archivePath, "-C", tempExtractDir]); + } else if (fileName.endsWith(".gz")) { + logger.debug(`Extracting gz file: ${archivePath}`); + const outputFile = path11.join( + tempExtractDir, + path11.basename(archivePath, ".gz") + ); + await exec.exec("gunzip", ["-c", archivePath], { + outStream: fs12.createWriteStream(outputFile) + }); + } else if (fileName.endsWith(".zip")) { + logger.debug(`Extracting zip file: ${archivePath}`); + await exec.exec("unzip", ["-q", "-o", archivePath, "-d", tempExtractDir]); + } const scanResult = await scanDirectory( tempExtractDir, - relativeZipPath, + relativeArchivePath, logger, depth + 1 ); @@ -125631,7 +125646,7 @@ async function scanZipFile(zipPath, relativeZipPath, extractDir, logger, depth = fs12.rmSync(tempExtractDir, { recursive: true, force: true }); } catch (e) { logger.debug( - `Could not extract or scan zip file ${zipPath}: ${getErrorMessage(e)}` + `Could not extract or scan archive file ${archivePath}: ${getErrorMessage(e)}` ); } return result; @@ -125641,17 +125656,18 @@ async function scanFile(fullPath, relativePath, extractDir, logger, depth = 0) { scannedFiles: 1, findings: [] }; - const ext = path11.extname(fullPath).toLowerCase(); - if (ext === ".zip") { - const zipResult = await scanZipFile( + const fileName = path11.basename(fullPath).toLowerCase(); + const isArchive = fileName.endsWith(".zip") || fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz") || fileName.endsWith(".gz"); + if (isArchive) { + const archiveResult = await scanArchiveFile( fullPath, relativePath, extractDir, logger, depth ); - result.scannedFiles += zipResult.scannedFiles; - result.findings.push(...zipResult.findings); + result.scannedFiles += archiveResult.scannedFiles; + result.findings.push(...archiveResult.findings); } const fileFindings 
= scanFileForTokens(fullPath, relativePath, logger); result.findings.push(...fileFindings); diff --git a/lib/upload-sarif-action-post.js b/lib/upload-sarif-action-post.js index 5fa264362..d19aba4de 100644 --- a/lib/upload-sarif-action-post.js +++ b/lib/upload-sarif-action-post.js @@ -75691,7 +75691,7 @@ var require_graceful_fs = __commonJS({ polyfills(fs4); fs4.gracefulify = patch; fs4.createReadStream = createReadStream; - fs4.createWriteStream = createWriteStream2; + fs4.createWriteStream = createWriteStream3; var fs$readFile = fs4.readFile; fs4.readFile = readFile; function readFile(path3, options, cb) { @@ -75903,7 +75903,7 @@ var require_graceful_fs = __commonJS({ function createReadStream(path3, options) { return new fs4.ReadStream(path3, options); } - function createWriteStream2(path3, options) { + function createWriteStream3(path3, options) { return new fs4.WriteStream(path3, options); } var fs$open = fs4.open; @@ -119896,11 +119896,11 @@ function scanFileForTokens(filePath, relativePath, logger) { return []; } } -async function scanZipFile(zipPath, relativeZipPath, extractDir, logger, depth = 0) { +async function scanArchiveFile(archivePath, relativeArchivePath, extractDir, logger, depth = 0) { const MAX_DEPTH = 10; if (depth > MAX_DEPTH) { throw new Error( - `Maximum zip extraction depth (${MAX_DEPTH}) reached for ${zipPath}` + `Maximum archive extraction depth (${MAX_DEPTH}) reached for ${archivePath}` ); } const result = { @@ -119908,14 +119908,29 @@ async function scanZipFile(zipPath, relativeZipPath, extractDir, logger, depth = findings: [] }; try { - logger.debug(`Extracting zip file: ${zipPath}`); const tempExtractDir = fs.mkdtempSync( path.join(extractDir, `extract-${depth}-`) ); - await exec.exec("unzip", ["-q", "-o", zipPath, "-d", tempExtractDir]); + const fileName = path.basename(archivePath).toLowerCase(); + if (fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz")) { + logger.debug(`Extracting tar.gz file: ${archivePath}`); + await 
exec.exec("tar", ["-xzf", archivePath, "-C", tempExtractDir]); + } else if (fileName.endsWith(".gz")) { + logger.debug(`Extracting gz file: ${archivePath}`); + const outputFile = path.join( + tempExtractDir, + path.basename(archivePath, ".gz") + ); + await exec.exec("gunzip", ["-c", archivePath], { + outStream: fs.createWriteStream(outputFile) + }); + } else if (fileName.endsWith(".zip")) { + logger.debug(`Extracting zip file: ${archivePath}`); + await exec.exec("unzip", ["-q", "-o", archivePath, "-d", tempExtractDir]); + } const scanResult = await scanDirectory( tempExtractDir, - relativeZipPath, + relativeArchivePath, logger, depth + 1 ); @@ -119924,7 +119939,7 @@ async function scanZipFile(zipPath, relativeZipPath, extractDir, logger, depth = fs.rmSync(tempExtractDir, { recursive: true, force: true }); } catch (e) { logger.debug( - `Could not extract or scan zip file ${zipPath}: ${getErrorMessage(e)}` + `Could not extract or scan archive file ${archivePath}: ${getErrorMessage(e)}` ); } return result; @@ -119934,17 +119949,18 @@ async function scanFile(fullPath, relativePath, extractDir, logger, depth = 0) { scannedFiles: 1, findings: [] }; - const ext = path.extname(fullPath).toLowerCase(); - if (ext === ".zip") { - const zipResult = await scanZipFile( + const fileName = path.basename(fullPath).toLowerCase(); + const isArchive = fileName.endsWith(".zip") || fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz") || fileName.endsWith(".gz"); + if (isArchive) { + const archiveResult = await scanArchiveFile( fullPath, relativePath, extractDir, logger, depth ); - result.scannedFiles += zipResult.scannedFiles; - result.findings.push(...zipResult.findings); + result.scannedFiles += archiveResult.scannedFiles; + result.findings.push(...archiveResult.findings); } const fileFindings = scanFileForTokens(fullPath, relativePath, logger); result.findings.push(...fileFindings); diff --git a/src/artifact-scanner.ts b/src/artifact-scanner.ts index d4514012a..6d009dfe9 100644 
--- a/src/artifact-scanner.ts +++ b/src/artifact-scanner.ts @@ -86,18 +86,18 @@ function scanFileForTokens( } /** - * Recursively extracts and scans zip files. + * Recursively extracts and scans archive files (.zip, .gz, .tar.gz). * - * @param zipPath Path to the zip file - * @param relativeZipPath Relative path of the zip for display + * @param archivePath Path to the archive file + * @param relativeArchivePath Relative path of the archive for display * @param extractDir Directory to extract to * @param logger Logger instance * @param depth Current recursion depth (to prevent infinite loops) * @returns Scan results */ -async function scanZipFile( - zipPath: string, - relativeZipPath: string, +async function scanArchiveFile( + archivePath: string, + relativeArchivePath: string, extractDir: string, logger: Logger, depth: number = 0, @@ -105,7 +105,7 @@ async function scanZipFile( const MAX_DEPTH = 10; // Prevent infinite recursion if (depth > MAX_DEPTH) { throw new Error( - `Maximum zip extraction depth (${MAX_DEPTH}) reached for ${zipPath}`, + `Maximum archive extraction depth (${MAX_DEPTH}) reached for ${archivePath}`, ); } @@ -115,18 +115,36 @@ async function scanZipFile( }; try { - logger.debug(`Extracting zip file: ${zipPath}`); const tempExtractDir = fs.mkdtempSync( path.join(extractDir, `extract-${depth}-`), ); - // Use unzip command available on GitHub-hosted Linux runners - await exec.exec("unzip", ["-q", "-o", zipPath, "-d", tempExtractDir]); + // Determine archive type and extract accordingly + const fileName = path.basename(archivePath).toLowerCase(); + if (fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz")) { + // Extract tar.gz files + logger.debug(`Extracting tar.gz file: ${archivePath}`); + await exec.exec("tar", ["-xzf", archivePath, "-C", tempExtractDir]); + } else if (fileName.endsWith(".gz")) { + // Extract .gz files (single file compression) + logger.debug(`Extracting gz file: ${archivePath}`); + const outputFile = path.join( + 
tempExtractDir, + path.basename(archivePath, ".gz"), + ); + await exec.exec("gunzip", ["-c", archivePath], { + outStream: fs.createWriteStream(outputFile), + }); + } else if (fileName.endsWith(".zip")) { + // Extract zip files + logger.debug(`Extracting zip file: ${archivePath}`); + await exec.exec("unzip", ["-q", "-o", archivePath, "-d", tempExtractDir]); + } // Scan the extracted contents const scanResult = await scanDirectory( tempExtractDir, - relativeZipPath, + relativeArchivePath, logger, depth + 1, ); @@ -137,7 +155,7 @@ async function scanZipFile( fs.rmSync(tempExtractDir, { recursive: true, force: true }); } catch (e) { logger.debug( - `Could not extract or scan zip file ${zipPath}: ${getErrorMessage(e)}`, + `Could not extract or scan archive file ${archivePath}: ${getErrorMessage(e)}`, ); } @@ -145,11 +163,11 @@ async function scanZipFile( } /** - * Scans a single file, including recursive zip extraction if applicable. + * Scans a single file, including recursive archive extraction if applicable. 
* * @param fullPath Full path to the file * @param relativePath Relative path for display - * @param extractDir Directory to use for extraction (for zip files) + * @param extractDir Directory to use for extraction (for archive files) * @param logger Logger instance * @param depth Current recursion depth * @returns Scan results @@ -166,21 +184,27 @@ async function scanFile( findings: [], }; - // Check if it's a zip file and recursively scan it - const ext = path.extname(fullPath).toLowerCase(); - if (ext === ".zip") { - const zipResult = await scanZipFile( + // Check if it's an archive file and recursively scan it + const fileName = path.basename(fullPath).toLowerCase(); + const isArchive = + fileName.endsWith(".zip") || + fileName.endsWith(".tar.gz") || + fileName.endsWith(".tgz") || + fileName.endsWith(".gz"); + + if (isArchive) { + const archiveResult = await scanArchiveFile( fullPath, relativePath, extractDir, logger, depth, ); - result.scannedFiles += zipResult.scannedFiles; - result.findings.push(...zipResult.findings); + result.scannedFiles += archiveResult.scannedFiles; + result.findings.push(...archiveResult.findings); } - // Scan the file itself for tokens + // Scan the file itself for tokens (archives are also scanned as-is, in addition to their extracted contents) const fileFindings = scanFileForTokens(fullPath, relativePath, logger); result.findings.push(...fileFindings); @@ -240,7 +264,7 @@ async function scanDirectory( /** * Scans a list of files and directories for GitHub tokens. - * Recursively extracts and scans zip files. + * Recursively extracts and scans archive files (.zip, .gz, .tar.gz). * * @param filesToScan List of file paths to scan * @param logger Logger instance