Refactoring: Split up script

This commit is contained in:
Henry Mercer
2026-04-13 17:03:20 +01:00
parent 7c9e131894
commit cb52ba6486
+99 -61
View File
@@ -1,6 +1,6 @@
#!/usr/bin/env npx tsx
/**
/*
* Updates src/languages/builtin.json by querying the CodeQL CLI for:
* - Languages that have default queries (via codeql-extractor.yml)
* - Language aliases (via `codeql resolve languages --format=betterjson --extractor-include-aliases`)
@@ -19,69 +19,107 @@ import * as yaml from "yaml";
import { BUILTIN_LANGUAGES_FILE } from "./config";
const codeqlPath = process.argv[2] || "codeql";
// Step 1: Resolve all language extractor directories.
const resolveJson: Record<string, string[]> = JSON.parse(
execFileSync(codeqlPath, ["resolve", "languages", "--format=json"], {
encoding: "utf8",
}),
);
// Step 2: For each language, read codeql-extractor.yml and check default_queries.
const languages: string[] = [];
for (const [language, dirs] of Object.entries(resolveJson)) {
const extractorDir = dirs[0];
const extractorYmlPath = path.join(extractorDir, "codeql-extractor.yml");
if (!fs.existsSync(extractorYmlPath)) {
throw new Error(
`Extractor YAML not found for language '${language}' at expected path: ${extractorYmlPath}`,
);
}
const extractorYml = yaml.parse(fs.readFileSync(extractorYmlPath, "utf8"));
const defaultQueries: unknown[] | undefined = extractorYml.default_queries;
if (Array.isArray(defaultQueries) && defaultQueries.length > 0) {
console.log(
`${language}: included (default queries: ${JSON.stringify(defaultQueries)})`,
);
languages.push(language);
} else {
console.log(`${language}: excluded (no default queries)`);
}
/** Resolve all known language extractor directories. */
function resolveLanguages(codeqlPath: string): Record<string, string[]> {
return JSON.parse(
execFileSync(codeqlPath, ["resolve", "languages", "--format=json"], {
encoding: "utf8",
}),
) as Record<string, string[]>;
}
languages.sort();
/**
* Return the sorted list of languages whose extractors ship default queries.
*
* @param extractorDirs - Map from language to list of extractor directories
*/
function findLanguagesWithDefaultQueries(
extractorDirs: Record<string, string[]>,
): string[] {
const languages: string[] = [];
// Step 3: Resolve aliases, filtered to only those targeting included languages.
const betterjsonOutput = JSON.parse(
execFileSync(
codeqlPath,
[
"resolve",
"languages",
"--format=betterjson",
"--extractor-include-aliases",
],
{ encoding: "utf8" },
),
);
for (const [language, dirs] of Object.entries(extractorDirs)) {
if (dirs.length !== 1) {
throw new Error(
`Expected exactly one extractor directory for language '${language}', but found ${dirs.length}: ${dirs.join(
", ",
)}`,
);
}
const languageSet = new Set(languages);
const aliases: Record<string, string> = Object.fromEntries(
Object.entries((betterjsonOutput.aliases ?? {}) as Record<string, string>)
.filter(([, target]) => languageSet.has(target))
.sort(([a], [b]) => a.localeCompare(b)),
);
const extractorYmlPath = path.join(dirs[0], "codeql-extractor.yml");
// Step 4: Update the built-in languages file.
const content = `${JSON.stringify({ languages, aliases }, null, 2)}\n`;
fs.mkdirSync(path.dirname(BUILTIN_LANGUAGES_FILE), { recursive: true });
fs.writeFileSync(BUILTIN_LANGUAGES_FILE, content);
if (!fs.existsSync(extractorYmlPath)) {
throw new Error(
`Extractor YAML not found for language '${language}' at expected path: ${extractorYmlPath}`,
);
}
console.log(`\nWrote ${BUILTIN_LANGUAGES_FILE}`);
console.log(` Languages: ${languages.join(", ")}`);
console.log(` Aliases: ${Object.keys(aliases).join(", ")}`);
const extractorYml = yaml.parse(fs.readFileSync(extractorYmlPath, "utf8"));
const defaultQueries: unknown[] | undefined = extractorYml.default_queries;
if (Array.isArray(defaultQueries) && defaultQueries.length > 0) {
console.log(
`${language}: included (default queries: ${JSON.stringify(defaultQueries)})`,
);
languages.push(language);
} else {
console.log(`${language}: excluded (no default queries)`);
}
}
return languages.sort();
}
/**
* Resolve language aliases from the CodeQL CLI, keeping only those whose
* target is in the given set of included languages.
*/
function resolveAliases(
codeqlPath: string,
includedLanguages: Set<string>,
): Record<string, string> {
const betterjsonOutput = JSON.parse(
execFileSync(
codeqlPath,
[
"resolve",
"languages",
"--format=betterjson",
"--extractor-include-aliases",
],
{ encoding: "utf8" },
),
);
return Object.fromEntries(
Object.entries((betterjsonOutput.aliases ?? {}) as Record<string, string>)
.filter(([, target]) => includedLanguages.has(target))
.sort(([a], [b]) => a.localeCompare(b)),
);
}
/** Write the built-in languages data to disk. */
function writeBuiltinLanguages(
languages: string[],
aliases: Record<string, string>,
): void {
const content = `${JSON.stringify({ languages, aliases }, null, 2)}\n`;
fs.mkdirSync(path.dirname(BUILTIN_LANGUAGES_FILE), { recursive: true });
fs.writeFileSync(BUILTIN_LANGUAGES_FILE, content);
console.log(`\nWrote ${BUILTIN_LANGUAGES_FILE}`);
console.log(` Languages: ${languages.join(", ")}`);
console.log(` Aliases: ${Object.keys(aliases).join(", ")}`);
}
function main(): void {
const codeqlPath = process.argv[2] || "codeql";
const extractorDirs = resolveLanguages(codeqlPath);
const languages = findLanguagesWithDefaultQueries(extractorDirs);
const aliases = resolveAliases(codeqlPath, new Set(languages));
writeBuiltinLanguages(languages, aliases);
}
main();