[vcpkg] Improve versioning files generators (#15172)

* Remove port version splitting from x-history

* Parallelize versions file generator

* Use cpu_count()/2 to avoid crashes

* Use generatePortVersionsDb.py output to generate baseline

* Update scripts/generateBaseline.py

Co-authored-by: Adam Johnson <AdamJohnso@gmail.com>

* rename generateBaseline function

* Update toolsrc/src/vcpkg/commands.porthistory.cpp

Co-authored-by: ras0219 <533828+ras0219@users.noreply.github.com>

* Remove unused code

Co-authored-by: Adam Johnson <AdamJohnso@gmail.com>
Co-authored-by: ras0219 <533828+ras0219@users.noreply.github.com>
This commit is contained in:
Victor Romero 2021-01-05 14:36:46 -08:00 committed by GitHub
parent 378ffbb940
commit 6d3d6490eb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 135 additions and 158 deletions

View File

@ -1,50 +1,71 @@
import os import os
import json
import subprocess
import sys import sys
import json
import time
from pathlib import Path
SCRIPT_DIRECTORY = os.path.dirname(os.path.abspath(__file__)) SCRIPT_DIRECTORY = os.path.dirname(os.path.abspath(__file__))
PORTS_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, '../ports')
VERSIONS_DB_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, '../port_versions')
def _baseline_entry(last_version):
    """Return the baseline entry for the most recent version record of a port.

    The entry holds exactly one version field, copied from ``last_version``.
    The fields are tried in the order ``version``, ``version-date``,
    ``version-semver``; if none is present ``version-string`` is used. The
    ``port-version`` field is always copied.

    Raises:
        KeyError: if ``last_version`` has no ``port-version`` key, or has
            none of the four version keys.
    """
    for scheme in ('version', 'version-date', 'version-semver'):
        if scheme in last_version:
            entry = {scheme: last_version[scheme]}
            break
    else:
        entry = {'version-string': last_version['version-string']}
    entry['port-version'] = last_version['port-version']
    return entry


def generate_baseline():
    """Write ``baseline.json`` from the per-port version database files.

    Every directory directly under ``PORTS_DIRECTORY`` is treated as a port.
    For each port the file
    ``VERSIONS_DB_DIRECTORY/<first letter>-/<port name>.json`` is read and the
    first element of its ``versions`` list becomes that port's baseline
    entry. The collected entries are written as ``{"default": {...}}`` to
    ``VERSIONS_DB_DIRECTORY/baseline.json``.

    Ports whose version file is missing or is not valid JSON are reported on
    stderr and left out of the baseline. Ports with an empty ``versions``
    list are left out silently. Progress and the elapsed time are printed.
    """
    start_time = time.time()

    # Assume each directory in ${VCPKG_ROOT}/ports is a different port
    port_names = sorted(
        item for item in os.listdir(PORTS_DIRECTORY)
        if os.path.isdir(os.path.join(PORTS_DIRECTORY, item)))

    baseline_entries = {}
    total_count = len(port_names)
    for i, port_name in enumerate(port_names, 1):
        port_file_path = os.path.join(
            VERSIONS_DB_DIRECTORY, f'{port_name[0]}-', f'{port_name}.json')

        if not os.path.exists(port_file_path):
            print(
                f'Error: No version file for {port_name}.\n', file=sys.stderr)
            continue
        sys.stderr.write(
            f'\rProcessed {i}/{total_count} ({i/total_count:.2%})')

        try:
            with open(port_file_path, 'r', encoding='utf-8') as db_file:
                versions_object = json.load(db_file)
        except json.JSONDecodeError as e:
            # Reported on stderr like the other per-port errors.
            print(f'Error: Decoding {port_file_path}\n{e}\n', file=sys.stderr)
            continue

        # An empty history has no latest version, so it contributes no entry.
        versions = versions_object['versions']
        if versions:
            baseline_entries[port_name] = _baseline_entry(versions[0])

    baseline_object = {'default': baseline_entries}

    os.makedirs(VERSIONS_DB_DIRECTORY, exist_ok=True)
    baseline_path = os.path.join(VERSIONS_DB_DIRECTORY, 'baseline.json')
    with open(baseline_path, 'w') as baseline_file:
        json.dump(baseline_object, baseline_file)

    elapsed_time = time.time() - start_time
    print(f'\nElapsed time: {elapsed_time:.2f} seconds')
def main():
    """Generate the baseline file, provided the version database exists.

    If ``VERSIONS_DB_DIRECTORY`` does not exist there is nothing to build a
    baseline from, so a hint is printed on stderr and the script exits with
    status 1 instead of writing an empty baseline.
    """
    if not os.path.exists(VERSIONS_DB_DIRECTORY):
        print('Version DB files must exist before generating a baseline.\n'
              'Run: `python generatePortVersionsDb.py`\n', file=sys.stderr)
        sys.exit(1)
    generate_baseline()


if __name__ == "__main__":
    main()

View File

@ -1,17 +1,19 @@
import os import os
import os.path
import sys import sys
import subprocess import subprocess
import json import json
import time import time
import shutil import shutil
from subprocess import CalledProcessError import multiprocessing
from json.decoder import JSONDecodeError
from pathlib import Path from pathlib import Path
MAX_PROCESSES = multiprocessing.cpu_count()
SCRIPT_DIRECTORY = os.path.dirname(os.path.abspath(__file__)) SCRIPT_DIRECTORY = os.path.dirname(os.path.abspath(__file__))
PORTS_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, '../ports')
VERSIONS_DB_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, '../port_versions')
def get_current_git_ref(): def get_current_git_ref():
@ -24,91 +26,62 @@ def get_current_git_ref():
return None return None
def generate_port_versions_file(port_name):
    """Create the version history file for one port unless it already exists.

    The file lives at
    ``VERSIONS_DB_DIRECTORY/<first letter>-/<port name>.json``; the containing
    directory is created when needed. The file itself is written by
    ``vcpkg x-history <port> --x-json --output=<file>``, run as a subprocess.
    If that command exits with a non-zero status, its captured stdout is
    reported on stderr and no exception is raised.

    Args:
        port_name: name of the port (a non-empty string).
    """
    containing_dir = os.path.join(VERSIONS_DB_DIRECTORY, f'{port_name[0]}-')
    os.makedirs(containing_dir, exist_ok=True)

    output_file_path = os.path.join(containing_dir, f'{port_name}.json')
    if os.path.exists(output_file_path):
        return

    # The vcpkg binary only carries the '.exe' suffix on Windows.
    vcpkg_exe = os.path.join(
        SCRIPT_DIRECTORY, '../vcpkg.exe' if os.name == 'nt' else '../vcpkg')

    # NOTE(review): GIT_OPTIONAL_LOCKS=0 is presumably set so that git calls
    # made by concurrently running vcpkg processes do not contend on
    # optional locks - confirm.
    env = os.environ.copy()
    env['GIT_OPTIONAL_LOCKS'] = '0'
    output = subprocess.run(
        [vcpkg_exe, 'x-history', port_name, '--x-json',
         f'--output={output_file_path}'],
        capture_output=True, encoding='utf-8', env=env)
    if output.returncode != 0:
        print(f'x-history {port_name} failed: ',
              output.stdout.strip(), file=sys.stderr)
def generate_port_versions_db(revision):
    """Generate the version file of every port, then mark the DB as built.

    Every directory directly under ``PORTS_DIRECTORY`` is treated as a port
    and handed to ``generate_port_versions_file`` through a process pool.
    Progress is written to stderr. Afterwards an empty file named after
    ``revision`` is created in ``VERSIONS_DB_DIRECTORY`` and the elapsed time
    is printed.

    Args:
        revision: name of the marker file to create in
            ``VERSIONS_DB_DIRECTORY`` once all ports were processed.
    """
    start_time = time.time()

    # Assume each directory in ${VCPKG_ROOT}/ports is a different port
    port_names = [item for item in os.listdir(PORTS_DIRECTORY)
                  if os.path.isdir(os.path.join(PORTS_DIRECTORY, item))]
    total_count = len(port_names)

    # Use half of the reported CPUs (but at least one worker); the pool must
    # be created with the same number that is announced.
    concurrency = max(1, MAX_PROCESSES // 2)
    print(f'Running {concurrency:.0f} parallel processes')
    # The context manager terminates the workers if anything below raises.
    with multiprocessing.Pool(concurrency) as process_pool:
        results = process_pool.imap_unordered(
            generate_port_versions_file, port_names)
        for i, _ in enumerate(results, 1):
            sys.stderr.write(
                f'\rProcessed: {i}/{total_count} ({(i / total_count):.2%})')
        process_pool.close()
        process_pool.join()

    # Generate timestamp
    # The directory does not exist yet if no port was processed.
    os.makedirs(VERSIONS_DB_DIRECTORY, exist_ok=True)
    rev_file = os.path.join(VERSIONS_DB_DIRECTORY, revision)
    Path(rev_file).touch()

    elapsed_time = time.time() - start_time
    print(
        f'\nElapsed time: {elapsed_time:.2f} seconds')
def main():
    """Build the port versions database for the current Git revision.

    Exits with status 1 (after a message on stderr) when the current Git
    revision cannot be determined, and with status 0 when a marker file named
    after the revision already exists in ``VERSIONS_DB_DIRECTORY``. Otherwise
    the database is generated for that revision.
    """
    revision = get_current_git_ref()
    if not revision:
        print('Couldn\'t fetch current Git revision', file=sys.stderr)
        sys.exit(1)

    # A file named after the revision marks an already generated database.
    already_generated = os.path.exists(
        os.path.join(VERSIONS_DB_DIRECTORY, revision))
    if already_generated:
        print(f'Database files already exist for commit {revision}')
        sys.exit(0)

    generate_port_versions_db(revision)


if __name__ == "__main__":
    main()

View File

@ -58,44 +58,6 @@ namespace vcpkg::Commands::PortHistory
return std::regex_match(version_string, re); return std::regex_match(version_string, re);
} }
// Splits a version string of the form "<version>-<port version>" into its two parts.
//
// Returns {version_string, port_version} unchanged when `from_manifest` is true or `port_version` is
// greater than zero. Otherwise, if the text after the last '-' is a decimal integer and the whole string
// is not a date (per `is_date`), returns {text before the last '-', that integer}. In every other case
// returns {version_string, 0}.
std::pair<std::string, int> clean_version_string(const std::string& version_string,
                                                 int port_version,
                                                 bool from_manifest)
{
    // Manifest files and ports that use the `Port-Version` field are assumed to have a clean version string
    // already.
    if (from_manifest || port_version > 0)
    {
        return std::make_pair(version_string, port_version);
    }

    const auto index = version_string.find_last_of('-');
    // Very lazy check to keep date versions untouched
    if (index == std::string::npos || is_date(version_string))
    {
        return std::make_pair(version_string, 0);
    }

    const auto maybe_port_version = version_string.substr(index + 1);
    // std::stoi skips leading whitespace and accepts a sign, so require a leading digit.
    if (maybe_port_version.empty() || maybe_port_version.front() < '0' || maybe_port_version.front() > '9')
    {
        return std::make_pair(version_string, 0);
    }

    try
    {
        std::size_t parsed_length = 0;
        const int clean_port_version = std::stoi(maybe_port_version, &parsed_length);
        // std::stoi stops at the first non-digit ("3beta" parses as 3); only a fragment that was
        // consumed entirely is a port version.
        if (parsed_length == maybe_port_version.size())
        {
            return std::make_pair(version_string.substr(0, index), clean_port_version);
        }
    }
    catch (const std::exception&)
    {
        // Out of range for int: fall through and treat the fragment as part of the version string.
    }

    // If not convertible to int consider last fragment as part of version string
    return std::make_pair(version_string, 0);
}
vcpkg::Optional<HistoryVersion> get_version_from_text(const std::string& text, vcpkg::Optional<HistoryVersion> get_version_from_text(const std::string& text,
const std::string& git_tree, const std::string& git_tree,
const std::string& commit_id, const std::string& commit_id,
@ -108,20 +70,17 @@ namespace vcpkg::Commands::PortHistory
{ {
if (const auto& scf = maybe_scf->get()) if (const auto& scf = maybe_scf->get())
{ {
// TODO: Get clean version name and port version auto version = scf->core_paragraph->version;
const auto version_string = scf->core_paragraph->version; auto port_version = scf->core_paragraph->port_version;
const auto clean_version =
clean_version_string(version_string, scf->core_paragraph->port_version, is_manifest);
// SCF to HistoryVersion
return HistoryVersion{ return HistoryVersion{
port_name, port_name,
git_tree, git_tree,
commit_id, commit_id,
commit_date, commit_date,
Strings::concat(clean_version.first, "#", std::to_string(clean_version.second)), Strings::concat(version, "#", port_version),
clean_version.first, version,
clean_version.second}; port_version,
};
} }
} }
@ -197,29 +156,37 @@ namespace vcpkg::Commands::PortHistory
ret.emplace_back(version); ret.emplace_back(version);
} }
} }
// NOTE: Uncomment this code if you're looking for edge cases to patch in the generation.
// Otherwise, x-history simply skips "bad" versions, which is OK behavior.
// else
//{
// Checks::exit_with_message(VCPKG_LINE_INFO, "Failed to get version from %s:%s",
// commit_date_pair.first, port_name);
//}
} }
return ret; return ret;
} }
} }
// Name of the setting that perform_and_exit looks up to decide whether the JSON output is written to a
// file instead of being printed.
static constexpr StringLiteral OPTION_OUTPUT_FILE = "output";
// Settings accepted by this command, each paired with its help text.
static const CommandSetting HISTORY_SETTINGS[] = {
{OPTION_OUTPUT_FILE, "Write output to a file"},
};
// Argument description handed to parse_arguments in perform_and_exit.
// NOTE(review): the two `1` fields look like the minimum and maximum number of positional arguments
// (exactly one port name) - confirm against the CommandStructure declaration.
const CommandStructure COMMAND_STRUCTURE = { const CommandStructure COMMAND_STRUCTURE = {
create_example_string("history <port>"), create_example_string("history <port>"),
1, 1,
1, 1,
{}, {{}, {HISTORY_SETTINGS}, {}},
nullptr, nullptr,
}; };
// Looks `key` up in `m`: yields a copy of the mapped value when the key is present and `nullopt`
// when it is not.
static Optional<std::string> maybe_lookup(std::unordered_map<std::string, std::string> const& m,
                                          std::string const& key)
{
    const auto found = m.find(key);
    if (found == m.end())
    {
        return nullopt;
    }
    return found->second;
}
void perform_and_exit(const VcpkgCmdArguments& args, const VcpkgPaths& paths) void perform_and_exit(const VcpkgCmdArguments& args, const VcpkgPaths& paths)
{ {
const ParsedArguments options = args.parse_arguments(COMMAND_STRUCTURE); const ParsedArguments parsed_args = args.parse_arguments(COMMAND_STRUCTURE);
auto maybe_output_file = maybe_lookup(parsed_args.settings, OPTION_OUTPUT_FILE);
std::string port_name = args.command_arguments.at(0); std::string port_name = args.command_arguments.at(0);
std::vector<HistoryVersion> versions = read_versions_from_log(paths, port_name); std::vector<HistoryVersion> versions = read_versions_from_log(paths, port_name);
@ -241,10 +208,26 @@ namespace vcpkg::Commands::PortHistory
root.insert("versions", versions_json); root.insert("versions", versions_json);
auto json_string = Json::stringify(root, vcpkg::Json::JsonStyle::with_spaces(2)); auto json_string = Json::stringify(root, vcpkg::Json::JsonStyle::with_spaces(2));
System::printf("%s\n", json_string);
if (maybe_output_file.has_value())
{
auto output_file_path = fs::u8path(maybe_output_file.value_or_exit(VCPKG_LINE_INFO));
auto& fs = paths.get_filesystem();
fs.write_contents(output_file_path, json_string, VCPKG_LINE_INFO);
}
else
{
System::printf("%s\n", json_string);
}
} }
else else
{ {
if (maybe_output_file.has_value())
{
System::printf(
System::Color::warning, "Warning: Option `--$s` requires `--x-json` switch.", OPTION_OUTPUT_FILE);
}
System::print2(" version date vcpkg commit\n"); System::print2(" version date vcpkg commit\n");
for (auto&& version : versions) for (auto&& version : versions)
{ {