From 6d3d6490ebe693b0c798bfa2e43923043387baf2 Mon Sep 17 00:00:00 2001 From: Victor Romero Date: Tue, 5 Jan 2021 14:36:46 -0800 Subject: [PATCH] [vcpkg] Improve versioning files generators (#15172) * Remove port version splitting from x-history * Parallelize versions file generator * Use cpu_count()/2 to avoid crashes * Use generatePortVersionsDb.py output to generate baseline * Update scripts/generateBaseline.py Co-authored-by: Adam Johnson * rename generateBaseline function * Update toolsrc/src/vcpkg/commands.porthistory.cpp Co-authored-by: ras0219 <533828+ras0219@users.noreply.github.com> * Remove unused code Co-authored-by: Adam Johnson Co-authored-by: ras0219 <533828+ras0219@users.noreply.github.com> --- scripts/generateBaseline.py | 93 ++++++++++++------- scripts/generatePortVersionsDb.py | 103 ++++++++------------- toolsrc/src/vcpkg/commands.porthistory.cpp | 97 ++++++++----------- 3 files changed, 135 insertions(+), 158 deletions(-) diff --git a/scripts/generateBaseline.py b/scripts/generateBaseline.py index 45c424a7df..55bea6ba21 100644 --- a/scripts/generateBaseline.py +++ b/scripts/generateBaseline.py @@ -1,50 +1,71 @@ import os -import json -import subprocess import sys +import json +import time + +from pathlib import Path + SCRIPT_DIRECTORY = os.path.dirname(os.path.abspath(__file__)) +PORTS_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, '../ports') +VERSIONS_DB_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, '../port_versions') -def generate_baseline(ports_path, output_filepath): +def generate_baseline(): + start_time = time.time() + + # Assume each directory in ${VCPKG_ROOT}/ports is a different port port_names = [item for item in os.listdir( - ports_path) if os.path.isdir(os.path.join(ports_path, item))] + PORTS_DIRECTORY) if os.path.isdir(os.path.join(PORTS_DIRECTORY, item))] port_names.sort() - total = len(port_names) - baseline_versions = {} - for counter, port_name in enumerate(port_names): - vcpkg_exe = os.path.join(SCRIPT_DIRECTORY, '../vcpkg') - 
print(f'[{counter + 1}/{total}] Getting package info for {port_name}') - output = subprocess.run( - [vcpkg_exe, 'x-package-info', '--x-json', port_name], - capture_output=True, - encoding='utf-8') + baseline_entries = {} + total_count = len(port_names) + for i, port_name in enumerate(port_names, 1): + port_file_path = os.path.join( + VERSIONS_DB_DIRECTORY, f'{port_name[0]}-', f'{port_name}.json') - if output.returncode == 0: - package_info = json.loads(output.stdout) - port_info = package_info['results'][port_name] + if not os.path.exists(port_file_path): + print( + f'Error: No version file for {port_name}.\n', file=sys.stderr) + continue + sys.stderr.write( + f'\rProcessed {i}/{total_count} ({i/total_count:.2%})') + with open(port_file_path, 'r') as db_file: + try: + versions_object = json.load(db_file) + if versions_object['versions']: + last_version = versions_object['versions'][0] + version_obj = {} + if 'version' in last_version: + version_obj['version'] = last_version['version'] + elif 'version-date' in last_version: + version_obj['version-date'] = last_version['version-date'] + elif 'version-semver' in last_version: + version_obj['version-semver'] = last_version['version-semver'] + else: + version_obj['version-string'] = last_version['version-string'] + version_obj['port-version'] = last_version['port-version'] + baseline_entries[port_name] = version_obj + except json.JSONDecodeError as e: + print(f'Error: Decoding {port_file_path}\n{e}\n', file=sys.stderr) + baseline_object = {} + baseline_object['default'] = baseline_entries - version = {} - for scheme in ['version-string', 'version-semver', 'version-date', 'version']: - if scheme in port_info: - version[scheme] = package_info['results'][port_name][scheme] - break - version['port-version'] = 0 - if 'port-version' in port_info: - version['port-version'] = port_info['port-version'] - baseline_versions[port_name] = version - else: - print(f'x-package-info --x-json {port_name} failed: ', output.stdout.strip(), file=sys.stderr) 
+ os.makedirs(VERSIONS_DB_DIRECTORY, exist_ok=True) + baseline_path = os.path.join(VERSIONS_DB_DIRECTORY, 'baseline.json') + with open(baseline_path, 'w') as baseline_file: + json.dump(baseline_object, baseline_file) - output = {} - output['default'] = baseline_versions - - with open(output_filepath, 'r') as output_file: - json.dump(baseline_versions, output_file) - sys.exit(0) + elapsed_time = time.time() - start_time + print(f'\nElapsed time: {elapsed_time:.2f} seconds') -if __name__ == '__main__': - generate_baseline( - ports_path=f'{SCRIPT_DIRECTORY}/../ports', output_filepath='baseline.json') +def main(): + if not os.path.exists(VERSIONS_DB_DIRECTORY): + sys.exit(f'Version DB files must exist before generating a baseline.\nRun: `python generatePortVersionsDb.py`\n') + generate_baseline() + + +if __name__ == "__main__": + main() diff --git a/scripts/generatePortVersionsDb.py b/scripts/generatePortVersionsDb.py index e3c338c64e..3b7de69421 100644 --- a/scripts/generatePortVersionsDb.py +++ b/scripts/generatePortVersionsDb.py @@ -1,17 +1,19 @@ import os -import os.path import sys import subprocess import json import time import shutil -from subprocess import CalledProcessError -from json.decoder import JSONDecodeError +import multiprocessing + from pathlib import Path +MAX_PROCESSES = multiprocessing.cpu_count() SCRIPT_DIRECTORY = os.path.dirname(os.path.abspath(__file__)) +PORTS_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, '../ports') +VERSIONS_DB_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, '../port_versions') def get_current_git_ref(): @@ -24,91 +26,62 @@ def get_current_git_ref(): return None -def generate_port_versions_db(ports_path, db_path, revision): +def generate_port_versions_file(port_name): + containing_dir = os.path.join(VERSIONS_DB_DIRECTORY, f'{port_name[0]}-') + os.makedirs(containing_dir, exist_ok=True) + + output_file_path = os.path.join(containing_dir, f'{port_name}.json') + if not os.path.exists(output_file_path): + env = os.environ.copy() + 
env['GIT_OPTIONAL_LOCKS'] = '0' + output = subprocess.run( + [os.path.join(SCRIPT_DIRECTORY, '../vcpkg'), + 'x-history', port_name, '--x-json', f'--output={output_file_path}'], + capture_output=True, encoding='utf-8', env=env) + if output.returncode != 0: + print(f'x-history {port_name} failed: ', + output.stdout.strip(), file=sys.stderr) + + +def generate_port_versions_db(revision): start_time = time.time() # Assume each directory in ${VCPKG_ROOT}/ports is a different port port_names = [item for item in os.listdir( - ports_path) if os.path.isdir(os.path.join(ports_path, item))] - port_names.sort() + PORTS_DIRECTORY) if os.path.isdir(os.path.join(PORTS_DIRECTORY, item))] total_count = len(port_names) - # Dictionary to collect the latest version of each port as baseline - baseline_objects = {} - baseline_objects['default'] = {} - - for counter, port_name in enumerate(port_names): - containing_dir = os.path.join(db_path, f'{port_name[0]}-') - os.makedirs(containing_dir, exist_ok=True) - - output_filepath = os.path.join(containing_dir, f'{port_name}.json') - if not os.path.exists(output_filepath): - output = subprocess.run( - [os.path.join(SCRIPT_DIRECTORY, '../vcpkg'), - 'x-history', port_name, '--x-json'], - capture_output=True, encoding='utf-8') - - if output.returncode == 0: - try: - versions_object = json.loads(output.stdout) - - # Put latest version in baseline dictionary - latest_version = versions_object["versions"][0] - baseline_objects['default'][port_name] = { - "version-string": latest_version["version-string"], - "port-version": latest_version["port-version"] - } - with open(output_filepath, 'w') as output_file: - json.dump(versions_object, output_file) - except JSONDecodeError: - print( - f'Malformed JSON from vcpkg x-history {port_name}: ', output.stdout.strip(), file=sys.stderr) - else: - print(f'x-history {port_name} failed: ', - output.stdout.strip(), file=sys.stderr) - - # This should be replaced by a progress bar - if counter > 0 and counter % 
100 == 0: - elapsed_time = time.time() - start_time - print( - f'Processed {counter} out of {total_count}. Elapsed time: {elapsed_time:.2f} seconds') - - # Generate baseline.json - baseline_file_path = os.path.join(db_path, 'baseline.json') - with open(baseline_file_path, 'w') as baseline_output_file: - json.dump(baseline_objects, baseline_output_file) + concurrency = max(1, MAX_PROCESSES // 2) + print(f'Running {concurrency} parallel processes') + process_pool = multiprocessing.Pool(concurrency) + for i, _ in enumerate(process_pool.imap_unordered(generate_port_versions_file, port_names), 1): + sys.stderr.write( + f'\rProcessed: {i}/{total_count} ({(i / total_count):.2%})') + process_pool.close() + process_pool.join() # Generate timestamp - rev_file = os.path.join(db_path, revision) + rev_file = os.path.join(VERSIONS_DB_DIRECTORY, revision) Path(rev_file).touch() elapsed_time = time.time() - start_time print( - f'Processed {total_count} total ports. Elapsed time: {elapsed_time:.2f} seconds') + f'\nElapsed time: {elapsed_time:.2f} seconds') -def main(ports_path, db_path): +def main(): revision = get_current_git_ref() if not revision: print('Couldn\'t fetch current Git revision', file=sys.stderr) sys.exit(1) - rev_file = os.path.join(db_path, revision) + rev_file = os.path.join(VERSIONS_DB_DIRECTORY, revision) if os.path.exists(rev_file): print(f'Database files already exist for commit {revision}') sys.exit(0) - if (os.path.exists(db_path)): - try: - shutil.rmtree(db_path) - except OSError as e: - print(f'Could not delete folder: {db_path}.\nError: {e.strerror}') - - generate_port_versions_db(ports_path=ports_path, - db_path=db_path, - revision=revision) + generate_port_versions_db(revision) if __name__ == "__main__": - main(ports_path=os.path.join(SCRIPT_DIRECTORY, '../ports'), - db_path=os.path.join(SCRIPT_DIRECTORY, '../port_versions')) + main() diff --git a/toolsrc/src/vcpkg/commands.porthistory.cpp b/toolsrc/src/vcpkg/commands.porthistory.cpp index 
7efb36bd18..92d782c36b 100644 --- a/toolsrc/src/vcpkg/commands.porthistory.cpp +++ b/toolsrc/src/vcpkg/commands.porthistory.cpp @@ -58,44 +58,6 @@ namespace vcpkg::Commands::PortHistory return std::regex_match(version_string, re); } - std::pair clean_version_string(const std::string& version_string, - int port_version, - bool from_manifest) - { - // Manifest files and ports that use the `Port-Version` field are assumed to have a clean version string - // already. - if (from_manifest || port_version > 0) - { - return std::make_pair(version_string, port_version); - } - - std::string clean_version = version_string; - int clean_port_version = 0; - - const auto index = version_string.find_last_of('-'); - if (index != std::string::npos) - { - // Very lazy check to keep date versions untouched - if (!is_date(version_string)) - { - auto maybe_port_version = version_string.substr(index + 1); - clean_version.resize(index); - - try - { - clean_port_version = std::stoi(maybe_port_version); - } - catch (std::exception&) - { - // If not convertible to int consider last fragment as part of version string - clean_version = version_string; - } - } - } - - return std::make_pair(clean_version, clean_port_version); - } - vcpkg::Optional get_version_from_text(const std::string& text, const std::string& git_tree, const std::string& commit_id, @@ -108,20 +70,17 @@ namespace vcpkg::Commands::PortHistory { if (const auto& scf = maybe_scf->get()) { - // TODO: Get clean version name and port version - const auto version_string = scf->core_paragraph->version; - const auto clean_version = - clean_version_string(version_string, scf->core_paragraph->port_version, is_manifest); - - // SCF to HistoryVersion + auto version = scf->core_paragraph->version; + auto port_version = scf->core_paragraph->port_version; return HistoryVersion{ port_name, git_tree, commit_id, commit_date, - Strings::concat(clean_version.first, "#", std::to_string(clean_version.second)), - clean_version.first, - 
clean_version.second}; + Strings::concat(version, "#", port_version), + version, + port_version, + }; } } @@ -197,29 +156,37 @@ namespace vcpkg::Commands::PortHistory ret.emplace_back(version); } } - // NOTE: Uncomment this code if you're looking for edge cases to patch in the generation. - // Otherwise, x-history simply skips "bad" versions, which is OK behavior. - // else - //{ - // Checks::exit_with_message(VCPKG_LINE_INFO, "Failed to get version from %s:%s", - // commit_date_pair.first, port_name); - //} } return ret; } } + static constexpr StringLiteral OPTION_OUTPUT_FILE = "output"; + + static const CommandSetting HISTORY_SETTINGS[] = { + {OPTION_OUTPUT_FILE, "Write output to a file"}, + }; + const CommandStructure COMMAND_STRUCTURE = { create_example_string("history "), 1, 1, - {}, + {{}, {HISTORY_SETTINGS}, {}}, nullptr, }; + static Optional maybe_lookup(std::unordered_map const& m, + std::string const& key) + { + const auto it = m.find(key); + if (it != m.end()) return it->second; + return nullopt; + } + void perform_and_exit(const VcpkgCmdArguments& args, const VcpkgPaths& paths) { - const ParsedArguments options = args.parse_arguments(COMMAND_STRUCTURE); + const ParsedArguments parsed_args = args.parse_arguments(COMMAND_STRUCTURE); + auto maybe_output_file = maybe_lookup(parsed_args.settings, OPTION_OUTPUT_FILE); std::string port_name = args.command_arguments.at(0); std::vector versions = read_versions_from_log(paths, port_name); @@ -241,10 +208,26 @@ namespace vcpkg::Commands::PortHistory root.insert("versions", versions_json); auto json_string = Json::stringify(root, vcpkg::Json::JsonStyle::with_spaces(2)); - System::printf("%s\n", json_string); + + if (maybe_output_file.has_value()) + { + auto output_file_path = fs::u8path(maybe_output_file.value_or_exit(VCPKG_LINE_INFO)); + auto& fs = paths.get_filesystem(); + fs.write_contents(output_file_path, json_string, VCPKG_LINE_INFO); + } + else + { + System::printf("%s\n", json_string); + } } else { + if 
(maybe_output_file.has_value()) + { + System::printf( + System::Color::warning, "Warning: Option `--%s` requires `--x-json` switch.\n", OPTION_OUTPUT_FILE); + } + System::print2(" version date vcpkg commit\n"); for (auto&& version : versions) {