Implement #624: Use shorter hashes with CPM_SOURCE_CACHE (#631)

* Add ASSERT_CONTENTS_EQUAL test macro in testing.cmake

Checks whether the contents of a file match the given input

* Use shorter hashes with CPM_SOURCE_CACHE (#624)

Uses shorter hashes with CPM_SOURCE_CACHE. Falls back to a longer hash
if necessary (i.e., if there's a collision with an existing hash).

See: https://github.com/cpm-cmake/CPM.cmake/issues/624

* Update integration tests to support shorter hashes

* trigger ci

* run cmake-format

* if already available, use the legacy cache hash

* create temporary file in current binary dir

* add test case for legacy hash

---------

Co-authored-by: Lars Melchior <lars.melchior@gmail.com>
Co-authored-by: Lars Melchior <TheLartians@users.noreply.github.com>
This commit is contained in:
Alecto Irene Perez
2025-05-18 13:02:47 -04:00
committed by GitHub
parent d7614381ab
commit d9364ce284
4 changed files with 182 additions and 1 deletions

View File

@@ -202,6 +202,60 @@ function(cpm_package_name_from_git_uri URI RESULT)
endif()
endfunction()
# Find the shortest hash that can be used. For example, if origin_hash is
# cccb77ae9609d2768ed80dd42cec54f77b1f1455, the following files will be checked until one is found
# that is either empty (allowing us to assign origin_hash) or whose contents match ${origin_hash}:
#
# * .../cccb.hash
# * .../cccb77ae.hash
# * .../cccb77ae9609.hash
# * .../cccb77ae9609d276.hash
# * etc
#
# We will be able to use a shorter path with very high probability, but in the (rare) event that the
# first couple characters collide, we will check longer and longer substrings.
function(cpm_get_shortest_hash source_cache_dir origin_hash short_hash_output_var)
  # Pick the shortest prefix of `origin_hash` that can be used as a cache directory name.
  #
  # Arguments:
  #
  # * source_cache_dir:      directory holding the `<prefix>.hash` / `<prefix>.lock` bookkeeping files
  # * origin_hash:           the full hash to shorten
  # * short_hash_output_var: name of the variable in the caller's scope that receives the result
  #
  # Prefixes of length 4, 8, ..., 40 are tried in order. A prefix is accepted when its `.hash` file
  # is empty (it is then claimed by writing `origin_hash` into it) or already contains `origin_hash`.
  # If no prefix is accepted, the last (longest) prefix tried is returned.

  # for compatibility with caches populated by a previous version of CPM, check if a directory using
  # the full hash already exists
  if(EXISTS "${source_cache_dir}/${origin_hash}")
    set(${short_hash_output_var}
        "${origin_hash}"
        PARENT_SCOPE
    )
    return()
  endif()

  foreach(len RANGE 4 40 4)
    string(SUBSTRING "${origin_hash}" 0 ${len} short_hash)

    # All paths are quoted so that a cache directory containing `;` is passed as a single argument
    # instead of being split into a list.
    set(hash_lock "${source_cache_dir}/${short_hash}.lock")
    set(hash_fp "${source_cache_dir}/${short_hash}.hash")

    # Take a lock, so we don't have a race condition with another instance of cmake. We will release
    # this lock when we can, however, if there is an error, we want to ensure it gets released on
    # its own on exit from the function.
    file(LOCK "${hash_lock}" GUARD FUNCTION)

    # Load the contents of .../${short_hash}.hash (creating the file first if it does not exist)
    file(TOUCH "${hash_fp}")
    file(READ "${hash_fp}" hash_fp_contents)

    set(hash_accepted FALSE)
    if(hash_fp_contents STREQUAL "")
      # Nobody owns this prefix yet: claim it by writing the origin hash
      file(WRITE "${hash_fp}" "${origin_hash}")
      set(hash_accepted TRUE)
    elseif(hash_fp_contents STREQUAL origin_hash)
      # This prefix was already claimed for the very same origin hash
      set(hash_accepted TRUE)
    endif()

    # Release in exactly one place, whether or not the prefix was accepted
    file(LOCK "${hash_lock}" RELEASE)

    if(hash_accepted)
      break()
    endif()
    # Otherwise the prefix belongs to a different hash (collision): try a longer prefix
  endforeach()

  set(${short_hash_output_var}
      "${short_hash}"
      PARENT_SCOPE
  )
endfunction()
# Try to infer package name and version from a url
function(cpm_package_name_and_ver_from_url url outName outVer)
if(url MATCHES "[/\\?]([a-zA-Z0-9_\\.-]+)\\.(tar|tar\\.gz|tar\\.bz2|zip|ZIP)(\\?|/|$)")
@@ -806,9 +860,19 @@ function(CPMAddPackage)
set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${CPM_ARGS_CUSTOM_CACHE_KEY})
elseif(CPM_USE_NAMED_CACHE_DIRECTORIES)
string(SHA1 origin_hash "${origin_parameters};NEW_CACHE_STRUCTURE_TAG")
cpm_get_shortest_hash(
"${CPM_SOURCE_CACHE}/${lower_case_name}" # source cache directory
"${origin_hash}" # Input hash
origin_hash # Computed hash
)
set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash}/${CPM_ARGS_NAME})
else()
string(SHA1 origin_hash "${origin_parameters}")
cpm_get_shortest_hash(
"${CPM_SOURCE_CACHE}/${lower_case_name}" # source cache directory
"${origin_hash}" # Input hash
origin_hash # Computed hash
)
set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash})
endif()
# Expand `download_directory` relative path. This is important because EXISTS doesn't work for

View File

@@ -79,3 +79,16 @@ function(ASSERT_NOT_EXISTS file)
message(FATAL_ERROR "assertion failed: file ${file} exists")
endif()
endfunction()
function(ASSERT_CONTENTS_EQUAL file content)
  # Assert that `file` exists and that its entire contents are exactly equal to `content`.
  #
  # * file:    path of the file to inspect
  # * content: the exact text the file is expected to contain
  #
  # Emits a STATUS message on success and a FATAL_ERROR otherwise.

  # Quote the path so an empty or `;`-containing value still reaches EXISTS as a single argument
  if(NOT EXISTS "${file}")
    message(FATAL_ERROR "assertion failed: file '${file}' does not exist")
  endif()

  file(READ "${file}" file_content)
  if(content STREQUAL file_content)
    message(STATUS "test passed: '${file}' exists and contains '${content}'")
  else()
    message(FATAL_ERROR "assertion failed: file '${file}' does not contain expected content.")
  endif()
endfunction()