From 4dc864e2401f2ed3230c9042a4dd56f6d1c30360 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Tue, 15 Jun 2021 19:59:52 -0400 Subject: [PATCH] [arrow] Update to 4.0.0 (#17975) * Update arrow to 4.0.0 * Format * Try fix thrift * Update versions/ files * Do not set ZSTD_ROOT * Remove double quotes causing Windows problems * Apply patches * Remove LIB_DIR_OPTIONS * Tweak zstd flags * Update version hash * Format * Fail early on x86 * Update hash * Fail early on arm, arm64 * Update hash * Add expected failures to to scripts/ci.baseline.txt * Exclude mallocs from default features * Update hash * Set default-features to false for aws-sdk-cpp Co-authored-by: Robert Schumacher * Specify only x64 support in manifest Co-authored-by: Robert Schumacher * Remove unneeded ci.baseline.txt entries Co-authored-by: Robert Schumacher * Remove dataset from default-features Co-authored-by: Robert Schumacher * Update hash * Remove zstd path args * Update hash Co-authored-by: Tanguy Fautre Co-authored-by: Robert Schumacher --- ports/arrow/CONTROL | 20 --------- ports/arrow/all.patch | 76 ++++++++++++-------------------- ports/arrow/portfile.cmake | 75 +++++++++++++++++++------------- ports/arrow/vcpkg.json | 88 ++++++++++++++++++++++++++++++++++++++ versions/a-/arrow.json | 5 +++ versions/baseline.json | 2 +- 6 files changed, 168 insertions(+), 98 deletions(-) delete mode 100644 ports/arrow/CONTROL create mode 100644 ports/arrow/vcpkg.json diff --git a/ports/arrow/CONTROL b/ports/arrow/CONTROL deleted file mode 100644 index 8aace675f9..0000000000 --- a/ports/arrow/CONTROL +++ /dev/null @@ -1,20 +0,0 @@ -Source: arrow -Version: 3.0.0 -Port-Version: 0 -Build-Depends: boost-algorithm, boost-filesystem, boost-multiprecision, boost-system, brotli, bzip2, double-conversion, flatbuffers, gflags, glog, lz4, openssl, rapidjson, re2, snappy, thrift, uriparser, utf8proc, zlib, zstd -Homepage: https://github.com/apache/arrow -Description: Apache Arrow is a columnar in-memory analytics layer designed to accelerate big data. It houses a set of canonical in-memory representations of flat and hierarchical data along with multiple language-bindings for structure manipulation. It also provides IPC and common algorithm implementations. -Supports: x64&!x86&!arm -Default-Features: csv, json, parquet, filesystem - -Feature: csv -Description: CSV file support - -Feature: json -Description: JSON file support - -Feature: parquet -Description: Parquet file support - -Feature: filesystem -Description: Local filesystem support diff --git a/ports/arrow/all.patch b/ports/arrow/all.patch index 838a9dcb45..8ff755ac5c 100644 --- a/ports/arrow/all.patch +++ b/ports/arrow/all.patch @@ -1,11 +1,11 @@ diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake -index e59b4a38a..9bd895608 100644 +index 2fd897b5d..b6118ad4f 100644 --- a/cpp/cmake_modules/BuildUtils.cmake +++ b/cpp/cmake_modules/BuildUtils.cmake @@ -440,7 +440,7 @@ function(ADD_ARROW_LIB LIB_NAME) target_include_directories(${LIB_NAME}_static PRIVATE ${ARG_PRIVATE_INCLUDES}) endif() - + - if(MSVC_TOOLCHAIN) + if(MSVC_TOOLCHAIN AND 0) set(LIB_NAME_STATIC ${LIB_NAME}_static) @@ -26,26 +26,26 @@ index b46a0f1a0..3d87f5204 100644 + #pkg_check_modules(BROTLI_PC libbrotlicommon libbrotlienc libbrotlidec) + if(BROTLI_PC_FOUND AND 0) # Find via pkg_check_modules disabled as incompatible with vcpkg set(BROTLI_INCLUDE_DIR "${BROTLI_PC_libbrotlicommon_INCLUDEDIR}") - + # Some systems (e.g. Fedora) don't fill Brotli_LIBRARY_DIRS, so add the other dirs here. diff --git a/cpp/cmake_modules/FindLz4.cmake b/cpp/cmake_modules/FindLz4.cmake -index 14b6d93b9..d8d80c408 100644 +index 14b6d93b9..1905079ee 100644 --- a/cpp/cmake_modules/FindLz4.cmake +++ b/cpp/cmake_modules/FindLz4.cmake @@ -15,10 +15,12 @@ # specific language governing permissions and limitations # under the License. - + -if(MSVC_TOOLCHAIN AND NOT DEFINED LZ4_MSVC_LIB_PREFIX) - set(LZ4_MSVC_LIB_PREFIX "lib") +# Avoid the debug build linking to the release library by mistake. +# In theory harmless if static linking at this point, but disastrous if done for a shared library. +if(CMAKE_BUILD_TYPE STREQUAL "DEBUG") -+ set(LZ4_LIB_NAME_DEBUG_SUFFIX d) ++ set(LZ4_LIB_NAME_DEBUG_SUFFIX "d") endif() -set(LZ4_LIB_NAME_BASE "${LZ4_MSVC_LIB_PREFIX}lz4") +set(LZ4_LIB_NAME_BASE "lz4${LZ4_LIB_NAME_DEBUG_SUFFIX}") - + if(ARROW_LZ4_USE_SHARED) set(LZ4_LIB_NAMES) @@ -34,12 +36,8 @@ if(ARROW_LZ4_USE_SHARED) @@ -60,10 +60,10 @@ index 14b6d93b9..d8d80c408 100644 - "${CMAKE_STATIC_LIBRARY_PREFIX}${LZ4_LIB_NAME_BASE}${LZ4_STATIC_LIB_SUFFIX}") + "${CMAKE_STATIC_LIBRARY_PREFIX}${LZ4_LIB_NAME_BASE}${CMAKE_STATIC_LIBRARY_SUFFIX}") endif() - + if(LZ4_ROOT) @@ -56,8 +54,8 @@ if(LZ4_ROOT) - + else() find_package(PkgConfig QUIET) - pkg_check_modules(LZ4_PC liblz4) @@ -71,20 +71,20 @@ index 14b6d93b9..d8d80c408 100644 + #pkg_check_modules(LZ4_PC liblz4) + if(0) # Do not use pkg_check_modules, doesn't seem to work correctly on some macOS versions (10.x in GitHub Actions) set(LZ4_INCLUDE_DIR "${LZ4_PC_INCLUDEDIR}") - + list(APPEND LZ4_PC_LIBRARY_DIRS "${LZ4_PC_LIBDIR}") diff --git a/cpp/cmake_modules/FindSnappy.cmake b/cpp/cmake_modules/FindSnappy.cmake -index 5784cf592..817cf0c47 100644 +index 26cccb786..8bee097af 100644 --- a/cpp/cmake_modules/FindSnappy.cmake +++ b/cpp/cmake_modules/FindSnappy.cmake -@@ -15,20 +15,27 @@ +@@ -15,23 +15,30 @@ # specific language governing permissions and limitations # under the License. - + +# Avoid the debug build linking to the release library by mistake. +# In theory harmless if static linking at this point, but disastrous if done for a shared library. +if(CMAKE_BUILD_TYPE STREQUAL "DEBUG") -+ set(SNAPPY_LIB_NAME_DEBUG_SUFFIX d) ++ set(SNAPPY_LIB_NAME_DEBUG_SUFFIX "d") +endif() + +set(SNAPPY_LIB_NAME_BASE "snappy${SNAPPY_LIB_NAME_DEBUG_SUFFIX}") @@ -102,26 +102,29 @@ index 5784cf592..817cf0c47 100644 else() - set(SNAPPY_STATIC_LIB_NAME_BASE "snappy") if(MSVC) -- set(SNAPPY_STATIC_LIB_NAME_BASE "${SNAPPY_STATIC_LIB_NAME_BASE}${SNAPPY_MSVC_STATIC_LIB_SUFFIX}") -+ set(SNAPPY_STATIC_LIB_NAME_BASE "${SNAPPY_LIB_NAME_BASE}${SNAPPY_MSVC_STATIC_LIB_SUFFIX}") + set(SNAPPY_STATIC_LIB_NAME_BASE +- "${SNAPPY_STATIC_LIB_NAME_BASE}${SNAPPY_MSVC_STATIC_LIB_SUFFIX}") ++ "${SNAPPY_LIB_NAME_BASE}${SNAPPY_MSVC_STATIC_LIB_SUFFIX}") endif() -- set(SNAPPY_LIB_NAMES "${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_STATIC_LIB_NAME_BASE}${CMAKE_STATIC_LIBRARY_SUFFIX}") -+ set(SNAPPY_LIB_NAMES "${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_LIB_NAME_BASE}${CMAKE_STATIC_LIBRARY_SUFFIX}") + set( + SNAPPY_LIB_NAMES +- "${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_STATIC_LIB_NAME_BASE}${CMAKE_STATIC_LIBRARY_SUFFIX}" ++ "${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_LIB_NAME_BASE}${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) endif() - - if(Snappy_ROOT) + diff --git a/cpp/cmake_modules/FindThrift.cmake b/cpp/cmake_modules/FindThrift.cmake -index 273d907ed..65f477f54 100644 +index 273d907ed..02a1e7fe1 100644 --- a/cpp/cmake_modules/FindThrift.cmake +++ b/cpp/cmake_modules/FindThrift.cmake @@ -39,6 +39,12 @@ function(EXTRACT_THRIFT_VERSION) endif() endfunction(EXTRACT_THRIFT_VERSION) - + +# Avoid the debug build linking to the release library by mistake. +# In theory harmless if static linking at this point, but disastrous if done for a shared library. +if(CMAKE_BUILD_TYPE STREQUAL "DEBUG") -+ set(THRIFT_LIB_NAME_DEBUG_SUFFIX d) ++ set(THRIFT_LIB_NAME_DEBUG_SUFFIX "d") +endif() + if(MSVC_TOOLCHAIN AND NOT DEFINED THRIFT_MSVC_LIB_SUFFIX) @@ -133,7 +136,7 @@ index 273d907ed..65f477f54 100644 endif() -set(THRIFT_LIB_NAME_BASE "thrift${THRIFT_MSVC_LIB_SUFFIX}") +set(THRIFT_LIB_NAME_BASE "thrift${THRIFT_MSVC_LIB_SUFFIX}${THRIFT_LIB_NAME_DEBUG_SUFFIX}") - + if(ARROW_THRIFT_USE_SHARED) set(THRIFT_LIB_NAMES thrift) @@ -84,8 +90,8 @@ else() @@ -145,7 +148,7 @@ index 273d907ed..65f477f54 100644 + #pkg_check_modules(THRIFT_PC thrift) + if(0) # Do not use pkg_check_modules, as it finds the wrong location (an intermediate build dir). set(THRIFT_INCLUDE_DIR "${THRIFT_PC_INCLUDEDIR}") - + list(APPEND THRIFT_PC_LIBRARY_DIRS "${THRIFT_PC_LIBDIR}") @@ -101,8 +107,7 @@ else() set(THRIFT_VERSION ${THRIFT_PC_VERSION}) @@ -157,26 +160,3 @@ index 273d907ed..65f477f54 100644 find_path(THRIFT_INCLUDE_DIR thrift/Thrift.h PATH_SUFFIXES "include") find_program(THRIFT_COMPILER thrift PATH_SUFFIXES "bin") extract_thrift_version() -diff --git a/cpp/cmake_modules/Findzstd.cmake b/cpp/cmake_modules/Findzstd.cmake -index 6659a682d..d8cc4f72d 100644 ---- a/cpp/cmake_modules/Findzstd.cmake -+++ b/cpp/cmake_modules/Findzstd.cmake -@@ -34,13 +34,14 @@ if(ARROW_ZSTD_USE_SHARED) - ZSTD_LIB_NAMES - "${CMAKE_SHARED_LIBRARY_PREFIX}${ZSTD_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}") - else() -- if(MSVC AND NOT DEFINED ZSTD_MSVC_STATIC_LIB_SUFFIX) -- set(ZSTD_MSVC_STATIC_LIB_SUFFIX "_static") -+ if(MSVC AND CMAKE_BUILD_TYPE STREQUAL "DEBUG") -+ set(ZSTD_MSVC_DEBUG_LIB_SUFFIX d) - endif() - set(ZSTD_STATIC_LIB_SUFFIX -- "${ZSTD_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}") -+ "${ZSTD_MSVC_DEBUG_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(ZSTD_LIB_NAMES -- "${CMAKE_STATIC_LIBRARY_PREFIX}${ZSTD_LIB_NAME_BASE}${ZSTD_STATIC_LIB_SUFFIX}") -+ "zstd${ZSTD_STATIC_LIB_SUFFIX}" -+ "libzstd${ZSTD_STATIC_LIB_SUFFIX}") - endif() - - # First, find via if specified ZTD_ROOT diff --git a/ports/arrow/portfile.cmake b/ports/arrow/portfile.cmake index a36dad1c1d..fb9e05bc69 100644 --- a/ports/arrow/portfile.cmake +++ b/ports/arrow/portfile.cmake @@ -3,53 +3,70 @@ vcpkg_fail_port_install(ON_ARCH "x86" "arm" "arm64") vcpkg_from_github( OUT_SOURCE_PATH SOURCE_PATH REPO apache/arrow - REF apache-arrow-3.0.0 - SHA512 02645be0eaaaa69880ab911fc0b74665ebf52a35f9ad05210b23e7b42bcfbe3c3a4d44fa6c4c35af74764efbe528c2e0ebf0549ce5890c796be695ceb94e5606 + REF apache-arrow-4.0.0 + SHA512 4697a32004d02a519b8a8e899ed3cd981ae3485e6d34071436051080d6c84e25ad0bc568b3e52effe0a9204756da3d6e560a2037df06d2730dccd19c6b4c8027 HEAD_REF master PATCHES all.patch ) -string(COMPARE EQUAL ${VCPKG_LIBRARY_LINKAGE} "dynamic" ARROW_BUILD_SHARED) -string(COMPARE EQUAL ${VCPKG_LIBRARY_LINKAGE} "static" ARROW_BUILD_STATIC) - vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS - "csv" ARROW_CSV - "json" ARROW_JSON - "parquet" ARROW_PARQUET - "filesystem" ARROW_FILESYSTEM + FEATURES + csv ARROW_CSV + dataset ARROW_DATASET + filesystem ARROW_FILESYSTEM + flight ARROW_FLIGHT + json ARROW_JSON + orc ARROW_ORC + parquet ARROW_PARQUET + parquet PARQUET_REQUIRE_ENCRYPTION + s3 ARROW_S3 ) -file(REMOVE "${SOURCE_PATH}/cpp/cmake_modules/FindZSTD.cmake") +if(VCPKG_TARGET_IS_WINDOWS OR VCPKG_TARGET_IS_UWP) + set(MALLOC_OPTIONS -DARROW_JEMALLOC=OFF) +elseif("jemalloc" IN_LIST FEATURES) + set(MALLOC_OPTIONS -DARROW_JEMALLOC=ON) +else() + set(MALLOC_OPTIONS -DARROW_JEMALLOC=OFF) +endif() + +if(VCPKG_TARGET_IS_WINDOWS AND ("mimalloc" IN_LIST FEATURES)) + set(MALLOC_OPTIONS ${MALLOC_OPTIONS} -DARROW_MIMALLOC=ON) +else() + set(MALLOC_OPTIONS ${MALLOC_OPTIONS} -DARROW_MIMALLOC=OFF) +endif() + +string(COMPARE EQUAL ${VCPKG_LIBRARY_LINKAGE} "dynamic" ARROW_BUILD_SHARED) +string(COMPARE EQUAL ${VCPKG_LIBRARY_LINKAGE} "static" ARROW_BUILD_STATIC) +string(COMPARE EQUAL ${VCPKG_LIBRARY_LINKAGE} "dynamic" ARROW_DEPENDENCY_USE_SHARED) + +if(VCPKG_TARGET_IS_WINDOWS) + set(THRIFT_USE_SHARED OFF) +else() + set(THRIFT_USE_SHARED ${ARROW_DEPENDENCY_USE_SHARED}) +endif() vcpkg_configure_cmake( SOURCE_PATH ${SOURCE_PATH}/cpp PREFER_NINJA OPTIONS - -DARROW_DEPENDENCY_SOURCE=SYSTEM - -Duriparser_SOURCE=SYSTEM - -DARROW_BUILD_TESTS=OFF ${FEATURE_OPTIONS} - -DARROW_BUILD_STATIC=${ARROW_BUILD_STATIC} + ${MALLOC_OPTIONS} -DARROW_BUILD_SHARED=${ARROW_BUILD_SHARED} - -DARROW_BROTLI_USE_SHARED=${ARROW_BUILD_SHARED} # This can be wrong in custom triplets - -DARROW_GFLAGS_USE_SHARED=${ARROW_BUILD_SHARED} # This can be wrong in custom triplets - -DARROW_LZ4_USE_SHARED=${ARROW_BUILD_SHARED} # This can be wrong in custom triplets - -DARROW_SNAPPY_USE_SHARED=${ARROW_BUILD_SHARED} # This can be wrong in custom triplets - -DARROW_THRIFT_USE_SHARED=OFF # vcpkg doesn't build Thrift as a shared library for the moment (2020/01/22). - -DARROW_UTF8PROC_USE_SHARED=${ARROW_BUILD_SHARED} # This can be wrong in custom triplets - -DARROW_ZSTD_USE_SHARED=${ARROW_BUILD_SHARED} # This can be wrong in custom triplets - -DARROW_JEMALLOC=OFF - -DARROW_BUILD_UTILITIES=OFF + -DARROW_BUILD_STATIC=${ARROW_BUILD_STATIC} + -DARROW_BUILD_TESTS=OFF + -DARROW_DEPENDENCY_SOURCE=SYSTEM + -DARROW_DEPENDENCY_USE_SHARED=${ARROW_DEPENDENCY_USE_SHARED} + -DARROW_THRIFT_USE_SHARED=${THRIFT_USE_SHARED} + -DBUILD_WARNING_LEVEL=PRODUCTION + -DARROW_WITH_BROTLI=ON -DARROW_WITH_BZ2=ON - -DARROW_WITH_ZLIB=ON - -DARROW_WITH_ZSTD=ON -DARROW_WITH_LZ4=ON -DARROW_WITH_SNAPPY=ON - -DARROW_WITH_BROTLI=ON - -DARROW_WITH_UTF8PROC=ON - -DPARQUET_REQUIRE_ENCRYPTION=ON - -DBUILD_WARNING_LEVEL=PRODUCTION + -DARROW_WITH_ZLIB=ON + -DARROW_WITH_ZSTD=ON + -DZSTD_MSVC_LIB_PREFIX= ) vcpkg_install_cmake() diff --git a/ports/arrow/vcpkg.json b/ports/arrow/vcpkg.json new file mode 100644 index 0000000000..51755afd71 --- /dev/null +++ b/ports/arrow/vcpkg.json @@ -0,0 +1,88 @@ +{ + "name": "arrow", + "version": "4.0.0", + "description": "Cross-language development platform for in-memory analytics", + "homepage": "https://arrow.apache.org", + "supports": "x64", + "dependencies": [ + "boost-filesystem", + "boost-multiprecision", + "boost-system", + "brotli", + "bzip2", + "gflags", + "glog", + "lz4", + "openssl", + "re2", + "snappy", + "thrift", + "utf8proc", + "zlib", + "zstd" + ], + "default-features": [ + "csv", + "filesystem", + "json", + "parquet" + ], + "features": { + "csv": { + "description": "CSV support" + }, + "dataset": { + "description": "Dataset support" + }, + "filesystem": { + "description": "Filesystem support" + }, + "flight": { + "description": "Arrow Flight RPC support", + "dependencies": [ + "abseil", + "c-ares", + "grpc", + "protobuf" + ] + }, + "jemalloc": { + "description": "jemalloc allocator" + }, + "json": { + "description": "JSON support", + "dependencies": [ + "rapidjson" + ] + }, + "mimalloc": { + "description": "mimalloc allocator" + }, + "orc": { + "description": "ORC support", + "dependencies": [ + "orc" + ] + }, + "parquet": { + "description": "Parquet support" + }, + "s3": { + "description": "S3 support", + "dependencies": [ + { + "name": "aws-sdk-cpp", + "default-features": false, + "features": [ + "cognito-identity", + "config", + "identity-management", + "s3", + "sts", + "transfer" + ] + } + ] + } + } +} diff --git a/versions/a-/arrow.json b/versions/a-/arrow.json index d795d327ad..503dbef985 100644 --- a/versions/a-/arrow.json +++ b/versions/a-/arrow.json @@ -1,5 +1,10 @@ { "versions": [ + { + "git-tree": "2066704ab2b0b5977bcd5677ea1378fac0b0555c", + "version": "4.0.0", + "port-version": 0 + }, { "git-tree": "bc9cb096ffa223f2af620db18ed244d17e47ffe7", "version-string": "3.0.0", diff --git a/versions/baseline.json b/versions/baseline.json index 4ff7df52a6..c46e2c457b 100644 --- a/versions/baseline.json +++ b/versions/baseline.json @@ -157,7 +157,7 @@ "port-version": 3 }, "arrow": { - "baseline": "3.0.0", + "baseline": "4.0.0", "port-version": 0 }, "ashes": {