From 265921b4a307d71bfc408b8ab927501d79d77973 Mon Sep 17 00:00:00 2001 From: contre Date: Wed, 24 Jul 2019 10:58:19 -0700 Subject: [PATCH] [pcl] Fix cuda building compatability issues with cuda 10.1 (#7388) --- ports/pcl/CONTROL | 2 +- ports/pcl/cuda_10_1.patch | 133 ++++++++++++++++++++++++++++++++++++++ ports/pcl/portfile.cmake | 1 + 3 files changed, 135 insertions(+), 1 deletion(-) create mode 100644 ports/pcl/cuda_10_1.patch diff --git a/ports/pcl/CONTROL b/ports/pcl/CONTROL index 1a9852c8e9..41714a66aa 100644 --- a/ports/pcl/CONTROL +++ b/ports/pcl/CONTROL @@ -1,5 +1,5 @@ Source: pcl -Version: 1.9.1-4 +Version: 1.9.1-5 Homepage: https://github.com/PointCloudLibrary/pcl Description: Point Cloud Library (PCL) is open source library for 2D/3D image and point cloud processing. Build-Depends: eigen3, flann, qhull, vtk, libpng, boost-system, boost-filesystem, boost-thread, boost-date-time, boost-iostreams, boost-random, boost-foreach, boost-dynamic-bitset, boost-property-map, boost-graph, boost-multi-array, boost-signals2, boost-ptr-container, boost-uuid, boost-interprocess, boost-asio diff --git a/ports/pcl/cuda_10_1.patch b/ports/pcl/cuda_10_1.patch new file mode 100644 index 0000000000..4b46149a4b --- /dev/null +++ b/ports/pcl/cuda_10_1.patch @@ -0,0 +1,133 @@ +diff --git a/cuda/common/include/pcl/cuda/thrust.h b/cuda/common/include/pcl/cuda/thrust.h +index 57586ab..af073d7 100644 +--- a/cuda/common/include/pcl/cuda/thrust.h ++++ b/cuda/common/include/pcl/cuda/thrust.h +@@ -42,6 +42,7 @@ + + #include + #include ++#include + #include + #include + #include +diff --git a/gpu/features/src/fpfh.cu b/gpu/features/src/fpfh.cu +index 8d34f76..59a5f0c 100644 +--- a/gpu/features/src/fpfh.cu ++++ b/gpu/features/src/fpfh.cu +@@ -231,12 +231,12 @@ namespace pcl + int *sinds = sindices + Warp::WARP_SIZE * warp_idx; + int size = sizes[idx]; + +- for(int i = lane; __any(i < size); i += Warp::STRIDE) ++ for(int i = lane; __any_sync(0xFFFFFFFF, i < size); i += Warp::STRIDE) + { + if (i < size) + sinds[lane] = ginds[i]; + +- int inds_num = __popc(__ballot(i < size)); ++ int inds_num = __popc(__ballot_sync(0xFFFFFFFF, i < size)); + + for(int j = 0; j < inds_num; ++j) + { +diff --git a/gpu/octree/src/cuda/approx_nsearch.cu b/gpu/octree/src/cuda/approx_nsearch.cu +index e457255..3e1adfe 100644 +--- a/gpu/octree/src/cuda/approx_nsearch.cu ++++ b/gpu/octree/src/cuda/approx_nsearch.cu +@@ -141,7 +141,7 @@ namespace pcl { namespace device { namespace appnearest_search + { + __shared__ volatile int per_warp_buffer[KernelPolicy::WARPS_COUNT]; + +- int mask = __ballot(node_idx != -1); ++ int mask = __ballot_sync(0xFFFFFFFF, node_idx != -1); + + while(mask) + { +@@ -275,7 +275,7 @@ namespace pcl { namespace device { namespace appnearest_search + + bool active = query_index < batch.queries_num; + +- if (__all(active == false)) ++ if (__all_sync(0xFFFFFFFF, active == false)) + return; + + Warp_appNearestSearch search(batch, query_index); +diff --git a/gpu/octree/src/cuda/knn_search.cu b/gpu/octree/src/cuda/knn_search.cu +index a99655d..b55e3c1 100644 +--- a/gpu/octree/src/cuda/knn_search.cu ++++ b/gpu/octree/src/cuda/knn_search.cu +@@ -106,7 +106,7 @@ namespace pcl { namespace device { namespace knn_search + else + query_index = -1; + +- while(__any(active)) ++ while(__any_sync(0xFFFFFFFF, active)) + { + int leaf = -1; + +@@ -163,7 +163,7 @@ namespace pcl { namespace device { namespace knn_search + + __device__ __forceinline__ void processLeaf(int node_idx) + { +- int mask = __ballot(node_idx != -1); ++ int mask = __ballot_sync(0xFFFFFFFF, node_idx != -1); + + unsigned int laneId = Warp::laneId(); + unsigned int warpId = Warp::id(); +@@ -310,7 +310,7 @@ namespace pcl { namespace device { namespace knn_search + + bool active = query_index < batch.queries_num; + +- if (__all(active == false)) ++ if (__all_sync(0xFFFFFFFF, active == false)) + return; + + Warp_knnSearch search(batch, query_index); +diff --git a/gpu/octree/src/cuda/radius_search.cu b/gpu/octree/src/cuda/radius_search.cu +index f90273f..8ae84e7 100644 +--- a/gpu/octree/src/cuda/radius_search.cu ++++ b/gpu/octree/src/cuda/radius_search.cu +@@ -155,7 +155,7 @@ namespace pcl + else + query_index = -1; + +- while(__any(active)) ++ while(__any_sync(0xFFFFFFFF, active)) + { + int leaf = -1; + +@@ -217,7 +217,7 @@ namespace pcl + + __device__ __forceinline__ void processLeaf(int leaf) + { +- int mask = __ballot(leaf != -1); ++ int mask = __ballot_sync(0xFFFFFFFF, leaf != -1); + + while(mask) + { +@@ -255,7 +255,7 @@ namespace pcl + int *out = batch.output + active_query_index * batch.max_results + active_found_count; + int length_left = batch.max_results - active_found_count; + +- int test = __any(active_lane == laneId && (leaf & KernelPolicy::CHECK_FLAG)); ++ int test = __any_sync(0xFFFFFFFF, active_lane == laneId && (leaf & KernelPolicy::CHECK_FLAG)); + + if (test) + { +@@ -329,7 +329,7 @@ namespace pcl + total_new += new_nodes; + out += new_nodes; + +- if (__all(idx >= length) || __any(out_of_bounds) || total_new == length_left) ++ if (__all_sync(0xFFFFFFFF, idx >= length) || __any_sync(0xFFFFFFFF, out_of_bounds) || total_new == length_left) + break; + } + return min(total_new, length_left); +@@ -343,7 +343,7 @@ namespace pcl + + bool active = query_index < batch.queries.size; + +- if (__all(active == false)) ++ if (__all_sync(0xFFFFFFFF, active == false)) + return; + + Warp_radiusSearch search(batch, query_index); diff --git a/ports/pcl/portfile.cmake b/ports/pcl/portfile.cmake index b30ceb0d8e..b855831e9d 100644 --- a/ports/pcl/portfile.cmake +++ b/ports/pcl/portfile.cmake @@ -11,6 +11,7 @@ vcpkg_from_github( pcl_config.patch use_flann_targets.patch boost-1.70.patch + cuda_10_1.patch ) file(REMOVE ${SOURCE_PATH}/cmake/Modules/FindFLANN.cmake)