Files
llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt

110 lines
2.8 KiB
CMake

find_package(OpenCL REQUIRED)
find_package(Python3 REQUIRED)
set(TARGET_NAME ggml-opencl)
ggml_add_backend_library(${TARGET_NAME}
ggml-opencl.cpp
../../include/ggml-opencl.h)
target_link_libraries(${TARGET_NAME} PRIVATE ${OpenCL_LIBRARIES})
target_include_directories(${TARGET_NAME} PRIVATE ${OpenCL_INCLUDE_DIRS})
if (GGML_OPENCL_PROFILING)
message(STATUS "OpenCL profiling enabled (increases CPU overhead)")
add_compile_definitions(GGML_OPENCL_PROFILING)
endif ()
add_compile_definitions(GGML_OPENCL_SOA_Q)
add_compile_definitions(GGML_OPENCL_TARGET_VERSION=${GGML_OPENCL_TARGET_VERSION})
if (GGML_OPENCL_USE_ADRENO_KERNELS)
message(STATUS "OpenCL will use matmul kernels optimized for Adreno")
add_compile_definitions(GGML_OPENCL_USE_ADRENO_KERNELS)
endif ()
if (GGML_OPENCL_EMBED_KERNELS)
add_compile_definitions(GGML_OPENCL_EMBED_KERNELS)
set(EMBED_KERNEL_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/kernels/embed_kernel.py")
file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/autogenerated")
target_include_directories(${TARGET_NAME} PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/autogenerated")
endif ()
function(ggml_opencl_add_kernel KNAME)
set(KERN_HDR ${CMAKE_CURRENT_BINARY_DIR}/autogenerated/${KNAME}.cl.h)
set(KERN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/kernels/${KNAME}.cl)
if (GGML_OPENCL_EMBED_KERNELS)
message(STATUS "opencl: embedding kernel ${KNAME}")
# Python must be accessible from command line
add_custom_command(
OUTPUT ${KERN_HDR}
COMMAND ${Python3_EXECUTABLE} ${EMBED_KERNEL_SCRIPT} ${KERN_SRC} ${KERN_HDR}
DEPENDS ${KERN_SRC} ${EMBED_KERNEL_SCRIPT}
COMMENT "Generate ${KERN_HDR}"
)
target_sources(${TARGET_NAME} PRIVATE ${KERN_HDR})
else ()
message(STATUS "opencl: adding kernel ${KNAME}")
configure_file(${KERN_SRC} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${KNAME}.cl COPYONLY)
endif ()
endfunction()
set(GGML_OPENCL_KERNELS
add
argsort
clamp
cpy
cvt
diag_mask_inf
div
gelu
gemv_noshuffle_general
gemv_noshuffle
get_rows
group_norm
im2col_f32
im2col_f16
mul_mat_Ab_Bi_8x4
mul_mv_f16_f16
mul_mv_f16_f32_1row
mul_mv_f16_f32_l4
mul_mv_f16_f32
mul_mv_f32_f32
mul_mv_q4_0_f32
mul_mv_q4_0_f32_v
mul_mv_q4_0_f32_8x_flat
mul_mv_q4_0_f32_1d_8x_flat
mul_mv_q4_0_f32_1d_16x_flat
mul_mv_q6_k
mul_mv_id_q4_0_f32_8x_flat
mul
norm
relu
rms_norm
rope
scale
sigmoid
silu
softmax_4_f32
softmax_4_f16
softmax_f32
softmax_f16
sub
sum_rows
transpose
concat
tsembd
upscale
tanh
pad
repeat
)
foreach (K ${GGML_OPENCL_KERNELS})
ggml_opencl_add_kernel(${K})
endforeach()