Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-06-26 11:45:21 +00:00)
move BLAS to a separate backend (#6210)
* move BLAS to a separate backend

* rename GGML_USE_OPENBLAS to GGML_USE_BLAS

* alloc : reuse same buffer when the same buffer type is used multiple times

* set number of threads automatically for openblas and blis

* sched : print assignments when the GGML_SCHED_DEBUG env variable is set

* sched : allow ops with weights on an incompatible buffer type
  This will cause the weight to be copied to a backend that supports the op,
  which is very costly. The weight should have been stored in a buffer of a
  backend that can run the op, but llama.cpp cannot do this automatically at
  the moment.

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
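The build changes below only define the new options and the renamed compile definition; a minimal sketch of what the rename means for code that guards its BLAS path on these defines (the guarded bodies are placeholders, not part of this commit):

    /* before this change: the build system defined the OpenBLAS-specific macro */
    #ifdef GGML_USE_OPENBLAS
        // BLAS-accelerated path
    #endif

    /* after this change: CMake defines the vendor-agnostic macro instead,
       and the work is routed through the separate ggml-blas backend */
    #ifdef GGML_USE_BLAS
        // BLAS-accelerated path
    #endif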
CMakeLists.txt

@@ -39,8 +39,12 @@ endif()
 
 if (APPLE)
     set(LLAMA_METAL_DEFAULT ON)
+    set(LLAMA_BLAS_DEFAULT ON)
+    set(LLAMA_BLAS_VENDOR_DEFAULT "Apple")
 else()
     set(LLAMA_METAL_DEFAULT OFF)
+    set(LLAMA_BLAS_DEFAULT OFF)
+    set(LLAMA_BLAS_VENDOR_DEFAULT "Generic")
 endif()
 
 set(LLAMA_LLAMAFILE_DEFAULT ON)
@@ -91,9 +95,10 @@ endif()
 
 # 3rd party libs
 option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
-option(LLAMA_BLAS "llama: use BLAS" OFF)
+option(LLAMA_BLAS "llama: use BLAS" ${LLAMA_BLAS_DEFAULT})
+set(LLAMA_BLAS_VENDOR ${LLAMA_BLAS_VENDOR_DEFAULT} CACHE STRING
+                      "llama: BLAS library vendor")
 option(LLAMA_LLAMAFILE "llama: use llamafile SGEMM" ${LLAMA_LLAMAFILE_DEFAULT})
-set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
 option(LLAMA_CUDA "llama: use CUDA" OFF)
 option(LLAMA_CUBLAS "llama: use CUDA (deprecated, use LLAMA_CUDA)" OFF)
 option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
@@ -311,9 +316,9 @@ if (LLAMA_BLAS)
     if (LLAMA_STATIC)
         set(BLA_STATIC ON)
     endif()
-    if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
-        set(BLA_SIZEOF_INTEGER 8)
-    endif()
+    #if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
+    #    set(BLA_SIZEOF_INTEGER 8)
+    #endif()
 
     set(BLA_VENDOR ${LLAMA_BLAS_VENDOR})
     find_package(BLAS)
@@ -321,7 +326,7 @@ if (LLAMA_BLAS)
     if (BLAS_FOUND)
         message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")
 
-        if ("${BLAS_INCLUDE_DIRS}" STREQUAL "")
+        if (("${BLAS_INCLUDE_DIRS}" STREQUAL "") AND NOT (${LLAMA_BLAS_VENDOR} MATCHES "Apple"))
             # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake.
             # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
             find_package(PkgConfig REQUIRED)
@@ -374,12 +379,15 @@ if (LLAMA_BLAS)
 
         add_compile_options(${BLAS_LINKER_FLAGS})
 
-        add_compile_definitions(GGML_USE_OPENBLAS)
+        add_compile_definitions(GGML_USE_BLAS)
 
         if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${LLAMA_BLAS_VENDOR} MATCHES "Generic" OR ${LLAMA_BLAS_VENDOR} MATCHES "Intel"))
             add_compile_definitions(GGML_BLAS_USE_MKL)
         endif()
 
+        set(GGML_HEADERS_BLAS ggml-blas.h)
+        set(GGML_SOURCES_BLAS ggml-blas.cpp)
+
         set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES})
         set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS})
     else()
@@ -1258,6 +1266,7 @@ add_library(ggml OBJECT
             ${GGML_SOURCES_KOMPUTE}   ${GGML_HEADERS_KOMPUTE}
             ${GGML_SOURCES_VULKAN}    ${GGML_HEADERS_VULKAN}
             ${GGML_SOURCES_ROCM}      ${GGML_HEADERS_ROCM}
+            ${GGML_SOURCES_BLAS}      ${GGML_HEADERS_BLAS}
             ${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
             )
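The diff adds ggml-blas.h / ggml-blas.cpp to the build, but those sources are not shown here. A minimal sketch of how the separate backend might be driven from client code, assuming the new header exposes an init function and the thread-count setter implied by the commit message (ggml_backend_blas_init and ggml_backend_blas_set_n_threads are assumptions, not confirmed by this excerpt):

    #include "ggml-backend.h"
    #ifdef GGML_USE_BLAS
    #include "ggml-blas.h"   /* added to the build by the CMake change above */
    #endif

    int main(void) {
    #ifdef GGML_USE_BLAS
        /* assumed constructor for the standalone BLAS backend */
        ggml_backend_t blas = ggml_backend_blas_init();
        if (blas) {
            /* the commit picks a thread count automatically for OpenBLAS/BLIS;
               an explicit override is assumed to look like this */
            ggml_backend_blas_set_n_threads(blas, 8);
            ggml_backend_free(blas);
        }
    #endif
        return 0;
    }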