Speedup the AVX-512 implementation of ggml_vec_dot_q4_0() (#933)

This commit is contained in:
Ivan Komarov
2023-04-17 15:10:57 +02:00
committed by GitHub
parent 47f61aaa5f
commit f266259ad9
4 changed files with 235 additions and 44 deletions

View File

@ -55,6 +55,8 @@ option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer"
option(LLAMA_AVX "llama: enable AVX" ON)
option(LLAMA_AVX2 "llama: enable AVX2" ON)
option(LLAMA_AVX512 "llama: enable AVX512" OFF)
option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF)
option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF)
option(LLAMA_FMA "llama: enable FMA" ON)
# in MSVC F16C is implied with AVX2/AVX512
if (NOT MSVC)
@ -220,6 +222,16 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$")
if (MSVC)
if (LLAMA_AVX512)
add_compile_options(/arch:AVX512)
# MSVC has no compile-time flags enabling specific
# AVX512 extensions, neither it defines the
# macros corresponding to the extensions.
# Do it manually.
if (LLAMA_AVX512_VBMI)
add_compile_definitions(__AVX512VBMI__)
endif()
if (LLAMA_AVX512_VNNI)
add_compile_definitions(__AVX512VNNI__)
endif()
elseif (LLAMA_AVX2)
add_compile_options(/arch:AVX2)
elseif (LLAMA_AVX)
@ -240,9 +252,13 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$")
endif()
if (LLAMA_AVX512)
add_compile_options(-mavx512f)
# add_compile_options(-mavx512cd)
# add_compile_options(-mavx512dq)
# add_compile_options(-mavx512bw)
add_compile_options(-mavx512bw)
endif()
if (LLAMA_AVX512_VBMI)
add_compile_options(-mavx512vbmi)
endif()
if (LLAMA_AVX512_VNNI)
add_compile_options(-mavx512vnni)
endif()
endif()
else()