mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-27 12:05:03 +00:00
ggml : riscv: add 128-bit RVV support (#12530)
* ggml : add 128-bit RVV support
* ggml : revert to old RVV 256+ q2_K, q3_K, q4_K, q6_K impl
* remove trailing whitespace
* restructure vector length selection code
This commit is contained in:
@ -123,6 +123,7 @@ endif()
|
|||||||
option(GGML_LASX "ggml: enable lasx" ON)
|
option(GGML_LASX "ggml: enable lasx" ON)
|
||||||
option(GGML_LSX "ggml: enable lsx" ON)
|
option(GGML_LSX "ggml: enable lsx" ON)
|
||||||
option(GGML_RVV "ggml: enable rvv" ON)
|
option(GGML_RVV "ggml: enable rvv" ON)
|
||||||
|
option(GGML_RV_ZFH "ggml: enable riscv zfh" OFF)
|
||||||
option(GGML_VXE "ggml: enable vxe" ON)
|
option(GGML_VXE "ggml: enable vxe" ON)
|
||||||
|
|
||||||
option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
|
option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
|
||||||
|
@ -320,7 +320,11 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|||||||
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv64")
|
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv64")
|
||||||
message(STATUS "RISC-V detected")
|
message(STATUS "RISC-V detected")
|
||||||
if (GGML_RVV)
|
if (GGML_RVV)
|
||||||
list(APPEND ARCH_FLAGS -march=rv64gcv -mabi=lp64d)
|
if (GGML_RV_ZFH)
|
||||||
|
list(APPEND ARCH_FLAGS -march=rv64gcv_zfhmin -DGGML_RV_ZFH -mabi=lp64d)
|
||||||
|
else()
|
||||||
|
list(APPEND ARCH_FLAGS -march=rv64gcv -mabi=lp64d)
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "s390x")
|
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "s390x")
|
||||||
message(STATUS "s390x detected")
|
message(STATUS "s390x detected")
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -381,6 +381,35 @@ GGML_API void ggml_aligned_free(void * ptr, size_t size);
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#elif defined(__riscv) && defined(GGML_RV_ZFH)
|
||||||
|
|
||||||
|
// Convert a half-precision value to single precision with two RISC-V
// instructions: fmv.h.x copies the bits from an integer register into a
// floating-point register, then fcvt.s.h widens them in place.
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
    float widened;
    __asm__(
        "fmv.h.x %[dst], %[bits]\n\t"
        "fcvt.s.h %[dst], %[dst]"
        // early-clobber: dst is written before the asm sequence has finished
        : [dst] "=&f" (widened)
        : [bits] "r" (h)
    );
    return widened;
}
|
||||||
|
|
||||||
|
// Convert a single-precision value to half precision with two RISC-V
// instructions: fcvt.h.s narrows the value inside its floating-point register,
// then fmv.x.h copies the resulting bits into an integer register.
//
// f   : value to convert
// ret : the converted value as a ggml_fp16_t
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
    ggml_fp16_t res;
    __asm__(
        "fcvt.h.s %[f], %[f]\n\t"
        "fmv.x.h %[h], %[f]"
        // %[f] is overwritten by fcvt.h.s, so it must be a read-write ("+f")
        // operand; as an input-only operand the compiler would be free to
        // assume the register still holds the original float afterwards.
        : [h] "=&r" (res), [f] "+f" (f)
    );
    return res;
}
|
||||||
|
|
||||||
|
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
||||||
|
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
|
||||||
|
#define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
|
||||||
|
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
// FP16 <-> FP32
|
// FP16 <-> FP32
|
||||||
|
Reference in New Issue
Block a user