ggml : riscv: add 128-bit RVV support (#12530)

* ggml : add 128-bit RVV support

* ggml : revert to old RVV 256+ q2_K, q3_K, q4_K, q6_K impl

* remove trailing whitespaces

* restructure vector length selection code
This commit is contained in:
xctan
2025-03-27 14:38:34 +08:00
committed by GitHub
parent f28bc4c286
commit 24feaec057
4 changed files with 781 additions and 397 deletions

View File

@ -123,6 +123,7 @@ endif()
option(GGML_LASX "ggml: enable lasx" ON) option(GGML_LASX "ggml: enable lasx" ON)
option(GGML_LSX "ggml: enable lsx" ON) option(GGML_LSX "ggml: enable lsx" ON)
option(GGML_RVV "ggml: enable rvv" ON) option(GGML_RVV "ggml: enable rvv" ON)
option(GGML_RV_ZFH "ggml: enable riscv zfh" OFF)
option(GGML_VXE "ggml: enable vxe" ON) option(GGML_VXE "ggml: enable vxe" ON)
option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF) option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)

View File

@ -320,7 +320,11 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv64") elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv64")
message(STATUS "RISC-V detected") message(STATUS "RISC-V detected")
if (GGML_RVV) if (GGML_RVV)
list(APPEND ARCH_FLAGS -march=rv64gcv -mabi=lp64d) if (GGML_RV_ZFH)
list(APPEND ARCH_FLAGS -march=rv64gcv_zfhmin -DGGML_RV_ZFH -mabi=lp64d)
else()
list(APPEND ARCH_FLAGS -march=rv64gcv -mabi=lp64d)
endif()
endif() endif()
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "s390x") elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "s390x")
message(STATUS "s390x detected") message(STATUS "s390x detected")

File diff suppressed because it is too large Load Diff

View File

@ -381,6 +381,35 @@ GGML_API void ggml_aligned_free(void * ptr, size_t size);
return r; return r;
} }
#elif defined(__riscv) && defined(GGML_RV_ZFH)
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
float f;
__asm__(
"fmv.h.x %[f], %[h]\n\t"
"fcvt.s.h %[f], %[f]"
: [f] "=&f" (f)
: [h] "r" (h)
);
return f;
}
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
ggml_fp16_t res;
__asm__(
"fcvt.h.s %[f], %[f]\n\t"
"fmv.x.h %[h], %[f]"
: [h] "=&r" (res)
: [f] "f" (f)
);
return res;
}
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
#define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
#else #else
// FP16 <-> FP32 // FP16 <-> FP32