ggml: move fp16/bf16 conversion optimizations to CPU backend + export conversion APIs (#13107)

* ggml: dynamic x86_64 feature detection for FP32 <-> FP16/BF16 conversion * move fp converter to ggml-cpu * Switch ggml_compute_forward_get_rows_f16/bf16 to new ggml_cpu_fp16/bf16_to_fp32
2025-06-27 03:55:20 +00:00 · 2025-04-26 22:05:31 +08:00
parent d5fe4e81bd
commit 77d5e9a76a
4 changed files with 101 additions and 50 deletions
--- a/ggml/include/ggml-cpu.h
+++ b/ggml/include/ggml-cpu.h
@ -133,6 +133,11 @@ extern "C" {

    GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);

+    GGML_BACKEND_API void ggml_cpu_fp32_to_fp16(const float *, ggml_fp16_t *, int64_t);
+    GGML_BACKEND_API void ggml_cpu_fp16_to_fp32(const ggml_fp16_t *, float *, int64_t);
+    GGML_BACKEND_API void ggml_cpu_fp32_to_bf16(const float *, ggml_bf16_t *, int64_t);
+    GGML_BACKEND_API void ggml_cpu_bf16_to_fp32(const ggml_bf16_t *, float *, int64_t);
+
 #ifdef __cplusplus
 }
 #endif