ggml: move ggml_table_f32_f16 to ggml-cpu

ref: https://github.com/ggml-org/llama.cpp/pull/14317#discussion_r2164775006

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
2025-06-29 04:35:05 +00:00 · 2025-06-25 14:57:58 +08:00
parent 1b23fec005
commit 9e40d984ad
4 changed files with 5 additions and 25 deletions
--- a/ggml/src/ggml-cpu/ggml-cpu.c
+++ b/ggml/src/ggml-cpu/ggml-cpu.c
@ -3479,6 +3479,7 @@ void ggml_cpu_init(void) {
                    ggml_fp16_t fp16;
                } u = {i};
                float f = GGML_CPU_FP16_TO_FP32(u.fp16);
                ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
                ggml_table_gelu_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_f32(f));
                ggml_table_gelu_quick_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_quick_f32(f));
            }
--- a/ggml/src/ggml-cpu/simd-mappings.h
+++ b/ggml/src/ggml-cpu/simd-mappings.h
@ -137,6 +137,10 @@
    }
 #endif
 // precomputed f32 table for f16 (256 KB)
 // initialized in ggml_cpu_init() (ggml-cpu.c); defined in ggml.c -- TODO confirm definition site after this move
 GGML_API float ggml_table_f32_f16[1 << 16];
 // On ARM NEON, it's quicker to convert fp16 -> fp32 directly instead of calling into ggml_lookup_fp16_to_fp32,
 // so we define GGML_CPU_FP16_TO_FP32 and GGML_CPU_FP32_TO_FP16 elsewhere for NEON.
 // This is also true for POWER9.
--- a/ggml/src/ggml-impl.h
+++ b/ggml/src/ggml-impl.h
@ -393,10 +393,6 @@ static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
 #define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
 #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
 // precomputed f32 table for f16 (256 KB)
 // defined in ggml.c, initialized in ggml_init()
 GGML_API float ggml_table_f32_f16[1 << 16];
 /**
 * Converts brain16 to float32.
 *
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@ -1414,27 +1414,6 @@ static inline bool ggml_can_repeat_rows(const struct ggml_tensor * t0, const str
 ////////////////////////////////////////////////////////////////////////////////
 struct ggml_context * ggml_init(struct ggml_init_params params) {
    static bool is_first_call = true;
    ggml_critical_section_start();
    if (is_first_call) {
        // initialize time system (required on Windows)
        ggml_time_init();
        for (int i = 0; i < (1 << 16); ++i) {
            union {
                uint16_t u16;
                ggml_fp16_t fp16;
            } u = {i};
            ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
        }
        is_first_call = false;
    }
    ggml_critical_section_end();
    struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context));
    // allow to call ggml_init with 0 size