ggml: move ggml_table_f32_f16 to ggml-cpu

ref: https://github.com/ggml-org/llama.cpp/pull/14317#discussion_r2164775006

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
This commit is contained in:
Aaron Teo
2025-06-25 14:57:58 +08:00
parent 1b23fec005
commit 9e40d984ad
4 changed files with 5 additions and 25 deletions

View File

@ -3479,6 +3479,7 @@ void ggml_cpu_init(void) {
ggml_fp16_t fp16;
} u = {i};
float f = GGML_CPU_FP16_TO_FP32(u.fp16);
ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
ggml_table_gelu_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_f32(f));
ggml_table_gelu_quick_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_quick_f32(f));
}

View File

@ -137,6 +137,10 @@
}
#endif
// precomputed f32 table for f16 (256 KB)
// defined in ggml.c, initialized in ggml_init()
GGML_API float ggml_table_f32_f16[1 << 16];
// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
// so we define GGML_CPU_FP16_TO_FP32 and GGML_CPU_FP32_TO_FP16 elsewhere for NEON.
// This is also true for POWER9.

View File

@ -393,10 +393,6 @@ static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
#define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
// precomputed f32 table for f16 (256 KB)
// defined in ggml.c, initialized in ggml_init()
GGML_API float ggml_table_f32_f16[1 << 16];
/**
* Converts brain16 to float32.
*

View File

@ -1414,27 +1414,6 @@ static inline bool ggml_can_repeat_rows(const struct ggml_tensor * t0, const str
////////////////////////////////////////////////////////////////////////////////
struct ggml_context * ggml_init(struct ggml_init_params params) {
static bool is_first_call = true;
ggml_critical_section_start();
if (is_first_call) {
// initialize time system (required on Windows)
ggml_time_init();
for (int i = 0; i < (1 << 16); ++i) {
union {
uint16_t u16;
ggml_fp16_t fp16;
} u = {i};
ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
}
is_first_call = false;
}
ggml_critical_section_end();
struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context));
// allow to call ggml_init with 0 size