mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-29 04:35:05 +00:00
ggml: move ggml_table_f32_f16 to ggml-cpu
ref: https://github.com/ggml-org/llama.cpp/pull/14317#discussion_r2164775006 Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
This commit is contained in:
@ -3479,6 +3479,7 @@ void ggml_cpu_init(void) {
|
|||||||
ggml_fp16_t fp16;
|
ggml_fp16_t fp16;
|
||||||
} u = {i};
|
} u = {i};
|
||||||
float f = GGML_CPU_FP16_TO_FP32(u.fp16);
|
float f = GGML_CPU_FP16_TO_FP32(u.fp16);
|
||||||
|
ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
|
||||||
ggml_table_gelu_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_f32(f));
|
ggml_table_gelu_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_f32(f));
|
||||||
ggml_table_gelu_quick_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_quick_f32(f));
|
ggml_table_gelu_quick_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_quick_f32(f));
|
||||||
}
|
}
|
||||||
|
@ -137,6 +137,10 @@
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// precomputed f32 table for f16 (256 KB)
|
||||||
|
// defined in ggml.c, initialized in ggml_cpu_init()
|
||||||
|
GGML_API float ggml_table_f32_f16[1 << 16];
|
||||||
|
|
||||||
// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
|
// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
|
||||||
// so we define GGML_CPU_FP16_TO_FP32 and GGML_CPU_FP32_TO_FP16 elsewhere for NEON.
|
// so we define GGML_CPU_FP16_TO_FP32 and GGML_CPU_FP32_TO_FP16 elsewhere for NEON.
|
||||||
// This is also true for POWER9.
|
// This is also true for POWER9.
|
||||||
|
@ -393,10 +393,6 @@ static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
|
|||||||
#define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
|
#define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
|
||||||
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
|
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
|
||||||
|
|
||||||
// precomputed f32 table for f16 (256 KB)
|
|
||||||
// defined in ggml.c, initialized in ggml_init()
|
|
||||||
GGML_API float ggml_table_f32_f16[1 << 16];
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts brain16 to float32.
|
* Converts brain16 to float32.
|
||||||
*
|
*
|
||||||
|
@ -1414,27 +1414,6 @@ static inline bool ggml_can_repeat_rows(const struct ggml_tensor * t0, const str
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
struct ggml_context * ggml_init(struct ggml_init_params params) {
|
struct ggml_context * ggml_init(struct ggml_init_params params) {
|
||||||
static bool is_first_call = true;
|
|
||||||
|
|
||||||
ggml_critical_section_start();
|
|
||||||
|
|
||||||
if (is_first_call) {
|
|
||||||
// initialize time system (required on Windows)
|
|
||||||
ggml_time_init();
|
|
||||||
|
|
||||||
for (int i = 0; i < (1 << 16); ++i) {
|
|
||||||
union {
|
|
||||||
uint16_t u16;
|
|
||||||
ggml_fp16_t fp16;
|
|
||||||
} u = {i};
|
|
||||||
ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
|
|
||||||
}
|
|
||||||
|
|
||||||
is_first_call = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
ggml_critical_section_end();
|
|
||||||
|
|
||||||
struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context));
|
struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context));
|
||||||
|
|
||||||
// allow to call ggml_init with 0 size
|
// allow to call ggml_init with 0 size
|
||||||
|
Reference in New Issue
Block a user