ggml : simplify Arm fp16 CPU logic (ggml/1177)

* ggml : simlpify Arm fp16 CPU logic

ggml-ci

* cont : bring back CUDA/MUSA checks

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-04-07 12:25:15 +03:00
parent 36ca8b3628
commit ff067dbcb9
3 changed files with 23 additions and 42 deletions

View File

@ -16,14 +16,6 @@
#include <arm_sve.h>
#endif // __ARM_FEATURE_SVE
#if defined(__ARM_NEON) && !defined(__CUDACC__) && !defined(__MUSACC__)
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
//
// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
//
#include <arm_neon.h>
#endif
#if defined(__F16C__)
#include <immintrin.h>
#endif
@ -311,29 +303,35 @@ GGML_API void ggml_aligned_free(void * ptr, size_t size);
// FP16 to FP32 conversion
#if defined(__ARM_NEON)
#if defined(_MSC_VER) || (defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11)
typedef uint16_t ggml_fp16_internal_t;
#else
typedef __fp16 ggml_fp16_internal_t;
#endif
#endif
// 16-bit float
// on Arm, we use __fp16
// on x86, we use uint16_t
//
// for old CUDA compilers (<= 11), we use uint16_t: ref https://github.com/ggml-org/llama.cpp/pull/10616
// for MUSA compilers , we use uint16_t: ref https://github.com/ggml-org/llama.cpp/pull/11843
//
#if defined(__ARM_NEON) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) && !defined(__MUSACC__)
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
//
// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
//
#include <arm_neon.h>
#if defined(__ARM_NEON) && !defined(_MSC_VER) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11)
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
#define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
ggml_fp16_internal_t tmp;
__fp16 tmp;
memcpy(&tmp, &h, sizeof(ggml_fp16_t));
return (float)tmp;
}
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
ggml_fp16_t res;
ggml_fp16_internal_t tmp = f;
__fp16 tmp = f;
memcpy(&res, &tmp, sizeof(ggml_fp16_t));
return res;
}
@ -485,7 +483,7 @@ GGML_API void ggml_aligned_free(void * ptr, size_t size);
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
#endif // defined(__ARM_NEON) && (!defined(__MSC_VER)
#endif // defined(__ARM_NEON) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) && !defined(__MUSACC__)
// precomputed f32 table for f16 (256 KB)
// defined in ggml.c, initialized in ggml_init()