mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-28 12:25:03 +00:00
repo : update links to new url (#11886)
* repo : update links to new url ggml-ci * cont : more urls ggml-ci
This commit is contained in:
@@ -1816,7 +1816,7 @@ inline static float ggml_silu_f32(float x) {
|
||||
|
||||
#if __FINITE_MATH_ONLY__
|
||||
#error "some routines in ggml.c require non-finite math arithmetics -- pass -fno-finite-math-only to the compiler to fix"
|
||||
#error "ref: https://github.com/ggerganov/llama.cpp/pull/7154#issuecomment-2143844461"
|
||||
#error "ref: https://github.com/ggml-org/llama.cpp/pull/7154#issuecomment-2143844461"
|
||||
#endif
|
||||
|
||||
#if defined(__ARM_NEON) && defined(__aarch64__)
|
||||
@@ -7574,7 +7574,7 @@ UseGgmlGemm2:;
|
||||
int64_t nchunk1 = (nr1 + chunk_size - 1) / chunk_size;
|
||||
|
||||
// If the chunking is poor for the number of threads on this setup, scrap the whole plan. Re-chunk it by thread.
|
||||
// Also, chunking by thread was measured to perform better on NUMA systems. See https://github.com/ggerganov/llama.cpp/pull/6915
|
||||
// Also, chunking by thread was measured to perform better on NUMA systems. See https://github.com/ggml-org/llama.cpp/pull/6915
|
||||
// In theory, chunking should be just as useful on NUMA and non NUMA systems, but testing disagreed with that.
|
||||
if (nchunk0 * nchunk1 < nth * 4 || ggml_is_numa()) {
|
||||
// distribute the thread work across the inner or outer loop based on which one is larger
|
||||
|
Reference in New Issue
Block a user