mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-13 11:57:43 -04:00
Vulkan Optimizations and Fixes (#8959)
* Optimize Vulkan REPEAT performance * Use Vulkan GLSL fused multiply-add instruction where possible * Add GGML_VULKAN_PERF option to output performance data per operator * Rework and fix Vulkan descriptor set and descriptor pool handling * Fix float32 concat f16 shader validation error * Add Vulkan GROUP_NORM eps parameter * Fix validation error with transfer queue memory barrier flags * Remove trailing whitespaces
This commit is contained in:
@@ -30,6 +30,10 @@ void main() {
|
||||
#ifndef OPTIMIZATION_ERROR_WORKAROUND
|
||||
data_d[p.d_offset + dst_idx] = D_TYPE(is_src0 ? data_a[src0_idx] : data_b[src1_idx]);
|
||||
#else
|
||||
data_d[p.d_offset + dst_idx] = is_src0 ? data_a[src0_idx] : data_b[src1_idx];
|
||||
if (is_src0) {
|
||||
data_d[p.d_offset + dst_idx] = data_a[src0_idx];
|
||||
} else {
|
||||
data_d[p.d_offset + dst_idx] = data_b[src1_idx];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
Reference in New Issue
Block a user