mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-07-26 03:03:25 -04:00
metal : support bcast add & dup & cont op (#2323)
This commit is contained in:
@ -67,6 +67,17 @@ kernel void kernel_add(
|
||||
dst[tpig] = src0[tpig] + src1[tpig];
|
||||
}
|
||||
|
||||
// Element-wise add with src1 broadcast along rows.
//
// Each thread writes one output element: dst[tpig] = src0[tpig] + src1[tpig % ne00],
// so only the first ne00 elements of src1 are read and they repeat cyclically over dst.
//
// assumption: src1 is a single row (ne00 is presumably the row length of src0 —
// confirm against the host-side dispatch code).
// NOTE(review): there is no bounds check here; this looks like it relies on the
// grid being dispatched with exactly one thread per dst element — confirm.
kernel void kernel_add_row(
        device const float * src0,
        device const float * src1,
        device       float * dst,
        constant   int64_t & ne00,
        uint tpig[[thread_position_in_grid]]) {
    // position of this element inside the broadcast row
    const int64_t i1 = tpig % ne00;

    dst[tpig] = src0[tpig] + src1[i1];
}
|
||||
|
||||
kernel void kernel_mul(
|
||||
device const float * src0,
|
||||
device const float * src1,
|
||||
|
Reference in New Issue
Block a user