ggml : add ggml_set_rows (#14274)

* ggml : add ggml_set_rows

Add ggml_set_rows(a, b, c) which copies rows from 'b' into 'a' using
indices from 'c'.

ref: #8366

* use I64 for indices

* ggml : add repeat impl for i64

* ggml : add ggml_is_contiguous_rows

* ggml : ggml_set_rows support broadcast

* ggml : ggml_set_rows support quantized dst

ggml-ci

* ggml : support GGML_TYPE_F32 ".from_float" trait

* ggml : ggml_set_rows update comment + better index name

* tests : add ggml_set_rows

* metal : add ggml_set_rows implementation

ggml-ci

* ggml : simplify forward_dup_f32

* ggml : fix supports_op

* tests : add comment to set_rows

* ggml : leave the repeat_i64 for a separate PR

ggml-ci

* ggml : set_rows use std::min instead of MIN

* ggml : better error message for set_rows unsupported type

* metal : perform op->type check only once

* tests : more consistent implementation + more tests

ggml-ci

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
Radoslav Gerganov
2025-06-27 16:41:40 +03:00
committed by GitHub
parent f667f1e624
commit 8d94219a4a
12 changed files with 653 additions and 204 deletions

View File

@@ -933,6 +933,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"TRANSPOSE",
"GET_ROWS",
"GET_ROWS_BACK",
"SET_ROWS",
"DIAG",
"DIAG_MASK_INF",
"DIAG_MASK_ZERO",
@@ -983,7 +984,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"OPT_STEP_ADAMW",
};
static_assert(GGML_OP_COUNT == 83, "GGML_OP_COUNT != 83");
static_assert(GGML_OP_COUNT == 84, "GGML_OP_COUNT != 84");
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"none",
@@ -1029,6 +1030,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"transpose(x)",
"get_rows(x)",
"get_rows_back(x)",
"set_rows(x)",
"diag(x)",
"diag_mask_inf(x)",
"diag_mask_zero(x)",
@@ -1079,7 +1081,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"adamw(x)",
};
static_assert(GGML_OP_COUNT == 83, "GGML_OP_COUNT != 83");
static_assert(GGML_OP_COUNT == 84, "GGML_OP_COUNT != 84");
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
@@ -1348,6 +1350,12 @@ bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor) {
tensor->nb[2] == ggml_type_size(tensor->type);
}
bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor) {
return
tensor->ne[0] == ggml_blck_size(tensor->type) ||
tensor->nb[0] == ggml_type_size(tensor->type);
}
static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
@@ -3384,6 +3392,35 @@ struct ggml_tensor * ggml_get_rows_back(
return result;
}
// ggml_set_rows

// Creates a GGML_OP_SET_ROWS node that copies rows from 'b' into 'a' using
// the row indices stored in 'c'. The returned tensor is a view of 'a' with
// 'b' recorded as src[0] and 'c' as src[1].
//
// Preconditions (each enforced with GGML_ASSERT):
//   - a and b have the same ne[0], ne[2] and ne[3]
//   - c->ne[0] == b->ne[1]
//   - b->ne[2] is a multiple of c->ne[1] and b->ne[3] is a multiple of
//     c->ne[2] (this permits broadcasting the indices), and c->ne[3] == 1
//   - b has type GGML_TYPE_F32, c has type GGML_TYPE_I64
//   - a and b both satisfy ggml_is_contiguous_rows
struct ggml_tensor * ggml_set_rows(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * b,
        struct ggml_tensor  * c) {
    // shape agreement between destination (a) and source rows (b)
    GGML_ASSERT(a->ne[0] == b->ne[0]);
    GGML_ASSERT(a->ne[2] == b->ne[2]);
    GGML_ASSERT(a->ne[3] == b->ne[3]);

    // shape agreement between source rows (b) and indices (c)
    GGML_ASSERT(b->ne[1] == c->ne[0]);
    GGML_ASSERT(b->ne[2] % c->ne[1] == 0);
    GGML_ASSERT(b->ne[3] % c->ne[2] == 0);
    GGML_ASSERT(c->ne[3] == 1);

    // supported types for the source rows and the indices
    GGML_ASSERT(b->type == GGML_TYPE_F32);
    GGML_ASSERT(c->type == GGML_TYPE_I64);

    // layout requirements
    GGML_ASSERT(ggml_is_contiguous_rows(a));
    GGML_ASSERT(ggml_is_contiguous_rows(b));

    // the op node is a view of 'a'
    struct ggml_tensor * dst = ggml_view_tensor(ctx, a);

    dst->op     = GGML_OP_SET_ROWS;
    dst->src[0] = b;
    dst->src[1] = c;

    return dst;
}
// ggml_diag
struct ggml_tensor * ggml_diag(