mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-29 12:35:16 +00:00
examplse : de-shadow
ggml-ci
This commit is contained in:
@ -1039,41 +1039,40 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
||||
}
|
||||
|
||||
{ // attention
|
||||
int hidden_size = 4096;
|
||||
const int d_head = 128;
|
||||
int n_head = hidden_size/d_head;
|
||||
int hidden_size_cur = 4096;
|
||||
int num_query = 96;
|
||||
if (ctx->minicpmv_version == 2) {
|
||||
hidden_size = 4096;
|
||||
n_head = hidden_size/d_head;
|
||||
hidden_size_cur = 4096;
|
||||
num_query = 96;
|
||||
}
|
||||
else if (ctx->minicpmv_version == 3) {
|
||||
hidden_size = 3584;
|
||||
n_head = hidden_size/d_head;
|
||||
hidden_size_cur = 3584;
|
||||
num_query = 64;
|
||||
}
|
||||
|
||||
const int d_head_cur = 128;
|
||||
const int n_head_cur = hidden_size_cur/d_head_cur;
|
||||
|
||||
struct ggml_tensor * Q = ggml_add(ctx0, ggml_mul_mat(ctx0, model.mm_model_attn_q_w, q), model.mm_model_attn_q_b);
|
||||
Q = ggml_scale_inplace(ctx0, Q, 1.0f / sqrt((float)d_head));
|
||||
Q = ggml_scale_inplace(ctx0, Q, 1.0f / sqrt((float)d_head_cur));
|
||||
struct ggml_tensor * K = ggml_add(ctx0, ggml_mul_mat(ctx0, model.mm_model_attn_k_w, k), model.mm_model_attn_k_b);
|
||||
struct ggml_tensor * V = ggml_add(ctx0, ggml_mul_mat(ctx0, model.mm_model_attn_v_w, v), model.mm_model_attn_v_b);
|
||||
// permute
|
||||
Q = ggml_reshape_4d(ctx0, Q, d_head, n_head, num_query, batch_size);
|
||||
Q = ggml_reshape_4d(ctx0, Q, d_head_cur, n_head_cur, num_query, batch_size);
|
||||
Q = ggml_cont(ctx0, ggml_permute(ctx0, Q, 0, 2, 1, 3));
|
||||
Q = ggml_reshape_3d(ctx0, Q, d_head, num_query, n_head * batch_size);
|
||||
K = ggml_reshape_4d(ctx0, K, d_head, n_head, num_positions, batch_size);
|
||||
Q = ggml_reshape_3d(ctx0, Q, d_head_cur, num_query, n_head_cur * batch_size);
|
||||
K = ggml_reshape_4d(ctx0, K, d_head_cur, n_head_cur, num_positions, batch_size);
|
||||
K = ggml_cont(ctx0, ggml_permute(ctx0, K, 0, 2, 1, 3));
|
||||
K = ggml_reshape_3d(ctx0, K, d_head, num_positions, n_head * batch_size);
|
||||
V = ggml_reshape_4d(ctx0, V, d_head, n_head, num_positions, batch_size);
|
||||
K = ggml_reshape_3d(ctx0, K, d_head_cur, num_positions, n_head_cur * batch_size);
|
||||
V = ggml_reshape_4d(ctx0, V, d_head_cur, n_head_cur, num_positions, batch_size);
|
||||
V = ggml_cont(ctx0, ggml_permute(ctx0, V, 1, 2, 0, 3));
|
||||
V = ggml_reshape_3d(ctx0, V, num_positions, d_head, n_head * batch_size);
|
||||
V = ggml_reshape_3d(ctx0, V, num_positions, d_head_cur, n_head_cur * batch_size);
|
||||
struct ggml_tensor * KQ = ggml_mul_mat(ctx0, K, Q);
|
||||
KQ = ggml_soft_max_inplace(ctx0, KQ);
|
||||
struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V, KQ);
|
||||
KQV = ggml_reshape_4d(ctx0, KQV, d_head, num_query, n_head, batch_size);
|
||||
KQV = ggml_reshape_4d(ctx0, KQV, d_head_cur, num_query, n_head_cur, batch_size);
|
||||
KQV = ggml_permute(ctx0, KQV, 0, 2, 1, 3);
|
||||
KQV = ggml_cont_3d(ctx0, KQV, hidden_size, num_query, batch_size);
|
||||
KQV = ggml_cont_3d(ctx0, KQV, hidden_size_cur, num_query, batch_size);
|
||||
|
||||
embeddings = ggml_add(ctx0, ggml_mul_mat(ctx0, model.mm_model_attn_o_w, KQV), model.mm_model_attn_o_b);
|
||||
}
|
||||
@ -1113,12 +1112,12 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
||||
struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
|
||||
struct ggml_context * meta = NULL;
|
||||
|
||||
struct gguf_init_params params = {
|
||||
struct gguf_init_params params_meta = {
|
||||
/*.no_alloc = */ true,
|
||||
/*.ctx = */ &meta,
|
||||
};
|
||||
|
||||
struct gguf_context * ctx = gguf_init_from_file(fname, params);
|
||||
struct gguf_context * ctx = gguf_init_from_file(fname, params_meta);
|
||||
if (!ctx) {
|
||||
throw std::runtime_error(format("%s: failed to load CLIP model from %s. Does this file exist?\n", __func__, fname));
|
||||
}
|
||||
@ -1310,13 +1309,13 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
|
||||
// load tensors
|
||||
{
|
||||
std::vector<uint8_t> read_buf;
|
||||
struct ggml_init_params params = {
|
||||
struct ggml_init_params params_data = {
|
||||
/*.mem_size =*/ (n_tensors + 1) * ggml_tensor_overhead(),
|
||||
/*.mem_buffer =*/ NULL,
|
||||
/*.no_alloc =*/ true,
|
||||
};
|
||||
|
||||
new_clip->ctx_data = ggml_init(params);
|
||||
new_clip->ctx_data = ggml_init(params_data);
|
||||
if (!new_clip->ctx_data) {
|
||||
LOG_ERR("%s: ggml_init() failed\n", __func__);
|
||||
clip_free(new_clip);
|
||||
|
Reference in New Issue
Block a user