mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-08-14 04:17:53 -04:00)
@@ -144,7 +144,7 @@ public:
 
     ggml_tensor * pos_bucket = nullptr; // I32 [n_batch, n_batch]
 
-    const llama_hparams & hparams;
+    const llama_hparams hparams;
 };
 
 class llm_graph_input_pos_bucket_kv : public llm_graph_input_i {
@@ -158,7 +158,7 @@ public:
 
     ggml_tensor * pos_bucket = nullptr; // I32 [n_kv, n_batch]
 
-    const llama_hparams & hparams;
+    const llama_hparams hparams;
 
     const llama_kv_cache_unified_context * mctx;
 };
@@ -177,8 +177,8 @@ public:
 
     ggml_tensor * out_ids; // I32 [n_outputs]
 
-    const llama_hparams & hparams;
-    const llama_cparams & cparams;
+    const llama_hparams hparams;
+    const llama_cparams cparams;
 
     const uint32_t n_outputs;
 };
@@ -192,7 +192,7 @@ public:
 
     ggml_tensor * mean; // F32 [n_batch, n_batch]
 
-    const llama_cparams & cparams;
+    const llama_cparams cparams;
 };
 
 class llm_graph_input_cls : public llm_graph_input_i {
@@ -204,7 +204,7 @@ public:
 
     ggml_tensor * cls; // I32 [n_batch]
 
-    const llama_cparams & cparams;
+    const llama_cparams cparams;
 };
 
 class llm_graph_input_rs : public llm_graph_input_i {
@@ -247,8 +247,8 @@ public:
     ggml_tensor * kq_mask     = nullptr; // F32 [n_tokens, n_batch, 1, 1]
     ggml_tensor * kq_mask_cnv = nullptr; //     [n_tokens, n_batch, 1, 1]
 
-    const llama_hparams & hparams;
-    const llama_cparams & cparams;
+    const llama_hparams hparams;
+    const llama_cparams cparams;
 };
 
 class llm_graph_input_attn_kv_unified : public llm_graph_input_i {
@@ -278,8 +278,11 @@ public:
     ggml_tensor * self_kq_mask     = nullptr; // F32 [n_kv, n_batch/n_stream, 1, n_stream]
     ggml_tensor * self_kq_mask_cnv = nullptr; //     [n_kv, n_batch/n_stream, 1, n_stream]
 
-    const llama_hparams & hparams;
-    const llama_cparams & cparams;
+    // note: these have to be copies because in order to be able to reuse a graph, its inputs
+    //       need to carry these parameters with them. otherwise, they can point to freed
+    //       llm_graph_params from a previous batch, causing stack-use-after-return
+    const llama_hparams hparams;
+    const llama_cparams cparams;
 
     const llama_kv_cache_unified_context * mctx;
 };
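The note added in the hunk above explains the motivation for switching every input's reference members to by-value copies: a reusable graph can outlive the llm_graph_params it was built from, so an input holding a reference into that (possibly stack-allocated) struct ends up reading freed memory on the next batch. Below is a minimal standalone sketch of the hazard, using hypothetical stand-in types (hparams_t, input_ref, input_copy, make_copy, make_ref, n_embd) rather than the real llama.cpp ones:

#include <cstdio>

// hypothetical stand-in for llama_hparams
struct hparams_t { int n_embd; };

// BAD: stores a reference into caller-owned parameters
// (mirrors the old `const llama_hparams & hparams;`)
struct input_ref {
    const hparams_t & hp;
    explicit input_ref(const hparams_t & hp) : hp(hp) {}
};

// GOOD: stores its own copy, so the input stays self-contained
// (mirrors the new `const llama_hparams hparams;`)
struct input_copy {
    const hparams_t hp;
    explicit input_copy(const hparams_t & hp) : hp(hp) {}
};

input_copy make_copy() {
    hparams_t params{4096};    // lives on this stack frame
    return input_copy(params); // the copy survives the return
}

input_ref make_ref() {
    hparams_t params{4096};
    return input_ref(params);  // dangling: params is destroyed on return
}

int main() {
    input_copy ok = make_copy();
    std::printf("n_embd = %d\n", ok.hp.n_embd);     // fine: owned copy

    // input_ref bad = make_ref();
    // std::printf("n_embd = %d\n", bad.hp.n_embd); // stack-use-after-return (UB);
                                                    // AddressSanitizer would flag this
    return 0;
}

Copying here is cheap: llama_hparams and llama_cparams are plain parameter structs, so carrying them by value trades a small per-input copy for graph reuse that is safe across batches.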
@@ -318,8 +321,8 @@ public:
     ggml_tensor * self_kq_mask_swa     = nullptr; // F32 [n_kv, n_batch/n_stream, 1, n_stream]
     ggml_tensor * self_kq_mask_swa_cnv = nullptr; //     [n_kv, n_batch/n_stream, 1, n_stream]
 
-    const llama_hparams & hparams;
-    const llama_cparams & cparams;
+    const llama_hparams hparams;
+    const llama_cparams cparams;
 
     const llama_kv_cache_unified_iswa_context * mctx;
 };