Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-06-26 19:55:04 +00:00
llama : add llama_vocab, functions -> methods, naming (#11110)
* llama : functions -> methods (#11110)
* llama : add struct llama_vocab to the API (#11156) ggml-ci
* hparams : move vocab params to llama_vocab (#11159) ggml-ci
* vocab : more pimpl (#11165) ggml-ci
* vocab : minor tokenization optimizations (#11160) ggml-ci
  Co-authored-by: Diego Devesa <slarengh@gmail.com>
* lora : update API names (#11167) ggml-ci
* llama : update API names to use correct prefix (#11174)
* llama : update API names to use correct prefix ggml-ci
* cont ggml-ci
* cont ggml-ci
* minor [no ci]
* vocab : llama_vocab_add_[be]os -> llama_vocab_get_add_[be]os (#11174) ggml-ci
* vocab : llama_vocab_n_vocab -> llama_vocab_n_tokens (#11174) ggml-ci

---------

Co-authored-by: Diego Devesa <slarengh@gmail.com>
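The renames listed above follow one pattern: state that used to be queried through the llama_model pointer now goes through a const llama_vocab * handle, and function names carry a consistent llama_ prefix. A minimal sketch of the updated call sequence, assuming a local "model.gguf" path (the path is a placeholder, not part of this diff):

#include "llama.h"

#include <cstdio>

int main() {
    llama_backend_init();

    // llama_load_model_from_file -> llama_model_load_from_file
    llama_model * model = llama_model_load_from_file("model.gguf", llama_model_default_params());
    if (model == NULL) {
        return 1;
    }

    // the vocabulary now has its own handle
    const llama_vocab * vocab = llama_model_get_vocab(model);

    // llama_n_vocab(model) -> llama_vocab_n_tokens(vocab)
    printf("n_tokens = %d\n", llama_vocab_n_tokens(vocab));

    // llama_vocab_add_bos -> llama_vocab_get_add_bos (per the commit message)
    printf("add_bos = %d\n", llama_vocab_get_add_bos(vocab));

    // llama_new_context_with_model -> llama_init_from_model
    llama_context * ctx = llama_init_from_model(model, llama_context_default_params());

    llama_free(ctx);
    llama_model_free(model);   // llama_free_model -> llama_model_free
    llama_backend_free();

    return 0;
}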
@@ -14,7 +14,7 @@ int main(int argc, char ** argv) {
     std::thread([&model_path]() {
         llama_backend_init();
         auto * model = llama_model_load_from_file(model_path, llama_model_default_params());
-        auto * ctx = llama_new_context_with_model(model, llama_context_default_params());
+        auto * ctx = llama_init_from_model(model, llama_context_default_params());
         llama_free(ctx);
         llama_model_free(model);
         llama_backend_free();
@@ -157,7 +157,7 @@ int main(void) {
     }
 
     // test invalid chat template
-    res = llama_chat_apply_template(nullptr, "INVALID TEMPLATE", conversation, message_count, true, formatted_chat.data(), formatted_chat.size());
+    res = llama_chat_apply_template("INVALID TEMPLATE", conversation, message_count, true, formatted_chat.data(), formatted_chat.size());
     assert(res < 0);
 
     for (size_t i = 0; i < templates.size(); i++) {
@@ -165,7 +165,6 @@ int main(void) {
         std::string expected = expected_output[i];
         formatted_chat.resize(1024);
         res = llama_chat_apply_template(
-            nullptr,
             custom_template.c_str(),
             conversation,
             message_count,
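As the two hunks above show, llama_chat_apply_template no longer takes a model pointer; the template source is now the first argument. A hedged sketch of a call against the new signature; the "chatml" built-in template name and buffer size are assumptions, not part of this commit:

#include "llama.h"

#include <vector>

int main() {
    llama_chat_message conversation[] = {
        { "system", "You are a helpful assistant" },
        { "user",   "Hello"                       },
    };
    const size_t message_count = sizeof(conversation) / sizeof(conversation[0]);

    std::vector<char> formatted_chat(1024);

    // the model argument is gone; only the template itself is passed
    int32_t res = llama_chat_apply_template(
        "chatml",               // assumed built-in template name; a Jinja source string also works
        conversation,
        message_count,
        true,                   // append the assistant prompt suffix
        formatted_chat.data(),
        formatted_chat.size());

    if (res > (int32_t) formatted_chat.size()) {
        // buffer too small: grow it and apply the template again
        formatted_chat.resize(res);
        res = llama_chat_apply_template("chatml", conversation, message_count, true,
                                        formatted_chat.data(), formatted_chat.size());
    }

    return res < 0 ? 1 : 0;
}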
@@ -161,7 +161,7 @@ int main(int argc, char **argv) {
 
         auto cparams = llama_context_default_params();
 
-        ctx = llama_new_context_with_model(model, cparams);
+        ctx = llama_init_from_model(model, cparams);
 
         if (ctx == NULL) {
             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
@@ -55,7 +55,7 @@ int main(int argc, char **argv) {
 
         auto cparams = llama_context_default_params();
 
-        ctx = llama_new_context_with_model(model, cparams);
+        ctx = llama_init_from_model(model, cparams);
 
         if (ctx == NULL) {
             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
@@ -64,8 +64,10 @@ int main(int argc, char **argv) {
         }
     }
 
-    //GGML_ASSERT(llama_vocab_type(model) == LLAMA_VOCAB_TYPE_BPE);
-    if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_BPE) {
+    const llama_vocab * vocab = llama_model_get_vocab(model);
+
+    //GGML_ASSERT(llama_vocab_type(vocab) == LLAMA_VOCAB_TYPE_BPE);
+    if (llama_vocab_type(vocab) != LLAMA_VOCAB_TYPE_BPE) {
         return 99;
     }
 
@@ -75,7 +77,7 @@ int main(int argc, char **argv) {
     atexit([]() { console::cleanup(); });
 #endif
 
-    const int n_vocab = llama_n_vocab(model);
+    const int n_vocab = llama_vocab_n_tokens(vocab);
 
     for (int i = 0; i < n_vocab; ++i) {
         std::string str = common_detokenize(ctx, std::vector<int>(1, i));
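The tokenizer tests above switch from querying the model to querying the vocab handle. A sketch of that pattern outside the test harness, as a standalone helper; it assumes llama_token_to_piece also takes the vocab after this change, and that the caller has already loaded a BPE model:

#include "llama.h"

#include <cstdio>
#include <string>

// Dump every token of a BPE vocabulary through the new llama_vocab handle.
int dump_vocab(const llama_model * model) {
    const llama_vocab * vocab = llama_model_get_vocab(model);

    if (llama_vocab_type(vocab) != LLAMA_VOCAB_TYPE_BPE) {
        fprintf(stderr, "not a BPE vocab\n");
        return 99;
    }

    const int n_vocab = llama_vocab_n_tokens(vocab);   // was llama_n_vocab(model)

    for (int i = 0; i < n_vocab; ++i) {
        char buf[256];
        // assumed post-rename signature: the vocab handle replaces the model/context
        const int n = llama_token_to_piece(vocab, i, buf, sizeof(buf), 0, true);
        if (n >= 0) {
            printf("%6d: %s\n", i, std::string(buf, n).c_str());
        }
    }

    return 0;
}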
@@ -43,7 +43,7 @@ int main(int argc, char ** argv) {
 
         auto cparams = llama_context_default_params();
 
-        ctx = llama_new_context_with_model(model, cparams);
+        ctx = llama_init_from_model(model, cparams);
 
         if (ctx == NULL) {
             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
@@ -52,8 +52,10 @@ int main(int argc, char ** argv) {
         }
     }
 
+    const llama_vocab * vocab = llama_model_get_vocab(model);
+
     //GGML_ASSERT(llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
-    if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_SPM) {
+    if (llama_vocab_type(vocab) != LLAMA_VOCAB_TYPE_SPM) {
         return 99;
     }
 
@@ -63,7 +65,7 @@ int main(int argc, char ** argv) {
     atexit([]() { console::cleanup(); });
 #endif
 
-    const int n_vocab = llama_n_vocab(model);
+    const int n_vocab = llama_vocab_n_tokens(vocab);
 
     for (int i = 0; i < n_vocab; ++i) {
         std::string str = common_detokenize(ctx, std::vector<int>(1, i), true);
@@ -76,7 +76,7 @@ class LibLlamaModel:
         self.ffi = libllama.ffi
         if isinstance(mparams, dict):
             mparams = libllama.model_default_params(**mparams)
-        self.model = self.lib.llama_load_model_from_file(path_model.encode(), mparams)
+        self.model = self.lib.llama_model_load_from_file(path_model.encode(), mparams)
         if not self.model:
            raise RuntimeError("error: failed to load model '%s'" % path_model)
         if isinstance(cparams, dict):
@@ -92,7 +92,7 @@ class LibLlamaModel:
         if self.ctx:
             self.lib.llama_free(self.ctx)
         if self.model:
-            self.lib.llama_free_model(self.model)
+            self.lib.llama_model_free(self.model)
         self.ctx = None
         self.model = None
         self.lib = None