mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-06-26 19:55:04 +00:00)
common : use common_ prefix for common library functions (#9805)

* common : use common_ prefix for common library functions

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
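The rename touches only the shared common library used by the examples and tests, not the core libllama API. Every helper that previously carried a gpt_ prefix, or a llama_ prefix despite living in common/, now uses common_, so a call site immediately tells you which library a symbol comes from. The renames visible in the diffs below:

    gpt_params                 -> common_params
    gpt_params_parse           -> common_params_parse
    gpt_params_parser_init     -> common_params_parser_init
    gpt_log_main               -> common_log_main
    gpt_log_set_timestamps     -> common_log_set_timestamps
    gpt_log_set_prefix         -> common_log_set_prefix
    llama_chat_msg             -> common_chat_msg
    llama_chat_format_single   -> common_chat_format_single
    llama_tokenize             -> common_tokenize
    llama_detokenize           -> common_detokenize
    llama_token_to_piece       -> common_token_to_piece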
tests/test-arg-parser.cpp

@@ -10,12 +10,12 @@
 #include <cassert>
 
 int main(void) {
-    gpt_params params;
+    common_params params;
 
     printf("test-arg-parser: make sure there is no duplicated arguments in any examples\n\n");
     for (int ex = 0; ex < LLAMA_EXAMPLE_COUNT; ex++) {
         try {
-            auto ctx_arg = gpt_params_parser_init(params, (enum llama_example)ex);
+            auto ctx_arg = common_params_parser_init(params, (enum llama_example)ex);
             std::unordered_set<std::string> seen_args;
             std::unordered_set<std::string> seen_env_vars;
             for (const auto & opt : ctx_arg.options) {
@@ -58,44 +58,44 @@ int main(void) {
 
     // missing value
     argv = {"binary_name", "-m"};
-    assert(false == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(false == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
 
     // wrong value (int)
     argv = {"binary_name", "-ngl", "hello"};
-    assert(false == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(false == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
 
     // wrong value (enum)
     argv = {"binary_name", "-sm", "hello"};
-    assert(false == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(false == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
 
     // non-existence arg in specific example (--draft cannot be used outside llama-speculative)
     argv = {"binary_name", "--draft", "123"};
-    assert(false == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_SERVER));
+    assert(false == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_SERVER));
 
 
     printf("test-arg-parser: test valid usage\n\n");
 
     argv = {"binary_name", "-m", "model_file.gguf"};
-    assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
     assert(params.model == "model_file.gguf");
 
     argv = {"binary_name", "-t", "1234"};
-    assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
     assert(params.cpuparams.n_threads == 1234);
 
     argv = {"binary_name", "--verbose"};
-    assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
     assert(params.verbosity > 1);
 
     argv = {"binary_name", "-m", "abc.gguf", "--predict", "6789", "--batch-size", "9090"};
-    assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
     assert(params.model == "abc.gguf");
     assert(params.n_predict == 6789);
     assert(params.n_batch == 9090);
 
     // --draft cannot be used outside llama-speculative
     argv = {"binary_name", "--draft", "123"};
-    assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_SPECULATIVE));
+    assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_SPECULATIVE));
     assert(params.n_draft == 123);
 
     // skip this part on windows, because setenv is not supported
@@ -106,12 +106,12 @@ int main(void) {
 
     setenv("LLAMA_ARG_THREADS", "blah", true);
     argv = {"binary_name"};
-    assert(false == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(false == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
 
     setenv("LLAMA_ARG_MODEL", "blah.gguf", true);
     setenv("LLAMA_ARG_THREADS", "1010", true);
     argv = {"binary_name"};
-    assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
     assert(params.model == "blah.gguf");
     assert(params.cpuparams.n_threads == 1010);
 
@@ -121,7 +121,7 @@ int main(void) {
     setenv("LLAMA_ARG_MODEL", "blah.gguf", true);
     setenv("LLAMA_ARG_THREADS", "1010", true);
     argv = {"binary_name", "-m", "overwritten.gguf"};
-    assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
+    assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
     assert(params.model == "overwritten.gguf");
     assert(params.cpuparams.n_threads == 1010);
 #endif // _WIN32
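For context, a minimal sketch of how an example binary would drive the renamed parser (hedged: the "common.h" include and the four-argument overload are assumptions based on the calls exercised above; params.model and params.cpuparams.n_threads are the fields the test asserts on):

    #include "common.h"   // common_params, common_params_parse (assumed header)
    #include <cstdio>

    int main(int argc, char ** argv) {
        common_params params;

        // returns false on unknown flags, missing values, or ill-typed values,
        // exactly the failure cases the test above checks
        if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) {
            return 1;
        }

        printf("model: %s, threads: %d\n",
               params.model.c_str(), params.cpuparams.n_threads);
        return 0;
    }

Environment variables such as LLAMA_ARG_MODEL and LLAMA_ARG_THREADS feed the same parser, with command-line flags taking precedence, which is what the setenv blocks above verify.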
tests/test-chat-template.cpp

@@ -140,11 +140,11 @@ int main(void) {
 
     // test llama_chat_format_single for system message
     printf("\n\n=== llama_chat_format_single (system message) ===\n\n");
-    std::vector<llama_chat_msg> chat2;
-    llama_chat_msg sys_msg{"system", "You are a helpful assistant"};
+    std::vector<common_chat_msg> chat2;
+    common_chat_msg sys_msg{"system", "You are a helpful assistant"};
 
     auto fmt_sys = [&](std::string tmpl) {
-        auto output = llama_chat_format_single(nullptr, tmpl, chat2, sys_msg, false);
+        auto output = common_chat_format_single(nullptr, tmpl, chat2, sys_msg, false);
         printf("fmt_sys(%s) : %s\n", tmpl.c_str(), output.c_str());
         printf("-------------------------\n");
         return output;
@@ -160,10 +160,10 @@ int main(void) {
     chat2.push_back({"system", "You are a helpful assistant"});
     chat2.push_back({"user", "Hello"});
     chat2.push_back({"assistant", "I am assistant"});
-    llama_chat_msg new_msg{"user", "How are you"};
+    common_chat_msg new_msg{"user", "How are you"};
 
     auto fmt_single = [&](std::string tmpl) {
-        auto output = llama_chat_format_single(nullptr, tmpl, chat2, new_msg, true);
+        auto output = common_chat_format_single(nullptr, tmpl, chat2, new_msg, true);
         printf("fmt_single(%s) : %s\n", tmpl.c_str(), output.c_str());
         printf("-------------------------\n");
         return output;
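common_chat_format_single formats only the delta for one incoming message given the prior history, which is what interactive examples need when appending a turn without re-rendering the whole chat. A hedged sketch based on the call sites above (passing nullptr for the model makes the tmpl argument authoritative; using "chatml" as a template name is an assumption, the test itself passes raw template strings):

    #include "common.h"  // common_chat_msg, common_chat_format_single (assumed header)
    #include <cstdio>

    void append_turn() {
        std::vector<common_chat_msg> history = {
            {"system",    "You are a helpful assistant"},
            {"user",      "Hello"},
            {"assistant", "I am assistant"},
        };
        common_chat_msg new_msg{"user", "How are you"};

        // final argument: add the assistant prefix so generation can start after it
        std::string delta = common_chat_format_single(nullptr, "chatml", history, new_msg, true);
        printf("%s\n", delta.c_str());
    }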
tests/test-log.cpp

@@ -24,8 +24,8 @@ int main() {
             }
 
             if (rand () % 10 < 5) {
-                gpt_log_set_timestamps(gpt_log_main(), rand() % 2);
-                gpt_log_set_prefix    (gpt_log_main(), rand() % 2);
+                common_log_set_timestamps(common_log_main(), rand() % 2);
+                common_log_set_prefix    (common_log_main(), rand() % 2);
             }
         }
     });
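The logging helpers follow the same pattern: the singleton accessor and its setters move from gpt_log_* to common_log_*. A small sketch, assuming the signatures shown in the diff; the LOG_INF macro as the usual front end is an assumption here:

    #include "log.h"  // common_log_main, common_log_set_* (assumed header)

    int main() {
        // configure the global logger instance used by the examples
        common_log_set_timestamps(common_log_main(), true);   // prepend timestamps
        common_log_set_prefix    (common_log_main(), true);   // prepend level prefix

        LOG_INF("hello from the common logger\n");  // assumption: LOG_INF macro
        return 0;
    }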
tests/test-tokenizer-0.cpp

@@ -202,7 +202,7 @@ int main(int argc, char **argv) {
     for (int i = 0; i < nthread; i++) {
         threads[i] = std::thread([&, i]() {
             for (const auto & test_kv : k_tests) {
-                const std::vector<llama_token> res = llama_tokenize(ctx, test_kv.first, add_special, false);
+                const std::vector<llama_token> res = common_tokenize(ctx, test_kv.first, add_special, false);
 
                 // here only print the result of the first thread
                 // because the other threads are running the same tests
@@ -212,7 +212,7 @@ int main(int argc, char **argv) {
 
                     printf("\n");
                     printf("src: '%s'\n", test_kv.first.c_str());
-                    printf("res: '%s'\n", llama_detokenize(ctx, res).c_str());
+                    printf("res: '%s'\n", common_detokenize(ctx, res).c_str());
                     printf("tok: ");
                     for (const auto & tok : res) {
                         printf("%d ", tok);
@@ -229,16 +229,16 @@ int main(int argc, char **argv) {
                 if (!correct) {
                     fprintf(stderr, "%s : failed test: '%s'\n", __func__, test_kv.first.c_str());
                     fprintf(stderr, "%s : detokenized to: '%s' instead of '%s'\n", __func__,
-                        llama_detokenize(ctx, res).c_str(),
-                        llama_detokenize(ctx, test_kv.second).c_str());
+                        common_detokenize(ctx, res).c_str(),
+                        common_detokenize(ctx, test_kv.second).c_str());
                     fprintf(stderr, "%s : expected tokens: ", __func__);
                     for (const auto & t : test_kv.second) {
-                        fprintf(stderr, "%6d '%s', ", t, llama_token_to_piece(ctx, t).c_str());
+                        fprintf(stderr, "%6d '%s', ", t, common_token_to_piece(ctx, t).c_str());
                     }
                     fprintf(stderr, "\n");
                     fprintf(stderr, "%s : got tokens: ", __func__);
                     for (const auto & t : res) {
-                        fprintf(stderr, "%6d '%s', ", t, llama_token_to_piece(ctx, t).c_str());
+                        fprintf(stderr, "%6d '%s', ", t, common_token_to_piece(ctx, t).c_str());
                     }
                     fprintf(stderr, "\n");
 
@@ -273,7 +273,7 @@ int main(int argc, char **argv) {
         {
            const auto t_start = ggml_time_us();
 
-            res = llama_tokenize(ctx, text, add_special, false);
+            res = common_tokenize(ctx, text, add_special, false);
 
            const auto t_end = ggml_time_us();
 
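The tokenizer wrappers are thin conveniences over the C API: common_tokenize returns a std::vector<llama_token> instead of filling a caller-provided buffer, and common_detokenize / common_token_to_piece return std::string. A round-trip sketch using the signatures visible above (ctx is an already-initialized llama_context; error handling elided):

    #include "common.h"  // common_tokenize, common_detokenize, common_token_to_piece (assumed header)
    #include <cstdio>

    // tokenize -> inspect -> detokenize, mirroring what the test checks
    static void round_trip(llama_context * ctx, const std::string & text) {
        // add_special: prepend BOS etc. as the model's vocab dictates;
        // parse_special: whether to match special tokens inside the text
        std::vector<llama_token> toks = common_tokenize(ctx, text, /*add_special=*/true, /*parse_special=*/false);

        for (llama_token t : toks) {
            printf("%6d '%s'\n", t, common_token_to_piece(ctx, t).c_str());
        }

        // should reproduce `text` up to tokenizer normalization
        std::string back = common_detokenize(ctx, toks);
        printf("round trip: '%s'\n", back.c_str());
    }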
tests/test-tokenizer-1-bpe.cpp

@@ -78,10 +78,10 @@ int main(int argc, char **argv) {
     const int n_vocab = llama_n_vocab(model);
 
     for (int i = 0; i < n_vocab; ++i) {
-        std::string str = llama_detokenize(ctx, std::vector<int>(1, i));
+        std::string str = common_detokenize(ctx, std::vector<int>(1, i));
         try {
             auto cps = unicode_cpts_from_utf8(str);
-            std::vector<llama_token> tokens = llama_tokenize(ctx, str, false, true);
+            std::vector<llama_token> tokens = common_tokenize(ctx, str, false, true);
             if (ignore_merges && tokens.size() > 1) {
                 fprintf(stderr,
                     "%s : error: token %d detokenizes to '%s'(%zu) but "
@@ -94,7 +94,7 @@ int main(int argc, char **argv) {
                 fprintf(stderr, "]\n");
                 return 2;
             }
-            std::string check = llama_detokenize(ctx, tokens);
+            std::string check = common_detokenize(ctx, tokens);
             if (check != str) {
                 fprintf(stderr, "%s : error: token %d detokenizes to '%s'(%zu) but tokenization of this detokenizes to '%s'(%zu)\n",
                     __func__, i, str.c_str(), str.length(), check.c_str(), check.length());
@@ -123,8 +123,8 @@ int main(int argc, char **argv) {
             }
 
             std::string str = unicode_cpt_to_utf8(cp);
-            std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
-            std::string check = llama_detokenize(ctx, tokens);
+            std::vector<llama_token> tokens = common_tokenize(ctx, str, false);
+            std::string check = common_detokenize(ctx, tokens);
             if (cp != 9601 && str != check) {
                 fprintf(stderr, "error: codepoint 0x%x detokenizes to '%s'(%zu) instead of '%s'(%zu)\n",
                     cp, check.c_str(), check.length(), str.c_str(), str.length());
tests/test-tokenizer-1-spm.cpp

@@ -66,9 +66,9 @@ int main(int argc, char ** argv) {
     const int n_vocab = llama_n_vocab(model);
 
     for (int i = 0; i < n_vocab; ++i) {
-        std::string str = llama_detokenize(ctx, std::vector<int>(1, i), true);
-        std::vector<llama_token> tokens = llama_tokenize(ctx, str, false, true);
-        std::string check = llama_detokenize(ctx, tokens);
+        std::string str = common_detokenize(ctx, std::vector<int>(1, i), true);
+        std::vector<llama_token> tokens = common_tokenize(ctx, str, false, true);
+        std::string check = common_detokenize(ctx, tokens);
         if (check != str) {
             fprintf(stderr, "%s : error: token %d detokenizes to '%s'(%zu) but tokenization of this detokenizes to '%s'(%zu)\n",
                 __func__, i, str.c_str(), str.length(), check.c_str(), check.length());
@@ -93,8 +93,8 @@ int main(int argc, char ** argv) {
             }
 
             std::string str = unicode_cpt_to_utf8(cp);
-            std::vector<llama_token> tokens = llama_tokenize(ctx, str, false, true);
-            std::string check = llama_detokenize(ctx, tokens);
+            std::vector<llama_token> tokens = common_tokenize(ctx, str, false, true);
+            std::string check = common_detokenize(ctx, tokens);
             if (cp != 9601 && str != check) {
                 fprintf(stderr, "error: codepoint 0x%x detokenizes to '%s'(%zu) instead of '%s'(%zu)\n",
                     cp, check.c_str(), check.length(), str.c_str(), str.length());
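A note on the cp != 9601 exclusion that appears in both tokenizer round-trip loops: U+2581 ("▁", LOWER ONE EIGHTH BLOCK) is the SentencePiece whitespace marker, so detokenizing its token yields a plain space and an exact round trip is impossible by design. An illustrative check (unicode_cpt_to_utf8 is the same helper the tests use):

    // U+2581 (decimal 9601) encodes to 0xE2 0x96 0x81 in UTF-8
    assert(unicode_cpt_to_utf8(9601) == "\xe2\x96\x81");  // "▁"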