download in batches
@@ -4,7 +4,8 @@
 #include <string>
 #include <fstream>
 #include <vector>
-#include <json.hpp>
+
+#include <nlohmann/json.hpp>
 
 using json = nlohmann::json;
 
@@ -109,32 +110,50 @@ int main(void) {
             }
         }
 
-        if (common_download_file_multiple(files, {}, false)) {
-            std::string dir_sep(1, DIRECTORY_SEPARATOR);
-
-            for (auto const & item : files) {
-                std::string filepath = item.second;
-
-                if (string_ends_with(filepath, ".gguf")) {
-                    std::string vocab_inp = filepath + ".inp";
-                    std::string vocab_out = filepath + ".out";
-                    auto matching_inp = std::find_if(files.begin(), files.end(), [&vocab_inp](const auto & p) {
-                        return p.second == vocab_inp;
-                    });
-                    auto matching_out = std::find_if(files.begin(), files.end(), [&vocab_out](const auto & p) {
-                        return p.second == vocab_out;
-                    });
-
-                    if (matching_inp != files.end() && matching_out != files.end()) {
-                        std::string test_command = "." + dir_sep + "test-tokenizer-0 '" + filepath + "'";
-                        assert(std::system(test_command.c_str()) == 0);
-                    } else {
-                        printf("test-tokenizers-remote: %s found without .inp/out vocab files, skipping...\n", filepath.c_str());
-                    }
-                }
-            }
-        } else {
-            printf("test-tokenizers-remote: failed to download files, unable to perform tests...\n");
+        if (!files.empty()) {
+            bool downloaded = false;
+            const size_t batch_size = 6;
+            size_t batches = (files.size() + batch_size - 1) / batch_size;
+
+            for (size_t i = 0; i < batches; i++) {
+                size_t batch_pos = (i * batch_size);
+                size_t batch_step = batch_pos + batch_size;
+                auto batch_begin = files.begin() + batch_pos;
+                auto batch_end = batch_step >= files.size() ? files.end() : files.begin() + batch_step;
+                std::vector<std::pair<std::string, std::string>> batch(batch_begin, batch_end);
+
+                if (!(downloaded = common_download_file_multiple(batch, {}, false))) {
+                    break;
+                }
+            }
+
+            if (downloaded) {
+                std::string dir_sep(1, DIRECTORY_SEPARATOR);
+
+                for (auto const & item : files) {
+                    std::string filepath = item.second;
+
+                    if (string_ends_with(filepath, ".gguf")) {
+                        std::string vocab_inp = filepath + ".inp";
+                        std::string vocab_out = filepath + ".out";
+                        auto matching_inp = std::find_if(files.begin(), files.end(), [&vocab_inp](const auto & p) {
+                            return p.second == vocab_inp;
+                        });
+                        auto matching_out = std::find_if(files.begin(), files.end(), [&vocab_out](const auto & p) {
+                            return p.second == vocab_out;
+                        });
+
+                        if (matching_inp != files.end() && matching_out != files.end()) {
+                            std::string test_command = "." + dir_sep + "test-tokenizer-0 '" + filepath + "'";
+                            assert(std::system(test_command.c_str()) == 0);
+                        } else {
+                            printf("test-tokenizers-remote: %s found without .inp/out vocab files, skipping...\n", filepath.c_str());
+                        }
+                    }
+                }
+            } else {
+                printf("test-tokenizers-remote: failed to download files, unable to perform tests...\n");
+            }
         }
     } else {
         printf("test-tokenizers-remote: failed to retrieve repository info, unable to perform tests...\n");
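The new code replaces the single call to common_download_file_multiple over all of files with one call per slice of at most batch_size entries, presumably to bound how many downloads are in flight per call. The batch count comes from ceiling division, the end iterator of the last slice is clamped to files.end(), and the loop stops at the first batch that fails. Below is a minimal standalone sketch of that same slicing arithmetic; the fetch() helper is a hypothetical stand-in for common_download_file_multiple and is not part of the actual change.

// Standalone sketch of the batch-slicing logic from the diff above.
// fetch() is a hypothetical stand-in for common_download_file_multiple.
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

static bool fetch(const std::vector<std::pair<std::string, std::string>> & batch) {
    // Pretend every batch downloads successfully.
    std::printf("downloading %zu file(s)\n", batch.size());
    return true;
}

int main() {
    std::vector<std::pair<std::string, std::string>> files(14); // 14 dummy entries

    const size_t batch_size = 6;
    // Ceiling division: 14 files / 6 per batch -> 3 batches (6, 6, 2).
    size_t batches = (files.size() + batch_size - 1) / batch_size;

    bool downloaded = false;
    for (size_t i = 0; i < batches; i++) {
        size_t batch_pos  = i * batch_size;
        size_t batch_step = batch_pos + batch_size;
        // Clamp the end iterator so the final, possibly short, batch stays in range.
        auto batch_begin = files.begin() + batch_pos;
        auto batch_end   = batch_step >= files.size() ? files.end()
                                                      : files.begin() + batch_step;
        std::vector<std::pair<std::string, std::string>> batch(batch_begin, batch_end);

        if (!(downloaded = fetch(batch))) {
            break; // stop at the first batch that fails
        }
    }
    return downloaded ? 0 : 1;
}

Tracking success in the downloaded flag also lets the test body run only when every batch succeeded, matching the old all-or-nothing behavior of the single call.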