mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-28 20:25:20 +00:00
vulkan: initial support for IQ1_S and IQ1_M quantizations (#11528)
* vulkan: initial support for IQ1_S and IQ1_M quantizations
* vulkan: define MMV kernels for IQ1 quantizations
* devops: increase timeout of Vulkan tests again
* vulkan: simplify ifdef for init_iq_shmem
This commit is contained in:
@ -55,6 +55,8 @@ const std::vector<std::string> type_names = {
|
||||
"q4_k",
|
||||
"q5_k",
|
||||
"q6_k",
|
||||
"iq1_s",
|
||||
"iq1_m",
|
||||
"iq2_xxs",
|
||||
"iq2_xs",
|
||||
"iq2_s",
|
||||
@ -182,6 +184,13 @@ std::string to_uppercase(const std::string& input) {
|
||||
return result;
|
||||
}
|
||||
|
||||
// Returns true when `str` begins with `prefix`, false otherwise.
// An empty `prefix` matches every string, including an empty one.
bool string_starts_with(const std::string& str, const std::string& prefix) {
|
||||
// A prefix longer than the string cannot match; this guard also keeps the
// std::equal call below from reading past the end of `str`.
if (prefix.size() > str.size()) {
|
||||
return false;
|
||||
}
|
||||
// Compare every character of `prefix` against the leading characters of `str`.
return std::equal(prefix.begin(), prefix.end(), str.begin());
|
||||
}
|
||||
|
||||
bool string_ends_with(const std::string& str, const std::string& suffix) {
|
||||
if (suffix.size() > str.size()) {
|
||||
return false;
|
||||
@ -387,7 +396,7 @@ void process_shaders() {
|
||||
for (const auto& tname : type_names) {
|
||||
// mul mat vec
|
||||
std::string data_a_key = "DATA_A_" + to_uppercase(tname);
|
||||
std::string shader = (string_ends_with(tname, "_k")) ? "mul_mat_vec_" + tname + ".comp" : "mul_mat_vec.comp";
|
||||
std::string shader = (string_ends_with(tname, "_k") || string_starts_with(tname, "iq1_")) ? "mul_mat_vec_" + tname + ".comp" : "mul_mat_vec.comp";
|
||||
|
||||
string_to_spv("mul_mat_vec_" + tname + "_f32_f32", shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "float"}, {"B_TYPE_VEC2", "vec2"}, {"B_TYPE_VEC4", "vec4"}, {"D_TYPE", "float"}}));
|
||||
string_to_spv("mul_mat_vec_" + tname + "_f16_f32", shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "float16_t"}, {"B_TYPE_VEC2", "f16vec2"}, {"B_TYPE_VEC4", "f16vec4"}, {"D_TYPE", "float"}}));
|
||||
|
Reference in New Issue
Block a user