vulkan: initial support for IQ1_S and IQ1_M quantizations (#11528)

* vulkan: initial support for IQ1_S and IQ1_M quantizations * vulkan: define MMV kernels for IQ1 quantizations * devops: increase timeout of Vulkan tests again * vulkan: simplify ifdef for init_iq_shmem
2025-06-28 20:25:20 +00:00 · 2025-02-15 09:01:40 +01:00
parent 89daa2564f
commit fc1b0d0936
17 changed files with 711 additions and 30 deletions
--- a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
@ -55,6 +55,8 @@ const std::vector<std::string> type_names = {
    "q4_k",
    "q5_k",
    "q6_k",
+    "iq1_s",
+    "iq1_m",
    "iq2_xxs",
    "iq2_xs",
    "iq2_s",
@ -182,6 +184,13 @@ std::string to_uppercase(const std::string& input) {
    return result;
 }

+bool string_starts_with(const std::string& str, const std::string& prefix) {
+    if (prefix.size() > str.size()) {
+        return false;
+    }
+    return std::equal(prefix.begin(), prefix.end(), str.begin());
+}
+
 bool string_ends_with(const std::string& str, const std::string& suffix) {
    if (suffix.size() > str.size()) {
        return false;
@ -387,7 +396,7 @@ void process_shaders() {
    for (const auto& tname : type_names) {
        // mul mat vec
        std::string data_a_key = "DATA_A_" + to_uppercase(tname);
-        std::string shader = (string_ends_with(tname, "_k")) ? "mul_mat_vec_" + tname + ".comp" : "mul_mat_vec.comp";
+        std::string shader = (string_ends_with(tname, "_k") || string_starts_with(tname, "iq1_")) ? "mul_mat_vec_" + tname + ".comp" : "mul_mat_vec.comp";

        string_to_spv("mul_mat_vec_" + tname + "_f32_f32", shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "float"}, {"B_TYPE_VEC2", "vec2"}, {"B_TYPE_VEC4", "vec4"}, {"D_TYPE", "float"}}));
        string_to_spv("mul_mat_vec_" + tname + "_f16_f32", shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "float16_t"}, {"B_TYPE_VEC2", "f16vec2"}, {"B_TYPE_VEC4", "f16vec4"}, {"D_TYPE", "float"}}));