Dequant improvements rebase (#8255)

* Single load for half2 * Store scales in local mem * Vec load quantized values
2025-08-26 18:18:28 -04:00 · 2024-07-03 02:55:34 +01:00
parent a27152b602
commit fadde67135
3 changed files with 30 additions and 13 deletions
--- a/ggml/src/ggml-sycl/common.hpp
+++ b/ggml/src/ggml-sycl/common.hpp
@@ -351,4 +351,10 @@ static __dpct_inline__ float warp_reduce_max(float x,
    return x;
 }

+// Helper for vec loading aligned data: reinterprets the memory at aligned_ptr as a sycl::vec<Tp, n> and returns that vector by value.
+template <typename Tp, int n>
+inline sycl::vec<Tp, n> vec_aligned_load(const Tp* aligned_ptr) {
+    return *reinterpret_cast<const sycl::vec<Tp, n>*>(aligned_ptr);  // no alignment or bounds check is done here; the caller must pass a pointer that is valid to read as sycl::vec<Tp, n>
+}
+
 #endif // GGML_SYCL_COMMON_HPP