mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-07-09 13:02:12 +00:00
Dequant improvements rebase (#8255)
* Single load for half2 * Store scales in local mem * Vec load quantized values
This commit is contained in:
@ -351,4 +351,10 @@ static __dpct_inline__ float warp_reduce_max(float x,
|
||||
return x;
|
||||
}
|
||||
|
||||
// Helper for vec loading aligned data
|
||||
template <typename Tp, int n>
|
||||
inline sycl::vec<Tp, n> vec_aligned_load(const Tp* aligned_ptr) {
|
||||
return *reinterpret_cast<const sycl::vec<Tp, n>*>(aligned_ptr);
|
||||
}
|
||||
|
||||
#endif // GGML_SYCL_COMMON_HPP
|
||||
|
Reference in New Issue
Block a user