SYCL: Take improvements from GLU branch and disable faulty fp16 exp after update

Akarshan
2025-06-26 19:48:24 +05:30
parent b25346221d
commit 8b5ea7ad67
3 changed files with 604 additions and 1144 deletions

File diff suppressed because it is too large.


@@ -3,27 +3,30 @@
 #include "common.hpp"
 #include "ggml.h"
-#include <limits.h>
+#include <limits> // For std::numeric_limits

 template <typename T>
 T neg_infinity() {
     return -std::numeric_limits<T>::infinity();
 }

-template<typename T>
+template<typename T_Dst, typename T_Src = T_Dst>
 struct typed_data {
-    const T * src;
-    T * dst;
+    const T_Src * src;
+    T_Dst * dst;
 };

-template<typename T>
-typed_data<T> cast_data(ggml_tensor * dst) {
+template<typename T_Dst, typename T_Src = T_Dst>
+typed_data<T_Dst, T_Src> cast_data(ggml_tensor * dst) {
     return {
-        /* .src = */ static_cast<const T *>(dst->src[0]->data),
-        /* .dst = */ static_cast<T *>(dst->data)
+        /* .src = */ static_cast<const T_Src *>(dst->src[0]->data),
+        /* .dst = */ static_cast<T_Dst *>(dst->data)
     };
 }

+const float GELU_QUICK_COEF = -1.702f;
+
 void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
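Note: the new second template parameter lets an elementwise op read one element type and write another; the old single-parameter form forced src and dst to be the same type. A minimal standalone sketch of how the generalized cast_data can be used follows. mini_tensor is a hypothetical stand-in for ggml_tensor, added only so the example compiles on its own; it is not part of the diff.

#include <cstdint>
#include <cstdio>
#include <vector>

struct mini_tensor {                    // hypothetical stand-in for ggml_tensor
    void *        data;
    mini_tensor * src[1];
};

template <typename T_Dst, typename T_Src = T_Dst>
struct typed_data {
    const T_Src * src;
    T_Dst *       dst;
};

template <typename T_Dst, typename T_Src = T_Dst>
typed_data<T_Dst, T_Src> cast_data(mini_tensor * dst) {
    return {
        /* .src = */ static_cast<const T_Src *>(dst->src[0]->data),
        /* .dst = */ static_cast<T_Dst *>(dst->data)
    };
}

int main() {
    std::vector<uint16_t> fp16_in(4, 0);    // pretend fp16 storage
    std::vector<float>    f32_out(4, 0.0f);

    mini_tensor src_t { fp16_in.data(), { nullptr } };
    mini_tensor dst_t { f32_out.data(), { &src_t } };

    // Mixed types: a kernel would read fp16-sized elements and write float.
    // With only one template argument, T_Src defaults to T_Dst, which matches
    // the old same-type behaviour.
    auto data = cast_data<float, uint16_t>(&dst_t);
    std::printf("src=%p dst=%p\n", (const void *) data.src, (void *) data.dst);
    return 0;
}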
@@ -73,5 +76,5 @@ void ggml_sycl_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 void ggml_sycl_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 void ggml_sycl_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);

 #endif // GGML_SYCL_ELEMENTWISE_HPP
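The GELU_QUICK_COEF constant added in the first hunk above is the coefficient of the sigmoid-based "quick" GELU approximation, gelu_quick(x) = x * sigmoid(1.702 * x). A scalar reference form is sketched below; this is only an illustration of what the constant encodes, not the backend's actual kernel.

#include <cmath>

// Written with the negative coefficient so the sigmoid becomes
// 1 / (1 + exp(-1.702 * x)).
static float gelu_quick_ref(float x) {
    const float GELU_QUICK_COEF = -1.702f;
    return x * (1.0f / (1.0f + std::exp(GELU_QUICK_COEF * x)));
}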


@@ -4201,6 +4201,8 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
             case GGML_UNARY_OP_GELU_ERF:
             case GGML_UNARY_OP_TANH:
             case GGML_UNARY_OP_EXP:
+                // Disable FP16 until we find out the root cause of failing fp16 sycl::exp
+                return ggml_is_contiguous(op->src[0]) && (op->type == op->src[0]->type) && op->src[0]->type == GGML_TYPE_F32;
             case GGML_UNARY_OP_SGN:
             case GGML_UNARY_OP_ABS:
             case GGML_UNARY_OP_ELU:
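The added check keeps GGML_UNARY_OP_EXP on the F32 path only. A hypothetical standalone reproducer for the underlying fp16 issue could compare sycl::exp on sycl::half against a float reference, as sketched below. This program is not part of the commit; it assumes a device that reports the fp16 aspect and uses unified shared memory only to keep the example short.

#include <sycl/sycl.hpp>
#include <cmath>
#include <cstdio>

int main() {
    sycl::queue q;
    const int n = 16;
    sycl::half * in  = sycl::malloc_shared<sycl::half>(n, q);
    sycl::half * out = sycl::malloc_shared<sycl::half>(n, q);
    for (int i = 0; i < n; ++i) {
        in[i] = sycl::half(-4.0f + 0.5f * (float) i);
    }

    q.parallel_for(sycl::range<1>(n), [=](sycl::id<1> i) {
        out[i] = sycl::exp(in[i]);  // fp16 exp path under test
    }).wait();

    for (int i = 0; i < n; ++i) {
        float ref = std::exp((float) in[i]);
        float got = (float) out[i];
        // Loose tolerance: fp16 carries roughly 3 decimal digits of precision.
        if (std::fabs(got - ref) > 1e-2f * std::fabs(ref) + 1e-3f) {
            std::printf("mismatch at %d: got %f, expected %f\n", i, got, ref);
        }
    }

    sycl::free(in, q);
    sycl::free(out, q);
    return 0;
}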