mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-06-27 20:05:20 +00:00)
SYCL: Take improvements from GLU branch and disable faulty fp16 exp after update
File diff suppressed because it is too large
@@ -3,27 +3,30 @@
 
 #include "common.hpp"
 #include "ggml.h"
-#include <limits.h>
+#include <limits> // For std::numeric_limits
 
 template <typename T>
 T neg_infinity() {
     return -std::numeric_limits<T>::infinity();
 }
 
-template<typename T>
+template<typename T_Dst, typename T_Src = T_Dst>
 struct typed_data {
-    const T * src;
-    T * dst;
+    const T_Src * src;
+    T_Dst * dst;
 };
 
-template<typename T>
-typed_data<T> cast_data(ggml_tensor * dst) {
+template<typename T_Dst, typename T_Src = T_Dst>
+typed_data<T_Dst, T_Src> cast_data(ggml_tensor * dst) {
     return {
-        /* .src = */ static_cast<const T *>(dst->src[0]->data),
-        /* .dst = */ static_cast<T *>(dst->data)
+        /* .src = */ static_cast<const T_Src *>(dst->src[0]->data),
+        /* .dst = */ static_cast<T_Dst *>(dst->data)
     };
 }
 
+const float GELU_QUICK_COEF = -1.702f;
+
+
 void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
 void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
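
Context for the template change above: typed_data/cast_data previously forced the source and destination pointers to share one element type; the GLU-branch version takes separate T_Dst/T_Src parameters, with T_Src defaulting to T_Dst so existing single-type call sites compile unchanged, while kernels that read one type and write another become expressible. Below is a minimal standalone sketch of the same pattern; the Tensor struct is a stand-in for ggml_tensor, and the mixed-type call is illustrative, not a call site from this commit.

#include <cstdint>
#include <cstdio>

struct Tensor {            // stand-in for ggml_tensor (assumption)
    void *   data;
    Tensor * src0;         // stand-in for ggml_tensor::src[0]
};

template <typename T_Dst, typename T_Src = T_Dst>
struct typed_data {
    const T_Src * src;
    T_Dst * dst;
};

template <typename T_Dst, typename T_Src = T_Dst>
typed_data<T_Dst, T_Src> cast_data(Tensor * dst) {
    return {
        /* .src = */ static_cast<const T_Src *>(dst->src0->data),
        /* .dst = */ static_cast<T_Dst *>(dst->data)
    };
}

int main() {
    uint16_t in[4]  = {};  // pretend fp16 payload
    float    out[4] = {};
    Tensor src { in,  nullptr };
    Tensor dst { out, &src };

    // Single-type use: T_Src defaults to T_Dst, as before the change.
    auto same  = cast_data<float>(&dst);
    // Mixed-type use enabled by this commit: read one type, write another.
    auto mixed = cast_data<float, uint16_t>(&dst);
    std::printf("src=%p dst=%p\n", (const void *) mixed.src, (void *) mixed.dst);
    (void) same;
    return 0;
}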
@@ -73,5 +76,5 @@ void ggml_sycl_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 void ggml_sycl_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
 void ggml_sycl_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-#endif // GGML_SYCL_ELEMENTWISE_HPP
 
+#endif // GGML_SYCL_ELEMENTWISE_HPP
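
The GELU_QUICK_COEF constant added in the first hunk corresponds to the sigmoid approximation of GELU, gelu_quick(x) = x * sigmoid(1.702 * x); storing the coefficient negated lets a kernel evaluate it with a single exp call. The kernel body is not part of these hunks, so the following is a sketch of the usual formulation, not this commit's device code.

#include <cmath>
#include <cstdio>
#include <initializer_list>

const float GELU_QUICK_COEF = -1.702f; // constant added in the hunk above

// gelu_quick(x) = x * sigmoid(1.702 * x) = x / (1 + exp(-1.702 * x))
static float gelu_quick(float x) {
    return x * (1.0f / (1.0f + std::exp(GELU_QUICK_COEF * x)));
}

int main() {
    for (float x : {-2.0f, 0.0f, 2.0f}) {
        std::printf("gelu_quick(%+.1f) = %+.6f\n", x, gelu_quick(x));
    }
    return 0;
}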
@@ -4201,6 +4201,8 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op)
         case GGML_UNARY_OP_GELU_ERF:
         case GGML_UNARY_OP_TANH:
         case GGML_UNARY_OP_EXP:
+            // Disable FP16 until we find out the root cause of failing fp16 sycl::exp
+            return ggml_is_contiguous(op->src[0]) && (op->type == op->src[0]->type) && op->src[0]->type == GGML_TYPE_F32;
         case GGML_UNARY_OP_SGN:
         case GGML_UNARY_OP_ABS:
         case GGML_UNARY_OP_ELU:
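
The added guard reports EXP as supported only for contiguous tensors whose input and output types are both F32, keeping fp16 off until the failing sycl::exp is understood. Below is a standalone restatement of that predicate; the Op and Type names are stand-ins for the ggml structures, not the library's API.

#include <cstdio>

enum class Type { F16, F32 };

struct Op {
    Type type;       // output type (op->type)
    Type src_type;   // input type  (op->src[0]->type)
    bool contiguous; // ggml_is_contiguous(op->src[0]) stand-in
};

// Same condition as the added return statement: contiguous, matching
// input/output types, and F32 only (fp16 exp stays disabled).
static bool supports_exp(const Op & op) {
    return op.contiguous && op.type == op.src_type && op.src_type == Type::F32;
}

int main() {
    std::printf("f32 contiguous: %d\n", supports_exp({Type::F32, Type::F32, true})); // 1
    std::printf("f16 contiguous: %d\n", supports_exp({Type::F16, Type::F16, true})); // 0
    return 0;
}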