diff --git a/docs/backend/SYCL.md b/docs/backend/SYCL.md index 249e73451..6e9b88935 100644 --- a/docs/backend/SYCL.md +++ b/docs/backend/SYCL.md @@ -757,7 +757,7 @@ use 1 SYCL GPUs: [0] with Max compute units:512 | Name | Value | Function | |-------------------|------------------|---------------------------------------------------------------------------------------------------------------------------| | GGML_SYCL_DEBUG | 0 (default) or 1 | Enable log function by macro: GGML_SYCL_DEBUG | -| GGML_SYCL_DISABLE_OPT | 0 (default) or 1 | Disable optimize features based on Intel GPU type, to compare the performance increase | +| GGML_SYCL_DISABLE_OPT | 0 (default) or 1 | Disable optimize features for Intel GPUs. (Recommended to 1 for intel devices older than Gen 10) | | GGML_SYCL_DISABLE_GRAPH | 0 or 1 (default) | Disable running computations through SYCL Graphs feature. Disabled by default because graph performance isn't yet better than non-graph performance. | | GGML_SYCL_DISABLE_DNN | 0 (default) or 1 | Disable running computations through oneDNN and always use oneMKL. | | ZES_ENABLE_SYSMAN | 0 (default) or 1 | Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory.
Recommended to use when --split-mode = layer | diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp index 753b4af14..4e7449d06 100644 --- a/ggml/src/ggml-sycl/common.hpp +++ b/ggml/src/ggml-sycl/common.hpp @@ -199,7 +199,7 @@ struct sycl_device_info { // size_t smpb; // max. shared memory per block bool vmm; // virtual memory support size_t total_vram; - sycl_hw_info hw_info; + //sycl_hw_info hw_info; \\ device id and aarch, currently not used optimize_feature opt_feature; }; @@ -286,29 +286,6 @@ struct ggml_tensor_extra_gpu { void release_extra_gpu(ggml_tensor_extra_gpu * extra, std::vector streams={}); -inline optimize_feature check_gpu_optimize_feature(syclex::architecture &arch) { - optimize_feature opt; - - opt.reorder = - (arch == syclex::architecture::intel_gpu_dg1 || - arch == syclex::architecture::intel_gpu_acm_g10 || - arch == syclex::architecture::intel_gpu_acm_g11 || - arch == syclex::architecture::intel_gpu_acm_g12 || - arch == syclex::architecture::intel_gpu_pvc || - arch == syclex::architecture::intel_gpu_pvc_vg || - arch == syclex::architecture::intel_gpu_mtl_u || - arch == syclex::architecture::intel_gpu_mtl_s || - arch == syclex::architecture::intel_gpu_mtl_h || - arch == syclex::architecture::intel_gpu_arl_u || - arch == syclex::architecture::intel_gpu_arl_s || - arch == syclex::architecture::intel_gpu_arl_h || - arch == syclex::architecture::intel_gpu_bmg_g21 || - arch == syclex::architecture::intel_gpu_lnl_m - ); - - return opt; -} - namespace sycl_ex = sycl::ext::oneapi::experimental; struct ggml_backend_sycl_context { int device; diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index f25a96a62..9cb36ae99 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -83,9 +83,7 @@ static ggml_sycl_device_info ggml_sycl_init() { info.devices[i].cc = 100 * prop.get_major_version() + 10 * prop.get_minor_version(); - info.devices[i].hw_info = get_device_hw_info(&device); - info.devices[i].opt_feature = check_gpu_optimize_feature(info.devices[i].hw_info.arch); - + info.devices[i].opt_feature.reorder = !device.ext_oneapi_architecture_is(syclex::arch_category::intel_gpu); info.max_work_group_sizes[i] = prop.get_max_work_group_size(); } @@ -195,7 +193,7 @@ static void ggml_check_sycl() try { if (!initialized) { g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0); - g_ggml_sycl_disable_optimize= get_sycl_env("GGML_SYCL_DISABLE_OPT", 1); + g_ggml_sycl_disable_optimize = get_sycl_env("GGML_SYCL_DISABLE_OPT", 0); g_ggml_sycl_disable_graph = get_sycl_env("GGML_SYCL_DISABLE_GRAPH", 1); g_ggml_sycl_disable_dnn = get_sycl_env("GGML_SYCL_DISABLE_DNN", 0); g_ggml_sycl_prioritize_dmmv = get_sycl_env("GGML_SYCL_PRIORITIZE_DMMV", 0); diff --git a/ggml/src/ggml-sycl/sycl_hw.cpp b/ggml/src/ggml-sycl/sycl_hw.cpp index da121ffc2..704114003 100644 --- a/ggml/src/ggml-sycl/sycl_hw.cpp +++ b/ggml/src/ggml-sycl/sycl_hw.cpp @@ -1,6 +1,7 @@ #include "sycl_hw.hpp" - +// TODO: currently not used +/* sycl_hw_info get_device_hw_info(sycl::device *device_ptr) { sycl_hw_info res; int32_t id = device_ptr->get_info(); @@ -11,3 +12,4 @@ sycl_hw_info get_device_hw_info(sycl::device *device_ptr) { return res; } +*/ diff --git a/ggml/src/ggml-sycl/sycl_hw.hpp b/ggml/src/ggml-sycl/sycl_hw.hpp index bf689450c..36b140bf0 100644 --- a/ggml/src/ggml-sycl/sycl_hw.hpp +++ b/ggml/src/ggml-sycl/sycl_hw.hpp @@ -10,6 +10,8 @@ namespace syclex = sycl::ext::oneapi::experimental; +// TODO: currently not used +/* struct sycl_hw_info { syclex::architecture arch; int32_t device_id; @@ -18,6 +20,7 @@ struct sycl_hw_info { bool is_in_vector(std::vector &vec, int item); sycl_hw_info get_device_hw_info(sycl::device *device_ptr); +*/ #endif // SYCL_HW_HPP