sycl: GGML_SYCL_DISABLE_OPT on by default for all Intel Devices (#13973)

2025-06-27 20:05:20 +00:00 · 2025-06-25 17:09:55 +01:00
parent 73e53dc834
commit 2bf9d539dd
5 changed files with 10 additions and 30 deletions
--- a/docs/backend/SYCL.md
+++ b/docs/backend/SYCL.md
@ -757,7 +757,7 @@ use 1 SYCL GPUs: [0] with Max compute units:512
 | Name              | Value            | Function                                                                                                                  |
 |-------------------|------------------|---------------------------------------------------------------------------------------------------------------------------|
 | GGML_SYCL_DEBUG   | 0 (default) or 1 | Enable log function by macro: GGML_SYCL_DEBUG                                                                             |
-| GGML_SYCL_DISABLE_OPT | 0 (default) or 1 | Disable optimize features based on Intel GPU type, to compare the performance increase |
+| GGML_SYCL_DISABLE_OPT | 0 (default) or 1 | Disable optimization features for Intel GPUs. (Setting this to 1 is recommended for Intel devices older than Gen 10.) |
 | GGML_SYCL_DISABLE_GRAPH | 0 or 1 (default) | Disable running computations through SYCL Graphs feature. Disabled by default because graph performance isn't yet better than non-graph performance. |
 | GGML_SYCL_DISABLE_DNN | 0 (default) or 1 | Disable running computations through oneDNN and always use oneMKL. |
 | ZES_ENABLE_SYSMAN | 0 (default) or 1 | Enable querying the GPU's free memory via sycl::aspect::ext_intel_free_memory.<br>Recommended when --split-mode = layer |
--- a/ggml/src/ggml-sycl/common.hpp
+++ b/ggml/src/ggml-sycl/common.hpp
@ -199,7 +199,7 @@ struct sycl_device_info {
    // size_t  smpb;               // max. shared memory per block
    bool    vmm;                // virtual memory support
    size_t  total_vram;
-    sycl_hw_info hw_info;
+    //sycl_hw_info hw_info;     // device id and arch, currently not used
    optimize_feature opt_feature;
 };
@ -286,29 +286,6 @@ struct ggml_tensor_extra_gpu {
 void release_extra_gpu(ggml_tensor_extra_gpu * extra, std::vector<queue_ptr> streams={});
 // Builds the optimize_feature record for a device architecture.
 // `reorder` ends up true only when `arch` equals one of the Intel GPU
 // architectures listed in the table below; otherwise it is false.
 inline optimize_feature check_gpu_optimize_feature(syclex::architecture &arch) {
    // Architectures for which the reorder feature is reported as available.
    // A table scan is used instead of a switch so that enumerators sharing a
    // value (if any) cannot produce duplicate case labels.
    const syclex::architecture reorder_archs[] = {
        syclex::architecture::intel_gpu_dg1,
        syclex::architecture::intel_gpu_acm_g10,
        syclex::architecture::intel_gpu_acm_g11,
        syclex::architecture::intel_gpu_acm_g12,
        syclex::architecture::intel_gpu_pvc,
        syclex::architecture::intel_gpu_pvc_vg,
        syclex::architecture::intel_gpu_mtl_u,
        syclex::architecture::intel_gpu_mtl_s,
        syclex::architecture::intel_gpu_mtl_h,
        syclex::architecture::intel_gpu_arl_u,
        syclex::architecture::intel_gpu_arl_s,
        syclex::architecture::intel_gpu_arl_h,
        syclex::architecture::intel_gpu_bmg_g21,
        syclex::architecture::intel_gpu_lnl_m,
    };

    optimize_feature features;
    features.reorder = false;
    for (const syclex::architecture candidate : reorder_archs) {
        if (arch == candidate) {
            features.reorder = true;
            break;
        }
    }
    return features;
 }
 namespace sycl_ex = sycl::ext::oneapi::experimental;
 struct ggml_backend_sycl_context {
    int device;
--- a/ggml/src/ggml-sycl/ggml-sycl.cpp
+++ b/ggml/src/ggml-sycl/ggml-sycl.cpp
@ -83,9 +83,7 @@ static ggml_sycl_device_info ggml_sycl_init() {
        info.devices[i].cc =
            100 * prop.get_major_version() + 10 * prop.get_minor_version();
-        info.devices[i].hw_info = get_device_hw_info(&device);
+        info.devices[i].opt_feature.reorder = !device.ext_oneapi_architecture_is(syclex::arch_category::intel_gpu);
        info.devices[i].opt_feature = check_gpu_optimize_feature(info.devices[i].hw_info.arch);
        info.max_work_group_sizes[i] = prop.get_max_work_group_size();
    }
@ -195,7 +193,7 @@ static void ggml_check_sycl() try {
    if (!initialized) {
        g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
-        g_ggml_sycl_disable_optimize= get_sycl_env("GGML_SYCL_DISABLE_OPT", 1);
+        g_ggml_sycl_disable_optimize = get_sycl_env("GGML_SYCL_DISABLE_OPT", 0);
        g_ggml_sycl_disable_graph = get_sycl_env("GGML_SYCL_DISABLE_GRAPH", 1);
        g_ggml_sycl_disable_dnn = get_sycl_env("GGML_SYCL_DISABLE_DNN", 0);
        g_ggml_sycl_prioritize_dmmv = get_sycl_env("GGML_SYCL_PRIORITIZE_DMMV", 0);
--- a/ggml/src/ggml-sycl/sycl_hw.cpp
+++ b/ggml/src/ggml-sycl/sycl_hw.cpp
@ -1,6 +1,7 @@
 #include "sycl_hw.hpp"
-
+// TODO: currently not used
 /*
 sycl_hw_info get_device_hw_info(sycl::device *device_ptr) {
  sycl_hw_info res;
  int32_t id = device_ptr->get_info<sycl::ext::intel::info::device::device_id>();
@ -11,3 +12,4 @@ sycl_hw_info get_device_hw_info(sycl::device *device_ptr) {
  return res;
 }
 */
--- a/ggml/src/ggml-sycl/sycl_hw.hpp
+++ b/ggml/src/ggml-sycl/sycl_hw.hpp
@ -10,6 +10,8 @@
 namespace syclex = sycl::ext::oneapi::experimental;
 // TODO: currently not used
 /*
 struct sycl_hw_info {
  syclex::architecture arch;
  int32_t device_id;
@ -18,6 +20,7 @@ struct sycl_hw_info {
 bool is_in_vector(std::vector<int> &vec, int item);
 sycl_hw_info get_device_hw_info(sycl::device *device_ptr);
 */
 #endif // SYCL_HW_HPP