Vulkan Mixture of Experts (MoE) support (#7628)
* Finish Vulkan mul_mat_id implementation
* Add Vulkan sum_rows and div ops
* Fix MUL_MAT_ID matrix matrix shader
* Fix MUL_MAT_ID matrix vector shader dispatch size
* Fix MUL_MAT_ID matrix vector shader and dispatch code
* Update Vulkan CPU offload for MUL_MAT_ID
* Fix crash when using split mode none and setting a main GPU
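The central op here is MUL_MAT_ID, the matrix multiplication MoE layers use to route each token through the expert chosen for it. Below is a minimal CPU sketch of that semantics, assuming one routed expert per token (real MoE layers may route each token to its top-k experts); all names (W, ids, x, n_in, n_out, n_tok) are illustrative, not ggml's API:

// mul_mat_id_sketch.cpp -- per-token expert selection followed by a plain matmul
#include <cstdio>
#include <vector>

int main() {
    const int n_in = 3, n_out = 2, n_tok = 2;
    // Two experts; W[e] is expert e's n_out x n_in weight matrix, row-major.
    std::vector<std::vector<float>> W = {
        {1, 0, 0,   0, 1, 0},  // expert 0
        {2, 2, 2,   1, 1, 1},  // expert 1
    };
    float x[n_tok][n_in] = {{1, 2, 3}, {1, 1, 1}}; // per-token activations
    int ids[n_tok]       = {0, 1};                 // router output: expert id per token

    for (int t = 0; t < n_tok; ++t) {
        const std::vector<float> & w = W[ids[t]];  // select the routed expert's weights
        for (int r = 0; r < n_out; ++r) {
            float acc = 0.0f;
            for (int c = 0; c < n_in; ++c) {
                acc += w[r * n_in + c] * x[t][c];
            }
            std::printf("token %d -> expert %d: out[%d] = %g\n", t, ids[t], r, acc);
        }
    }
    return 0;
}

This per-token indirection through ids is why MUL_MAT_ID needs its own matrix-matrix and matrix-vector shaders and dispatch-size handling, rather than reusing the plain mul_mat path.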
@@ -1002,9 +1002,9 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
             return true;
         }
         params.main_gpu = std::stoi(argv[i]);
-#ifndef GGML_USE_CUDA_SYCL
-        fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL. Setting the main GPU has no effect.\n");
-#endif // GGML_USE_CUDA_SYCL
+#ifndef GGML_USE_CUDA_SYCL_VULKAN
+        fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL/Vulkan. Setting the main GPU has no effect.\n");
+#endif // GGML_USE_CUDA_SYCL_VULKAN
         return true;
     }
     if (arg == "--split-mode" || arg == "-sm") {
@@ -1030,9 +1030,9 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
             invalid_param = true;
             return true;
         }
-#ifndef GGML_USE_CUDA_SYCL
-        fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL. Setting the split mode has no effect.\n");
-#endif // GGML_USE_CUDA_SYCL
+#ifndef GGML_USE_CUDA_SYCL_VULKAN
+        fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL/Vulkan. Setting the split mode has no effect.\n");
+#endif // GGML_USE_CUDA_SYCL_VULKAN
         return true;
     }
     if (arg == "--tensor-split" || arg == "-ts") {
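For context, the warnings above are gated by umbrella macros rather than per-backend checks. GGML_USE_CUDA, GGML_USE_SYCL, and GGML_USE_VULKAN are real llama.cpp compile-time defines; the derivation below is a plausible sketch, not copied from the tree:

// Sketch: derive the umbrella macros from the per-backend build flags.
#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL)
#define GGML_USE_CUDA_SYCL          // hypothetical derivation, for illustration
#endif

#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL) || defined(GGML_USE_VULKAN)
#define GGML_USE_CUDA_SYCL_VULKAN   // hypothetical derivation, for illustration
#endif

Under that reading, switching the #ifndef from GGML_USE_CUDA_SYCL to GGML_USE_CUDA_SYCL_VULKAN silences the warning on Vulkan-only builds, where --main-gpu and --split-mode now do take effect.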