mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-28 19:18:57 -04:00
CUDA: faster q2_K, q3_K MMQ + int8 tensor cores (#7921)
* CUDA: faster q2_K, q3_K MMQ + int8 tensor cores * try CI fix * try CI fix * try CI fix * fix data race * rever q2_K precision related changes
This commit is contained in:
@@ -188,13 +188,15 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
||||
info.default_tensor_split[id] = total_vram;
|
||||
total_vram += prop.totalGlobalMem;
|
||||
|
||||
info.devices[id].nsm = prop.multiProcessorCount;
|
||||
info.devices[id].smpb = prop.sharedMemPerBlock;
|
||||
#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
|
||||
info.devices[id].smpbo = prop.sharedMemPerBlock;
|
||||
info.devices[id].cc = 100*prop.major + 10*prop.minor + CC_OFFSET_AMD;
|
||||
#else
|
||||
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
|
||||
info.devices[id].cc = 100*prop.major + 10*prop.minor;
|
||||
#endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
|
||||
info.devices[id].smpb = prop.sharedMemPerBlock;
|
||||
info.devices[id].nsm = prop.multiProcessorCount;
|
||||
}
|
||||
|
||||
for (int id = 0; id < info.device_count; ++id) {
|
||||
|
Reference in New Issue
Block a user