mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-07-06 19:43:51 +00:00
Vulkan: Add device architecture enum and logic to recognize AMD generations
This commit is contained in:
@ -149,6 +149,66 @@ static void ggml_vk_destroy_buffer(vk_buffer& buf);
|
|||||||
|
|
||||||
static constexpr uint32_t mul_mat_vec_max_cols = 8;
|
static constexpr uint32_t mul_mat_vec_max_cols = 8;
|
||||||
|
|
||||||
|
// Coarse GPU architecture classification produced by get_device_architecture().
// Values are spelled out explicitly; they match the implicit numbering (0..4).
enum vk_device_architecture {
    OTHER     = 0, // non-AMD vendor, required extensions missing, or no heuristic matched
    AMD_GCN   = 1, // AMD device reporting min == max subgroup size == 64
    AMD_RDNA1 = 2, // AMD device with subgroup sizes 32..64 and wavefrontsPerSimd == 20
    AMD_RDNA2 = 3, // AMD device with subgroup sizes 32..64 that matched neither RDNA1 nor RDNA3
    AMD_RDNA3 = 4, // AMD device with subgroup sizes 32..64 and accelerated 4x8-bit packed mixed-sign dot product
};
|
||||||
|
|
||||||
|
// Classifies a physical device into a vk_device_architecture bucket.
//
// Only devices whose vendorID is VK_VENDOR_ID_AMD are inspected further, and
// only when they expose all three of VK_AMD_shader_core_properties,
// VK_KHR_shader_integer_dot_product and VK_EXT_subgroup_size_control.
// The decision is then made from reported property values:
//   - min == max subgroup size == 64                      -> AMD_GCN
//   - subgroup sizes 32..64 and wavefrontsPerSimd == 20   -> AMD_RDNA1
//   - subgroup sizes 32..64 and accelerated 4x8-bit packed
//     mixed-signedness integer dot product                -> AMD_RDNA3
//   - subgroup sizes 32..64 otherwise                     -> AMD_RDNA2
// Every other case yields OTHER.
// NOTE(review): these are heuristics based on driver-reported values, not an
// explicit architecture query; confirm them when adding new GPU generations.
static vk_device_architecture get_device_architecture(const vk::PhysicalDevice& device) {
    const vk::PhysicalDeviceProperties base_props = device.getProperties();

    // Nothing to classify for non-AMD hardware.
    if (base_props.vendorID != VK_VENDOR_ID_AMD) {
        return vk_device_architecture::OTHER;
    }

    // Determine which of the extensions backing the property queries below are available.
    bool has_shader_core_ext   = false;
    bool has_integer_dot_ext   = false;
    bool has_subgroup_size_ext = false;

    for (const auto& ext : device.enumerateDeviceExtensionProperties()) {
        if (strcmp("VK_AMD_shader_core_properties", ext.extensionName) == 0) {
            has_shader_core_ext = true;
            continue;
        }
        if (strcmp("VK_KHR_shader_integer_dot_product", ext.extensionName) == 0) {
            has_integer_dot_ext = true;
            continue;
        }
        if (strcmp("VK_EXT_subgroup_size_control", ext.extensionName) == 0) {
            has_subgroup_size_ext = true;
        }
    }

    // All three property structs are needed; without them the device stays unclassified.
    if (!(has_shader_core_ext && has_integer_dot_ext && has_subgroup_size_ext)) {
        return vk_device_architecture::OTHER;
    }

    // Build the query chain: props2 -> shader core -> integer dot product -> subgroup size control.
    vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT     sg_ctrl_props;
    vk::PhysicalDeviceShaderIntegerDotProductPropertiesKHR dot_props;
    vk::PhysicalDeviceShaderCorePropertiesAMD              core_props;
    vk::PhysicalDeviceProperties2                          query;

    dot_props.pNext  = &sg_ctrl_props;
    core_props.pNext = &dot_props;
    query.pNext      = &core_props;

    device.getProperties2(&query);

    const uint32_t min_subgroup = sg_ctrl_props.minSubgroupSize;
    const uint32_t max_subgroup = sg_ctrl_props.maxSubgroupSize;

    // Every recognized AMD generation reports a maximum subgroup size of 64.
    if (max_subgroup != 64) {
        return vk_device_architecture::OTHER;
    }

    // Fixed subgroup size of 64 is classified as GCN.
    if (min_subgroup == 64) {
        return vk_device_architecture::AMD_GCN;
    }

    // Anything other than a 32..64 range is not recognized.
    if (min_subgroup != 32) {
        return vk_device_architecture::OTHER;
    }

    // RDNA family: tell the generations apart by further reported properties.
    if (core_props.wavefrontsPerSimd == 20) {
        return vk_device_architecture::AMD_RDNA1;
    }

    return dot_props.integerDotProduct4x8BitPackedMixedSignednessAccelerated
        ? vk_device_architecture::AMD_RDNA3
        : vk_device_architecture::AMD_RDNA2;
}
|
||||||
|
|
||||||
struct vk_device_struct {
|
struct vk_device_struct {
|
||||||
std::mutex mutex;
|
std::mutex mutex;
|
||||||
|
|
||||||
@ -161,6 +221,7 @@ struct vk_device_struct {
|
|||||||
bool pipeline_robustness;
|
bool pipeline_robustness;
|
||||||
vk::Device device;
|
vk::Device device;
|
||||||
uint32_t vendor_id;
|
uint32_t vendor_id;
|
||||||
|
vk_device_architecture architecture;
|
||||||
vk_queue compute_queue;
|
vk_queue compute_queue;
|
||||||
vk_queue transfer_queue;
|
vk_queue transfer_queue;
|
||||||
bool single_queue;
|
bool single_queue;
|
||||||
@ -2219,7 +2280,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
|
|||||||
device->need_compiles = false;
|
device->need_compiles = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool ggml_vk_khr_cooperative_matrix_support(const vk::PhysicalDeviceProperties& props, const vk::PhysicalDeviceDriverProperties& driver_props);
|
static bool ggml_vk_khr_cooperative_matrix_support(const vk::PhysicalDeviceProperties& props, const vk::PhysicalDeviceDriverProperties& driver_props, vk_device_architecture arch);
|
||||||
|
|
||||||
static vk_device ggml_vk_get_device(size_t idx) {
|
static vk_device ggml_vk_get_device(size_t idx) {
|
||||||
VK_LOG_DEBUG("ggml_vk_get_device(" << idx << ")");
|
VK_LOG_DEBUG("ggml_vk_get_device(" << idx << ")");
|
||||||
@ -2248,6 +2309,8 @@ static vk_device ggml_vk_get_device(size_t idx) {
|
|||||||
device->physical_device = physical_devices[dev_num];
|
device->physical_device = physical_devices[dev_num];
|
||||||
const std::vector<vk::ExtensionProperties> ext_props = device->physical_device.enumerateDeviceExtensionProperties();
|
const std::vector<vk::ExtensionProperties> ext_props = device->physical_device.enumerateDeviceExtensionProperties();
|
||||||
|
|
||||||
|
device->architecture = get_device_architecture(device->physical_device);
|
||||||
|
|
||||||
bool fp16_storage = false;
|
bool fp16_storage = false;
|
||||||
bool fp16_compute = false;
|
bool fp16_compute = false;
|
||||||
bool maintenance4_support = false;
|
bool maintenance4_support = false;
|
||||||
@ -2257,7 +2320,6 @@ static vk_device ggml_vk_get_device(size_t idx) {
|
|||||||
bool coopmat2_support = false;
|
bool coopmat2_support = false;
|
||||||
device->coopmat_support = false;
|
device->coopmat_support = false;
|
||||||
|
|
||||||
// Check if maintenance4 is supported
|
|
||||||
for (const auto& properties : ext_props) {
|
for (const auto& properties : ext_props) {
|
||||||
if (strcmp("VK_KHR_maintenance4", properties.extensionName) == 0) {
|
if (strcmp("VK_KHR_maintenance4", properties.extensionName) == 0) {
|
||||||
maintenance4_support = true;
|
maintenance4_support = true;
|
||||||
@ -2370,7 +2432,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
|
|||||||
|
|
||||||
device->fp16 = !force_disable_f16 && fp16_storage && fp16_compute;
|
device->fp16 = !force_disable_f16 && fp16_storage && fp16_compute;
|
||||||
|
|
||||||
if (!ggml_vk_khr_cooperative_matrix_support(device->properties, driver_props)) {
|
if (!ggml_vk_khr_cooperative_matrix_support(device->properties, driver_props, device->architecture)) {
|
||||||
device->coopmat_support = false;
|
device->coopmat_support = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2776,7 +2838,9 @@ static void ggml_vk_print_gpu_info(size_t idx) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!ggml_vk_khr_cooperative_matrix_support(props2.properties, driver_props)) {
|
const vk_device_architecture device_architecture = get_device_architecture(physical_device);
|
||||||
|
|
||||||
|
if (!ggml_vk_khr_cooperative_matrix_support(props2.properties, driver_props, device_architecture)) {
|
||||||
coopmat_support = false;
|
coopmat_support = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -8435,7 +8499,7 @@ static bool ggml_vk_instance_portability_enumeration_ext_available(const std::ve
|
|||||||
UNUSED(instance_extensions);
|
UNUSED(instance_extensions);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool ggml_vk_khr_cooperative_matrix_support(const vk::PhysicalDeviceProperties& props, const vk::PhysicalDeviceDriverProperties& driver_props) {
|
static bool ggml_vk_khr_cooperative_matrix_support(const vk::PhysicalDeviceProperties& props, const vk::PhysicalDeviceDriverProperties& driver_props, vk_device_architecture arch) {
|
||||||
switch (props.vendorID) {
|
switch (props.vendorID) {
|
||||||
case VK_VENDOR_ID_INTEL:
|
case VK_VENDOR_ID_INTEL:
|
||||||
// Intel drivers don't support coopmat properly yet
|
// Intel drivers don't support coopmat properly yet
|
||||||
@ -8443,10 +8507,7 @@ static bool ggml_vk_khr_cooperative_matrix_support(const vk::PhysicalDevicePrope
|
|||||||
case VK_VENDOR_ID_AMD:
|
case VK_VENDOR_ID_AMD:
|
||||||
if (driver_props.driverID == vk::DriverId::eAmdProprietary || driver_props.driverID == vk::DriverId::eAmdOpenSource) {
|
if (driver_props.driverID == vk::DriverId::eAmdProprietary || driver_props.driverID == vk::DriverId::eAmdOpenSource) {
|
||||||
// Workaround for AMD proprietary driver reporting support on all GPUs
|
// Workaround for AMD proprietary driver reporting support on all GPUs
|
||||||
const std::string name = props.deviceName;
|
return arch == vk_device_architecture::AMD_RDNA3;
|
||||||
return name.rfind("AMD Radeon RX 7", 0) == 0 || name.rfind("AMD Radeon(TM) RX 7", 0) == 0 || // RDNA 3 consumer GPUs
|
|
||||||
name.rfind("AMD Radeon PRO W7", 0) == 0 || name.rfind("AMD Radeon(TM) PRO W7", 0) == 0 || // RDNA 3 workstation GPUs
|
|
||||||
name.rfind("AMD Radeon 7", 0) == 0 || name.rfind("AMD Radeon(TM) 7", 0) == 0; // RDNA 3 APUs
|
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
default:
|
default:
|
||||||
|
Reference in New Issue
Block a user