mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-08-08 09:57:45 -04:00)
cuda : add f32 to bf16 copy op (#12806)
This allows BF16 KV-cache on CUDA.
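For context (not part of the commit): bf16 keeps f32's sign bit and 8-bit exponent and cuts the mantissa from 23 bits to 7, so the copy is a narrowing conversion that preserves f32's full dynamic range. A minimal host-side sketch of that bit-level relationship, assuming plain truncation (real converters, such as CUDA's __float2bfloat16, round to nearest even instead):

#include <cstdint>
#include <cstring>

// Illustrative only: bf16 is the upper 16 bits of the f32 bit pattern.
static uint16_t f32_to_bf16_truncate(float f) {
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits)); // bit-exact reinterpretation
    return (uint16_t) (bits >> 16);       // drop the low 16 mantissa bits
}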
@@ -3079,6 +3079,9 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
             if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_F32) {
                 return true;
             }
+            if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_BF16) {
+                return true;
+            }
             if (src0_type == GGML_TYPE_F32 && src1_type == GGML_TYPE_F16) {
                 return true;
             }
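The hunk above only extends the supports-op check so the backend accepts F32 -> BF16 for the CPY op; the kernel that performs the conversion is not shown here. A hypothetical sketch of what such a copy kernel can look like, using CUDA's bf16 intrinsics (the kernel name and launch shape are illustrative, not ggml's actual implementation):

#include <cuda_bf16.h>

// Sketch: each thread converts one f32 element to bf16 using the
// round-to-nearest-even intrinsic from cuda_bf16.h.
__global__ void cpy_f32_to_bf16(const float * src, __nv_bfloat16 * dst, int64_t n) {
    const int64_t i = (int64_t) blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        dst[i] = __float2bfloat16(src[i]);
    }
}

With F32 -> BF16 accepted by the CUDA backend, KV-cache tensors can be stored in bf16, halving their memory footprint relative to f32 while keeping the same exponent range.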