mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-29 12:35:16 +00:00
CANN: Simplify the environment variable setting(#13104)
* Simplify the environment variable setting to specify the memory pool type. * Adjust the GGML_CANN_ASYNC_MODE setting to accept yes, enable, 1, or on (case-insensitive) as valid options. * update * fix CI * update * delete whitespace * fix according to review * update CANN.md * update CANN.md
This commit is contained in:
@ -8,6 +8,7 @@
|
|||||||
- [DataType Supports](#datatype-supports)
|
- [DataType Supports](#datatype-supports)
|
||||||
- [Docker](#docker)
|
- [Docker](#docker)
|
||||||
- [Linux](#linux)
|
- [Linux](#linux)
|
||||||
|
- [Environment variable setup](#environment-variable-setup)
|
||||||
- [TODO](#todo)
|
- [TODO](#todo)
|
||||||
|
|
||||||
|
|
||||||
@ -290,5 +291,24 @@ Authors from Peking University: Bizhao Shi (bshi@pku.edu.cn), Yuxin Yang (yxyang
|
|||||||
|
|
||||||
We would like to thank Tuo Dai, Shanni Li, and all of the project maintainers from Huawei Technologies Co., Ltd for their help during the code development and pull request.
|
We would like to thank Tuo Dai, Shanni Li, and all of the project maintainers from Huawei Technologies Co., Ltd for their help during the code development and pull request.
|
||||||
|
|
||||||
|
## Environment variable setup
|
||||||
|
|
||||||
|
### GGML_CANN_ASYNC_MODE
|
||||||
|
|
||||||
|
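Enables asynchronous operator submission when set to `on`, `1`, `yes`, `y`, `enable` or `true` (case-insensitive). Any other value, or leaving the variable unset, keeps it disabled, which is the default.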
|
||||||
|
|
||||||
|
### GGML_CANN_MEM_POOL
|
||||||
|
|
||||||
|
Specifies the memory pool management strategy:
|
||||||
|
|
||||||
|
- vmm: Utilizes a virtual memory manager pool. If hardware support for VMM is unavailable, falls back to the legacy (leg) memory pool.
|
||||||
|
|
||||||
|
- prio: Employs a priority queue-based memory pool management.
|
||||||
|
- leg: Uses a fixed-size buffer pool.
|
||||||
|
|
||||||
|
### GGML_CANN_DISABLE_BUF_POOL_CLEAN
|
||||||
|
|
||||||
|
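Disables automatic cleanup of the memory pool when set to `on`, `1`, `yes`, `y`, `enable` or `true` (case-insensitive); cleanup stays enabled otherwise. This option is only effective when using the prio or leg memory pool strategies.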
|
||||||
|
|
||||||
## TODO
|
## TODO
|
||||||
- Support more models and data types.
|
- Support more models and data types.
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
#include <thread>
|
#include <thread>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
#include <optional>
|
||||||
|
|
||||||
#include "../include/ggml-cann.h"
|
#include "../include/ggml-cann.h"
|
||||||
#include "../include/ggml.h"
|
#include "../include/ggml.h"
|
||||||
@ -103,6 +104,9 @@ const ggml_cann_device_info& ggml_cann_info();
|
|||||||
void ggml_cann_set_device(int32_t device);
|
void ggml_cann_set_device(int32_t device);
|
||||||
int32_t ggml_cann_get_device();
|
int32_t ggml_cann_get_device();
|
||||||
|
|
||||||
|
std::optional<std::string> get_env(const std::string& name);
|
||||||
|
bool parse_bool(const std::string& value);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Abstract base class for memory pools used by CANN.
|
* @brief Abstract base class for memory pools used by CANN.
|
||||||
*/
|
*/
|
||||||
@ -354,7 +358,8 @@ struct ggml_backend_cann_context {
|
|||||||
: device(device), name("CANN" + std::to_string(device)), task_queue(1024, device) {
|
: device(device), name("CANN" + std::to_string(device)), task_queue(1024, device) {
|
||||||
ggml_cann_set_device(device);
|
ggml_cann_set_device(device);
|
||||||
description = aclrtGetSocName();
|
description = aclrtGetSocName();
|
||||||
async_mode = (getenv("GGML_CANN_ASYNC_MODE") != nullptr);
|
|
||||||
|
bool async_mode = parse_bool(get_env("GGML_CANN_ASYNC_MODE").value_or(""));
|
||||||
GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__,
|
GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__,
|
||||||
device, async_mode ? "ON" : "OFF");
|
device, async_mode ? "ON" : "OFF");
|
||||||
}
|
}
|
||||||
|
@ -31,6 +31,8 @@
|
|||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <queue>
|
#include <queue>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
#include <unordered_set>
|
||||||
|
#include <optional>
|
||||||
|
|
||||||
#include "ggml-impl.h"
|
#include "ggml-impl.h"
|
||||||
#include "ggml-backend-impl.h"
|
#include "ggml-backend-impl.h"
|
||||||
@ -93,6 +95,26 @@ int32_t ggml_cann_get_device() {
|
|||||||
return id;
|
return id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Read an environment variable and return its value in lower case.
 *
 * The value is lower-cased byte by byte so that callers can compare it
 * against lower-case keywords (e.g. "prio", "on") regardless of how the
 * user capitalised it.
 *
 * @param name Name of the environment variable to look up.
 * @return The lower-cased value if the variable is set (a variable set to an
 *         empty value yields an empty string), or std::nullopt if the
 *         variable is not set at all.
 */
std::optional<std::string> get_env(const std::string& name) {
    const char* val = std::getenv(name.c_str());
    if (!val) return std::nullopt;
    std::string res(val);
    // Convert through unsigned char: handing tolower a negative char (any
    // byte >= 0x80 where char is signed) is undefined behaviour.
    std::transform(res.begin(), res.end(), res.begin(),
                   [](unsigned char ch) { return static_cast<char>(::tolower(ch)); });
    return res;
}
|
||||||
|
|
||||||
|
/**
 * @brief Interpret a string as a boolean switch.
 *
 * Matching is exact and case-sensitive. The callers in this file pass the
 * output of get_env(), which is already lower-cased, so for them the check is
 * effectively case-insensitive.
 *
 * @param value Text to interpret, typically the value of an environment
 *              variable.
 * @return true if @p value is one of "on", "1", "yes", "y", "enable" or
 *         "true"; false for anything else, including the empty string.
 */
bool parse_bool(const std::string& value) {
    // A fixed table scanned linearly: with six short keywords this avoids
    // building a hash set (and its heap allocations) on every call.
    static const char* const truthy[] = {"on", "1", "yes", "y", "enable", "true"};
    for (const char* candidate : truthy) {
        if (value == candidate) return true;
    }
    return false;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Initialize the CANN device information.
|
* @brief Initialize the CANN device information.
|
||||||
*
|
*
|
||||||
@ -214,7 +236,7 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
|
|||||||
* @param device The device ID to associate with this buffer pool.
|
* @param device The device ID to associate with this buffer pool.
|
||||||
*/
|
*/
|
||||||
explicit ggml_cann_pool_buf_prio(int device) : device(device) {
|
explicit ggml_cann_pool_buf_prio(int device) : device(device) {
|
||||||
disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr;
|
disable_clean = parse_bool(get_env("GGML_CANN_DISABLE_BUF_POOL_CLEAN").value_or(""));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -410,7 +432,7 @@ struct ggml_cann_pool_buf : public ggml_cann_pool {
|
|||||||
* @param device The device ID to associate with this buffer pool.
|
* @param device The device ID to associate with this buffer pool.
|
||||||
*/
|
*/
|
||||||
explicit ggml_cann_pool_buf(int device) : device(device) {
|
explicit ggml_cann_pool_buf(int device) : device(device) {
|
||||||
disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr;
|
disable_clean = parse_bool(get_env("GGML_CANN_DISABLE_BUF_POOL_CLEAN").value_or(""));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -731,16 +753,18 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool {
|
|||||||
*/
|
*/
|
||||||
std::unique_ptr<ggml_cann_pool> ggml_backend_cann_context::new_pool_for_device(
|
std::unique_ptr<ggml_cann_pool> ggml_backend_cann_context::new_pool_for_device(
|
||||||
int device) {
|
int device) {
|
||||||
bool disable_vmm = (getenv("GGML_CANN_DISABLE_VMM_POOL") != nullptr);
|
std::string mem_pool_type = get_env("GGML_CANN_MEM_POOL").value_or("");
|
||||||
if (!disable_vmm && ggml_cann_info().devices[device].vmm) {
|
|
||||||
GGML_LOG_INFO("%s: device %d use vmm pool\n", __func__, device);
|
if (mem_pool_type == "prio") {
|
||||||
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_vmm(device));
|
|
||||||
}
|
|
||||||
bool enable_buf_prio = (getenv("GGML_CANN_ENABLE_BUF_PRIO_POOL") != nullptr);
|
|
||||||
if (enable_buf_prio) {
|
|
||||||
GGML_LOG_INFO("%s: device %d use buffer pool with priority queue\n", __func__, device);
|
GGML_LOG_INFO("%s: device %d use buffer pool with priority queue\n", __func__, device);
|
||||||
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_buf_prio(device));
|
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_buf_prio(device));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ggml_cann_info().devices[device].vmm && mem_pool_type != "leg") {
|
||||||
|
GGML_LOG_INFO("%s: device %d use vmm pool\n", __func__, device);
|
||||||
|
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_vmm(device));
|
||||||
|
}
|
||||||
|
|
||||||
GGML_LOG_INFO("%s: device %d use buffer pool\n", __func__, device);
|
GGML_LOG_INFO("%s: device %d use buffer pool\n", __func__, device);
|
||||||
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_buf(device));
|
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_buf(device));
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user