imatrix: add option to display importance score statistics for a given imatrix file (#12718)

* Add --show-statistics option

* Add --show-statistics logic

* Add tensor name parsing

* Tidy output format

* Fix typo in title

* Improve tensor influence ranking

* Add better statistics

* Change statistics' sort order

* Add Cosine Similarity

* Add header search path

* Change header search path to private

* Add weighted statistics per layer

* Update report title

* Refactor compute_statistics out of main

* Refactor compute_cossim out of load_imatrix

* Refactor compute_statistics out of load_imatrix

* Move imatrix statistics calculation into its own functions

* Add checks and validations

* Remove unnecessary include directory

* Rename labels

* Add m_stats getter and refactor compute_statistics out of load_imatrix

* Refactor variable names

* Minor cosmetic change

* Retrigger checks (empty commit)

* Rerun checks (empty commit)

* Fix unnecessary type promotion

Co-authored-by: compilade <git@compilade.net>

* Reverting change to improve code readability

* Rerun checks (empty commit)

* Rerun checks (empty commit)

* Rerun checks - third time's the charm 🤞 (empty commit)

* Minor cosmetic change

* Update README

* Fix typo

* Update README

* Rerun checks (empty commit)

* Re-implement changes on top of #9400

* Update README.md

* Update README

* Update README.md

Co-authored-by: compilade <git@compilade.net>

* Update README.md

Co-authored-by: compilade <git@compilade.net>

* Update README.md

* Remove duplicate option in print_usage()

* Update README.md

* Update README.md

Co-authored-by: compilade <git@compilade.net>

* Update README.md

Co-authored-by: compilade <git@compilade.net>

* Remove input check

* Remove commented out code

---------

Co-authored-by: compilade <git@compilade.net>
This commit is contained in:
Ed Addario
2025-07-22 13:33:37 +01:00
committed by Aaron Teo
parent 888b75ba61
commit 45fc00e2c0
4 changed files with 339 additions and 22 deletions

View File

@@ -16,6 +16,8 @@
#include <fstream>
#include <unordered_map>
#include <map>
#include <regex>
#include <numeric>
#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
@@ -24,10 +26,10 @@
// Print an example command line showing the imatrix tool's options.
static void print_usage(int, char ** argv) {
    LOG("\nexample usage:\n");
    // NOTE(review): two variants of the option list appear below; adjacent
    // string literals concatenate, so BOTH would be printed. This looks like
    // a merge/diff artifact — confirm which single variant is intended.
    LOG("\n %s \\\n"
    " -m model.gguf -f some-text.txt [-o imatrix.gguf] [--process-output] \\\n"
    " [--no-ppl] [--chunk 123] [--output-frequency 10] [--save-frequency 0] \\\n"
    " [--in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf ...] \\\n"
    " [--parse-special]\n" , argv[0]);
    " -m model.gguf -f some-text.txt [-o imatrix.gguf] [--no-ppl] \\\n"
    " [--process-output] [--chunk 123] [--save-frequency 0] [--output-frequency 10] \\\n"
    " [--in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf ...] [--parse-special] \\\n"
    " [--show-statistics] [...]\n" , argv[0]);
    LOG("\n");
}
@@ -40,6 +42,21 @@ struct Stats {
std::vector<int64_t> counts;
};
// Per-tensor summary of imatrix activation statistics, filled in by
// compute_statistics() and compute_cossim().
struct tensor_statistics {
    std::string tensor;        // full tensor name as stored in the imatrix file
    Stats stats;               // raw accumulated activation data for this tensor
    float total_sqract = 0.0f; // sum of normalized squared activations, Σ(Act²)
    float mean_sqract = 0.0f;  // mean of the normalized squared activations
    float max_sqract = 0.0f;   // maximum normalized squared activation
    float min_sqract = 0.0f;   // minimum normalized squared activation
    int elements = 0;          // number of activation entries
    float stddev = 0.0f;       // standard deviation of the activations
    float active = 0.0f;       // fraction of activations above a small magnitude threshold
    float entropy = 0.0f;      // Shannon entropy of the activation distribution
    float zd = 0.0f;           // fraction of activations more than 1 stddev above the mean
    float cossim = 0.0f;       // cosine similarity with the same tensor in the previous layer
};
class IMatrixCollector {
public:
IMatrixCollector() = default;
@@ -49,6 +66,7 @@ public:
void save_imatrix(int32_t n_chunk = -1) const;
bool load_imatrix_legacy(const char * fname);
bool load_imatrix(const char * file_name);
const std::unordered_map<std::string, Stats> & get_mstats() const { return m_stats; }
private:
std::unordered_map<std::string, Stats> m_stats;
common_params m_params;
@@ -78,6 +96,126 @@ static std::string filter_tensor_name(const char * name) {
return wname;
}
// Split a tensor name on '.' and derive two display fields:
//  - layer:  the token following "blk" (e.g. "blk.7.ffn_up.weight" -> "7"),
//            or "-" when the name has no block prefix
//  - tensor: the token preceding "weight" (e.g. -> "ffn_up"),
//            or the full input when no ".weight" suffix is present
static void process_tensor_name(const std::string & input, std::string & layer, std::string & tensor) {
    std::vector<std::string> parts;
    std::istringstream ss(input);
    for (std::string tok; std::getline(ss, tok, '.'); ) {
        parts.push_back(tok);
    }

    // layer = token right after "blk", if any
    for (size_t idx = 0; idx + 1 < parts.size(); ++idx) {
        if (parts[idx] == "blk") {
            layer = parts[idx + 1];
            break;
        }
    }

    // tensor = token right before "weight", if any
    for (size_t idx = 1; idx < parts.size(); ++idx) {
        if (parts[idx] == "weight") {
            tensor = parts[idx - 1];
            break;
        }
    }

    if (tensor.empty()) {
        tensor = input;
    }
    if (layer.empty()) {
        layer = "-";
    }
}
// Compute summary statistics (total/min/max/mean, stddev, active ratio,
// entropy, and z-score density) over the normalized activations of a single
// tensor, and append the result as a new entry in `tstats`.
static void compute_statistics(std::vector<tensor_statistics> & tstats, const std::string & name, const Stats & e) {
    // The empty-counts check must come first: the size-mismatch test below
    // computes `values.size() % counts.size()`, which is undefined behavior
    // (division by zero) when counts is empty.
    if (e.counts.empty()) {
        LOG_ERR("%s: there are no activations for tensor %s. The imatrix may be suboptimal\n", __func__, name.c_str());
        return;
    }
    if (e.values.size() % e.counts.size() != 0) {
        LOG_ERR("%s: activation size mismatch for tensor %s (%zu vs %zu)\n", __func__, name.c_str(), e.counts.size(), e.values.size());
        return;
    }

    const int n_mat = e.counts.size();
    const int row_size = e.values.size() / n_mat;

    // Normalize each accumulated squared activation by its chunk count.
    std::vector<float> activations;
    activations.reserve(e.values.size());
    for (int i = 0; i < n_mat; ++i) {
        for (int j = 0; j < row_size; ++j) {
            activations.push_back(e.values[i*row_size + j] / e.counts[i]);
        }
    }

    const float act_total = std::accumulate(activations.begin(), activations.end(), 0.0f);
    const float act_max = *std::max_element(activations.begin(), activations.end());
    const float act_min = *std::min_element(activations.begin(), activations.end());
    const float act_mean = act_total / activations.size();
    const float act_sqr_total = std::inner_product(activations.begin(), activations.end(), activations.begin(), 0.0f);
    const float act_var = (act_sqr_total / activations.size()) - (act_mean * act_mean);
    // max() clamps a tiny negative variance caused by float rounding.
    const float act_dev = std::sqrt(std::max(0.0f, act_var));

    // Fraction of activations whose magnitude exceeds a small threshold.
    const float threshold = 1e-5f;
    const int inactive_count = std::count_if(activations.begin(), activations.end(),
                                             [threshold](const float v) { return fabsf(v) <= threshold; });
    const float active_ratio = 1 - static_cast<float>(inactive_count) / activations.size();

    // Shannon entropy of the activation distribution, treating each
    // normalized activation as a probability mass; 0 if the total is not positive.
    float entropy = 0;
    if (act_total > 0) {
        for (const auto act : activations) {
            if (const float p = act / act_total; p > 0) {
                entropy -= p * std::log2(p);
            }
        }
    }

    // Count activations more than one standard deviation above the mean.
    int z_score = 0;
    if (act_dev > 0.0f) {
        for (const auto act : activations) {
            if (const float p = (act - act_mean) / act_dev; p > 1) {
                z_score++;
            }
        }
    }

    auto & ts = tstats.emplace_back();
    ts.tensor       = name;
    ts.stats        = e;
    ts.total_sqract = act_total;
    ts.mean_sqract  = act_mean;
    ts.max_sqract   = act_max;
    ts.min_sqract   = act_min;
    ts.elements     = static_cast<int>(activations.size());
    ts.stddev       = act_dev;
    ts.active       = active_ratio;
    ts.entropy      = entropy;
    ts.zd           = static_cast<float>(z_score) / ts.elements;
}
// For every tensor named "blk.<n>...." compute the cosine similarity between
// its activation values and those of the matching tensor in the previous
// layer ("blk.<n-1>...."). Tensors without a previous-layer counterpart keep
// the default cossim of 0.
static void compute_cossim(std::vector<tensor_statistics> & tstats) {
    static const std::regex pattern(R"(blk\.(\d+)\.)");
    for (auto & ts : tstats) {
        std::smatch match;
        if (!std::regex_search(ts.tensor, match, pattern)) {
            ts.cossim = 0;
            continue;
        }
        // Build the name of the same tensor one layer earlier.
        const int blk = std::stoi(match[1]);
        std::string tname(ts.tensor);
        tname.replace(match.position(1), match.length(1), std::to_string(blk - 1));
        const auto prev = std::find_if(tstats.begin(), tstats.end(),
                                       [&tname](const tensor_statistics & t) { return t.tensor == tname; });
        if (prev == tstats.end()) {
            continue; // no previous layer (e.g. blk.0) — cossim stays 0
        }
        // Mismatched sizes would make inner_product read past the end of the
        // shorter vector (undefined behavior), so skip such pairs.
        if (prev->stats.values.size() != ts.stats.values.size()) {
            continue;
        }
        const float dp = std::inner_product(ts.stats.values.begin(), ts.stats.values.end(),
                                            prev->stats.values.begin(), 0.0f);
        const float curr_mag = std::sqrt(std::inner_product(ts.stats.values.begin(), ts.stats.values.end(),
                                                            ts.stats.values.begin(), 0.0f));
        const float prev_mag = std::sqrt(std::inner_product(prev->stats.values.begin(), prev->stats.values.end(),
                                                            prev->stats.values.begin(), 0.0f));
        // Guard against all-zero activations: 0/0 would yield NaN.
        if (curr_mag > 0.0f && prev_mag > 0.0f) {
            ts.cossim = dp / (curr_mag * prev_mag);
        }
    }
}
bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data) {
GGML_UNUSED(user_data);
@@ -678,7 +816,6 @@ static bool ik_collect_imatrix(struct ggml_tensor * t, bool ask, void * user_dat
return g_collector.collect_imatrix(t, ask, user_data);
}
struct results_log_softmax {
double log_softmax;
float logit;
@@ -926,6 +1063,113 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params, c
return true;
}
// Load a single imatrix file, compute per-tensor and per-layer activation
// statistics, and print them as formatted tables.
// Returns false when the input is missing/invalid or statistics cannot be computed.
static bool show_statistics(const common_params & params) {
    std::vector<tensor_statistics> ts;
    // Exactly one imatrix file is required (simplified from empty() || size() > 1).
    if (params.in_files.size() != 1) {
        LOG_ERR("\nError: a single imatrix file is required to compute tensor statistics\n\n");
        return false;
    }
    if (g_collector.load_imatrix(params.in_files[0].c_str())) {
        for (const auto & [name, stats] : g_collector.get_mstats()) {
            compute_statistics(ts, name, stats);
        }
    } else {
        LOG_ERR("\nError: %s is not a valid imatrix file\n\n", params.in_files[0].c_str());
        return false;
    }
    if (!ts.empty()) {
        compute_cossim(ts);
    } else {
        LOG_ERR("Error: cannot compute statistics for %s\n\n", params.in_files[0].c_str());
        return false;
    }

    // Order by tensor name, then by descending total squared activation
    // (most influential tensors first within each name group).
    struct tensor_comparer {
        bool operator()(const tensor_statistics & a, const tensor_statistics & b) const {
            std::string layer, name_a, name_b;
            process_tensor_name(a.tensor, layer, name_a);
            process_tensor_name(b.tensor, layer, name_b);
            return name_a < name_b || (name_a == name_b && a.total_sqract > b.total_sqract);
        }
    };
    std::sort(ts.begin(), ts.end(), tensor_comparer());

    // Per-layer accumulators for element-weighted averages.
    struct weighted_stats {
        float weighted_bias = 0.0f;
        float weighted_zd = 0.0f;
        float weighted_cossim = 0.0f;
        int total_elements = 0;
    };
    std::map<int, weighted_stats> ws;

    LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast<int>(ts.size()));
    LOG_INF("\n%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", " Layer", " Tensor", " Σ(Act²)",
            " Min", " Max", " μ", " σ", " % Active", "N", " Entropy", "E (norm)", "ZD",
            " CosSim");
    LOG_INF(
        "=============================================================================================================="
        "===========================================================\n");
    for (const auto & tstat : ts) {
        std::string layer, name;
        process_tensor_name(tstat.tensor, layer, name);

        int blk;
        try {
            blk = std::stoi(layer);
        } catch (const std::exception &) { // exception object unused
            blk = -1; // not a block layer
        }

        LOG_INF("%5s\t%-20s\t%10.2f\t%8.4f\t%11.4f\t%6.2f\t%6.2f\t%8.2f%%\t%6d\t%10.4f\t%6.2f%%\t%10.2f%%\t%8.4f\n",
                layer.c_str(), name.c_str(), tstat.total_sqract, tstat.min_sqract, tstat.max_sqract, tstat.mean_sqract,
                tstat.stddev, tstat.active * 100.0f, tstat.elements, tstat.entropy,
                100.0f * (tstat.entropy / std::log2(tstat.elements)), 100.0f * tstat.zd, tstat.cossim);

        // std::map::operator[] value-initializes a missing entry, so a single
        // lookup replaces the previous find-then-insert branching.
        auto & acc = ws[blk];
        acc.weighted_bias   += tstat.elements * tstat.total_sqract;
        acc.weighted_zd     += tstat.elements * tstat.zd;
        acc.weighted_cossim += tstat.elements * tstat.cossim;
        acc.total_elements  += tstat.elements;
    }

    const int layers = std::count_if(ws.begin(), ws.end(), [](const auto & kv) { return kv.first >= 0; });
    LOG_INF("\nComputing weighted average statistics per layer (%d layers)\n", layers);
    LOG_INF("\n%s\t%s\t%s\t%s\n", " Layer", " μΣ(Act²)", " μZD", "μCosSim");
    LOG_INF("================================================\n");
    for (const auto & [layer, stats] : ws) {
        // Skip empty accumulators and non-block entries (layer < 0).
        if (stats.total_elements == 0 || layer < 0) {
            continue;
        }
        const float bias   = stats.weighted_bias / stats.total_elements;
        const float zd     = stats.weighted_zd / stats.total_elements;
        const float cossim = stats.weighted_cossim / stats.total_elements;
        LOG_INF("%5d\t%14.2f\t%10.4f%%\t%6.4f\n", layer, bias, 100.0f * zd, cossim);
    }
    LOG_INF("\n");

    return true;
}
int main(int argc, char ** argv) {
common_params params;
@@ -938,6 +1182,13 @@ int main(int argc, char ** argv) {
return 1;
}
if (params.show_statistics) {
if (!show_statistics(params)) {
return 1;
}
return 0;
}
common_init();
const int32_t n_ctx = params.n_ctx;