server: Add "tokens per second" information in the backend (#10548)

* add cmake rvv support
* add timings
* remove space
* update readme
* fix
* fix code
* remove empty line
* add test

---------

Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
2025-08-18 05:56:00 -04:00 · 2024-12-02 21:45:54 +08:00
parent 991f8aabee
commit 64ed2091b2
5 changed files with 44 additions and 1 deletions
--- a/common/common.h
+++ b/common/common.h
@@ -133,6 +133,7 @@ struct common_params_sampling {
    bool    penalize_nl        = false; // consider newlines as a repeatable token
    bool    ignore_eos         = false;
    bool    no_perf            = false; // disable performance metrics
+    bool    timing_per_token   = false;

    std::vector<std::string> dry_sequence_breakers = {"\n", ":", "\"", "*"};     // default sequence breakers for DRY