server : add /detokenize endpoint (#2802)

* Add a /detokenize endpoint to the example server
* remove trailing white-space
2025-06-27 20:05:20 +00:00 · 2023-08-26 16:11:45 -07:00
parent 730d9c681e
commit c1ac54b77a
2 changed files with 27 additions and 0 deletions
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@ -1104,6 +1104,12 @@ static json format_tokenizer_response(const std::vector<llama_token> &tokens)
        {"tokens", tokens}};
 }

+static json format_detokenized_response(std::string content) // Builds the JSON reply payload used by the /detokenize handler.
+{
+    return json{
+        {"content", content}}; // the given text is stored under the "content" key
+}
+
 template <typename T>
 static T json_value(const json &body, const std::string &key, const T &default_value)
 {
@ -1501,6 +1507,21 @@ int main(int argc, char **argv)
        const json data = format_tokenizer_response(tokens);
        return res.set_content(data.dump(), "application/json"); });

+    svr.Post("/detokenize", [&llama](const Request &req, Response &res) // POST /detokenize: converts the request's "tokens" value to text and replies with JSON.
+             {
+        auto lock = llama.lock(); // presumably a scoped guard serializing access to llama for the rest of the handler - confirm against llama.lock()
+
+        const json body = json::parse(req.body); // NOTE(review): no try/catch here; a parse failure is not handled inside this handler
+        std::string content; // stays empty when the request has no "tokens" key
+        if (body.count("tokens") != 0)
+        {
+            const std::vector<llama_token> tokens = body["tokens"]; // NOTE(review): the JSON value is converted without validating its type first
+            content = tokens_to_str(llama.ctx, tokens.cbegin(), tokens.cend()); // passes the whole token range to tokens_to_str
+        }
+
+        const json data = format_detokenized_response(content);
+        return res.set_content(data.dump(), "application/json"); }); // the serialized payload is sent with content type application/json
+
    svr.Post("/embedding", [&llama](const Request &req, Response &res)
             {
        auto lock = llama.lock();