Mirror of https://github.com/ggml-org/llama.cpp.git
Introduce C-style API (#370)
* Major refactoring - introduce C-style API
* Clean up
* Add <cassert>
* Add <iterator>
* Add <algorithm> ....
* Fix timing reporting and accumulation
* Measure eval time only for single-token calls
* Change llama_tokenize return meaning
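The bullet list above only summarizes the refactor; as a rough illustration, the sketch below shows how a caller might drive a C-style interface of this shape. The names used (llama_context_default_params, llama_init_from_file, llama_tokenize, llama_eval, llama_token_to_str, llama_free, llama_token) follow the llama.h added in this change, but the signatures are approximated from context rather than copied from the header, so treat them as assumptions.

// Minimal caller-side sketch of the C-style API; signatures are approximate.
#include <stdbool.h>
#include <stdio.h>

#include "llama.h"

int main(int argc, char ** argv) {
    if (argc < 3) {
        fprintf(stderr, "usage: %s <model> <prompt>\n", argv[0]);
        return 1;
    }

    // Load the model with default parameters.
    struct llama_context_params params = llama_context_default_params();
    struct llama_context * ctx = llama_init_from_file(argv[1], params);
    if (ctx == NULL) {
        fprintf(stderr, "failed to load model '%s'\n", argv[1]);
        return 1;
    }

    // Per the changed return meaning, llama_tokenize reports how many tokens
    // it wrote into the buffer, and a negative value signals failure.
    llama_token tokens[512];
    const int n_tokens = llama_tokenize(ctx, argv[2], tokens, 512, /*add_bos=*/true);
    if (n_tokens < 0) {
        fprintf(stderr, "tokenization failed\n");
        llama_free(ctx);
        return 1;
    }

    // Evaluate the whole prompt in one batch; per the commit message, eval
    // time is accumulated only for single-token calls, so this batch call is
    // not counted toward the reported eval timing.
    if (llama_eval(ctx, tokens, n_tokens, /*n_past=*/0, /*n_threads=*/4) != 0) {
        fprintf(stderr, "llama_eval failed\n");
        llama_free(ctx);
        return 1;
    }

    // Print the tokenized prompt.
    for (int i = 0; i < n_tokens; ++i) {
        printf("%d -> '%s'\n", tokens[i], llama_token_to_str(ctx, tokens[i]));
    }

    llama_free(ctx);
    return 0;
}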
@@ -148,7 +148,7 @@ def main():
    model = torch.load(fname_model, map_location="cpu")

    with open(fname_out, "wb") as fout:
        fout.write(struct.pack("i", hparams["vocab_size"]))
        write_header(fout, hparams, ftype)
        write_tokens(fout, tokenizer)

    del model