From 96ca6e8d23c046aae5df8fc2a534e15ff18398cd Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 19 Mar 2025 10:48:42 +0200 Subject: [PATCH] swift : adapt to new API --- .../llama.cpp.swift/LibLlama.swift | 64 +++++++------------ 1 file changed, 24 insertions(+), 40 deletions(-) diff --git a/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift b/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift index f6e31abc9..d04c6353e 100644 --- a/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift +++ b/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift @@ -5,35 +5,19 @@ enum LlamaError: Error { case couldNotInitializeContext } -func llama_batch_clear(_ batch: inout llama_batch) { - batch.n_tokens = 0 -} - -func llama_batch_add(_ batch: inout llama_batch, _ id: llama_token, _ pos: llama_pos, _ seq_ids: [llama_seq_id], _ logits: Bool) { - batch.token [Int(batch.n_tokens)] = id - batch.pos [Int(batch.n_tokens)] = pos - batch.n_seq_id[Int(batch.n_tokens)] = Int32(seq_ids.count) - for i in 0.. - private var batch: llama_batch + private var batch: OpaquePointer private var tokens_list: [llama_token] var is_done: Bool = false /// This variable is used to store temporarily invalid cchars private var temporary_invalid_cchars: [CChar] - var n_len: Int32 = 1024 + var n_len: Int32 = 128 var n_cur: Int32 = 0 var n_decode: Int32 = 0 @@ -42,7 +26,7 @@ actor LlamaContext { self.model = model self.context = context self.tokens_list = [] - self.batch = llama_batch_init(512, 0, 1) + self.batch = llama_batch_ext_init(512, 1) self.temporary_invalid_cchars = [] let sparams = llama_sampler_chain_default_params() self.sampling = llama_sampler_chain_init(sparams) @@ -53,7 +37,7 @@ actor LlamaContext { deinit { llama_sampler_free(sampling) - llama_batch_free(batch) + llama_batch_ext_free(batch) llama_model_free(model) llama_free(context) llama_backend_free() @@ -111,7 +95,7 @@ actor LlamaContext { } func get_n_tokens() -> Int32 { - return batch.n_tokens; + return llama_batch_ext_get_n_tokens(batch) } func completion_init(text: String) { @@ -133,25 +117,25 @@ actor LlamaContext { print(String(cString: token_to_piece(token: id) + [0])) } - llama_batch_clear(&batch) + llama_batch_ext_clear(batch) for i1 in 0.. String { var new_token_id: llama_token = 0 - new_token_id = llama_sampler_sample(sampling, context, batch.n_tokens - 1) + new_token_id = llama_sampler_sample(sampling, context, llama_batch_ext_get_n_tokens(batch) - 1) if llama_vocab_is_eog(vocab, new_token_id) || n_cur == n_len { print("\n") @@ -178,13 +162,13 @@ actor LlamaContext { print(new_token_str) // tokens_list.append(new_token_id) - llama_batch_clear(&batch) - llama_batch_add(&batch, new_token_id, n_cur, [0], true) + llama_batch_ext_clear(batch) + llama_batch_ext_add_text(batch, new_token_id, n_cur, [llama_seq_id(0)], 1, true) n_decode += 1 n_cur += 1 - if llama_decode(context, batch) != 0 { + if llama_decode_ext(context, batch) != 0 { print("failed to evaluate llama!") } @@ -201,21 +185,21 @@ actor LlamaContext { for _ in 0..