import Foundation

// Bridges the SwiftUI front end to the llama.cpp context: published properties drive the UI,
// and @MainActor keeps all of that state on the main thread.
@MainActor
class LlamaState: ObservableObject {
    @Published var messageLog = ""
    @Published var cacheCleared = false

    private var llamaContext: LlamaContext?
    private var defaultModelUrl: URL? {
        Bundle.main.url(forResource: "ggml-model", withExtension: "gguf", subdirectory: "models")
        // Bundle.main.url(forResource: "llama-2-7b-chat", withExtension: "Q2_K.gguf", subdirectory: "models")
    }

    init() {
        do {
            try loadModel(modelUrl: defaultModelUrl)
        } catch {
            messageLog += "Error!\n"
        }
    }

    func loadModel(modelUrl: URL?) throws {
        messageLog += "Loading model...\n"
        if let modelUrl {
            llamaContext = try LlamaContext.create_context(path: modelUrl.path())
            messageLog += "Loaded model \(modelUrl.lastPathComponent)\n"
        } else {
            messageLog += "Could not locate model\n"
        }
    }

    // Runs a full completion for the given prompt, streaming generated text into messageLog.
    func complete(text: String) async {
        guard let llamaContext else {
            return
        }

        await llamaContext.completion_init(text: text)
        messageLog += "\(text)"

        // Generate one piece of text per iteration until the context reaches its target length (n_len).
        while await llamaContext.n_cur <= llamaContext.n_len {
            let result = await llamaContext.completion_loop()
            messageLog += "\(result)"
        }
        await llamaContext.clear()
        messageLog += "\n\ndone\n"
    }

    // Benchmarks prompt processing (pp) and text generation (tg); skips the full run on slow devices.
    func bench() async {
        guard let llamaContext else {
            return
        }

        messageLog += "\n"
        messageLog += "Running benchmark...\n"
        messageLog += "Model info: "
        messageLog += await llamaContext.model_info() + "\n"

        let t_start = DispatchTime.now().uptimeNanoseconds
        await llamaContext.bench(pp: 8, tg: 4, pl: 1) // heat up
        let t_end = DispatchTime.now().uptimeNanoseconds

        let t_heat = Double(t_end - t_start) / 1_000_000_000.0
        messageLog += "Heat up time: \(t_heat) seconds, please wait...\n"

        // if more than 5 seconds, then we're probably running on a slow device
        if t_heat > 5.0 {
            messageLog += "Heat up time is too long, aborting benchmark\n"
            return
        }

        let result = await llamaContext.bench(pp: 512, tg: 128, pl: 1, nr: 3)

        messageLog += "\(result)"
        messageLog += "\n"
    }

    func clear() async {
        guard let llamaContext else {
            return
        }

        await llamaContext.clear()
        messageLog = ""
    }
}
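// The LlamaContext actor is defined elsewhere in the example (LibLlama.swift). Below is a
// rough sketch of the surface this class relies on, inferred from the calls above; the member
// names come from this file, but the parameter and return types are assumptions:
//
// actor LlamaContext {
//     var n_len: Int32                                                  // target generation length
//     var n_cur: Int32                                                  // tokens generated so far
//     static func create_context(path: String) throws -> LlamaContext  // load a GGUF model
//     func model_info() -> String
//     func completion_init(text: String)
//     func completion_loop() -> String                                  // next piece of generated text
//     func bench(pp: Int, tg: Int, pl: Int, nr: Int = 1) -> String      // benchmark report
//     func clear()
// }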
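// A minimal usage sketch (not part of the original file): a hypothetical SwiftUI view that
// observes LlamaState and drives completion from a text field. The view name and layout are
// illustrative only; the example's real UI lives in ContentView.swift.
//
// import SwiftUI
//
// struct LlamaDemoView: View {
//     @StateObject private var llamaState = LlamaState()
//     @State private var prompt = ""
//
//     var body: some View {
//         VStack {
//             ScrollView { Text(llamaState.messageLog) }   // shows the streamed log
//             TextField("Prompt", text: $prompt)
//             HStack {
//                 Button("Send")  { Task { await llamaState.complete(text: prompt) } }
//                 Button("Bench") { Task { await llamaState.bench() } }
//                 Button("Clear") { Task { await llamaState.clear() } }
//             }
//         }
//         .padding()
//     }
// }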