speculative : PoC for speeding-up inference via speculative sampling (#2926)
* speculative : initial example
* speculative : print encoding speed
* speculative : add --draft CLI arg
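
The example builds on the speculative sampling idea: a small draft model cheaply proposes a few tokens (the --draft argument controls how many), and the large target model then verifies them, committing the longest matching prefix so that several tokens can be accepted per expensive evaluation. Below is a minimal, self-contained C++ sketch of that accept/reject loop; the Sampler interface and speculative_step function are hypothetical names for illustration only, not the llama.cpp API, and verification is shown token-by-token here, whereas the real example evaluates all drafted tokens with the target model in a single batch, which is where the speedup comes from.

// Sketch of greedy speculative decoding with a hypothetical Sampler interface.
// Not the llama.cpp API; only illustrates the accept/reject loop.
#include <cstdio>
#include <functional>
#include <vector>

using Token = int;
// Hypothetical single-step sampler: given the current context, return the next token.
using Sampler = std::function<Token(const std::vector<Token> &)>;

// Draft n_draft candidate tokens with the small model, then verify them with the
// large model, keeping the longest matching prefix. Returns how many tokens were
// appended to the context in this step.
static int speculative_step(std::vector<Token> & ctx,
                            const Sampler & draft_model,
                            const Sampler & target_model,
                            int n_draft) {
    // 1) draft: the cheap model proposes a short continuation
    std::vector<Token> proposal;
    std::vector<Token> tmp = ctx;
    for (int i = 0; i < n_draft; ++i) {
        const Token t = draft_model(tmp);
        proposal.push_back(t);
        tmp.push_back(t);
    }

    // 2) verify: the expensive model checks each proposed token in order
    //    (the real example does this in one batched evaluation)
    int n_accepted = 0;
    for (const Token t : proposal) {
        const Token expected = target_model(ctx);
        ctx.push_back(expected);
        ++n_accepted;
        if (expected != t) {
            break; // mismatch: keep the target model's token and re-draft from here
        }
    }
    return n_accepted;
}

int main() {
    // toy "models": the draft proposes back+1; the target agrees except at one position
    Sampler draft  = [](const std::vector<Token> & c) {
        return (Token) (c.empty() ? 0 : c.back() + 1);
    };
    Sampler target = [](const std::vector<Token> & c) {
        return (Token) (c.size() == 4 ? 100 : (c.empty() ? 0 : c.back() + 1));
    };

    std::vector<Token> ctx = { 0 };
    while (ctx.size() < 10) {
        const int n = speculative_step(ctx, draft, target, /*n_draft =*/ 4);
        printf("accepted %d tokens, context size = %zu\n", n, ctx.size());
    }
    return 0;
}

When the draft model agrees with the target, each step commits several tokens for roughly the cost of one target evaluation; on a mismatch, only the diverging token is taken from the target and drafting restarts from there.
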
examples/CMakeLists.txt:

@@ -23,6 +23,7 @@ else()
     add_subdirectory(train-text-from-scratch)
     add_subdirectory(convert-llama2c-to-ggml)
     add_subdirectory(simple)
+    add_subdirectory(speculative)
     add_subdirectory(embd-input)
     add_subdirectory(llama-bench)
     add_subdirectory(beam-search)
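
The added add_subdirectory(speculative) line registers the new examples/speculative directory with the examples build, so the new program is compiled alongside the other examples; the surrounding lines are unchanged context from the existing list.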