Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-08-12 19:37:53 -04:00)
speculative : PoC for speeding-up inference via speculative sampling (#2926)
* speculative : initial example
* speculative : print encoding speed
* speculative : add --draft CLI arg
This commit is contained in:
8
examples/speculative/CMakeLists.txt
Normal file
8
examples/speculative/CMakeLists.txt
Normal file
@@ -0,0 +1,8 @@
# Build the `speculative` example executable (speculative-sampling demo).
set(TARGET speculative)

add_executable(${TARGET} speculative.cpp)
install(TARGETS ${TARGET} RUNTIME)

# `common` and `llama` are library targets defined elsewhere in this project.
# CMAKE_THREAD_LIBS_INIT is the legacy threads variable — presumably populated
# by a find_package(Threads) at the top level; NOTE(review): prefer linking
# Threads::Threads if the top-level CMakeLists provides it — confirm first.
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)

# BUILD_INFO is an optional target defined by the enclosing project;
# depend on it only when it exists so this file also works standalone.
if(TARGET BUILD_INFO)
    add_dependencies(${TARGET} BUILD_INFO)
endif()
Reference in New Issue
Block a user