From 9f2da5871f4bbd205b8a3b952cdc76283218d595 Mon Sep 17 00:00:00 2001 From: Diego Devesa Date: Sun, 4 May 2025 14:20:49 +0200 Subject: [PATCH] llama : build windows releases with dl backends (#13220) --- .github/workflows/build.yml | 71 +++++++++++++--------------------- cmake/arm64-windows-msvc.cmake | 6 --- cmake/x64-windows-llvm.cmake | 6 --- tests/CMakeLists.txt | 5 ++- 4 files changed, 30 insertions(+), 58 deletions(-) delete mode 100644 cmake/arm64-windows-msvc.cmake diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bcea1a8af..e42ba47a1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -771,7 +771,7 @@ jobs: uses: hendrikmuhs/ccache-action@v1.2.16 with: key: windows-msys2 - variant: sccache + variant: ccache evict-old-files: 1d - name: Setup ${{ matrix.sys }} @@ -814,26 +814,18 @@ jobs: strategy: matrix: include: - - build: 'noavx-x64' - defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF' - - build: 'avx2-x64' - defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON' - - build: 'avx-x64' - defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF' - - build: 'avx512-x64' - defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON' + - build: 'cpu-x64' + defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF' - build: 'openblas-x64' - defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"' - - build: 'kompute-x64' - defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON' + defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"' - build: 'vulkan-x64' - defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON' + defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON' - build: 'llvm-arm64' defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON' - - build: 'msvc-arm64' - defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON' - build: 'llvm-arm64-opencl-adreno' defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON' + # - build: 'kompute-x64' + # defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON' steps: - name: Clone @@ -846,7 +838,7 @@ jobs: uses: hendrikmuhs/ccache-action@v1.2.16 with: key: windows-latest-cmake-${{ matrix.build }} - variant: sccache + variant: ccache evict-old-files: 1d - name: Clone Kompute submodule @@ -922,39 +914,26 @@ jobs: cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt - - name: Check AVX512F support - id: check_avx512f - if: ${{ matrix.build == 'avx512-x64' }} - continue-on-error: true - run: | - cd build - $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath) - $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim())) - $cl = $(join-path $msvc 'bin\Hostx64\x64\cl.exe') - echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c - & $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main - .\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO" - - name: Test id: cmake_test - # not all machines have native AVX-512 - if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }} + if: ${{ matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' }} run: | cd build ctest -L main -C Release --verbose --timeout 900 - - name: Test (Intel SDE) - id: cmake_test_sde - if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation - run: | - curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz" - # for some weird reason windows tar doesn't like sde tar.xz - 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz - 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar - $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe) - cd build - $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1 - & $sde -future -- ctest -L main -C Release --verbose --timeout 900 + # TODO: disabled for now, consider adding tests for all CPU variants instead + # - name: Test (Intel SDE) + # id: cmake_test_sde + # if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation + # run: | + # curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz" + # # for some weird reason windows tar doesn't like sde tar.xz + # 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz + # 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar + # $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe) + # cd build + # $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1 + # & $sde -future -- ctest -L main -C Release --verbose --timeout 900 - name: Determine tag name id: tag @@ -1039,7 +1018,7 @@ jobs: uses: hendrikmuhs/ccache-action@v1.2.16 with: key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }} - variant: sccache + variant: ccache evict-old-files: 1d - name: Install Cuda Toolkit 11.7 @@ -1117,6 +1096,8 @@ jobs: cmake -S . -B build -G "Ninja Multi-Config" ^ -DLLAMA_BUILD_SERVER=ON ^ -DGGML_NATIVE=OFF ^ + -DGGML_BACKEND_DL=ON ^ + -DGGML_CPU_ALL_VARIANTS=ON ^ -DGGML_CUDA=ON ^ -DGGML_RPC=ON ^ -DCURL_LIBRARY="%CURL_PATH%/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="%CURL_PATH%/include" @@ -1191,7 +1172,7 @@ jobs: uses: hendrikmuhs/ccache-action@v1.2.16 with: key: windows-latest-cmake-sycl - variant: sccache + variant: ccache evict-old-files: 1d - name: Install diff --git a/cmake/arm64-windows-msvc.cmake b/cmake/arm64-windows-msvc.cmake deleted file mode 100644 index c77631420..000000000 --- a/cmake/arm64-windows-msvc.cmake +++ /dev/null @@ -1,6 +0,0 @@ -set( CMAKE_SYSTEM_NAME Windows ) -set( CMAKE_SYSTEM_PROCESSOR arm64 ) - -set( target arm64-pc-windows-msvc ) -set( CMAKE_C_COMPILER_TARGET ${target} ) -set( CMAKE_CXX_COMPILER_TARGET ${target} ) diff --git a/cmake/x64-windows-llvm.cmake b/cmake/x64-windows-llvm.cmake index 0603d738f..77e791407 100644 --- a/cmake/x64-windows-llvm.cmake +++ b/cmake/x64-windows-llvm.cmake @@ -3,9 +3,3 @@ set( CMAKE_SYSTEM_PROCESSOR x86_64 ) set( CMAKE_C_COMPILER clang ) set( CMAKE_CXX_COMPILER clang++ ) - -set( arch_c_flags "-march=native" ) - -set( CMAKE_C_FLAGS_INIT "${arch_c_flags}" ) -set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags}" ) - diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 8acbe689a..72b32df9f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -114,7 +114,10 @@ if (NOT WIN32) target_include_directories(test-json-schema-to-grammar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../tools/server) endif() - llama_build(test-quantize-stats.cpp) + if (NOT GGML_BACKEND_DL) + llama_build(test-quantize-stats.cpp) + endif() + llama_build(test-gbnf-validator.cpp) # build test-tokenizer-1-bpe target once and add many tests