diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml
index c6227f50..d9be352a 100644
--- a/.circleci/continue_config.yml
+++ b/.circleci/continue_config.yml
@@ -235,10 +235,8 @@ jobs:
           name: Build
           command: |
             export CMAKE_PREFIX_PATH=~/Qt/6.5.1/gcc_64/lib/cmake
-            mkdir build
-            cd build
-            ~/Qt/Tools/CMake/bin/cmake -DCMAKE_BUILD_TYPE=Release -S ../gpt4all-chat -B .
-            ~/Qt/Tools/CMake/bin/cmake --build . --target all
+            ~/Qt/Tools/CMake/bin/cmake -DCMAKE_BUILD_TYPE=Release -S gpt4all-chat -B build
+            ~/Qt/Tools/CMake/bin/cmake --build build --target all

   build-gpt4all-chat-windows:
     machine:
@@ -291,17 +289,15 @@ jobs:
             $Env:INCLUDE = "${Env:INCLUDE};C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.29.30133\include"
             $Env:INCLUDE = "${Env:INCLUDE};C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.29.30133\ATLMFC\include"
             $Env:VULKAN_SDK = "C:\VulkanSDK\1.3.261.1"
-            mkdir build
-            cd build
             & "C:\Qt\Tools\CMake_64\bin\cmake.exe" `
               "-DCMAKE_GENERATOR:STRING=Ninja" `
               "-DCMAKE_BUILD_TYPE=Release" `
               "-DCMAKE_PREFIX_PATH:PATH=C:\Qt\6.5.1\msvc2019_64" `
               "-DCMAKE_MAKE_PROGRAM:FILEPATH=C:\Qt\Tools\Ninja\ninja.exe" `
               "-DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON" `
-              "-S ..\gpt4all-chat" `
-              "-B ."
-            & "C:\Qt\Tools\Ninja\ninja.exe"
+              "-S gpt4all-chat" `
+              "-B build"
+            & "C:\Qt\Tools\Ninja\ninja.exe" -C build

   build-gpt4all-chat-macos:
     macos:
@@ -332,17 +328,15 @@ jobs:
       - run:
           name: Build
           command: |
-            mkdir build
-            cd build
             ~/Qt/Tools/CMake/CMake.app/Contents/bin/cmake \
               -DCMAKE_GENERATOR:STRING=Ninja \
               -DBUILD_UNIVERSAL=ON \
               -DCMAKE_BUILD_TYPE=Release \
               -DCMAKE_PREFIX_PATH:PATH=~/Qt/6.5.1/macos/lib/cmake/Qt6 \
               -DCMAKE_MAKE_PROGRAM:FILEPATH=~/Qt/Tools/Ninja/ninja \
-              -S ../gpt4all-chat \
-              -B .
-            ~/Qt/Tools/CMake/CMake.app/Contents/bin/cmake --build . --target all
+              -S gpt4all-chat \
+              -B build
+            ~/Qt/Tools/CMake/CMake.app/Contents/bin/cmake --build build --target all
   build-ts-docs:
     docker:
       - image: cimg/base:stable
@@ -407,13 +401,10 @@ jobs:
       - run:
          name: Build C library
          command: |
-            git submodule init
-            git submodule update
+            git submodule update --init --recursive
             cd gpt4all-backend
-            mkdir build
-            cd build
-            cmake ..
-            cmake --build . --parallel
+            cmake -B build
+            cmake --build build --parallel
       - run:
          name: Build wheel
          command: |
@@ -440,13 +431,10 @@ jobs:
       - run:
          name: Build C library
          command: |
-            git submodule init
-            git submodule update
+            git submodule update --init  # don't use --recursive because macOS doesn't use Kompute
             cd gpt4all-backend
-            mkdir build
-            cd build
-            cmake .. -DCMAKE_OSX_ARCHITECTURES="x86_64;arm64"
-            cmake --build . --parallel
+            cmake -B build -DCMAKE_OSX_ARCHITECTURES="x86_64;arm64"
+            cmake --build build --parallel
       - run:
          name: Build wheel
          command: |
@@ -482,16 +470,13 @@ jobs:
       - run:
          name: Build C library
          command: |
-            git submodule init
-            git submodule update
+            git submodule update --init --recursive
             cd gpt4all-backend
-            mkdir build
-            cd build
             $Env:Path += ";C:\ProgramData\mingw64\mingw64\bin"
             $Env:Path += ";C:\VulkanSDK\1.3.261.1\bin"
             $Env:VULKAN_SDK = "C:\VulkanSDK\1.3.261.1"
-            cmake -G "MinGW Makefiles" .. -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DKOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER=OFF
-            cmake --build . --parallel
+            cmake -G "MinGW Makefiles" -B build -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DKOMPUTE_OPT_USE_BUILT_IN_VULKAN_HEADER=OFF
+            cmake --build build --parallel
       - run:
          name: Build wheel
          # TODO: As part of this task, we need to move mingw64 binaries into package.
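Note: the CI changes above drop the manual "mkdir build; cd build; cmake .." sequence in favor of CMake's -S/-B flags, which configure an out-of-source build without changing the working directory (available since CMake 3.13). A minimal sketch of the pattern, with illustrative paths not tied to this repo's layout:

    # configure: -S names the source tree, -B names (and creates) the build tree
    cmake -S path/to/source -B build -DCMAKE_BUILD_TYPE=Release
    # build: drive the underlying generator through cmake itself
    cmake --build build --parallel

One practical payoff, visible in the Windows job, is that tools like ninja can then be pointed at the build tree explicitly (ninja -C build) instead of relying on the current directory.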
diff --git a/gpt4all-backend/CMakeLists.txt b/gpt4all-backend/CMakeLists.txt
index 39152a2e..f20404e3 100644
--- a/gpt4all-backend/CMakeLists.txt
+++ b/gpt4all-backend/CMakeLists.txt
@@ -39,10 +39,6 @@ else()
     message(STATUS "Interprocedural optimization support detected")
 endif()

-if(NOT APPLE)
-    set(LLAMA_KOMPUTE YES)
-endif()
-
 include(llama.cpp.cmake)

 set(BUILD_VARIANTS default avxonly)
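Note: with the hard-coded set(LLAMA_KOMPUTE YES) gone from CMakeLists.txt, the Kompute backend becomes a regular cache option in llama.cpp.cmake (next diff), defaulting to ON everywhere except Apple platforms. Like any CMake cache option it can be overridden on the command line; a sketch, where the invocation itself is illustrative rather than taken from this diff:

    # force-disable the Vulkan/Kompute backend on a platform where it defaults to ON
    cmake -S gpt4all-backend -B build -DLLAMA_KOMPUTE=OFF
    # or force-enable it where the default is OFF
    cmake -S gpt4all-backend -B build -DLLAMA_KOMPUTE=ON

Cache values are sticky: once set in an existing build tree, they keep their value until changed or the cache is deleted.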
diff --git a/gpt4all-backend/llama.cpp.cmake b/gpt4all-backend/llama.cpp.cmake
index f8aa532f..d62a875b 100644
--- a/gpt4all-backend/llama.cpp.cmake
+++ b/gpt4all-backend/llama.cpp.cmake
@@ -71,12 +71,19 @@ option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer"
 #    option(LLAMA_F16C "llama: enable F16C" ON)
 #endif()

+if (APPLE)
+    set(LLAMA_KOMPUTE_DEFAULT OFF)
+else()
+    set(LLAMA_KOMPUTE_DEFAULT ON)
+endif()
+
 # 3rd party libs
 option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
 option(LLAMA_OPENBLAS "llama: use OpenBLAS" OFF)
 #option(LLAMA_CUBLAS "llama: use cuBLAS" OFF)
 #option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
 #option(LLAMA_METAL "llama: use Metal" OFF)
+option(LLAMA_KOMPUTE "llama: use Kompute" ${LLAMA_KOMPUTE_DEFAULT})
 set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
 set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
 set(LLAMA_CUDA_DMMV_Y "1" CACHE STRING "llama: y block size for dmmv CUDA kernels")
@@ -153,6 +160,11 @@ if (LLAMA_OPENBLAS)
 endif()

 if (LLAMA_KOMPUTE)
+    if (NOT EXISTS "${LLAMA_DIR}/kompute/CMakeLists.txt")
+        message(FATAL_ERROR "Kompute not found")
+    endif()
+    message(STATUS "Kompute found")
+
     add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1)
     find_package(Vulkan COMPONENTS glslc REQUIRED)
     find_program(glslc_executable NAMES glslc HINTS Vulkan::glslc)
@@ -220,91 +232,86 @@ if (LLAMA_KOMPUTE)
         endforeach()
     endfunction()

-    if (EXISTS "${LLAMA_DIR}/kompute/CMakeLists.txt")
-        message(STATUS "Kompute found")
-        set(KOMPUTE_OPT_LOG_LEVEL Critical CACHE STRING "Kompute log level")
-        add_subdirectory(${LLAMA_DIR}/kompute)
+    set(KOMPUTE_OPT_LOG_LEVEL Critical CACHE STRING "Kompute log level")
+    add_subdirectory(${LLAMA_DIR}/kompute)

-        # Compile our shaders
-        compile_shader(SOURCES
-            kompute-shaders/op_scale.comp
-            kompute-shaders/op_scale_8.comp
-            kompute-shaders/op_add.comp
-            kompute-shaders/op_addrow.comp
-            kompute-shaders/op_mul.comp
-            kompute-shaders/op_silu.comp
-            kompute-shaders/op_relu.comp
-            kompute-shaders/op_gelu.comp
-            kompute-shaders/op_softmax.comp
-            kompute-shaders/op_norm.comp
-            kompute-shaders/op_rmsnorm.comp
-            kompute-shaders/op_diagmask.comp
-            kompute-shaders/op_mul_mat_mat_f32.comp
-            kompute-shaders/op_mul_mat_f16.comp
-            kompute-shaders/op_mul_mat_q8_0.comp
-            kompute-shaders/op_mul_mat_q4_0.comp
-            kompute-shaders/op_mul_mat_q4_1.comp
-            kompute-shaders/op_mul_mat_q6_k.comp
-            kompute-shaders/op_getrows_f16.comp
-            kompute-shaders/op_getrows_q4_0.comp
-            kompute-shaders/op_getrows_q4_1.comp
-            kompute-shaders/op_getrows_q6_k.comp
-            kompute-shaders/op_rope_f16.comp
-            kompute-shaders/op_rope_f32.comp
-            kompute-shaders/op_cpy_f16_f16.comp
-            kompute-shaders/op_cpy_f16_f32.comp
-            kompute-shaders/op_cpy_f32_f16.comp
-            kompute-shaders/op_cpy_f32_f32.comp
-        )
+    # Compile our shaders
+    compile_shader(SOURCES
+        kompute-shaders/op_scale.comp
+        kompute-shaders/op_scale_8.comp
+        kompute-shaders/op_add.comp
+        kompute-shaders/op_addrow.comp
+        kompute-shaders/op_mul.comp
+        kompute-shaders/op_silu.comp
+        kompute-shaders/op_relu.comp
+        kompute-shaders/op_gelu.comp
+        kompute-shaders/op_softmax.comp
+        kompute-shaders/op_norm.comp
+        kompute-shaders/op_rmsnorm.comp
+        kompute-shaders/op_diagmask.comp
+        kompute-shaders/op_mul_mat_mat_f32.comp
+        kompute-shaders/op_mul_mat_f16.comp
+        kompute-shaders/op_mul_mat_q8_0.comp
+        kompute-shaders/op_mul_mat_q4_0.comp
+        kompute-shaders/op_mul_mat_q4_1.comp
+        kompute-shaders/op_mul_mat_q6_k.comp
+        kompute-shaders/op_getrows_f16.comp
+        kompute-shaders/op_getrows_q4_0.comp
+        kompute-shaders/op_getrows_q4_1.comp
+        kompute-shaders/op_getrows_q6_k.comp
+        kompute-shaders/op_rope_f16.comp
+        kompute-shaders/op_rope_f32.comp
+        kompute-shaders/op_cpy_f16_f16.comp
+        kompute-shaders/op_cpy_f16_f32.comp
+        kompute-shaders/op_cpy_f32_f16.comp
+        kompute-shaders/op_cpy_f32_f32.comp
+    )

-        # Create a custom target for our generated shaders
-        add_custom_target(generated_shaders DEPENDS
-            shaderop_scale.h
-            shaderop_scale_8.h
-            shaderop_add.h
-            shaderop_addrow.h
-            shaderop_mul.h
-            shaderop_silu.h
-            shaderop_relu.h
-            shaderop_gelu.h
-            shaderop_softmax.h
-            shaderop_norm.h
-            shaderop_rmsnorm.h
-            shaderop_diagmask.h
-            shaderop_mul_mat_mat_f32.h
-            shaderop_mul_mat_f16.h
-            shaderop_mul_mat_q8_0.h
-            shaderop_mul_mat_q4_0.h
-            shaderop_mul_mat_q4_1.h
-            shaderop_mul_mat_q6_k.h
-            shaderop_getrows_f16.h
-            shaderop_getrows_q4_0.h
-            shaderop_getrows_q4_1.h
-            shaderop_getrows_q6_k.h
-            shaderop_rope_f16.h
-            shaderop_rope_f32.h
-            shaderop_cpy_f16_f16.h
-            shaderop_cpy_f16_f32.h
-            shaderop_cpy_f32_f16.h
-            shaderop_cpy_f32_f32.h
-        )
+    # Create a custom target for our generated shaders
+    add_custom_target(generated_shaders DEPENDS
+        shaderop_scale.h
+        shaderop_scale_8.h
+        shaderop_add.h
+        shaderop_addrow.h
+        shaderop_mul.h
+        shaderop_silu.h
+        shaderop_relu.h
+        shaderop_gelu.h
+        shaderop_softmax.h
+        shaderop_norm.h
+        shaderop_rmsnorm.h
+        shaderop_diagmask.h
+        shaderop_mul_mat_mat_f32.h
+        shaderop_mul_mat_f16.h
+        shaderop_mul_mat_q8_0.h
+        shaderop_mul_mat_q4_0.h
+        shaderop_mul_mat_q4_1.h
+        shaderop_mul_mat_q6_k.h
+        shaderop_getrows_f16.h
+        shaderop_getrows_q4_0.h
+        shaderop_getrows_q4_1.h
+        shaderop_getrows_q6_k.h
+        shaderop_rope_f16.h
+        shaderop_rope_f32.h
+        shaderop_cpy_f16_f16.h
+        shaderop_cpy_f16_f32.h
+        shaderop_cpy_f32_f16.h
+        shaderop_cpy_f32_f32.h
+    )

-        # Create a custom command that depends on the generated_shaders
-        add_custom_command(
-            OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
-            COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
-            DEPENDS generated_shaders
-            COMMENT "Ensuring shaders are generated before compiling ggml-kompute.cpp"
-        )
+    # Create a custom command that depends on the generated_shaders
+    add_custom_command(
+        OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
+        COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp
+        DEPENDS generated_shaders
+        COMMENT "Ensuring shaders are generated before compiling ggml-kompute.cpp"
+    )

-        # Add the stamp to the main sources to ensure dependency tracking
-        set(GGML_SOURCES_KOMPUTE ${LLAMA_DIR}/ggml-kompute.cpp ${LLAMA_DIR}/ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
-        add_compile_definitions(GGML_USE_KOMPUTE)
-        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} kompute)
-        set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CMAKE_BINARY_DIR})
-    else()
-        message(WARNING "Kompute not found")
-    endif()
+    # Add the stamp to the main sources to ensure dependency tracking
+    set(GGML_SOURCES_KOMPUTE ${LLAMA_DIR}/ggml-kompute.cpp ${LLAMA_DIR}/ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp)
+    add_compile_definitions(GGML_USE_KOMPUTE)
+    set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} kompute)
+    set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CMAKE_BINARY_DIR})
 endif()

 if (LLAMA_ALL_WARNINGS)
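Note: besides turning Kompute into an option, the behavioral change in this file is that the submodule check moves to the top of the if (LLAMA_KOMPUTE) block and a missing checkout is now a hard configure error (FATAL_ERROR) instead of a warning that silently produced a build without GPU support. The check is satisfied by initializing submodules before configuring; a sketch, assuming ${LLAMA_DIR} resolves to the vendored llama.cpp checkout (the exact path below is an assumption, not taken from this diff):

    # fetch llama.cpp and its nested kompute submodule before running cmake
    git submodule update --init --recursive
    # the file the EXISTS check looks for
    test -f gpt4all-backend/llama.cpp/kompute/CMakeLists.txt && echo "Kompute sources present"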
diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp
index 065ddf41..5b9960ff 100644
--- a/gpt4all-backend/llamamodel.cpp
+++ b/gpt4all-backend/llamamodel.cpp
@@ -432,6 +432,8 @@ std::vector<LLModel::GPUDevice> LLamaModel::availableGPUDevices(size_t memoryReq
         free(vkDevices);
         return devices;
     }
+#else
+    std::cerr << __func__ << ": built without Kompute\n";
 #endif

     return {};
diff --git a/gpt4all-bindings/python/setup.py b/gpt4all-bindings/python/setup.py
index 5b5a24f7..04094b23 100644
--- a/gpt4all-bindings/python/setup.py
+++ b/gpt4all-bindings/python/setup.py
@@ -61,7 +61,7 @@ copy_prebuilt_C_lib(SRC_CLIB_DIRECTORY,

 setup(
     name=package_name,
-    version="2.2.0",
+    version="2.2.1",
     description="Python bindings for GPT4All",
     author="Nomic and the Open Source Community",
     author_email="support@nomic.ai",
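Note: the llamamodel.cpp hunk gives non-Kompute builds a diagnostic: availableGPUDevices still returns an empty vector, but now says why on stderr instead of failing silently. Together with the version bump to 2.2.1, the CI wheel jobs shown earlier amount to roughly the following local sequence; the final wheel command is illustrative, not taken from this diff:

    # mirror the "Build C library" CI step
    git submodule update --init --recursive
    cd gpt4all-backend
    cmake -B build && cmake --build build --parallel
    # then build the Python bindings; the version comes from setup.py (now 2.2.1)
    cd ../gpt4all-bindings/python
    python -m pip wheel .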