backend(build): set CUDA arch defaults before enable_language(CUDA) (#2855)

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
Jared Van Bortel
2024-08-13 14:47:48 -04:00
committed by GitHub
parent 971c83d1d3
commit 7463b2170b
3 changed files with 20 additions and 13 deletions

View File

@@ -378,19 +378,7 @@ function(include_ggml SUFFIX)
find_package(CUDAToolkit REQUIRED)
set(CUDAToolkit_BIN_DIR ${CUDAToolkit_BIN_DIR} PARENT_SCOPE)
if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
# 52 == lowest CUDA 12 standard
# 60 == f16 CUDA intrinsics
# 61 == integer CUDA intrinsics
# 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75") # needed for f16 CUDA intrinsics
else()
set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75") # lowest CUDA 12 standard + lowest for integer intrinsics
#set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work
endif()
endif()
message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
# architectures are set in gpt4all-backend/CMakeLists.txt
set(GGML_HEADERS_CUDA ${DIRECTORY}/ggml/include/ggml-cuda.h)
file(GLOB GGML_HEADERS_CUDA "${DIRECTORY}/ggml/src/ggml-cuda/*.cuh")