backend: build with CUDA compute 5.0 support by default (#3499)

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
Jared Van Bortel 2025-02-19 11:27:06 -05:00 committed by GitHub
parent 29f29773af
commit 96aeb44210
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 5 additions and 4 deletions

View File

@ -1339,7 +1339,7 @@ jobs:
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
-DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON \
-DCMAKE_CUDA_ARCHITECTURES='52-virtual;61-virtual;70-virtual;75-virtual'
-DCMAKE_CUDA_ARCHITECTURES='50-virtual;52-virtual;61-virtual;70-virtual;75-virtual'
cmake --build build -j$(nproc)
ccache -s
- run:
@ -1458,7 +1458,7 @@ jobs:
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache `
-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache `
-DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON `
-DCMAKE_CUDA_ARCHITECTURES='52-virtual;61-virtual;70-virtual;75-virtual'
-DCMAKE_CUDA_ARCHITECTURES='50-virtual;52-virtual;61-virtual;70-virtual;75-virtual'
cmake --build build --parallel
ccache -s
- run:

View File

@ -69,7 +69,7 @@ if (LLMODEL_CUDA)
cmake_minimum_required(VERSION 3.18) # for CMAKE_CUDA_ARCHITECTURES
# Defaults must be set before enable_language(CUDA).
# Keep this in sync with the arch list in ggml/src/CMakeLists.txt.
# Keep this in sync with the arch list in ggml/src/CMakeLists.txt (plus 5.0 for non-F16 branch).
if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
# 52 == lowest CUDA 12 standard
# 60 == f16 CUDA intrinsics
@ -78,7 +78,7 @@ if (LLMODEL_CUDA)
if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75") # needed for f16 CUDA intrinsics
else()
set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75") # lowest CUDA 12 standard + lowest for integer intrinsics
set(CMAKE_CUDA_ARCHITECTURES "50;52;61;70;75") # lowest CUDA 12 standard + lowest for integer intrinsics
#set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work
endif()
endif()

View File

@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
### Added
- Whitelist Granite (non-MoE) model architecture (by [@ThiloteE](https://github.com/ThiloteE) in [#3487](https://github.com/nomic-ai/gpt4all/pull/3487))
- Add support for CUDA compute 5.0 GPUs such as the GTX 750 ([#3499](https://github.com/nomic-ai/gpt4all/pull/3499))
### Changed
- Substitute prettier default templates for OLMoE 7B 0924/0125 and Granite 3.1 3B/8B (by [@ThiloteE](https://github.com/ThiloteE) in [#3471](https://github.com/nomic-ai/gpt4all/pull/3471))