Mirror of https://github.com/nomic-ai/gpt4all.git, synced 2025-09-05 02:20:28 +00:00
support the llama.cpp CUDA backend (#2310)
* rebase onto llama.cpp commit ggerganov/llama.cpp@d46dbc76f
* support for CUDA backend (enabled by default)
* partial support for Occam's Vulkan backend (disabled by default)
* partial support for HIP/ROCm backend (disabled by default)
* sync llama.cpp.cmake with upstream llama.cpp CMakeLists.txt
* changes to GPT4All backend, bindings, and chat UI to handle choice of llama.cpp backend (Kompute or CUDA)
* ship CUDA runtime with installed version
* make device selection in the UI on macOS actually do something
* model whitelist: remove dbrx, mamba, persimmon, plamo; add internlm and starcoder2

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
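For orientation before the diff: the backend gains one configure-time switch per llama.cpp backend. A minimal sketch of how a consuming project might pin those switches before pulling in gpt4all-backend (the add_subdirectory layout and project name are illustrative assumptions; the option names and defaults are taken from the diff below):

cmake_minimum_required(VERSION 3.16)
project(backend_consumer LANGUAGES C CXX)  # hypothetical consumer project

# Pre-seed the cache before gpt4all-backend's CMakeLists declares the options;
# an existing cache entry takes precedence over the option() default.
set(LLMODEL_KOMPUTE OFF CACHE BOOL "llmodel: use Kompute")
set(LLMODEL_CUDA    ON  CACHE BOOL "llmodel: use CUDA")
set(LLMODEL_VULKAN  OFF CACHE BOOL "llmodel: use Vulkan")
set(LLMODEL_ROCM    OFF CACHE BOOL "llmodel: use ROCm")

add_subdirectory(gpt4all-backend)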
@@ -2,15 +2,23 @@ cmake_minimum_required(VERSION 3.16)
 set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
-if(APPLE)
-    option(BUILD_UNIVERSAL "Build a Universal binary on macOS" ON)
-    if(BUILD_UNIVERSAL)
+if (APPLE)
+    option(BUILD_UNIVERSAL "Build a Universal binary on macOS" ON)
+else()
+    option(LLMODEL_KOMPUTE "llmodel: use Kompute" ON)
+    option(LLMODEL_VULKAN "llmodel: use Vulkan" OFF)
+    option(LLMODEL_CUDA "llmodel: use CUDA" ON)
+    option(LLMODEL_ROCM "llmodel: use ROCm" OFF)
+endif()
+
+if (APPLE)
+    if (BUILD_UNIVERSAL)
         # Build a Universal binary on macOS
         # This requires that the found Qt library is compiled as Universal binaries.
         set(CMAKE_OSX_ARCHITECTURES "arm64;x86_64" CACHE STRING "" FORCE)
     else()
         # Build for the host architecture on macOS
-        if(NOT CMAKE_OSX_ARCHITECTURES)
+        if (NOT CMAKE_OSX_ARCHITECTURES)
             set(CMAKE_OSX_ARCHITECTURES "${CMAKE_HOST_SYSTEM_PROCESSOR}" CACHE STRING "" FORCE)
         endif()
     endif()
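A side note on the hunk above, based on standard CMake semantics rather than anything stated in the diff: the LLMODEL_* options are declared only in the non-Apple branch, so on macOS they are never defined, and an undefined variable evaluates as false in if(). The kompute/vulkan/cuda/rocm variant blocks later in this file are therefore skipped on macOS, where only the metal variant applies:

# Hypothetical probe illustrating the default on macOS:
if (APPLE AND NOT DEFINED LLMODEL_CUDA)
    message(STATUS "LLMODEL_CUDA is not declared on this platform; no CUDA build variants")
endif()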
@@ -39,11 +47,35 @@ else()
     message(STATUS "Interprocedural optimization support detected")
 endif()
 
-set(DIRECTORY llama.cpp-mainline)
 include(llama.cpp.cmake)
 
-set(BUILD_VARIANTS default avxonly)
-if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
-    set(BUILD_VARIANTS ${BUILD_VARIANTS} metal)
+set(BUILD_VARIANTS)
+set(GPTJ_BUILD_VARIANT cpu)
+if (APPLE)
+    list(APPEND BUILD_VARIANTS metal)
+endif()
+if (LLMODEL_KOMPUTE)
+    list(APPEND BUILD_VARIANTS kompute kompute-avxonly)
+    set(GPTJ_BUILD_VARIANT kompute)
+else()
+    list(PREPEND BUILD_VARIANTS cpu cpu-avxonly)
+endif()
+if (LLMODEL_VULKAN)
+    list(APPEND BUILD_VARIANTS vulkan vulkan-avxonly)
+endif()
+if (LLMODEL_CUDA)
+    include(CheckLanguage)
+    check_language(CUDA)
+    if (NOT CMAKE_CUDA_COMPILER)
+        message(WARNING "CUDA Toolkit not found. To build without CUDA, use -DLLMODEL_CUDA=OFF.")
+    endif()
+    enable_language(CUDA)
+    list(APPEND BUILD_VARIANTS cuda cuda-avxonly)
+endif()
+if (LLMODEL_ROCM)
+    enable_language(HIP)
+    list(APPEND BUILD_VARIANTS rocm rocm-avxonly)
 endif()
 
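About the LLMODEL_CUDA block above: check_language() from CMake's CheckLanguage module only probes for a working CUDA compiler and records the result in CMAKE_CUDA_COMPILER, while enable_language(CUDA) is what actually aborts the configure step when the toolkit is missing, hence the warning pointing at -DLLMODEL_CUDA=OFF. For comparison, a sketch of the stricter guard that would skip the CUDA variants instead of failing (an alternative pattern, not what this commit does):

include(CheckLanguage)
check_language(CUDA)
if (CMAKE_CUDA_COMPILER)
    enable_language(CUDA)
    list(APPEND BUILD_VARIANTS cuda cuda-avxonly)
else()
    # No CUDA toolkit found: leave the cuda variants out rather than erroring later.
    message(WARNING "CUDA Toolkit not found; building without the cuda variants")
endif()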
@@ -51,24 +83,34 @@ set(CMAKE_VERBOSE_MAKEFILE ON)
 # Go through each build variant
 foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
     # Determine flags
-    if (BUILD_VARIANT STREQUAL avxonly)
-        set(GPT4ALL_ALLOW_NON_AVX NO)
+    if (BUILD_VARIANT MATCHES avxonly)
+        set(GPT4ALL_ALLOW_NON_AVX OFF)
     else()
-        set(GPT4ALL_ALLOW_NON_AVX YES)
+        set(GPT4ALL_ALLOW_NON_AVX ON)
     endif()
     set(LLAMA_AVX2 ${GPT4ALL_ALLOW_NON_AVX})
     set(LLAMA_F16C ${GPT4ALL_ALLOW_NON_AVX})
     set(LLAMA_FMA ${GPT4ALL_ALLOW_NON_AVX})
 
-    if (BUILD_VARIANT STREQUAL metal)
-        set(LLAMA_METAL YES)
-    else()
-        set(LLAMA_METAL NO)
+    set(LLAMA_METAL OFF)
+    set(LLAMA_KOMPUTE OFF)
+    set(LLAMA_VULKAN OFF)
+    set(LLAMA_CUDA OFF)
+    set(LLAMA_ROCM OFF)
+    if (BUILD_VARIANT MATCHES metal)
+        set(LLAMA_METAL ON)
+    elseif (BUILD_VARIANT MATCHES kompute)
+        set(LLAMA_KOMPUTE ON)
+    elseif (BUILD_VARIANT MATCHES vulkan)
+        set(LLAMA_VULKAN ON)
+    elseif (BUILD_VARIANT MATCHES cuda)
+        set(LLAMA_CUDA ON)
+    elseif (BUILD_VARIANT MATCHES rocm)
+        set(LLAMA_HIPBLAS ON)
     endif()
 
     # Include GGML
-    set(LLAMA_K_QUANTS YES)
-    include_ggml(llama.cpp-mainline -mainline-${BUILD_VARIANT} ON)
+    include_ggml(-mainline-${BUILD_VARIANT})
 
     # Function for preparing individual implementations
     function(prepare_target TARGET_NAME BASE_LIB)
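The switch from STREQUAL to MATCHES in the hunk above is what lets a single variant name carry two pieces of information. A small illustration, assuming the combined variant names built earlier (e.g. cuda-avxonly):

set(BUILD_VARIANT cuda-avxonly)
if (BUILD_VARIANT MATCHES avxonly)  # regex/substring match: disables AVX2, F16C, FMA
    set(GPT4ALL_ALLOW_NON_AVX OFF)
endif()
if (BUILD_VARIANT MATCHES cuda)     # the same name also selects the CUDA backend
    set(LLAMA_CUDA ON)
endif()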
@@ -93,11 +135,15 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
         LLAMA_VERSIONS=>=3 LLAMA_DATE=999999)
     prepare_target(llamamodel-mainline llama-mainline)
 
-    if (NOT LLAMA_METAL)
+    if (BUILD_VARIANT MATCHES ${GPTJ_BUILD_VARIANT})
         add_library(gptj-${BUILD_VARIANT} SHARED
             gptj.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
         prepare_target(gptj llama-mainline)
     endif()
+
+    if (BUILD_VARIANT STREQUAL cuda)
+        set(CUDAToolkit_BIN_DIR ${CUDAToolkit_BIN_DIR} PARENT_SCOPE)
+    endif()
 endforeach()
 
 add_library(llmodel
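Exporting CUDAToolkit_BIN_DIR to the parent scope supports the "ship CUDA runtime with installed version" item from the commit message: the build one directory up can use it to locate and bundle the CUDA runtime. A hedged sketch of what such a consumer step could look like (library names, destination, and the install logic are illustrative assumptions, not taken from this commit):

# In the parent project, after add_subdirectory() of the backend:
find_file(CUDART_LIBRARY
    NAMES cudart64_12.dll libcudart.so.12 cudart64_110.dll libcudart.so.11.0
    PATHS "${CUDAToolkit_BIN_DIR}" "${CUDAToolkit_BIN_DIR}/../lib64"
    NO_DEFAULT_PATH)
if (CUDART_LIBRARY)
    # Bundle the CUDA runtime next to the installed binaries.
    install(FILES "${CUDART_LIBRARY}" DESTINATION lib)
endif()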