python: reduce size of wheels built by CI, other build tweaks (#2802)

* Read CMAKE_CUDA_ARCHITECTURES directly
* Disable CUBINs for python build in CI
* Search for CUDA 11 as well as CUDA 12

Signed-off-by: Jared Van Bortel <jared@nomic.ai>

parent be66ec8ab5
commit de7cb36fcc
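
Note on the size win: with CUBINs disabled, the CUDA kernels ship as PTX only (the `-virtual` architecture entries below), which the driver JIT-compiles for the actual GPU on first load. A quick way to see where a wheel's size goes is to list its largest members; this is an illustrative sketch, not part of the commit, and the wheel filename is hypothetical:

import zipfile

def largest_members(wheel_path: str, n: int = 5) -> list[tuple[str, int]]:
    """List the n largest files in a wheel (a wheel is just a zip archive)."""
    with zipfile.ZipFile(wheel_path) as wf:
        sizes = [(info.filename, info.file_size) for info in wf.infolist()]
    return sorted(sizes, key=lambda item: item[1], reverse=True)[:n]

# Hypothetical usage; before this change the CUDA backend libraries dominate:
# for name, size in largest_members("gpt4all-2.8.1.dev0-py3-none-manylinux1_x86_64.whl"):
#     print(f"{size / 2**20:8.1f} MiB  {name}")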
@@ -881,7 +881,7 @@ jobs:
             wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
             sudo dpkg -i cuda-keyring_1.1-1_all.deb
             sudo apt-get update
-            sudo apt-get install -y cmake build-essential vulkan-sdk cuda-compiler-11-8 libcublas-dev-11-8 libnvidia-compute-550-server libmysqlclient21 libodbc2 libpq5
+            sudo apt-get install -y cmake build-essential vulkan-sdk cuda-compiler-11-8 libcublas-dev-11-8 libnvidia-compute-550-server
             pip install setuptools wheel cmake
       - run:
           name: Build C library
@@ -889,7 +889,9 @@ jobs:
             export PATH=$PATH:/usr/local/cuda/bin
             git submodule update --init --recursive
             cd gpt4all-backend
-            cmake -B build -DCMAKE_BUILD_TYPE=Release
+            cmake -B build -DCMAKE_BUILD_TYPE=Release \
+              -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON \
+              -DCMAKE_CUDA_ARCHITECTURES='52-virtual;61-virtual;70-virtual;75-virtual'
             cmake --build build -j$(nproc)
       - run:
           name: Build wheel
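
For reference, CMake's CUDA_ARCHITECTURES notation distinguishes `52-virtual` (PTX only), `52-real` (CUBIN only), and bare `52` (both). The sketch below approximates the nvcc -gencode flags CMake derives from the list used above; it is an illustration of the convention, not project code:

ARCHS = "52-virtual;61-virtual;70-virtual;75-virtual"

def nvcc_gencode_flags(archs: str) -> list[str]:
    """Approximate the -gencode flags CMake derives from CUDA_ARCHITECTURES."""
    flags = []
    for entry in archs.split(";"):
        cc, _, kind = entry.partition("-")
        if kind == "virtual":  # PTX only: small, JIT-compiled by the driver
            flags.append(f"-gencode=arch=compute_{cc},code=compute_{cc}")
        elif kind == "real":   # CUBIN only: no JIT, one GPU generation per entry
            flags.append(f"-gencode=arch=compute_{cc},code=sm_{cc}")
        else:                  # bare "52": ship both PTX and CUBIN (fat binary)
            flags.append(f"-gencode=arch=compute_{cc},code=[compute_{cc},sm_{cc}]")
    return flags

print("\n".join(nvcc_gencode_flags(ARCHS)))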
@@ -986,7 +988,9 @@ jobs:
             $Env:PATH += ";C:\VulkanSDK\1.3.261.1\bin"
             $Env:VULKAN_SDK = "C:\VulkanSDK\1.3.261.1"
             cd gpt4all-backend
-            cmake -G Ninja -B build -DCMAKE_BUILD_TYPE=Release -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON
+            cmake -G Ninja -B build -DCMAKE_BUILD_TYPE=Release `
+              -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON `
+              -DCMAKE_CUDA_ARCHITECTURES='52-virtual;61-virtual;70-virtual;75-virtual'
             cmake --build build --parallel
       - run:
           name: Build wheel
@@ -63,10 +63,6 @@ if (LLMODEL_VULKAN)
     list(APPEND BUILD_VARIANTS vulkan vulkan-avxonly)
 endif()
 if (LLMODEL_CUDA)
-    if (DEFINED CMAKE_CUDA_ARCHITECTURES)
-        set(GGML_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}")
-    endif()
-
     include(CheckLanguage)
     check_language(CUDA)
     if (NOT CMAKE_CUDA_COMPILER)
@@ -378,19 +378,19 @@ function(include_ggml SUFFIX)
     find_package(CUDAToolkit REQUIRED)
     set(CUDAToolkit_BIN_DIR ${CUDAToolkit_BIN_DIR} PARENT_SCOPE)

-    if (NOT DEFINED GGML_CUDA_ARCHITECTURES)
+    if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
         # 52 == lowest CUDA 12 standard
         # 60 == f16 CUDA intrinsics
         # 61 == integer CUDA intrinsics
         # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
         if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
-            set(GGML_CUDA_ARCHITECTURES "60;61;70;75") # needed for f16 CUDA intrinsics
+            set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75") # needed for f16 CUDA intrinsics
         else()
-            set(GGML_CUDA_ARCHITECTURES "52;61;70;75") # lowest CUDA 12 standard + lowest for integer intrinsics
-            #set(GGML_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work
+            set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75") # lowest CUDA 12 standard + lowest for integer intrinsics
+            #set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work
         endif()
     endif()
-    message(STATUS "Using CUDA architectures: ${GGML_CUDA_ARCHITECTURES}")
+    message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")

     set(GGML_HEADERS_CUDA ${DIRECTORY}/ggml/include/ggml-cuda.h)
     file(GLOB GGML_HEADERS_CUDA "${DIRECTORY}/ggml/src/ggml-cuda/*.cuh")
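
These defaults apply only when the builder leaves CMAKE_CUDA_ARCHITECTURES unset, which is exactly what lets the CI jobs above override them with the PTX-only list. A condensed view of the fallback choice (illustrative Python, not project code):

def default_cuda_archs(f16: bool) -> list[str]:
    """Mirror the fallback above: f16 intrinsics need compute capability 6.0,
    so the f16 build drops sm_52, the lowest architecture CUDA 12 supports."""
    return ["60", "61", "70", "75"] if f16 else ["52", "61", "70", "75"]

assert default_cuda_archs(f16=True)[0] == "60"   # needed for f16 intrinsics
assert default_cuda_archs(f16=False)[0] == "52"  # lowest CUDA 12 standard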
@@ -1018,9 +1018,6 @@ function(include_ggml SUFFIX)
         C_STANDARD 11
         C_STANDARD_REQUIRED true
     )
-    if (GGML_CUDA_ARCHITECTURES)
-        set_property(TARGET ggml${SUFFIX} llama${SUFFIX} PROPERTY CUDA_ARCHITECTURES "${GGML_CUDA_ARCHITECTURES}")
-    endif()

     target_compile_options(ggml${SUFFIX} PRIVATE "${GGML_COMPILE_OPTS}")
     target_compile_options(llama${SUFFIX} PRIVATE "${GGML_COMPILE_OPTS}")
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.

 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

+## [Unreleased]
+
+### Changed
+- Search for pip-installed CUDA 11 as well as CUDA 12 ([#2802](https://github.com/nomic-ai/gpt4all/pull/2802))
+- Stop shipping CUBINs to reduce wheel size ([#2802](https://github.com/nomic-ai/gpt4all/pull/2802))
+
 ## [2.8.0] - 2024-08-05

 ### Added
@@ -16,6 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 - Detect use of a Python interpreter under Rosetta for a clearer error message ([#2793](https://github.com/nomic-ai/gpt4all/pull/2793))

 ### Changed
+- Build against CUDA 11.8 instead of CUDA 12 for better compatibility with older drivers ([#2639](https://github.com/nomic-ai/gpt4all/pull/2639))
 - Update llama.cpp to commit 87e397d00 from July 19th ([#2694](https://github.com/nomic-ai/gpt4all/pull/2694))

 ### Removed
@@ -33,4 +40,5 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 - Restore leading space removal logic that was incorrectly removed in [#2694](https://github.com/nomic-ai/gpt4all/pull/2694)
 - CUDA: Cherry-pick llama.cpp DMMV cols requirement fix that caused a crash with long conversations since [#2694](https://github.com/nomic-ai/gpt4all/pull/2694)

+[Unreleased]: https://github.com/nomic-ai/gpt4all/compare/python-v2.8.0...HEAD
 [2.8.0]: https://github.com/nomic-ai/gpt4all/compare/python-v2.7.0...python-v2.8.0
@@ -39,25 +39,34 @@ if platform.system() == "Darwin" and platform.processor() == "i386":
             Please install GPT4All in an environment that uses a native ARM64 Python interpreter.
             """))


+def _load_cuda(rtver: str, blasver: str) -> None:
+    if platform.system() == "Linux":
+        cudalib = f"lib/libcudart.so.{rtver}"
+        cublaslib = f"lib/libcublas.so.{blasver}"
+    else:  # Windows
+        cudalib = fr"bin\cudart64_{rtver.replace('.', '')}.dll"
+        cublaslib = fr"bin\cublas64_{blasver}.dll"
+
+    # preload the CUDA libs so the backend can find them
+    ctypes.CDLL(os.path.join(cuda_runtime.__path__[0], cudalib), mode=ctypes.RTLD_GLOBAL)
+    ctypes.CDLL(os.path.join(cublas.__path__[0], cublaslib), mode=ctypes.RTLD_GLOBAL)
+
+
 # Find CUDA libraries from the official packages
 cuda_found = False
-if platform.system() in ('Linux', 'Windows'):
+if platform.system() in ("Linux", "Windows"):
     try:
         from nvidia import cuda_runtime, cublas
     except ImportError:
         pass  # CUDA is optional
     else:
-        if platform.system() == 'Linux':
-            cudalib = 'lib/libcudart.so.12'
-            cublaslib = 'lib/libcublas.so.12'
-        else:  # Windows
-            cudalib = r'bin\cudart64_12.dll'
-            cublaslib = r'bin\cublas64_12.dll'
-
-        # preload the CUDA libs so the backend can find them
-        ctypes.CDLL(os.path.join(cuda_runtime.__path__[0], cudalib), mode=ctypes.RTLD_GLOBAL)
-        ctypes.CDLL(os.path.join(cublas.__path__[0], cublaslib), mode=ctypes.RTLD_GLOBAL)
-        cuda_found = True
+        for rtver, blasver in [("12", "12"), ("11.0", "11")]:
+            try:
+                _load_cuda(rtver, blasver)
+                cuda_found = True
+            except OSError:  # dlopen() does not give specific error codes
+                pass  # try the next one


 # TODO: provide a config file to make this more robust
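
The fallback loop above tries CUDA 12 first, then CUDA 11, so one wheel can work with either set of pip-installed NVIDIA runtime packages (nvidia-cuda-runtime-cu12/cu11 together with nvidia-cublas-cu12/cu11). A sketch of the filenames each (rtver, blasver) pair resolves to; the helper below is illustrative and not part of the module:

import platform

def candidate_libs(system: str) -> list[tuple[str, str]]:
    """Filenames the loader will try, in order, per platform."""
    pairs = [("12", "12"), ("11.0", "11")]  # CUDA 12 first, then CUDA 11
    out = []
    for rtver, blasver in pairs:
        if system == "Linux":
            out.append((f"lib/libcudart.so.{rtver}", f"lib/libcublas.so.{blasver}"))
        else:  # Windows
            out.append((fr"bin\cudart64_{rtver.replace('.', '')}.dll",
                        fr"bin\cublas64_{blasver}.dll"))
    return out

# candidate_libs("Linux")   -> libcudart.so.12 + libcublas.so.12, then libcudart.so.11.0 + libcublas.so.11
# candidate_libs("Windows") -> cudart64_12.dll + cublas64_12.dll, then cudart64_110.dll + cublas64_11.dll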
@@ -68,7 +68,7 @@ def get_long_description():

 setup(
     name=package_name,
-    version="2.8.0",
+    version="2.8.1.dev0",
     description="Python bindings for GPT4All",
     long_description=get_long_description(),
     long_description_content_type="text/markdown",
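
The bump to `2.8.1.dev0` follows PEP 440: a development release sorts after the previous final release and before the release it leads up to, so a dev wheel never shadows a published one. A quick check, assuming the third-party packaging library is available:

from packaging.version import Version  # third-party 'packaging' package

assert Version("2.8.0") < Version("2.8.1.dev0") < Version("2.8.1")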