diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml
index e71693a4..3b1f7e4a 100644
--- a/.circleci/continue_config.yml
+++ b/.circleci/continue_config.yml
@@ -881,7 +881,7 @@ jobs:
             wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
             sudo dpkg -i cuda-keyring_1.1-1_all.deb
             sudo apt-get update
-            sudo apt-get install -y cmake build-essential vulkan-sdk cuda-compiler-11-8 libcublas-dev-11-8 libnvidia-compute-550-server libmysqlclient21 libodbc2 libpq5
+            sudo apt-get install -y cmake build-essential vulkan-sdk cuda-compiler-11-8 libcublas-dev-11-8 libnvidia-compute-550-server
             pip install setuptools wheel cmake
       - run:
           name: Build C library
@@ -889,7 +889,9 @@
             export PATH=$PATH:/usr/local/cuda/bin
             git submodule update --init --recursive
             cd gpt4all-backend
-            cmake -B build -DCMAKE_BUILD_TYPE=Release
+            cmake -B build -DCMAKE_BUILD_TYPE=Release \
+                -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON \
+                -DCMAKE_CUDA_ARCHITECTURES='52-virtual;61-virtual;70-virtual;75-virtual'
             cmake --build build -j$(nproc)
       - run:
           name: Build wheel
@@ -986,7 +988,9 @@
             $Env:PATH += ";C:\VulkanSDK\1.3.261.1\bin"
             $Env:VULKAN_SDK = "C:\VulkanSDK\1.3.261.1"
             cd gpt4all-backend
-            cmake -G Ninja -B build -DCMAKE_BUILD_TYPE=Release -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON
+            cmake -G Ninja -B build -DCMAKE_BUILD_TYPE=Release `
+                -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON `
+                -DCMAKE_CUDA_ARCHITECTURES='52-virtual;61-virtual;70-virtual;75-virtual'
             cmake --build build --parallel
       - run:
           name: Build wheel
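The `-virtual` suffix on each architecture tells nvcc to embed only PTX (forward-portable intermediate code) for that compute capability, rather than precompiled SASS (CUBINs); the driver JIT-compiles the PTX for the local GPU on first use. That is what shrinks the wheel while keeping newer GPUs working. A minimal sketch of how one might confirm a GPU is covered by the lowest embedded PTX (5.2) — illustrative only, not part of this patch, and assuming the CUDA 11 runtime is already on the loader path:

```python
import ctypes

# Illustrative only: query the local GPU's compute capability via the CUDA
# runtime API. 75/76 are cudaDevAttrComputeCapabilityMajor/Minor.
cudart = ctypes.CDLL("libcudart.so.11.0")
major, minor = ctypes.c_int(), ctypes.c_int()
cudart.cudaDeviceGetAttribute(ctypes.byref(major), 75, 0)
cudart.cudaDeviceGetAttribute(ctypes.byref(minor), 76, 0)
print(f"compute capability {major.value}.{minor.value}")  # needs >= 5.2 for this build's PTX
```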
"${DIRECTORY}/ggml/src/ggml-cuda/*.cuh") @@ -1018,9 +1018,6 @@ function(include_ggml SUFFIX) C_STANDARD 11 C_STANDARD_REQUIRED true ) - if (GGML_CUDA_ARCHITECTURES) - set_property(TARGET ggml${SUFFIX} llama${SUFFIX} PROPERTY CUDA_ARCHITECTURES "${GGML_CUDA_ARCHITECTURES}") - endif() target_compile_options(ggml${SUFFIX} PRIVATE "${GGML_COMPILE_OPTS}") target_compile_options(llama${SUFFIX} PRIVATE "${GGML_COMPILE_OPTS}") diff --git a/gpt4all-bindings/python/CHANGELOG.md b/gpt4all-bindings/python/CHANGELOG.md index fce39350..20c8eece 100644 --- a/gpt4all-bindings/python/CHANGELOG.md +++ b/gpt4all-bindings/python/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). +## [Unreleased] + +### Changed +- Search for pip-installed CUDA 11 as well as CUDA 12 ([#2802](https://github.com/nomic-ai/gpt4all/pull/2802)) +- Stop shipping CUBINs to reduce wheel size ([#2802](https://github.com/nomic-ai/gpt4all/pull/2802)) + ## [2.8.0] - 2024-08-05 ### Added @@ -16,6 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Detect use of a Python interpreter under Rosetta for a clearer error message ([#2793](https://github.com/nomic-ai/gpt4all/pull/2793)) ### Changed +- Build against CUDA 11.8 instead of CUDA 12 for better compatibility with older drivers ([#2639](https://github.com/nomic-ai/gpt4all/pull/2639)) - Update llama.cpp to commit 87e397d00 from July 19th ([#2694](https://github.com/nomic-ai/gpt4all/pull/2694)) ### Removed @@ -33,4 +40,5 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Restore leading space removal logic that was incorrectly removed in [#2694](https://github.com/nomic-ai/gpt4all/pull/2694) - CUDA: Cherry-pick llama.cpp DMMV cols requirement fix that caused a crash with long conversations since [#2694](https://github.com/nomic-ai/gpt4all/pull/2694) +[Unreleased]: https://github.com/nomic-ai/gpt4all/compare/python-v2.8.0...HEAD [2.8.0]: https://github.com/nomic-ai/gpt4all/compare/python-v2.7.0...python-v2.8.0 diff --git a/gpt4all-bindings/python/gpt4all/_pyllmodel.py b/gpt4all-bindings/python/gpt4all/_pyllmodel.py index a4952fe3..88be2949 100644 --- a/gpt4all-bindings/python/gpt4all/_pyllmodel.py +++ b/gpt4all-bindings/python/gpt4all/_pyllmodel.py @@ -39,25 +39,34 @@ if platform.system() == "Darwin" and platform.processor() == "i386": Please install GPT4All in an environment that uses a native ARM64 Python interpreter. 
""")) + +def _load_cuda(rtver: str, blasver: str) -> None: + if platform.system() == "Linux": + cudalib = f"lib/libcudart.so.{rtver}" + cublaslib = f"lib/libcublas.so.{blasver}" + else: # Windows + cudalib = fr"bin\cudart64_{rtver.replace(".", "")}.dll" + cublaslib = fr"bin\cublas64_{blasver}.dll" + + # preload the CUDA libs so the backend can find them + ctypes.CDLL(os.path.join(cuda_runtime.__path__[0], cudalib), mode=ctypes.RTLD_GLOBAL) + ctypes.CDLL(os.path.join(cublas.__path__[0], cublaslib), mode=ctypes.RTLD_GLOBAL) + + # Find CUDA libraries from the official packages cuda_found = False -if platform.system() in ('Linux', 'Windows'): +if platform.system() in ("Linux", "Windows"): try: from nvidia import cuda_runtime, cublas except ImportError: pass # CUDA is optional else: - if platform.system() == 'Linux': - cudalib = 'lib/libcudart.so.12' - cublaslib = 'lib/libcublas.so.12' - else: # Windows - cudalib = r'bin\cudart64_12.dll' - cublaslib = r'bin\cublas64_12.dll' - - # preload the CUDA libs so the backend can find them - ctypes.CDLL(os.path.join(cuda_runtime.__path__[0], cudalib), mode=ctypes.RTLD_GLOBAL) - ctypes.CDLL(os.path.join(cublas.__path__[0], cublaslib), mode=ctypes.RTLD_GLOBAL) - cuda_found = True + for rtver, blasver in [("12", "12"), ("11.0", "11")]: + try: + _load_cuda(rtver, blasver) + cuda_found = True + except OSError: # dlopen() does not give specific error codes + pass # try the next one # TODO: provide a config file to make this more robust diff --git a/gpt4all-bindings/python/setup.py b/gpt4all-bindings/python/setup.py index 1fe93873..e92fba61 100644 --- a/gpt4all-bindings/python/setup.py +++ b/gpt4all-bindings/python/setup.py @@ -68,7 +68,7 @@ def get_long_description(): setup( name=package_name, - version="2.8.0", + version="2.8.1.dev0", description="Python bindings for GPT4All", long_description=get_long_description(), long_description_content_type="text/markdown",