python: reduce size of wheels built by CI, other build tweaks (#2802)

* Read CMAKE_CUDA_ARCHITECTURES directly
* Disable CUBINs for python build in CI
* Search for CUDA 11 as well as CUDA 12

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
2025-09-21 03:20:59 +00:00 · 2024-08-07 11:27:50 -04:00
parent be66ec8ab5
commit de7cb36fcc
6 changed files with 42 additions and 28 deletions
--- a/gpt4all-bindings/python/gpt4all/_pyllmodel.py
+++ b/gpt4all-bindings/python/gpt4all/_pyllmodel.py
@@ -39,25 +39,34 @@ if platform.system() == "Darwin" and platform.processor() == "i386":
            Please install GPT4All in an environment that uses a native ARM64 Python interpreter.
        """))

+
+def _load_cuda(rtver: str, blasver: str) -> None:
+    """Preload CUDA runtime `rtver` and cuBLAS `blasver` so the backend can find them (OSError on failure)."""
+    if platform.system() == "Linux":
+        cudalib   = f"lib/libcudart.so.{rtver}"
+        cublaslib = f"lib/libcublas.so.{blasver}"
+    else:  # Windows
+        # single quotes inside the braces: reusing the outer quote is a SyntaxError before Python 3.12
+        cudalib   = fr"bin\cudart64_{rtver.replace('.', '')}.dll"
+        cublaslib = fr"bin\cublas64_{blasver}.dll"
+    ctypes.CDLL(os.path.join(cuda_runtime.__path__[0], cudalib), mode=ctypes.RTLD_GLOBAL)
+    ctypes.CDLL(os.path.join(cublas.__path__[0], cublaslib), mode=ctypes.RTLD_GLOBAL)
+
+
 # Find CUDA libraries from the official packages
 cuda_found = False
-if platform.system() in ('Linux', 'Windows'):
+if platform.system() in ("Linux", "Windows"):
    try:
        from nvidia import cuda_runtime, cublas
    except ImportError:
        pass  # CUDA is optional
    else:
-        if platform.system() == 'Linux':
-            cudalib   = 'lib/libcudart.so.12'
-            cublaslib = 'lib/libcublas.so.12'
-        else:  # Windows
-            cudalib   = r'bin\cudart64_12.dll'
-            cublaslib = r'bin\cublas64_12.dll'
-
-        # preload the CUDA libs so the backend can find them
-        ctypes.CDLL(os.path.join(cuda_runtime.__path__[0], cudalib), mode=ctypes.RTLD_GLOBAL)
-        ctypes.CDLL(os.path.join(cublas.__path__[0], cublaslib), mode=ctypes.RTLD_GLOBAL)
-        cuda_found = True
+        for rtver, blasver in (("12", "12"), ("11.0", "11")):  # (cudart, cublas) version suffixes
+            try:
+                _load_cuda(rtver, blasver)
+            except OSError:  # dlopen() does not give specific error codes
+                continue  # this version could not be loaded; try the next one
+            cuda_found = True  # at least one cudart + cublas pair was preloaded


 # TODO: provide a config file to make this more robust