diff --git a/gpt4all-bindings/python/CHANGELOG.md b/gpt4all-bindings/python/CHANGELOG.md index a2948702..97ad1b7e 100644 --- a/gpt4all-bindings/python/CHANGELOG.md +++ b/gpt4all-bindings/python/CHANGELOG.md @@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ### Changed - Rebase llama.cpp on latest upstream as of September 26th ([#2998](https://github.com/nomic-ai/gpt4all/pull/2998)) - Change the error message when a message is too long ([#3004](https://github.com/nomic-ai/gpt4all/pull/3004)) +- Fix CalledProcessError on Intel Macs since v2.8.0 ([#3045](https://github.com/nomic-ai/gpt4all/pull/3045)) ## [2.8.2] - 2024-08-14 diff --git a/gpt4all-bindings/python/gpt4all/_pyllmodel.py b/gpt4all-bindings/python/gpt4all/_pyllmodel.py index 208d834c..8357731e 100644 --- a/gpt4all-bindings/python/gpt4all/_pyllmodel.py +++ b/gpt4all-bindings/python/gpt4all/_pyllmodel.py @@ -3,7 +3,6 @@ from __future__ import annotations import ctypes import os import platform -import re import subprocess import sys import textwrap @@ -28,16 +27,25 @@ if TYPE_CHECKING: EmbeddingsType = TypeVar('EmbeddingsType', bound='list[Any]') +cuda_found: bool = False + + +# TODO(jared): use operator.call after we drop python 3.10 support +def _operator_call(obj, /, *args, **kwargs): + return obj(*args, **kwargs) + # Detect Rosetta 2 -if platform.system() == "Darwin" and platform.processor() == "i386": - if subprocess.run( - "sysctl -n sysctl.proc_translated".split(), check=True, capture_output=True, text=True, - ).stdout.strip() == "1": - raise RuntimeError(textwrap.dedent("""\ - Running GPT4All under Rosetta is not supported due to CPU feature requirements. - Please install GPT4All in an environment that uses a native ARM64 Python interpreter. - """).strip()) +@_operator_call +def check_rosetta() -> None: + if platform.system() == "Darwin" and platform.processor() == "i386": + p = subprocess.run("sysctl -n sysctl.proc_translated".split(), capture_output=True, text=True) + if p.returncode == 0 and p.stdout.strip() == "1": + raise RuntimeError(textwrap.dedent("""\ + Running GPT4All under Rosetta is not supported due to CPU feature requirements. + Please install GPT4All in an environment that uses a native ARM64 Python interpreter. + """).strip()) + # Check for C++ runtime libraries if platform.system() == "Windows": @@ -53,33 +61,35 @@ if platform.system() == "Windows": """), file=sys.stderr) -def _load_cuda(rtver: str, blasver: str) -> None: - if platform.system() == "Linux": - cudalib = f"lib/libcudart.so.{rtver}" - cublaslib = f"lib/libcublas.so.{blasver}" - else: # Windows - cudalib = fr"bin\cudart64_{rtver.replace('.', '')}.dll" - cublaslib = fr"bin\cublas64_{blasver}.dll" +@_operator_call +def find_cuda() -> None: + global cuda_found - # preload the CUDA libs so the backend can find them - ctypes.CDLL(os.path.join(cuda_runtime.__path__[0], cudalib), mode=ctypes.RTLD_GLOBAL) - ctypes.CDLL(os.path.join(cublas.__path__[0], cublaslib), mode=ctypes.RTLD_GLOBAL) + def _load_cuda(rtver: str, blasver: str) -> None: + if platform.system() == "Linux": + cudalib = f"lib/libcudart.so.{rtver}" + cublaslib = f"lib/libcublas.so.{blasver}" + else: # Windows + cudalib = fr"bin\cudart64_{rtver.replace('.', '')}.dll" + cublaslib = fr"bin\cublas64_{blasver}.dll" + # preload the CUDA libs so the backend can find them + ctypes.CDLL(os.path.join(cuda_runtime.__path__[0], cudalib), mode=ctypes.RTLD_GLOBAL) + ctypes.CDLL(os.path.join(cublas.__path__[0], cublaslib), mode=ctypes.RTLD_GLOBAL) -# Find CUDA libraries from the official packages -cuda_found = False -if platform.system() in ("Linux", "Windows"): - try: - from nvidia import cuda_runtime, cublas - except ImportError: - pass # CUDA is optional - else: - for rtver, blasver in [("12", "12"), ("11.0", "11")]: - try: - _load_cuda(rtver, blasver) - cuda_found = True - except OSError: # dlopen() does not give specific error codes - pass # try the next one + # Find CUDA libraries from the official packages + if platform.system() in ("Linux", "Windows"): + try: + from nvidia import cuda_runtime, cublas + except ImportError: + pass # CUDA is optional + else: + for rtver, blasver in [("12", "12"), ("11.0", "11")]: + try: + _load_cuda(rtver, blasver) + cuda_found = True + except OSError: # dlopen() does not give specific error codes + pass # try the next one # TODO: provide a config file to make this more robust @@ -121,6 +131,7 @@ class LLModelPromptContext(ctypes.Structure): ("context_erase", ctypes.c_float), ] + class LLModelGPUDevice(ctypes.Structure): _fields_ = [ ("backend", ctypes.c_char_p), @@ -131,6 +142,7 @@ class LLModelGPUDevice(ctypes.Structure): ("vendor", ctypes.c_char_p), ] + # Define C function signatures using ctypes llmodel.llmodel_model_create.argtypes = [ctypes.c_char_p] llmodel.llmodel_model_create.restype = ctypes.c_void_p @@ -540,7 +552,6 @@ class LLModel: ctypes.c_char_p(), ) - def prompt_model_streaming( self, prompt: str, prompt_template: str, callback: ResponseCallbackType = empty_response_callback, **kwargs ) -> Iterable[str]: @@ -589,16 +600,16 @@ class LLModel: decoded = [] for byte in response: - + bits = "{:08b}".format(byte) (high_ones, _, _) = bits.partition('0') - if len(high_ones) == 1: + if len(high_ones) == 1: # continuation byte self.buffer.append(byte) self.buff_expecting_cont_bytes -= 1 - else: + else: # beginning of a byte sequence if len(self.buffer) > 0: decoded.append(self.buffer.decode(errors='replace')) @@ -608,18 +619,18 @@ class LLModel: self.buffer.append(byte) self.buff_expecting_cont_bytes = max(0, len(high_ones) - 1) - if self.buff_expecting_cont_bytes <= 0: + if self.buff_expecting_cont_bytes <= 0: # received the whole sequence or an out of place continuation byte decoded.append(self.buffer.decode(errors='replace')) self.buffer.clear() self.buff_expecting_cont_bytes = 0 - + if len(decoded) == 0 and self.buff_expecting_cont_bytes > 0: # wait for more continuation bytes return True - - return callback(token_id, ''.join(decoded)) + + return callback(token_id, ''.join(decoded)) return _raw_callback diff --git a/gpt4all-bindings/python/gpt4all/gpt4all.py b/gpt4all-bindings/python/gpt4all/gpt4all.py index 027f28df..c863817d 100644 --- a/gpt4all-bindings/python/gpt4all/gpt4all.py +++ b/gpt4all-bindings/python/gpt4all/gpt4all.py @@ -8,7 +8,6 @@ import os import platform import re import sys -import time import warnings from contextlib import contextmanager from pathlib import Path