python: fix CalledProcessError on Intel Macs since v2.8.0 (#3045)

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Jared Van Bortel <jared@nomic.ai> 2024-10-09 09:13:33 -04:00, committed by GitHub
parent 8e3108fe1f
commit a59ec91369
GPG Key ID: B5690EEEBB952194 (no known key found for this signature in database)
3 changed files with 53 additions and 42 deletions
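Background on the bug this commit fixes: `sysctl -n sysctl.proc_translated` reports whether a process runs under Rosetta 2, but on Intel Macs the OID does not exist, so the command exits with a non-zero status. With `check=True`, `subprocess.run` turned that into a `CalledProcessError` at import time. A minimal standalone sketch of the failure mode and the fix (illustration only, not the patched module itself):

```python
import subprocess

CMD = "sysctl -n sysctl.proc_translated".split()

# Before: check=True raises CalledProcessError when the OID is unknown,
# which is exactly what happens on an Intel Mac not running under Rosetta 2.
def is_translated_old() -> bool:
    return subprocess.run(CMD, check=True, capture_output=True, text=True).stdout.strip() == "1"

# After: inspect returncode instead, so an unknown OID simply means
# "not translated" rather than an exception during import.
def is_translated_new() -> bool:
    p = subprocess.run(CMD, capture_output=True, text=True)
    return p.returncode == 0 and p.stdout.strip() == "1"
```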

View File

@@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 ### Changed
 - Rebase llama.cpp on latest upstream as of September 26th ([#2998](https://github.com/nomic-ai/gpt4all/pull/2998))
 - Change the error message when a message is too long ([#3004](https://github.com/nomic-ai/gpt4all/pull/3004))
+- Fix CalledProcessError on Intel Macs since v2.8.0 ([#3045](https://github.com/nomic-ai/gpt4all/pull/3045))

 ## [2.8.2] - 2024-08-14

View File

@@ -3,7 +3,6 @@ from __future__ import annotations
 import ctypes
 import os
 import platform
-import re
 import subprocess
 import sys
 import textwrap
@@ -28,16 +27,25 @@ if TYPE_CHECKING:

 EmbeddingsType = TypeVar('EmbeddingsType', bound='list[Any]')

+cuda_found: bool = False
+
+
+# TODO(jared): use operator.call after we drop python 3.10 support
+def _operator_call(obj, /, *args, **kwargs):
+    return obj(*args, **kwargs)
+
+
 # Detect Rosetta 2
-if platform.system() == "Darwin" and platform.processor() == "i386":
-    if subprocess.run(
-        "sysctl -n sysctl.proc_translated".split(), check=True, capture_output=True, text=True,
-    ).stdout.strip() == "1":
-        raise RuntimeError(textwrap.dedent("""\
-            Running GPT4All under Rosetta is not supported due to CPU feature requirements.
-            Please install GPT4All in an environment that uses a native ARM64 Python interpreter.
-            """).strip())
+@_operator_call
+def check_rosetta() -> None:
+    if platform.system() == "Darwin" and platform.processor() == "i386":
+        p = subprocess.run("sysctl -n sysctl.proc_translated".split(), capture_output=True, text=True)
+        if p.returncode == 0 and p.stdout.strip() == "1":
+            raise RuntimeError(textwrap.dedent("""\
+                Running GPT4All under Rosetta is not supported due to CPU feature requirements.
+                Please install GPT4All in an environment that uses a native ARM64 Python interpreter.
+                """).strip())


 # Check for C++ runtime libraries
 if platform.system() == "Windows":
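A note on the `@_operator_call` idiom introduced above: decorating a function with it calls the function immediately and binds the name to the return value, so the check runs once at import time without leaking temporaries such as `p` into module scope. A small sketch of the pattern (hypothetical names, for illustration):

```python
# Same helper as in the diff; operator.call is its stdlib equivalent on 3.11+.
def _operator_call(obj, /, *args, **kwargs):
    return obj(*args, **kwargs)

@_operator_call
def greeting() -> str:
    return "hello"

# The function ran immediately; its name now holds the result.
assert greeting == "hello"
```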
@@ -53,33 +61,35 @@ if platform.system() == "Windows":
             """), file=sys.stderr)


-def _load_cuda(rtver: str, blasver: str) -> None:
-    if platform.system() == "Linux":
-        cudalib = f"lib/libcudart.so.{rtver}"
-        cublaslib = f"lib/libcublas.so.{blasver}"
-    else: # Windows
-        cudalib = fr"bin\cudart64_{rtver.replace('.', '')}.dll"
-        cublaslib = fr"bin\cublas64_{blasver}.dll"
-
-    # preload the CUDA libs so the backend can find them
-    ctypes.CDLL(os.path.join(cuda_runtime.__path__[0], cudalib), mode=ctypes.RTLD_GLOBAL)
-    ctypes.CDLL(os.path.join(cublas.__path__[0], cublaslib), mode=ctypes.RTLD_GLOBAL)
-
-
-# Find CUDA libraries from the official packages
-cuda_found = False
-if platform.system() in ("Linux", "Windows"):
-    try:
-        from nvidia import cuda_runtime, cublas
-    except ImportError:
-        pass # CUDA is optional
-    else:
-        for rtver, blasver in [("12", "12"), ("11.0", "11")]:
-            try:
-                _load_cuda(rtver, blasver)
-                cuda_found = True
-            except OSError: # dlopen() does not give specific error codes
-                pass # try the next one
+@_operator_call
+def find_cuda() -> None:
+    global cuda_found
+
+    def _load_cuda(rtver: str, blasver: str) -> None:
+        if platform.system() == "Linux":
+            cudalib = f"lib/libcudart.so.{rtver}"
+            cublaslib = f"lib/libcublas.so.{blasver}"
+        else: # Windows
+            cudalib = fr"bin\cudart64_{rtver.replace('.', '')}.dll"
+            cublaslib = fr"bin\cublas64_{blasver}.dll"
+
+        # preload the CUDA libs so the backend can find them
+        ctypes.CDLL(os.path.join(cuda_runtime.__path__[0], cudalib), mode=ctypes.RTLD_GLOBAL)
+        ctypes.CDLL(os.path.join(cublas.__path__[0], cublaslib), mode=ctypes.RTLD_GLOBAL)
+
+    # Find CUDA libraries from the official packages
+    if platform.system() in ("Linux", "Windows"):
+        try:
+            from nvidia import cuda_runtime, cublas
+        except ImportError:
+            pass # CUDA is optional
+        else:
+            for rtver, blasver in [("12", "12"), ("11.0", "11")]:
+                try:
+                    _load_cuda(rtver, blasver)
+                    cuda_found = True
+                except OSError: # dlopen() does not give specific error codes
+                    pass # try the next one


 # TODO: provide a config file to make this more robust
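Because the CUDA probe now runs inside `find_cuda` instead of at module level, the patch pre-declares `cuda_found: bool = False` and uses `global cuda_found`; without the declaration, the assignment inside the function would create a local and leave the module-level flag untouched. A minimal sketch of that pitfall (hypothetical names):

```python
flag: bool = False

def set_flag_wrong() -> None:
    flag = True  # binds a new *local* variable; the module-level flag is unchanged

def set_flag_right() -> None:
    global flag  # rebind the module-level name
    flag = True

set_flag_wrong()
assert flag is False
set_flag_right()
assert flag is True
```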
@@ -121,6 +131,7 @@ class LLModelPromptContext(ctypes.Structure):
         ("context_erase", ctypes.c_float),
     ]

+
 class LLModelGPUDevice(ctypes.Structure):
     _fields_ = [
         ("backend", ctypes.c_char_p),
@@ -131,6 +142,7 @@ class LLModelGPUDevice(ctypes.Structure):
         ("vendor", ctypes.c_char_p),
     ]

+
 # Define C function signatures using ctypes
 llmodel.llmodel_model_create.argtypes = [ctypes.c_char_p]
 llmodel.llmodel_model_create.restype = ctypes.c_void_p
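The `argtypes`/`restype` assignments above tell ctypes how to marshal arguments and interpret the return value of each C function. A small sketch using libc's `strlen` (assumes a Unix-like system where `ctypes.util.find_library("c")` resolves):

```python
import ctypes
import ctypes.util

libc = ctypes.CDLL(ctypes.util.find_library("c"))
libc.strlen.argtypes = [ctypes.c_char_p]  # takes one C string
libc.strlen.restype = ctypes.c_size_t     # returns size_t, not the default int

assert libc.strlen(b"gpt4all") == 7
```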
@@ -540,7 +552,6 @@ class LLModel:
             ctypes.c_char_p(),
         )

-
    def prompt_model_streaming(
        self, prompt: str, prompt_template: str, callback: ResponseCallbackType = empty_response_callback, **kwargs
    ) -> Iterable[str]:
@ -589,16 +600,16 @@ class LLModel:
decoded = [] decoded = []
for byte in response: for byte in response:
bits = "{:08b}".format(byte) bits = "{:08b}".format(byte)
(high_ones, _, _) = bits.partition('0') (high_ones, _, _) = bits.partition('0')
if len(high_ones) == 1: if len(high_ones) == 1:
# continuation byte # continuation byte
self.buffer.append(byte) self.buffer.append(byte)
self.buff_expecting_cont_bytes -= 1 self.buff_expecting_cont_bytes -= 1
else: else:
# beginning of a byte sequence # beginning of a byte sequence
if len(self.buffer) > 0: if len(self.buffer) > 0:
decoded.append(self.buffer.decode(errors='replace')) decoded.append(self.buffer.decode(errors='replace'))
@ -608,18 +619,18 @@ class LLModel:
self.buffer.append(byte) self.buffer.append(byte)
self.buff_expecting_cont_bytes = max(0, len(high_ones) - 1) self.buff_expecting_cont_bytes = max(0, len(high_ones) - 1)
if self.buff_expecting_cont_bytes <= 0: if self.buff_expecting_cont_bytes <= 0:
# received the whole sequence or an out of place continuation byte # received the whole sequence or an out of place continuation byte
decoded.append(self.buffer.decode(errors='replace')) decoded.append(self.buffer.decode(errors='replace'))
self.buffer.clear() self.buffer.clear()
self.buff_expecting_cont_bytes = 0 self.buff_expecting_cont_bytes = 0
if len(decoded) == 0 and self.buff_expecting_cont_bytes > 0: if len(decoded) == 0 and self.buff_expecting_cont_bytes > 0:
# wait for more continuation bytes # wait for more continuation bytes
return True return True
return callback(token_id, ''.join(decoded)) return callback(token_id, ''.join(decoded))
return _raw_callback return _raw_callback
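The buffering logic in the two hunks above is an incremental UTF-8 decoder: streamed tokens can split a multi-byte character, so continuation bytes are held until the sequence completes. A self-contained sketch of the same technique (standalone illustration, not the class above):

```python
# '€' is a 3-byte character (0xE2 0x82 0xAC); feed it split across two "tokens".
chunks = [b"ok \xe2\x82", b"\xac done"]

buffer = bytearray()
expecting = 0  # continuation bytes still owed
out = []

for chunk in chunks:
    decoded = []
    for byte in chunk:
        high_ones = "{:08b}".format(byte).partition('0')[0]
        if len(high_ones) == 1:      # 10xxxxxx: continuation byte
            buffer.append(byte)
            expecting -= 1
        else:                        # start of a new sequence (or ASCII)
            if buffer:
                decoded.append(buffer.decode(errors='replace'))
                buffer.clear()
            buffer.append(byte)
            expecting = max(0, len(high_ones) - 1)
        if expecting <= 0:           # sequence complete: flush the buffer
            decoded.append(buffer.decode(errors='replace'))
            buffer.clear()
            expecting = 0
    out.append(''.join(decoded))

assert ''.join(out) == "ok € done"
```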

View File

@@ -8,7 +8,6 @@ import os
 import platform
 import re
 import sys
-import time
 import warnings
 from contextlib import contextmanager
 from pathlib import Path