python: fix CalledProcessError on Intel Macs since v2.8.0 (#3045)
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
parent 8e3108fe1f
commit a59ec91369
@@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
### Changed

- Rebase llama.cpp on latest upstream as of September 26th ([#2998](https://github.com/nomic-ai/gpt4all/pull/2998))
- Change the error message when a message is too long ([#3004](https://github.com/nomic-ai/gpt4all/pull/3004))
+- Fix CalledProcessError on Intel Macs since v2.8.0 ([#3045](https://github.com/nomic-ai/gpt4all/pull/3045))

## [2.8.2] - 2024-08-14
@@ -3,7 +3,6 @@ from __future__ import annotations
import ctypes
import os
import platform
import re
import subprocess
import sys
import textwrap
@@ -28,16 +27,25 @@ if TYPE_CHECKING:

EmbeddingsType = TypeVar('EmbeddingsType', bound='list[Any]')

+cuda_found: bool = False
+
+
+# TODO(jared): use operator.call after we drop python 3.10 support
+def _operator_call(obj, /, *args, **kwargs):
+    return obj(*args, **kwargs)
+
+
-# Detect Rosetta 2
-if platform.system() == "Darwin" and platform.processor() == "i386":
-    if subprocess.run(
-        "sysctl -n sysctl.proc_translated".split(), check=True, capture_output=True, text=True,
-    ).stdout.strip() == "1":
-        raise RuntimeError(textwrap.dedent("""\
-            Running GPT4All under Rosetta is not supported due to CPU feature requirements.
-            Please install GPT4All in an environment that uses a native ARM64 Python interpreter.
-            """).strip())
+@_operator_call
+def check_rosetta() -> None:
+    if platform.system() == "Darwin" and platform.processor() == "i386":
+        p = subprocess.run("sysctl -n sysctl.proc_translated".split(), capture_output=True, text=True)
+        if p.returncode == 0 and p.stdout.strip() == "1":
+            raise RuntimeError(textwrap.dedent("""\
+                Running GPT4All under Rosetta is not supported due to CPU feature requirements.
+                Please install GPT4All in an environment that uses a native ARM64 Python interpreter.
+                """).strip())


# Check for C++ runtime libraries
if platform.system() == "Windows":
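The behavioral fix is the hunk above. On Intel Macs `platform.processor()` also reports "i386", but the `sysctl.proc_translated` key does not exist on Intel hardware, so `sysctl -n sysctl.proc_translated` exits non-zero there; with `check=True`, that non-zero exit raised `CalledProcessError` the moment the module was imported. The new `check_rosetta` drops `check=True` and only treats a successful run whose output is "1" as Rosetta. A minimal standalone sketch of the two patterns (illustration only, not part of the diff; assumes a macOS shell where `sysctl` exists):

import subprocess

cmd = "sysctl -n sysctl.proc_translated".split()

# Old pattern: check=True converts the non-zero exit on Intel Macs into
# subprocess.CalledProcessError at import time.
#     subprocess.run(cmd, check=True, capture_output=True, text=True)

# New pattern: inspect the CompletedProcess instead of letting it raise.
p = subprocess.run(cmd, capture_output=True, text=True)
under_rosetta = p.returncode == 0 and p.stdout.strip() == "1"
print(under_rosetta)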
@@ -53,33 +61,35 @@ if platform.system() == "Windows":
            """), file=sys.stderr)


-def _load_cuda(rtver: str, blasver: str) -> None:
-    if platform.system() == "Linux":
-        cudalib = f"lib/libcudart.so.{rtver}"
-        cublaslib = f"lib/libcublas.so.{blasver}"
-    else: # Windows
-        cudalib = fr"bin\cudart64_{rtver.replace('.', '')}.dll"
-        cublaslib = fr"bin\cublas64_{blasver}.dll"
-
-    # preload the CUDA libs so the backend can find them
-    ctypes.CDLL(os.path.join(cuda_runtime.__path__[0], cudalib), mode=ctypes.RTLD_GLOBAL)
-    ctypes.CDLL(os.path.join(cublas.__path__[0], cublaslib), mode=ctypes.RTLD_GLOBAL)
-
-
-# Find CUDA libraries from the official packages
-cuda_found = False
-if platform.system() in ("Linux", "Windows"):
-    try:
-        from nvidia import cuda_runtime, cublas
-    except ImportError:
-        pass  # CUDA is optional
-    else:
-        for rtver, blasver in [("12", "12"), ("11.0", "11")]:
-            try:
-                _load_cuda(rtver, blasver)
-                cuda_found = True
-            except OSError:  # dlopen() does not give specific error codes
-                pass  # try the next one
+@_operator_call
+def find_cuda() -> None:
+    global cuda_found
+
+    def _load_cuda(rtver: str, blasver: str) -> None:
+        if platform.system() == "Linux":
+            cudalib = f"lib/libcudart.so.{rtver}"
+            cublaslib = f"lib/libcublas.so.{blasver}"
+        else: # Windows
+            cudalib = fr"bin\cudart64_{rtver.replace('.', '')}.dll"
+            cublaslib = fr"bin\cublas64_{blasver}.dll"
+
+        # preload the CUDA libs so the backend can find them
+        ctypes.CDLL(os.path.join(cuda_runtime.__path__[0], cudalib), mode=ctypes.RTLD_GLOBAL)
+        ctypes.CDLL(os.path.join(cublas.__path__[0], cublaslib), mode=ctypes.RTLD_GLOBAL)
+
+    # Find CUDA libraries from the official packages
+    if platform.system() in ("Linux", "Windows"):
+        try:
+            from nvidia import cuda_runtime, cublas
+        except ImportError:
+            pass  # CUDA is optional
+        else:
+            for rtver, blasver in [("12", "12"), ("11.0", "11")]:
+                try:
+                    _load_cuda(rtver, blasver)
+                    cuda_found = True
+                except OSError:  # dlopen() does not give specific error codes
+                    pass  # try the next one


# TODO: provide a config file to make this more robust
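Both new wrappers lean on the small `_operator_call` helper introduced earlier: decorating a function with it calls the function immediately, so `check_rosetta` and `find_cuda` each run exactly once at import while their temporaries (the `CompletedProcess`, the library paths, the now-nested `_load_cuda`) stay out of module scope, and `find_cuda` publishes its single result via `global cuda_found`. The preload calls use `ctypes.CDLL(..., mode=ctypes.RTLD_GLOBAL)` so the CUDA runtime's symbols are visible to the backend library loaded afterwards. A minimal sketch of the run-once decorator idiom (illustrative names, not from the diff); on Python 3.11+ the stdlib `operator.call` could play the same role, as the TODO notes:

def _operator_call(obj, /, *args, **kwargs):
    return obj(*args, **kwargs)

feature_found: bool = False

@_operator_call
def _detect_feature() -> None:
    # Runs right here, once, at import time.
    global feature_found
    scratch = ["values", "that", "stay", "local"]  # not leaked into module scope
    feature_found = len(scratch) > 0

print(feature_found)    # True
print(_detect_feature)  # None -- the name is rebound to the call's result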
@@ -121,6 +131,7 @@ class LLModelPromptContext(ctypes.Structure):
        ("context_erase", ctypes.c_float),
    ]

+
class LLModelGPUDevice(ctypes.Structure):
    _fields_ = [
        ("backend", ctypes.c_char_p),
@@ -131,6 +142,7 @@ class LLModelGPUDevice(ctypes.Structure):
        ("vendor", ctypes.c_char_p),
    ]

+
# Define C function signatures using ctypes
llmodel.llmodel_model_create.argtypes = [ctypes.c_char_p]
llmodel.llmodel_model_create.restype = ctypes.c_void_p
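For context, the structures and signature assignments touched by these whitespace-only hunks follow the standard ctypes pattern: `_fields_` maps each C struct member to a ctypes type, and `argtypes`/`restype` declare an exported function's C signature before it is called. A small self-contained sketch of the same pattern (illustrative names; uses the already-loaded C runtime rather than libllmodel):

import ctypes

class ExampleDevice(ctypes.Structure):
    _fields_ = [
        ("backend", ctypes.c_char_p),
        ("index",   ctypes.c_int32),
    ]

libc = ctypes.CDLL(None)                   # handle to the C runtime (POSIX only)
libc.strlen.argtypes = [ctypes.c_char_p]   # declare the signature, as done for llmodel_* above
libc.strlen.restype = ctypes.c_size_t
print(libc.strlen(b"gpt4all"))             # 7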
@@ -540,7 +552,6 @@ class LLModel:
            ctypes.c_char_p(),
        )

-
    def prompt_model_streaming(
        self, prompt: str, prompt_template: str, callback: ResponseCallbackType = empty_response_callback, **kwargs
    ) -> Iterable[str]:
@@ -589,16 +600,16 @@ class LLModel:
            decoded = []

            for byte in response:

                bits = "{:08b}".format(byte)
                (high_ones, _, _) = bits.partition('0')

                if len(high_ones) == 1:
                    # continuation byte
                    self.buffer.append(byte)
                    self.buff_expecting_cont_bytes -= 1

                else:
                    # beginning of a byte sequence
                    if len(self.buffer) > 0:
                        decoded.append(self.buffer.decode(errors='replace'))
@@ -608,18 +619,18 @@ class LLModel:
                    self.buffer.append(byte)
                    self.buff_expecting_cont_bytes = max(0, len(high_ones) - 1)

                if self.buff_expecting_cont_bytes <= 0:
                    # received the whole sequence or an out of place continuation byte
                    decoded.append(self.buffer.decode(errors='replace'))

                    self.buffer.clear()
                    self.buff_expecting_cont_bytes = 0

            if len(decoded) == 0 and self.buff_expecting_cont_bytes > 0:
                # wait for more continuation bytes
                return True

            return callback(token_id, ''.join(decoded))

        return _raw_callback
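The two hunks above appear to touch only whitespace in the streaming callback's incremental UTF-8 handling, but the logic is worth spelling out: `"{:08b}".format(byte)` plus `bits.partition('0')` counts a byte's leading 1-bits; zero leading ones means a complete single-byte (ASCII) character, exactly one marks a continuation byte, and n >= 2 starts an n-byte sequence, so the buffer expects n-1 continuation bytes before it can be decoded. A standalone sketch of that classification (illustration only, not part of the diff):

def classify(byte: int) -> str:
    bits = "{:08b}".format(byte)
    high_ones, _, _ = bits.partition('0')   # run of leading 1-bits
    if len(high_ones) == 0:
        return "single-byte character"
    if len(high_ones) == 1:
        return "continuation byte"
    return f"start of a {len(high_ones)}-byte sequence"

print([classify(b) for b in "☃".encode()])
# ['start of a 3-byte sequence', 'continuation byte', 'continuation byte']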
@@ -8,7 +8,6 @@ import os
import platform
import re
import sys
import time
import warnings
from contextlib import contextmanager
from pathlib import Path