mirror of
https://github.com/nomic-ai/gpt4all.git
synced 2025-06-19 20:24:41 +00:00
python: fix CalledProcessError on Intel Macs since v2.8.0 (#3045)
Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
parent
8e3108fe1f
commit
a59ec91369
@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
|||||||
### Changed
|
### Changed
|
||||||
- Rebase llama.cpp on latest upstream as of September 26th ([#2998](https://github.com/nomic-ai/gpt4all/pull/2998))
|
- Rebase llama.cpp on latest upstream as of September 26th ([#2998](https://github.com/nomic-ai/gpt4all/pull/2998))
|
||||||
- Change the error message when a message is too long ([#3004](https://github.com/nomic-ai/gpt4all/pull/3004))
|
- Change the error message when a message is too long ([#3004](https://github.com/nomic-ai/gpt4all/pull/3004))
|
||||||
|
- Fix CalledProcessError on Intel Macs since v2.8.0 ([#3045](https://github.com/nomic-ai/gpt4all/pull/3045))
|
||||||
|
|
||||||
## [2.8.2] - 2024-08-14
|
## [2.8.2] - 2024-08-14
|
||||||
|
|
||||||
|
@ -3,7 +3,6 @@ from __future__ import annotations
|
|||||||
import ctypes
|
import ctypes
|
||||||
import os
|
import os
|
||||||
import platform
|
import platform
|
||||||
import re
|
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import textwrap
|
import textwrap
|
||||||
@ -28,16 +27,25 @@ if TYPE_CHECKING:
|
|||||||
|
|
||||||
EmbeddingsType = TypeVar('EmbeddingsType', bound='list[Any]')
|
EmbeddingsType = TypeVar('EmbeddingsType', bound='list[Any]')
|
||||||
|
|
||||||
|
cuda_found: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
# TODO(jared): use operator.call after we drop python 3.10 support
|
||||||
|
def _operator_call(obj, /, *args, **kwargs):
|
||||||
|
return obj(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
# Detect Rosetta 2
|
# Detect Rosetta 2
|
||||||
if platform.system() == "Darwin" and platform.processor() == "i386":
|
@_operator_call
|
||||||
if subprocess.run(
|
def check_rosetta() -> None:
|
||||||
"sysctl -n sysctl.proc_translated".split(), check=True, capture_output=True, text=True,
|
if platform.system() == "Darwin" and platform.processor() == "i386":
|
||||||
).stdout.strip() == "1":
|
p = subprocess.run("sysctl -n sysctl.proc_translated".split(), capture_output=True, text=True)
|
||||||
raise RuntimeError(textwrap.dedent("""\
|
if p.returncode == 0 and p.stdout.strip() == "1":
|
||||||
Running GPT4All under Rosetta is not supported due to CPU feature requirements.
|
raise RuntimeError(textwrap.dedent("""\
|
||||||
Please install GPT4All in an environment that uses a native ARM64 Python interpreter.
|
Running GPT4All under Rosetta is not supported due to CPU feature requirements.
|
||||||
""").strip())
|
Please install GPT4All in an environment that uses a native ARM64 Python interpreter.
|
||||||
|
""").strip())
|
||||||
|
|
||||||
|
|
||||||
# Check for C++ runtime libraries
|
# Check for C++ runtime libraries
|
||||||
if platform.system() == "Windows":
|
if platform.system() == "Windows":
|
||||||
@ -53,33 +61,35 @@ if platform.system() == "Windows":
|
|||||||
"""), file=sys.stderr)
|
"""), file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
def _load_cuda(rtver: str, blasver: str) -> None:
|
@_operator_call
|
||||||
if platform.system() == "Linux":
|
def find_cuda() -> None:
|
||||||
cudalib = f"lib/libcudart.so.{rtver}"
|
global cuda_found
|
||||||
cublaslib = f"lib/libcublas.so.{blasver}"
|
|
||||||
else: # Windows
|
|
||||||
cudalib = fr"bin\cudart64_{rtver.replace('.', '')}.dll"
|
|
||||||
cublaslib = fr"bin\cublas64_{blasver}.dll"
|
|
||||||
|
|
||||||
# preload the CUDA libs so the backend can find them
|
def _load_cuda(rtver: str, blasver: str) -> None:
|
||||||
ctypes.CDLL(os.path.join(cuda_runtime.__path__[0], cudalib), mode=ctypes.RTLD_GLOBAL)
|
if platform.system() == "Linux":
|
||||||
ctypes.CDLL(os.path.join(cublas.__path__[0], cublaslib), mode=ctypes.RTLD_GLOBAL)
|
cudalib = f"lib/libcudart.so.{rtver}"
|
||||||
|
cublaslib = f"lib/libcublas.so.{blasver}"
|
||||||
|
else: # Windows
|
||||||
|
cudalib = fr"bin\cudart64_{rtver.replace('.', '')}.dll"
|
||||||
|
cublaslib = fr"bin\cublas64_{blasver}.dll"
|
||||||
|
|
||||||
|
# preload the CUDA libs so the backend can find them
|
||||||
|
ctypes.CDLL(os.path.join(cuda_runtime.__path__[0], cudalib), mode=ctypes.RTLD_GLOBAL)
|
||||||
|
ctypes.CDLL(os.path.join(cublas.__path__[0], cublaslib), mode=ctypes.RTLD_GLOBAL)
|
||||||
|
|
||||||
# Find CUDA libraries from the official packages
|
# Find CUDA libraries from the official packages
|
||||||
cuda_found = False
|
if platform.system() in ("Linux", "Windows"):
|
||||||
if platform.system() in ("Linux", "Windows"):
|
try:
|
||||||
try:
|
from nvidia import cuda_runtime, cublas
|
||||||
from nvidia import cuda_runtime, cublas
|
except ImportError:
|
||||||
except ImportError:
|
pass # CUDA is optional
|
||||||
pass # CUDA is optional
|
else:
|
||||||
else:
|
for rtver, blasver in [("12", "12"), ("11.0", "11")]:
|
||||||
for rtver, blasver in [("12", "12"), ("11.0", "11")]:
|
try:
|
||||||
try:
|
_load_cuda(rtver, blasver)
|
||||||
_load_cuda(rtver, blasver)
|
cuda_found = True
|
||||||
cuda_found = True
|
except OSError: # dlopen() does not give specific error codes
|
||||||
except OSError: # dlopen() does not give specific error codes
|
pass # try the next one
|
||||||
pass # try the next one
|
|
||||||
|
|
||||||
|
|
||||||
# TODO: provide a config file to make this more robust
|
# TODO: provide a config file to make this more robust
|
||||||
@ -121,6 +131,7 @@ class LLModelPromptContext(ctypes.Structure):
|
|||||||
("context_erase", ctypes.c_float),
|
("context_erase", ctypes.c_float),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
class LLModelGPUDevice(ctypes.Structure):
|
class LLModelGPUDevice(ctypes.Structure):
|
||||||
_fields_ = [
|
_fields_ = [
|
||||||
("backend", ctypes.c_char_p),
|
("backend", ctypes.c_char_p),
|
||||||
@ -131,6 +142,7 @@ class LLModelGPUDevice(ctypes.Structure):
|
|||||||
("vendor", ctypes.c_char_p),
|
("vendor", ctypes.c_char_p),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
# Define C function signatures using ctypes
|
# Define C function signatures using ctypes
|
||||||
llmodel.llmodel_model_create.argtypes = [ctypes.c_char_p]
|
llmodel.llmodel_model_create.argtypes = [ctypes.c_char_p]
|
||||||
llmodel.llmodel_model_create.restype = ctypes.c_void_p
|
llmodel.llmodel_model_create.restype = ctypes.c_void_p
|
||||||
@ -540,7 +552,6 @@ class LLModel:
|
|||||||
ctypes.c_char_p(),
|
ctypes.c_char_p(),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def prompt_model_streaming(
|
def prompt_model_streaming(
|
||||||
self, prompt: str, prompt_template: str, callback: ResponseCallbackType = empty_response_callback, **kwargs
|
self, prompt: str, prompt_template: str, callback: ResponseCallbackType = empty_response_callback, **kwargs
|
||||||
) -> Iterable[str]:
|
) -> Iterable[str]:
|
||||||
@ -589,16 +600,16 @@ class LLModel:
|
|||||||
decoded = []
|
decoded = []
|
||||||
|
|
||||||
for byte in response:
|
for byte in response:
|
||||||
|
|
||||||
bits = "{:08b}".format(byte)
|
bits = "{:08b}".format(byte)
|
||||||
(high_ones, _, _) = bits.partition('0')
|
(high_ones, _, _) = bits.partition('0')
|
||||||
|
|
||||||
if len(high_ones) == 1:
|
if len(high_ones) == 1:
|
||||||
# continuation byte
|
# continuation byte
|
||||||
self.buffer.append(byte)
|
self.buffer.append(byte)
|
||||||
self.buff_expecting_cont_bytes -= 1
|
self.buff_expecting_cont_bytes -= 1
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# beginning of a byte sequence
|
# beginning of a byte sequence
|
||||||
if len(self.buffer) > 0:
|
if len(self.buffer) > 0:
|
||||||
decoded.append(self.buffer.decode(errors='replace'))
|
decoded.append(self.buffer.decode(errors='replace'))
|
||||||
@ -608,18 +619,18 @@ class LLModel:
|
|||||||
self.buffer.append(byte)
|
self.buffer.append(byte)
|
||||||
self.buff_expecting_cont_bytes = max(0, len(high_ones) - 1)
|
self.buff_expecting_cont_bytes = max(0, len(high_ones) - 1)
|
||||||
|
|
||||||
if self.buff_expecting_cont_bytes <= 0:
|
if self.buff_expecting_cont_bytes <= 0:
|
||||||
# received the whole sequence or an out of place continuation byte
|
# received the whole sequence or an out of place continuation byte
|
||||||
decoded.append(self.buffer.decode(errors='replace'))
|
decoded.append(self.buffer.decode(errors='replace'))
|
||||||
|
|
||||||
self.buffer.clear()
|
self.buffer.clear()
|
||||||
self.buff_expecting_cont_bytes = 0
|
self.buff_expecting_cont_bytes = 0
|
||||||
|
|
||||||
if len(decoded) == 0 and self.buff_expecting_cont_bytes > 0:
|
if len(decoded) == 0 and self.buff_expecting_cont_bytes > 0:
|
||||||
# wait for more continuation bytes
|
# wait for more continuation bytes
|
||||||
return True
|
return True
|
||||||
|
|
||||||
return callback(token_id, ''.join(decoded))
|
return callback(token_id, ''.join(decoded))
|
||||||
|
|
||||||
return _raw_callback
|
return _raw_callback
|
||||||
|
|
||||||
|
@ -8,7 +8,6 @@ import os
|
|||||||
import platform
|
import platform
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
import time
|
|
||||||
import warnings
|
import warnings
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
Loading…
Reference in New Issue
Block a user