Mirror of https://github.com/nomic-ai/gpt4all.git (synced 2025-09-05 18:40:50 +00:00)
python: do not print GPU name with verbose=False, expose this info via properties (#2222)
* llamamodel: only print device used in verbose mode
  Signed-off-by: Jared Van Bortel <jared@nomic.ai>
* python: expose backend and device via GPT4All properties
  Signed-off-by: Jared Van Bortel <jared@nomic.ai>
* backend: const correctness fixes
  Signed-off-by: Jared Van Bortel <jared@nomic.ai>
* python: bump version
  Signed-off-by: Jared Van Bortel <jared@nomic.ai>
* python: typing fixups
  Signed-off-by: Jared Van Bortel <jared@nomic.ai>
* python: fix segfault with closed GPT4All
  Signed-off-by: Jared Van Bortel <jared@nomic.ai>

---------

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
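Taken together, the changes below stop the bindings from printing the GPU name unless verbose mode is enabled and instead expose the same information as read-only properties. A minimal usage sketch, assuming the public GPT4All class from these bindings; the model filename is only a placeholder:

    from gpt4all import GPT4All

    # with verbose=False the device name is no longer printed at load time
    model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf", verbose=False)

    # the same information is now available programmatically
    print(model.backend)  # "cpu", "kompute", or "metal"
    print(model.device)   # GPU name when the Kompute backend is active, otherwise None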
@@ -9,7 +9,7 @@ import sys
 import threading
 from enum import Enum
 from queue import Queue
-from typing import TYPE_CHECKING, Any, Callable, Generic, Iterable, NoReturn, TypeVar, overload
+from typing import TYPE_CHECKING, Any, Callable, Generic, Iterable, Literal, NoReturn, TypeVar, overload
 
 if sys.version_info >= (3, 9):
     import importlib.resources as importlib_resources
@@ -158,6 +158,12 @@ llmodel.llmodel_gpu_init_gpu_device_by_int.restype = ctypes.c_bool
 llmodel.llmodel_has_gpu_device.argtypes = [ctypes.c_void_p]
 llmodel.llmodel_has_gpu_device.restype = ctypes.c_bool
 
+llmodel.llmodel_model_backend_name.argtypes = [ctypes.c_void_p]
+llmodel.llmodel_model_backend_name.restype = ctypes.c_char_p
+
+llmodel.llmodel_model_gpu_device_name.argtypes = [ctypes.c_void_p]
+llmodel.llmodel_model_gpu_device_name.restype = ctypes.c_char_p
+
 ResponseCallbackType = Callable[[int, str], bool]
 RawResponseCallbackType = Callable[[int, bytes], bool]
 EmbCancelCallbackType: TypeAlias = 'Callable[[list[int], str], bool]'
@@ -224,6 +230,19 @@ class LLModel:
     def _raise_closed(self) -> NoReturn:
         raise ValueError("Attempted operation on a closed LLModel")
 
+    @property
+    def backend(self) -> Literal["cpu", "kompute", "metal"]:
+        if self.model is None:
+            self._raise_closed()
+        return llmodel.llmodel_model_backend_name(self.model).decode()
+
+    @property
+    def device(self) -> str | None:
+        if self.model is None:
+            self._raise_closed()
+        dev = llmodel.llmodel_model_gpu_device_name(self.model)
+        return None if dev is None else dev.decode()
+
     @staticmethod
     def list_gpus(mem_required: int = 0) -> list[str]:
         """
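The None check in the new device property relies on a ctypes conversion rule: a foreign function whose restype is ctypes.c_char_p returns Python bytes, or None when the C function returns NULL. A standalone sketch of that behavior, using libc's getenv purely as a stand-in for the llmodel calls:

    import ctypes
    import ctypes.util

    libc = ctypes.CDLL(ctypes.util.find_library("c"))
    libc.getenv.argtypes = [ctypes.c_char_p]
    libc.getenv.restype = ctypes.c_char_p    # NULL maps to None, anything else to bytes

    print(libc.getenv(b"PATH"))                    # bytes, e.g. b'/usr/bin:...'
    print(libc.getenv(b"GPT4ALL_UNSET_VAR_XYZ"))   # None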
@@ -333,22 +352,23 @@ class LLModel:
 
     @overload
     def generate_embeddings(
-        self, text: str, prefix: str, dimensionality: int, do_mean: bool, atlas: bool, cancel_cb: EmbCancelCallbackType,
+        self, text: str, prefix: str | None, dimensionality: int, do_mean: bool, atlas: bool,
+        cancel_cb: EmbCancelCallbackType | None,
     ) -> EmbedResult[list[float]]: ...
     @overload
     def generate_embeddings(
         self, text: list[str], prefix: str | None, dimensionality: int, do_mean: bool, atlas: bool,
-        cancel_cb: EmbCancelCallbackType,
+        cancel_cb: EmbCancelCallbackType | None,
     ) -> EmbedResult[list[list[float]]]: ...
     @overload
     def generate_embeddings(
         self, text: str | list[str], prefix: str | None, dimensionality: int, do_mean: bool, atlas: bool,
-        cancel_cb: EmbCancelCallbackType,
+        cancel_cb: EmbCancelCallbackType | None,
     ) -> EmbedResult[list[Any]]: ...
 
     def generate_embeddings(
         self, text: str | list[str], prefix: str | None, dimensionality: int, do_mean: bool, atlas: bool,
-        cancel_cb: EmbCancelCallbackType,
+        cancel_cb: EmbCancelCallbackType | None,
     ) -> EmbedResult[list[Any]]:
         if not text:
             raise ValueError("text must not be None or empty")
@@ -368,11 +388,11 @@ class LLModel:
         for i, t in enumerate(text):
             c_texts[i] = t.encode()
 
-        def wrap_cancel_cb(batch_sizes: ctypes.POINTER(ctypes.c_uint), n_batch: int, backend: bytes) -> bool:
+        def wrap_cancel_cb(batch_sizes: Any, n_batch: int, backend: bytes) -> bool:
             assert cancel_cb is not None
             return cancel_cb(batch_sizes[:n_batch], backend.decode())
 
-        cancel_cb_wrapper = EmbCancelCallback(0x0 if cancel_cb is None else wrap_cancel_cb)
+        cancel_cb_wrapper = EmbCancelCallback() if cancel_cb is None else EmbCancelCallback(wrap_cancel_cb)
 
         # generate the embeddings
         embedding_ptr = llmodel.llmodel_embed(
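The cancel_cb_wrapper change above leans on another ctypes detail: instantiating a CFUNCTYPE prototype with no arguments produces a NULL function pointer, which the C side can test for and skip. A small illustration with a made-up prototype (not the actual EmbCancelCallback signature):

    import ctypes

    # illustrative prototype only: bool cb(unsigned n_batch, const char *backend)
    DemoCancelCallback = ctypes.CFUNCTYPE(ctypes.c_bool, ctypes.c_uint, ctypes.c_char_p)

    null_cb = DemoCancelCallback()        # no argument -> NULL function pointer
    print(bool(null_cb))                  # False: C receives a null pointer

    real_cb = DemoCancelCallback(lambda n_batch, backend: True)
    print(bool(real_cb))                  # True: wraps the Python callable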
@@ -226,6 +226,16 @@ class GPT4All:
         """Delete the model instance and free associated system resources."""
         self.model.close()
 
+    @property
+    def backend(self) -> Literal["cpu", "kompute", "metal"]:
+        """The name of the llama.cpp backend currently in use. One of "cpu", "kompute", or "metal"."""
+        return self.model.backend
+
+    @property
+    def device(self) -> str | None:
+        """The name of the GPU device currently in use, or None for backends other than Kompute."""
+        return self.model.device
+
     @property
     def current_chat_session(self) -> list[MessageType] | None:
         return None if self._history is None else list(self._history)
@@ -68,7 +68,7 @@ def get_long_description():
 
 setup(
     name=package_name,
-    version="2.5.2",
+    version="2.6.0",
     description="Python bindings for GPT4All",
     long_description=get_long_description(),
     long_description_content_type="text/markdown",