Mirror of https://github.com/nomic-ai/gpt4all.git (synced 2025-06-22 21:48:23 +00:00)
python: do not print GPU name with verbose=False, expose this info via properties (#2222)
* llamamodel: only print device used in verbose mode
* python: expose backend and device via GPT4All properties
* backend: const correctness fixes
* python: bump version
* python: typing fixups
* python: fix segfault with closed GPT4All

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
parent: 271d752701
commit: ba53ab5da0
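For orientation before the diff: with verbose left False, the Python bindings no longer echo the "llama.cpp: using Vulkan on ..." line to stderr; the same information is instead exposed as properties. A minimal usage sketch (the model file name and the device argument are illustrative, not part of this commit):

    from gpt4all import GPT4All

    # With verbose=False (the default), loading no longer prints the Vulkan device name.
    model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf", device="gpu", verbose=False)

    print(model.backend)  # "cpu", "kompute", or "metal"
    print(model.device)   # GPU device name when the Kompute backend is active, otherwise None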
@@ -364,8 +364,10 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
     d_ptr->end_tokens = {llama_token_eos(d_ptr->model)};
 
 #ifdef GGML_USE_KOMPUTE
-    if (usingGPUDevice() && ggml_vk_has_device()) {
-        std::cerr << "llama.cpp: using Vulkan on " << ggml_vk_current_device().name << std::endl;
+    if (usingGPUDevice()) {
+        if (llama_verbose()) {
+            std::cerr << "llama.cpp: using Vulkan on " << ggml_vk_current_device().name << std::endl;
+        }
         d_ptr->backend_name = "kompute";
     }
 #endif
@@ -558,7 +560,7 @@ bool LLamaModel::initializeGPUDevice(int device, std::string *unavail_reason) const
 #endif
 }
 
-bool LLamaModel::hasGPUDevice()
+bool LLamaModel::hasGPUDevice() const
 {
 #if defined(GGML_USE_KOMPUTE)
     return d_ptr->device != -1;
@@ -567,10 +569,12 @@ bool LLamaModel::hasGPUDevice()
 #endif
 }
 
-bool LLamaModel::usingGPUDevice()
+bool LLamaModel::usingGPUDevice() const
 {
 #if defined(GGML_USE_KOMPUTE)
-    return hasGPUDevice() && d_ptr->model_params.n_gpu_layers > 0;
+    bool hasDevice = hasGPUDevice() && d_ptr->model_params.n_gpu_layers > 0;
+    assert(!hasDevice || ggml_vk_has_device());
+    return hasDevice;
 #elif defined(GGML_USE_METAL)
     return true;
 #else
@@ -578,6 +582,19 @@ bool LLamaModel::usingGPUDevice()
 #endif
 }
 
+const char *LLamaModel::backendName() const {
+    return d_ptr->backend_name;
+}
+
+const char *LLamaModel::gpuDeviceName() const {
+#if defined(GGML_USE_KOMPUTE)
+    if (usingGPUDevice()) {
+        return ggml_vk_current_device().name;
+    }
+#endif
+    return nullptr;
+}
+
 void llama_batch_add(
         struct llama_batch & batch,
         llama_token id,
@@ -33,8 +33,10 @@ public:
     std::vector<GPUDevice> availableGPUDevices(size_t memoryRequired) const override;
     bool initializeGPUDevice(size_t memoryRequired, const std::string &name) const override;
    bool initializeGPUDevice(int device, std::string *unavail_reason = nullptr) const override;
-    bool hasGPUDevice() override;
-    bool usingGPUDevice() override;
+    bool hasGPUDevice() const override;
+    bool usingGPUDevice() const override;
+    const char *backendName() const override;
+    const char *gpuDeviceName() const override;
 
     size_t embeddingSize() const override;
     // user-specified prefix
@@ -144,8 +144,10 @@ public:
         return false;
     }
 
-    virtual bool hasGPUDevice() { return false; }
-    virtual bool usingGPUDevice() { return false; }
+    virtual bool hasGPUDevice() const { return false; }
+    virtual bool usingGPUDevice() const { return false; }
+    virtual const char *backendName() const { return "cpu"; }
+    virtual const char *gpuDeviceName() const { return nullptr; }
 
     void setProgressCallback(ProgressCallback callback) { m_progressCallback = callback; }
 
@@ -283,6 +283,18 @@ bool llmodel_gpu_init_gpu_device_by_int(llmodel_model model, int device)
 
 bool llmodel_has_gpu_device(llmodel_model model)
 {
-    auto *wrapper = static_cast<LLModelWrapper *>(model);
+    const auto *wrapper = static_cast<LLModelWrapper *>(model);
     return wrapper->llModel->hasGPUDevice();
 }
+
+const char *llmodel_model_backend_name(llmodel_model model)
+{
+    const auto *wrapper = static_cast<LLModelWrapper *>(model);
+    return wrapper->llModel->backendName();
+}
+
+const char *llmodel_model_gpu_device_name(llmodel_model model)
+{
+    const auto *wrapper = static_cast<LLModelWrapper *>(model);
+    return wrapper->llModel->gpuDeviceName();
+}
@@ -295,6 +295,16 @@ bool llmodel_gpu_init_gpu_device_by_int(llmodel_model model, int device);
  */
 bool llmodel_has_gpu_device(llmodel_model model);
 
+/**
+ * @return The name of the llama.cpp backend currently in use. One of "cpu", "kompute", or "metal".
+ */
+const char *llmodel_model_backend_name(llmodel_model model);
+
+/**
+ * @return The name of the GPU device currently in use, or NULL for backends other than Kompute.
+ */
+const char *llmodel_model_gpu_device_name(llmodel_model model);
+
 #ifdef __cplusplus
 }
 #endif
@@ -9,7 +9,7 @@ import sys
 import threading
 from enum import Enum
 from queue import Queue
-from typing import TYPE_CHECKING, Any, Callable, Generic, Iterable, NoReturn, TypeVar, overload
+from typing import TYPE_CHECKING, Any, Callable, Generic, Iterable, Literal, NoReturn, TypeVar, overload
 
 if sys.version_info >= (3, 9):
     import importlib.resources as importlib_resources
@@ -158,6 +158,12 @@ llmodel.llmodel_gpu_init_gpu_device_by_int.restype = ctypes.c_bool
 llmodel.llmodel_has_gpu_device.argtypes = [ctypes.c_void_p]
 llmodel.llmodel_has_gpu_device.restype = ctypes.c_bool
 
+llmodel.llmodel_model_backend_name.argtypes = [ctypes.c_void_p]
+llmodel.llmodel_model_backend_name.restype = ctypes.c_char_p
+
+llmodel.llmodel_model_gpu_device_name.argtypes = [ctypes.c_void_p]
+llmodel.llmodel_model_gpu_device_name.restype = ctypes.c_char_p
+
 ResponseCallbackType = Callable[[int, str], bool]
 RawResponseCallbackType = Callable[[int, bytes], bool]
 EmbCancelCallbackType: TypeAlias = 'Callable[[list[int], str], bool]'
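A side note on the ctypes prototypes above, not part of the diff: declaring restype as ctypes.c_char_p makes ctypes convert a returned char * to Python bytes and a NULL pointer to None, which is why the property wrappers below call .decode() and pass None through. A standalone sketch of that conversion using libc's getenv (assumes a POSIX system where ctypes.CDLL(None) loads the C library):

    import ctypes

    libc = ctypes.CDLL(None)  # assumption: POSIX; Windows needs a different library handle
    libc.getenv.argtypes = [ctypes.c_char_p]
    libc.getenv.restype = ctypes.c_char_p

    print(libc.getenv(b"PATH"))                  # bytes, e.g. b"/usr/bin:..."
    print(libc.getenv(b"NO_SUCH_VARIABLE_XYZ"))  # None, because getenv returned NULL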
@@ -224,6 +230,19 @@ class LLModel:
     def _raise_closed(self) -> NoReturn:
         raise ValueError("Attempted operation on a closed LLModel")
 
+    @property
+    def backend(self) -> Literal["cpu", "kompute", "metal"]:
+        if self.model is None:
+            self._raise_closed()
+        return llmodel.llmodel_model_backend_name(self.model).decode()
+
+    @property
+    def device(self) -> str | None:
+        if self.model is None:
+            self._raise_closed()
+        dev = llmodel.llmodel_model_gpu_device_name(self.model)
+        return None if dev is None else dev.decode()
+
     @staticmethod
     def list_gpus(mem_required: int = 0) -> list[str]:
         """
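The `if self.model is None` guard in these properties is what the "fix segfault with closed GPT4All" item refers to: once the handle is cleared, accessing a property raises the existing ValueError instead of handing a stale pointer to the C library. A sketch of the observable behavior (model name illustrative; assumes close() clears the underlying handle, as the guard implies):

    from gpt4all import GPT4All

    model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf")
    print(model.backend, model.device)

    model.close()
    try:
        _ = model.backend  # underlying handle is gone
    except ValueError as exc:
        print(exc)         # "Attempted operation on a closed LLModel"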
@@ -333,22 +352,23 @@ class LLModel:
 
     @overload
     def generate_embeddings(
-        self, text: str, prefix: str, dimensionality: int, do_mean: bool, atlas: bool, cancel_cb: EmbCancelCallbackType,
+        self, text: str, prefix: str | None, dimensionality: int, do_mean: bool, atlas: bool,
+        cancel_cb: EmbCancelCallbackType | None,
     ) -> EmbedResult[list[float]]: ...
     @overload
     def generate_embeddings(
         self, text: list[str], prefix: str | None, dimensionality: int, do_mean: bool, atlas: bool,
-        cancel_cb: EmbCancelCallbackType,
+        cancel_cb: EmbCancelCallbackType | None,
     ) -> EmbedResult[list[list[float]]]: ...
     @overload
     def generate_embeddings(
         self, text: str | list[str], prefix: str | None, dimensionality: int, do_mean: bool, atlas: bool,
-        cancel_cb: EmbCancelCallbackType,
+        cancel_cb: EmbCancelCallbackType | None,
     ) -> EmbedResult[list[Any]]: ...
 
     def generate_embeddings(
         self, text: str | list[str], prefix: str | None, dimensionality: int, do_mean: bool, atlas: bool,
-        cancel_cb: EmbCancelCallbackType,
+        cancel_cb: EmbCancelCallbackType | None,
     ) -> EmbedResult[list[Any]]:
         if not text:
             raise ValueError("text must not be None or empty")
@@ -368,11 +388,11 @@ class LLModel:
         for i, t in enumerate(text):
             c_texts[i] = t.encode()
 
-        def wrap_cancel_cb(batch_sizes: ctypes.POINTER(ctypes.c_uint), n_batch: int, backend: bytes) -> bool:
+        def wrap_cancel_cb(batch_sizes: Any, n_batch: int, backend: bytes) -> bool:
             assert cancel_cb is not None
             return cancel_cb(batch_sizes[:n_batch], backend.decode())
 
-        cancel_cb_wrapper = EmbCancelCallback(0x0 if cancel_cb is None else wrap_cancel_cb)
+        cancel_cb_wrapper = EmbCancelCallback() if cancel_cb is None else EmbCancelCallback(wrap_cancel_cb)
 
         # generate the embeddings
         embedding_ptr = llmodel.llmodel_embed(
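On the EmbCancelCallback change above: instantiating a ctypes CFUNCTYPE prototype with no arguments yields a NULL function pointer, so the "no callback" case no longer has to pass an integer 0x0 through the constructor, and the expression now type-checks with cancel_cb being Optional. A standalone sketch (the prototype below mirrors the wrapper's shape and is an assumption, not the exact definition from _pyllmodel.py):

    import ctypes

    EmbCancelCallback = ctypes.CFUNCTYPE(
        ctypes.c_bool, ctypes.POINTER(ctypes.c_uint), ctypes.c_uint, ctypes.c_char_p)

    def my_cancel(batch_sizes, n_batch, backend):
        # Example policy: cancel if any batch is unexpectedly large.
        return any(batch_sizes[i] > 4096 for i in range(n_batch))

    null_cb = EmbCancelCallback()           # NULL function pointer: "no callback"
    wrapped = EmbCancelCallback(my_cancel)  # C-callable wrapper around the Python function
    print(bool(null_cb), bool(wrapped))     # False True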
@@ -226,6 +226,16 @@ class GPT4All:
         """Delete the model instance and free associated system resources."""
         self.model.close()
 
+    @property
+    def backend(self) -> Literal["cpu", "kompute", "metal"]:
+        """The name of the llama.cpp backend currently in use. One of "cpu", "kompute", or "metal"."""
+        return self.model.backend
+
+    @property
+    def device(self) -> str | None:
+        """The name of the GPU device currently in use, or None for backends other than Kompute."""
+        return self.model.device
+
     @property
     def current_chat_session(self) -> list[MessageType] | None:
         return None if self._history is None else list(self._history)
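Taken together, the two GPT4All properties let callers branch on the active backend rather than scraping stderr. A hedged sketch of that pattern (model name and device argument are illustrative):

    from gpt4all import GPT4All

    model = GPT4All("orca-mini-3b-gguf2-q4_0.gguf", device="gpu")
    if model.backend == "kompute":
        print(f"GPU inference via Vulkan on {model.device}")
    elif model.backend == "metal":
        print("GPU inference via Apple Metal (no device name reported)")
    else:
        print("CPU inference")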
@@ -68,7 +68,7 @@ def get_long_description():
 
 setup(
     name=package_name,
-    version="2.5.2",
+    version="2.6.0",
     description="Python bindings for GPT4All",
     long_description=get_long_description(),
     long_description_content_type="text/markdown",