python: do not print GPU name with verbose=False, expose this info via properties (#2222)
* llamamodel: only print device used in verbose mode
* python: expose backend and device via GPT4All properties
* backend: const correctness fixes
* python: bump version
* python: typing fixups
* python: fix segfault with closed GPT4All

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
@@ -364,8 +364,10 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
     d_ptr->end_tokens = {llama_token_eos(d_ptr->model)};
 
 #ifdef GGML_USE_KOMPUTE
-    if (usingGPUDevice() && ggml_vk_has_device()) {
-        std::cerr << "llama.cpp: using Vulkan on " << ggml_vk_current_device().name << std::endl;
+    if (usingGPUDevice()) {
+        if (llama_verbose()) {
+            std::cerr << "llama.cpp: using Vulkan on " << ggml_vk_current_device().name << std::endl;
+        }
+        d_ptr->backend_name = "kompute";
     }
 #endif
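The log line is now gated on a llama_verbose() helper whose definition is not part of this hunk. For orientation only, a minimal sketch of such a helper, assuming it reads an environment variable once (the variable name below is an assumption, not confirmed by this diff):

    #include <cstdlib>

    // Minimal sketch, not necessarily the actual gpt4all implementation.
    // Resolves the flag once and caches it for later calls.
    static bool llama_verbose() {
        static const bool verbose = [] {
            const char *v = std::getenv("GPT4ALL_VERBOSE_LLAMACPP");  // assumed name
            return v != nullptr && *v != '\0';
        }();
        return verbose;
    }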
@@ -558,7 +560,7 @@ bool LLamaModel::initializeGPUDevice(int device, std::string *unavail_reason) const
 #endif
 }
 
-bool LLamaModel::hasGPUDevice()
+bool LLamaModel::hasGPUDevice() const
 {
 #if defined(GGML_USE_KOMPUTE)
     return d_ptr->device != -1;
@@ -567,10 +569,12 @@ bool LLamaModel::hasGPUDevice()
 #endif
 }
 
-bool LLamaModel::usingGPUDevice()
+bool LLamaModel::usingGPUDevice() const
 {
 #if defined(GGML_USE_KOMPUTE)
-    return hasGPUDevice() && d_ptr->model_params.n_gpu_layers > 0;
+    bool hasDevice = hasGPUDevice() && d_ptr->model_params.n_gpu_layers > 0;
+    assert(!hasDevice || ggml_vk_has_device());
+    return hasDevice;
 #elif defined(GGML_USE_METAL)
     return true;
 #else
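The assertion documents an invariant rather than enforcing it at runtime: whenever layers are offloaded, a Vulkan device must actually be bound, which is why the ggml_vk_has_device() check that used to guard the log line in loadModel() could be dropped. assert(!A || B) reads as "A implies B" and compiles away under NDEBUG. A toy, self-contained illustration of the pattern (all names invented for the example):

    #include <cassert>

    // Toy example of the compute / assert-invariant / return pattern above.
    // All names here are invented; this is not gpt4all code.
    bool using_gpu(int device, int n_gpu_layers, bool vk_device_bound) {
        bool hasDevice = (device != -1) && (n_gpu_layers > 0);
        assert(!hasDevice || vk_device_bound);  // "hasDevice implies bound"; no-op under NDEBUG
        return hasDevice;
    }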
@@ -578,6 +582,19 @@ bool LLamaModel::usingGPUDevice()
 #endif
 }
 
+const char *LLamaModel::backendName() const {
+    return d_ptr->backend_name;
+}
+
+const char *LLamaModel::gpuDeviceName() const {
+#if defined(GGML_USE_KOMPUTE)
+    if (usingGPUDevice()) {
+        return ggml_vk_current_device().name;
+    }
+#endif
+    return nullptr;
+}
+
 void llama_batch_add(
         struct llama_batch & batch,
         llama_token id,
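The d_ptr->backend_name field returned by backendName() is declared outside this diff, presumably in the LLamaPrivate pimpl struct. A plausible sketch of the supporting declaration (the struct name follows the existing d_ptr usage; the "cpu" initializer is an assumption based on the documented value set, not taken from this diff):

    // Assumed sketch of the pimpl field backing backendName(); other
    // members elided. The initializer is a guess consistent with the
    // "cpu" / "kompute" / "metal" values documented in the C header below.
    struct LLamaPrivate {
        // ...
        const char *backend_name = "cpu";  // switched to "kompute" in loadModel()
    };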
@@ -33,8 +33,10 @@ public:
     std::vector<GPUDevice> availableGPUDevices(size_t memoryRequired) const override;
     bool initializeGPUDevice(size_t memoryRequired, const std::string &name) const override;
     bool initializeGPUDevice(int device, std::string *unavail_reason = nullptr) const override;
-    bool hasGPUDevice() override;
-    bool usingGPUDevice() override;
+    bool hasGPUDevice() const override;
+    bool usingGPUDevice() const override;
+    const char *backendName() const override;
+    const char *gpuDeviceName() const override;
 
     size_t embeddingSize() const override;
     // user-specified prefix
@@ -144,8 +144,10 @@ public:
         return false;
     }
 
-    virtual bool hasGPUDevice() { return false; }
-    virtual bool usingGPUDevice() { return false; }
+    virtual bool hasGPUDevice() const { return false; }
+    virtual bool usingGPUDevice() const { return false; }
+    virtual const char *backendName() const { return "cpu"; }
+    virtual const char *gpuDeviceName() const { return nullptr; }
 
     void setProgressCallback(ProgressCallback callback) { m_progressCallback = callback; }
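A practical payoff of the const-correctness fixes: these accessors can now be called through a const reference to the model. A small caller-side sketch in C++ (the function is invented for illustration; assumes this header is included as llmodel.h):

    #include <iostream>
    #include "llmodel.h"

    // Illustration only: report device info through the C++ interface.
    // Compiles against a const reference because the accessors are const.
    void reportDevice(const LLModel &model) {
        std::cout << "backend: " << model.backendName() << '\n';
        if (const char *dev = model.gpuDeviceName())  // nullptr unless Kompute
            std::cout << "device: " << dev << '\n';
    }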
@@ -283,6 +283,18 @@ bool llmodel_gpu_init_gpu_device_by_int(llmodel_model model, int device)
 
 bool llmodel_has_gpu_device(llmodel_model model)
 {
-    auto *wrapper = static_cast<LLModelWrapper *>(model);
+    const auto *wrapper = static_cast<LLModelWrapper *>(model);
     return wrapper->llModel->hasGPUDevice();
 }
+
+const char *llmodel_model_backend_name(llmodel_model model)
+{
+    const auto *wrapper = static_cast<LLModelWrapper *>(model);
+    return wrapper->llModel->backendName();
+}
+
+const char *llmodel_model_gpu_device_name(llmodel_model model)
+{
+    const auto *wrapper = static_cast<LLModelWrapper *>(model);
+    return wrapper->llModel->gpuDeviceName();
+}
@@ -295,6 +295,16 @@ bool llmodel_gpu_init_gpu_device_by_int(llmodel_model model, int device);
  */
 bool llmodel_has_gpu_device(llmodel_model model);
 
+/**
+ * @return The name of the llama.cpp backend currently in use. One of "cpu", "kompute", or "metal".
+ */
+const char *llmodel_model_backend_name(llmodel_model model);
+
+/**
+ * @return The name of the GPU device currently in use, or NULL for backends other than Kompute.
+ */
+const char *llmodel_model_gpu_device_name(llmodel_model model);
+
 #ifdef __cplusplus
 }
 #endif
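Together these entry points let language bindings surface the backend and device without scraping stderr, which is what the new GPT4All properties in this PR build on. A minimal C++ caller sketch (invented function; assumes the header above is llmodel_c.h and that model was created and loaded through the existing llmodel C API):

    #include <cstdio>
    #include "llmodel_c.h"

    // Illustration only: print backend/device for an already-loaded model.
    void print_device_info(llmodel_model model) {
        std::printf("backend: %s\n", llmodel_model_backend_name(model));
        const char *dev = llmodel_model_gpu_device_name(model);
        std::printf("device:  %s\n", dev ? dev : "(none: non-Kompute backend)");
    }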