python: do not print GPU name with verbose=False, expose this info via properties (#2222)

* llamamodel: only print device used in verbose mode

Signed-off-by: Jared Van Bortel <jared@nomic.ai>

* python: expose backend and device via GPT4All properties

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
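
  A hedged usage sketch of the new read-only properties (the Python-side diff is not part of this excerpt, so the property names "backend" and "device" follow the commit message, and the model filename below is a placeholder):

    from gpt4all import GPT4All

    # With verbose=False the GPU name is no longer printed at load time;
    # query the backend and device explicitly instead.
    model = GPT4All("example-model.Q4_0.gguf", device="gpu", verbose=False)
    print(model.backend)  # expected: "cpu", "kompute", or "metal"
    print(model.device)   # expected: the GPU device name, or None when no GPU is in use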

* backend: const correctness fixes

Signed-off-by: Jared Van Bortel <jared@nomic.ai>

* python: bump version

Signed-off-by: Jared Van Bortel <jared@nomic.ai>

* python: typing fixups

Signed-off-by: Jared Van Bortel <jared@nomic.ai>

* python: fix segfault with closed GPT4All

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
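
  A minimal repro sketch of the crash being fixed, assuming the bindings' close() method and a placeholder model filename; before this change, using an instance after closing it could dereference a freed native handle, and afterwards it is expected to fail at the Python level instead:

    from gpt4all import GPT4All

    model = GPT4All("example-model.Q4_0.gguf")  # placeholder model filename
    model.close()             # releases the underlying llmodel handle
    model.generate("hello")   # previously risked a segfault; now expected to raise an error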

---------

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Jared Van Bortel
2024-04-18 14:52:02 -04:00
committed by GitHub
parent 271d752701
commit ba53ab5da0
8 changed files with 91 additions and 18 deletions


@@ -364,8 +364,10 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
     d_ptr->end_tokens = {llama_token_eos(d_ptr->model)};
 
 #ifdef GGML_USE_KOMPUTE
-    if (usingGPUDevice() && ggml_vk_has_device()) {
-        std::cerr << "llama.cpp: using Vulkan on " << ggml_vk_current_device().name << std::endl;
+    if (usingGPUDevice()) {
+        if (llama_verbose()) {
+            std::cerr << "llama.cpp: using Vulkan on " << ggml_vk_current_device().name << std::endl;
+        }
+        d_ptr->backend_name = "kompute";
     }
 #endif
@@ -558,7 +560,7 @@ bool LLamaModel::initializeGPUDevice(int device, std::string *unavail_reason) const
 #endif
 }
 
-bool LLamaModel::hasGPUDevice()
+bool LLamaModel::hasGPUDevice() const
 {
 #if defined(GGML_USE_KOMPUTE)
     return d_ptr->device != -1;
@@ -567,10 +569,12 @@ bool LLamaModel::hasGPUDevice()
 #endif
 }
 
-bool LLamaModel::usingGPUDevice()
+bool LLamaModel::usingGPUDevice() const
 {
 #if defined(GGML_USE_KOMPUTE)
-    return hasGPUDevice() && d_ptr->model_params.n_gpu_layers > 0;
+    bool hasDevice = hasGPUDevice() && d_ptr->model_params.n_gpu_layers > 0;
+    assert(!hasDevice || ggml_vk_has_device());
+    return hasDevice;
 #elif defined(GGML_USE_METAL)
     return true;
 #else
@@ -578,6 +582,19 @@ bool LLamaModel::usingGPUDevice()
 #endif
 }
 
+const char *LLamaModel::backendName() const {
+    return d_ptr->backend_name;
+}
+
+const char *LLamaModel::gpuDeviceName() const {
+#if defined(GGML_USE_KOMPUTE)
+    if (usingGPUDevice()) {
+        return ggml_vk_current_device().name;
+    }
+#endif
+    return nullptr;
+}
+
 void llama_batch_add(
         struct llama_batch & batch,
         llama_token id,


@@ -33,8 +33,10 @@ public:
     std::vector<GPUDevice> availableGPUDevices(size_t memoryRequired) const override;
     bool initializeGPUDevice(size_t memoryRequired, const std::string &name) const override;
     bool initializeGPUDevice(int device, std::string *unavail_reason = nullptr) const override;
-    bool hasGPUDevice() override;
-    bool usingGPUDevice() override;
+    bool hasGPUDevice() const override;
+    bool usingGPUDevice() const override;
+    const char *backendName() const override;
+    const char *gpuDeviceName() const override;
     size_t embeddingSize() const override;
 
     // user-specified prefix


@@ -144,8 +144,10 @@ public:
         return false;
     }
 
-    virtual bool hasGPUDevice() { return false; }
-    virtual bool usingGPUDevice() { return false; }
+    virtual bool hasGPUDevice() const { return false; }
+    virtual bool usingGPUDevice() const { return false; }
+    virtual const char *backendName() const { return "cpu"; }
+    virtual const char *gpuDeviceName() const { return nullptr; }
 
     void setProgressCallback(ProgressCallback callback) { m_progressCallback = callback; }


@@ -283,6 +283,18 @@ bool llmodel_gpu_init_gpu_device_by_int(llmodel_model model, int device)
 bool llmodel_has_gpu_device(llmodel_model model)
 {
-    auto *wrapper = static_cast<LLModelWrapper *>(model);
+    const auto *wrapper = static_cast<LLModelWrapper *>(model);
     return wrapper->llModel->hasGPUDevice();
 }
+
+const char *llmodel_model_backend_name(llmodel_model model)
+{
+    const auto *wrapper = static_cast<LLModelWrapper *>(model);
+    return wrapper->llModel->backendName();
+}
+
+const char *llmodel_model_gpu_device_name(llmodel_model model)
+{
+    const auto *wrapper = static_cast<LLModelWrapper *>(model);
+    return wrapper->llModel->gpuDeviceName();
+}


@@ -295,6 +295,16 @@ bool llmodel_gpu_init_gpu_device_by_int(llmodel_model model, int device);
  */
 bool llmodel_has_gpu_device(llmodel_model model);
 
+/**
+ * @return The name of the llama.cpp backend currently in use. One of "cpu", "kompute", or "metal".
+ */
+const char *llmodel_model_backend_name(llmodel_model model);
+
+/**
+ * @return The name of the GPU device currently in use, or NULL for backends other than Kompute.
+ */
+const char *llmodel_model_gpu_device_name(llmodel_model model);
+
 #ifdef __cplusplus
 }
 #endif
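
A hedged sketch of how the Python bindings' ctypes layer might surface these two new C entry points; the shared library name and the wrapper function names here are assumptions, not the bindings' actual code:

    import ctypes
    from ctypes import c_char_p, c_void_p
    from typing import Optional

    # Placeholder library path; the real bindings locate and load libllmodel themselves.
    llmodel = ctypes.CDLL("libllmodel.so")

    llmodel.llmodel_model_backend_name.argtypes = [c_void_p]
    llmodel.llmodel_model_backend_name.restype = c_char_p

    llmodel.llmodel_model_gpu_device_name.argtypes = [c_void_p]
    llmodel.llmodel_model_gpu_device_name.restype = c_char_p

    def backend_name(model: c_void_p) -> str:
        # Always non-NULL: "cpu", "kompute", or "metal".
        return llmodel.llmodel_model_backend_name(model).decode()

    def gpu_device_name(model: c_void_p) -> Optional[str]:
        # NULL (None in Python) for backends other than Kompute.
        name = llmodel.llmodel_model_gpu_device_name(model)
        return name.decode() if name is not None else None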