python: do not print GPU name with verbose=False, expose this info via properties (#2222)

* llamamodel: only print device used in verbose mode

Signed-off-by: Jared Van Bortel <jared@nomic.ai>

* python: expose backend and device via GPT4All properties

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
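
  A hedged usage sketch of the new read-only properties (the Python-side diff is not part of this excerpt, so the property names "backend" and "device" follow the commit message, and the model filename below is a placeholder):

    from gpt4all import GPT4All

    # With verbose=False the GPU name is no longer printed at load time;
    # query the backend and device explicitly instead.
    model = GPT4All("example-model.Q4_0.gguf", device="gpu", verbose=False)
    print(model.backend)  # expected: "cpu", "kompute", or "metal"
    print(model.device)   # expected: the GPU device name, or None when no GPU is in use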

* backend: const correctness fixes

Signed-off-by: Jared Van Bortel <jared@nomic.ai>

* python: bump version

Signed-off-by: Jared Van Bortel <jared@nomic.ai>

* python: typing fixups

Signed-off-by: Jared Van Bortel <jared@nomic.ai>

* python: fix segfault with closed GPT4All

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
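
  A minimal repro sketch of the crash being fixed, assuming the bindings' close() method and a placeholder model filename; before this change, using an instance after closing it could dereference a freed native handle, and afterwards it is expected to fail at the Python level instead:

    from gpt4all import GPT4All

    model = GPT4All("example-model.Q4_0.gguf")  # placeholder model filename
    model.close()             # releases the underlying llmodel handle
    model.generate("hello")   # previously risked a segfault; now expected to raise an error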

---------

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Jared Van Bortel
2024-04-18 14:52:02 -04:00
committed by GitHub
parent 271d752701
commit ba53ab5da0
8 changed files with 91 additions and 18 deletions


@@ -364,8 +364,10 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
     d_ptr->end_tokens = {llama_token_eos(d_ptr->model)};
 
 #ifdef GGML_USE_KOMPUTE
-    if (usingGPUDevice() && ggml_vk_has_device()) {
-        std::cerr << "llama.cpp: using Vulkan on " << ggml_vk_current_device().name << std::endl;
+    if (usingGPUDevice()) {
+        if (llama_verbose()) {
+            std::cerr << "llama.cpp: using Vulkan on " << ggml_vk_current_device().name << std::endl;
+        }
+        d_ptr->backend_name = "kompute";
     }
 #endif
@@ -558,7 +560,7 @@ bool LLamaModel::initializeGPUDevice(int device, std::string *unavail_reason) const
 #endif
 }
 
-bool LLamaModel::hasGPUDevice()
+bool LLamaModel::hasGPUDevice() const
 {
 #if defined(GGML_USE_KOMPUTE)
     return d_ptr->device != -1;
@@ -567,10 +569,12 @@ bool LLamaModel::hasGPUDevice()
 #endif
 }
 
-bool LLamaModel::usingGPUDevice()
+bool LLamaModel::usingGPUDevice() const
 {
 #if defined(GGML_USE_KOMPUTE)
-    return hasGPUDevice() && d_ptr->model_params.n_gpu_layers > 0;
+    bool hasDevice = hasGPUDevice() && d_ptr->model_params.n_gpu_layers > 0;
+    assert(!hasDevice || ggml_vk_has_device());
+    return hasDevice;
 #elif defined(GGML_USE_METAL)
     return true;
 #else
@@ -578,6 +582,19 @@ bool LLamaModel::usingGPUDevice()
 #endif
 }
 
+const char *LLamaModel::backendName() const {
+    return d_ptr->backend_name;
+}
+
+const char *LLamaModel::gpuDeviceName() const {
+#if defined(GGML_USE_KOMPUTE)
+    if (usingGPUDevice()) {
+        return ggml_vk_current_device().name;
+    }
+#endif
+    return nullptr;
+}
+
 void llama_batch_add(
         struct llama_batch & batch,
         llama_token id,


@@ -33,8 +33,10 @@ public:
     std::vector<GPUDevice> availableGPUDevices(size_t memoryRequired) const override;
     bool initializeGPUDevice(size_t memoryRequired, const std::string &name) const override;
     bool initializeGPUDevice(int device, std::string *unavail_reason = nullptr) const override;
-    bool hasGPUDevice() override;
-    bool usingGPUDevice() override;
+    bool hasGPUDevice() const override;
+    bool usingGPUDevice() const override;
+    const char *backendName() const override;
+    const char *gpuDeviceName() const override;
     size_t embeddingSize() const override;
 
     // user-specified prefix


@@ -144,8 +144,10 @@ public:
         return false;
     }
 
-    virtual bool hasGPUDevice() { return false; }
-    virtual bool usingGPUDevice() { return false; }
+    virtual bool hasGPUDevice() const { return false; }
+    virtual bool usingGPUDevice() const { return false; }
+    virtual const char *backendName() const { return "cpu"; }
+    virtual const char *gpuDeviceName() const { return nullptr; }
 
     void setProgressCallback(ProgressCallback callback) { m_progressCallback = callback; }


@@ -283,6 +283,18 @@ bool llmodel_gpu_init_gpu_device_by_int(llmodel_model model, int device)
 bool llmodel_has_gpu_device(llmodel_model model)
 {
-    auto *wrapper = static_cast<LLModelWrapper *>(model);
+    const auto *wrapper = static_cast<LLModelWrapper *>(model);
     return wrapper->llModel->hasGPUDevice();
 }
+
+const char *llmodel_model_backend_name(llmodel_model model)
+{
+    const auto *wrapper = static_cast<LLModelWrapper *>(model);
+    return wrapper->llModel->backendName();
+}
+
+const char *llmodel_model_gpu_device_name(llmodel_model model)
+{
+    const auto *wrapper = static_cast<LLModelWrapper *>(model);
+    return wrapper->llModel->gpuDeviceName();
+}


@@ -295,6 +295,16 @@ bool llmodel_gpu_init_gpu_device_by_int(llmodel_model model, int device);
  */
 bool llmodel_has_gpu_device(llmodel_model model);
 
+/**
+ * @return The name of the llama.cpp backend currently in use. One of "cpu", "kompute", or "metal".
+ */
+const char *llmodel_model_backend_name(llmodel_model model);
+
+/**
+ * @return The name of the GPU device currently in use, or NULL for backends other than Kompute.
+ */
+const char *llmodel_model_gpu_device_name(llmodel_model model);
+
 #ifdef __cplusplus
 }
 #endif
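
A hedged sketch of how the Python bindings' ctypes layer might surface these two new C entry points; the shared library name and the wrapper function names here are assumptions, not the bindings' actual code:

    import ctypes
    from ctypes import c_char_p, c_void_p
    from typing import Optional

    # Placeholder library path; the real bindings locate and load libllmodel themselves.
    llmodel = ctypes.CDLL("libllmodel.so")

    llmodel.llmodel_model_backend_name.argtypes = [c_void_p]
    llmodel.llmodel_model_backend_name.restype = c_char_p

    llmodel.llmodel_model_gpu_device_name.argtypes = [c_void_p]
    llmodel.llmodel_model_gpu_device_name.restype = c_char_p

    def backend_name(model: c_void_p) -> str:
        # Always non-NULL: "cpu", "kompute", or "metal".
        return llmodel.llmodel_model_backend_name(model).decode()

    def gpu_device_name(model: c_void_p) -> Optional[str]:
        # NULL (None in Python) for backends other than Kompute.
        name = llmodel.llmodel_model_gpu_device_name(model)
        return name.decode() if name is not None else None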