chat: fix blank device in UI and improve Mixpanel reporting (#2409)

Also remove LLModel::hasGPUDevice.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Author: Jared Van Bortel
Date: 2024-06-26 15:26:27 -04:00
Committed by: GitHub
Commit: 01870b4a46 (parent 53fc2d56f6)

13 changed files with 146 additions and 91 deletions


@@ -371,6 +371,11 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
         d_ptr->model_params.main_gpu = d_ptr->device;
         d_ptr->model_params.n_gpu_layers = ngl;
         d_ptr->model_params.split_mode = LLAMA_SPLIT_MODE_NONE;
+    } else {
+#ifdef GGML_USE_CUDA
+        std::cerr << "Llama ERROR: CUDA loadModel was called without a device\n";
+        return false;
+#endif // GGML_USE_CUDA
     }
 #elif defined(GGML_USE_METAL)
     (void)ngl;
@@ -383,15 +388,17 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
     // always fully offload on Metal
     // TODO(cebtenzzre): use this parameter to allow using more than 53% of system RAM to load a model
     d_ptr->model_params.n_gpu_layers = 100;
-#else
+#else // !KOMPUTE && !VULKAN && !CUDA && !METAL
     (void)ngl;
 #endif

-    d_ptr->model = llama_load_model_from_file_gpt4all(modelPath.c_str(), &d_ptr->model_params);
+    d_ptr->model = llama_load_model_from_file(modelPath.c_str(), d_ptr->model_params);
     if (!d_ptr->model) {
         fflush(stdout);
+#ifndef GGML_USE_CUDA
         d_ptr->device = -1;
         d_ptr->deviceName.clear();
+#endif
         std::cerr << "LLAMA ERROR: failed to load model from " << modelPath << std::endl;
         return false;
     }
@@ -434,8 +441,10 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
         std::cerr << "LLAMA ERROR: failed to init context for model " << modelPath << std::endl;
         llama_free_model(d_ptr->model);
         d_ptr->model = nullptr;
+#ifndef GGML_USE_CUDA
         d_ptr->device = -1;
         d_ptr->deviceName.clear();
+#endif
         return false;
     }
@@ -723,31 +732,16 @@ bool LLamaModel::initializeGPUDevice(int device, std::string *unavail_reason) co
 #endif
 }

-bool LLamaModel::hasGPUDevice() const
-{
-#if defined(GGML_USE_KOMPUTE) || defined(GGML_USE_VULKAN) || defined(GGML_USE_CUDA)
-    return d_ptr->device != -1;
-#else
-    return false;
-#endif
-}
-
 bool LLamaModel::usingGPUDevice() const
 {
-    bool hasDevice;
+    if (!d_ptr->model)
+        return false;
+
+    bool usingGPU = llama_model_using_gpu(d_ptr->model);
 #ifdef GGML_USE_KOMPUTE
-    hasDevice = hasGPUDevice() && d_ptr->model_params.n_gpu_layers > 0;
-    assert(!hasDevice || ggml_vk_has_device());
-#elif defined(GGML_USE_VULKAN) || defined(GGML_USE_CUDA)
-    hasDevice = hasGPUDevice() && d_ptr->model_params.n_gpu_layers > 0;
-#elif defined(GGML_USE_METAL)
-    hasDevice = true;
-#else
-    hasDevice = false;
+    assert(!usingGPU || ggml_vk_has_device());
 #endif
-    return hasDevice;
+    return usingGPU;
 }

 const char *LLamaModel::backendName() const
@@ -760,6 +754,8 @@ const char *LLamaModel::gpuDeviceName() const
     if (usingGPUDevice()) {
 #if defined(GGML_USE_KOMPUTE) || defined(GGML_USE_VULKAN) || defined(GGML_USE_CUDA)
         return d_ptr->deviceName.c_str();
+#elif defined(GGML_USE_METAL)
+        return "Metal";
 #endif
     }
     return nullptr;
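
Note on migrating callers (not part of this commit): with LLModel::hasGPUDevice() removed, code that reports the compute device should rely on usingGPUDevice() together with gpuDeviceName() and backendName(), which this diff keeps. The sketch below is a hypothetical illustration of that pattern; LLModelLike and describeDevice are stand-ins invented for this example, not names from the gpt4all codebase.

#include <string>

// Hypothetical stand-in for the slice of the LLModel interface used here;
// the real LLModel class in the gpt4all backend has many more members.
struct LLModelLike {
    virtual ~LLModelLike() = default;
    virtual bool usingGPUDevice() const = 0;        // true when a loaded model is running on a GPU backend
    virtual const char *backendName() const = 0;    // short backend identifier (assumed non-null here)
    virtual const char *gpuDeviceName() const = 0;  // device name, or nullptr when no GPU is in use
};

// Build a device string for the UI / telemetry so it is never blank:
// prefer the concrete device name, then a generic backend label, then "CPU".
std::string describeDevice(const LLModelLike &model)
{
    if (!model.usingGPUDevice())
        return "CPU";
    if (const char *name = model.gpuDeviceName())
        return name;
    return std::string("GPU (") + model.backendName() + ")";
}

Because gpuDeviceName() now also returns "Metal" on the Metal backend, the generic fallback branch should rarely be needed.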