chat: fix blank device in UI and improve Mixpanel reporting (#2409)

Also remove LLModel::hasGPUDevice.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
commit 01870b4a46 (parent 53fc2d56f6)
Author: Jared Van Bortel <jared@nomic.ai>
Date:   2024-06-26 15:26:27 -04:00
Committed by: GitHub
13 changed files with 146 additions and 91 deletions
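The shape of the refactor, as a minimal self-contained sketch before the diff. The names below (Worker, Model, deviceText()) are illustrative stand-ins, not the project's actual types: every mutation of the loaded-model state funnels through one helper that emits a single loadedModelInfoChanged() signal, and the device string shown to the UI is derived from current state on demand, instead of being pushed through transient reportDevice()/reportFallbackReason() signals that could leave the UI blank if a signal was missed or cleared.

```cpp
#include <QCoreApplication>
#include <QDebug>
#include <QObject>
#include <QString>
#include <memory>
#include <optional>

// Stand-in for LLModel; only the device name matters for this sketch.
struct Model { QString device = QStringLiteral("CPU"); };

class Worker : public QObject
{
    Q_OBJECT
public:
    // The only way to swap or drop the model: the state change and the
    // change notification can never get out of sync.
    void resetModel(Model *replacement = nullptr)
    {
        m_model.reset(replacement);
        m_fallbackReason.reset();
        emit loadedModelInfoChanged();
    }

    // Derived from live state on each read, so a late subscriber
    // still sees the correct value.
    QString deviceText() const { return m_model ? m_model->device : QString(); }

signals:
    void loadedModelInfoChanged();

private:
    std::unique_ptr<Model> m_model;
    std::optional<QString> m_fallbackReason;
};

int main(int argc, char *argv[])
{
    QCoreApplication app(argc, argv);
    Worker worker;
    QObject::connect(&worker, &Worker::loadedModelInfoChanged,
                     [&worker] { qDebug() << "device:" << worker.deviceText(); });
    worker.resetModel(new Model);  // prints device: "CPU"
    worker.resetModel();           // prints device: ""
    return 0;
}

#include "main.moc"  // single-file Qt example; assumes moc/AUTOMOC processing
```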

gpt4all-chat/chatllm.cpp

@@ -93,6 +93,12 @@ void LLModelStore::destroy()
     m_availableModel.reset();
 }
 
+void LLModelInfo::resetModel(ChatLLM *cllm, LLModel *model) {
+    this->model.reset(model);
+    fallbackReason.reset();
+    emit cllm->loadedModelInfoChanged();
+}
+
 ChatLLM::ChatLLM(Chat *parent, bool isServer)
     : QObject{nullptr}
     , m_promptResponseTokens(0)
@@ -141,7 +147,7 @@ void ChatLLM::destroy()
     // The only time we should have a model loaded here is on shutdown
     // as we explicitly unload the model in all other circumstances
     if (isModelLoaded()) {
-        m_llModelInfo.model.reset();
+        m_llModelInfo.resetModel(this);
     }
 }
@@ -208,7 +214,7 @@ void ChatLLM::trySwitchContextOfLoadedModel(const ModelInfo &modelInfo)
     QString filePath = modelInfo.dirpath + modelInfo.filename();
     QFileInfo fileInfo(filePath);
 
-    m_llModelInfo = LLModelStore::globalInstance()->acquireModel();
+    acquireModel();
 #if defined(DEBUG_MODEL_LOADING)
     qDebug() << "acquired model from store" << m_llmThread.objectName() << m_llModelInfo.model.get();
 #endif
@@ -251,8 +257,6 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
     // reset status
     emit modelLoadingPercentageChanged(std::numeric_limits<float>::min()); // small non-zero positive value
     emit modelLoadingError("");
-    emit reportFallbackReason("");
-    emit reportDevice("");
     m_pristineLoadedState = false;
 
     QString filePath = modelInfo.dirpath + modelInfo.filename();
@@ -265,12 +269,12 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
 #if defined(DEBUG_MODEL_LOADING)
         qDebug() << "already acquired model deleted" << m_llmThread.objectName() << m_llModelInfo.model.get();
 #endif
-        m_llModelInfo.model.reset();
+        m_llModelInfo.resetModel(this);
     } else if (!m_isServer) {
         // This is a blocking call that tries to retrieve the model we need from the model store.
         // If it succeeds, then we just have to restore state. If the store has never had a model
         // returned to it, then the modelInfo.model pointer should be null which will happen on startup
-        m_llModelInfo = LLModelStore::globalInstance()->acquireModel();
+        acquireModel();
 #if defined(DEBUG_MODEL_LOADING)
         qDebug() << "acquired model from store" << m_llmThread.objectName() << m_llModelInfo.model.get();
 #endif
@@ -305,7 +309,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
 #if defined(DEBUG_MODEL_LOADING)
             qDebug() << "deleting model" << m_llmThread.objectName() << m_llModelInfo.model.get();
 #endif
-            m_llModelInfo.model.reset();
+            m_llModelInfo.resetModel(this);
         }
     }
@@ -335,7 +339,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
             model->setModelName(modelName);
             model->setRequestURL(modelInfo.url());
             model->setAPIKey(apiKey);
-            m_llModelInfo.model.reset(model);
+            m_llModelInfo.resetModel(this, model);
         } else {
             QElapsedTimer modelLoadTimer;
             modelLoadTimer.start();
@@ -360,10 +364,10 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
 #endif
 
             QString constructError;
-            m_llModelInfo.model.reset();
+            m_llModelInfo.resetModel(this);
             try {
                 auto *model = LLModel::Implementation::construct(filePath.toStdString(), backend, n_ctx);
-                m_llModelInfo.model.reset(model);
+                m_llModelInfo.resetModel(this, model);
             } catch (const LLModel::MissingImplementationError &e) {
                 modelLoadProps.insert("error", "missing_model_impl");
                 constructError = e.what();
@@ -412,14 +416,15 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                     memGB = std::floor(memGB * 10.f) / 10.f; // truncate to 1 decimal place
                     modelLoadProps.insert("default_device", QString::fromStdString(defaultDevice->name));
                     modelLoadProps.insert("default_device_mem", approxDeviceMemGB(defaultDevice));
+                    modelLoadProps.insert("default_device_backend", QString::fromStdString(defaultDevice->backendName()));
                 }
             }
 
-            QString actualDevice("CPU");
+            bool actualDeviceIsCPU = true;
 
 #if defined(Q_OS_MAC) && defined(__aarch64__)
             if (m_llModelInfo.model->implementation().buildVariant() == "metal")
-                actualDevice = "Metal";
+                actualDeviceIsCPU = false;
 #else
             if (requestedDevice != "CPU") {
                 const auto *device = defaultDevice;
@@ -437,41 +442,39 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                 if (!device) {
                     // GPU not available
                 } else if (!m_llModelInfo.model->initializeGPUDevice(device->index, &unavail_reason)) {
-                    emit reportFallbackReason(QString::fromStdString("<br>" + unavail_reason));
+                    m_llModelInfo.fallbackReason = QString::fromStdString(unavail_reason);
                 } else {
-                    actualDevice = QString::fromStdString(device->reportedName());
+                    actualDeviceIsCPU = false;
                     modelLoadProps.insert("requested_device_mem", approxDeviceMemGB(device));
                 }
             }
 #endif
 
-            // Report which device we're actually using
-            emit reportDevice(actualDevice);
-
             bool success = m_llModelInfo.model->loadModel(filePath.toStdString(), n_ctx, ngl);
 
             if (!m_shouldBeLoaded) {
-                m_llModelInfo.model.reset();
+                m_llModelInfo.resetModel(this);
                 if (!m_isServer)
                     LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
-                m_llModelInfo = LLModelInfo();
+                resetModel();
                 emit modelLoadingPercentageChanged(0.0f);
                 return false;
             }
 
-            if (actualDevice == "CPU") {
+            if (actualDeviceIsCPU) {
                 // we asked llama.cpp to use the CPU
             } else if (!success) {
                 // llama_init_from_file returned nullptr
-                emit reportDevice("CPU");
-                emit reportFallbackReason("<br>GPU loading failed (out of VRAM?)");
+                m_llModelInfo.fallbackReason = "GPU loading failed (out of VRAM?)";
                 modelLoadProps.insert("cpu_fallback_reason", "gpu_load_failed");
                 success = m_llModelInfo.model->loadModel(filePath.toStdString(), n_ctx, 0);
 
                 if (!m_shouldBeLoaded) {
-                    m_llModelInfo.model.reset();
+                    m_llModelInfo.resetModel(this);
                     if (!m_isServer)
                         LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
-                    m_llModelInfo = LLModelInfo();
+                    resetModel();
                     emit modelLoadingPercentageChanged(0.0f);
                     return false;
                 }
@@ -479,16 +482,15 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                 // ggml_vk_init was not called in llama.cpp
                 // We might have had to fallback to CPU after load if the model is not possible to accelerate
                 // for instance if the quantization method is not supported on Vulkan yet
-                emit reportDevice("CPU");
-                emit reportFallbackReason("<br>model or quant has no GPU support");
+                m_llModelInfo.fallbackReason = "model or quant has no GPU support";
                 modelLoadProps.insert("cpu_fallback_reason", "gpu_unsupported_model");
             }
 
             if (!success) {
-                m_llModelInfo.model.reset();
+                m_llModelInfo.resetModel(this);
                 if (!m_isServer)
                     LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
-                m_llModelInfo = LLModelInfo();
+                resetModel();
                 emit modelLoadingError(u"Could not load model due to invalid model file for %1"_s.arg(modelInfo.filename()));
                 modelLoadProps.insert("error", "loadmodel_failed");
             } else {
@@ -497,10 +499,10 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                 case 'G': m_llModelType = LLModelType::GPTJ_; break;
                 default:
                     {
-                        m_llModelInfo.model.reset();
+                        m_llModelInfo.resetModel(this);
                         if (!m_isServer)
                             LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
-                        m_llModelInfo = LLModelInfo();
+                        resetModel();
                         emit modelLoadingError(u"Could not determine model type for %1"_s.arg(modelInfo.filename()));
                     }
                 }
@@ -510,7 +512,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
             } else {
                 if (!m_isServer)
                     LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
-                m_llModelInfo = LLModelInfo();
+                resetModel();
                 emit modelLoadingError(u"Error loading %1: %2"_s.arg(modelInfo.filename(), constructError));
             }
         }
@@ -523,6 +525,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
     fflush(stdout);
 #endif
     emit modelLoadingPercentageChanged(isModelLoaded() ? 1.0f : 0.0f);
+    emit loadedModelInfoChanged();
 
     modelLoadProps.insert("requestedDevice", MySettings::globalInstance()->device());
     modelLoadProps.insert("model", modelInfo.filename());
@@ -530,7 +533,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
     } else {
         if (!m_isServer)
             LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo)); // release back into the store
-        m_llModelInfo = LLModelInfo();
+        resetModel();
         emit modelLoadingError(u"Could not find file for model %1"_s.arg(modelInfo.filename()));
     }
}
@@ -621,6 +624,16 @@ void ChatLLM::setModelInfo(const ModelInfo &modelInfo)
     emit modelInfoChanged(modelInfo);
 }
 
+void ChatLLM::acquireModel() {
+    m_llModelInfo = LLModelStore::globalInstance()->acquireModel();
+    emit loadedModelInfoChanged();
+}
+
+void ChatLLM::resetModel() {
+    m_llModelInfo = {};
+    emit loadedModelInfoChanged();
+}
+
 void ChatLLM::modelChangeRequested(const ModelInfo &modelInfo)
 {
     m_shouldBeLoaded = true;
@@ -809,7 +822,7 @@ void ChatLLM::unloadModel()
 #endif
 
     if (m_forceUnloadModel) {
-        m_llModelInfo.model.reset();
+        m_llModelInfo.resetModel(this);
         m_forceUnloadModel = false;
     }
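For the UI half of the fix, a hypothetical consumer continuing the Worker sketch above (DeviceInfoProxy is an illustrative name, not the actual Chat class): the displayed device is a property recomputed from live state whenever loadedModelInfoChanged() fires, so it cannot be left blank by a missed or already-cleared transient signal.

```cpp
// Hypothetical QML-facing proxy; assumes the Worker class from the sketch
// above. The property forwards the change signal and re-reads live state.
class DeviceInfoProxy : public QObject
{
    Q_OBJECT
    Q_PROPERTY(QString device READ device NOTIFY deviceChanged)
public:
    explicit DeviceInfoProxy(Worker *worker, QObject *parent = nullptr)
        : QObject(parent), m_worker(worker)
    {
        // One signal-to-signal connection replaces caching the payloads of
        // the removed reportDevice()/reportFallbackReason() signals.
        connect(worker, &Worker::loadedModelInfoChanged,
                this, &DeviceInfoProxy::deviceChanged);
    }

    QString device() const { return m_worker->deviceText(); }

signals:
    void deviceChanged();

private:
    Worker *m_worker;
};
```

Because device() reads current state on every access, a binding created after the model finished loading still shows the right device, which is exactly the late-subscriber case the old push-style report signals could miss.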