improve mixpanel usage statistics (#2238)

Other changes:
- Always display first start dialog if privacy options are unset (e.g. if the user closed GPT4All without selecting them)
- LocalDocs scanQueue is now always deferred
- Fix a potential crash in magic_match
- LocalDocs indexing is now started after the first start dialog is dismissed so usage stats are included

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
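
Deferring scanQueue means the scan is queued onto the thread's event loop rather than executed synchronously at the call site, so callers never block on indexing work. A minimal sketch of that pattern in Qt; the names here are illustrative, not the actual GPT4All classes:

#include <QCoreApplication>
#include <QTimer>
#include <QDebug>

int main(int argc, char *argv[])
{
    QCoreApplication app(argc, argv);

    // Zero-timeout single-shot: the lambda is queued and runs on the next
    // event loop iteration, so this call site returns immediately.
    QTimer::singleShot(0, [] {
        qDebug() << "processing scan queue";
        QCoreApplication::quit();
    });

    return app.exec();
}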
Author: Jared Van Bortel
Date: 2024-04-25 13:16:52 -04:00 (committed by GitHub)
Parent: 4193533154
Commit: c622921894
30 changed files with 520 additions and 532 deletions

gpt4all-chat/chatllm.cpp

@@ -7,6 +7,8 @@
 #include "mysettings.h"
+#include "../gpt4all-backend/llmodel.h"
+#include <QElapsedTimer>
 
 //#define DEBUG
 //#define DEBUG_MODEL_LOADING
@@ -74,8 +76,6 @@ ChatLLM::ChatLLM(Chat *parent, bool isServer)
     , m_restoreStateFromText(false)
 {
     moveToThread(&m_llmThread);
-    connect(this, &ChatLLM::sendStartup, Network::globalInstance(), &Network::sendStartup);
-    connect(this, &ChatLLM::sendModelLoaded, Network::globalInstance(), &Network::sendModelLoaded);
     connect(this, &ChatLLM::shouldBeLoadedChanged, this, &ChatLLM::handleShouldBeLoadedChanged,
             Qt::QueuedConnection); // explicitly queued
     connect(this, &ChatLLM::shouldTrySwitchContextChanged, this, &ChatLLM::handleShouldTrySwitchContextChanged,
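
The two removed connect() calls routed the startup and model-load pings through dedicated signals; they are replaced by the direct trackChatEvent call in the model_load hunk further down. The surviving connection is forced to Qt::QueuedConnection so its handler always runs from the event loop instead of re-entrantly. A standalone, illustrative sketch of queued delivery in Qt:

#include <QCoreApplication>
#include <QObject>
#include <QThread>
#include <QDebug>

int main(int argc, char *argv[])
{
    QCoreApplication app(argc, argv);
    QThread worker;
    QObject receiver;  // lives on the main thread

    // started() is emitted from the new thread; because `receiver` lives on
    // the main thread, the lambda is delivered through the main thread's
    // event loop (queued), not invoked directly from the worker thread.
    QObject::connect(&worker, &QThread::started, &receiver, [] {
        qDebug() << "delivered on" << QThread::currentThread();
        QCoreApplication::quit();
    });

    worker.start();
    int rc = app.exec();
    worker.quit();
    worker.wait();
    return rc;
}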
@@ -278,6 +278,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
     m_llModelInfo.fileInfo = fileInfo;
 
     if (fileInfo.exists()) {
+        QVariantMap modelLoadProps;
         if (modelInfo.isOnline) {
             QString apiKey;
             QString modelName;
@@ -298,6 +299,9 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
             model->setAPIKey(apiKey);
             m_llModelInfo.model = model;
         } else {
+            QElapsedTimer modelLoadTimer;
+            modelLoadTimer.start();
+
             auto n_ctx = MySettings::globalInstance()->modelContextLength(modelInfo);
             m_ctx.n_ctx = n_ctx;
             auto ngl = MySettings::globalInstance()->modelGpuLayers(modelInfo);
@@ -307,7 +311,21 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
             if (m_forceMetal)
                 buildVariant = "metal";
 #endif
-            m_llModelInfo.model = LLModel::Implementation::construct(filePath.toStdString(), buildVariant, n_ctx);
+            QString constructError;
+            m_llModelInfo.model = nullptr;
+            try {
+                m_llModelInfo.model = LLModel::Implementation::construct(filePath.toStdString(), buildVariant, n_ctx);
+            } catch (const LLModel::MissingImplementationError &e) {
+                modelLoadProps.insert("error", "missing_model_impl");
+                constructError = e.what();
+            } catch (const LLModel::UnsupportedModelError &e) {
+                modelLoadProps.insert("error", "unsupported_model_file");
+                constructError = e.what();
+            } catch (const LLModel::BadArchError &e) {
+                constructError = e.what();
+                modelLoadProps.insert("error", "unsupported_model_arch");
+                modelLoadProps.insert("model_arch", QString::fromStdString(e.arch()));
+            }
 
             if (m_llModelInfo.model) {
                 if (m_llModelInfo.model->isModelBlacklisted(filePath.toStdString())) {
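
The new try/catch maps each typed construction failure to a stable, low-cardinality error code for analytics while preserving the human-readable message for the UI error path further down. A reduced sketch of that pattern; the exception type here is a stand-in for the LLModel error hierarchy:

#include <QString>
#include <QVariantMap>
#include <stdexcept>

// Stand-in for LLModel::UnsupportedModelError.
struct UnsupportedModelError : std::runtime_error {
    using std::runtime_error::runtime_error;
};

QVariantMap tryConstruct(QString &errorOut)
{
    QVariantMap props;
    try {
        // Stand-in for LLModel::Implementation::construct(...).
        throw UnsupportedModelError("model format is too old");
    } catch (const UnsupportedModelError &e) {
        props.insert("error", "unsupported_model_file");  // analytics code
        errorOut = e.what();                              // UI message
    }
    return props;
}

int main()
{
    QString err;
    QVariantMap props = tryConstruct(err);
    return props.contains("error") && !err.isEmpty() ? 0 : 1;
}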
@@ -368,6 +386,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                     // llama_init_from_file returned nullptr
                     emit reportDevice("CPU");
                     emit reportFallbackReason("<br>GPU loading failed (out of VRAM?)");
+                    modelLoadProps.insert("cpu_fallback_reason", "gpu_load_failed");
                     success = m_llModelInfo.model->loadModel(filePath.toStdString(), n_ctx, 0);
                 } else if (!m_llModelInfo.model->usingGPUDevice()) {
                     // ggml_vk_init was not called in llama.cpp
@@ -375,6 +394,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                     // for instance if the quantization method is not supported on Vulkan yet
                     emit reportDevice("CPU");
                     emit reportFallbackReason("<br>model or quant has no GPU support");
+                    modelLoadProps.insert("cpu_fallback_reason", "gpu_unsupported_model");
                 }
 
                 if (!success) {
@@ -384,6 +404,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                         LLModelStore::globalInstance()->releaseModel(m_llModelInfo); // release back into the store
                     m_llModelInfo = LLModelInfo();
                     emit modelLoadingError(QString("Could not load model due to invalid model file for %1").arg(modelInfo.filename()));
+                    modelLoadProps.insert("error", "loadmodel_failed");
                 } else {
                     switch (m_llModelInfo.model->implementation().modelType()[0]) {
                     case 'L': m_llModelType = LLModelType::LLAMA_; break;
@@ -398,12 +419,14 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                         emit modelLoadingError(QString("Could not determine model type for %1").arg(modelInfo.filename()));
                     }
                 }
+
+                modelLoadProps.insert("$duration", modelLoadTimer.elapsed() / 1000.);
             } else {
                 if (!m_isServer)
                     LLModelStore::globalInstance()->releaseModel(m_llModelInfo); // release back into the store
                 m_llModelInfo = LLModelInfo();
-                emit modelLoadingError(QString("Could not load model due to invalid format for %1").arg(modelInfo.filename()));
+                emit modelLoadingError(QString("Error loading %1: %2").arg(modelInfo.filename()).arg(constructError));
             }
         }
 #if defined(DEBUG_MODEL_LOADING)
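
On the $duration property added above: Mixpanel treats $duration as a reserved event property measured in seconds (it is what Mixpanel's own event-timing API reports), which would explain dividing the millisecond QElapsedTimer reading by 1000. In miniature:

#include <QElapsedTimer>
#include <QVariantMap>
#include <QThread>
#include <QDebug>

int main()
{
    QElapsedTimer timer;   // monotonic clock, immune to wall-clock changes
    timer.start();
    QThread::msleep(250);  // stand-in for the measured model load

    QVariantMap props;
    // elapsed() is milliseconds; divide to get the seconds Mixpanel expects.
    props.insert("$duration", timer.elapsed() / 1000.);
    qDebug() << props;
    return 0;
}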
@@ -416,12 +439,9 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
 #endif
         emit modelLoadingPercentageChanged(isModelLoaded() ? 1.0f : 0.0f);
-        static bool isFirstLoad = true;
-        if (isFirstLoad) {
-            emit sendStartup();
-            isFirstLoad = false;
-        } else
-            emit sendModelLoaded();
+        modelLoadProps.insert("requestedDevice", MySettings::globalInstance()->device());
+        modelLoadProps.insert("model", modelInfo.filename());
+        Network::globalInstance()->trackChatEvent("model_load", modelLoadProps);
     } else {
         if (!m_isServer)
             LLModelStore::globalInstance()->releaseModel(m_llModelInfo); // release back into the store
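
Here the dedicated startup/model-loaded signals give way to a single property-bag call. This diff does not show trackChatEvent's body, so as a purely hypothetical sketch of the general shape such a tracker might take (assuming JSON submission; the real Network class may differ), the event name and merged properties would serialize along these lines:

#include <QJsonDocument>
#include <QJsonObject>
#include <QString>
#include <QVariantMap>
#include <QDebug>

// Hypothetical helper: fold per-event properties into the JSON body an
// analytics endpoint would receive.
QByteArray buildEventBody(const QString &event, const QVariantMap &props)
{
    QJsonObject obj = QJsonObject::fromVariantMap(props);
    obj.insert("event", event);
    return QJsonDocument(obj).toJson(QJsonDocument::Compact);
}

int main()
{
    QVariantMap props;
    props.insert("model", "example-model.Q4_0.gguf");  // illustrative values
    props.insert("requestedDevice", "Auto");
    props.insert("$duration", 2.3);
    qDebug().noquote() << buildEventBody("model_load", props);
    return 0;
}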
@@ -632,6 +652,8 @@ bool ChatLLM::promptInternal(const QList<QString> &collectionList, const QString
     printf("%s", qPrintable(prompt));
     fflush(stdout);
 #endif
+    QElapsedTimer totalTime;
+    totalTime.start();
     m_timer->start();
     if (!docsContext.isEmpty()) {
         auto old_n_predict = std::exchange(m_ctx.n_predict, 0); // decode localdocs context without a response
@@ -644,12 +666,13 @@ bool ChatLLM::promptInternal(const QList<QString> &collectionList, const QString
     fflush(stdout);
 #endif
     m_timer->stop();
+    qint64 elapsed = totalTime.elapsed();
     std::string trimmed = trim_whitespace(m_response);
     if (trimmed != m_response) {
         m_response = trimmed;
         emit responseChanged(QString::fromStdString(m_response));
     }
-    emit responseStopped();
+    emit responseStopped(elapsed);
     return true;
 }
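
responseStopped now carries the measured generation time so the receiver can attach it to the corresponding usage event. The timing pattern reduced to its essentials; runTimed is an illustrative helper, not GPT4All code:

#include <QElapsedTimer>
#include <QThread>
#include <QDebug>
#include <functional>

// Illustrative helper: time a unit of work and pass the elapsed
// milliseconds to a completion callback, mirroring responseStopped(qint64).
void runTimed(const std::function<void()> &work,
              const std::function<void(qint64)> &done)
{
    QElapsedTimer timer;
    timer.start();
    work();
    done(timer.elapsed());
}

int main()
{
    runTimed([] { QThread::msleep(50); },  // stand-in for generation
             [](qint64 ms) { qDebug() << "response took" << ms << "ms"; });
    return 0;
}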