chat: fix issues with quickly switching between multiple chats (#2343)

* prevent load progress from getting out of sync with the current chat (see the sketch after this list)
* fix memory leak on exit if the LLModelStore contains a model
* do not report cancellation as a failure in console/Mixpanel
* show "waiting for model" separately from "switching context" in UI
* do not show lower "reload" button on error
* skip context switch if unload is pending
* skip unnecessary calls to LLModel::saveState
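
For the first bullet, the failure mode is a progress callback from a load that an older chat started landing on the newly selected chat. Below is a minimal standalone sketch of the guard pattern, not the actual gpt4all code (all names here are illustrative):

    // Tag each load with the id of the chat that started it, and drop
    // progress callbacks whose id no longer matches the selected chat.
    #include <atomic>
    #include <cstdio>

    std::atomic<int> g_currentChatId{0};

    void onLoadProgress(int chatId, float progress)
    {
        if (chatId != g_currentChatId.load())
            return; // stale update from a chat the user switched away from
        std::printf("chat %d: %.0f%%\n", chatId, progress * 100.0f);
    }

    int main()
    {
        g_currentChatId = 1;
        onLoadProgress(1, 0.5f); // applied: chat 1 is current
        g_currentChatId = 2;     // user switches chats mid-load
        onLoadProgress(1, 0.9f); // ignored: belongs to the previous chat
    }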

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Author: Jared Van Bortel <jared@nomic.ai>
Date:   2024-05-15 14:07:03 -04:00 (committed by GitHub)
Commit: 7e1e00f331 (parent: 7f1c3d4275)

6 changed files with 179 additions and 143 deletions

diff --git a/gpt4all-chat/chatllm.h b/gpt4all-chat/chatllm.h

@@ -5,6 +5,8 @@
 #include <QThread>
 #include <QFileInfo>
 
+#include <memory>
+
 #include "database.h"
 #include "modellist.h"
 #include "../gpt4all-backend/llmodel.h"
@@ -16,7 +18,7 @@ enum LLModelType {
 };
 
 struct LLModelInfo {
-    LLModel *model = nullptr;
+    std::unique_ptr<LLModel> model;
     QFileInfo fileInfo;
     // NOTE: This does not store the model type or name on purpose as this is left for ChatLLM which
     // must be able to serialize the information even if it is in the unloaded state
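
The switch from a raw `LLModel *` to `std::unique_ptr<LLModel>` ties the model's lifetime to the `LLModelInfo` that holds it, so a model parked in the LLModelStore is freed with the store instead of leaking at exit. A minimal sketch of the ownership semantics, with a stubbed-out `LLModel`:

    #include <memory>
    #include <utility>

    struct LLModel { /* stub for the real backend class */ };

    struct LLModelInfo {
        std::unique_ptr<LLModel> model; // owning: deleted automatically
    };

    int main()
    {
        LLModelInfo info;
        info.model = std::make_unique<LLModel>();
        LLModelInfo other = std::move(info); // ownership transfers, no copy
        // `other` releases the model here with no explicit delete, which is
        // what closes the leak when the store still holds a model at exit.
    }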
@@ -72,6 +74,7 @@ public:
     virtual ~ChatLLM();
 
     void destroy();
+    static void destroyStore();
     bool isModelLoaded() const;
     void regenerateResponse();
     void resetResponse();
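
The new static `destroyStore()` gives the application a way to tear down the shared model store before exit; together with the owning pointer above, this addresses the "memory leak on exit if the LLModelStore contains a model" bullet. A hypothetical sketch of the pattern (the real LLModelStore internals are not shown in this diff):

    #include <memory>
    #include <optional>
    #include <utility>

    struct LLModel { };
    struct LLModelInfo { std::unique_ptr<LLModel> model; };

    class LLModelStore {
    public:
        static LLModelStore *globalInstance()
        {
            static LLModelStore store;
            return &store;
        }
        void park(LLModelInfo info) { m_parked = std::move(info); }
        void destroy() { m_parked.reset(); } // frees any parked model
    private:
        std::optional<LLModelInfo> m_parked;
    };

    // e.g. ChatLLM::destroyStore() could simply forward to the singleton:
    void destroyStore() { LLModelStore::globalInstance()->destroy(); }

    int main()
    {
        LLModelStore::globalInstance()->park({std::make_unique<LLModel>()});
        destroyStore(); // releases the parked model before exit
    }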
@@ -81,7 +84,7 @@ public:
     bool shouldBeLoaded() const { return m_shouldBeLoaded; }
     void setShouldBeLoaded(bool b);
-    void setShouldTrySwitchContext(bool b);
+    void requestTrySwitchContext();
     void setForceUnloadModel(bool b) { m_forceUnloadModel = b; }
     void setMarkedForDeletion(bool b) { m_markedForDeletion = b; }
@@ -101,7 +104,7 @@ public:
 public Q_SLOTS:
     bool prompt(const QList<QString> &collectionList, const QString &prompt);
     bool loadDefaultModel();
-    bool trySwitchContextOfLoadedModel(const ModelInfo &modelInfo);
+    void trySwitchContextOfLoadedModel(const ModelInfo &modelInfo);
     bool loadModel(const ModelInfo &modelInfo);
     void modelChangeRequested(const ModelInfo &modelInfo);
     void unloadModel();
@@ -109,7 +112,6 @@ public Q_SLOTS:
     void generateName();
     void handleChatIdChanged(const QString &id);
     void handleShouldBeLoadedChanged();
-    void handleShouldTrySwitchContextChanged();
     void handleThreadStarted();
     void handleForceMetalChanged(bool forceMetal);
     void handleDeviceChanged();
@@ -128,8 +130,8 @@ Q_SIGNALS:
     void stateChanged();
     void threadStarted();
     void shouldBeLoadedChanged();
-    void shouldTrySwitchContextChanged();
-    void trySwitchContextOfLoadedModelCompleted(bool);
+    void trySwitchContextRequested(const ModelInfo &modelInfo);
+    void trySwitchContextOfLoadedModelCompleted(int value);
     void requestRetrieveFromDB(const QList<QString> &collections, const QString &text, int retrievalSize, QList<ResultInfo> *results);
     void reportSpeed(const QString &speed);
     void reportDevice(const QString &device);
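
Two things change in the request/response flow here: the polled `m_shouldTrySwitchContext` flag and its change-notification are replaced by a direct `trySwitchContextRequested(modelInfo)` signal carrying the target model, and the completion signal widens from `bool` to `int` so the UI can distinguish more than success/failure (per the commit message, "waiting for model" and "switching context" are shown separately, and cancellation is no longer reported as failure). A hypothetical sketch of a tri-state encoding; the actual values gpt4all uses are not visible in this header:

    #include <cstdio>

    // Assumed encoding, for illustration only.
    enum SwitchState : int {
        NotSwitching     = 0, // no context switch in progress
        WaitingForModel  = 1, // another chat still owns the loaded model
        SwitchingContext = 2, // model acquired; restoring this chat's state
    };

    void onTrySwitchContextOfLoadedModelCompleted(int value)
    {
        switch (value) {
        case NotSwitching:     std::puts("idle");                 break;
        case WaitingForModel:  std::puts("waiting for model..."); break;
        case SwitchingContext: std::puts("switching context..."); break;
        }
    }

    int main()
    {
        onTrySwitchContextOfLoadedModelCompleted(WaitingForModel);
        onTrySwitchContextOfLoadedModelCompleted(SwitchingContext);
        onTrySwitchContextOfLoadedModelCompleted(NotSwitching);
    }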
@@ -172,7 +174,6 @@ private:
     QThread m_llmThread;
     std::atomic<bool> m_stopGenerating;
     std::atomic<bool> m_shouldBeLoaded;
-    std::atomic<bool> m_shouldTrySwitchContext;
     std::atomic<bool> m_isRecalc;
     std::atomic<bool> m_forceUnloadModel;
     std::atomic<bool> m_markedForDeletion;
@@ -181,6 +182,10 @@ private:
     bool m_reloadingToChangeVariant;
     bool m_processedSystemPrompt;
     bool m_restoreStateFromText;
+    // m_pristineLoadedState is set if saveState is unnecessary, either because:
+    // - an unload was queued during LLModel::restoreState()
+    // - the chat will be restored from text and hasn't been interacted with yet
+    bool m_pristineLoadedState = false;
     QVector<QPair<QString, QString>> m_stateFromText;
 };
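
To make the last change concrete: the flag lets serialization skip `LLModel::saveState()` when the in-memory state cannot have diverged from what is already persisted. A simplified sketch of the skip, assuming a stubbed `saveState` (the real one serializes model state into a buffer):

    #include <cstdio>

    struct LLModel {
        void saveState() { std::puts("expensive state serialization"); }
    };

    struct ChatLLM {
        LLModel model;
        bool m_pristineLoadedState = false;

        void serialize()
        {
            // If nothing touched the model since load/restore, the persisted
            // state is already current and the copy can be skipped.
            if (m_pristineLoadedState)
                std::puts("state is pristine; skipping saveState");
            else
                model.saveState();
        }
    };

    int main()
    {
        ChatLLM llm;
        llm.serialize();                  // state may be dirty: saves
        llm.m_pristineLoadedState = true; // e.g. restored and untouched
        llm.serialize();                  // skipped
    }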