chat: fix issues with quickly switching between multiple chats (#2343)

* prevent load progress from getting out of sync with the current chat (see the sketch after this list)
* fix memory leak on exit if the LLModelStore contains a model
* do not report cancellation as a failure in console/Mixpanel
* show "waiting for model" separately from "switching context" in UI
* do not show lower "reload" button on error
* skip context switch if unload is pending
* skip unnecessary calls to LLModel::saveState
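
For the first bullet, the failure mode is a progress callback from a load that an older chat started landing on the newly selected chat. Below is a minimal standalone sketch of the guard pattern, not the actual gpt4all code (all names here are illustrative):

    // Tag each load with the id of the chat that started it, and drop
    // progress callbacks whose id no longer matches the selected chat.
    #include <atomic>
    #include <cstdio>

    std::atomic<int> g_currentChatId{0};

    void onLoadProgress(int chatId, float progress)
    {
        if (chatId != g_currentChatId.load())
            return; // stale update from a chat the user switched away from
        std::printf("chat %d: %.0f%%\n", chatId, progress * 100.0f);
    }

    int main()
    {
        g_currentChatId = 1;
        onLoadProgress(1, 0.5f); // applied: chat 1 is current
        g_currentChatId = 2;     // user switches chats mid-load
        onLoadProgress(1, 0.9f); // ignored: belongs to the previous chat
    }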

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
Author: Jared Van Bortel <jared@nomic.ai>
Date:   2024-05-15 14:07:03 -04:00 (committed by GitHub)
Commit: 7e1e00f331 (parent: 7f1c3d4275)

6 changed files with 179 additions and 143 deletions

diff --git a/gpt4all-chat/chatllm.h b/gpt4all-chat/chatllm.h

@@ -5,6 +5,8 @@
 #include <QThread>
 #include <QFileInfo>
 
+#include <memory>
+
 #include "database.h"
 #include "modellist.h"
 #include "../gpt4all-backend/llmodel.h"
@@ -16,7 +18,7 @@ enum LLModelType {
 };
 
 struct LLModelInfo {
-    LLModel *model = nullptr;
+    std::unique_ptr<LLModel> model;
     QFileInfo fileInfo;
     // NOTE: This does not store the model type or name on purpose as this is left for ChatLLM which
     // must be able to serialize the information even if it is in the unloaded state
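
The switch from a raw `LLModel *` to `std::unique_ptr<LLModel>` ties the model's lifetime to the `LLModelInfo` that holds it, so a model parked in the LLModelStore is freed with the store instead of leaking at exit. A minimal sketch of the ownership semantics, with a stubbed-out `LLModel`:

    #include <memory>
    #include <utility>

    struct LLModel { /* stub for the real backend class */ };

    struct LLModelInfo {
        std::unique_ptr<LLModel> model; // owning: deleted automatically
    };

    int main()
    {
        LLModelInfo info;
        info.model = std::make_unique<LLModel>();
        LLModelInfo other = std::move(info); // ownership transfers, no copy
        // `other` releases the model here with no explicit delete, which is
        // what closes the leak when the store still holds a model at exit.
    }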
@@ -72,6 +74,7 @@ public:
     virtual ~ChatLLM();
 
     void destroy();
+    static void destroyStore();
     bool isModelLoaded() const;
     void regenerateResponse();
     void resetResponse();
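
The new static `destroyStore()` gives the application a way to tear down the shared model store before exit; together with the owning pointer above, this addresses the "memory leak on exit if the LLModelStore contains a model" bullet. A hypothetical sketch of the pattern (the real LLModelStore internals are not shown in this diff):

    #include <memory>
    #include <optional>
    #include <utility>

    struct LLModel { };
    struct LLModelInfo { std::unique_ptr<LLModel> model; };

    class LLModelStore {
    public:
        static LLModelStore *globalInstance()
        {
            static LLModelStore store;
            return &store;
        }
        void park(LLModelInfo info) { m_parked = std::move(info); }
        void destroy() { m_parked.reset(); } // frees any parked model
    private:
        std::optional<LLModelInfo> m_parked;
    };

    // e.g. ChatLLM::destroyStore() could simply forward to the singleton:
    void destroyStore() { LLModelStore::globalInstance()->destroy(); }

    int main()
    {
        LLModelStore::globalInstance()->park({std::make_unique<LLModel>()});
        destroyStore(); // releases the parked model before exit
    }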
@@ -81,7 +84,7 @@ public:
     bool shouldBeLoaded() const { return m_shouldBeLoaded; }
     void setShouldBeLoaded(bool b);
-    void setShouldTrySwitchContext(bool b);
+    void requestTrySwitchContext();
     void setForceUnloadModel(bool b) { m_forceUnloadModel = b; }
     void setMarkedForDeletion(bool b) { m_markedForDeletion = b; }
@@ -101,7 +104,7 @@ public:
 public Q_SLOTS:
     bool prompt(const QList<QString> &collectionList, const QString &prompt);
     bool loadDefaultModel();
-    bool trySwitchContextOfLoadedModel(const ModelInfo &modelInfo);
+    void trySwitchContextOfLoadedModel(const ModelInfo &modelInfo);
     bool loadModel(const ModelInfo &modelInfo);
     void modelChangeRequested(const ModelInfo &modelInfo);
     void unloadModel();
@@ -109,7 +112,6 @@ public Q_SLOTS:
     void generateName();
     void handleChatIdChanged(const QString &id);
     void handleShouldBeLoadedChanged();
-    void handleShouldTrySwitchContextChanged();
     void handleThreadStarted();
     void handleForceMetalChanged(bool forceMetal);
     void handleDeviceChanged();
@@ -128,8 +130,8 @@ Q_SIGNALS:
     void stateChanged();
     void threadStarted();
     void shouldBeLoadedChanged();
-    void shouldTrySwitchContextChanged();
-    void trySwitchContextOfLoadedModelCompleted(bool);
+    void trySwitchContextRequested(const ModelInfo &modelInfo);
+    void trySwitchContextOfLoadedModelCompleted(int value);
     void requestRetrieveFromDB(const QList<QString> &collections, const QString &text, int retrievalSize, QList<ResultInfo> *results);
     void reportSpeed(const QString &speed);
     void reportDevice(const QString &device);
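
Two things change in the request/response flow here: the polled `m_shouldTrySwitchContext` flag and its change-notification are replaced by a direct `trySwitchContextRequested(modelInfo)` signal carrying the target model, and the completion signal widens from `bool` to `int` so the UI can distinguish more than success/failure (per the commit message, "waiting for model" and "switching context" are shown separately, and cancellation is no longer reported as failure). A hypothetical sketch of a tri-state encoding; the actual values gpt4all uses are not visible in this header:

    #include <cstdio>

    // Assumed encoding, for illustration only.
    enum SwitchState : int {
        NotSwitching     = 0, // no context switch in progress
        WaitingForModel  = 1, // another chat still owns the loaded model
        SwitchingContext = 2, // model acquired; restoring this chat's state
    };

    void onTrySwitchContextOfLoadedModelCompleted(int value)
    {
        switch (value) {
        case NotSwitching:     std::puts("idle");                 break;
        case WaitingForModel:  std::puts("waiting for model..."); break;
        case SwitchingContext: std::puts("switching context..."); break;
        }
    }

    int main()
    {
        onTrySwitchContextOfLoadedModelCompleted(WaitingForModel);
        onTrySwitchContextOfLoadedModelCompleted(SwitchingContext);
        onTrySwitchContextOfLoadedModelCompleted(NotSwitching);
    }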
@@ -172,7 +174,6 @@ private:
     QThread m_llmThread;
     std::atomic<bool> m_stopGenerating;
     std::atomic<bool> m_shouldBeLoaded;
-    std::atomic<bool> m_shouldTrySwitchContext;
     std::atomic<bool> m_isRecalc;
     std::atomic<bool> m_forceUnloadModel;
     std::atomic<bool> m_markedForDeletion;
@@ -181,6 +182,10 @@ private:
     bool m_reloadingToChangeVariant;
     bool m_processedSystemPrompt;
     bool m_restoreStateFromText;
+    // m_pristineLoadedState is set if saveState is unnecessary, either because:
+    // - an unload was queued during LLModel::restoreState()
+    // - the chat will be restored from text and hasn't been interacted with yet
+    bool m_pristineLoadedState = false;
     QVector<QPair<QString, QString>> m_stateFromText;
 };
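
To make the last change concrete: the flag lets serialization skip `LLModel::saveState()` when the in-memory state cannot have diverged from what is already persisted. A simplified sketch of the skip, assuming a stubbed `saveState` (the real one serializes model state into a buffer):

    #include <cstdio>

    struct LLModel {
        void saveState() { std::puts("expensive state serialization"); }
    };

    struct ChatLLM {
        LLModel model;
        bool m_pristineLoadedState = false;

        void serialize()
        {
            // If nothing touched the model since load/restore, the persisted
            // state is already current and the copy can be skipped.
            if (m_pristineLoadedState)
                std::puts("state is pristine; skipping saveState");
            else
                model.saveState();
        }
    };

    int main()
    {
        ChatLLM llm;
        llm.serialize();                  // state may be dirty: saves
        llm.m_pristineLoadedState = true; // e.g. restored and untouched
        llm.serialize();                  // skipped
    }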