Complete revamp of model loading to allow for more discrete control by

the user of the model's loading behavior.

Signed-off-by: Adam Treat <treat.adam@gmail.com>
This commit is contained in:
Adam Treat
2024-02-07 09:37:59 -05:00
committed by AT
parent f2024a1f9e
commit d948a4f2ee
14 changed files with 506 additions and 175 deletions

View File

@@ -81,6 +81,8 @@ public:
// Returns the current value of m_shouldBeLoaded (a std::atomic<bool> member).
bool shouldBeLoaded() const { return m_shouldBeLoaded; }
// Defined out of line; body not visible in this hunk.
// NOTE(review): presumably stores b into m_shouldBeLoaded and notifies via shouldBeLoadedChanged() - confirm.
void setShouldBeLoaded(bool b);
// Defined out of line; body not visible in this hunk.
// NOTE(review): presumably stores b into m_shouldTrySwitchContext and notifies via
// shouldTrySwitchContextChanged() - confirm against the implementation.
void setShouldTrySwitchContext(bool b);
// Stores b into m_forceUnloadModel (a std::atomic<bool> member). This inline setter
// emits no signal.
void setForceUnloadModel(bool b) { m_forceUnloadModel = b; }
// Const accessor returning a QString by value; defined out of line.
QString response() const;
@@ -98,14 +100,15 @@ public:
// Public Qt slots. Only the declarations are visible here; all bodies live elsewhere,
// so the notes below are limited to what the signatures show.
public Q_SLOTS:
// Takes a list of collection names plus the prompt text and returns a bool
// (the meaning of the return value is not visible here).
bool prompt(const QList<QString> &collectionList, const QString &prompt);
bool loadDefaultModel();
// NOTE(review): presumably the outcome is also reported through the
// trySwitchContextOfLoadedModelCompleted(bool) signal - confirm.
bool trySwitchContextOfLoadedModel(const ModelInfo &modelInfo);
bool loadModel(const ModelInfo &modelInfo);
void modelChangeRequested(const ModelInfo &modelInfo);
// NOTE(review): relationship to unloadModel() and to the m_forceUnloadModel flag is
// not visible in this hunk - verify in the implementation.
void forceUnloadModel();
void unloadModel();
void reloadModel();
void generateName();
void handleChatIdChanged(const QString &id);
// NOTE(review): the two handlers below are presumably connected to the
// shouldBeLoadedChanged() / shouldTrySwitchContextChanged() signals - confirm where
// the connections are made.
void handleShouldBeLoadedChanged();
void handleShouldTrySwitchContextChanged();
void handleThreadStarted();
void handleForceMetalChanged(bool forceMetal);
void handleDeviceChanged();
@@ -114,7 +117,7 @@ public Q_SLOTS:
// Qt signals. Signals are declared only; their emission sites are not visible here.
Q_SIGNALS:
void recalcChanged();
void isModelLoadedChanged(bool);
// Carries a float payload.
// NOTE(review): the value range (e.g. 0..1 vs 0..100) is not visible here - confirm
// at the emit sites.
void modelLoadingPercentageChanged(float);
// Carries a textual error description.
void modelLoadingError(const QString &error);
// Carries the response text.
void responseChanged(const QString &response);
void promptProcessing();
@@ -125,6 +128,8 @@ Q_SIGNALS:
// Further signal declarations (the enclosing diff hunk header places these in the
// Q_SIGNALS section). Emission sites are not visible here.
void stateChanged();
void threadStarted();
void shouldBeLoadedChanged();
void shouldTrySwitchContextChanged();
// Carries a bool payload.
// NOTE(review): presumably the success/failure of trySwitchContextOfLoadedModel() - confirm.
void trySwitchContextOfLoadedModelCompleted(bool);
// Passes a pointer to a QList<ResultInfo> alongside the query parameters.
// NOTE(review): presumably the receiver fills *results, which would require a
// blocking/direct connection - verify how this signal is connected.
void requestRetrieveFromDB(const QList<QString> &collections, const QString &text, int retrievalSize, QList<ResultInfo> *results);
void reportSpeed(const QString &speed);
void reportDevice(const QString &device);
@@ -167,7 +172,9 @@ private:
// Private state (the enclosing diff hunk header places these under `private:`).
// Thread object owned by value.
// NOTE(review): presumably the thread this object is moved to - confirm in the constructor.
QThread m_llmThread;
// The flags below are std::atomic<bool>.
// NOTE(review): presumably because they are read and written from more than one thread - confirm.
std::atomic<bool> m_stopGenerating;
// Read by shouldBeLoaded().
std::atomic<bool> m_shouldBeLoaded;
std::atomic<bool> m_shouldTrySwitchContext;
std::atomic<bool> m_isRecalc;
// Written by setForceUnloadModel().
std::atomic<bool> m_forceUnloadModel;
// Plain (non-atomic) bools. No in-class initializers are visible in this hunk.
// NOTE(review): confirm they are initialized in the constructor.
bool m_isServer;
bool m_forceMetal;
bool m_reloadingToChangeVariant;