From f2e5c931fe7103cec6ae308ec63b1b2d93961a05 Mon Sep 17 00:00:00 2001
From: Jared Van Bortel
Date: Thu, 8 Aug 2024 15:14:58 -0400
Subject: [PATCH] rename ChatLLM to LlamaCppModel

Signed-off-by: Jared Van Bortel
---
 gpt4all-chat/CMakeLists.txt | 2 +-
 gpt4all-chat/chat.cpp | 50 +++----
 gpt4all-chat/chat.h | 4 +-
 gpt4all-chat/chatapi.cpp | 2 +-
 gpt4all-chat/chatlistmodel.h | 6 +-
 .../{chatllm.cpp => llamacpp_model.cpp} | 126 +++++++++---------
 gpt4all-chat/{chatllm.h => llamacpp_model.h} | 12 +-
 gpt4all-chat/main.cpp | 2 +-
 gpt4all-chat/server.cpp | 2 +-
 gpt4all-chat/server.h | 4 +-
 10 files changed, 105 insertions(+), 105 deletions(-)
 rename gpt4all-chat/{chatllm.cpp => llamacpp_model.cpp} (91%)
 rename gpt4all-chat/{chatllm.h => llamacpp_model.h} (96%)

diff --git a/gpt4all-chat/CMakeLists.txt b/gpt4all-chat/CMakeLists.txt
index bada68c3..e3c37d6b 100644
--- a/gpt4all-chat/CMakeLists.txt
+++ b/gpt4all-chat/CMakeLists.txt
@@ -109,7 +109,7 @@ endif()
 qt_add_executable(chat
     main.cpp
     chat.h chat.cpp
-    chatllm.h chatllm.cpp
+    llamacpp_model.h llamacpp_model.cpp
     chatmodel.h chatlistmodel.h chatlistmodel.cpp
     chatapi.h chatapi.cpp
     chatviewtextprocessor.h chatviewtextprocessor.cpp
diff --git a/gpt4all-chat/chat.cpp b/gpt4all-chat/chat.cpp
index a44022c0..bc4bbb6b 100644
--- a/gpt4all-chat/chat.cpp
+++ b/gpt4all-chat/chat.cpp
@@ -26,7 +26,7 @@ Chat::Chat(QObject *parent)
     , m_chatModel(new ChatModel(this))
     , m_responseState(Chat::ResponseStopped)
     , m_creationDate(QDateTime::currentSecsSinceEpoch())
-    , m_llmodel(new ChatLLM(this))
+    , m_llmodel(new LlamaCppModel(this))
     , m_collectionModel(new LocalDocsCollectionsModel(this))
 {
     connectLLM();
@@ -55,31 +55,31 @@ Chat::~Chat()
 void Chat::connectLLM()
 {
     // Should be in different threads
-    connect(m_llmodel, &ChatLLM::modelLoadingPercentageChanged, this, &Chat::handleModelLoadingPercentageChanged, Qt::QueuedConnection);
-    connect(m_llmodel, &ChatLLM::responseChanged, this, &Chat::handleResponseChanged, Qt::QueuedConnection);
-    connect(m_llmodel, &ChatLLM::promptProcessing, this, &Chat::promptProcessing, Qt::QueuedConnection);
-    connect(m_llmodel, &ChatLLM::generatingQuestions, this, &Chat::generatingQuestions, Qt::QueuedConnection);
-    connect(m_llmodel, &ChatLLM::responseStopped, this, &Chat::responseStopped, Qt::QueuedConnection);
-    connect(m_llmodel, &ChatLLM::modelLoadingError, this, &Chat::handleModelLoadingError, Qt::QueuedConnection);
-    connect(m_llmodel, &ChatLLM::modelLoadingWarning, this, &Chat::modelLoadingWarning, Qt::QueuedConnection);
-    connect(m_llmodel, &ChatLLM::restoringFromTextChanged, this, &Chat::handleRestoringFromText, Qt::QueuedConnection);
-    connect(m_llmodel, &ChatLLM::generatedNameChanged, this, &Chat::generatedNameChanged, Qt::QueuedConnection);
-    connect(m_llmodel, &ChatLLM::generatedQuestionFinished, this, &Chat::generatedQuestionFinished, Qt::QueuedConnection);
-    connect(m_llmodel, &ChatLLM::reportSpeed, this, &Chat::handleTokenSpeedChanged, Qt::QueuedConnection);
-    connect(m_llmodel, &ChatLLM::loadedModelInfoChanged, this, &Chat::loadedModelInfoChanged, Qt::QueuedConnection);
-    connect(m_llmodel, &ChatLLM::databaseResultsChanged, this, &Chat::handleDatabaseResultsChanged, Qt::QueuedConnection);
-    connect(m_llmodel, &ChatLLM::modelInfoChanged, this, &Chat::handleModelInfoChanged, Qt::QueuedConnection);
-    connect(m_llmodel, &ChatLLM::trySwitchContextOfLoadedModelCompleted, this, &Chat::handleTrySwitchContextOfLoadedModelCompleted, Qt::QueuedConnection);
+    connect(m_llmodel, &LlamaCppModel::modelLoadingPercentageChanged, this, &Chat::handleModelLoadingPercentageChanged, Qt::QueuedConnection);
+    connect(m_llmodel, &LlamaCppModel::responseChanged, this, &Chat::handleResponseChanged, Qt::QueuedConnection);
+    connect(m_llmodel, &LlamaCppModel::promptProcessing, this, &Chat::promptProcessing, Qt::QueuedConnection);
+    connect(m_llmodel, &LlamaCppModel::generatingQuestions, this, &Chat::generatingQuestions, Qt::QueuedConnection);
+    connect(m_llmodel, &LlamaCppModel::responseStopped, this, &Chat::responseStopped, Qt::QueuedConnection);
+    connect(m_llmodel, &LlamaCppModel::modelLoadingError, this, &Chat::handleModelLoadingError, Qt::QueuedConnection);
+    connect(m_llmodel, &LlamaCppModel::modelLoadingWarning, this, &Chat::modelLoadingWarning, Qt::QueuedConnection);
+    connect(m_llmodel, &LlamaCppModel::restoringFromTextChanged, this, &Chat::handleRestoringFromText, Qt::QueuedConnection);
+    connect(m_llmodel, &LlamaCppModel::generatedNameChanged, this, &Chat::generatedNameChanged, Qt::QueuedConnection);
+    connect(m_llmodel, &LlamaCppModel::generatedQuestionFinished, this, &Chat::generatedQuestionFinished, Qt::QueuedConnection);
+    connect(m_llmodel, &LlamaCppModel::reportSpeed, this, &Chat::handleTokenSpeedChanged, Qt::QueuedConnection);
+    connect(m_llmodel, &LlamaCppModel::loadedModelInfoChanged, this, &Chat::loadedModelInfoChanged, Qt::QueuedConnection);
+    connect(m_llmodel, &LlamaCppModel::databaseResultsChanged, this, &Chat::handleDatabaseResultsChanged, Qt::QueuedConnection);
+    connect(m_llmodel, &LlamaCppModel::modelInfoChanged, this, &Chat::handleModelInfoChanged, Qt::QueuedConnection);
+    connect(m_llmodel, &LlamaCppModel::trySwitchContextOfLoadedModelCompleted, this, &Chat::handleTrySwitchContextOfLoadedModelCompleted, Qt::QueuedConnection);

-    connect(this, &Chat::promptRequested, m_llmodel, &ChatLLM::prompt, Qt::QueuedConnection);
-    connect(this, &Chat::modelChangeRequested, m_llmodel, &ChatLLM::modelChangeRequested, Qt::QueuedConnection);
-    connect(this, &Chat::loadDefaultModelRequested, m_llmodel, &ChatLLM::loadDefaultModel, Qt::QueuedConnection);
-    connect(this, &Chat::loadModelRequested, m_llmodel, &ChatLLM::loadModel, Qt::QueuedConnection);
-    connect(this, &Chat::generateNameRequested, m_llmodel, &ChatLLM::generateName, Qt::QueuedConnection);
-    connect(this, &Chat::regenerateResponseRequested, m_llmodel, &ChatLLM::regenerateResponse, Qt::QueuedConnection);
-    connect(this, &Chat::resetResponseRequested, m_llmodel, &ChatLLM::resetResponse, Qt::QueuedConnection);
-    connect(this, &Chat::resetContextRequested, m_llmodel, &ChatLLM::resetContext, Qt::QueuedConnection);
-    connect(this, &Chat::processSystemPromptRequested, m_llmodel, &ChatLLM::processSystemPrompt, Qt::QueuedConnection);
+    connect(this, &Chat::promptRequested, m_llmodel, &LlamaCppModel::prompt, Qt::QueuedConnection);
+    connect(this, &Chat::modelChangeRequested, m_llmodel, &LlamaCppModel::modelChangeRequested, Qt::QueuedConnection);
+    connect(this, &Chat::loadDefaultModelRequested, m_llmodel, &LlamaCppModel::loadDefaultModel, Qt::QueuedConnection);
+    connect(this, &Chat::loadModelRequested, m_llmodel, &LlamaCppModel::loadModel, Qt::QueuedConnection);
+    connect(this, &Chat::generateNameRequested, m_llmodel, &LlamaCppModel::generateName, Qt::QueuedConnection);
+    connect(this, &Chat::regenerateResponseRequested, m_llmodel, &LlamaCppModel::regenerateResponse, Qt::QueuedConnection);
+    connect(this, &Chat::resetResponseRequested, m_llmodel, &LlamaCppModel::resetResponse, Qt::QueuedConnection);
+    connect(this, &Chat::resetContextRequested, m_llmodel, &LlamaCppModel::resetContext, Qt::QueuedConnection);
+    connect(this, &Chat::processSystemPromptRequested, m_llmodel, &LlamaCppModel::processSystemPrompt, Qt::QueuedConnection);

     connect(this, &Chat::collectionListChanged, m_collectionModel, &LocalDocsCollectionsModel::setCollections);
 }
diff --git a/gpt4all-chat/chat.h b/gpt4all-chat/chat.h
index cb1b1ccc..f61a26c9 100644
--- a/gpt4all-chat/chat.h
+++ b/gpt4all-chat/chat.h
@@ -1,7 +1,7 @@
 #ifndef CHAT_H
 #define CHAT_H

-#include "chatllm.h"
+#include "llamacpp_model.h"
 #include "chatmodel.h"
 #include "database.h" // IWYU pragma: keep
 #include "localdocsmodel.h" // IWYU pragma: keep
@@ -191,7 +191,7 @@ private:
     bool m_responseInProgress = false;
     ResponseState m_responseState;
     qint64 m_creationDate;
-    ChatLLM *m_llmodel;
+    LlamaCppModel *m_llmodel;
     QList m_databaseResults;
     bool m_isServer = false;
     bool m_shouldDeleteLater = false;
diff --git a/gpt4all-chat/chatapi.cpp b/gpt4all-chat/chatapi.cpp
index 41fb7f5b..740f1a14 100644
--- a/gpt4all-chat/chatapi.cpp
+++ b/gpt4all-chat/chatapi.cpp
@@ -47,7 +47,7 @@ bool ChatAPI::isModelLoaded() const
     return true;
 }

-// All three of the state virtual functions are handled custom inside of chatllm save/restore
+// All three of the state virtual functions are handled custom inside of LlamaCppModel save/restore
 size_t ChatAPI::stateSize() const
 {
     return 0;
diff --git a/gpt4all-chat/chatlistmodel.h b/gpt4all-chat/chatlistmodel.h
index 95f8e6f2..cc6c079d 100644
--- a/gpt4all-chat/chatlistmodel.h
+++ b/gpt4all-chat/chatlistmodel.h
@@ -2,7 +2,7 @@
 #define CHATLISTMODEL_H

 #include "chat.h"
-#include "chatllm.h"
+#include "llamacpp_model.h"
 #include "chatmodel.h"

 #include
@@ -220,11 +220,11 @@ public:
     int count() const { return m_chats.size(); }

-    // stop ChatLLM threads for clean shutdown
+    // stop LlamaCppModel threads for clean shutdown
     void destroyChats()
     {
         for (auto *chat: m_chats) { chat->destroy(); }
-        ChatLLM::destroyStore();
+        LlamaCppModel::destroyStore();
     }

     void removeChatFile(Chat *chat) const;
diff --git a/gpt4all-chat/chatllm.cpp b/gpt4all-chat/llamacpp_model.cpp
similarity index 91%
rename from gpt4all-chat/chatllm.cpp
rename to gpt4all-chat/llamacpp_model.cpp
index 104d020e..bca81403 100644
--- a/gpt4all-chat/chatllm.cpp
+++ b/gpt4all-chat/llamacpp_model.cpp
@@ -1,4 +1,4 @@
-#include "chatllm.h"
+#include "llamacpp_model.h"

 #include "chat.h"
 #include "chatapi.h"
@@ -94,13 +94,13 @@ void LLModelStore::destroy()
     m_availableModel.reset();
 }

-void LLModelInfo::resetModel(ChatLLM *cllm, ModelBackend *model) {
+void LLModelInfo::resetModel(LlamaCppModel *cllm, ModelBackend *model) {
     this->model.reset(model);
     fallbackReason.reset();
     emit cllm->loadedModelInfoChanged();
 }

-ChatLLM::ChatLLM(Chat *parent, bool isServer)
+LlamaCppModel::LlamaCppModel(Chat *parent, bool isServer)
     : QObject{nullptr}
     , m_promptResponseTokens(0)
     , m_promptTokens(0)
@@ -117,29 +117,29 @@ ChatLLM::ChatLLM(Chat *parent, bool isServer)
     , m_restoreStateFromText(false)
 {
     moveToThread(&m_llmThread);
-    connect(this, &ChatLLM::shouldBeLoadedChanged, this, &ChatLLM::handleShouldBeLoadedChanged,
+    connect(this, &LlamaCppModel::shouldBeLoadedChanged, this, &LlamaCppModel::handleShouldBeLoadedChanged,
         Qt::QueuedConnection); // explicitly queued
-    connect(this, &ChatLLM::trySwitchContextRequested, this, &ChatLLM::trySwitchContextOfLoadedModel,
+    connect(this, &LlamaCppModel::trySwitchContextRequested, this, &LlamaCppModel::trySwitchContextOfLoadedModel,
         Qt::QueuedConnection); // explicitly queued
-    connect(parent, &Chat::idChanged, this, &ChatLLM::handleChatIdChanged);
-    connect(&m_llmThread, &QThread::started, this, &ChatLLM::handleThreadStarted);
-    connect(MySettings::globalInstance(), &MySettings::forceMetalChanged, this, &ChatLLM::handleForceMetalChanged);
-    connect(MySettings::globalInstance(), &MySettings::deviceChanged, this, &ChatLLM::handleDeviceChanged);
+    connect(parent, &Chat::idChanged, this, &LlamaCppModel::handleChatIdChanged);
+    connect(&m_llmThread, &QThread::started, this, &LlamaCppModel::handleThreadStarted);
+    connect(MySettings::globalInstance(), &MySettings::forceMetalChanged, this, &LlamaCppModel::handleForceMetalChanged);
+    connect(MySettings::globalInstance(), &MySettings::deviceChanged, this, &LlamaCppModel::handleDeviceChanged);

     // The following are blocking operations and will block the llm thread
-    connect(this, &ChatLLM::requestRetrieveFromDB, LocalDocs::globalInstance()->database(), &Database::retrieveFromDB,
+    connect(this, &LlamaCppModel::requestRetrieveFromDB, LocalDocs::globalInstance()->database(), &Database::retrieveFromDB,
         Qt::BlockingQueuedConnection);

     m_llmThread.setObjectName(parent->id());
     m_llmThread.start();
 }

-ChatLLM::~ChatLLM()
+LlamaCppModel::~LlamaCppModel()
 {
     destroy();
 }

-void ChatLLM::destroy()
+void LlamaCppModel::destroy()
 {
     m_stopGenerating = true;
     m_llmThread.quit();
@@ -152,19 +152,19 @@ void ChatLLM::destroy()
     }
 }

-void ChatLLM::destroyStore()
+void LlamaCppModel::destroyStore()
 {
     LLModelStore::globalInstance()->destroy();
 }

-void ChatLLM::handleThreadStarted()
+void LlamaCppModel::handleThreadStarted()
 {
     m_timer = new TokenTimer(this);
-    connect(m_timer, &TokenTimer::report, this, &ChatLLM::reportSpeed);
+    connect(m_timer, &TokenTimer::report, this, &LlamaCppModel::reportSpeed);
     emit threadStarted();
 }

-void ChatLLM::handleForceMetalChanged(bool forceMetal)
+void LlamaCppModel::handleForceMetalChanged(bool forceMetal)
 {
 #if defined(Q_OS_MAC) && defined(__aarch64__)
     m_forceMetal = forceMetal;
@@ -177,7 +177,7 @@ void ChatLLM::handleForceMetalChanged(bool forceMetal)
 #endif
 }

-void ChatLLM::handleDeviceChanged()
+void LlamaCppModel::handleDeviceChanged()
 {
     if (isModelLoaded() && m_shouldBeLoaded) {
         m_reloadingToChangeVariant = true;
@@ -187,7 +187,7 @@ void ChatLLM::handleDeviceChanged()
     }
 }

-bool ChatLLM::loadDefaultModel()
+bool LlamaCppModel::loadDefaultModel()
 {
     ModelInfo defaultModel = ModelList::globalInstance()->defaultModelInfo();
     if (defaultModel.filename().isEmpty()) {
@@ -197,7 +197,7 @@ bool ChatLLM::loadDefaultModel()
     return loadModel(defaultModel);
 }

-void ChatLLM::trySwitchContextOfLoadedModel(const ModelInfo &modelInfo)
+void LlamaCppModel::trySwitchContextOfLoadedModel(const ModelInfo &modelInfo)
 {
     // We're trying to see if the store already has the model fully loaded that we wish to use
     // and if so we just acquire it from the store and switch the context and return true. If the
@@ -241,7 +241,7 @@ void ChatLLM::trySwitchContextOfLoadedModel(const ModelInfo &modelInfo)
     processSystemPrompt();
 }

-bool ChatLLM::loadModel(const ModelInfo &modelInfo)
+bool LlamaCppModel::loadModel(const ModelInfo &modelInfo)
 {
     // This is a complicated method because N different possible threads are interested in the outcome
     // of this method. Why? Because we have a main/gui thread trying to monitor the state of N different
@@ -388,7 +388,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)

 /* Returns false if the model should no longer be loaded (!m_shouldBeLoaded).
  * Otherwise returns true, even on error. */
-bool ChatLLM::loadNewModel(const ModelInfo &modelInfo, QVariantMap &modelLoadProps)
+bool LlamaCppModel::loadNewModel(const ModelInfo &modelInfo, QVariantMap &modelLoadProps)
 {
     QElapsedTimer modelLoadTimer;
     modelLoadTimer.start();
@@ -585,7 +585,7 @@ bool ChatLLM::loadNewModel(const ModelInfo &modelInfo, QVariantMap &modelLoadPro
     return true;
 };

-bool ChatLLM::isModelLoaded() const
+bool LlamaCppModel::isModelLoaded() const
 {
     return m_llModelInfo.model && m_llModelInfo.model->isModelLoaded();
 }
@@ -619,7 +619,7 @@ std::string trim_whitespace(const std::string& input)
 }

 // FIXME(jared): we don't actually have to re-decode the prompt to generate a new response
-void ChatLLM::regenerateResponse()
+void LlamaCppModel::regenerateResponse()
 {
     // ChatGPT uses a different semantic meaning for n_past than local models. For ChatGPT, the meaning
     // of n_past is of the number of prompt/response pairs, rather than for total tokens.
@@ -635,7 +635,7 @@ void ChatLLM::regenerateResponse()
     emit responseChanged(QString::fromStdString(m_response));
 }

-void ChatLLM::resetResponse()
+void LlamaCppModel::resetResponse()
 {
     m_promptTokens = 0;
     m_promptResponseTokens = 0;
@@ -643,43 +643,43 @@ void ChatLLM::resetResponse()
     emit responseChanged(QString::fromStdString(m_response));
 }

-void ChatLLM::resetContext()
+void LlamaCppModel::resetContext()
 {
     resetResponse();
     m_processedSystemPrompt = false;
     m_ctx = ModelBackend::PromptContext();
 }

-QString ChatLLM::response() const
+QString LlamaCppModel::response() const
 {
     return QString::fromStdString(remove_leading_whitespace(m_response));
 }

-void ChatLLM::setModelInfo(const ModelInfo &modelInfo)
+void LlamaCppModel::setModelInfo(const ModelInfo &modelInfo)
 {
     m_modelInfo = modelInfo;
     emit modelInfoChanged(modelInfo);
 }

-void ChatLLM::acquireModel()
+void LlamaCppModel::acquireModel()
 {
     m_llModelInfo = LLModelStore::globalInstance()->acquireModel();
     emit loadedModelInfoChanged();
 }

-void ChatLLM::resetModel()
+void LlamaCppModel::resetModel()
 {
     m_llModelInfo = {};
     emit loadedModelInfoChanged();
 }

-void ChatLLM::modelChangeRequested(const ModelInfo &modelInfo)
+void LlamaCppModel::modelChangeRequested(const ModelInfo &modelInfo)
 {
     m_shouldBeLoaded = true;
     loadModel(modelInfo);
 }

-bool ChatLLM::handlePrompt(int32_t token)
+bool LlamaCppModel::handlePrompt(int32_t token)
 {
     // m_promptResponseTokens is related to last prompt/response not
     // the entire context window which we can reset on regenerate prompt
@@ -692,7 +692,7 @@ bool ChatLLM::handlePrompt(int32_t token)
     return !m_stopGenerating;
 }

-bool ChatLLM::handleResponse(int32_t token, const std::string &response)
+bool LlamaCppModel::handleResponse(int32_t token, const std::string &response)
 {
 #if defined(DEBUG)
     printf("%s", response.c_str());
@@ -716,7 +716,7 @@ bool ChatLLM::handleResponse(int32_t token, const std::string &response)
     return !m_stopGenerating;
 }

-bool ChatLLM::prompt(const QList &collectionList, const QString &prompt)
+bool LlamaCppModel::prompt(const QList &collectionList, const QString &prompt)
 {
     if (m_restoreStateFromText) {
         Q_ASSERT(m_state.isEmpty());
@@ -738,7 +738,7 @@ bool ChatLLM::prompt(const QList &collectionList, const QString &prompt
         repeat_penalty, repeat_penalty_tokens);
 }

-bool ChatLLM::promptInternal(const QList &collectionList, const QString &prompt, const QString &promptTemplate,
+bool LlamaCppModel::promptInternal(const QList &collectionList, const QString &prompt, const QString &promptTemplate,
     int32_t n_predict, int32_t top_k, float top_p, float min_p, float temp, int32_t n_batch,
     float repeat_penalty, int32_t repeat_penalty_tokens)
 {
@@ -766,8 +766,8 @@ bool ChatLLM::promptInternal(const QList &collectionList, const QString
     int n_threads = MySettings::globalInstance()->threadCount();

     m_stopGenerating = false;
-    auto promptFunc = std::bind(&ChatLLM::handlePrompt, this, std::placeholders::_1);
-    auto responseFunc = std::bind(&ChatLLM::handleResponse, this, std::placeholders::_1,
+    auto promptFunc = std::bind(&LlamaCppModel::handlePrompt, this, std::placeholders::_1);
+    auto responseFunc = std::bind(&LlamaCppModel::handleResponse, this, std::placeholders::_1,
         std::placeholders::_2);
     emit promptProcessing();
     m_ctx.n_predict = n_predict;
@@ -820,7 +820,7 @@ bool ChatLLM::promptInternal(const QList &collectionList, const QString
     return true;
 }

-void ChatLLM::setShouldBeLoaded(bool b)
+void LlamaCppModel::setShouldBeLoaded(bool b)
 {
 #if defined(DEBUG_MODEL_LOADING)
     qDebug() << "setShouldBeLoaded" << m_llmThread.objectName() << b << m_llModelInfo.model.get();
@@ -829,13 +829,13 @@ bool ChatLLM::promptInternal(const QList &collectionList, const QString
     emit shouldBeLoadedChanged();
 }

-void ChatLLM::requestTrySwitchContext()
+void LlamaCppModel::requestTrySwitchContext()
 {
     m_shouldBeLoaded = true; // atomic
     emit trySwitchContextRequested(modelInfo());
 }

-void ChatLLM::handleShouldBeLoadedChanged()
+void LlamaCppModel::handleShouldBeLoadedChanged()
 {
     if (m_shouldBeLoaded)
         reloadModel();
@@ -843,7 +843,7 @@ void ChatLLM::handleShouldBeLoadedChanged()
         unloadModel();
 }

-void ChatLLM::unloadModel()
+void LlamaCppModel::unloadModel()
 {
     if (!isModelLoaded() || m_isServer)
         return;
@@ -869,7 +869,7 @@ void ChatLLM::unloadModel()
     m_pristineLoadedState = false;
 }

-void ChatLLM::reloadModel()
+void LlamaCppModel::reloadModel()
 {
     if (isModelLoaded() && m_forceUnloadModel)
         unloadModel(); // we unload first if we are forcing an unload
@@ -887,7 +887,7 @@ void ChatLLM::reloadModel()
     loadModel(m);
 }

-void ChatLLM::generateName()
+void LlamaCppModel::generateName()
 {
     Q_ASSERT(isModelLoaded());
     if (!isModelLoaded())
@@ -895,13 +895,13 @@ void ChatLLM::generateName()

     const QString chatNamePrompt = MySettings::globalInstance()->modelChatNamePrompt(m_modelInfo);
     if (chatNamePrompt.trimmed().isEmpty()) {
-        qWarning() << "ChatLLM: not generating chat name because prompt is empty";
+        qWarning() << "LlamaCppModel: not generating chat name because prompt is empty";
         return;
     }

     auto promptTemplate = MySettings::globalInstance()->modelPromptTemplate(m_modelInfo);
-    auto promptFunc = std::bind(&ChatLLM::handleNamePrompt, this, std::placeholders::_1);
-    auto responseFunc = std::bind(&ChatLLM::handleNameResponse, this, std::placeholders::_1, std::placeholders::_2);
+    auto promptFunc = std::bind(&LlamaCppModel::handleNamePrompt, this, std::placeholders::_1);
+    auto responseFunc = std::bind(&LlamaCppModel::handleNameResponse, this, std::placeholders::_1, std::placeholders::_2);
     ModelBackend::PromptContext ctx = m_ctx;
     m_llModelInfo.model->prompt(chatNamePrompt.toStdString(), promptTemplate.toStdString(), promptFunc, responseFunc,
         /*allowContextShift*/ false, ctx);
@@ -913,12 +913,12 @@ void ChatLLM::generateName()
     m_pristineLoadedState = false;
 }

-void ChatLLM::handleChatIdChanged(const QString &id)
+void LlamaCppModel::handleChatIdChanged(const QString &id)
 {
     m_llmThread.setObjectName(id);
 }

-bool ChatLLM::handleNamePrompt(int32_t token)
+bool LlamaCppModel::handleNamePrompt(int32_t token)
 {
 #if defined(DEBUG)
     qDebug() << "name prompt" << m_llmThread.objectName() << token;
@@ -927,7 +927,7 @@
     return !m_stopGenerating;
 }

-bool ChatLLM::handleNameResponse(int32_t token, const std::string &response)
+bool LlamaCppModel::handleNameResponse(int32_t token, const std::string &response)
 {
 #if defined(DEBUG)
     qDebug() << "name response" << m_llmThread.objectName() << token << response;
@@ -941,7 +941,7 @@ bool ChatLLM::handleNameResponse(int32_t token, const std::string &response)
     return words.size() <= 3;
 }

-bool ChatLLM::handleQuestionPrompt(int32_t token)
+bool LlamaCppModel::handleQuestionPrompt(int32_t token)
 {
 #if defined(DEBUG)
     qDebug() << "question prompt" << m_llmThread.objectName() << token;
@@ -950,7 +950,7 @@
     return !m_stopGenerating;
 }

-bool ChatLLM::handleQuestionResponse(int32_t token, const std::string &response)
+bool LlamaCppModel::handleQuestionResponse(int32_t token, const std::string &response)
 {
 #if defined(DEBUG)
     qDebug() << "question response" << m_llmThread.objectName() << token << response;
@@ -979,7 +979,7 @@ bool ChatLLM::handleQuestionResponse(int32_t token, const std::string &response)
     return true;
 }

-void ChatLLM::generateQuestions(qint64 elapsed)
+void LlamaCppModel::generateQuestions(qint64 elapsed)
 {
     Q_ASSERT(isModelLoaded());
     if (!isModelLoaded()) {
@@ -996,8 +996,8 @@ void ChatLLM::generateQuestions(qint64 elapsed)
     emit generatingQuestions();
     m_questionResponse.clear();
     auto promptTemplate = MySettings::globalInstance()->modelPromptTemplate(m_modelInfo);
-    auto promptFunc = std::bind(&ChatLLM::handleQuestionPrompt, this, std::placeholders::_1);
-    auto responseFunc = std::bind(&ChatLLM::handleQuestionResponse, this, std::placeholders::_1, std::placeholders::_2);
+    auto promptFunc = std::bind(&LlamaCppModel::handleQuestionPrompt, this, std::placeholders::_1);
+    auto responseFunc = std::bind(&LlamaCppModel::handleQuestionResponse, this, std::placeholders::_1, std::placeholders::_2);
     ModelBackend::PromptContext ctx = m_ctx;
     QElapsedTimer totalTime;
     totalTime.start();
@@ -1008,7 +1008,7 @@ void ChatLLM::generateQuestions(qint64 elapsed)
 }


-bool ChatLLM::handleSystemPrompt(int32_t token)
+bool LlamaCppModel::handleSystemPrompt(int32_t token)
 {
 #if defined(DEBUG)
     qDebug() << "system prompt" << m_llmThread.objectName() << token << m_stopGenerating;
@@ -1017,7 +1017,7 @@
     return !m_stopGenerating;
 }

-bool ChatLLM::handleRestoreStateFromTextPrompt(int32_t token)
+bool LlamaCppModel::handleRestoreStateFromTextPrompt(int32_t token)
 {
 #if defined(DEBUG)
     qDebug() << "restore state from text prompt" << m_llmThread.objectName() << token << m_stopGenerating;
@@ -1028,7 +1028,7 @@

 // this function serialized the cached model state to disk.
 // we want to also serialize n_ctx, and read it at load time.
-bool ChatLLM::serialize(QDataStream &stream, int version, bool serializeKV)
+bool LlamaCppModel::serialize(QDataStream &stream, int version, bool serializeKV)
 {
     if (version > 1) {
         stream << m_llModelType;
@@ -1068,7 +1068,7 @@ bool ChatLLM::serialize(QDataStream &stream, int version, bool serializeKV)
     return stream.status() == QDataStream::Ok;
 }

-bool ChatLLM::deserialize(QDataStream &stream, int version, bool deserializeKV, bool discardKV)
+bool LlamaCppModel::deserialize(QDataStream &stream, int version, bool deserializeKV, bool discardKV)
 {
     if (version > 1) {
         int internalStateVersion;
@@ -1148,7 +1148,7 @@ bool ChatLLM::deserialize(QDataStream &stream, int version, bool deserializeKV,
     return stream.status() == QDataStream::Ok;
 }

-void ChatLLM::saveState()
+void LlamaCppModel::saveState()
 {
     if (!isModelLoaded() || m_pristineLoadedState)
         return;
@@ -1170,7 +1170,7 @@ void ChatLLM::saveState()
     m_llModelInfo.model->saveState(static_cast(reinterpret_cast(m_state.data())));
 }

-void ChatLLM::restoreState()
+void LlamaCppModel::restoreState()
 {
     if (!isModelLoaded())
         return;
@@ -1211,7 +1211,7 @@ void ChatLLM::restoreState()
     }
 }

-void ChatLLM::processSystemPrompt()
+void LlamaCppModel::processSystemPrompt()
 {
     Q_ASSERT(isModelLoaded());
     if (!isModelLoaded() || m_processedSystemPrompt || m_restoreStateFromText || m_isServer)
@@ -1227,7 +1227,7 @@ void ChatLLM::processSystemPrompt()
     m_stopGenerating = false;
     m_ctx = ModelBackend::PromptContext();

-    auto promptFunc = std::bind(&ChatLLM::handleSystemPrompt, this, std::placeholders::_1);
+    auto promptFunc = std::bind(&LlamaCppModel::handleSystemPrompt, this, std::placeholders::_1);

     const int32_t n_predict = MySettings::globalInstance()->modelMaxLength(m_modelInfo);
     const int32_t top_k = MySettings::globalInstance()->modelTopK(m_modelInfo);
@@ -1268,7 +1268,7 @@ void ChatLLM::processSystemPrompt()
     m_pristineLoadedState = false;
 }

-void ChatLLM::processRestoreStateFromText()
+void LlamaCppModel::processRestoreStateFromText()
 {
     Q_ASSERT(isModelLoaded());
     if (!isModelLoaded() || !m_restoreStateFromText || m_isServer)
@@ -1280,7 +1280,7 @@ void ChatLLM::processRestoreStateFromText()
     m_stopGenerating = false;
     m_ctx = ModelBackend::PromptContext();

-    auto promptFunc = std::bind(&ChatLLM::handleRestoreStateFromTextPrompt, this, std::placeholders::_1);
+    auto promptFunc = std::bind(&LlamaCppModel::handleRestoreStateFromTextPrompt, this, std::placeholders::_1);

     const QString promptTemplate = MySettings::globalInstance()->modelPromptTemplate(m_modelInfo);
     const int32_t n_predict = MySettings::globalInstance()->modelMaxLength(m_modelInfo);
diff --git a/gpt4all-chat/chatllm.h b/gpt4all-chat/llamacpp_model.h
similarity index 96%
rename from gpt4all-chat/chatllm.h
rename to gpt4all-chat/llamacpp_model.h
index c721095e..e436044d 100644
--- a/gpt4all-chat/chatllm.h
+++ b/gpt4all-chat/llamacpp_model.h
@@ -27,7 +27,7 @@
 using namespace Qt::Literals::StringLiterals;

 class Chat;
-class ChatLLM;
+class LlamaCppModel;
 class QDataStream;

 // NOTE: values serialized to disk, do not change or reuse
@@ -43,10 +43,10 @@ struct LLModelInfo {
     QFileInfo fileInfo;
     std::optional fallbackReason;

-    // NOTE: This does not store the model type or name on purpose as this is left for ChatLLM which
+    // NOTE: This does not store the model type or name on purpose as this is left for LlamaCppModel which
     // must be able to serialize the information even if it is in the unloaded state
-    void resetModel(ChatLLM *cllm, ModelBackend *model = nullptr);
+    void resetModel(LlamaCppModel *cllm, ModelBackend *model = nullptr);
 };

 class TokenTimer : public QObject {
@@ -89,7 +89,7 @@ private:
     quint32 m_tokens;
 };

-class ChatLLM : public QObject
+class LlamaCppModel : public QObject
 {
     Q_OBJECT
     Q_PROPERTY(bool restoringFromText READ restoringFromText NOTIFY restoringFromTextChanged)
@@ -98,8 +98,8 @@ class ChatLLM : public QObject
     Q_PROPERTY(QString fallbackReason READ fallbackReason NOTIFY loadedModelInfoChanged)

 public:
-    ChatLLM(Chat *parent, bool isServer = false);
-    virtual ~ChatLLM();
+    LlamaCppModel(Chat *parent, bool isServer = false);
+    virtual ~LlamaCppModel();

     void destroy();
     static void destroyStore();
diff --git a/gpt4all-chat/main.cpp b/gpt4all-chat/main.cpp
index 7bed7d29..9f848a2f 100644
--- a/gpt4all-chat/main.cpp
+++ b/gpt4all-chat/main.cpp
@@ -87,7 +87,7 @@ int main(int argc, char *argv[])

     int res = app.exec();

-    // Make sure ChatLLM threads are joined before global destructors run.
+    // Make sure LlamaCppModel threads are joined before global destructors run.
     // Otherwise, we can get a heap-use-after-free inside of llama.cpp.
     ChatListModel::globalInstance()->destroyChats();

diff --git a/gpt4all-chat/server.cpp b/gpt4all-chat/server.cpp
index c8485d93..34cb4018 100644
--- a/gpt4all-chat/server.cpp
+++ b/gpt4all-chat/server.cpp
@@ -71,7 +71,7 @@ static inline QJsonObject resultToJson(const ResultInfo &info)
 }

 Server::Server(Chat *chat)
-    : ChatLLM(chat, true /*isServer*/)
+    : LlamaCppModel(chat, true /*isServer*/)
     , m_chat(chat)
     , m_server(nullptr)
 {
diff --git a/gpt4all-chat/server.h b/gpt4all-chat/server.h
index 689f0b60..6686e152 100644
--- a/gpt4all-chat/server.h
+++ b/gpt4all-chat/server.h
@@ -1,7 +1,7 @@
 #ifndef SERVER_H
 #define SERVER_H

-#include "chatllm.h"
+#include "llamacpp_model.h"
 #include "database.h"

 #include
@@ -13,7 +13,7 @@
 class Chat;

 class QHttpServer;

-class Server : public ChatLLM
+class Server : public LlamaCppModel
 {
     Q_OBJECT