diff --git a/deps/CMakeLists.txt b/deps/CMakeLists.txt
index 46f1b39a..21ece673 100644
--- a/deps/CMakeLists.txt
+++ b/deps/CMakeLists.txt
@@ -11,3 +11,13 @@ set(QCORO_WITH_QTQUICK OFF)
 set(QCORO_WITH_QML OFF)
 set(QCORO_WITH_QTTEST OFF)
 add_subdirectory(qcoro)
+
+set(GPT4ALL_BOOST_TAG 1.87.0)
+FetchContent_Declare(
+    boost
+    URL "https://github.com/boostorg/boost/releases/download/boost-${GPT4ALL_BOOST_TAG}/boost-${GPT4ALL_BOOST_TAG}-cmake.tar.xz"
+    URL_HASH "SHA256=7da75f171837577a52bbf217e17f8ea576c7c246e4594d617bfde7fafd408be5"
+)
+
+set(BOOST_INCLUDE_LIBRARIES json describe system)
+FetchContent_MakeAvailable(boost)
diff --git a/gpt4all-backend/deps/CMakeLists.txt b/gpt4all-backend/deps/CMakeLists.txt
index e74dcbe7..705af2f2 100644
--- a/gpt4all-backend/deps/CMakeLists.txt
+++ b/gpt4all-backend/deps/CMakeLists.txt
@@ -2,15 +2,4 @@ include(FetchContent)
 
 set(BUILD_SHARED_LIBS OFF)
 
-# suppress warnings during boost build
-add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:BOOST_ALLOW_DEPRECATED_HEADERS>)
-
-set(GPT4ALL_BOOST_TAG 1.87.0)
-FetchContent_Declare(
-    boost
-    URL "https://github.com/boostorg/boost/releases/download/boost-${GPT4ALL_BOOST_TAG}/boost-${GPT4ALL_BOOST_TAG}-cmake.tar.xz"
-    URL_HASH "SHA256=7da75f171837577a52bbf217e17f8ea576c7c246e4594d617bfde7fafd408be5"
-)
-FetchContent_MakeAvailable(boost)
-
 add_subdirectory(date)
diff --git a/gpt4all-chat/CMakeLists.txt b/gpt4all-chat/CMakeLists.txt
index 6783efc0..bf987db8 100644
--- a/gpt4all-chat/CMakeLists.txt
+++ b/gpt4all-chat/CMakeLists.txt
@@ -240,7 +240,8 @@ qt_add_executable(chat
     src/jinja_replacements.cpp src/jinja_replacements.h
     src/json-helpers.cpp src/json-helpers.h
     src/llm.cpp src/llm.h
-    src/llmodel_chat.h
+    src/llmodel_chat.h src/llmodel_chat.cpp
+    src/llmodel_description.h src/llmodel_description.cpp
     src/llmodel_ollama.cpp src/llmodel_ollama.h
     src/llmodel_openai.cpp src/llmodel_openai.h
     src/llmodel_provider.cpp src/llmodel_provider.h
diff --git a/gpt4all-chat/src/chatllm.cpp b/gpt4all-chat/src/chatllm.cpp
index 66996a89..b5491727 100644
--- a/gpt4all-chat/src/chatllm.cpp
+++ b/gpt4all-chat/src/chatllm.cpp
@@ -127,7 +127,7 @@ struct PromptModelWithToolsResult {
     bool shouldExecuteToolCall;
 };
 static auto promptModelWithTools(
-    ChatLLMInstance *model, BaseResponseHandler &respHandler, const GenerationParams &params, const QByteArray &prompt,
+    ChatLLMInstance *model, BaseResponseHandler &respHandler, const GenerationParams *params, const QByteArray &prompt,
     const QStringList &toolNames
 ) -> QCoro::Task<PromptModelWithToolsResult>
 {
@@ -499,8 +499,8 @@ void ChatLLM::modelChangeRequested(const ModelInfo &modelInfo)
     }
 }
 
-auto ChatLLM::modelDescription() -> const ModelDescription *
-{ return m_llmInstance->description(); }
+auto ChatLLM::modelProvider() -> const ModelProvider *
+{ return m_llmInstance->description()->provider(); }
 
 void ChatLLM::prompt(const QStringList &enabledCollections)
 {
@@ -512,7 +512,7 @@ void ChatLLM::prompt(const QStringList &enabledCollections)
     }
 
     try {
-        promptInternalChat(enabledCollections, mySettings->modelGenParams(m_modelInfo));
+        QCoro::waitFor(promptInternalChat(enabledCollections, mySettings->modelGenParams(m_modelInfo).get()));
     } catch (const std::exception &e) {
         // FIXME(jared): this is neither translated nor serialized
         m_chatModel->setResponseValue(u"Error: %1"_s.arg(QString::fromUtf8(e.what())));
@@ -641,8 +641,8 @@ std::string ChatLLM::applyJinjaTemplate(std::span<const MessageItem> items) cons
     Q_UNREACHABLE();
 }
 
-auto ChatLLM::promptInternalChat(const QStringList &enabledCollections, const GenerationParams &params,
-                                 qsizetype startOffset) -> ChatPromptResult
+auto ChatLLM::promptInternalChat(const QStringList &enabledCollections, const GenerationParams *params,
+                                 qsizetype startOffset) -> QCoro::Task<ChatPromptResult>
 {
     Q_ASSERT(isModelLoaded());
     Q_ASSERT(m_chatModel);
@@ -679,8 +679,8 @@ auto ChatLLM::promptInternalChat(const QStringList &enabledCollections, const Ge
     auto messageItems = getChat();
     messageItems.pop_back(); // exclude new response
 
-    auto result = promptInternal(messageItems, params, !databaseResults.isEmpty());
-    return {
+    auto result = co_await promptInternal(messageItems, params, !databaseResults.isEmpty());
+    co_return {
         /*PromptResult*/ {
             .response = std::move(result.response),
             .promptTokens = result.promptTokens,
@@ -748,7 +748,7 @@ private:
 };
 
 auto ChatLLM::promptInternal(
-    const std::variant<std::span<const MessageItem>, std::string_view> &prompt, const GenerationParams &params,
+    const std::variant<std::span<const MessageItem>, std::string_view> &prompt, const GenerationParams *params,
     bool usedLocalDocs
 ) -> QCoro::Task<PromptResult>
 {
@@ -967,7 +967,7 @@ void ChatLLM::generateName()
     // TODO: support interruption via m_stopGenerating
     promptModelWithTools(
         m_llmInstance.get(),
-        respHandler, mySettings->modelGenParams(m_modelInfo),
+        respHandler, mySettings->modelGenParams(m_modelInfo).get(),
         applyJinjaTemplate(forkConversation(chatNamePrompt)).c_str(),
         { ToolCallConstants::ThinkTagName }
     );
@@ -1043,7 +1043,7 @@ void ChatLLM::generateQuestions(qint64 elapsed)
     // TODO: support interruption via m_stopGenerating
     promptModelWithTools(
         m_llmInstance.get(),
-        respHandler, mySettings->modelGenParams(m_modelInfo),
+        respHandler, mySettings->modelGenParams(m_modelInfo).get(),
         applyJinjaTemplate(forkConversation(suggestedFollowUpPrompt)).c_str(),
         { ToolCallConstants::ThinkTagName }
     );
diff --git a/gpt4all-chat/src/chatllm.h b/gpt4all-chat/src/chatllm.h
index 5c2491ba..29be034d 100644
--- a/gpt4all-chat/src/chatllm.h
+++ b/gpt4all-chat/src/chatllm.h
@@ -6,6 +6,8 @@
 #include "llmodel_chat.h"
 #include "modellist.h"
 
+#include // IWYU pragma: keep
+
 #include
 #include
 #include
@@ -32,6 +34,7 @@ using namespace Qt::Literals::StringLiterals;
 class ChatLLM;
 class QDataStream;
 namespace QCoro { template <typename T> class Task; }
+namespace gpt4all::ui { class ModelProvider; }
 
 
 // NOTE: values serialized to disk, do not change or reuse
@@ -210,13 +213,13 @@ protected:
         QList<ResultInfo> databaseResults;
     };
 
-    auto modelDescription() -> const gpt4all::ui::ModelDescription *;
+    auto modelProvider() -> const gpt4all::ui::ModelProvider *;
 
-    auto promptInternalChat(const QStringList &enabledCollections, const gpt4all::ui::GenerationParams &params,
-                            qsizetype startOffset = 0) -> ChatPromptResult;
+    auto promptInternalChat(const QStringList &enabledCollections, const gpt4all::ui::GenerationParams *params,
+                            qsizetype startOffset = 0) -> QCoro::Task<ChatPromptResult>;
     // passing a string_view directly skips templating and uses the raw string
     auto promptInternal(const std::variant<std::span<const MessageItem>, std::string_view> &prompt,
-                        const gpt4all::ui::GenerationParams &params, bool usedLocalDocs) -> QCoro::Task<PromptResult>;
+                        const gpt4all::ui::GenerationParams *params, bool usedLocalDocs) -> QCoro::Task<PromptResult>;
 
 private:
     auto loadNewModel(const ModelInfo &modelInfo, QVariantMap &modelLoadProps) -> QCoro::Task<bool>;
diff --git a/gpt4all-chat/src/llmodel_chat.cpp b/gpt4all-chat/src/llmodel_chat.cpp
new file mode 100644
index 00000000..b21ac3f4
--- /dev/null
+++ b/gpt4all-chat/src/llmodel_chat.cpp
@@ -0,0 +1,10 @@
+#include "llmodel_chat.h"
+
+
+namespace gpt4all::ui {
+
+
+ChatLLMInstance::~ChatLLMInstance() noexcept = default;
+
+
+} // namespace gpt4all::ui
diff --git a/gpt4all-chat/src/llmodel_chat.h b/gpt4all-chat/src/llmodel_chat.h
index ca533a31..a44ec3c4 100644
--- a/gpt4all-chat/src/llmodel_chat.h
+++ b/gpt4all-chat/src/llmodel_chat.h
@@ -22,11 +22,11 @@ struct ChatResponseMetadata {
 // TODO: implement two of these; one based on Ollama (TBD) and the other based on OpenAI (chatapi.h)
 class ChatLLMInstance {
 public:
-    virtual ~ChatLLMInstance() = 0;
+    virtual ~ChatLLMInstance() noexcept = 0;
 
     virtual auto description() const -> const ModelDescription * = 0;
     virtual auto preload() -> QCoro::Task<> = 0;
-    virtual auto generate(QStringView prompt, const GenerationParams &params, /*out*/ ChatResponseMetadata &metadata)
+    virtual auto generate(QStringView prompt, const GenerationParams *params, /*out*/ ChatResponseMetadata &metadata)
         -> QCoro::AsyncGenerator = 0;
 };
 
diff --git a/gpt4all-chat/src/llmodel_description.cpp b/gpt4all-chat/src/llmodel_description.cpp
index 2dd5804f..670577a5 100644
--- a/gpt4all-chat/src/llmodel_description.cpp
+++ b/gpt4all-chat/src/llmodel_description.cpp
@@ -7,6 +7,8 @@ namespace gpt4all::ui {
 
+ModelDescription::~ModelDescription() noexcept = default;
+
 auto ModelDescription::newInstance(QNetworkAccessManager *nam) const -> std::unique_ptr<ChatLLMInstance>
 {
     return std::unique_ptr<ChatLLMInstance>(newInstanceImpl(nam));
 }
diff --git a/gpt4all-chat/src/llmodel_description.h b/gpt4all-chat/src/llmodel_description.h
index ae9b7c14..56004a31 100644
--- a/gpt4all-chat/src/llmodel_description.h
+++ b/gpt4all-chat/src/llmodel_description.h
@@ -1,5 +1,7 @@
 #pragma once
 
+#include "llmodel_provider.h" // IWYU pragma: keep
+
 #include
 #include
 
@@ -12,7 +14,6 @@ namespace gpt4all::ui {
 
 class ChatLLMInstance;
-class ModelProvider;
 
 // TODO: implement shared_from_this guidance for restricted construction
 class ModelDescription : public std::enable_shared_from_this<ModelDescription> {
 public:
diff --git a/gpt4all-chat/src/llmodel_ollama.cpp b/gpt4all-chat/src/llmodel_ollama.cpp
index 4a38d5a8..56bea6aa 100644
--- a/gpt4all-chat/src/llmodel_ollama.cpp
+++ b/gpt4all-chat/src/llmodel_ollama.cpp
@@ -21,6 +21,8 @@ auto OllamaGenerationParams::toMap() const -> QMap
     };
 }
 
+OllamaProvider::~OllamaProvider() noexcept = default;
+
 auto OllamaProvider::supportedGenerationParams() const -> QSet<GenerationParam>
 {
     using enum GenerationParam;
@@ -70,7 +72,7 @@ auto OllamaChatModel::preload() -> QCoro::Task<>
     co_return;
 }
 
-auto OllamaChatModel::generate(QStringView prompt, const GenerationParams &params,
+auto OllamaChatModel::generate(QStringView prompt, const GenerationParams *params,
                                /*out*/ ChatResponseMetadata &metadata)
     -> QCoro::AsyncGenerator
 {
diff --git a/gpt4all-chat/src/llmodel_ollama.h b/gpt4all-chat/src/llmodel_ollama.h
index 8a95565b..761b4db0 100644
--- a/gpt4all-chat/src/llmodel_ollama.h
+++ b/gpt4all-chat/src/llmodel_ollama.h
@@ -50,7 +50,7 @@ public:
     auto makeGenerationParams(const QMap &values) const -> OllamaGenerationParams * override;
 };
 
-class OllamaProviderBuiltin : public ModelProviderBuiltin, public OllamaProvider {
+class OllamaProviderBuiltin : public OllamaProvider, public ModelProviderBuiltin {
     Q_GADGET
 
 public:
@@ -109,7 +109,7 @@ public:
 
     auto preload() -> QCoro::Task<> override;
 
-    auto generate(QStringView prompt, const GenerationParams &params, /*out*/ ChatResponseMetadata &metadata)
+    auto generate(QStringView prompt, const GenerationParams *params, /*out*/ ChatResponseMetadata &metadata)
         -> QCoro::AsyncGenerator override;
 
 private:
diff --git a/gpt4all-chat/src/llmodel_openai.cpp b/gpt4all-chat/src/llmodel_openai.cpp
index 6be8f9bf..53b51e85 100644
--- a/gpt4all-chat/src/llmodel_openai.cpp
+++ b/gpt4all-chat/src/llmodel_openai.cpp
@@ -84,6 +84,8 @@ auto OpenaiGenerationParams::toMap() const -> QMap
     };
 }
 
+OpenaiProvider::~OpenaiProvider() noexcept = default;
+
 auto OpenaiProvider::supportedGenerationParams() const -> QSet<GenerationParam>
 {
     using enum GenerationParam;
@@ -212,22 +214,22 @@ static auto parsePrompt(QXmlStreamReader &xml) -> std::expected
- QCoro::Task<>
+auto OpenaiChatModel::preload() -> QCoro::Task<>
 {
     co_return; /* not supported -> no-op */
 }
 
-auto OpenaiChatModel::generate(QStringView prompt, const GenerationParams &params,
+auto OpenaiChatModel::generate(QStringView prompt, const GenerationParams *params,
                                /*out*/ ChatResponseMetadata &metadata)
     -> QCoro::AsyncGenerator
 {
     auto *mySettings = MySettings::globalInstance();
 
-    if (params.isNoop())
+    if (params->isNoop())
         co_return; // nothing requested
 
     auto reqBody = makeJsonObject({
         { "model"_L1,  m_description->modelName() },
         { "stream"_L1, true                       },
     });
-    extend(reqBody, params.toMap());
+    extend(reqBody, params->toMap());
 
     // conversation history
     {
diff --git a/gpt4all-chat/src/llmodel_openai.h b/gpt4all-chat/src/llmodel_openai.h
index 50dba4d7..28c2cee9 100644
--- a/gpt4all-chat/src/llmodel_openai.h
+++ b/gpt4all-chat/src/llmodel_openai.h
@@ -63,7 +63,7 @@ protected:
     QString m_apiKey;
 };
 
-class OpenaiProviderBuiltin : public ModelProviderBuiltin, public OpenaiProvider {
+class OpenaiProviderBuiltin : public OpenaiProvider, public ModelProviderBuiltin {
     Q_GADGET
     Q_PROPERTY(QString apiKey READ apiKey CONSTANT)
 
@@ -127,7 +127,7 @@ public:
 
     auto preload() -> QCoro::Task<> override;
 
-    auto generate(QStringView prompt, const GenerationParams &params, /*out*/ ChatResponseMetadata &metadata)
+    auto generate(QStringView prompt, const GenerationParams *params, /*out*/ ChatResponseMetadata &metadata)
         -> QCoro::AsyncGenerator override;
 
 private:
diff --git a/gpt4all-chat/src/llmodel_provider.cpp b/gpt4all-chat/src/llmodel_provider.cpp
index 9ae83410..ae88c3a5 100644
--- a/gpt4all-chat/src/llmodel_provider.cpp
+++ b/gpt4all-chat/src/llmodel_provider.cpp
@@ -14,6 +14,8 @@ namespace fs = std::filesystem;
 
 namespace gpt4all::ui {
 
+GenerationParams::~GenerationParams() noexcept = default;
+
 void GenerationParams::parse(QMap values)
 {
     parseInner(values);
@@ -38,6 +40,8 @@ QVariant GenerationParams::tryParseValue(QMap &values
     return value;
 }
 
+ModelProvider::~ModelProvider() noexcept = default;
+
 ModelProviderCustom::~ModelProviderCustom() noexcept
 {
     if (auto res = m_store->release(m_id); !res)
diff --git a/gpt4all-chat/src/llmodel_provider.h b/gpt4all-chat/src/llmodel_provider.h
index 5b8c3595..a4aea566 100644
--- a/gpt4all-chat/src/llmodel_provider.h
+++ b/gpt4all-chat/src/llmodel_provider.h
@@ -97,9 +97,6 @@ class ModelProviderBuiltin : public virtual ModelProvider {
     Q_GADGET
     Q_PROPERTY(QString name READ name CONSTANT)
     Q_PROPERTY(QUrl baseUrl READ baseUrl CONSTANT)
-
-public:
-    ~ModelProviderBuiltin() noexcept override = 0;
 };
 
 class ModelProviderCustom : public virtual ModelProvider {
diff --git a/gpt4all-chat/src/modellist.cpp b/gpt4all-chat/src/modellist.cpp
index 248f2f09..218efdad 100644
--- a/gpt4all-chat/src/modellist.cpp
+++ b/gpt4all-chat/src/modellist.cpp
@@ -2,6 +2,7 @@
 
 #include "download.h"
 #include "jinja_replacements.h"
+#include "llmodel_description.h"
 #include "mysettings.h"
 #include "network.h"
 
@@ -46,6 +47,7 @@
 #include
 
 using namespace Qt::Literals::StringLiterals;
+using namespace gpt4all::ui;
 
 //#define USE_LOCAL_MODELSJSON
 
@@ -90,6 +92,12 @@ void ModelInfo::setId(const QString &id)
     m_id = id;
 }
 
+void ModelInfo::setModelDesc(std::shared_ptr<const ModelDescription> value)
+{ m_modelDesc = std::move(value); }
+
+void ModelInfo::setModelDescQt(const ModelDescription *value)
+{ return setModelDesc(value->shared_from_this()); }
+
 QString ModelInfo::name() const
 {
     return MySettings::globalInstance()->modelName(*this);
diff --git a/gpt4all-chat/src/modellist.h b/gpt4all-chat/src/modellist.h
index 5520b288..36a84871 100644
--- a/gpt4all-chat/src/modellist.h
+++ b/gpt4all-chat/src/modellist.h
@@ -77,7 +77,7 @@ private:
 struct ModelInfo {
     Q_GADGET
     Q_PROPERTY(QString id READ id WRITE setId)
-    Q_PROPERTY(const ModelDescription *modelDesc READ modelDescQt WRITE setModelDescQt)
+    Q_PROPERTY(const gpt4all::ui::ModelDescription *modelDesc READ modelDescQt WRITE setModelDescQt)
     Q_PROPERTY(QString name READ name WRITE setName)
     Q_PROPERTY(QString filename READ filename WRITE setFilename)
     Q_PROPERTY(QString dirpath MEMBER dirpath)
@@ -140,12 +140,13 @@ public:
     QString id() const;
     void setId(const QString &id);
 
-    auto modelDesc() const -> const std::shared_ptr<const gpt4all::ui::ModelDescription> &;
+    auto modelDesc() const -> const std::shared_ptr<const gpt4all::ui::ModelDescription> &
+    { return m_modelDesc; }
     auto modelDescQt() const -> const gpt4all::ui::ModelDescription *
     { return modelDesc().get(); }
 
     void setModelDesc(std::shared_ptr<const gpt4all::ui::ModelDescription> value);
-    void setModelDescQt(const gpt4all::ui::ModelDescription *); // TODO: implement
+    void setModelDescQt(const gpt4all::ui::ModelDescription *value);
 
     QString name() const;
     void setName(const QString &name);
@@ -257,7 +258,7 @@ private:
     QVariant getField(QLatin1StringView name) const;
 
     QString m_id;
-    std::shared_ptr<const gpt4all::ui::ModelDescription> m_modelDesc;
+    std::shared_ptr<const gpt4all::ui::ModelDescription> m_modelDesc; // TODO: set this somewhere
     QString m_name;
     QString m_filename;
     QString m_description;
diff --git a/gpt4all-chat/src/mysettings.cpp b/gpt4all-chat/src/mysettings.cpp
index dc538d90..a12daaba 100644
--- a/gpt4all-chat/src/mysettings.cpp
+++ b/gpt4all-chat/src/mysettings.cpp
@@ -2,6 +2,7 @@
 
 #include "chatllm.h"
 #include "config.h"
+#include "llmodel_provider.h"
 #include "modellist.h"
 
 #include
@@ -31,6 +32,7 @@
 #endif
 
 using namespace Qt::Literals::StringLiterals;
+using namespace gpt4all::ui;
 
 
 // used only for settings serialization, do not translate
@@ -352,6 +354,28 @@ int MySettings::modelRepeatPenaltyTokens (const ModelInfo &info) const
 QString MySettings::modelChatNamePrompt         (const ModelInfo &info) const { return getModelSetting("chatNamePrompt", info).toString(); }
 QString MySettings::modelSuggestedFollowUpPrompt(const ModelInfo &info) const { return getModelSetting("suggestedFollowUpPrompt", info).toString(); }
 
+auto MySettings::modelGenParams(const ModelInfo &info) -> std::unique_ptr<GenerationParams>
+{
+#if 0
+    // this code is copied from server.cpp
+    std::unique_ptr<GenerationParams> genParams;
+    {
+        using enum GenerationParam;
+        QMap values;
+        if (auto v = request.max_tokens ) values.insert(NPredict,    *v);
+        if (auto v = request.temperature) values.insert(Temperature, *v);
+        if (auto v = request.top_p      ) values.insert(TopP,        *v);
+        if (auto v = request.min_p      ) values.insert(MinP,        *v);
+        try {
+            genParams.reset(modelProvider()->makeGenerationParams(values));
+        } catch (const std::exception &e) {
+            throw InvalidRequestError(e.what());
+        }
+    }
+#endif
+    return nullptr; // TODO: implement
+}
+
 auto MySettings::getUpgradeableModelSetting(
     const ModelInfo &info, QLatin1StringView legacyKey, QLatin1StringView newKey
 ) const -> UpgradeableSetting
diff --git a/gpt4all-chat/src/mysettings.h b/gpt4all-chat/src/mysettings.h
index 29fe1866..251ec5ed 100644
--- a/gpt4all-chat/src/mysettings.h
+++ b/gpt4all-chat/src/mysettings.h
@@ -156,8 +156,7 @@ public:
     QString modelSuggestedFollowUpPrompt(const ModelInfo &info) const;
     Q_INVOKABLE void setModelSuggestedFollowUpPrompt(const ModelInfo &info, const QString &value, bool force = false);
 
-    // TODO: implement
-    auto modelGenParams(const ModelInfo &info) -> gpt4all::ui::GenerationParams;
+    auto modelGenParams(const ModelInfo &info) -> std::unique_ptr<gpt4all::ui::GenerationParams>;
 
     // Application settings
     bool systemTray() const;
diff --git a/gpt4all-chat/src/server.cpp b/gpt4all-chat/src/server.cpp
index 42ca9c80..ac1c8772 100644
--- a/gpt4all-chat/src/server.cpp
+++ b/gpt4all-chat/src/server.cpp
@@ -3,9 +3,11 @@
 #include "chat.h"
 #include "chatmodel.h"
 #include "llmodel_description.h"
+#include "llmodel_provider.h"
 #include "modellist.h"
 #include "mysettings.h"
 
+#include
 #include
 #include
 #include
@@ -527,7 +529,7 @@ void Server::start()
 #endif
             CompletionRequest req;
             parseRequest(req, std::move(reqObj));
-            auto [resp, respObj] = handleCompletionRequest(req);
+            auto [resp, respObj] = QCoro::waitFor(handleCompletionRequest(req));
 #if defined(DEBUG)
             if (respObj)
                 qDebug().noquote() << "/v1/completions reply" << QJsonDocument(*respObj).toJson(QJsonDocument::Indented);
@@ -551,7 +553,7 @@ void Server::start()
 #endif
             ChatRequest req;
             parseRequest(req, std::move(reqObj));
-            auto [resp, respObj] = handleChatRequest(req);
+            auto [resp, respObj] = QCoro::waitFor(handleChatRequest(req));
             (void)respObj;
 #if defined(DEBUG)
             if (respObj)
@@ -628,7 +630,7 @@ static auto makeError(auto &&...args) -> std::pair
 }
 
 auto Server::handleCompletionRequest(const CompletionRequest &request)
-    -> std::pair<QHttpServerResponse, std::optional<QJsonObject>>
+    -> QCoro::Task<std::pair<QHttpServerResponse, std::optional<QJsonObject>>>
 {
     Q_ASSERT(m_chatModel);
 
@@ -649,7 +651,7 @@
 
     if (modelInfo.filename().isEmpty()) {
         std::cerr << "ERROR: couldn't load default model " << request.model.toStdString() << std::endl;
-        return makeError(QHttpServerResponder::StatusCode::InternalServerError);
+        co_return makeError(QHttpServerResponder::StatusCode::InternalServerError);
     }
 
     emit requestResetResponseState(); // blocks
@@ -657,10 +659,9 @@
     auto prevMsgIndex = m_chatModel->count() - 1;
     if (prevMsgIndex >= 0)
         m_chatModel->updateCurrentResponse(prevMsgIndex, false);
-    // NB: this resets the context, regardless of whether this model is already loaded
-    if (!loadModel(modelInfo)) {
+    if (!co_await loadModel(modelInfo)) {
         std::cerr << "ERROR: couldn't load model " << modelInfo.name().toStdString() << std::endl;
-        return makeError(QHttpServerResponder::StatusCode::InternalServerError);
+        co_return makeError(QHttpServerResponder::StatusCode::InternalServerError);
     }
 
     std::unique_ptr<GenerationParams> genParams;
@@ -672,7 +673,7 @@
         if (auto v = request.top_p      ) values.insert(TopP,        *v);
         if (auto v = request.min_p      ) values.insert(MinP,        *v);
         try {
-            genParams.reset(modelDescription()->makeGenerationParams(values));
+            genParams.reset(modelProvider()->makeGenerationParams(values));
         } catch (const std::exception &e) {
             throw InvalidRequestError(e.what());
         }
@@ -689,14 +690,14 @@
     for (int i = 0; i < request.n; ++i) {
         PromptResult result;
         try {
-            result = promptInternal(std::string_view(promptUtf8.cbegin(), promptUtf8.cend()),
-                                    *genParams,
-                                    /*usedLocalDocs*/ false);
+            result = co_await promptInternal(std::string_view(promptUtf8.cbegin(), promptUtf8.cend()),
+                                             genParams.get(),
+                                             /*usedLocalDocs*/ false);
         } catch (const std::exception &e) {
             m_chatModel->setResponseValue(e.what());
             m_chatModel->setError();
             emit responseStopped(0);
-            return makeError(QHttpServerResponder::StatusCode::InternalServerError);
+            co_return makeError(QHttpServerResponder::StatusCode::InternalServerError);
         }
         QString resp = QString::fromUtf8(result.response);
         if (request.echo)
@@ -731,11 +732,11 @@
         { "total_tokens", promptTokens + responseTokens },
     });
 
-    return {QHttpServerResponse(responseObject), responseObject};
+    co_return { QHttpServerResponse(responseObject), responseObject };
 }
 
 auto Server::handleChatRequest(const ChatRequest &request)
-    -> std::pair<QHttpServerResponse, std::optional<QJsonObject>>
+    -> QCoro::Task<std::pair<QHttpServerResponse, std::optional<QJsonObject>>>
 {
     ModelInfo modelInfo = ModelList::globalInstance()->defaultModelInfo();
     const QList<ModelInfo> modelList = ModelList::globalInstance()->selectableModelList();
@@ -754,15 +755,14 @@
 
     if (modelInfo.filename().isEmpty()) {
         std::cerr << "ERROR: couldn't load default model " << request.model.toStdString() << std::endl;
-        return makeError(QHttpServerResponder::StatusCode::InternalServerError);
+        co_return makeError(QHttpServerResponder::StatusCode::InternalServerError);
     }
 
     emit requestResetResponseState(); // blocks
 
-    // NB: this resets the context, regardless of whether this model is already loaded
-    if (!loadModel(modelInfo)) {
+    if (!co_await loadModel(modelInfo)) {
         std::cerr << "ERROR: couldn't load model " << modelInfo.name().toStdString() << std::endl;
-        return makeError(QHttpServerResponder::StatusCode::InternalServerError);
+        co_return makeError(QHttpServerResponder::StatusCode::InternalServerError);
     }
 
     m_chatModel->updateCurrentResponse(m_chatModel->count() - 1, false);
@@ -790,7 +790,7 @@
         if (auto v = request.top_p      ) values.insert(TopP,        *v);
         if (auto v = request.min_p      ) values.insert(MinP,        *v);
         try {
-            genParams.reset(modelDescription()->makeGenerationParams(values));
+            genParams.reset(modelProvider()->makeGenerationParams(values));
         } catch (const std::exception &e) {
             throw InvalidRequestError(e.what());
         }
@@ -802,12 +802,12 @@
     for (int i = 0; i < request.n; ++i) {
         ChatPromptResult result;
         try {
-            result = promptInternalChat(m_collections, *genParams, startOffset);
+            result = co_await promptInternalChat(m_collections, genParams.get(), startOffset);
         } catch (const std::exception &e) {
             m_chatModel->setResponseValue(e.what());
             m_chatModel->setError();
             emit responseStopped(0);
-            return makeError(QHttpServerResponder::StatusCode::InternalServerError);
+            co_return makeError(QHttpServerResponder::StatusCode::InternalServerError);
         }
         responses.emplace_back(result.response, result.databaseResults);
         if (i == 0)
@@ -855,5 +855,5 @@
         { "total_tokens", promptTokens + responseTokens },
     });
 
-    return {QHttpServerResponse(responseObject), responseObject};
+    co_return {QHttpServerResponse(responseObject), responseObject};
 }
diff --git a/gpt4all-chat/src/server.h b/gpt4all-chat/src/server.h
index 465bf524..3485948d 100644
--- a/gpt4all-chat/src/server.h
+++ b/gpt4all-chat/src/server.h
@@ -18,6 +18,7 @@
 class Chat;
 class ChatRequest;
 class CompletionRequest;
+namespace QCoro { template <typename T> class Task; }
 
 
 class Server : public ChatLLM
@@ -35,8 +36,8 @@ Q_SIGNALS:
     void requestResetResponseState();
 
 private:
-    auto handleCompletionRequest(const CompletionRequest &request) -> std::pair<QHttpServerResponse, std::optional<QJsonObject>>;
-    auto handleChatRequest(const ChatRequest &request) -> std::pair<QHttpServerResponse, std::optional<QJsonObject>>;
+    auto handleCompletionRequest(const CompletionRequest &request) -> QCoro::Task<std::pair<QHttpServerResponse, std::optional<QJsonObject>>>;
+    auto handleChatRequest(const ChatRequest &request) -> QCoro::Task<std::pair<QHttpServerResponse, std::optional<QJsonObject>>>;
 
 private Q_SLOTS:
     void handleDatabaseResultsChanged(const QList<ResultInfo> &results) { m_databaseResults = results; }
diff --git a/gpt4all-chat/src/store_provider.cpp b/gpt4all-chat/src/store_provider.cpp
index e7dc1eec..6c42a437 100644
--- a/gpt4all-chat/src/store_provider.cpp
+++ b/gpt4all-chat/src/store_provider.cpp
@@ -17,7 +17,7 @@ auto ProviderStore::create(QString name, QUrl base_url, QString api_key)
 
 auto ProviderStore::create(QString name, QUrl base_url) -> DataStoreResult
 {
-    ModelProviderData data { QUuid::createUuid(), ProviderType::ollama, name, std::move(base_url) };
+    ModelProviderData data { QUuid::createUuid(), ProviderType::ollama, name, std::move(base_url), {} };
     return createImpl(std::move(data), name);
 }
diff --git a/gpt4all-chat/src/utils.inl b/gpt4all-chat/src/utils.inl
index 68d4b2da..d93c7298 100644
--- a/gpt4all-chat/src/utils.inl
+++ b/gpt4all-chat/src/utils.inl
@@ -36,9 +36,7 @@ inline auto toFSPath(const QString &str) -> std::filesystem::path
              reinterpret_cast<const char16_t *>(str.cend  ()) };
 }
 
-FileError::FileError(const QString &str, QFileDevice::FileError code)
+inline FileError::FileError(const QString &str, QFileDevice::FileError code)
     : std::runtime_error(str.toUtf8().constData())
     , m_code(code)
-{
-    Q_ASSERT(code);
-}
+{ Q_ASSERT(code); }